15 16 12 10 70 53 11 5 7 7 14 12 4 6 3 4 5 6 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 | // SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/binfmt_script.c * * Copyright (C) 1996 Martin von Löwis * original #!-checking implemented by tytso. */ #include <linux/module.h> #include <linux/string.h> #include <linux/stat.h> #include <linux/binfmts.h> #include <linux/init.h> #include <linux/file.h> #include <linux/err.h> #include <linux/fs.h> static inline bool spacetab(char c) { return c == ' ' || c == '\t'; } static inline const char *next_non_spacetab(const char *first, const char *last) { for (; first <= last; first++) if (!spacetab(*first)) return first; return NULL; } static inline const char *next_terminator(const char *first, const char *last) { for (; first <= last; first++) if (spacetab(*first) || !*first) return first; return NULL; } static int load_script(struct linux_binprm *bprm) { const char *i_name, *i_sep, *i_arg, *i_end, *buf_end; struct file *file; int retval; /* Not ours to exec if we don't start with "#!". */ if ((bprm->buf[0] != '#') || (bprm->buf[1] != '!')) return -ENOEXEC; /* * This section handles parsing the #! line into separate * interpreter path and argument strings. We must be careful * because bprm->buf is not yet guaranteed to be NUL-terminated * (though the buffer will have trailing NUL padding when the * file size was smaller than the buffer size). * * We do not want to exec a truncated interpreter path, so either * we find a newline (which indicates nothing is truncated), or * we find a space/tab/NUL after the interpreter path (which * itself may be preceded by spaces/tabs). Truncating the * arguments is fine: the interpreter can re-read the script to * parse them on its own. */ buf_end = bprm->buf + sizeof(bprm->buf) - 1; i_end = strnchr(bprm->buf, sizeof(bprm->buf), '\n'); if (!i_end) { i_end = next_non_spacetab(bprm->buf + 2, buf_end); if (!i_end) return -ENOEXEC; /* Entire buf is spaces/tabs */ /* * If there is no later space/tab/NUL we must assume the * interpreter path is truncated. */ if (!next_terminator(i_end, buf_end)) return -ENOEXEC; i_end = buf_end; } /* Trim any trailing spaces/tabs from i_end */ while (spacetab(i_end[-1])) i_end--; /* Skip over leading spaces/tabs */ i_name = next_non_spacetab(bprm->buf+2, i_end); if (!i_name || (i_name == i_end)) return -ENOEXEC; /* No interpreter name found */ /* Is there an optional argument? */ i_arg = NULL; i_sep = next_terminator(i_name, i_end); if (i_sep && (*i_sep != '\0')) i_arg = next_non_spacetab(i_sep, i_end); /* * If the script filename will be inaccessible after exec, typically * because it is a "/dev/fd/<fd>/.." path against an O_CLOEXEC fd, give * up now (on the assumption that the interpreter will want to load * this file). */ if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE) return -ENOENT; /* * OK, we've parsed out the interpreter name and * (optional) argument. * Splice in (1) the interpreter's name for argv[0] * (2) (optional) argument to interpreter * (3) filename of shell script (replace argv[0]) * * This is done in reverse order, because of how the * user environment and arguments are stored. */ retval = remove_arg_zero(bprm); if (retval) return retval; retval = copy_string_kernel(bprm->interp, bprm); if (retval < 0) return retval; bprm->argc++; *((char *)i_end) = '\0'; if (i_arg) { *((char *)i_sep) = '\0'; retval = copy_string_kernel(i_arg, bprm); if (retval < 0) return retval; bprm->argc++; } retval = copy_string_kernel(i_name, bprm); if (retval) return retval; bprm->argc++; retval = bprm_change_interp(i_name, bprm); if (retval < 0) return retval; /* * OK, now restart the process with the interpreter's dentry. */ file = open_exec(i_name); if (IS_ERR(file)) return PTR_ERR(file); bprm->interpreter = file; return 0; } static struct linux_binfmt script_format = { .module = THIS_MODULE, .load_binary = load_script, }; static int __init init_script_binfmt(void) { register_binfmt(&script_format); return 0; } static void __exit exit_script_binfmt(void) { unregister_binfmt(&script_format); } core_initcall(init_script_binfmt); module_exit(exit_script_binfmt); MODULE_LICENSE("GPL"); |
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 4 4 2 4 2 2 4 2 2 1 1 1 4 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 | // SPDX-License-Identifier: GPL-2.0-or-later /* * imon.c: input and display driver for SoundGraph iMON IR/VFD/LCD * * Copyright(C) 2010 Jarod Wilson <jarod@wilsonet.com> * Portions based on the original lirc_imon driver, * Copyright(C) 2004 Venky Raju(dev@venky.ws) * * Huge thanks to R. Geoff Newbury for invaluable debugging on the * 0xffdc iMON devices, and for sending me one to hack on, without * which the support for them wouldn't be nearly as good. Thanks * also to the numerous 0xffdc device owners that tested auto-config * support for me and provided debug dumps from their devices. */ #define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ #include <linux/errno.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/ktime.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/ratelimit.h> #include <linux/input.h> #include <linux/usb.h> #include <linux/usb/input.h> #include <media/rc-core.h> #include <linux/timer.h> #define MOD_AUTHOR "Jarod Wilson <jarod@wilsonet.com>" #define MOD_DESC "Driver for SoundGraph iMON MultiMedia IR/Display" #define MOD_NAME "imon" #define MOD_VERSION "0.9.4" #define DISPLAY_MINOR_BASE 144 #define DEVICE_NAME "lcd%d" #define BUF_CHUNK_SIZE 8 #define BUF_SIZE 128 #define BIT_DURATION 250 /* each bit received is 250us */ #define IMON_CLOCK_ENABLE_PACKETS 2 /*** P R O T O T Y P E S ***/ /* USB Callback prototypes */ static int imon_probe(struct usb_interface *interface, const struct usb_device_id *id); static void imon_disconnect(struct usb_interface *interface); static void usb_rx_callback_intf0(struct urb *urb); static void usb_rx_callback_intf1(struct urb *urb); static void usb_tx_callback(struct urb *urb); /* suspend/resume support */ static int imon_resume(struct usb_interface *intf); static int imon_suspend(struct usb_interface *intf, pm_message_t message); /* Display file_operations function prototypes */ static int display_open(struct inode *inode, struct file *file); static int display_close(struct inode *inode, struct file *file); /* VFD write operation */ static ssize_t vfd_write(struct file *file, const char __user *buf, size_t n_bytes, loff_t *pos); /* LCD file_operations override function prototypes */ static ssize_t lcd_write(struct file *file, const char __user *buf, size_t n_bytes, loff_t *pos); /*** G L O B A L S ***/ struct imon_panel_key_table { u64 hw_code; u32 keycode; }; struct imon_usb_dev_descr { __u16 flags; #define IMON_NO_FLAGS 0 #define IMON_NEED_20MS_PKT_DELAY 1 #define IMON_SUPPRESS_REPEATED_KEYS 2 struct imon_panel_key_table key_table[]; }; struct imon_context { struct device *dev; /* Newer devices have two interfaces */ struct usb_device *usbdev_intf0; struct usb_device *usbdev_intf1; bool display_supported; /* not all controllers do */ bool display_isopen; /* display port has been opened */ bool rf_device; /* true if iMON 2.4G LT/DT RF device */ bool rf_isassociating; /* RF remote associating */ bool dev_present_intf0; /* USB device presence, interface 0 */ bool dev_present_intf1; /* USB device presence, interface 1 */ struct mutex lock; /* to lock this object */ wait_queue_head_t remove_ok; /* For unexpected USB disconnects */ struct usb_endpoint_descriptor *rx_endpoint_intf0; struct usb_endpoint_descriptor *rx_endpoint_intf1; struct usb_endpoint_descriptor *tx_endpoint; struct urb *rx_urb_intf0; struct urb *rx_urb_intf1; struct urb *tx_urb; bool tx_control; unsigned char usb_rx_buf[8]; unsigned char usb_tx_buf[8]; unsigned int send_packet_delay; struct tx_t { unsigned char data_buf[35]; /* user data buffer */ struct completion finished; /* wait for write to finish */ bool busy; /* write in progress */ int status; /* status of tx completion */ } tx; u16 vendor; /* usb vendor ID */ u16 product; /* usb product ID */ struct rc_dev *rdev; /* rc-core device for remote */ struct input_dev *idev; /* input device for panel & IR mouse */ struct input_dev *touch; /* input device for touchscreen */ spinlock_t kc_lock; /* make sure we get keycodes right */ u32 kc; /* current input keycode */ u32 last_keycode; /* last reported input keycode */ u32 rc_scancode; /* the computed remote scancode */ u8 rc_toggle; /* the computed remote toggle bit */ u64 rc_proto; /* iMON or MCE (RC6) IR protocol? */ bool release_code; /* some keys send a release code */ u8 display_type; /* store the display type */ bool pad_mouse; /* toggle kbd(0)/mouse(1) mode */ char name_rdev[128]; /* rc input device name */ char phys_rdev[64]; /* rc input device phys path */ char name_idev[128]; /* input device name */ char phys_idev[64]; /* input device phys path */ char name_touch[128]; /* touch screen name */ char phys_touch[64]; /* touch screen phys path */ struct timer_list ttimer; /* touch screen timer */ int touch_x; /* x coordinate on touchscreen */ int touch_y; /* y coordinate on touchscreen */ const struct imon_usb_dev_descr *dev_descr; /* device description with key */ /* table for front panels */ /* * Fields for deferring free_imon_context(). * * Since reference to "struct imon_context" is stored into * "struct file"->private_data, we need to remember * how many file descriptors might access this "struct imon_context". */ refcount_t users; /* * Use a flag for telling display_open()/vfd_write()/lcd_write() that * imon_disconnect() was already called. */ bool disconnected; /* * We need to wait for RCU grace period in order to allow * display_open() to safely check ->disconnected and increment ->users. */ struct rcu_head rcu; }; #define TOUCH_TIMEOUT (HZ/30) /* vfd character device file operations */ static const struct file_operations vfd_fops = { .owner = THIS_MODULE, .open = display_open, .write = vfd_write, .release = display_close, .llseek = noop_llseek, }; /* lcd character device file operations */ static const struct file_operations lcd_fops = { .owner = THIS_MODULE, .open = display_open, .write = lcd_write, .release = display_close, .llseek = noop_llseek, }; enum { IMON_DISPLAY_TYPE_AUTO = 0, IMON_DISPLAY_TYPE_VFD = 1, IMON_DISPLAY_TYPE_LCD = 2, IMON_DISPLAY_TYPE_VGA = 3, IMON_DISPLAY_TYPE_NONE = 4, }; enum { IMON_KEY_IMON = 0, IMON_KEY_MCE = 1, IMON_KEY_PANEL = 2, }; static struct usb_class_driver imon_vfd_class = { .name = DEVICE_NAME, .fops = &vfd_fops, .minor_base = DISPLAY_MINOR_BASE, }; static struct usb_class_driver imon_lcd_class = { .name = DEVICE_NAME, .fops = &lcd_fops, .minor_base = DISPLAY_MINOR_BASE, }; /* imon receiver front panel/knob key table */ static const struct imon_usb_dev_descr imon_default_table = { .flags = IMON_NO_FLAGS, .key_table = { { 0x000000000f00ffeell, KEY_MEDIA }, /* Go */ { 0x000000001200ffeell, KEY_UP }, { 0x000000001300ffeell, KEY_DOWN }, { 0x000000001400ffeell, KEY_LEFT }, { 0x000000001500ffeell, KEY_RIGHT }, { 0x000000001600ffeell, KEY_ENTER }, { 0x000000001700ffeell, KEY_ESC }, { 0x000000001f00ffeell, KEY_AUDIO }, { 0x000000002000ffeell, KEY_VIDEO }, { 0x000000002100ffeell, KEY_CAMERA }, { 0x000000002700ffeell, KEY_DVD }, { 0x000000002300ffeell, KEY_TV }, { 0x000000002b00ffeell, KEY_EXIT }, { 0x000000002c00ffeell, KEY_SELECT }, { 0x000000002d00ffeell, KEY_MENU }, { 0x000000000500ffeell, KEY_PREVIOUS }, { 0x000000000700ffeell, KEY_REWIND }, { 0x000000000400ffeell, KEY_STOP }, { 0x000000003c00ffeell, KEY_PLAYPAUSE }, { 0x000000000800ffeell, KEY_FASTFORWARD }, { 0x000000000600ffeell, KEY_NEXT }, { 0x000000010000ffeell, KEY_RIGHT }, { 0x000001000000ffeell, KEY_LEFT }, { 0x000000003d00ffeell, KEY_SELECT }, { 0x000100000000ffeell, KEY_VOLUMEUP }, { 0x010000000000ffeell, KEY_VOLUMEDOWN }, { 0x000000000100ffeell, KEY_MUTE }, /* 0xffdc iMON MCE VFD */ { 0x00010000ffffffeell, KEY_VOLUMEUP }, { 0x01000000ffffffeell, KEY_VOLUMEDOWN }, { 0x00000001ffffffeell, KEY_MUTE }, { 0x0000000fffffffeell, KEY_MEDIA }, { 0x00000012ffffffeell, KEY_UP }, { 0x00000013ffffffeell, KEY_DOWN }, { 0x00000014ffffffeell, KEY_LEFT }, { 0x00000015ffffffeell, KEY_RIGHT }, { 0x00000016ffffffeell, KEY_ENTER }, { 0x00000017ffffffeell, KEY_ESC }, /* iMON Knob values */ { 0x000100ffffffffeell, KEY_VOLUMEUP }, { 0x010000ffffffffeell, KEY_VOLUMEDOWN }, { 0x000008ffffffffeell, KEY_MUTE }, { 0, KEY_RESERVED }, } }; static const struct imon_usb_dev_descr imon_OEM_VFD = { .flags = IMON_NEED_20MS_PKT_DELAY, .key_table = { { 0x000000000f00ffeell, KEY_MEDIA }, /* Go */ { 0x000000001200ffeell, KEY_UP }, { 0x000000001300ffeell, KEY_DOWN }, { 0x000000001400ffeell, KEY_LEFT }, { 0x000000001500ffeell, KEY_RIGHT }, { 0x000000001600ffeell, KEY_ENTER }, { 0x000000001700ffeell, KEY_ESC }, { 0x000000001f00ffeell, KEY_AUDIO }, { 0x000000002b00ffeell, KEY_EXIT }, { 0x000000002c00ffeell, KEY_SELECT }, { 0x000000002d00ffeell, KEY_MENU }, { 0x000000000500ffeell, KEY_PREVIOUS }, { 0x000000000700ffeell, KEY_REWIND }, { 0x000000000400ffeell, KEY_STOP }, { 0x000000003c00ffeell, KEY_PLAYPAUSE }, { 0x000000000800ffeell, KEY_FASTFORWARD }, { 0x000000000600ffeell, KEY_NEXT }, { 0x000000010000ffeell, KEY_RIGHT }, { 0x000001000000ffeell, KEY_LEFT }, { 0x000000003d00ffeell, KEY_SELECT }, { 0x000100000000ffeell, KEY_VOLUMEUP }, { 0x010000000000ffeell, KEY_VOLUMEDOWN }, { 0x000000000100ffeell, KEY_MUTE }, /* 0xffdc iMON MCE VFD */ { 0x00010000ffffffeell, KEY_VOLUMEUP }, { 0x01000000ffffffeell, KEY_VOLUMEDOWN }, { 0x00000001ffffffeell, KEY_MUTE }, { 0x0000000fffffffeell, KEY_MEDIA }, { 0x00000012ffffffeell, KEY_UP }, { 0x00000013ffffffeell, KEY_DOWN }, { 0x00000014ffffffeell, KEY_LEFT }, { 0x00000015ffffffeell, KEY_RIGHT }, { 0x00000016ffffffeell, KEY_ENTER }, { 0x00000017ffffffeell, KEY_ESC }, /* iMON Knob values */ { 0x000100ffffffffeell, KEY_VOLUMEUP }, { 0x010000ffffffffeell, KEY_VOLUMEDOWN }, { 0x000008ffffffffeell, KEY_MUTE }, { 0, KEY_RESERVED }, } }; /* imon receiver front panel/knob key table for DH102*/ static const struct imon_usb_dev_descr imon_DH102 = { .flags = IMON_NO_FLAGS, .key_table = { { 0x000100000000ffeell, KEY_VOLUMEUP }, { 0x010000000000ffeell, KEY_VOLUMEDOWN }, { 0x000000010000ffeell, KEY_MUTE }, { 0x0000000f0000ffeell, KEY_MEDIA }, { 0x000000120000ffeell, KEY_UP }, { 0x000000130000ffeell, KEY_DOWN }, { 0x000000140000ffeell, KEY_LEFT }, { 0x000000150000ffeell, KEY_RIGHT }, { 0x000000160000ffeell, KEY_ENTER }, { 0x000000170000ffeell, KEY_ESC }, { 0x0000002b0000ffeell, KEY_EXIT }, { 0x0000002c0000ffeell, KEY_SELECT }, { 0x0000002d0000ffeell, KEY_MENU }, { 0, KEY_RESERVED } } }; /* imon ultrabay front panel key table */ static const struct imon_usb_dev_descr ultrabay_table = { .flags = IMON_SUPPRESS_REPEATED_KEYS, .key_table = { { 0x0000000f0000ffeell, KEY_MEDIA }, /* Go */ { 0x000000000100ffeell, KEY_UP }, { 0x000000000001ffeell, KEY_DOWN }, { 0x000000160000ffeell, KEY_ENTER }, { 0x0000001f0000ffeell, KEY_AUDIO }, /* Music */ { 0x000000200000ffeell, KEY_VIDEO }, /* Movie */ { 0x000000210000ffeell, KEY_CAMERA }, /* Photo */ { 0x000000270000ffeell, KEY_DVD }, /* DVD */ { 0x000000230000ffeell, KEY_TV }, /* TV */ { 0x000000050000ffeell, KEY_PREVIOUS }, /* Previous */ { 0x000000070000ffeell, KEY_REWIND }, { 0x000000040000ffeell, KEY_STOP }, { 0x000000020000ffeell, KEY_PLAYPAUSE }, { 0x000000080000ffeell, KEY_FASTFORWARD }, { 0x000000060000ffeell, KEY_NEXT }, /* Next */ { 0x000100000000ffeell, KEY_VOLUMEUP }, { 0x010000000000ffeell, KEY_VOLUMEDOWN }, { 0x000000010000ffeell, KEY_MUTE }, { 0, KEY_RESERVED }, } }; /* * USB Device ID for iMON USB Control Boards * * The Windows drivers contain 6 different inf files, more or less one for * each new device until the 0x0034-0x0046 devices, which all use the same * driver. Some of the devices in the 34-46 range haven't been definitively * identified yet. Early devices have either a TriGem Computer, Inc. or a * Samsung vendor ID (0x0aa8 and 0x04e8 respectively), while all later * devices use the SoundGraph vendor ID (0x15c2). This driver only supports * the ffdc and later devices, which do onboard decoding. */ static const struct usb_device_id imon_usb_id_table[] = { /* * Several devices with this same device ID, all use iMON_PAD.inf * SoundGraph iMON PAD (IR & VFD) * SoundGraph iMON PAD (IR & LCD) * SoundGraph iMON Knob (IR only) */ { USB_DEVICE(0x15c2, 0xffdc), .driver_info = (unsigned long)&imon_default_table }, /* * Newer devices, all driven by the latest iMON Windows driver, full * list of device IDs extracted via 'strings Setup/data1.hdr |grep 15c2' * Need user input to fill in details on unknown devices. */ /* SoundGraph iMON OEM Touch LCD (IR & 7" VGA LCD) */ { USB_DEVICE(0x15c2, 0x0034), .driver_info = (unsigned long)&imon_DH102 }, /* SoundGraph iMON OEM Touch LCD (IR & 4.3" VGA LCD) */ { USB_DEVICE(0x15c2, 0x0035), .driver_info = (unsigned long)&imon_default_table}, /* SoundGraph iMON OEM VFD (IR & VFD) */ { USB_DEVICE(0x15c2, 0x0036), .driver_info = (unsigned long)&imon_OEM_VFD }, /* device specifics unknown */ { USB_DEVICE(0x15c2, 0x0037), .driver_info = (unsigned long)&imon_default_table}, /* SoundGraph iMON OEM LCD (IR & LCD) */ { USB_DEVICE(0x15c2, 0x0038), .driver_info = (unsigned long)&imon_default_table}, /* SoundGraph iMON UltraBay (IR & LCD) */ { USB_DEVICE(0x15c2, 0x0039), .driver_info = (unsigned long)&imon_default_table}, /* device specifics unknown */ { USB_DEVICE(0x15c2, 0x003a), .driver_info = (unsigned long)&imon_default_table}, /* device specifics unknown */ { USB_DEVICE(0x15c2, 0x003b), .driver_info = (unsigned long)&imon_default_table}, /* SoundGraph iMON OEM Inside (IR only) */ { USB_DEVICE(0x15c2, 0x003c), .driver_info = (unsigned long)&imon_default_table}, /* device specifics unknown */ { USB_DEVICE(0x15c2, 0x003d), .driver_info = (unsigned long)&imon_default_table}, /* device specifics unknown */ { USB_DEVICE(0x15c2, 0x003e), .driver_info = (unsigned long)&imon_default_table}, /* device specifics unknown */ { USB_DEVICE(0x15c2, 0x003f), .driver_info = (unsigned long)&imon_default_table}, /* device specifics unknown */ { USB_DEVICE(0x15c2, 0x0040), .driver_info = (unsigned long)&imon_default_table}, /* SoundGraph iMON MINI (IR only) */ { USB_DEVICE(0x15c2, 0x0041), .driver_info = (unsigned long)&imon_default_table}, /* Antec Veris Multimedia Station EZ External (IR only) */ { USB_DEVICE(0x15c2, 0x0042), .driver_info = (unsigned long)&imon_default_table}, /* Antec Veris Multimedia Station Basic Internal (IR only) */ { USB_DEVICE(0x15c2, 0x0043), .driver_info = (unsigned long)&imon_default_table}, /* Antec Veris Multimedia Station Elite (IR & VFD) */ { USB_DEVICE(0x15c2, 0x0044), .driver_info = (unsigned long)&imon_default_table}, /* Antec Veris Multimedia Station Premiere (IR & LCD) */ { USB_DEVICE(0x15c2, 0x0045), .driver_info = (unsigned long)&imon_default_table}, /* device specifics unknown */ { USB_DEVICE(0x15c2, 0x0046), .driver_info = (unsigned long)&imon_default_table}, {} }; /* USB Device data */ static struct usb_driver imon_driver = { .name = MOD_NAME, .probe = imon_probe, .disconnect = imon_disconnect, .suspend = imon_suspend, .resume = imon_resume, .id_table = imon_usb_id_table, }; /* Module bookkeeping bits */ MODULE_AUTHOR(MOD_AUTHOR); MODULE_DESCRIPTION(MOD_DESC); MODULE_VERSION(MOD_VERSION); MODULE_LICENSE("GPL"); MODULE_DEVICE_TABLE(usb, imon_usb_id_table); static bool debug; module_param(debug, bool, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(debug, "Debug messages: 0=no, 1=yes (default: no)"); /* lcd, vfd, vga or none? should be auto-detected, but can be overridden... */ static int display_type; module_param(display_type, int, S_IRUGO); MODULE_PARM_DESC(display_type, "Type of attached display. 0=autodetect, 1=vfd, 2=lcd, 3=vga, 4=none (default: autodetect)"); static int pad_stabilize = 1; module_param(pad_stabilize, int, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(pad_stabilize, "Apply stabilization algorithm to iMON PAD presses in arrow key mode. 0=disable, 1=enable (default)."); /* * In certain use cases, mouse mode isn't really helpful, and could actually * cause confusion, so allow disabling it when the IR device is open. */ static bool nomouse; module_param(nomouse, bool, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(nomouse, "Disable mouse input device mode when IR device is open. 0=don't disable, 1=disable. (default: don't disable)"); /* threshold at which a pad push registers as an arrow key in kbd mode */ static int pad_thresh; module_param(pad_thresh, int, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(pad_thresh, "Threshold at which a pad push registers as an arrow key in kbd mode (default: 28)"); static void free_imon_context(struct imon_context *ictx) { struct device *dev = ictx->dev; usb_free_urb(ictx->tx_urb); WARN_ON(ictx->dev_present_intf0); usb_free_urb(ictx->rx_urb_intf0); WARN_ON(ictx->dev_present_intf1); usb_free_urb(ictx->rx_urb_intf1); kfree_rcu(ictx, rcu); dev_dbg(dev, "%s: iMON context freed\n", __func__); } /* * Called when the Display device (e.g. /dev/lcd0) * is opened by the application. */ static int display_open(struct inode *inode, struct file *file) { struct usb_interface *interface; struct imon_context *ictx = NULL; int subminor; int retval = 0; subminor = iminor(inode); interface = usb_find_interface(&imon_driver, subminor); if (!interface) { pr_err("could not find interface for minor %d\n", subminor); retval = -ENODEV; goto exit; } rcu_read_lock(); ictx = usb_get_intfdata(interface); if (!ictx || ictx->disconnected || !refcount_inc_not_zero(&ictx->users)) { rcu_read_unlock(); pr_err("no context found for minor %d\n", subminor); retval = -ENODEV; goto exit; } rcu_read_unlock(); mutex_lock(&ictx->lock); if (!ictx->display_supported) { pr_err("display not supported by device\n"); retval = -ENODEV; } else if (ictx->display_isopen) { pr_err("display port is already open\n"); retval = -EBUSY; } else { ictx->display_isopen = true; file->private_data = ictx; dev_dbg(ictx->dev, "display port opened\n"); } mutex_unlock(&ictx->lock); if (retval && refcount_dec_and_test(&ictx->users)) free_imon_context(ictx); exit: return retval; } /* * Called when the display device (e.g. /dev/lcd0) * is closed by the application. */ static int display_close(struct inode *inode, struct file *file) { struct imon_context *ictx = file->private_data; int retval = 0; mutex_lock(&ictx->lock); if (!ictx->display_supported) { pr_err("display not supported by device\n"); retval = -ENODEV; } else if (!ictx->display_isopen) { pr_err("display is not open\n"); retval = -EIO; } else { ictx->display_isopen = false; dev_dbg(ictx->dev, "display port closed\n"); } mutex_unlock(&ictx->lock); if (refcount_dec_and_test(&ictx->users)) free_imon_context(ictx); return retval; } /* * Sends a packet to the device -- this function must be called with * ictx->lock held, or its unlock/lock sequence while waiting for tx * to complete can/will lead to a deadlock. */ static int send_packet(struct imon_context *ictx) { unsigned int pipe; unsigned long timeout; int interval = 0; int retval = 0; struct usb_ctrlrequest *control_req = NULL; /* Check if we need to use control or interrupt urb */ if (!ictx->tx_control) { pipe = usb_sndintpipe(ictx->usbdev_intf0, ictx->tx_endpoint->bEndpointAddress); interval = ictx->tx_endpoint->bInterval; usb_fill_int_urb(ictx->tx_urb, ictx->usbdev_intf0, pipe, ictx->usb_tx_buf, sizeof(ictx->usb_tx_buf), usb_tx_callback, ictx, interval); ictx->tx_urb->actual_length = 0; } else { /* fill request into kmalloc'ed space: */ control_req = kmalloc(sizeof(*control_req), GFP_KERNEL); if (control_req == NULL) return -ENOMEM; /* setup packet is '21 09 0200 0001 0008' */ control_req->bRequestType = 0x21; control_req->bRequest = 0x09; control_req->wValue = cpu_to_le16(0x0200); control_req->wIndex = cpu_to_le16(0x0001); control_req->wLength = cpu_to_le16(0x0008); /* control pipe is endpoint 0x00 */ pipe = usb_sndctrlpipe(ictx->usbdev_intf0, 0); /* build the control urb */ usb_fill_control_urb(ictx->tx_urb, ictx->usbdev_intf0, pipe, (unsigned char *)control_req, ictx->usb_tx_buf, sizeof(ictx->usb_tx_buf), usb_tx_callback, ictx); ictx->tx_urb->actual_length = 0; } reinit_completion(&ictx->tx.finished); ictx->tx.busy = true; smp_rmb(); /* ensure later readers know we're busy */ retval = usb_submit_urb(ictx->tx_urb, GFP_KERNEL); if (retval) { ictx->tx.busy = false; smp_rmb(); /* ensure later readers know we're not busy */ pr_err_ratelimited("error submitting urb(%d)\n", retval); } else { /* Wait for transmission to complete (or abort) */ retval = wait_for_completion_interruptible( &ictx->tx.finished); if (retval) { usb_kill_urb(ictx->tx_urb); pr_err_ratelimited("task interrupted\n"); } ictx->tx.busy = false; retval = ictx->tx.status; if (retval) pr_err_ratelimited("packet tx failed (%d)\n", retval); } kfree(control_req); /* * Induce a mandatory delay before returning, as otherwise, * send_packet can get called so rapidly as to overwhelm the device, * particularly on faster systems and/or those with quirky usb. */ timeout = msecs_to_jiffies(ictx->send_packet_delay); set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(timeout); return retval; } /* * Sends an associate packet to the iMON 2.4G. * * This might not be such a good idea, since it has an id collision with * some versions of the "IR & VFD" combo. The only way to determine if it * is an RF version is to look at the product description string. (Which * we currently do not fetch). */ static int send_associate_24g(struct imon_context *ictx) { const unsigned char packet[8] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20 }; if (!ictx) { pr_err("no context for device\n"); return -ENODEV; } if (!ictx->dev_present_intf0) { pr_err("no iMON device present\n"); return -ENODEV; } memcpy(ictx->usb_tx_buf, packet, sizeof(packet)); return send_packet(ictx); } /* * Sends packets to setup and show clock on iMON display * * Arguments: year - last 2 digits of year, month - 1..12, * day - 1..31, dow - day of the week (0-Sun...6-Sat), * hour - 0..23, minute - 0..59, second - 0..59 */ static int send_set_imon_clock(struct imon_context *ictx, unsigned int year, unsigned int month, unsigned int day, unsigned int dow, unsigned int hour, unsigned int minute, unsigned int second) { unsigned char clock_enable_pkt[IMON_CLOCK_ENABLE_PACKETS][8]; int retval = 0; int i; if (!ictx) { pr_err("no context for device\n"); return -ENODEV; } switch (ictx->display_type) { case IMON_DISPLAY_TYPE_LCD: clock_enable_pkt[0][0] = 0x80; clock_enable_pkt[0][1] = year; clock_enable_pkt[0][2] = month-1; clock_enable_pkt[0][3] = day; clock_enable_pkt[0][4] = hour; clock_enable_pkt[0][5] = minute; clock_enable_pkt[0][6] = second; clock_enable_pkt[1][0] = 0x80; clock_enable_pkt[1][1] = 0; clock_enable_pkt[1][2] = 0; clock_enable_pkt[1][3] = 0; clock_enable_pkt[1][4] = 0; clock_enable_pkt[1][5] = 0; clock_enable_pkt[1][6] = 0; if (ictx->product == 0xffdc) { clock_enable_pkt[0][7] = 0x50; clock_enable_pkt[1][7] = 0x51; } else { clock_enable_pkt[0][7] = 0x88; clock_enable_pkt[1][7] = 0x8a; } break; case IMON_DISPLAY_TYPE_VFD: clock_enable_pkt[0][0] = year; clock_enable_pkt[0][1] = month-1; clock_enable_pkt[0][2] = day; clock_enable_pkt[0][3] = dow; clock_enable_pkt[0][4] = hour; clock_enable_pkt[0][5] = minute; clock_enable_pkt[0][6] = second; clock_enable_pkt[0][7] = 0x40; clock_enable_pkt[1][0] = 0; clock_enable_pkt[1][1] = 0; clock_enable_pkt[1][2] = 1; clock_enable_pkt[1][3] = 0; clock_enable_pkt[1][4] = 0; clock_enable_pkt[1][5] = 0; clock_enable_pkt[1][6] = 0; clock_enable_pkt[1][7] = 0x42; break; default: return -ENODEV; } for (i = 0; i < IMON_CLOCK_ENABLE_PACKETS; i++) { memcpy(ictx->usb_tx_buf, clock_enable_pkt[i], 8); retval = send_packet(ictx); if (retval) { pr_err("send_packet failed for packet %d\n", i); break; } } return retval; } /* * These are the sysfs functions to handle the association on the iMON 2.4G LT. */ static ssize_t associate_remote_show(struct device *d, struct device_attribute *attr, char *buf) { struct imon_context *ictx = dev_get_drvdata(d); if (!ictx) return -ENODEV; mutex_lock(&ictx->lock); if (ictx->rf_isassociating) strscpy(buf, "associating\n", PAGE_SIZE); else strscpy(buf, "closed\n", PAGE_SIZE); dev_info(d, "Visit https://www.lirc.org/html/imon-24g.html for instructions on how to associate your iMON 2.4G DT/LT remote\n"); mutex_unlock(&ictx->lock); return strlen(buf); } static ssize_t associate_remote_store(struct device *d, struct device_attribute *attr, const char *buf, size_t count) { struct imon_context *ictx; ictx = dev_get_drvdata(d); if (!ictx) return -ENODEV; mutex_lock(&ictx->lock); ictx->rf_isassociating = true; send_associate_24g(ictx); mutex_unlock(&ictx->lock); return count; } /* * sysfs functions to control internal imon clock */ static ssize_t imon_clock_show(struct device *d, struct device_attribute *attr, char *buf) { struct imon_context *ictx = dev_get_drvdata(d); size_t len; if (!ictx) return -ENODEV; mutex_lock(&ictx->lock); if (!ictx->display_supported) { len = sysfs_emit(buf, "Not supported."); } else { len = sysfs_emit(buf, "To set the clock on your iMON display:\n" "# date \"+%%y %%m %%d %%w %%H %%M %%S\" > imon_clock\n" "%s", ictx->display_isopen ? "\nNOTE: imon device must be closed\n" : ""); } mutex_unlock(&ictx->lock); return len; } static ssize_t imon_clock_store(struct device *d, struct device_attribute *attr, const char *buf, size_t count) { struct imon_context *ictx = dev_get_drvdata(d); ssize_t retval; unsigned int year, month, day, dow, hour, minute, second; if (!ictx) return -ENODEV; mutex_lock(&ictx->lock); if (!ictx->display_supported) { retval = -ENODEV; goto exit; } else if (ictx->display_isopen) { retval = -EBUSY; goto exit; } if (sscanf(buf, "%u %u %u %u %u %u %u", &year, &month, &day, &dow, &hour, &minute, &second) != 7) { retval = -EINVAL; goto exit; } if ((month < 1 || month > 12) || (day < 1 || day > 31) || (dow > 6) || (hour > 23) || (minute > 59) || (second > 59)) { retval = -EINVAL; goto exit; } retval = send_set_imon_clock(ictx, year, month, day, dow, hour, minute, second); if (retval) goto exit; retval = count; exit: mutex_unlock(&ictx->lock); return retval; } static DEVICE_ATTR_RW(imon_clock); static DEVICE_ATTR_RW(associate_remote); static struct attribute *imon_display_sysfs_entries[] = { &dev_attr_imon_clock.attr, NULL }; static const struct attribute_group imon_display_attr_group = { .attrs = imon_display_sysfs_entries }; static struct attribute *imon_rf_sysfs_entries[] = { &dev_attr_associate_remote.attr, NULL }; static const struct attribute_group imon_rf_attr_group = { .attrs = imon_rf_sysfs_entries }; /* * Writes data to the VFD. The iMON VFD is 2x16 characters * and requires data in 5 consecutive USB interrupt packets, * each packet but the last carrying 7 bytes. * * I don't know if the VFD board supports features such as * scrolling, clearing rows, blanking, etc. so at * the caller must provide a full screen of data. If fewer * than 32 bytes are provided spaces will be appended to * generate a full screen. */ static ssize_t vfd_write(struct file *file, const char __user *buf, size_t n_bytes, loff_t *pos) { int i; int offset; int seq; int retval = 0; struct imon_context *ictx = file->private_data; static const unsigned char vfd_packet6[] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF }; if (ictx->disconnected) return -ENODEV; if (mutex_lock_interruptible(&ictx->lock)) return -ERESTARTSYS; if (!ictx->dev_present_intf0) { pr_err_ratelimited("no iMON device present\n"); retval = -ENODEV; goto exit; } if (n_bytes <= 0 || n_bytes > 32) { pr_err_ratelimited("invalid payload size\n"); retval = -EINVAL; goto exit; } if (copy_from_user(ictx->tx.data_buf, buf, n_bytes)) { retval = -EFAULT; goto exit; } /* Pad with spaces */ for (i = n_bytes; i < 32; ++i) ictx->tx.data_buf[i] = ' '; for (i = 32; i < 35; ++i) ictx->tx.data_buf[i] = 0xFF; offset = 0; seq = 0; do { memcpy(ictx->usb_tx_buf, ictx->tx.data_buf + offset, 7); ictx->usb_tx_buf[7] = (unsigned char) seq; retval = send_packet(ictx); if (retval) { pr_err_ratelimited("send packet #%d failed\n", seq / 2); goto exit; } else { seq += 2; offset += 7; } } while (offset < 35); /* Send packet #6 */ memcpy(ictx->usb_tx_buf, &vfd_packet6, sizeof(vfd_packet6)); ictx->usb_tx_buf[7] = (unsigned char) seq; retval = send_packet(ictx); if (retval) pr_err_ratelimited("send packet #%d failed\n", seq / 2); exit: mutex_unlock(&ictx->lock); return (!retval) ? n_bytes : retval; } /* * Writes data to the LCD. The iMON OEM LCD screen expects 8-byte * packets. We accept data as 16 hexadecimal digits, followed by a * newline (to make it easy to drive the device from a command-line * -- even though the actual binary data is a bit complicated). * * The device itself is not a "traditional" text-mode display. It's * actually a 16x96 pixel bitmap display. That means if you want to * display text, you've got to have your own "font" and translate the * text into bitmaps for display. This is really flexible (you can * display whatever diacritics you need, and so on), but it's also * a lot more complicated than most LCDs... */ static ssize_t lcd_write(struct file *file, const char __user *buf, size_t n_bytes, loff_t *pos) { int retval = 0; struct imon_context *ictx = file->private_data; if (ictx->disconnected) return -ENODEV; mutex_lock(&ictx->lock); if (!ictx->display_supported) { pr_err_ratelimited("no iMON display present\n"); retval = -ENODEV; goto exit; } if (n_bytes != 8) { pr_err_ratelimited("invalid payload size: %d (expected 8)\n", (int)n_bytes); retval = -EINVAL; goto exit; } if (copy_from_user(ictx->usb_tx_buf, buf, 8)) { retval = -EFAULT; goto exit; } retval = send_packet(ictx); if (retval) { pr_err_ratelimited("send packet failed!\n"); goto exit; } else { dev_dbg(ictx->dev, "%s: write %d bytes to LCD\n", __func__, (int) n_bytes); } exit: mutex_unlock(&ictx->lock); return (!retval) ? n_bytes : retval; } /* * Callback function for USB core API: transmit data */ static void usb_tx_callback(struct urb *urb) { struct imon_context *ictx; if (!urb) return; ictx = (struct imon_context *)urb->context; if (!ictx) return; ictx->tx.status = urb->status; /* notify waiters that write has finished */ ictx->tx.busy = false; smp_rmb(); /* ensure later readers know we're not busy */ complete(&ictx->tx.finished); } /* * report touchscreen input */ static void imon_touch_display_timeout(struct timer_list *t) { struct imon_context *ictx = from_timer(ictx, t, ttimer); if (ictx->display_type != IMON_DISPLAY_TYPE_VGA) return; input_report_abs(ictx->touch, ABS_X, ictx->touch_x); input_report_abs(ictx->touch, ABS_Y, ictx->touch_y); input_report_key(ictx->touch, BTN_TOUCH, 0x00); input_sync(ictx->touch); } /* * iMON IR receivers support two different signal sets -- those used by * the iMON remotes, and those used by the Windows MCE remotes (which is * really just RC-6), but only one or the other at a time, as the signals * are decoded onboard the receiver. * * This function gets called two different ways, one way is from * rc_register_device, for initial protocol selection/setup, and the other is * via a userspace-initiated protocol change request, either by direct sysfs * prodding or by something like ir-keytable. In the rc_register_device case, * the imon context lock is already held, but when initiated from userspace, * it is not, so we must acquire it prior to calling send_packet, which * requires that the lock is held. */ static int imon_ir_change_protocol(struct rc_dev *rc, u64 *rc_proto) { int retval; struct imon_context *ictx = rc->priv; struct device *dev = ictx->dev; bool unlock = false; unsigned char ir_proto_packet[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86 }; if (*rc_proto && !(*rc_proto & rc->allowed_protocols)) dev_warn(dev, "Looks like you're trying to use an IR protocol this device does not support\n"); if (*rc_proto & RC_PROTO_BIT_RC6_MCE) { dev_dbg(dev, "Configuring IR receiver for MCE protocol\n"); ir_proto_packet[0] = 0x01; *rc_proto = RC_PROTO_BIT_RC6_MCE; } else if (*rc_proto & RC_PROTO_BIT_IMON) { dev_dbg(dev, "Configuring IR receiver for iMON protocol\n"); if (!pad_stabilize) dev_dbg(dev, "PAD stabilize functionality disabled\n"); /* ir_proto_packet[0] = 0x00; // already the default */ *rc_proto = RC_PROTO_BIT_IMON; } else { dev_warn(dev, "Unsupported IR protocol specified, overriding to iMON IR protocol\n"); if (!pad_stabilize) dev_dbg(dev, "PAD stabilize functionality disabled\n"); /* ir_proto_packet[0] = 0x00; // already the default */ *rc_proto = RC_PROTO_BIT_IMON; } memcpy(ictx->usb_tx_buf, &ir_proto_packet, sizeof(ir_proto_packet)); if (!mutex_is_locked(&ictx->lock)) { unlock = true; mutex_lock(&ictx->lock); } retval = send_packet(ictx); if (retval) goto out; ictx->rc_proto = *rc_proto; ictx->pad_mouse = false; out: if (unlock) mutex_unlock(&ictx->lock); return retval; } /* * The directional pad behaves a bit differently, depending on whether this is * one of the older ffdc devices or a newer device. Newer devices appear to * have a higher resolution matrix for more precise mouse movement, but it * makes things overly sensitive in keyboard mode, so we do some interesting * contortions to make it less touchy. Older devices run through the same * routine with shorter timeout and a smaller threshold. */ static int stabilize(int a, int b, u16 timeout, u16 threshold) { ktime_t ct; static ktime_t prev_time; static ktime_t hit_time; static int x, y, prev_result, hits; int result = 0; long msec, msec_hit; ct = ktime_get(); msec = ktime_ms_delta(ct, prev_time); msec_hit = ktime_ms_delta(ct, hit_time); if (msec > 100) { x = 0; y = 0; hits = 0; } x += a; y += b; prev_time = ct; if (abs(x) > threshold || abs(y) > threshold) { if (abs(y) > abs(x)) result = (y > 0) ? 0x7F : 0x80; else result = (x > 0) ? 0x7F00 : 0x8000; x = 0; y = 0; if (result == prev_result) { hits++; if (hits > 3) { switch (result) { case 0x7F: y = 17 * threshold / 30; break; case 0x80: y -= 17 * threshold / 30; break; case 0x7F00: x = 17 * threshold / 30; break; case 0x8000: x -= 17 * threshold / 30; break; } } if (hits == 2 && msec_hit < timeout) { result = 0; hits = 1; } } else { prev_result = result; hits = 1; hit_time = ct; } } return result; } static u32 imon_remote_key_lookup(struct imon_context *ictx, u32 scancode) { u32 keycode; u32 release; bool is_release_code = false; /* Look for the initial press of a button */ keycode = rc_g_keycode_from_table(ictx->rdev, scancode); ictx->rc_toggle = 0x0; ictx->rc_scancode = scancode; /* Look for the release of a button */ if (keycode == KEY_RESERVED) { release = scancode & ~0x4000; keycode = rc_g_keycode_from_table(ictx->rdev, release); if (keycode != KEY_RESERVED) is_release_code = true; } ictx->release_code = is_release_code; return keycode; } static u32 imon_mce_key_lookup(struct imon_context *ictx, u32 scancode) { u32 keycode; #define MCE_KEY_MASK 0x7000 #define MCE_TOGGLE_BIT 0x8000 /* * On some receivers, mce keys decode to 0x8000f04xx and 0x8000f84xx * (the toggle bit flipping between alternating key presses), while * on other receivers, we see 0x8000f74xx and 0x8000ff4xx. To keep * the table trim, we always or in the bits to look up 0x8000ff4xx, * but we can't or them into all codes, as some keys are decoded in * a different way w/o the same use of the toggle bit... */ if (scancode & 0x80000000) scancode = scancode | MCE_KEY_MASK | MCE_TOGGLE_BIT; ictx->rc_scancode = scancode; keycode = rc_g_keycode_from_table(ictx->rdev, scancode); /* not used in mce mode, but make sure we know its false */ ictx->release_code = false; return keycode; } static u32 imon_panel_key_lookup(struct imon_context *ictx, u64 code) { const struct imon_panel_key_table *key_table; u32 keycode = KEY_RESERVED; int i; key_table = ictx->dev_descr->key_table; for (i = 0; key_table[i].hw_code != 0; i++) { if (key_table[i].hw_code == (code | 0xffee)) { keycode = key_table[i].keycode; break; } } ictx->release_code = false; return keycode; } static bool imon_mouse_event(struct imon_context *ictx, unsigned char *buf, int len) { signed char rel_x = 0x00, rel_y = 0x00; u8 right_shift = 1; bool mouse_input = true; int dir = 0; unsigned long flags; spin_lock_irqsave(&ictx->kc_lock, flags); /* newer iMON device PAD or mouse button */ if (ictx->product != 0xffdc && (buf[0] & 0x01) && len == 5) { rel_x = buf[2]; rel_y = buf[3]; right_shift = 1; /* 0xffdc iMON PAD or mouse button input */ } else if (ictx->product == 0xffdc && (buf[0] & 0x40) && !((buf[1] & 0x01) || ((buf[1] >> 2) & 0x01))) { rel_x = (buf[1] & 0x08) | (buf[1] & 0x10) >> 2 | (buf[1] & 0x20) >> 4 | (buf[1] & 0x40) >> 6; if (buf[0] & 0x02) rel_x |= ~0x0f; rel_x = rel_x + rel_x / 2; rel_y = (buf[2] & 0x08) | (buf[2] & 0x10) >> 2 | (buf[2] & 0x20) >> 4 | (buf[2] & 0x40) >> 6; if (buf[0] & 0x01) rel_y |= ~0x0f; rel_y = rel_y + rel_y / 2; right_shift = 2; /* some ffdc devices decode mouse buttons differently... */ } else if (ictx->product == 0xffdc && (buf[0] == 0x68)) { right_shift = 2; /* ch+/- buttons, which we use for an emulated scroll wheel */ } else if (ictx->kc == KEY_CHANNELUP && (buf[2] & 0x40) != 0x40) { dir = 1; } else if (ictx->kc == KEY_CHANNELDOWN && (buf[2] & 0x40) != 0x40) { dir = -1; } else mouse_input = false; spin_unlock_irqrestore(&ictx->kc_lock, flags); if (mouse_input) { dev_dbg(ictx->dev, "sending mouse data via input subsystem\n"); if (dir) { input_report_rel(ictx->idev, REL_WHEEL, dir); } else if (rel_x || rel_y) { input_report_rel(ictx->idev, REL_X, rel_x); input_report_rel(ictx->idev, REL_Y, rel_y); } else { input_report_key(ictx->idev, BTN_LEFT, buf[1] & 0x1); input_report_key(ictx->idev, BTN_RIGHT, buf[1] >> right_shift & 0x1); } input_sync(ictx->idev); spin_lock_irqsave(&ictx->kc_lock, flags); ictx->last_keycode = ictx->kc; spin_unlock_irqrestore(&ictx->kc_lock, flags); } return mouse_input; } static void imon_touch_event(struct imon_context *ictx, unsigned char *buf) { mod_timer(&ictx->ttimer, jiffies + TOUCH_TIMEOUT); ictx->touch_x = (buf[0] << 4) | (buf[1] >> 4); ictx->touch_y = 0xfff - ((buf[2] << 4) | (buf[1] & 0xf)); input_report_abs(ictx->touch, ABS_X, ictx->touch_x); input_report_abs(ictx->touch, ABS_Y, ictx->touch_y); input_report_key(ictx->touch, BTN_TOUCH, 0x01); input_sync(ictx->touch); } static void imon_pad_to_keys(struct imon_context *ictx, unsigned char *buf) { int dir = 0; signed char rel_x = 0x00, rel_y = 0x00; u16 timeout, threshold; u32 scancode = KEY_RESERVED; unsigned long flags; /* * The imon directional pad functions more like a touchpad. Bytes 3 & 4 * contain a position coordinate (x,y), with each component ranging * from -14 to 14. We want to down-sample this to only 4 discrete values * for up/down/left/right arrow keys. Also, when you get too close to * diagonals, it has a tendency to jump back and forth, so lets try to * ignore when they get too close. */ if (ictx->product != 0xffdc) { /* first, pad to 8 bytes so it conforms with everything else */ buf[5] = buf[6] = buf[7] = 0; timeout = 500; /* in msecs */ /* (2*threshold) x (2*threshold) square */ threshold = pad_thresh ? pad_thresh : 28; rel_x = buf[2]; rel_y = buf[3]; if (ictx->rc_proto == RC_PROTO_BIT_IMON && pad_stabilize) { if ((buf[1] == 0) && ((rel_x != 0) || (rel_y != 0))) { dir = stabilize((int)rel_x, (int)rel_y, timeout, threshold); if (!dir) { spin_lock_irqsave(&ictx->kc_lock, flags); ictx->kc = KEY_UNKNOWN; spin_unlock_irqrestore(&ictx->kc_lock, flags); return; } buf[2] = dir & 0xFF; buf[3] = (dir >> 8) & 0xFF; scancode = be32_to_cpu(*((__be32 *)buf)); } } else { /* * Hack alert: instead of using keycodes, we have * to use hard-coded scancodes here... */ if (abs(rel_y) > abs(rel_x)) { buf[2] = (rel_y > 0) ? 0x7F : 0x80; buf[3] = 0; if (rel_y > 0) scancode = 0x01007f00; /* KEY_DOWN */ else scancode = 0x01008000; /* KEY_UP */ } else { buf[2] = 0; buf[3] = (rel_x > 0) ? 0x7F : 0x80; if (rel_x > 0) scancode = 0x0100007f; /* KEY_RIGHT */ else scancode = 0x01000080; /* KEY_LEFT */ } } /* * Handle on-board decoded pad events for e.g. older VFD/iMON-Pad * device (15c2:ffdc). The remote generates various codes from * 0x68nnnnB7 to 0x6AnnnnB7, the left mouse button generates * 0x688301b7 and the right one 0x688481b7. All other keys generate * 0x2nnnnnnn. Position coordinate is encoded in buf[1] and buf[2] with * reversed endianness. Extract direction from buffer, rotate endianness, * adjust sign and feed the values into stabilize(). The resulting codes * will be 0x01008000, 0x01007F00, which match the newer devices. */ } else { timeout = 10; /* in msecs */ /* (2*threshold) x (2*threshold) square */ threshold = pad_thresh ? pad_thresh : 15; /* buf[1] is x */ rel_x = (buf[1] & 0x08) | (buf[1] & 0x10) >> 2 | (buf[1] & 0x20) >> 4 | (buf[1] & 0x40) >> 6; if (buf[0] & 0x02) rel_x |= ~0x10+1; /* buf[2] is y */ rel_y = (buf[2] & 0x08) | (buf[2] & 0x10) >> 2 | (buf[2] & 0x20) >> 4 | (buf[2] & 0x40) >> 6; if (buf[0] & 0x01) rel_y |= ~0x10+1; buf[0] = 0x01; buf[1] = buf[4] = buf[5] = buf[6] = buf[7] = 0; if (ictx->rc_proto == RC_PROTO_BIT_IMON && pad_stabilize) { dir = stabilize((int)rel_x, (int)rel_y, timeout, threshold); if (!dir) { spin_lock_irqsave(&ictx->kc_lock, flags); ictx->kc = KEY_UNKNOWN; spin_unlock_irqrestore(&ictx->kc_lock, flags); return; } buf[2] = dir & 0xFF; buf[3] = (dir >> 8) & 0xFF; scancode = be32_to_cpu(*((__be32 *)buf)); } else { /* * Hack alert: instead of using keycodes, we have * to use hard-coded scancodes here... */ if (abs(rel_y) > abs(rel_x)) { buf[2] = (rel_y > 0) ? 0x7F : 0x80; buf[3] = 0; if (rel_y > 0) scancode = 0x01007f00; /* KEY_DOWN */ else scancode = 0x01008000; /* KEY_UP */ } else { buf[2] = 0; buf[3] = (rel_x > 0) ? 0x7F : 0x80; if (rel_x > 0) scancode = 0x0100007f; /* KEY_RIGHT */ else scancode = 0x01000080; /* KEY_LEFT */ } } } if (scancode) { spin_lock_irqsave(&ictx->kc_lock, flags); ictx->kc = imon_remote_key_lookup(ictx, scancode); spin_unlock_irqrestore(&ictx->kc_lock, flags); } } /* * figure out if these is a press or a release. We don't actually * care about repeats, as those will be auto-generated within the IR * subsystem for repeating scancodes. */ static int imon_parse_press_type(struct imon_context *ictx, unsigned char *buf, u8 ktype) { int press_type = 0; unsigned long flags; spin_lock_irqsave(&ictx->kc_lock, flags); /* key release of 0x02XXXXXX key */ if (ictx->kc == KEY_RESERVED && buf[0] == 0x02 && buf[3] == 0x00) ictx->kc = ictx->last_keycode; /* mouse button release on (some) 0xffdc devices */ else if (ictx->kc == KEY_RESERVED && buf[0] == 0x68 && buf[1] == 0x82 && buf[2] == 0x81 && buf[3] == 0xb7) ictx->kc = ictx->last_keycode; /* mouse button release on (some other) 0xffdc devices */ else if (ictx->kc == KEY_RESERVED && buf[0] == 0x01 && buf[1] == 0x00 && buf[2] == 0x81 && buf[3] == 0xb7) ictx->kc = ictx->last_keycode; /* mce-specific button handling, no keyup events */ else if (ktype == IMON_KEY_MCE) { ictx->rc_toggle = buf[2]; press_type = 1; /* incoherent or irrelevant data */ } else if (ictx->kc == KEY_RESERVED) press_type = -EINVAL; /* key release of 0xXXXXXXb7 key */ else if (ictx->release_code) press_type = 0; /* this is a button press */ else press_type = 1; spin_unlock_irqrestore(&ictx->kc_lock, flags); return press_type; } /* * Process the incoming packet */ static void imon_incoming_packet(struct imon_context *ictx, struct urb *urb, int intf) { int len = urb->actual_length; unsigned char *buf = urb->transfer_buffer; struct device *dev = ictx->dev; unsigned long flags; u32 kc; u64 scancode; int press_type = 0; ktime_t t; static ktime_t prev_time; u8 ktype; /* filter out junk data on the older 0xffdc imon devices */ if ((buf[0] == 0xff) && (buf[1] == 0xff) && (buf[2] == 0xff)) return; /* Figure out what key was pressed */ if (len == 8 && buf[7] == 0xee) { scancode = be64_to_cpu(*((__be64 *)buf)); ktype = IMON_KEY_PANEL; kc = imon_panel_key_lookup(ictx, scancode); ictx->release_code = false; } else { scancode = be32_to_cpu(*((__be32 *)buf)); if (ictx->rc_proto == RC_PROTO_BIT_RC6_MCE) { ktype = IMON_KEY_IMON; if (buf[0] == 0x80) ktype = IMON_KEY_MCE; kc = imon_mce_key_lookup(ictx, scancode); } else { ktype = IMON_KEY_IMON; kc = imon_remote_key_lookup(ictx, scancode); } } spin_lock_irqsave(&ictx->kc_lock, flags); /* keyboard/mouse mode toggle button */ if (kc == KEY_KEYBOARD && !ictx->release_code) { ictx->last_keycode = kc; if (!nomouse) { ictx->pad_mouse = !ictx->pad_mouse; dev_dbg(dev, "toggling to %s mode\n", ictx->pad_mouse ? "mouse" : "keyboard"); spin_unlock_irqrestore(&ictx->kc_lock, flags); return; } else { ictx->pad_mouse = false; dev_dbg(dev, "mouse mode disabled, passing key value\n"); } } ictx->kc = kc; spin_unlock_irqrestore(&ictx->kc_lock, flags); /* send touchscreen events through input subsystem if touchpad data */ if (ictx->touch && len == 8 && buf[7] == 0x86) { imon_touch_event(ictx, buf); return; /* look for mouse events with pad in mouse mode */ } else if (ictx->pad_mouse) { if (imon_mouse_event(ictx, buf, len)) return; } /* Now for some special handling to convert pad input to arrow keys */ if (((len == 5) && (buf[0] == 0x01) && (buf[4] == 0x00)) || ((len == 8) && (buf[0] & 0x40) && !(buf[1] & 0x1 || buf[1] >> 2 & 0x1))) { len = 8; imon_pad_to_keys(ictx, buf); } if (debug) { printk(KERN_INFO "intf%d decoded packet: %*ph\n", intf, len, buf); } press_type = imon_parse_press_type(ictx, buf, ktype); if (press_type < 0) goto not_input_data; if (ktype != IMON_KEY_PANEL) { if (press_type == 0) rc_keyup(ictx->rdev); else { enum rc_proto proto; if (ictx->rc_proto == RC_PROTO_BIT_RC6_MCE) proto = RC_PROTO_RC6_MCE; else if (ictx->rc_proto == RC_PROTO_BIT_IMON) proto = RC_PROTO_IMON; else return; rc_keydown(ictx->rdev, proto, ictx->rc_scancode, ictx->rc_toggle); spin_lock_irqsave(&ictx->kc_lock, flags); ictx->last_keycode = ictx->kc; spin_unlock_irqrestore(&ictx->kc_lock, flags); } return; } /* Only panel type events left to process now */ spin_lock_irqsave(&ictx->kc_lock, flags); t = ktime_get(); /* KEY repeats from knob and panel that need to be suppressed */ if (ictx->kc == KEY_MUTE || ictx->dev_descr->flags & IMON_SUPPRESS_REPEATED_KEYS) { if (ictx->kc == ictx->last_keycode && ktime_ms_delta(t, prev_time) < ictx->idev->rep[REP_DELAY]) { spin_unlock_irqrestore(&ictx->kc_lock, flags); return; } } prev_time = t; kc = ictx->kc; spin_unlock_irqrestore(&ictx->kc_lock, flags); input_report_key(ictx->idev, kc, press_type); input_sync(ictx->idev); /* panel keys don't generate a release */ input_report_key(ictx->idev, kc, 0); input_sync(ictx->idev); spin_lock_irqsave(&ictx->kc_lock, flags); ictx->last_keycode = kc; spin_unlock_irqrestore(&ictx->kc_lock, flags); return; not_input_data: if (len != 8) { dev_warn(dev, "imon %s: invalid incoming packet size (len = %d, intf%d)\n", __func__, len, intf); return; } /* iMON 2.4G associate frame */ if (buf[0] == 0x00 && buf[2] == 0xFF && /* REFID */ buf[3] == 0xFF && buf[4] == 0xFF && buf[5] == 0xFF && /* iMON 2.4G */ ((buf[6] == 0x4E && buf[7] == 0xDF) || /* LT */ (buf[6] == 0x5E && buf[7] == 0xDF))) { /* DT */ dev_warn(dev, "%s: remote associated refid=%02X\n", __func__, buf[1]); ictx->rf_isassociating = false; } } /* * Callback function for USB core API: receive data */ static void usb_rx_callback_intf0(struct urb *urb) { struct imon_context *ictx; int intfnum = 0; if (!urb) return; ictx = (struct imon_context *)urb->context; if (!ictx) return; /* * if we get a callback before we're done configuring the hardware, we * can't yet process the data, as there's nowhere to send it, but we * still need to submit a new rx URB to avoid wedging the hardware */ if (!ictx->dev_present_intf0) goto out; switch (urb->status) { case -ENOENT: /* usbcore unlink successful! */ return; case -ESHUTDOWN: /* transport endpoint was shut down */ break; case 0: imon_incoming_packet(ictx, urb, intfnum); break; default: dev_warn(ictx->dev, "imon %s: status(%d): ignored\n", __func__, urb->status); break; } out: usb_submit_urb(ictx->rx_urb_intf0, GFP_ATOMIC); } static void usb_rx_callback_intf1(struct urb *urb) { struct imon_context *ictx; int intfnum = 1; if (!urb) return; ictx = (struct imon_context *)urb->context; if (!ictx) return; /* * if we get a callback before we're done configuring the hardware, we * can't yet process the data, as there's nowhere to send it, but we * still need to submit a new rx URB to avoid wedging the hardware */ if (!ictx->dev_present_intf1) goto out; switch (urb->status) { case -ENOENT: /* usbcore unlink successful! */ return; case -ESHUTDOWN: /* transport endpoint was shut down */ break; case 0: imon_incoming_packet(ictx, urb, intfnum); break; default: dev_warn(ictx->dev, "imon %s: status(%d): ignored\n", __func__, urb->status); break; } out: usb_submit_urb(ictx->rx_urb_intf1, GFP_ATOMIC); } /* * The 0x15c2:0xffdc device ID was used for umpteen different imon * devices, and all of them constantly spew interrupts, even when there * is no actual data to report. However, byte 6 of this buffer looks like * its unique across device variants, so we're trying to key off that to * figure out which display type (if any) and what IR protocol the device * actually supports. These devices have their IR protocol hard-coded into * their firmware, they can't be changed on the fly like the newer hardware. */ static void imon_get_ffdc_type(struct imon_context *ictx) { u8 ffdc_cfg_byte = ictx->usb_rx_buf[6]; u8 detected_display_type = IMON_DISPLAY_TYPE_NONE; u64 allowed_protos = RC_PROTO_BIT_IMON; switch (ffdc_cfg_byte) { /* iMON Knob, no display, iMON IR + vol knob */ case 0x21: dev_info(ictx->dev, "0xffdc iMON Knob, iMON IR"); ictx->display_supported = false; break; /* iMON 2.4G LT (usb stick), no display, iMON RF */ case 0x4e: dev_info(ictx->dev, "0xffdc iMON 2.4G LT, iMON RF"); ictx->display_supported = false; ictx->rf_device = true; break; /* iMON VFD, no IR (does have vol knob tho) */ case 0x35: dev_info(ictx->dev, "0xffdc iMON VFD + knob, no IR"); detected_display_type = IMON_DISPLAY_TYPE_VFD; break; /* iMON VFD, iMON IR */ case 0x24: case 0x30: case 0x85: dev_info(ictx->dev, "0xffdc iMON VFD, iMON IR"); detected_display_type = IMON_DISPLAY_TYPE_VFD; break; /* iMON VFD, MCE IR */ case 0x46: case 0x9e: dev_info(ictx->dev, "0xffdc iMON VFD, MCE IR"); detected_display_type = IMON_DISPLAY_TYPE_VFD; allowed_protos = RC_PROTO_BIT_RC6_MCE; break; /* iMON VFD, iMON or MCE IR */ case 0x7e: dev_info(ictx->dev, "0xffdc iMON VFD, iMON or MCE IR"); detected_display_type = IMON_DISPLAY_TYPE_VFD; allowed_protos |= RC_PROTO_BIT_RC6_MCE; break; /* iMON LCD, MCE IR */ case 0x9f: dev_info(ictx->dev, "0xffdc iMON LCD, MCE IR"); detected_display_type = IMON_DISPLAY_TYPE_LCD; allowed_protos = RC_PROTO_BIT_RC6_MCE; break; /* no display, iMON IR */ case 0x26: dev_info(ictx->dev, "0xffdc iMON Inside, iMON IR"); ictx->display_supported = false; break; /* Soundgraph iMON UltraBay */ case 0x98: dev_info(ictx->dev, "0xffdc iMON UltraBay, LCD + IR"); detected_display_type = IMON_DISPLAY_TYPE_LCD; allowed_protos = RC_PROTO_BIT_IMON | RC_PROTO_BIT_RC6_MCE; ictx->dev_descr = &ultrabay_table; break; default: dev_info(ictx->dev, "Unknown 0xffdc device, defaulting to VFD and iMON IR"); detected_display_type = IMON_DISPLAY_TYPE_VFD; /* * We don't know which one it is, allow user to set the * RC6 one from userspace if IMON wasn't correct. */ allowed_protos |= RC_PROTO_BIT_RC6_MCE; break; } printk(KERN_CONT " (id 0x%02x)\n", ffdc_cfg_byte); ictx->display_type = detected_display_type; ictx->rc_proto = allowed_protos; } static void imon_set_display_type(struct imon_context *ictx) { u8 configured_display_type = IMON_DISPLAY_TYPE_VFD; /* * Try to auto-detect the type of display if the user hasn't set * it by hand via the display_type modparam. Default is VFD. */ if (display_type == IMON_DISPLAY_TYPE_AUTO) { switch (ictx->product) { case 0xffdc: /* set in imon_get_ffdc_type() */ configured_display_type = ictx->display_type; break; case 0x0034: case 0x0035: configured_display_type = IMON_DISPLAY_TYPE_VGA; break; case 0x0038: case 0x0039: case 0x0045: configured_display_type = IMON_DISPLAY_TYPE_LCD; break; case 0x003c: case 0x0041: case 0x0042: case 0x0043: configured_display_type = IMON_DISPLAY_TYPE_NONE; ictx->display_supported = false; break; case 0x0036: case 0x0044: default: configured_display_type = IMON_DISPLAY_TYPE_VFD; break; } } else { configured_display_type = display_type; if (display_type == IMON_DISPLAY_TYPE_NONE) ictx->display_supported = false; else ictx->display_supported = true; dev_info(ictx->dev, "%s: overriding display type to %d via modparam\n", __func__, display_type); } ictx->display_type = configured_display_type; } static struct rc_dev *imon_init_rdev(struct imon_context *ictx) { struct rc_dev *rdev; int ret; static const unsigned char fp_packet[] = { 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x88 }; rdev = rc_allocate_device(RC_DRIVER_SCANCODE); if (!rdev) { dev_err(ictx->dev, "remote control dev allocation failed\n"); goto out; } snprintf(ictx->name_rdev, sizeof(ictx->name_rdev), "iMON Remote (%04x:%04x)", ictx->vendor, ictx->product); usb_make_path(ictx->usbdev_intf0, ictx->phys_rdev, sizeof(ictx->phys_rdev)); strlcat(ictx->phys_rdev, "/input0", sizeof(ictx->phys_rdev)); rdev->device_name = ictx->name_rdev; rdev->input_phys = ictx->phys_rdev; usb_to_input_id(ictx->usbdev_intf0, &rdev->input_id); rdev->dev.parent = ictx->dev; rdev->priv = ictx; /* iMON PAD or MCE */ rdev->allowed_protocols = RC_PROTO_BIT_IMON | RC_PROTO_BIT_RC6_MCE; rdev->change_protocol = imon_ir_change_protocol; rdev->driver_name = MOD_NAME; /* Enable front-panel buttons and/or knobs */ memcpy(ictx->usb_tx_buf, &fp_packet, sizeof(fp_packet)); ret = send_packet(ictx); /* Not fatal, but warn about it */ if (ret) dev_info(ictx->dev, "panel buttons/knobs setup failed\n"); if (ictx->product == 0xffdc) { imon_get_ffdc_type(ictx); rdev->allowed_protocols = ictx->rc_proto; } imon_set_display_type(ictx); if (ictx->rc_proto == RC_PROTO_BIT_RC6_MCE) rdev->map_name = RC_MAP_IMON_MCE; else rdev->map_name = RC_MAP_IMON_PAD; ret = rc_register_device(rdev); if (ret < 0) { dev_err(ictx->dev, "remote input dev register failed\n"); goto out; } return rdev; out: rc_free_device(rdev); return NULL; } static struct input_dev *imon_init_idev(struct imon_context *ictx) { const struct imon_panel_key_table *key_table; struct input_dev *idev; int ret, i; key_table = ictx->dev_descr->key_table; idev = input_allocate_device(); if (!idev) goto out; snprintf(ictx->name_idev, sizeof(ictx->name_idev), "iMON Panel, Knob and Mouse(%04x:%04x)", ictx->vendor, ictx->product); idev->name = ictx->name_idev; usb_make_path(ictx->usbdev_intf0, ictx->phys_idev, sizeof(ictx->phys_idev)); strlcat(ictx->phys_idev, "/input1", sizeof(ictx->phys_idev)); idev->phys = ictx->phys_idev; idev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP) | BIT_MASK(EV_REL); idev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) | BIT_MASK(BTN_RIGHT); idev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y) | BIT_MASK(REL_WHEEL); /* panel and/or knob code support */ for (i = 0; key_table[i].hw_code != 0; i++) { u32 kc = key_table[i].keycode; __set_bit(kc, idev->keybit); } usb_to_input_id(ictx->usbdev_intf0, &idev->id); idev->dev.parent = ictx->dev; input_set_drvdata(idev, ictx); ret = input_register_device(idev); if (ret < 0) { dev_err(ictx->dev, "input dev register failed\n"); goto out; } return idev; out: input_free_device(idev); return NULL; } static struct input_dev *imon_init_touch(struct imon_context *ictx) { struct input_dev *touch; int ret; touch = input_allocate_device(); if (!touch) goto touch_alloc_failed; snprintf(ictx->name_touch, sizeof(ictx->name_touch), "iMON USB Touchscreen (%04x:%04x)", ictx->vendor, ictx->product); touch->name = ictx->name_touch; usb_make_path(ictx->usbdev_intf1, ictx->phys_touch, sizeof(ictx->phys_touch)); strlcat(ictx->phys_touch, "/input2", sizeof(ictx->phys_touch)); touch->phys = ictx->phys_touch; touch->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS); touch->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH); input_set_abs_params(touch, ABS_X, 0x00, 0xfff, 0, 0); input_set_abs_params(touch, ABS_Y, 0x00, 0xfff, 0, 0); input_set_drvdata(touch, ictx); usb_to_input_id(ictx->usbdev_intf1, &touch->id); touch->dev.parent = ictx->dev; ret = input_register_device(touch); if (ret < 0) { dev_info(ictx->dev, "touchscreen input dev register failed\n"); goto touch_register_failed; } return touch; touch_register_failed: input_free_device(touch); touch_alloc_failed: return NULL; } static bool imon_find_endpoints(struct imon_context *ictx, struct usb_host_interface *iface_desc) { struct usb_endpoint_descriptor *ep; struct usb_endpoint_descriptor *rx_endpoint = NULL; struct usb_endpoint_descriptor *tx_endpoint = NULL; int ifnum = iface_desc->desc.bInterfaceNumber; int num_endpts = iface_desc->desc.bNumEndpoints; int i, ep_dir, ep_type; bool ir_ep_found = false; bool display_ep_found = false; bool tx_control = false; /* * Scan the endpoint list and set: * first input endpoint = IR endpoint * first output endpoint = display endpoint */ for (i = 0; i < num_endpts && !(ir_ep_found && display_ep_found); ++i) { ep = &iface_desc->endpoint[i].desc; ep_dir = ep->bEndpointAddress & USB_ENDPOINT_DIR_MASK; ep_type = usb_endpoint_type(ep); if (!ir_ep_found && ep_dir == USB_DIR_IN && ep_type == USB_ENDPOINT_XFER_INT) { rx_endpoint = ep; ir_ep_found = true; dev_dbg(ictx->dev, "%s: found IR endpoint\n", __func__); } else if (!display_ep_found && ep_dir == USB_DIR_OUT && ep_type == USB_ENDPOINT_XFER_INT) { tx_endpoint = ep; display_ep_found = true; dev_dbg(ictx->dev, "%s: found display endpoint\n", __func__); } } if (ifnum == 0) { ictx->rx_endpoint_intf0 = rx_endpoint; /* * tx is used to send characters to lcd/vfd, associate RF * remotes, set IR protocol, and maybe more... */ ictx->tx_endpoint = tx_endpoint; } else { ictx->rx_endpoint_intf1 = rx_endpoint; } /* * If we didn't find a display endpoint, this is probably one of the * newer iMON devices that use control urb instead of interrupt */ if (!display_ep_found) { tx_control = true; display_ep_found = true; dev_dbg(ictx->dev, "%s: device uses control endpoint, not interface OUT endpoint\n", __func__); } /* * Some iMON receivers have no display. Unfortunately, it seems * that SoundGraph recycles device IDs between devices both with * and without... :\ */ if (ictx->display_type == IMON_DISPLAY_TYPE_NONE) { display_ep_found = false; dev_dbg(ictx->dev, "%s: device has no display\n", __func__); } /* * iMON Touch devices have a VGA touchscreen, but no "display", as * that refers to e.g. /dev/lcd0 (a character device LCD or VFD). */ if (ictx->display_type == IMON_DISPLAY_TYPE_VGA) { display_ep_found = false; dev_dbg(ictx->dev, "%s: iMON Touch device found\n", __func__); } /* Input endpoint is mandatory */ if (!ir_ep_found) pr_err("no valid input (IR) endpoint found\n"); ictx->tx_control = tx_control; if (display_ep_found) ictx->display_supported = true; return ir_ep_found; } static struct imon_context *imon_init_intf0(struct usb_interface *intf, const struct usb_device_id *id) { struct imon_context *ictx; struct urb *rx_urb; struct urb *tx_urb; struct device *dev = &intf->dev; struct usb_host_interface *iface_desc; int ret = -ENOMEM; ictx = kzalloc(sizeof(*ictx), GFP_KERNEL); if (!ictx) goto exit; rx_urb = usb_alloc_urb(0, GFP_KERNEL); if (!rx_urb) goto rx_urb_alloc_failed; tx_urb = usb_alloc_urb(0, GFP_KERNEL); if (!tx_urb) goto tx_urb_alloc_failed; mutex_init(&ictx->lock); spin_lock_init(&ictx->kc_lock); mutex_lock(&ictx->lock); ictx->dev = dev; ictx->usbdev_intf0 = usb_get_dev(interface_to_usbdev(intf)); ictx->rx_urb_intf0 = rx_urb; ictx->tx_urb = tx_urb; ictx->rf_device = false; init_completion(&ictx->tx.finished); ictx->vendor = le16_to_cpu(ictx->usbdev_intf0->descriptor.idVendor); ictx->product = le16_to_cpu(ictx->usbdev_intf0->descriptor.idProduct); /* save drive info for later accessing the panel/knob key table */ ictx->dev_descr = (struct imon_usb_dev_descr *)id->driver_info; /* default send_packet delay is 5ms but some devices need more */ ictx->send_packet_delay = ictx->dev_descr->flags & IMON_NEED_20MS_PKT_DELAY ? 20 : 5; ret = -ENODEV; iface_desc = intf->cur_altsetting; if (!imon_find_endpoints(ictx, iface_desc)) { goto find_endpoint_failed; } usb_fill_int_urb(ictx->rx_urb_intf0, ictx->usbdev_intf0, usb_rcvintpipe(ictx->usbdev_intf0, ictx->rx_endpoint_intf0->bEndpointAddress), ictx->usb_rx_buf, sizeof(ictx->usb_rx_buf), usb_rx_callback_intf0, ictx, ictx->rx_endpoint_intf0->bInterval); ret = usb_submit_urb(ictx->rx_urb_intf0, GFP_KERNEL); if (ret) { pr_err("usb_submit_urb failed for intf0 (%d)\n", ret); goto urb_submit_failed; } ictx->idev = imon_init_idev(ictx); if (!ictx->idev) { dev_err(dev, "%s: input device setup failed\n", __func__); goto idev_setup_failed; } ictx->rdev = imon_init_rdev(ictx); if (!ictx->rdev) { dev_err(dev, "%s: rc device setup failed\n", __func__); goto rdev_setup_failed; } ictx->dev_present_intf0 = true; mutex_unlock(&ictx->lock); return ictx; rdev_setup_failed: input_unregister_device(ictx->idev); idev_setup_failed: usb_kill_urb(ictx->rx_urb_intf0); urb_submit_failed: find_endpoint_failed: usb_put_dev(ictx->usbdev_intf0); mutex_unlock(&ictx->lock); usb_free_urb(tx_urb); tx_urb_alloc_failed: usb_free_urb(rx_urb); rx_urb_alloc_failed: kfree(ictx); exit: dev_err(dev, "unable to initialize intf0, err %d\n", ret); return NULL; } static struct imon_context *imon_init_intf1(struct usb_interface *intf, struct imon_context *ictx) { struct urb *rx_urb; struct usb_host_interface *iface_desc; int ret = -ENOMEM; rx_urb = usb_alloc_urb(0, GFP_KERNEL); if (!rx_urb) goto rx_urb_alloc_failed; mutex_lock(&ictx->lock); if (ictx->display_type == IMON_DISPLAY_TYPE_VGA) { timer_setup(&ictx->ttimer, imon_touch_display_timeout, 0); } ictx->usbdev_intf1 = usb_get_dev(interface_to_usbdev(intf)); ictx->rx_urb_intf1 = rx_urb; ret = -ENODEV; iface_desc = intf->cur_altsetting; if (!imon_find_endpoints(ictx, iface_desc)) goto find_endpoint_failed; if (ictx->display_type == IMON_DISPLAY_TYPE_VGA) { ictx->touch = imon_init_touch(ictx); if (!ictx->touch) goto touch_setup_failed; } else ictx->touch = NULL; usb_fill_int_urb(ictx->rx_urb_intf1, ictx->usbdev_intf1, usb_rcvintpipe(ictx->usbdev_intf1, ictx->rx_endpoint_intf1->bEndpointAddress), ictx->usb_rx_buf, sizeof(ictx->usb_rx_buf), usb_rx_callback_intf1, ictx, ictx->rx_endpoint_intf1->bInterval); ret = usb_submit_urb(ictx->rx_urb_intf1, GFP_KERNEL); if (ret) { pr_err("usb_submit_urb failed for intf1 (%d)\n", ret); goto urb_submit_failed; } ictx->dev_present_intf1 = true; mutex_unlock(&ictx->lock); return ictx; urb_submit_failed: if (ictx->touch) input_unregister_device(ictx->touch); touch_setup_failed: find_endpoint_failed: usb_put_dev(ictx->usbdev_intf1); ictx->usbdev_intf1 = NULL; mutex_unlock(&ictx->lock); usb_free_urb(rx_urb); ictx->rx_urb_intf1 = NULL; rx_urb_alloc_failed: dev_err(ictx->dev, "unable to initialize intf1, err %d\n", ret); return NULL; } static void imon_init_display(struct imon_context *ictx, struct usb_interface *intf) { int ret; dev_dbg(ictx->dev, "Registering iMON display with sysfs\n"); /* set up sysfs entry for built-in clock */ ret = sysfs_create_group(&intf->dev.kobj, &imon_display_attr_group); if (ret) dev_err(ictx->dev, "Could not create display sysfs entries(%d)", ret); if (ictx->display_type == IMON_DISPLAY_TYPE_LCD) ret = usb_register_dev(intf, &imon_lcd_class); else ret = usb_register_dev(intf, &imon_vfd_class); if (ret) /* Not a fatal error, so ignore */ dev_info(ictx->dev, "could not get a minor number for display\n"); } /* * Callback function for USB core API: Probe */ static int imon_probe(struct usb_interface *interface, const struct usb_device_id *id) { struct usb_device *usbdev = NULL; struct usb_host_interface *iface_desc = NULL; struct usb_interface *first_if; struct device *dev = &interface->dev; int ifnum, sysfs_err; int ret = 0; struct imon_context *ictx = NULL; u16 vendor, product; usbdev = usb_get_dev(interface_to_usbdev(interface)); iface_desc = interface->cur_altsetting; ifnum = iface_desc->desc.bInterfaceNumber; vendor = le16_to_cpu(usbdev->descriptor.idVendor); product = le16_to_cpu(usbdev->descriptor.idProduct); dev_dbg(dev, "%s: found iMON device (%04x:%04x, intf%d)\n", __func__, vendor, product, ifnum); first_if = usb_ifnum_to_if(usbdev, 0); if (!first_if) { ret = -ENODEV; goto fail; } if (first_if->dev.driver != interface->dev.driver) { dev_err(&interface->dev, "inconsistent driver matching\n"); ret = -EINVAL; goto fail; } if (ifnum == 0) { ictx = imon_init_intf0(interface, id); if (!ictx) { pr_err("failed to initialize context!\n"); ret = -ENODEV; goto fail; } refcount_set(&ictx->users, 1); } else { /* this is the secondary interface on the device */ struct imon_context *first_if_ctx = usb_get_intfdata(first_if); /* fail early if first intf failed to register */ if (!first_if_ctx) { ret = -ENODEV; goto fail; } ictx = imon_init_intf1(interface, first_if_ctx); if (!ictx) { pr_err("failed to attach to context!\n"); ret = -ENODEV; goto fail; } refcount_inc(&ictx->users); } usb_set_intfdata(interface, ictx); if (ifnum == 0) { if (product == 0xffdc && ictx->rf_device) { sysfs_err = sysfs_create_group(&interface->dev.kobj, &imon_rf_attr_group); if (sysfs_err) pr_err("Could not create RF sysfs entries(%d)\n", sysfs_err); } if (ictx->display_supported) imon_init_display(ictx, interface); } dev_info(dev, "iMON device (%04x:%04x, intf%d) on usb<%d:%d> initialized\n", vendor, product, ifnum, usbdev->bus->busnum, usbdev->devnum); usb_put_dev(usbdev); return 0; fail: usb_put_dev(usbdev); dev_err(dev, "unable to register, err %d\n", ret); return ret; } /* * Callback function for USB core API: disconnect */ static void imon_disconnect(struct usb_interface *interface) { struct imon_context *ictx; struct device *dev; int ifnum; ictx = usb_get_intfdata(interface); ictx->disconnected = true; dev = ictx->dev; ifnum = interface->cur_altsetting->desc.bInterfaceNumber; /* * sysfs_remove_group is safe to call even if sysfs_create_group * hasn't been called */ sysfs_remove_group(&interface->dev.kobj, &imon_display_attr_group); sysfs_remove_group(&interface->dev.kobj, &imon_rf_attr_group); usb_set_intfdata(interface, NULL); /* Abort ongoing write */ if (ictx->tx.busy) { usb_kill_urb(ictx->tx_urb); complete(&ictx->tx.finished); } if (ifnum == 0) { ictx->dev_present_intf0 = false; usb_kill_urb(ictx->rx_urb_intf0); input_unregister_device(ictx->idev); rc_unregister_device(ictx->rdev); if (ictx->display_supported) { if (ictx->display_type == IMON_DISPLAY_TYPE_LCD) usb_deregister_dev(interface, &imon_lcd_class); else if (ictx->display_type == IMON_DISPLAY_TYPE_VFD) usb_deregister_dev(interface, &imon_vfd_class); } usb_put_dev(ictx->usbdev_intf0); } else { ictx->dev_present_intf1 = false; usb_kill_urb(ictx->rx_urb_intf1); if (ictx->display_type == IMON_DISPLAY_TYPE_VGA) { del_timer_sync(&ictx->ttimer); input_unregister_device(ictx->touch); } usb_put_dev(ictx->usbdev_intf1); } if (refcount_dec_and_test(&ictx->users)) free_imon_context(ictx); dev_dbg(dev, "%s: iMON device (intf%d) disconnected\n", __func__, ifnum); } static int imon_suspend(struct usb_interface *intf, pm_message_t message) { struct imon_context *ictx = usb_get_intfdata(intf); int ifnum = intf->cur_altsetting->desc.bInterfaceNumber; if (ifnum == 0) usb_kill_urb(ictx->rx_urb_intf0); else usb_kill_urb(ictx->rx_urb_intf1); return 0; } static int imon_resume(struct usb_interface *intf) { int rc = 0; struct imon_context *ictx = usb_get_intfdata(intf); int ifnum = intf->cur_altsetting->desc.bInterfaceNumber; if (ifnum == 0) { usb_fill_int_urb(ictx->rx_urb_intf0, ictx->usbdev_intf0, usb_rcvintpipe(ictx->usbdev_intf0, ictx->rx_endpoint_intf0->bEndpointAddress), ictx->usb_rx_buf, sizeof(ictx->usb_rx_buf), usb_rx_callback_intf0, ictx, ictx->rx_endpoint_intf0->bInterval); rc = usb_submit_urb(ictx->rx_urb_intf0, GFP_NOIO); } else { usb_fill_int_urb(ictx->rx_urb_intf1, ictx->usbdev_intf1, usb_rcvintpipe(ictx->usbdev_intf1, ictx->rx_endpoint_intf1->bEndpointAddress), ictx->usb_rx_buf, sizeof(ictx->usb_rx_buf), usb_rx_callback_intf1, ictx, ictx->rx_endpoint_intf1->bInterval); rc = usb_submit_urb(ictx->rx_urb_intf1, GFP_NOIO); } return rc; } module_usb_driver(imon_driver); |
2 16415 2191 52 31 52 86 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_NODEMASK_H #define __LINUX_NODEMASK_H /* * Nodemasks provide a bitmap suitable for representing the * set of Node's in a system, one bit position per Node number. * * See detailed comments in the file linux/bitmap.h describing the * data type on which these nodemasks are based. * * For details of nodemask_parse_user(), see bitmap_parse_user() in * lib/bitmap.c. For details of nodelist_parse(), see bitmap_parselist(), * also in bitmap.c. For details of node_remap(), see bitmap_bitremap in * lib/bitmap.c. For details of nodes_remap(), see bitmap_remap in * lib/bitmap.c. For details of nodes_onto(), see bitmap_onto in * lib/bitmap.c. For details of nodes_fold(), see bitmap_fold in * lib/bitmap.c. * * The available nodemask operations are: * * void node_set(node, mask) turn on bit 'node' in mask * void node_clear(node, mask) turn off bit 'node' in mask * void nodes_setall(mask) set all bits * void nodes_clear(mask) clear all bits * int node_isset(node, mask) true iff bit 'node' set in mask * int node_test_and_set(node, mask) test and set bit 'node' in mask * * void nodes_and(dst, src1, src2) dst = src1 & src2 [intersection] * void nodes_or(dst, src1, src2) dst = src1 | src2 [union] * void nodes_xor(dst, src1, src2) dst = src1 ^ src2 * void nodes_andnot(dst, src1, src2) dst = src1 & ~src2 * void nodes_complement(dst, src) dst = ~src * * int nodes_equal(mask1, mask2) Does mask1 == mask2? * int nodes_intersects(mask1, mask2) Do mask1 and mask2 intersect? * int nodes_subset(mask1, mask2) Is mask1 a subset of mask2? * int nodes_empty(mask) Is mask empty (no bits sets)? * int nodes_full(mask) Is mask full (all bits sets)? * int nodes_weight(mask) Hamming weight - number of set bits * * void nodes_shift_right(dst, src, n) Shift right * void nodes_shift_left(dst, src, n) Shift left * * unsigned int first_node(mask) Number lowest set bit, or MAX_NUMNODES * unsigend int next_node(node, mask) Next node past 'node', or MAX_NUMNODES * unsigned int next_node_in(node, mask) Next node past 'node', or wrap to first, * or MAX_NUMNODES * unsigned int first_unset_node(mask) First node not set in mask, or * MAX_NUMNODES * * nodemask_t nodemask_of_node(node) Return nodemask with bit 'node' set * NODE_MASK_ALL Initializer - all bits set * NODE_MASK_NONE Initializer - no bits set * unsigned long *nodes_addr(mask) Array of unsigned long's in mask * * int nodemask_parse_user(ubuf, ulen, mask) Parse ascii string as nodemask * int nodelist_parse(buf, map) Parse ascii string as nodelist * int node_remap(oldbit, old, new) newbit = map(old, new)(oldbit) * void nodes_remap(dst, src, old, new) *dst = map(old, new)(src) * void nodes_onto(dst, orig, relmap) *dst = orig relative to relmap * void nodes_fold(dst, orig, sz) dst bits = orig bits mod sz * * for_each_node_mask(node, mask) for-loop node over mask * * int num_online_nodes() Number of online Nodes * int num_possible_nodes() Number of all possible Nodes * * int node_random(mask) Random node with set bit in mask * * int node_online(node) Is some node online? * int node_possible(node) Is some node possible? * * node_set_online(node) set bit 'node' in node_online_map * node_set_offline(node) clear bit 'node' in node_online_map * * for_each_node(node) for-loop node over node_possible_map * for_each_online_node(node) for-loop node over node_online_map * * Subtlety: * 1) The 'type-checked' form of node_isset() causes gcc (3.3.2, anyway) * to generate slightly worse code. So use a simple one-line #define * for node_isset(), instead of wrapping an inline inside a macro, the * way we do the other calls. * * NODEMASK_SCRATCH * When doing above logical AND, OR, XOR, Remap operations the callers tend to * need temporary nodemask_t's on the stack. But if NODES_SHIFT is large, * nodemask_t's consume too much stack space. NODEMASK_SCRATCH is a helper * for such situations. See below and CPUMASK_ALLOC also. */ #include <linux/threads.h> #include <linux/bitmap.h> #include <linux/minmax.h> #include <linux/nodemask_types.h> #include <linux/numa.h> #include <linux/random.h> extern nodemask_t _unused_nodemask_arg_; /** * nodemask_pr_args - printf args to output a nodemask * @maskp: nodemask to be printed * * Can be used to provide arguments for '%*pb[l]' when printing a nodemask. */ #define nodemask_pr_args(maskp) __nodemask_pr_numnodes(maskp), \ __nodemask_pr_bits(maskp) static inline unsigned int __nodemask_pr_numnodes(const nodemask_t *m) { return m ? MAX_NUMNODES : 0; } static inline const unsigned long *__nodemask_pr_bits(const nodemask_t *m) { return m ? m->bits : NULL; } /* * The inline keyword gives the compiler room to decide to inline, or * not inline a function as it sees best. However, as these functions * are called in both __init and non-__init functions, if they are not * inlined we will end up with a section mismatch error (of the type of * freeable items not being freed). So we must use __always_inline here * to fix the problem. If other functions in the future also end up in * this situation they will also need to be annotated as __always_inline */ #define node_set(node, dst) __node_set((node), &(dst)) static __always_inline void __node_set(int node, volatile nodemask_t *dstp) { set_bit(node, dstp->bits); } #define node_clear(node, dst) __node_clear((node), &(dst)) static inline void __node_clear(int node, volatile nodemask_t *dstp) { clear_bit(node, dstp->bits); } #define nodes_setall(dst) __nodes_setall(&(dst), MAX_NUMNODES) static inline void __nodes_setall(nodemask_t *dstp, unsigned int nbits) { bitmap_fill(dstp->bits, nbits); } #define nodes_clear(dst) __nodes_clear(&(dst), MAX_NUMNODES) static inline void __nodes_clear(nodemask_t *dstp, unsigned int nbits) { bitmap_zero(dstp->bits, nbits); } /* No static inline type checking - see Subtlety (1) above. */ #define node_isset(node, nodemask) test_bit((node), (nodemask).bits) #define node_test_and_set(node, nodemask) \ __node_test_and_set((node), &(nodemask)) static inline bool __node_test_and_set(int node, nodemask_t *addr) { return test_and_set_bit(node, addr->bits); } #define nodes_and(dst, src1, src2) \ __nodes_and(&(dst), &(src1), &(src2), MAX_NUMNODES) static inline void __nodes_and(nodemask_t *dstp, const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { bitmap_and(dstp->bits, src1p->bits, src2p->bits, nbits); } #define nodes_or(dst, src1, src2) \ __nodes_or(&(dst), &(src1), &(src2), MAX_NUMNODES) static inline void __nodes_or(nodemask_t *dstp, const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { bitmap_or(dstp->bits, src1p->bits, src2p->bits, nbits); } #define nodes_xor(dst, src1, src2) \ __nodes_xor(&(dst), &(src1), &(src2), MAX_NUMNODES) static inline void __nodes_xor(nodemask_t *dstp, const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { bitmap_xor(dstp->bits, src1p->bits, src2p->bits, nbits); } #define nodes_andnot(dst, src1, src2) \ __nodes_andnot(&(dst), &(src1), &(src2), MAX_NUMNODES) static inline void __nodes_andnot(nodemask_t *dstp, const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits); } #define nodes_complement(dst, src) \ __nodes_complement(&(dst), &(src), MAX_NUMNODES) static inline void __nodes_complement(nodemask_t *dstp, const nodemask_t *srcp, unsigned int nbits) { bitmap_complement(dstp->bits, srcp->bits, nbits); } #define nodes_equal(src1, src2) \ __nodes_equal(&(src1), &(src2), MAX_NUMNODES) static inline bool __nodes_equal(const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { return bitmap_equal(src1p->bits, src2p->bits, nbits); } #define nodes_intersects(src1, src2) \ __nodes_intersects(&(src1), &(src2), MAX_NUMNODES) static inline bool __nodes_intersects(const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { return bitmap_intersects(src1p->bits, src2p->bits, nbits); } #define nodes_subset(src1, src2) \ __nodes_subset(&(src1), &(src2), MAX_NUMNODES) static inline bool __nodes_subset(const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { return bitmap_subset(src1p->bits, src2p->bits, nbits); } #define nodes_empty(src) __nodes_empty(&(src), MAX_NUMNODES) static inline bool __nodes_empty(const nodemask_t *srcp, unsigned int nbits) { return bitmap_empty(srcp->bits, nbits); } #define nodes_full(nodemask) __nodes_full(&(nodemask), MAX_NUMNODES) static inline bool __nodes_full(const nodemask_t *srcp, unsigned int nbits) { return bitmap_full(srcp->bits, nbits); } #define nodes_weight(nodemask) __nodes_weight(&(nodemask), MAX_NUMNODES) static inline int __nodes_weight(const nodemask_t *srcp, unsigned int nbits) { return bitmap_weight(srcp->bits, nbits); } #define nodes_shift_right(dst, src, n) \ __nodes_shift_right(&(dst), &(src), (n), MAX_NUMNODES) static inline void __nodes_shift_right(nodemask_t *dstp, const nodemask_t *srcp, int n, int nbits) { bitmap_shift_right(dstp->bits, srcp->bits, n, nbits); } #define nodes_shift_left(dst, src, n) \ __nodes_shift_left(&(dst), &(src), (n), MAX_NUMNODES) static inline void __nodes_shift_left(nodemask_t *dstp, const nodemask_t *srcp, int n, int nbits) { bitmap_shift_left(dstp->bits, srcp->bits, n, nbits); } /* FIXME: better would be to fix all architectures to never return > MAX_NUMNODES, then the silly min_ts could be dropped. */ #define first_node(src) __first_node(&(src)) static inline unsigned int __first_node(const nodemask_t *srcp) { return min_t(unsigned int, MAX_NUMNODES, find_first_bit(srcp->bits, MAX_NUMNODES)); } #define next_node(n, src) __next_node((n), &(src)) static inline unsigned int __next_node(int n, const nodemask_t *srcp) { return min_t(unsigned int, MAX_NUMNODES, find_next_bit(srcp->bits, MAX_NUMNODES, n+1)); } /* * Find the next present node in src, starting after node n, wrapping around to * the first node in src if needed. Returns MAX_NUMNODES if src is empty. */ #define next_node_in(n, src) __next_node_in((n), &(src)) static inline unsigned int __next_node_in(int node, const nodemask_t *srcp) { unsigned int ret = __next_node(node, srcp); if (ret == MAX_NUMNODES) ret = __first_node(srcp); return ret; } static inline void init_nodemask_of_node(nodemask_t *mask, int node) { nodes_clear(*mask); node_set(node, *mask); } #define nodemask_of_node(node) \ ({ \ typeof(_unused_nodemask_arg_) m; \ if (sizeof(m) == sizeof(unsigned long)) { \ m.bits[0] = 1UL << (node); \ } else { \ init_nodemask_of_node(&m, (node)); \ } \ m; \ }) #define first_unset_node(mask) __first_unset_node(&(mask)) static inline unsigned int __first_unset_node(const nodemask_t *maskp) { return min_t(unsigned int, MAX_NUMNODES, find_first_zero_bit(maskp->bits, MAX_NUMNODES)); } #define NODE_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(MAX_NUMNODES) #if MAX_NUMNODES <= BITS_PER_LONG #define NODE_MASK_ALL \ ((nodemask_t) { { \ [BITS_TO_LONGS(MAX_NUMNODES)-1] = NODE_MASK_LAST_WORD \ } }) #else #define NODE_MASK_ALL \ ((nodemask_t) { { \ [0 ... BITS_TO_LONGS(MAX_NUMNODES)-2] = ~0UL, \ [BITS_TO_LONGS(MAX_NUMNODES)-1] = NODE_MASK_LAST_WORD \ } }) #endif #define NODE_MASK_NONE \ ((nodemask_t) { { \ [0 ... BITS_TO_LONGS(MAX_NUMNODES)-1] = 0UL \ } }) #define nodes_addr(src) ((src).bits) #define nodemask_parse_user(ubuf, ulen, dst) \ __nodemask_parse_user((ubuf), (ulen), &(dst), MAX_NUMNODES) static inline int __nodemask_parse_user(const char __user *buf, int len, nodemask_t *dstp, int nbits) { return bitmap_parse_user(buf, len, dstp->bits, nbits); } #define nodelist_parse(buf, dst) __nodelist_parse((buf), &(dst), MAX_NUMNODES) static inline int __nodelist_parse(const char *buf, nodemask_t *dstp, int nbits) { return bitmap_parselist(buf, dstp->bits, nbits); } #define node_remap(oldbit, old, new) \ __node_remap((oldbit), &(old), &(new), MAX_NUMNODES) static inline int __node_remap(int oldbit, const nodemask_t *oldp, const nodemask_t *newp, int nbits) { return bitmap_bitremap(oldbit, oldp->bits, newp->bits, nbits); } #define nodes_remap(dst, src, old, new) \ __nodes_remap(&(dst), &(src), &(old), &(new), MAX_NUMNODES) static inline void __nodes_remap(nodemask_t *dstp, const nodemask_t *srcp, const nodemask_t *oldp, const nodemask_t *newp, int nbits) { bitmap_remap(dstp->bits, srcp->bits, oldp->bits, newp->bits, nbits); } #define nodes_onto(dst, orig, relmap) \ __nodes_onto(&(dst), &(orig), &(relmap), MAX_NUMNODES) static inline void __nodes_onto(nodemask_t *dstp, const nodemask_t *origp, const nodemask_t *relmapp, int nbits) { bitmap_onto(dstp->bits, origp->bits, relmapp->bits, nbits); } #define nodes_fold(dst, orig, sz) \ __nodes_fold(&(dst), &(orig), sz, MAX_NUMNODES) static inline void __nodes_fold(nodemask_t *dstp, const nodemask_t *origp, int sz, int nbits) { bitmap_fold(dstp->bits, origp->bits, sz, nbits); } #if MAX_NUMNODES > 1 #define for_each_node_mask(node, mask) \ for ((node) = first_node(mask); \ (node) < MAX_NUMNODES; \ (node) = next_node((node), (mask))) #else /* MAX_NUMNODES == 1 */ #define for_each_node_mask(node, mask) \ for ((node) = 0; (node) < 1 && !nodes_empty(mask); (node)++) #endif /* MAX_NUMNODES */ /* * Bitmasks that are kept for all the nodes. */ enum node_states { N_POSSIBLE, /* The node could become online at some point */ N_ONLINE, /* The node is online */ N_NORMAL_MEMORY, /* The node has regular memory */ #ifdef CONFIG_HIGHMEM N_HIGH_MEMORY, /* The node has regular or high memory */ #else N_HIGH_MEMORY = N_NORMAL_MEMORY, #endif N_MEMORY, /* The node has memory(regular, high, movable) */ N_CPU, /* The node has one or more cpus */ N_GENERIC_INITIATOR, /* The node has one or more Generic Initiators */ NR_NODE_STATES }; /* * The following particular system nodemasks and operations * on them manage all possible and online nodes. */ extern nodemask_t node_states[NR_NODE_STATES]; #if MAX_NUMNODES > 1 static inline int node_state(int node, enum node_states state) { return node_isset(node, node_states[state]); } static inline void node_set_state(int node, enum node_states state) { __node_set(node, &node_states[state]); } static inline void node_clear_state(int node, enum node_states state) { __node_clear(node, &node_states[state]); } static inline int num_node_state(enum node_states state) { return nodes_weight(node_states[state]); } #define for_each_node_state(__node, __state) \ for_each_node_mask((__node), node_states[__state]) #define first_online_node first_node(node_states[N_ONLINE]) #define first_memory_node first_node(node_states[N_MEMORY]) static inline unsigned int next_online_node(int nid) { return next_node(nid, node_states[N_ONLINE]); } static inline unsigned int next_memory_node(int nid) { return next_node(nid, node_states[N_MEMORY]); } extern unsigned int nr_node_ids; extern unsigned int nr_online_nodes; static inline void node_set_online(int nid) { node_set_state(nid, N_ONLINE); nr_online_nodes = num_node_state(N_ONLINE); } static inline void node_set_offline(int nid) { node_clear_state(nid, N_ONLINE); nr_online_nodes = num_node_state(N_ONLINE); } #else static inline int node_state(int node, enum node_states state) { return node == 0; } static inline void node_set_state(int node, enum node_states state) { } static inline void node_clear_state(int node, enum node_states state) { } static inline int num_node_state(enum node_states state) { return 1; } #define for_each_node_state(node, __state) \ for ( (node) = 0; (node) == 0; (node) = 1) #define first_online_node 0 #define first_memory_node 0 #define next_online_node(nid) (MAX_NUMNODES) #define next_memory_node(nid) (MAX_NUMNODES) #define nr_node_ids 1U #define nr_online_nodes 1U #define node_set_online(node) node_set_state((node), N_ONLINE) #define node_set_offline(node) node_clear_state((node), N_ONLINE) #endif static inline int node_random(const nodemask_t *maskp) { #if defined(CONFIG_NUMA) && (MAX_NUMNODES > 1) int w, bit; w = nodes_weight(*maskp); switch (w) { case 0: bit = NUMA_NO_NODE; break; case 1: bit = first_node(*maskp); break; default: bit = find_nth_bit(maskp->bits, MAX_NUMNODES, get_random_u32_below(w)); break; } return bit; #else return 0; #endif } #define node_online_map node_states[N_ONLINE] #define node_possible_map node_states[N_POSSIBLE] #define num_online_nodes() num_node_state(N_ONLINE) #define num_possible_nodes() num_node_state(N_POSSIBLE) #define node_online(node) node_state((node), N_ONLINE) #define node_possible(node) node_state((node), N_POSSIBLE) #define for_each_node(node) for_each_node_state(node, N_POSSIBLE) #define for_each_online_node(node) for_each_node_state(node, N_ONLINE) /* * For nodemask scratch area. * NODEMASK_ALLOC(type, name) allocates an object with a specified type and * name. */ #if NODES_SHIFT > 8 /* nodemask_t > 32 bytes */ #define NODEMASK_ALLOC(type, name, gfp_flags) \ type *name = kmalloc(sizeof(*name), gfp_flags) #define NODEMASK_FREE(m) kfree(m) #else #define NODEMASK_ALLOC(type, name, gfp_flags) type _##name, *name = &_##name #define NODEMASK_FREE(m) do {} while (0) #endif /* Example structure for using NODEMASK_ALLOC, used in mempolicy. */ struct nodemask_scratch { nodemask_t mask1; nodemask_t mask2; }; #define NODEMASK_SCRATCH(x) \ NODEMASK_ALLOC(struct nodemask_scratch, x, \ GFP_KERNEL | __GFP_NORETRY) #define NODEMASK_SCRATCH_FREE(x) NODEMASK_FREE(x) #endif /* __LINUX_NODEMASK_H */ |
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 | // SPDX-License-Identifier: GPL-2.0 /* * platform.c - platform 'pseudo' bus for legacy devices * * Copyright (c) 2002-3 Patrick Mochel * Copyright (c) 2002-3 Open Source Development Labs * * Please see Documentation/driver-api/driver-model/platform.rst for more * information. */ #include <linux/string.h> #include <linux/platform_device.h> #include <linux/of_device.h> #include <linux/of_irq.h> #include <linux/module.h> #include <linux/init.h> #include <linux/interrupt.h> #include <linux/ioport.h> #include <linux/dma-mapping.h> #include <linux/memblock.h> #include <linux/err.h> #include <linux/slab.h> #include <linux/pm_runtime.h> #include <linux/pm_domain.h> #include <linux/idr.h> #include <linux/acpi.h> #include <linux/clk/clk-conf.h> #include <linux/limits.h> #include <linux/property.h> #include <linux/kmemleak.h> #include <linux/types.h> #include <linux/iommu.h> #include <linux/dma-map-ops.h> #include "base.h" #include "power/power.h" /* For automatically allocated device IDs */ static DEFINE_IDA(platform_devid_ida); struct device platform_bus = { .init_name = "platform", }; EXPORT_SYMBOL_GPL(platform_bus); /** * platform_get_resource - get a resource for a device * @dev: platform device * @type: resource type * @num: resource index * * Return: a pointer to the resource or NULL on failure. */ struct resource *platform_get_resource(struct platform_device *dev, unsigned int type, unsigned int num) { u32 i; for (i = 0; i < dev->num_resources; i++) { struct resource *r = &dev->resource[i]; if (type == resource_type(r) && num-- == 0) return r; } return NULL; } EXPORT_SYMBOL_GPL(platform_get_resource); struct resource *platform_get_mem_or_io(struct platform_device *dev, unsigned int num) { u32 i; for (i = 0; i < dev->num_resources; i++) { struct resource *r = &dev->resource[i]; if ((resource_type(r) & (IORESOURCE_MEM|IORESOURCE_IO)) && num-- == 0) return r; } return NULL; } EXPORT_SYMBOL_GPL(platform_get_mem_or_io); #ifdef CONFIG_HAS_IOMEM /** * devm_platform_get_and_ioremap_resource - call devm_ioremap_resource() for a * platform device and get resource * * @pdev: platform device to use both for memory resource lookup as well as * resource management * @index: resource index * @res: optional output parameter to store a pointer to the obtained resource. * * Return: a pointer to the remapped memory or an ERR_PTR() encoded error code * on failure. */ void __iomem * devm_platform_get_and_ioremap_resource(struct platform_device *pdev, unsigned int index, struct resource **res) { struct resource *r; r = platform_get_resource(pdev, IORESOURCE_MEM, index); if (res) *res = r; return devm_ioremap_resource(&pdev->dev, r); } EXPORT_SYMBOL_GPL(devm_platform_get_and_ioremap_resource); /** * devm_platform_ioremap_resource - call devm_ioremap_resource() for a platform * device * * @pdev: platform device to use both for memory resource lookup as well as * resource management * @index: resource index * * Return: a pointer to the remapped memory or an ERR_PTR() encoded error code * on failure. */ void __iomem *devm_platform_ioremap_resource(struct platform_device *pdev, unsigned int index) { return devm_platform_get_and_ioremap_resource(pdev, index, NULL); } EXPORT_SYMBOL_GPL(devm_platform_ioremap_resource); /** * devm_platform_ioremap_resource_byname - call devm_ioremap_resource for * a platform device, retrieve the * resource by name * * @pdev: platform device to use both for memory resource lookup as well as * resource management * @name: name of the resource * * Return: a pointer to the remapped memory or an ERR_PTR() encoded error code * on failure. */ void __iomem * devm_platform_ioremap_resource_byname(struct platform_device *pdev, const char *name) { struct resource *res; res = platform_get_resource_byname(pdev, IORESOURCE_MEM, name); return devm_ioremap_resource(&pdev->dev, res); } EXPORT_SYMBOL_GPL(devm_platform_ioremap_resource_byname); #endif /* CONFIG_HAS_IOMEM */ /** * platform_get_irq_optional - get an optional IRQ for a device * @dev: platform device * @num: IRQ number index * * Gets an IRQ for a platform device. Device drivers should check the return * value for errors so as to not pass a negative integer value to the * request_irq() APIs. This is the same as platform_get_irq(), except that it * does not print an error message if an IRQ can not be obtained. * * For example:: * * int irq = platform_get_irq_optional(pdev, 0); * if (irq < 0) * return irq; * * Return: non-zero IRQ number on success, negative error number on failure. */ int platform_get_irq_optional(struct platform_device *dev, unsigned int num) { int ret; #ifdef CONFIG_SPARC /* sparc does not have irqs represented as IORESOURCE_IRQ resources */ if (!dev || num >= dev->archdata.num_irqs) goto out_not_found; ret = dev->archdata.irqs[num]; goto out; #else struct fwnode_handle *fwnode = dev_fwnode(&dev->dev); struct resource *r; if (is_of_node(fwnode)) { ret = of_irq_get(to_of_node(fwnode), num); if (ret > 0 || ret == -EPROBE_DEFER) goto out; } r = platform_get_resource(dev, IORESOURCE_IRQ, num); if (is_acpi_device_node(fwnode)) { if (r && r->flags & IORESOURCE_DISABLED) { ret = acpi_irq_get(ACPI_HANDLE_FWNODE(fwnode), num, r); if (ret) goto out; } } /* * The resources may pass trigger flags to the irqs that need * to be set up. It so happens that the trigger flags for * IORESOURCE_BITS correspond 1-to-1 to the IRQF_TRIGGER* * settings. */ if (r && r->flags & IORESOURCE_BITS) { struct irq_data *irqd; irqd = irq_get_irq_data(r->start); if (!irqd) goto out_not_found; irqd_set_trigger_type(irqd, r->flags & IORESOURCE_BITS); } if (r) { ret = r->start; goto out; } /* * For the index 0 interrupt, allow falling back to GpioInt * resources. While a device could have both Interrupt and GpioInt * resources, making this fallback ambiguous, in many common cases * the device will only expose one IRQ, and this fallback * allows a common code path across either kind of resource. */ if (num == 0 && is_acpi_device_node(fwnode)) { ret = acpi_dev_gpio_irq_get(to_acpi_device_node(fwnode), num); /* Our callers expect -ENXIO for missing IRQs. */ if (ret >= 0 || ret == -EPROBE_DEFER) goto out; } #endif out_not_found: ret = -ENXIO; out: if (WARN(!ret, "0 is an invalid IRQ number\n")) return -EINVAL; return ret; } EXPORT_SYMBOL_GPL(platform_get_irq_optional); /** * platform_get_irq - get an IRQ for a device * @dev: platform device * @num: IRQ number index * * Gets an IRQ for a platform device and prints an error message if finding the * IRQ fails. Device drivers should check the return value for errors so as to * not pass a negative integer value to the request_irq() APIs. * * For example:: * * int irq = platform_get_irq(pdev, 0); * if (irq < 0) * return irq; * * Return: non-zero IRQ number on success, negative error number on failure. */ int platform_get_irq(struct platform_device *dev, unsigned int num) { int ret; ret = platform_get_irq_optional(dev, num); if (ret < 0) return dev_err_probe(&dev->dev, ret, "IRQ index %u not found\n", num); return ret; } EXPORT_SYMBOL_GPL(platform_get_irq); /** * platform_irq_count - Count the number of IRQs a platform device uses * @dev: platform device * * Return: Number of IRQs a platform device uses or EPROBE_DEFER */ int platform_irq_count(struct platform_device *dev) { int ret, nr = 0; while ((ret = platform_get_irq_optional(dev, nr)) >= 0) nr++; if (ret == -EPROBE_DEFER) return ret; return nr; } EXPORT_SYMBOL_GPL(platform_irq_count); struct irq_affinity_devres { unsigned int count; unsigned int irq[] __counted_by(count); }; static void platform_disable_acpi_irq(struct platform_device *pdev, int index) { struct resource *r; r = platform_get_resource(pdev, IORESOURCE_IRQ, index); if (r) irqresource_disabled(r, 0); } static void devm_platform_get_irqs_affinity_release(struct device *dev, void *res) { struct irq_affinity_devres *ptr = res; int i; for (i = 0; i < ptr->count; i++) { irq_dispose_mapping(ptr->irq[i]); if (is_acpi_device_node(dev_fwnode(dev))) platform_disable_acpi_irq(to_platform_device(dev), i); } } /** * devm_platform_get_irqs_affinity - devm method to get a set of IRQs for a * device using an interrupt affinity descriptor * @dev: platform device pointer * @affd: affinity descriptor * @minvec: minimum count of interrupt vectors * @maxvec: maximum count of interrupt vectors * @irqs: pointer holder for IRQ numbers * * Gets a set of IRQs for a platform device, and updates IRQ afffinty according * to the passed affinity descriptor * * Return: Number of vectors on success, negative error number on failure. */ int devm_platform_get_irqs_affinity(struct platform_device *dev, struct irq_affinity *affd, unsigned int minvec, unsigned int maxvec, int **irqs) { struct irq_affinity_devres *ptr; struct irq_affinity_desc *desc; size_t size; int i, ret, nvec; if (!affd) return -EPERM; if (maxvec < minvec) return -ERANGE; nvec = platform_irq_count(dev); if (nvec < 0) return nvec; if (nvec < minvec) return -ENOSPC; nvec = irq_calc_affinity_vectors(minvec, nvec, affd); if (nvec < minvec) return -ENOSPC; if (nvec > maxvec) nvec = maxvec; size = sizeof(*ptr) + sizeof(unsigned int) * nvec; ptr = devres_alloc(devm_platform_get_irqs_affinity_release, size, GFP_KERNEL); if (!ptr) return -ENOMEM; ptr->count = nvec; for (i = 0; i < nvec; i++) { int irq = platform_get_irq(dev, i); if (irq < 0) { ret = irq; goto err_free_devres; } ptr->irq[i] = irq; } desc = irq_create_affinity_masks(nvec, affd); if (!desc) { ret = -ENOMEM; goto err_free_devres; } for (i = 0; i < nvec; i++) { ret = irq_update_affinity_desc(ptr->irq[i], &desc[i]); if (ret) { dev_err(&dev->dev, "failed to update irq%d affinity descriptor (%d)\n", ptr->irq[i], ret); goto err_free_desc; } } devres_add(&dev->dev, ptr); kfree(desc); *irqs = ptr->irq; return nvec; err_free_desc: kfree(desc); err_free_devres: devres_free(ptr); return ret; } EXPORT_SYMBOL_GPL(devm_platform_get_irqs_affinity); /** * platform_get_resource_byname - get a resource for a device by name * @dev: platform device * @type: resource type * @name: resource name */ struct resource *platform_get_resource_byname(struct platform_device *dev, unsigned int type, const char *name) { u32 i; for (i = 0; i < dev->num_resources; i++) { struct resource *r = &dev->resource[i]; if (unlikely(!r->name)) continue; if (type == resource_type(r) && !strcmp(r->name, name)) return r; } return NULL; } EXPORT_SYMBOL_GPL(platform_get_resource_byname); static int __platform_get_irq_byname(struct platform_device *dev, const char *name) { struct resource *r; int ret; ret = fwnode_irq_get_byname(dev_fwnode(&dev->dev), name); if (ret > 0 || ret == -EPROBE_DEFER) return ret; r = platform_get_resource_byname(dev, IORESOURCE_IRQ, name); if (r) { if (WARN(!r->start, "0 is an invalid IRQ number\n")) return -EINVAL; return r->start; } return -ENXIO; } /** * platform_get_irq_byname - get an IRQ for a device by name * @dev: platform device * @name: IRQ name * * Get an IRQ like platform_get_irq(), but then by name rather then by index. * * Return: non-zero IRQ number on success, negative error number on failure. */ int platform_get_irq_byname(struct platform_device *dev, const char *name) { int ret; ret = __platform_get_irq_byname(dev, name); if (ret < 0) return dev_err_probe(&dev->dev, ret, "IRQ %s not found\n", name); return ret; } EXPORT_SYMBOL_GPL(platform_get_irq_byname); /** * platform_get_irq_byname_optional - get an optional IRQ for a device by name * @dev: platform device * @name: IRQ name * * Get an optional IRQ by name like platform_get_irq_byname(). Except that it * does not print an error message if an IRQ can not be obtained. * * Return: non-zero IRQ number on success, negative error number on failure. */ int platform_get_irq_byname_optional(struct platform_device *dev, const char *name) { return __platform_get_irq_byname(dev, name); } EXPORT_SYMBOL_GPL(platform_get_irq_byname_optional); /** * platform_add_devices - add a numbers of platform devices * @devs: array of platform devices to add * @num: number of platform devices in array * * Return: 0 on success, negative error number on failure. */ int platform_add_devices(struct platform_device **devs, int num) { int i, ret = 0; for (i = 0; i < num; i++) { ret = platform_device_register(devs[i]); if (ret) { while (--i >= 0) platform_device_unregister(devs[i]); break; } } return ret; } EXPORT_SYMBOL_GPL(platform_add_devices); struct platform_object { struct platform_device pdev; char name[]; }; /* * Set up default DMA mask for platform devices if the they weren't * previously set by the architecture / DT. */ static void setup_pdev_dma_masks(struct platform_device *pdev) { pdev->dev.dma_parms = &pdev->dma_parms; if (!pdev->dev.coherent_dma_mask) pdev->dev.coherent_dma_mask = DMA_BIT_MASK(32); if (!pdev->dev.dma_mask) { pdev->platform_dma_mask = DMA_BIT_MASK(32); pdev->dev.dma_mask = &pdev->platform_dma_mask; } }; /** * platform_device_put - destroy a platform device * @pdev: platform device to free * * Free all memory associated with a platform device. This function must * _only_ be externally called in error cases. All other usage is a bug. */ void platform_device_put(struct platform_device *pdev) { if (!IS_ERR_OR_NULL(pdev)) put_device(&pdev->dev); } EXPORT_SYMBOL_GPL(platform_device_put); static void platform_device_release(struct device *dev) { struct platform_object *pa = container_of(dev, struct platform_object, pdev.dev); of_node_put(pa->pdev.dev.of_node); kfree(pa->pdev.dev.platform_data); kfree(pa->pdev.mfd_cell); kfree(pa->pdev.resource); kfree(pa->pdev.driver_override); kfree(pa); } /** * platform_device_alloc - create a platform device * @name: base name of the device we're adding * @id: instance id * * Create a platform device object which can have other objects attached * to it, and which will have attached objects freed when it is released. */ struct platform_device *platform_device_alloc(const char *name, int id) { struct platform_object *pa; pa = kzalloc(sizeof(*pa) + strlen(name) + 1, GFP_KERNEL); if (pa) { strcpy(pa->name, name); pa->pdev.name = pa->name; pa->pdev.id = id; device_initialize(&pa->pdev.dev); pa->pdev.dev.release = platform_device_release; setup_pdev_dma_masks(&pa->pdev); } return pa ? &pa->pdev : NULL; } EXPORT_SYMBOL_GPL(platform_device_alloc); /** * platform_device_add_resources - add resources to a platform device * @pdev: platform device allocated by platform_device_alloc to add resources to * @res: set of resources that needs to be allocated for the device * @num: number of resources * * Add a copy of the resources to the platform device. The memory * associated with the resources will be freed when the platform device is * released. */ int platform_device_add_resources(struct platform_device *pdev, const struct resource *res, unsigned int num) { struct resource *r = NULL; if (res) { r = kmemdup(res, sizeof(struct resource) * num, GFP_KERNEL); if (!r) return -ENOMEM; } kfree(pdev->resource); pdev->resource = r; pdev->num_resources = num; return 0; } EXPORT_SYMBOL_GPL(platform_device_add_resources); /** * platform_device_add_data - add platform-specific data to a platform device * @pdev: platform device allocated by platform_device_alloc to add resources to * @data: platform specific data for this platform device * @size: size of platform specific data * * Add a copy of platform specific data to the platform device's * platform_data pointer. The memory associated with the platform data * will be freed when the platform device is released. */ int platform_device_add_data(struct platform_device *pdev, const void *data, size_t size) { void *d = NULL; if (data) { d = kmemdup(data, size, GFP_KERNEL); if (!d) return -ENOMEM; } kfree(pdev->dev.platform_data); pdev->dev.platform_data = d; return 0; } EXPORT_SYMBOL_GPL(platform_device_add_data); /** * platform_device_add - add a platform device to device hierarchy * @pdev: platform device we're adding * * This is part 2 of platform_device_register(), though may be called * separately _iff_ pdev was allocated by platform_device_alloc(). */ int platform_device_add(struct platform_device *pdev) { struct device *dev = &pdev->dev; u32 i; int ret; if (!dev->parent) dev->parent = &platform_bus; dev->bus = &platform_bus_type; switch (pdev->id) { default: dev_set_name(dev, "%s.%d", pdev->name, pdev->id); break; case PLATFORM_DEVID_NONE: dev_set_name(dev, "%s", pdev->name); break; case PLATFORM_DEVID_AUTO: /* * Automatically allocated device ID. We mark it as such so * that we remember it must be freed, and we append a suffix * to avoid namespace collision with explicit IDs. */ ret = ida_alloc(&platform_devid_ida, GFP_KERNEL); if (ret < 0) return ret; pdev->id = ret; pdev->id_auto = true; dev_set_name(dev, "%s.%d.auto", pdev->name, pdev->id); break; } for (i = 0; i < pdev->num_resources; i++) { struct resource *p, *r = &pdev->resource[i]; if (r->name == NULL) r->name = dev_name(dev); p = r->parent; if (!p) { if (resource_type(r) == IORESOURCE_MEM) p = &iomem_resource; else if (resource_type(r) == IORESOURCE_IO) p = &ioport_resource; } if (p) { ret = insert_resource(p, r); if (ret) { dev_err(dev, "failed to claim resource %d: %pR\n", i, r); goto failed; } } } pr_debug("Registering platform device '%s'. Parent at %s\n", dev_name(dev), dev_name(dev->parent)); ret = device_add(dev); if (ret) goto failed; return 0; failed: if (pdev->id_auto) { ida_free(&platform_devid_ida, pdev->id); pdev->id = PLATFORM_DEVID_AUTO; } while (i--) { struct resource *r = &pdev->resource[i]; if (r->parent) release_resource(r); } return ret; } EXPORT_SYMBOL_GPL(platform_device_add); /** * platform_device_del - remove a platform-level device * @pdev: platform device we're removing * * Note that this function will also release all memory- and port-based * resources owned by the device (@dev->resource). This function must * _only_ be externally called in error cases. All other usage is a bug. */ void platform_device_del(struct platform_device *pdev) { u32 i; if (!IS_ERR_OR_NULL(pdev)) { device_del(&pdev->dev); if (pdev->id_auto) { ida_free(&platform_devid_ida, pdev->id); pdev->id = PLATFORM_DEVID_AUTO; } for (i = 0; i < pdev->num_resources; i++) { struct resource *r = &pdev->resource[i]; if (r->parent) release_resource(r); } } } EXPORT_SYMBOL_GPL(platform_device_del); /** * platform_device_register - add a platform-level device * @pdev: platform device we're adding * * NOTE: _Never_ directly free @pdev after calling this function, even if it * returned an error! Always use platform_device_put() to give up the * reference initialised in this function instead. */ int platform_device_register(struct platform_device *pdev) { device_initialize(&pdev->dev); setup_pdev_dma_masks(pdev); return platform_device_add(pdev); } EXPORT_SYMBOL_GPL(platform_device_register); /** * platform_device_unregister - unregister a platform-level device * @pdev: platform device we're unregistering * * Unregistration is done in 2 steps. First we release all resources * and remove it from the subsystem, then we drop reference count by * calling platform_device_put(). */ void platform_device_unregister(struct platform_device *pdev) { platform_device_del(pdev); platform_device_put(pdev); } EXPORT_SYMBOL_GPL(platform_device_unregister); /** * platform_device_register_full - add a platform-level device with * resources and platform-specific data * * @pdevinfo: data used to create device * * Returns &struct platform_device pointer on success, or ERR_PTR() on error. */ struct platform_device *platform_device_register_full( const struct platform_device_info *pdevinfo) { int ret; struct platform_device *pdev; pdev = platform_device_alloc(pdevinfo->name, pdevinfo->id); if (!pdev) return ERR_PTR(-ENOMEM); pdev->dev.parent = pdevinfo->parent; pdev->dev.fwnode = pdevinfo->fwnode; pdev->dev.of_node = of_node_get(to_of_node(pdev->dev.fwnode)); pdev->dev.of_node_reused = pdevinfo->of_node_reused; if (pdevinfo->dma_mask) { pdev->platform_dma_mask = pdevinfo->dma_mask; pdev->dev.dma_mask = &pdev->platform_dma_mask; pdev->dev.coherent_dma_mask = pdevinfo->dma_mask; } ret = platform_device_add_resources(pdev, pdevinfo->res, pdevinfo->num_res); if (ret) goto err; ret = platform_device_add_data(pdev, pdevinfo->data, pdevinfo->size_data); if (ret) goto err; if (pdevinfo->properties) { ret = device_create_managed_software_node(&pdev->dev, pdevinfo->properties, NULL); if (ret) goto err; } ret = platform_device_add(pdev); if (ret) { err: ACPI_COMPANION_SET(&pdev->dev, NULL); platform_device_put(pdev); return ERR_PTR(ret); } return pdev; } EXPORT_SYMBOL_GPL(platform_device_register_full); /** * __platform_driver_register - register a driver for platform-level devices * @drv: platform driver structure * @owner: owning module/driver */ int __platform_driver_register(struct platform_driver *drv, struct module *owner) { drv->driver.owner = owner; drv->driver.bus = &platform_bus_type; return driver_register(&drv->driver); } EXPORT_SYMBOL_GPL(__platform_driver_register); /** * platform_driver_unregister - unregister a driver for platform-level devices * @drv: platform driver structure */ void platform_driver_unregister(struct platform_driver *drv) { driver_unregister(&drv->driver); } EXPORT_SYMBOL_GPL(platform_driver_unregister); static int platform_probe_fail(struct platform_device *pdev) { return -ENXIO; } static int is_bound_to_driver(struct device *dev, void *driver) { if (dev->driver == driver) return 1; return 0; } /** * __platform_driver_probe - register driver for non-hotpluggable device * @drv: platform driver structure * @probe: the driver probe routine, probably from an __init section * @module: module which will be the owner of the driver * * Use this instead of platform_driver_register() when you know the device * is not hotpluggable and has already been registered, and you want to * remove its run-once probe() infrastructure from memory after the driver * has bound to the device. * * One typical use for this would be with drivers for controllers integrated * into system-on-chip processors, where the controller devices have been * configured as part of board setup. * * Note that this is incompatible with deferred probing. * * Returns zero if the driver registered and bound to a device, else returns * a negative error code and with the driver not registered. */ int __init_or_module __platform_driver_probe(struct platform_driver *drv, int (*probe)(struct platform_device *), struct module *module) { int retval; if (drv->driver.probe_type == PROBE_PREFER_ASYNCHRONOUS) { pr_err("%s: drivers registered with %s can not be probed asynchronously\n", drv->driver.name, __func__); return -EINVAL; } /* * We have to run our probes synchronously because we check if * we find any devices to bind to and exit with error if there * are any. */ drv->driver.probe_type = PROBE_FORCE_SYNCHRONOUS; /* * Prevent driver from requesting probe deferral to avoid further * futile probe attempts. */ drv->prevent_deferred_probe = true; /* make sure driver won't have bind/unbind attributes */ drv->driver.suppress_bind_attrs = true; /* temporary section violation during probe() */ drv->probe = probe; retval = __platform_driver_register(drv, module); if (retval) return retval; /* Force all new probes of this driver to fail */ drv->probe = platform_probe_fail; /* Walk all platform devices and see if any actually bound to this driver. * If not, return an error as the device should have done so by now. */ if (!bus_for_each_dev(&platform_bus_type, NULL, &drv->driver, is_bound_to_driver)) { retval = -ENODEV; platform_driver_unregister(drv); } return retval; } EXPORT_SYMBOL_GPL(__platform_driver_probe); /** * __platform_create_bundle - register driver and create corresponding device * @driver: platform driver structure * @probe: the driver probe routine, probably from an __init section * @res: set of resources that needs to be allocated for the device * @n_res: number of resources * @data: platform specific data for this platform device * @size: size of platform specific data * @module: module which will be the owner of the driver * * Use this in legacy-style modules that probe hardware directly and * register a single platform device and corresponding platform driver. * * Returns &struct platform_device pointer on success, or ERR_PTR() on error. */ struct platform_device * __init_or_module __platform_create_bundle( struct platform_driver *driver, int (*probe)(struct platform_device *), struct resource *res, unsigned int n_res, const void *data, size_t size, struct module *module) { struct platform_device *pdev; int error; pdev = platform_device_alloc(driver->driver.name, -1); if (!pdev) { error = -ENOMEM; goto err_out; } error = platform_device_add_resources(pdev, res, n_res); if (error) goto err_pdev_put; error = platform_device_add_data(pdev, data, size); if (error) goto err_pdev_put; error = platform_device_add(pdev); if (error) goto err_pdev_put; error = __platform_driver_probe(driver, probe, module); if (error) goto err_pdev_del; return pdev; err_pdev_del: platform_device_del(pdev); err_pdev_put: platform_device_put(pdev); err_out: return ERR_PTR(error); } EXPORT_SYMBOL_GPL(__platform_create_bundle); /** * __platform_register_drivers - register an array of platform drivers * @drivers: an array of drivers to register * @count: the number of drivers to register * @owner: module owning the drivers * * Registers platform drivers specified by an array. On failure to register a * driver, all previously registered drivers will be unregistered. Callers of * this API should use platform_unregister_drivers() to unregister drivers in * the reverse order. * * Returns: 0 on success or a negative error code on failure. */ int __platform_register_drivers(struct platform_driver * const *drivers, unsigned int count, struct module *owner) { unsigned int i; int err; for (i = 0; i < count; i++) { pr_debug("registering platform driver %ps\n", drivers[i]); err = __platform_driver_register(drivers[i], owner); if (err < 0) { pr_err("failed to register platform driver %ps: %d\n", drivers[i], err); goto error; } } return 0; error: while (i--) { pr_debug("unregistering platform driver %ps\n", drivers[i]); platform_driver_unregister(drivers[i]); } return err; } EXPORT_SYMBOL_GPL(__platform_register_drivers); /** * platform_unregister_drivers - unregister an array of platform drivers * @drivers: an array of drivers to unregister * @count: the number of drivers to unregister * * Unregisters platform drivers specified by an array. This is typically used * to complement an earlier call to platform_register_drivers(). Drivers are * unregistered in the reverse order in which they were registered. */ void platform_unregister_drivers(struct platform_driver * const *drivers, unsigned int count) { while (count--) { pr_debug("unregistering platform driver %ps\n", drivers[count]); platform_driver_unregister(drivers[count]); } } EXPORT_SYMBOL_GPL(platform_unregister_drivers); static const struct platform_device_id *platform_match_id( const struct platform_device_id *id, struct platform_device *pdev) { while (id->name[0]) { if (strcmp(pdev->name, id->name) == 0) { pdev->id_entry = id; return id; } id++; } return NULL; } #ifdef CONFIG_PM_SLEEP static int platform_legacy_suspend(struct device *dev, pm_message_t mesg) { struct platform_driver *pdrv = to_platform_driver(dev->driver); struct platform_device *pdev = to_platform_device(dev); int ret = 0; if (dev->driver && pdrv->suspend) ret = pdrv->suspend(pdev, mesg); return ret; } static int platform_legacy_resume(struct device *dev) { struct platform_driver *pdrv = to_platform_driver(dev->driver); struct platform_device *pdev = to_platform_device(dev); int ret = 0; if (dev->driver && pdrv->resume) ret = pdrv->resume(pdev); return ret; } #endif /* CONFIG_PM_SLEEP */ #ifdef CONFIG_SUSPEND int platform_pm_suspend(struct device *dev) { struct device_driver *drv = dev->driver; int ret = 0; if (!drv) return 0; if (drv->pm) { if (drv->pm->suspend) ret = drv->pm->suspend(dev); } else { ret = platform_legacy_suspend(dev, PMSG_SUSPEND); } return ret; } int platform_pm_resume(struct device *dev) { struct device_driver *drv = dev->driver; int ret = 0; if (!drv) return 0; if (drv->pm) { if (drv->pm->resume) ret = drv->pm->resume(dev); } else { ret = platform_legacy_resume(dev); } return ret; } #endif /* CONFIG_SUSPEND */ #ifdef CONFIG_HIBERNATE_CALLBACKS int platform_pm_freeze(struct device *dev) { struct device_driver *drv = dev->driver; int ret = 0; if (!drv) return 0; if (drv->pm) { if (drv->pm->freeze) ret = drv->pm->freeze(dev); } else { ret = platform_legacy_suspend(dev, PMSG_FREEZE); } return ret; } int platform_pm_thaw(struct device *dev) { struct device_driver *drv = dev->driver; int ret = 0; if (!drv) return 0; if (drv->pm) { if (drv->pm->thaw) ret = drv->pm->thaw(dev); } else { ret = platform_legacy_resume(dev); } return ret; } int platform_pm_poweroff(struct device *dev) { struct device_driver *drv = dev->driver; int ret = 0; if (!drv) return 0; if (drv->pm) { if (drv->pm->poweroff) ret = drv->pm->poweroff(dev); } else { ret = platform_legacy_suspend(dev, PMSG_HIBERNATE); } return ret; } int platform_pm_restore(struct device *dev) { struct device_driver *drv = dev->driver; int ret = 0; if (!drv) return 0; if (drv->pm) { if (drv->pm->restore) ret = drv->pm->restore(dev); } else { ret = platform_legacy_resume(dev); } return ret; } #endif /* CONFIG_HIBERNATE_CALLBACKS */ /* modalias support enables more hands-off userspace setup: * (a) environment variable lets new-style hotplug events work once system is * fully running: "modprobe $MODALIAS" * (b) sysfs attribute lets new-style coldplug recover from hotplug events * mishandled before system is fully running: "modprobe $(cat modalias)" */ static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, char *buf) { struct platform_device *pdev = to_platform_device(dev); int len; len = of_device_modalias(dev, buf, PAGE_SIZE); if (len != -ENODEV) return len; len = acpi_device_modalias(dev, buf, PAGE_SIZE - 1); if (len != -ENODEV) return len; return sysfs_emit(buf, "platform:%s\n", pdev->name); } static DEVICE_ATTR_RO(modalias); static ssize_t numa_node_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%d\n", dev_to_node(dev)); } static DEVICE_ATTR_RO(numa_node); static ssize_t driver_override_show(struct device *dev, struct device_attribute *attr, char *buf) { struct platform_device *pdev = to_platform_device(dev); ssize_t len; device_lock(dev); len = sysfs_emit(buf, "%s\n", pdev->driver_override); device_unlock(dev); return len; } static ssize_t driver_override_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct platform_device *pdev = to_platform_device(dev); int ret; ret = driver_set_override(dev, &pdev->driver_override, buf, count); if (ret) return ret; return count; } static DEVICE_ATTR_RW(driver_override); static struct attribute *platform_dev_attrs[] = { &dev_attr_modalias.attr, &dev_attr_numa_node.attr, &dev_attr_driver_override.attr, NULL, }; static umode_t platform_dev_attrs_visible(struct kobject *kobj, struct attribute *a, int n) { struct device *dev = container_of(kobj, typeof(*dev), kobj); if (a == &dev_attr_numa_node.attr && dev_to_node(dev) == NUMA_NO_NODE) return 0; return a->mode; } static const struct attribute_group platform_dev_group = { .attrs = platform_dev_attrs, .is_visible = platform_dev_attrs_visible, }; __ATTRIBUTE_GROUPS(platform_dev); /** * platform_match - bind platform device to platform driver. * @dev: device. * @drv: driver. * * Platform device IDs are assumed to be encoded like this: * "<name><instance>", where <name> is a short description of the type of * device, like "pci" or "floppy", and <instance> is the enumerated * instance of the device, like '0' or '42'. Driver IDs are simply * "<name>". So, extract the <name> from the platform_device structure, * and compare it against the name of the driver. Return whether they match * or not. */ static int platform_match(struct device *dev, struct device_driver *drv) { struct platform_device *pdev = to_platform_device(dev); struct platform_driver *pdrv = to_platform_driver(drv); /* When driver_override is set, only bind to the matching driver */ if (pdev->driver_override) return !strcmp(pdev->driver_override, drv->name); /* Attempt an OF style match first */ if (of_driver_match_device(dev, drv)) return 1; /* Then try ACPI style match */ if (acpi_driver_match_device(dev, drv)) return 1; /* Then try to match against the id table */ if (pdrv->id_table) return platform_match_id(pdrv->id_table, pdev) != NULL; /* fall-back to driver name match */ return (strcmp(pdev->name, drv->name) == 0); } static int platform_uevent(const struct device *dev, struct kobj_uevent_env *env) { const struct platform_device *pdev = to_platform_device(dev); int rc; /* Some devices have extra OF data and an OF-style MODALIAS */ rc = of_device_uevent_modalias(dev, env); if (rc != -ENODEV) return rc; rc = acpi_device_uevent_modalias(dev, env); if (rc != -ENODEV) return rc; add_uevent_var(env, "MODALIAS=%s%s", PLATFORM_MODULE_PREFIX, pdev->name); return 0; } static int platform_probe(struct device *_dev) { struct platform_driver *drv = to_platform_driver(_dev->driver); struct platform_device *dev = to_platform_device(_dev); int ret; /* * A driver registered using platform_driver_probe() cannot be bound * again later because the probe function usually lives in __init code * and so is gone. For these drivers .probe is set to * platform_probe_fail in __platform_driver_probe(). Don't even prepare * clocks and PM domains for these to match the traditional behaviour. */ if (unlikely(drv->probe == platform_probe_fail)) return -ENXIO; ret = of_clk_set_defaults(_dev->of_node, false); if (ret < 0) return ret; ret = dev_pm_domain_attach(_dev, true); if (ret) goto out; if (drv->probe) { ret = drv->probe(dev); if (ret) dev_pm_domain_detach(_dev, true); } out: if (drv->prevent_deferred_probe && ret == -EPROBE_DEFER) { dev_warn(_dev, "probe deferral not supported\n"); ret = -ENXIO; } return ret; } static void platform_remove(struct device *_dev) { struct platform_driver *drv = to_platform_driver(_dev->driver); struct platform_device *dev = to_platform_device(_dev); if (drv->remove_new) { drv->remove_new(dev); } else if (drv->remove) { int ret = drv->remove(dev); if (ret) dev_warn(_dev, "remove callback returned a non-zero value. This will be ignored.\n"); } dev_pm_domain_detach(_dev, true); } static void platform_shutdown(struct device *_dev) { struct platform_device *dev = to_platform_device(_dev); struct platform_driver *drv; if (!_dev->driver) return; drv = to_platform_driver(_dev->driver); if (drv->shutdown) drv->shutdown(dev); } static int platform_dma_configure(struct device *dev) { struct platform_driver *drv = to_platform_driver(dev->driver); struct fwnode_handle *fwnode = dev_fwnode(dev); enum dev_dma_attr attr; int ret = 0; if (is_of_node(fwnode)) { ret = of_dma_configure(dev, to_of_node(fwnode), true); } else if (is_acpi_device_node(fwnode)) { attr = acpi_get_dma_attr(to_acpi_device_node(fwnode)); ret = acpi_dma_configure(dev, attr); } if (ret || drv->driver_managed_dma) return ret; ret = iommu_device_use_default_domain(dev); if (ret) arch_teardown_dma_ops(dev); return ret; } static void platform_dma_cleanup(struct device *dev) { struct platform_driver *drv = to_platform_driver(dev->driver); if (!drv->driver_managed_dma) iommu_device_unuse_default_domain(dev); } static const struct dev_pm_ops platform_dev_pm_ops = { SET_RUNTIME_PM_OPS(pm_generic_runtime_suspend, pm_generic_runtime_resume, NULL) USE_PLATFORM_PM_SLEEP_OPS }; struct bus_type platform_bus_type = { .name = "platform", .dev_groups = platform_dev_groups, .match = platform_match, .uevent = platform_uevent, .probe = platform_probe, .remove = platform_remove, .shutdown = platform_shutdown, .dma_configure = platform_dma_configure, .dma_cleanup = platform_dma_cleanup, .pm = &platform_dev_pm_ops, }; EXPORT_SYMBOL_GPL(platform_bus_type); static inline int __platform_match(struct device *dev, const void *drv) { return platform_match(dev, (struct device_driver *)drv); } /** * platform_find_device_by_driver - Find a platform device with a given * driver. * @start: The device to start the search from. * @drv: The device driver to look for. */ struct device *platform_find_device_by_driver(struct device *start, const struct device_driver *drv) { return bus_find_device(&platform_bus_type, start, drv, __platform_match); } EXPORT_SYMBOL_GPL(platform_find_device_by_driver); void __weak __init early_platform_cleanup(void) { } int __init platform_bus_init(void) { int error; early_platform_cleanup(); error = device_register(&platform_bus); if (error) { put_device(&platform_bus); return error; } error = bus_register(&platform_bus_type); if (error) device_unregister(&platform_bus); return error; } |
16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 | /* * Non-physical true random number generator based on timing jitter -- * Jitter RNG standalone code. * * Copyright Stephan Mueller <smueller@chronox.de>, 2015 - 2023 * * Design * ====== * * See https://www.chronox.de/jent.html * * License * ======= * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, and the entire permission notice in its entirety, * including the disclaimer of warranties. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote * products derived from this software without specific prior * written permission. * * ALTERNATIVELY, this product may be distributed under the terms of * the GNU General Public License, in which case the provisions of the GPL2 are * required INSTEAD OF the above restrictions. (This clause is * necessary due to a potential bad interaction between the GPL and * the restrictions contained in a BSD-style copyright.) * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. */ /* * This Jitterentropy RNG is based on the jitterentropy library * version 3.4.0 provided at https://www.chronox.de/jent.html */ #ifdef __OPTIMIZE__ #error "The CPU Jitter random number generator must not be compiled with optimizations. See documentation. Use the compiler switch -O0 for compiling jitterentropy.c." #endif typedef unsigned long long __u64; typedef long long __s64; typedef unsigned int __u32; typedef unsigned char u8; #define NULL ((void *) 0) /* The entropy pool */ struct rand_data { /* SHA3-256 is used as conditioner */ #define DATA_SIZE_BITS 256 /* all data values that are vital to maintain the security * of the RNG are marked as SENSITIVE. A user must not * access that information while the RNG executes its loops to * calculate the next random value. */ void *hash_state; /* SENSITIVE hash state entropy pool */ __u64 prev_time; /* SENSITIVE Previous time stamp */ __u64 last_delta; /* SENSITIVE stuck test */ __s64 last_delta2; /* SENSITIVE stuck test */ unsigned int flags; /* Flags used to initialize */ unsigned int osr; /* Oversample rate */ #define JENT_MEMORY_ACCESSLOOPS 128 #define JENT_MEMORY_SIZE \ (CONFIG_CRYPTO_JITTERENTROPY_MEMORY_BLOCKS * \ CONFIG_CRYPTO_JITTERENTROPY_MEMORY_BLOCKSIZE) unsigned char *mem; /* Memory access location with size of * memblocks * memblocksize */ unsigned int memlocation; /* Pointer to byte in *mem */ unsigned int memblocks; /* Number of memory blocks in *mem */ unsigned int memblocksize; /* Size of one memory block in bytes */ unsigned int memaccessloops; /* Number of memory accesses per random * bit generation */ /* Repetition Count Test */ unsigned int rct_count; /* Number of stuck values */ /* Adaptive Proportion Test cutoff values */ unsigned int apt_cutoff; /* Intermittent health test failure */ unsigned int apt_cutoff_permanent; /* Permanent health test failure */ #define JENT_APT_WINDOW_SIZE 512 /* Data window size */ /* LSB of time stamp to process */ #define JENT_APT_LSB 16 #define JENT_APT_WORD_MASK (JENT_APT_LSB - 1) unsigned int apt_observations; /* Number of collected observations */ unsigned int apt_count; /* APT counter */ unsigned int apt_base; /* APT base reference */ unsigned int health_failure; /* Record health failure */ unsigned int apt_base_set:1; /* APT base reference set? */ }; /* Flags that can be used to initialize the RNG */ #define JENT_DISABLE_MEMORY_ACCESS (1<<2) /* Disable memory access for more * entropy, saves MEMORY_SIZE RAM for * entropy collector */ /* -- error codes for init function -- */ #define JENT_ENOTIME 1 /* Timer service not available */ #define JENT_ECOARSETIME 2 /* Timer too coarse for RNG */ #define JENT_ENOMONOTONIC 3 /* Timer is not monotonic increasing */ #define JENT_EVARVAR 5 /* Timer does not produce variations of * variations (2nd derivation of time is * zero). */ #define JENT_ESTUCK 8 /* Too many stuck results during init. */ #define JENT_EHEALTH 9 /* Health test failed during initialization */ #define JENT_ERCT 10 /* RCT failed during initialization */ #define JENT_EHASH 11 /* Hash self test failed */ #define JENT_EMEM 12 /* Can't allocate memory for initialization */ #define JENT_RCT_FAILURE 1 /* Failure in RCT health test. */ #define JENT_APT_FAILURE 2 /* Failure in APT health test. */ #define JENT_PERMANENT_FAILURE_SHIFT 16 #define JENT_PERMANENT_FAILURE(x) (x << JENT_PERMANENT_FAILURE_SHIFT) #define JENT_RCT_FAILURE_PERMANENT JENT_PERMANENT_FAILURE(JENT_RCT_FAILURE) #define JENT_APT_FAILURE_PERMANENT JENT_PERMANENT_FAILURE(JENT_APT_FAILURE) /* * The output n bits can receive more than n bits of min entropy, of course, * but the fixed output of the conditioning function can only asymptotically * approach the output size bits of min entropy, not attain that bound. Random * maps will tend to have output collisions, which reduces the creditable * output entropy (that is what SP 800-90B Section 3.1.5.1.2 attempts to bound). * * The value "64" is justified in Appendix A.4 of the current 90C draft, * and aligns with NIST's in "epsilon" definition in this document, which is * that a string can be considered "full entropy" if you can bound the min * entropy in each bit of output to at least 1-epsilon, where epsilon is * required to be <= 2^(-32). */ #define JENT_ENTROPY_SAFETY_FACTOR 64 #include <linux/fips.h> #include "jitterentropy.h" /*************************************************************************** * Adaptive Proportion Test * * This test complies with SP800-90B section 4.4.2. ***************************************************************************/ /* * See the SP 800-90B comment #10b for the corrected cutoff for the SP 800-90B * APT. * https://www.untruth.org/~josh/sp80090b/UL%20SP800-90B-final%20comments%20v1.9%2020191212.pdf * In the syntax of R, this is C = 2 + qbinom(1 − 2^(−30), 511, 2^(-1/osr)). * (The original formula wasn't correct because the first symbol must * necessarily have been observed, so there is no chance of observing 0 of these * symbols.) * * For the alpha < 2^-53, R cannot be used as it uses a float data type without * arbitrary precision. A SageMath script is used to calculate those cutoff * values. * * For any value above 14, this yields the maximal allowable value of 512 * (by FIPS 140-2 IG 7.19 Resolution # 16, we cannot choose a cutoff value that * renders the test unable to fail). */ static const unsigned int jent_apt_cutoff_lookup[15] = { 325, 422, 459, 477, 488, 494, 499, 502, 505, 507, 508, 509, 510, 511, 512 }; static const unsigned int jent_apt_cutoff_permanent_lookup[15] = { 355, 447, 479, 494, 502, 507, 510, 512, 512, 512, 512, 512, 512, 512, 512 }; #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) static void jent_apt_init(struct rand_data *ec, unsigned int osr) { /* * Establish the apt_cutoff based on the presumed entropy rate of * 1/osr. */ if (osr >= ARRAY_SIZE(jent_apt_cutoff_lookup)) { ec->apt_cutoff = jent_apt_cutoff_lookup[ ARRAY_SIZE(jent_apt_cutoff_lookup) - 1]; ec->apt_cutoff_permanent = jent_apt_cutoff_permanent_lookup[ ARRAY_SIZE(jent_apt_cutoff_permanent_lookup) - 1]; } else { ec->apt_cutoff = jent_apt_cutoff_lookup[osr - 1]; ec->apt_cutoff_permanent = jent_apt_cutoff_permanent_lookup[osr - 1]; } } /* * Reset the APT counter * * @ec [in] Reference to entropy collector */ static void jent_apt_reset(struct rand_data *ec, unsigned int delta_masked) { /* Reset APT counter */ ec->apt_count = 0; ec->apt_base = delta_masked; ec->apt_observations = 0; } /* * Insert a new entropy event into APT * * @ec [in] Reference to entropy collector * @delta_masked [in] Masked time delta to process */ static void jent_apt_insert(struct rand_data *ec, unsigned int delta_masked) { /* Initialize the base reference */ if (!ec->apt_base_set) { ec->apt_base = delta_masked; ec->apt_base_set = 1; return; } if (delta_masked == ec->apt_base) { ec->apt_count++; /* Note, ec->apt_count starts with one. */ if (ec->apt_count >= ec->apt_cutoff_permanent) ec->health_failure |= JENT_APT_FAILURE_PERMANENT; else if (ec->apt_count >= ec->apt_cutoff) ec->health_failure |= JENT_APT_FAILURE; } ec->apt_observations++; if (ec->apt_observations >= JENT_APT_WINDOW_SIZE) jent_apt_reset(ec, delta_masked); } /*************************************************************************** * Stuck Test and its use as Repetition Count Test * * The Jitter RNG uses an enhanced version of the Repetition Count Test * (RCT) specified in SP800-90B section 4.4.1. Instead of counting identical * back-to-back values, the input to the RCT is the counting of the stuck * values during the generation of one Jitter RNG output block. * * The RCT is applied with an alpha of 2^{-30} compliant to FIPS 140-2 IG 9.8. * * During the counting operation, the Jitter RNG always calculates the RCT * cut-off value of C. If that value exceeds the allowed cut-off value, * the Jitter RNG output block will be calculated completely but discarded at * the end. The caller of the Jitter RNG is informed with an error code. ***************************************************************************/ /* * Repetition Count Test as defined in SP800-90B section 4.4.1 * * @ec [in] Reference to entropy collector * @stuck [in] Indicator whether the value is stuck */ static void jent_rct_insert(struct rand_data *ec, int stuck) { if (stuck) { ec->rct_count++; /* * The cutoff value is based on the following consideration: * alpha = 2^-30 or 2^-60 as recommended in SP800-90B. * In addition, we require an entropy value H of 1/osr as this * is the minimum entropy required to provide full entropy. * Note, we collect (DATA_SIZE_BITS + ENTROPY_SAFETY_FACTOR)*osr * deltas for inserting them into the entropy pool which should * then have (close to) DATA_SIZE_BITS bits of entropy in the * conditioned output. * * Note, ec->rct_count (which equals to value B in the pseudo * code of SP800-90B section 4.4.1) starts with zero. Hence * we need to subtract one from the cutoff value as calculated * following SP800-90B. Thus C = ceil(-log_2(alpha)/H) = 30*osr * or 60*osr. */ if ((unsigned int)ec->rct_count >= (60 * ec->osr)) { ec->rct_count = -1; ec->health_failure |= JENT_RCT_FAILURE_PERMANENT; } else if ((unsigned int)ec->rct_count >= (30 * ec->osr)) { ec->rct_count = -1; ec->health_failure |= JENT_RCT_FAILURE; } } else { /* Reset RCT */ ec->rct_count = 0; } } static inline __u64 jent_delta(__u64 prev, __u64 next) { #define JENT_UINT64_MAX (__u64)(~((__u64) 0)) return (prev < next) ? (next - prev) : (JENT_UINT64_MAX - prev + 1 + next); } /* * Stuck test by checking the: * 1st derivative of the jitter measurement (time delta) * 2nd derivative of the jitter measurement (delta of time deltas) * 3rd derivative of the jitter measurement (delta of delta of time deltas) * * All values must always be non-zero. * * @ec [in] Reference to entropy collector * @current_delta [in] Jitter time delta * * @return * 0 jitter measurement not stuck (good bit) * 1 jitter measurement stuck (reject bit) */ static int jent_stuck(struct rand_data *ec, __u64 current_delta) { __u64 delta2 = jent_delta(ec->last_delta, current_delta); __u64 delta3 = jent_delta(ec->last_delta2, delta2); ec->last_delta = current_delta; ec->last_delta2 = delta2; /* * Insert the result of the comparison of two back-to-back time * deltas. */ jent_apt_insert(ec, current_delta); if (!current_delta || !delta2 || !delta3) { /* RCT with a stuck bit */ jent_rct_insert(ec, 1); return 1; } /* RCT with a non-stuck bit */ jent_rct_insert(ec, 0); return 0; } /* * Report any health test failures * * @ec [in] Reference to entropy collector * * @return a bitmask indicating which tests failed * 0 No health test failure * 1 RCT failure * 2 APT failure * 1<<JENT_PERMANENT_FAILURE_SHIFT RCT permanent failure * 2<<JENT_PERMANENT_FAILURE_SHIFT APT permanent failure */ static unsigned int jent_health_failure(struct rand_data *ec) { /* Test is only enabled in FIPS mode */ if (!fips_enabled) return 0; return ec->health_failure; } /*************************************************************************** * Noise sources ***************************************************************************/ /* * Update of the loop count used for the next round of * an entropy collection. * * Input: * @bits is the number of low bits of the timer to consider * @min is the number of bits we shift the timer value to the right at * the end to make sure we have a guaranteed minimum value * * @return Newly calculated loop counter */ static __u64 jent_loop_shuffle(unsigned int bits, unsigned int min) { __u64 time = 0; __u64 shuffle = 0; unsigned int i = 0; unsigned int mask = (1<<bits) - 1; jent_get_nstime(&time); /* * We fold the time value as much as possible to ensure that as many * bits of the time stamp are included as possible. */ for (i = 0; ((DATA_SIZE_BITS + bits - 1) / bits) > i; i++) { shuffle ^= time & mask; time = time >> bits; } /* * We add a lower boundary value to ensure we have a minimum * RNG loop count. */ return (shuffle + (1<<min)); } /* * CPU Jitter noise source -- this is the noise source based on the CPU * execution time jitter * * This function injects the individual bits of the time value into the * entropy pool using a hash. * * ec [in] entropy collector * time [in] time stamp to be injected * stuck [in] Is the time stamp identified as stuck? * * Output: * updated hash context in the entropy collector or error code */ static int jent_condition_data(struct rand_data *ec, __u64 time, int stuck) { #define SHA3_HASH_LOOP (1<<3) struct { int rct_count; unsigned int apt_observations; unsigned int apt_count; unsigned int apt_base; } addtl = { ec->rct_count, ec->apt_observations, ec->apt_count, ec->apt_base }; return jent_hash_time(ec->hash_state, time, (u8 *)&addtl, sizeof(addtl), SHA3_HASH_LOOP, stuck); } /* * Memory Access noise source -- this is a noise source based on variations in * memory access times * * This function performs memory accesses which will add to the timing * variations due to an unknown amount of CPU wait states that need to be * added when accessing memory. The memory size should be larger than the L1 * caches as outlined in the documentation and the associated testing. * * The L1 cache has a very high bandwidth, albeit its access rate is usually * slower than accessing CPU registers. Therefore, L1 accesses only add minimal * variations as the CPU has hardly to wait. Starting with L2, significant * variations are added because L2 typically does not belong to the CPU any more * and therefore a wider range of CPU wait states is necessary for accesses. * L3 and real memory accesses have even a wider range of wait states. However, * to reliably access either L3 or memory, the ec->mem memory must be quite * large which is usually not desirable. * * @ec [in] Reference to the entropy collector with the memory access data -- if * the reference to the memory block to be accessed is NULL, this noise * source is disabled * @loop_cnt [in] if a value not equal to 0 is set, use the given value * number of loops to perform the LFSR */ static void jent_memaccess(struct rand_data *ec, __u64 loop_cnt) { unsigned int wrap = 0; __u64 i = 0; #define MAX_ACC_LOOP_BIT 7 #define MIN_ACC_LOOP_BIT 0 __u64 acc_loop_cnt = jent_loop_shuffle(MAX_ACC_LOOP_BIT, MIN_ACC_LOOP_BIT); if (NULL == ec || NULL == ec->mem) return; wrap = ec->memblocksize * ec->memblocks; /* * testing purposes -- allow test app to set the counter, not * needed during runtime */ if (loop_cnt) acc_loop_cnt = loop_cnt; for (i = 0; i < (ec->memaccessloops + acc_loop_cnt); i++) { unsigned char *tmpval = ec->mem + ec->memlocation; /* * memory access: just add 1 to one byte, * wrap at 255 -- memory access implies read * from and write to memory location */ *tmpval = (*tmpval + 1) & 0xff; /* * Addition of memblocksize - 1 to pointer * with wrap around logic to ensure that every * memory location is hit evenly */ ec->memlocation = ec->memlocation + ec->memblocksize - 1; ec->memlocation = ec->memlocation % wrap; } } /*************************************************************************** * Start of entropy processing logic ***************************************************************************/ /* * This is the heart of the entropy generation: calculate time deltas and * use the CPU jitter in the time deltas. The jitter is injected into the * entropy pool. * * WARNING: ensure that ->prev_time is primed before using the output * of this function! This can be done by calling this function * and not using its result. * * @ec [in] Reference to entropy collector * * @return result of stuck test */ static int jent_measure_jitter(struct rand_data *ec, __u64 *ret_current_delta) { __u64 time = 0; __u64 current_delta = 0; int stuck; /* Invoke one noise source before time measurement to add variations */ jent_memaccess(ec, 0); /* * Get time stamp and calculate time delta to previous * invocation to measure the timing variations */ jent_get_nstime(&time); current_delta = jent_delta(ec->prev_time, time); ec->prev_time = time; /* Check whether we have a stuck measurement. */ stuck = jent_stuck(ec, current_delta); /* Now call the next noise sources which also injects the data */ if (jent_condition_data(ec, current_delta, stuck)) stuck = 1; /* return the raw entropy value */ if (ret_current_delta) *ret_current_delta = current_delta; return stuck; } /* * Generator of one 64 bit random number * Function fills rand_data->hash_state * * @ec [in] Reference to entropy collector */ static void jent_gen_entropy(struct rand_data *ec) { unsigned int k = 0, safety_factor = 0; if (fips_enabled) safety_factor = JENT_ENTROPY_SAFETY_FACTOR; /* priming of the ->prev_time value */ jent_measure_jitter(ec, NULL); while (!jent_health_failure(ec)) { /* If a stuck measurement is received, repeat measurement */ if (jent_measure_jitter(ec, NULL)) continue; /* * We multiply the loop value with ->osr to obtain the * oversampling rate requested by the caller */ if (++k >= ((DATA_SIZE_BITS + safety_factor) * ec->osr)) break; } } /* * Entry function: Obtain entropy for the caller. * * This function invokes the entropy gathering logic as often to generate * as many bytes as requested by the caller. The entropy gathering logic * creates 64 bit per invocation. * * This function truncates the last 64 bit entropy value output to the exact * size specified by the caller. * * @ec [in] Reference to entropy collector * @data [in] pointer to buffer for storing random data -- buffer must already * exist * @len [in] size of the buffer, specifying also the requested number of random * in bytes * * @return 0 when request is fulfilled or an error * * The following error codes can occur: * -1 entropy_collector is NULL or the generation failed * -2 Intermittent health failure * -3 Permanent health failure */ int jent_read_entropy(struct rand_data *ec, unsigned char *data, unsigned int len) { unsigned char *p = data; if (!ec) return -1; while (len > 0) { unsigned int tocopy, health_test_result; jent_gen_entropy(ec); health_test_result = jent_health_failure(ec); if (health_test_result > JENT_PERMANENT_FAILURE_SHIFT) { /* * At this point, the Jitter RNG instance is considered * as a failed instance. There is no rerun of the * startup test any more, because the caller * is assumed to not further use this instance. */ return -3; } else if (health_test_result) { /* * Perform startup health tests and return permanent * error if it fails. */ if (jent_entropy_init(0, 0, NULL, ec)) { /* Mark the permanent error */ ec->health_failure &= JENT_RCT_FAILURE_PERMANENT | JENT_APT_FAILURE_PERMANENT; return -3; } return -2; } if ((DATA_SIZE_BITS / 8) < len) tocopy = (DATA_SIZE_BITS / 8); else tocopy = len; if (jent_read_random_block(ec->hash_state, p, tocopy)) return -1; len -= tocopy; p += tocopy; } return 0; } /*************************************************************************** * Initialization logic ***************************************************************************/ struct rand_data *jent_entropy_collector_alloc(unsigned int osr, unsigned int flags, void *hash_state) { struct rand_data *entropy_collector; entropy_collector = jent_zalloc(sizeof(struct rand_data)); if (!entropy_collector) return NULL; if (!(flags & JENT_DISABLE_MEMORY_ACCESS)) { /* Allocate memory for adding variations based on memory * access */ entropy_collector->mem = jent_kvzalloc(JENT_MEMORY_SIZE); if (!entropy_collector->mem) { jent_zfree(entropy_collector); return NULL; } entropy_collector->memblocksize = CONFIG_CRYPTO_JITTERENTROPY_MEMORY_BLOCKSIZE; entropy_collector->memblocks = CONFIG_CRYPTO_JITTERENTROPY_MEMORY_BLOCKS; entropy_collector->memaccessloops = JENT_MEMORY_ACCESSLOOPS; } /* verify and set the oversampling rate */ if (osr == 0) osr = 1; /* H_submitter = 1 / osr */ entropy_collector->osr = osr; entropy_collector->flags = flags; entropy_collector->hash_state = hash_state; /* Initialize the APT */ jent_apt_init(entropy_collector, osr); /* fill the data pad with non-zero values */ jent_gen_entropy(entropy_collector); return entropy_collector; } void jent_entropy_collector_free(struct rand_data *entropy_collector) { jent_kvzfree(entropy_collector->mem, JENT_MEMORY_SIZE); entropy_collector->mem = NULL; jent_zfree(entropy_collector); } int jent_entropy_init(unsigned int osr, unsigned int flags, void *hash_state, struct rand_data *p_ec) { /* * If caller provides an allocated ec, reuse it which implies that the * health test entropy data is used to further still the available * entropy pool. */ struct rand_data *ec = p_ec; int i, time_backwards = 0, ret = 0, ec_free = 0; unsigned int health_test_result; if (!ec) { ec = jent_entropy_collector_alloc(osr, flags, hash_state); if (!ec) return JENT_EMEM; ec_free = 1; } else { /* Reset the APT */ jent_apt_reset(ec, 0); /* Ensure that a new APT base is obtained */ ec->apt_base_set = 0; /* Reset the RCT */ ec->rct_count = 0; /* Reset intermittent, leave permanent health test result */ ec->health_failure &= (~JENT_RCT_FAILURE); ec->health_failure &= (~JENT_APT_FAILURE); } /* We could perform statistical tests here, but the problem is * that we only have a few loop counts to do testing. These * loop counts may show some slight skew and we produce * false positives. * * Moreover, only old systems show potentially problematic * jitter entropy that could potentially be caught here. But * the RNG is intended for hardware that is available or widely * used, but not old systems that are long out of favor. Thus, * no statistical tests. */ /* * We could add a check for system capabilities such as clock_getres or * check for CONFIG_X86_TSC, but it does not make much sense as the * following sanity checks verify that we have a high-resolution * timer. */ /* * TESTLOOPCOUNT needs some loops to identify edge systems. 100 is * definitely too little. * * SP800-90B requires at least 1024 initial test cycles. */ #define TESTLOOPCOUNT 1024 #define CLEARCACHE 100 for (i = 0; (TESTLOOPCOUNT + CLEARCACHE) > i; i++) { __u64 start_time = 0, end_time = 0, delta = 0; /* Invoke core entropy collection logic */ jent_measure_jitter(ec, &delta); end_time = ec->prev_time; start_time = ec->prev_time - delta; /* test whether timer works */ if (!start_time || !end_time) { ret = JENT_ENOTIME; goto out; } /* * test whether timer is fine grained enough to provide * delta even when called shortly after each other -- this * implies that we also have a high resolution timer */ if (!delta || (end_time == start_time)) { ret = JENT_ECOARSETIME; goto out; } /* * up to here we did not modify any variable that will be * evaluated later, but we already performed some work. Thus we * already have had an impact on the caches, branch prediction, * etc. with the goal to clear it to get the worst case * measurements. */ if (i < CLEARCACHE) continue; /* test whether we have an increasing timer */ if (!(end_time > start_time)) time_backwards++; } /* * we allow up to three times the time running backwards. * CLOCK_REALTIME is affected by adjtime and NTP operations. Thus, * if such an operation just happens to interfere with our test, it * should not fail. The value of 3 should cover the NTP case being * performed during our test run. */ if (time_backwards > 3) { ret = JENT_ENOMONOTONIC; goto out; } /* Did we encounter a health test failure? */ health_test_result = jent_health_failure(ec); if (health_test_result) { ret = (health_test_result & JENT_RCT_FAILURE) ? JENT_ERCT : JENT_EHEALTH; goto out; } out: if (ec_free) jent_entropy_collector_free(ec); return ret; } |
1 2 2 1 1 1 4 4 6 6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 | // SPDX-License-Identifier: GPL-2.0 /* * Interface for controlling IO bandwidth on a request queue * * Copyright (C) 2010 Vivek Goyal <vgoyal@redhat.com> */ #include <linux/module.h> #include <linux/slab.h> #include <linux/blkdev.h> #include <linux/bio.h> #include <linux/blktrace_api.h> #include "blk.h" #include "blk-cgroup-rwstat.h" #include "blk-stat.h" #include "blk-throttle.h" /* Max dispatch from a group in 1 round */ #define THROTL_GRP_QUANTUM 8 /* Total max dispatch from all groups in one round */ #define THROTL_QUANTUM 32 /* Throttling is performed over a slice and after that slice is renewed */ #define DFL_THROTL_SLICE_HD (HZ / 10) #define DFL_THROTL_SLICE_SSD (HZ / 50) #define MAX_THROTL_SLICE (HZ) /* A workqueue to queue throttle related work */ static struct workqueue_struct *kthrotld_workqueue; #define rb_entry_tg(node) rb_entry((node), struct throtl_grp, rb_node) /* We measure latency for request size from <= 4k to >= 1M */ #define LATENCY_BUCKET_SIZE 9 struct latency_bucket { unsigned long total_latency; /* ns / 1024 */ int samples; }; struct avg_latency_bucket { unsigned long latency; /* ns / 1024 */ bool valid; }; struct throtl_data { /* service tree for active throtl groups */ struct throtl_service_queue service_queue; struct request_queue *queue; /* Total Number of queued bios on READ and WRITE lists */ unsigned int nr_queued[2]; unsigned int throtl_slice; /* Work for dispatching throttled bios */ struct work_struct dispatch_work; bool track_bio_latency; }; static void throtl_pending_timer_fn(struct timer_list *t); static inline struct blkcg_gq *tg_to_blkg(struct throtl_grp *tg) { return pd_to_blkg(&tg->pd); } /** * sq_to_tg - return the throl_grp the specified service queue belongs to * @sq: the throtl_service_queue of interest * * Return the throtl_grp @sq belongs to. If @sq is the top-level one * embedded in throtl_data, %NULL is returned. */ static struct throtl_grp *sq_to_tg(struct throtl_service_queue *sq) { if (sq && sq->parent_sq) return container_of(sq, struct throtl_grp, service_queue); else return NULL; } /** * sq_to_td - return throtl_data the specified service queue belongs to * @sq: the throtl_service_queue of interest * * A service_queue can be embedded in either a throtl_grp or throtl_data. * Determine the associated throtl_data accordingly and return it. */ static struct throtl_data *sq_to_td(struct throtl_service_queue *sq) { struct throtl_grp *tg = sq_to_tg(sq); if (tg) return tg->td; else return container_of(sq, struct throtl_data, service_queue); } static uint64_t tg_bps_limit(struct throtl_grp *tg, int rw) { struct blkcg_gq *blkg = tg_to_blkg(tg); if (cgroup_subsys_on_dfl(io_cgrp_subsys) && !blkg->parent) return U64_MAX; return tg->bps[rw]; } static unsigned int tg_iops_limit(struct throtl_grp *tg, int rw) { struct blkcg_gq *blkg = tg_to_blkg(tg); if (cgroup_subsys_on_dfl(io_cgrp_subsys) && !blkg->parent) return UINT_MAX; return tg->iops[rw]; } #define request_bucket_index(sectors) \ clamp_t(int, order_base_2(sectors) - 3, 0, LATENCY_BUCKET_SIZE - 1) /** * throtl_log - log debug message via blktrace * @sq: the service_queue being reported * @fmt: printf format string * @args: printf args * * The messages are prefixed with "throtl BLKG_NAME" if @sq belongs to a * throtl_grp; otherwise, just "throtl". */ #define throtl_log(sq, fmt, args...) do { \ struct throtl_grp *__tg = sq_to_tg((sq)); \ struct throtl_data *__td = sq_to_td((sq)); \ \ (void)__td; \ if (likely(!blk_trace_note_message_enabled(__td->queue))) \ break; \ if ((__tg)) { \ blk_add_cgroup_trace_msg(__td->queue, \ &tg_to_blkg(__tg)->blkcg->css, "throtl " fmt, ##args);\ } else { \ blk_add_trace_msg(__td->queue, "throtl " fmt, ##args); \ } \ } while (0) static inline unsigned int throtl_bio_data_size(struct bio *bio) { /* assume it's one sector */ if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) return 512; return bio->bi_iter.bi_size; } static void throtl_qnode_init(struct throtl_qnode *qn, struct throtl_grp *tg) { INIT_LIST_HEAD(&qn->node); bio_list_init(&qn->bios); qn->tg = tg; } /** * throtl_qnode_add_bio - add a bio to a throtl_qnode and activate it * @bio: bio being added * @qn: qnode to add bio to * @queued: the service_queue->queued[] list @qn belongs to * * Add @bio to @qn and put @qn on @queued if it's not already on. * @qn->tg's reference count is bumped when @qn is activated. See the * comment on top of throtl_qnode definition for details. */ static void throtl_qnode_add_bio(struct bio *bio, struct throtl_qnode *qn, struct list_head *queued) { bio_list_add(&qn->bios, bio); if (list_empty(&qn->node)) { list_add_tail(&qn->node, queued); blkg_get(tg_to_blkg(qn->tg)); } } /** * throtl_peek_queued - peek the first bio on a qnode list * @queued: the qnode list to peek */ static struct bio *throtl_peek_queued(struct list_head *queued) { struct throtl_qnode *qn; struct bio *bio; if (list_empty(queued)) return NULL; qn = list_first_entry(queued, struct throtl_qnode, node); bio = bio_list_peek(&qn->bios); WARN_ON_ONCE(!bio); return bio; } /** * throtl_pop_queued - pop the first bio form a qnode list * @queued: the qnode list to pop a bio from * @tg_to_put: optional out argument for throtl_grp to put * * Pop the first bio from the qnode list @queued. After popping, the first * qnode is removed from @queued if empty or moved to the end of @queued so * that the popping order is round-robin. * * When the first qnode is removed, its associated throtl_grp should be put * too. If @tg_to_put is NULL, this function automatically puts it; * otherwise, *@tg_to_put is set to the throtl_grp to put and the caller is * responsible for putting it. */ static struct bio *throtl_pop_queued(struct list_head *queued, struct throtl_grp **tg_to_put) { struct throtl_qnode *qn; struct bio *bio; if (list_empty(queued)) return NULL; qn = list_first_entry(queued, struct throtl_qnode, node); bio = bio_list_pop(&qn->bios); WARN_ON_ONCE(!bio); if (bio_list_empty(&qn->bios)) { list_del_init(&qn->node); if (tg_to_put) *tg_to_put = qn->tg; else blkg_put(tg_to_blkg(qn->tg)); } else { list_move_tail(&qn->node, queued); } return bio; } /* init a service_queue, assumes the caller zeroed it */ static void throtl_service_queue_init(struct throtl_service_queue *sq) { INIT_LIST_HEAD(&sq->queued[READ]); INIT_LIST_HEAD(&sq->queued[WRITE]); sq->pending_tree = RB_ROOT_CACHED; timer_setup(&sq->pending_timer, throtl_pending_timer_fn, 0); } static struct blkg_policy_data *throtl_pd_alloc(struct gendisk *disk, struct blkcg *blkcg, gfp_t gfp) { struct throtl_grp *tg; int rw; tg = kzalloc_node(sizeof(*tg), gfp, disk->node_id); if (!tg) return NULL; if (blkg_rwstat_init(&tg->stat_bytes, gfp)) goto err_free_tg; if (blkg_rwstat_init(&tg->stat_ios, gfp)) goto err_exit_stat_bytes; throtl_service_queue_init(&tg->service_queue); for (rw = READ; rw <= WRITE; rw++) { throtl_qnode_init(&tg->qnode_on_self[rw], tg); throtl_qnode_init(&tg->qnode_on_parent[rw], tg); } RB_CLEAR_NODE(&tg->rb_node); tg->bps[READ] = U64_MAX; tg->bps[WRITE] = U64_MAX; tg->iops[READ] = UINT_MAX; tg->iops[WRITE] = UINT_MAX; return &tg->pd; err_exit_stat_bytes: blkg_rwstat_exit(&tg->stat_bytes); err_free_tg: kfree(tg); return NULL; } static void throtl_pd_init(struct blkg_policy_data *pd) { struct throtl_grp *tg = pd_to_tg(pd); struct blkcg_gq *blkg = tg_to_blkg(tg); struct throtl_data *td = blkg->q->td; struct throtl_service_queue *sq = &tg->service_queue; /* * If on the default hierarchy, we switch to properly hierarchical * behavior where limits on a given throtl_grp are applied to the * whole subtree rather than just the group itself. e.g. If 16M * read_bps limit is set on a parent group, summary bps of * parent group and its subtree groups can't exceed 16M for the * device. * * If not on the default hierarchy, the broken flat hierarchy * behavior is retained where all throtl_grps are treated as if * they're all separate root groups right below throtl_data. * Limits of a group don't interact with limits of other groups * regardless of the position of the group in the hierarchy. */ sq->parent_sq = &td->service_queue; if (cgroup_subsys_on_dfl(io_cgrp_subsys) && blkg->parent) sq->parent_sq = &blkg_to_tg(blkg->parent)->service_queue; tg->td = td; } /* * Set has_rules[] if @tg or any of its parents have limits configured. * This doesn't require walking up to the top of the hierarchy as the * parent's has_rules[] is guaranteed to be correct. */ static void tg_update_has_rules(struct throtl_grp *tg) { struct throtl_grp *parent_tg = sq_to_tg(tg->service_queue.parent_sq); int rw; for (rw = READ; rw <= WRITE; rw++) { tg->has_rules_iops[rw] = (parent_tg && parent_tg->has_rules_iops[rw]) || tg_iops_limit(tg, rw) != UINT_MAX; tg->has_rules_bps[rw] = (parent_tg && parent_tg->has_rules_bps[rw]) || tg_bps_limit(tg, rw) != U64_MAX; } } static void throtl_pd_online(struct blkg_policy_data *pd) { struct throtl_grp *tg = pd_to_tg(pd); /* * We don't want new groups to escape the limits of its ancestors. * Update has_rules[] after a new group is brought online. */ tg_update_has_rules(tg); } static void throtl_pd_free(struct blkg_policy_data *pd) { struct throtl_grp *tg = pd_to_tg(pd); del_timer_sync(&tg->service_queue.pending_timer); blkg_rwstat_exit(&tg->stat_bytes); blkg_rwstat_exit(&tg->stat_ios); kfree(tg); } static struct throtl_grp * throtl_rb_first(struct throtl_service_queue *parent_sq) { struct rb_node *n; n = rb_first_cached(&parent_sq->pending_tree); WARN_ON_ONCE(!n); if (!n) return NULL; return rb_entry_tg(n); } static void throtl_rb_erase(struct rb_node *n, struct throtl_service_queue *parent_sq) { rb_erase_cached(n, &parent_sq->pending_tree); RB_CLEAR_NODE(n); } static void update_min_dispatch_time(struct throtl_service_queue *parent_sq) { struct throtl_grp *tg; tg = throtl_rb_first(parent_sq); if (!tg) return; parent_sq->first_pending_disptime = tg->disptime; } static void tg_service_queue_add(struct throtl_grp *tg) { struct throtl_service_queue *parent_sq = tg->service_queue.parent_sq; struct rb_node **node = &parent_sq->pending_tree.rb_root.rb_node; struct rb_node *parent = NULL; struct throtl_grp *__tg; unsigned long key = tg->disptime; bool leftmost = true; while (*node != NULL) { parent = *node; __tg = rb_entry_tg(parent); if (time_before(key, __tg->disptime)) node = &parent->rb_left; else { node = &parent->rb_right; leftmost = false; } } rb_link_node(&tg->rb_node, parent, node); rb_insert_color_cached(&tg->rb_node, &parent_sq->pending_tree, leftmost); } static void throtl_enqueue_tg(struct throtl_grp *tg) { if (!(tg->flags & THROTL_TG_PENDING)) { tg_service_queue_add(tg); tg->flags |= THROTL_TG_PENDING; tg->service_queue.parent_sq->nr_pending++; } } static void throtl_dequeue_tg(struct throtl_grp *tg) { if (tg->flags & THROTL_TG_PENDING) { struct throtl_service_queue *parent_sq = tg->service_queue.parent_sq; throtl_rb_erase(&tg->rb_node, parent_sq); --parent_sq->nr_pending; tg->flags &= ~THROTL_TG_PENDING; } } /* Call with queue lock held */ static void throtl_schedule_pending_timer(struct throtl_service_queue *sq, unsigned long expires) { unsigned long max_expire = jiffies + 8 * sq_to_td(sq)->throtl_slice; /* * Since we are adjusting the throttle limit dynamically, the sleep * time calculated according to previous limit might be invalid. It's * possible the cgroup sleep time is very long and no other cgroups * have IO running so notify the limit changes. Make sure the cgroup * doesn't sleep too long to avoid the missed notification. */ if (time_after(expires, max_expire)) expires = max_expire; mod_timer(&sq->pending_timer, expires); throtl_log(sq, "schedule timer. delay=%lu jiffies=%lu", expires - jiffies, jiffies); } /** * throtl_schedule_next_dispatch - schedule the next dispatch cycle * @sq: the service_queue to schedule dispatch for * @force: force scheduling * * Arm @sq->pending_timer so that the next dispatch cycle starts on the * dispatch time of the first pending child. Returns %true if either timer * is armed or there's no pending child left. %false if the current * dispatch window is still open and the caller should continue * dispatching. * * If @force is %true, the dispatch timer is always scheduled and this * function is guaranteed to return %true. This is to be used when the * caller can't dispatch itself and needs to invoke pending_timer * unconditionally. Note that forced scheduling is likely to induce short * delay before dispatch starts even if @sq->first_pending_disptime is not * in the future and thus shouldn't be used in hot paths. */ static bool throtl_schedule_next_dispatch(struct throtl_service_queue *sq, bool force) { /* any pending children left? */ if (!sq->nr_pending) return true; update_min_dispatch_time(sq); /* is the next dispatch time in the future? */ if (force || time_after(sq->first_pending_disptime, jiffies)) { throtl_schedule_pending_timer(sq, sq->first_pending_disptime); return true; } /* tell the caller to continue dispatching */ return false; } static inline void throtl_start_new_slice_with_credit(struct throtl_grp *tg, bool rw, unsigned long start) { tg->bytes_disp[rw] = 0; tg->io_disp[rw] = 0; tg->carryover_bytes[rw] = 0; tg->carryover_ios[rw] = 0; /* * Previous slice has expired. We must have trimmed it after last * bio dispatch. That means since start of last slice, we never used * that bandwidth. Do try to make use of that bandwidth while giving * credit. */ if (time_after(start, tg->slice_start[rw])) tg->slice_start[rw] = start; tg->slice_end[rw] = jiffies + tg->td->throtl_slice; throtl_log(&tg->service_queue, "[%c] new slice with credit start=%lu end=%lu jiffies=%lu", rw == READ ? 'R' : 'W', tg->slice_start[rw], tg->slice_end[rw], jiffies); } static inline void throtl_start_new_slice(struct throtl_grp *tg, bool rw, bool clear_carryover) { tg->bytes_disp[rw] = 0; tg->io_disp[rw] = 0; tg->slice_start[rw] = jiffies; tg->slice_end[rw] = jiffies + tg->td->throtl_slice; if (clear_carryover) { tg->carryover_bytes[rw] = 0; tg->carryover_ios[rw] = 0; } throtl_log(&tg->service_queue, "[%c] new slice start=%lu end=%lu jiffies=%lu", rw == READ ? 'R' : 'W', tg->slice_start[rw], tg->slice_end[rw], jiffies); } static inline void throtl_set_slice_end(struct throtl_grp *tg, bool rw, unsigned long jiffy_end) { tg->slice_end[rw] = roundup(jiffy_end, tg->td->throtl_slice); } static inline void throtl_extend_slice(struct throtl_grp *tg, bool rw, unsigned long jiffy_end) { throtl_set_slice_end(tg, rw, jiffy_end); throtl_log(&tg->service_queue, "[%c] extend slice start=%lu end=%lu jiffies=%lu", rw == READ ? 'R' : 'W', tg->slice_start[rw], tg->slice_end[rw], jiffies); } /* Determine if previously allocated or extended slice is complete or not */ static bool throtl_slice_used(struct throtl_grp *tg, bool rw) { if (time_in_range(jiffies, tg->slice_start[rw], tg->slice_end[rw])) return false; return true; } static unsigned int calculate_io_allowed(u32 iops_limit, unsigned long jiffy_elapsed) { unsigned int io_allowed; u64 tmp; /* * jiffy_elapsed should not be a big value as minimum iops can be * 1 then at max jiffy elapsed should be equivalent of 1 second as we * will allow dispatch after 1 second and after that slice should * have been trimmed. */ tmp = (u64)iops_limit * jiffy_elapsed; do_div(tmp, HZ); if (tmp > UINT_MAX) io_allowed = UINT_MAX; else io_allowed = tmp; return io_allowed; } static u64 calculate_bytes_allowed(u64 bps_limit, unsigned long jiffy_elapsed) { /* * Can result be wider than 64 bits? * We check against 62, not 64, due to ilog2 truncation. */ if (ilog2(bps_limit) + ilog2(jiffy_elapsed) - ilog2(HZ) > 62) return U64_MAX; return mul_u64_u64_div_u64(bps_limit, (u64)jiffy_elapsed, (u64)HZ); } /* Trim the used slices and adjust slice start accordingly */ static inline void throtl_trim_slice(struct throtl_grp *tg, bool rw) { unsigned long time_elapsed; long long bytes_trim; int io_trim; BUG_ON(time_before(tg->slice_end[rw], tg->slice_start[rw])); /* * If bps are unlimited (-1), then time slice don't get * renewed. Don't try to trim the slice if slice is used. A new * slice will start when appropriate. */ if (throtl_slice_used(tg, rw)) return; /* * A bio has been dispatched. Also adjust slice_end. It might happen * that initially cgroup limit was very low resulting in high * slice_end, but later limit was bumped up and bio was dispatched * sooner, then we need to reduce slice_end. A high bogus slice_end * is bad because it does not allow new slice to start. */ throtl_set_slice_end(tg, rw, jiffies + tg->td->throtl_slice); time_elapsed = rounddown(jiffies - tg->slice_start[rw], tg->td->throtl_slice); if (!time_elapsed) return; bytes_trim = calculate_bytes_allowed(tg_bps_limit(tg, rw), time_elapsed) + tg->carryover_bytes[rw]; io_trim = calculate_io_allowed(tg_iops_limit(tg, rw), time_elapsed) + tg->carryover_ios[rw]; if (bytes_trim <= 0 && io_trim <= 0) return; tg->carryover_bytes[rw] = 0; if ((long long)tg->bytes_disp[rw] >= bytes_trim) tg->bytes_disp[rw] -= bytes_trim; else tg->bytes_disp[rw] = 0; tg->carryover_ios[rw] = 0; if ((int)tg->io_disp[rw] >= io_trim) tg->io_disp[rw] -= io_trim; else tg->io_disp[rw] = 0; tg->slice_start[rw] += time_elapsed; throtl_log(&tg->service_queue, "[%c] trim slice nr=%lu bytes=%lld io=%d start=%lu end=%lu jiffies=%lu", rw == READ ? 'R' : 'W', time_elapsed / tg->td->throtl_slice, bytes_trim, io_trim, tg->slice_start[rw], tg->slice_end[rw], jiffies); } static void __tg_update_carryover(struct throtl_grp *tg, bool rw) { unsigned long jiffy_elapsed = jiffies - tg->slice_start[rw]; u64 bps_limit = tg_bps_limit(tg, rw); u32 iops_limit = tg_iops_limit(tg, rw); /* * If config is updated while bios are still throttled, calculate and * accumulate how many bytes/ios are waited across changes. And * carryover_bytes/ios will be used to calculate new wait time under new * configuration. */ if (bps_limit != U64_MAX) tg->carryover_bytes[rw] += calculate_bytes_allowed(bps_limit, jiffy_elapsed) - tg->bytes_disp[rw]; if (iops_limit != UINT_MAX) tg->carryover_ios[rw] += calculate_io_allowed(iops_limit, jiffy_elapsed) - tg->io_disp[rw]; } static void tg_update_carryover(struct throtl_grp *tg) { if (tg->service_queue.nr_queued[READ]) __tg_update_carryover(tg, READ); if (tg->service_queue.nr_queued[WRITE]) __tg_update_carryover(tg, WRITE); /* see comments in struct throtl_grp for meaning of these fields. */ throtl_log(&tg->service_queue, "%s: %lld %lld %d %d\n", __func__, tg->carryover_bytes[READ], tg->carryover_bytes[WRITE], tg->carryover_ios[READ], tg->carryover_ios[WRITE]); } static unsigned long tg_within_iops_limit(struct throtl_grp *tg, struct bio *bio, u32 iops_limit) { bool rw = bio_data_dir(bio); int io_allowed; unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd; if (iops_limit == UINT_MAX) { return 0; } jiffy_elapsed = jiffies - tg->slice_start[rw]; /* Round up to the next throttle slice, wait time must be nonzero */ jiffy_elapsed_rnd = roundup(jiffy_elapsed + 1, tg->td->throtl_slice); io_allowed = calculate_io_allowed(iops_limit, jiffy_elapsed_rnd) + tg->carryover_ios[rw]; if (io_allowed > 0 && tg->io_disp[rw] + 1 <= io_allowed) return 0; /* Calc approx time to dispatch */ jiffy_wait = jiffy_elapsed_rnd - jiffy_elapsed; return jiffy_wait; } static unsigned long tg_within_bps_limit(struct throtl_grp *tg, struct bio *bio, u64 bps_limit) { bool rw = bio_data_dir(bio); long long bytes_allowed; u64 extra_bytes; unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd; unsigned int bio_size = throtl_bio_data_size(bio); /* no need to throttle if this bio's bytes have been accounted */ if (bps_limit == U64_MAX || bio_flagged(bio, BIO_BPS_THROTTLED)) { return 0; } jiffy_elapsed = jiffy_elapsed_rnd = jiffies - tg->slice_start[rw]; /* Slice has just started. Consider one slice interval */ if (!jiffy_elapsed) jiffy_elapsed_rnd = tg->td->throtl_slice; jiffy_elapsed_rnd = roundup(jiffy_elapsed_rnd, tg->td->throtl_slice); bytes_allowed = calculate_bytes_allowed(bps_limit, jiffy_elapsed_rnd) + tg->carryover_bytes[rw]; if (bytes_allowed > 0 && tg->bytes_disp[rw] + bio_size <= bytes_allowed) return 0; /* Calc approx time to dispatch */ extra_bytes = tg->bytes_disp[rw] + bio_size - bytes_allowed; jiffy_wait = div64_u64(extra_bytes * HZ, bps_limit); if (!jiffy_wait) jiffy_wait = 1; /* * This wait time is without taking into consideration the rounding * up we did. Add that time also. */ jiffy_wait = jiffy_wait + (jiffy_elapsed_rnd - jiffy_elapsed); return jiffy_wait; } /* * Returns whether one can dispatch a bio or not. Also returns approx number * of jiffies to wait before this bio is with-in IO rate and can be dispatched */ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio, unsigned long *wait) { bool rw = bio_data_dir(bio); unsigned long bps_wait = 0, iops_wait = 0, max_wait = 0; u64 bps_limit = tg_bps_limit(tg, rw); u32 iops_limit = tg_iops_limit(tg, rw); /* * Currently whole state machine of group depends on first bio * queued in the group bio list. So one should not be calling * this function with a different bio if there are other bios * queued. */ BUG_ON(tg->service_queue.nr_queued[rw] && bio != throtl_peek_queued(&tg->service_queue.queued[rw])); /* If tg->bps = -1, then BW is unlimited */ if ((bps_limit == U64_MAX && iops_limit == UINT_MAX) || tg->flags & THROTL_TG_CANCELING) { if (wait) *wait = 0; return true; } /* * If previous slice expired, start a new one otherwise renew/extend * existing slice to make sure it is at least throtl_slice interval * long since now. New slice is started only for empty throttle group. * If there is queued bio, that means there should be an active * slice and it should be extended instead. */ if (throtl_slice_used(tg, rw) && !(tg->service_queue.nr_queued[rw])) throtl_start_new_slice(tg, rw, true); else { if (time_before(tg->slice_end[rw], jiffies + tg->td->throtl_slice)) throtl_extend_slice(tg, rw, jiffies + tg->td->throtl_slice); } bps_wait = tg_within_bps_limit(tg, bio, bps_limit); iops_wait = tg_within_iops_limit(tg, bio, iops_limit); if (bps_wait + iops_wait == 0) { if (wait) *wait = 0; return true; } max_wait = max(bps_wait, iops_wait); if (wait) *wait = max_wait; if (time_before(tg->slice_end[rw], jiffies + max_wait)) throtl_extend_slice(tg, rw, jiffies + max_wait); return false; } static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio) { bool rw = bio_data_dir(bio); unsigned int bio_size = throtl_bio_data_size(bio); /* Charge the bio to the group */ if (!bio_flagged(bio, BIO_BPS_THROTTLED)) { tg->bytes_disp[rw] += bio_size; tg->last_bytes_disp[rw] += bio_size; } tg->io_disp[rw]++; tg->last_io_disp[rw]++; } /** * throtl_add_bio_tg - add a bio to the specified throtl_grp * @bio: bio to add * @qn: qnode to use * @tg: the target throtl_grp * * Add @bio to @tg's service_queue using @qn. If @qn is not specified, * tg->qnode_on_self[] is used. */ static void throtl_add_bio_tg(struct bio *bio, struct throtl_qnode *qn, struct throtl_grp *tg) { struct throtl_service_queue *sq = &tg->service_queue; bool rw = bio_data_dir(bio); if (!qn) qn = &tg->qnode_on_self[rw]; /* * If @tg doesn't currently have any bios queued in the same * direction, queueing @bio can change when @tg should be * dispatched. Mark that @tg was empty. This is automatically * cleared on the next tg_update_disptime(). */ if (!sq->nr_queued[rw]) tg->flags |= THROTL_TG_WAS_EMPTY; throtl_qnode_add_bio(bio, qn, &sq->queued[rw]); sq->nr_queued[rw]++; throtl_enqueue_tg(tg); } static void tg_update_disptime(struct throtl_grp *tg) { struct throtl_service_queue *sq = &tg->service_queue; unsigned long read_wait = -1, write_wait = -1, min_wait = -1, disptime; struct bio *bio; bio = throtl_peek_queued(&sq->queued[READ]); if (bio) tg_may_dispatch(tg, bio, &read_wait); bio = throtl_peek_queued(&sq->queued[WRITE]); if (bio) tg_may_dispatch(tg, bio, &write_wait); min_wait = min(read_wait, write_wait); disptime = jiffies + min_wait; /* Update dispatch time */ throtl_rb_erase(&tg->rb_node, tg->service_queue.parent_sq); tg->disptime = disptime; tg_service_queue_add(tg); /* see throtl_add_bio_tg() */ tg->flags &= ~THROTL_TG_WAS_EMPTY; } static void start_parent_slice_with_credit(struct throtl_grp *child_tg, struct throtl_grp *parent_tg, bool rw) { if (throtl_slice_used(parent_tg, rw)) { throtl_start_new_slice_with_credit(parent_tg, rw, child_tg->slice_start[rw]); } } static void tg_dispatch_one_bio(struct throtl_grp *tg, bool rw) { struct throtl_service_queue *sq = &tg->service_queue; struct throtl_service_queue *parent_sq = sq->parent_sq; struct throtl_grp *parent_tg = sq_to_tg(parent_sq); struct throtl_grp *tg_to_put = NULL; struct bio *bio; /* * @bio is being transferred from @tg to @parent_sq. Popping a bio * from @tg may put its reference and @parent_sq might end up * getting released prematurely. Remember the tg to put and put it * after @bio is transferred to @parent_sq. */ bio = throtl_pop_queued(&sq->queued[rw], &tg_to_put); sq->nr_queued[rw]--; throtl_charge_bio(tg, bio); /* * If our parent is another tg, we just need to transfer @bio to * the parent using throtl_add_bio_tg(). If our parent is * @td->service_queue, @bio is ready to be issued. Put it on its * bio_lists[] and decrease total number queued. The caller is * responsible for issuing these bios. */ if (parent_tg) { throtl_add_bio_tg(bio, &tg->qnode_on_parent[rw], parent_tg); start_parent_slice_with_credit(tg, parent_tg, rw); } else { bio_set_flag(bio, BIO_BPS_THROTTLED); throtl_qnode_add_bio(bio, &tg->qnode_on_parent[rw], &parent_sq->queued[rw]); BUG_ON(tg->td->nr_queued[rw] <= 0); tg->td->nr_queued[rw]--; } throtl_trim_slice(tg, rw); if (tg_to_put) blkg_put(tg_to_blkg(tg_to_put)); } static int throtl_dispatch_tg(struct throtl_grp *tg) { struct throtl_service_queue *sq = &tg->service_queue; unsigned int nr_reads = 0, nr_writes = 0; unsigned int max_nr_reads = THROTL_GRP_QUANTUM * 3 / 4; unsigned int max_nr_writes = THROTL_GRP_QUANTUM - max_nr_reads; struct bio *bio; /* Try to dispatch 75% READS and 25% WRITES */ while ((bio = throtl_peek_queued(&sq->queued[READ])) && tg_may_dispatch(tg, bio, NULL)) { tg_dispatch_one_bio(tg, READ); nr_reads++; if (nr_reads >= max_nr_reads) break; } while ((bio = throtl_peek_queued(&sq->queued[WRITE])) && tg_may_dispatch(tg, bio, NULL)) { tg_dispatch_one_bio(tg, WRITE); nr_writes++; if (nr_writes >= max_nr_writes) break; } return nr_reads + nr_writes; } static int throtl_select_dispatch(struct throtl_service_queue *parent_sq) { unsigned int nr_disp = 0; while (1) { struct throtl_grp *tg; struct throtl_service_queue *sq; if (!parent_sq->nr_pending) break; tg = throtl_rb_first(parent_sq); if (!tg) break; if (time_before(jiffies, tg->disptime)) break; nr_disp += throtl_dispatch_tg(tg); sq = &tg->service_queue; if (sq->nr_queued[READ] || sq->nr_queued[WRITE]) tg_update_disptime(tg); else throtl_dequeue_tg(tg); if (nr_disp >= THROTL_QUANTUM) break; } return nr_disp; } /** * throtl_pending_timer_fn - timer function for service_queue->pending_timer * @t: the pending_timer member of the throtl_service_queue being serviced * * This timer is armed when a child throtl_grp with active bio's become * pending and queued on the service_queue's pending_tree and expires when * the first child throtl_grp should be dispatched. This function * dispatches bio's from the children throtl_grps to the parent * service_queue. * * If the parent's parent is another throtl_grp, dispatching is propagated * by either arming its pending_timer or repeating dispatch directly. If * the top-level service_tree is reached, throtl_data->dispatch_work is * kicked so that the ready bio's are issued. */ static void throtl_pending_timer_fn(struct timer_list *t) { struct throtl_service_queue *sq = from_timer(sq, t, pending_timer); struct throtl_grp *tg = sq_to_tg(sq); struct throtl_data *td = sq_to_td(sq); struct throtl_service_queue *parent_sq; struct request_queue *q; bool dispatched; int ret; /* throtl_data may be gone, so figure out request queue by blkg */ if (tg) q = tg->pd.blkg->q; else q = td->queue; spin_lock_irq(&q->queue_lock); if (!q->root_blkg) goto out_unlock; again: parent_sq = sq->parent_sq; dispatched = false; while (true) { throtl_log(sq, "dispatch nr_queued=%u read=%u write=%u", sq->nr_queued[READ] + sq->nr_queued[WRITE], sq->nr_queued[READ], sq->nr_queued[WRITE]); ret = throtl_select_dispatch(sq); if (ret) { throtl_log(sq, "bios disp=%u", ret); dispatched = true; } if (throtl_schedule_next_dispatch(sq, false)) break; /* this dispatch windows is still open, relax and repeat */ spin_unlock_irq(&q->queue_lock); cpu_relax(); spin_lock_irq(&q->queue_lock); } if (!dispatched) goto out_unlock; if (parent_sq) { /* @parent_sq is another throl_grp, propagate dispatch */ if (tg->flags & THROTL_TG_WAS_EMPTY) { tg_update_disptime(tg); if (!throtl_schedule_next_dispatch(parent_sq, false)) { /* window is already open, repeat dispatching */ sq = parent_sq; tg = sq_to_tg(sq); goto again; } } } else { /* reached the top-level, queue issuing */ queue_work(kthrotld_workqueue, &td->dispatch_work); } out_unlock: spin_unlock_irq(&q->queue_lock); } /** * blk_throtl_dispatch_work_fn - work function for throtl_data->dispatch_work * @work: work item being executed * * This function is queued for execution when bios reach the bio_lists[] * of throtl_data->service_queue. Those bios are ready and issued by this * function. */ static void blk_throtl_dispatch_work_fn(struct work_struct *work) { struct throtl_data *td = container_of(work, struct throtl_data, dispatch_work); struct throtl_service_queue *td_sq = &td->service_queue; struct request_queue *q = td->queue; struct bio_list bio_list_on_stack; struct bio *bio; struct blk_plug plug; int rw; bio_list_init(&bio_list_on_stack); spin_lock_irq(&q->queue_lock); for (rw = READ; rw <= WRITE; rw++) while ((bio = throtl_pop_queued(&td_sq->queued[rw], NULL))) bio_list_add(&bio_list_on_stack, bio); spin_unlock_irq(&q->queue_lock); if (!bio_list_empty(&bio_list_on_stack)) { blk_start_plug(&plug); while ((bio = bio_list_pop(&bio_list_on_stack))) submit_bio_noacct_nocheck(bio); blk_finish_plug(&plug); } } static u64 tg_prfill_conf_u64(struct seq_file *sf, struct blkg_policy_data *pd, int off) { struct throtl_grp *tg = pd_to_tg(pd); u64 v = *(u64 *)((void *)tg + off); if (v == U64_MAX) return 0; return __blkg_prfill_u64(sf, pd, v); } static u64 tg_prfill_conf_uint(struct seq_file *sf, struct blkg_policy_data *pd, int off) { struct throtl_grp *tg = pd_to_tg(pd); unsigned int v = *(unsigned int *)((void *)tg + off); if (v == UINT_MAX) return 0; return __blkg_prfill_u64(sf, pd, v); } static int tg_print_conf_u64(struct seq_file *sf, void *v) { blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_conf_u64, &blkcg_policy_throtl, seq_cft(sf)->private, false); return 0; } static int tg_print_conf_uint(struct seq_file *sf, void *v) { blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_conf_uint, &blkcg_policy_throtl, seq_cft(sf)->private, false); return 0; } static void tg_conf_updated(struct throtl_grp *tg, bool global) { struct throtl_service_queue *sq = &tg->service_queue; struct cgroup_subsys_state *pos_css; struct blkcg_gq *blkg; throtl_log(&tg->service_queue, "limit change rbps=%llu wbps=%llu riops=%u wiops=%u", tg_bps_limit(tg, READ), tg_bps_limit(tg, WRITE), tg_iops_limit(tg, READ), tg_iops_limit(tg, WRITE)); rcu_read_lock(); /* * Update has_rules[] flags for the updated tg's subtree. A tg is * considered to have rules if either the tg itself or any of its * ancestors has rules. This identifies groups without any * restrictions in the whole hierarchy and allows them to bypass * blk-throttle. */ blkg_for_each_descendant_pre(blkg, pos_css, global ? tg->td->queue->root_blkg : tg_to_blkg(tg)) { struct throtl_grp *this_tg = blkg_to_tg(blkg); tg_update_has_rules(this_tg); /* ignore root/second level */ if (!cgroup_subsys_on_dfl(io_cgrp_subsys) || !blkg->parent || !blkg->parent->parent) continue; } rcu_read_unlock(); /* * We're already holding queue_lock and know @tg is valid. Let's * apply the new config directly. * * Restart the slices for both READ and WRITES. It might happen * that a group's limit are dropped suddenly and we don't want to * account recently dispatched IO with new low rate. */ throtl_start_new_slice(tg, READ, false); throtl_start_new_slice(tg, WRITE, false); if (tg->flags & THROTL_TG_PENDING) { tg_update_disptime(tg); throtl_schedule_next_dispatch(sq->parent_sq, true); } } static int blk_throtl_init(struct gendisk *disk) { struct request_queue *q = disk->queue; struct throtl_data *td; int ret; td = kzalloc_node(sizeof(*td), GFP_KERNEL, q->node); if (!td) return -ENOMEM; INIT_WORK(&td->dispatch_work, blk_throtl_dispatch_work_fn); throtl_service_queue_init(&td->service_queue); /* * Freeze queue before activating policy, to synchronize with IO path, * which is protected by 'q_usage_counter'. */ blk_mq_freeze_queue(disk->queue); blk_mq_quiesce_queue(disk->queue); q->td = td; td->queue = q; /* activate policy */ ret = blkcg_activate_policy(disk, &blkcg_policy_throtl); if (ret) { q->td = NULL; kfree(td); goto out; } if (blk_queue_nonrot(q)) td->throtl_slice = DFL_THROTL_SLICE_SSD; else td->throtl_slice = DFL_THROTL_SLICE_HD; td->track_bio_latency = !queue_is_mq(q); if (!td->track_bio_latency) blk_stat_enable_accounting(q); out: blk_mq_unquiesce_queue(disk->queue); blk_mq_unfreeze_queue(disk->queue); return ret; } static ssize_t tg_set_conf(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off, bool is_u64) { struct blkcg *blkcg = css_to_blkcg(of_css(of)); struct blkg_conf_ctx ctx; struct throtl_grp *tg; int ret; u64 v; blkg_conf_init(&ctx, buf); ret = blkg_conf_open_bdev(&ctx); if (ret) goto out_finish; if (!blk_throtl_activated(ctx.bdev->bd_queue)) { ret = blk_throtl_init(ctx.bdev->bd_disk); if (ret) goto out_finish; } ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, &ctx); if (ret) goto out_finish; ret = -EINVAL; if (sscanf(ctx.body, "%llu", &v) != 1) goto out_finish; if (!v) v = U64_MAX; tg = blkg_to_tg(ctx.blkg); tg_update_carryover(tg); if (is_u64) *(u64 *)((void *)tg + of_cft(of)->private) = v; else *(unsigned int *)((void *)tg + of_cft(of)->private) = v; tg_conf_updated(tg, false); ret = 0; out_finish: blkg_conf_exit(&ctx); return ret ?: nbytes; } static ssize_t tg_set_conf_u64(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { return tg_set_conf(of, buf, nbytes, off, true); } static ssize_t tg_set_conf_uint(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { return tg_set_conf(of, buf, nbytes, off, false); } static int tg_print_rwstat(struct seq_file *sf, void *v) { blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_rwstat, &blkcg_policy_throtl, seq_cft(sf)->private, true); return 0; } static u64 tg_prfill_rwstat_recursive(struct seq_file *sf, struct blkg_policy_data *pd, int off) { struct blkg_rwstat_sample sum; blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_throtl, off, &sum); return __blkg_prfill_rwstat(sf, pd, &sum); } static int tg_print_rwstat_recursive(struct seq_file *sf, void *v) { blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_rwstat_recursive, &blkcg_policy_throtl, seq_cft(sf)->private, true); return 0; } static struct cftype throtl_legacy_files[] = { { .name = "throttle.read_bps_device", .private = offsetof(struct throtl_grp, bps[READ]), .seq_show = tg_print_conf_u64, .write = tg_set_conf_u64, }, { .name = "throttle.write_bps_device", .private = offsetof(struct throtl_grp, bps[WRITE]), .seq_show = tg_print_conf_u64, .write = tg_set_conf_u64, }, { .name = "throttle.read_iops_device", .private = offsetof(struct throtl_grp, iops[READ]), .seq_show = tg_print_conf_uint, .write = tg_set_conf_uint, }, { .name = "throttle.write_iops_device", .private = offsetof(struct throtl_grp, iops[WRITE]), .seq_show = tg_print_conf_uint, .write = tg_set_conf_uint, }, { .name = "throttle.io_service_bytes", .private = offsetof(struct throtl_grp, stat_bytes), .seq_show = tg_print_rwstat, }, { .name = "throttle.io_service_bytes_recursive", .private = offsetof(struct throtl_grp, stat_bytes), .seq_show = tg_print_rwstat_recursive, }, { .name = "throttle.io_serviced", .private = offsetof(struct throtl_grp, stat_ios), .seq_show = tg_print_rwstat, }, { .name = "throttle.io_serviced_recursive", .private = offsetof(struct throtl_grp, stat_ios), .seq_show = tg_print_rwstat_recursive, }, { } /* terminate */ }; static u64 tg_prfill_limit(struct seq_file *sf, struct blkg_policy_data *pd, int off) { struct throtl_grp *tg = pd_to_tg(pd); const char *dname = blkg_dev_name(pd->blkg); u64 bps_dft; unsigned int iops_dft; if (!dname) return 0; bps_dft = U64_MAX; iops_dft = UINT_MAX; if (tg->bps_conf[READ] == bps_dft && tg->bps_conf[WRITE] == bps_dft && tg->iops_conf[READ] == iops_dft && tg->iops_conf[WRITE] == iops_dft) return 0; seq_printf(sf, "%s", dname); if (tg->bps_conf[READ] == U64_MAX) seq_printf(sf, " rbps=max"); else seq_printf(sf, " rbps=%llu", tg->bps_conf[READ]); if (tg->bps_conf[WRITE] == U64_MAX) seq_printf(sf, " wbps=max"); else seq_printf(sf, " wbps=%llu", tg->bps_conf[WRITE]); if (tg->iops_conf[READ] == UINT_MAX) seq_printf(sf, " riops=max"); else seq_printf(sf, " riops=%u", tg->iops_conf[READ]); if (tg->iops_conf[WRITE] == UINT_MAX) seq_printf(sf, " wiops=max"); else seq_printf(sf, " wiops=%u", tg->iops_conf[WRITE]); seq_printf(sf, "\n"); return 0; } static int tg_print_limit(struct seq_file *sf, void *v) { blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_limit, &blkcg_policy_throtl, seq_cft(sf)->private, false); return 0; } static ssize_t tg_set_limit(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { struct blkcg *blkcg = css_to_blkcg(of_css(of)); struct blkg_conf_ctx ctx; struct throtl_grp *tg; u64 v[4]; int ret; blkg_conf_init(&ctx, buf); ret = blkg_conf_open_bdev(&ctx); if (ret) goto out_finish; if (!blk_throtl_activated(ctx.bdev->bd_queue)) { ret = blk_throtl_init(ctx.bdev->bd_disk); if (ret) goto out_finish; } ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, &ctx); if (ret) goto out_finish; tg = blkg_to_tg(ctx.blkg); tg_update_carryover(tg); v[0] = tg->bps[READ]; v[1] = tg->bps[WRITE]; v[2] = tg->iops[READ]; v[3] = tg->iops[WRITE]; while (true) { char tok[27]; /* wiops=18446744073709551616 */ char *p; u64 val = U64_MAX; int len; if (sscanf(ctx.body, "%26s%n", tok, &len) != 1) break; if (tok[0] == '\0') break; ctx.body += len; ret = -EINVAL; p = tok; strsep(&p, "="); if (!p || (sscanf(p, "%llu", &val) != 1 && strcmp(p, "max"))) goto out_finish; ret = -ERANGE; if (!val) goto out_finish; ret = -EINVAL; if (!strcmp(tok, "rbps") && val > 1) v[0] = val; else if (!strcmp(tok, "wbps") && val > 1) v[1] = val; else if (!strcmp(tok, "riops") && val > 1) v[2] = min_t(u64, val, UINT_MAX); else if (!strcmp(tok, "wiops") && val > 1) v[3] = min_t(u64, val, UINT_MAX); else goto out_finish; } tg->bps[READ] = v[0]; tg->bps[WRITE] = v[1]; tg->iops[READ] = v[2]; tg->iops[WRITE] = v[3]; tg_conf_updated(tg, false); ret = 0; out_finish: blkg_conf_exit(&ctx); return ret ?: nbytes; } static struct cftype throtl_files[] = { { .name = "max", .flags = CFTYPE_NOT_ON_ROOT, .seq_show = tg_print_limit, .write = tg_set_limit, }, { } /* terminate */ }; static void throtl_shutdown_wq(struct request_queue *q) { struct throtl_data *td = q->td; cancel_work_sync(&td->dispatch_work); } struct blkcg_policy blkcg_policy_throtl = { .dfl_cftypes = throtl_files, .legacy_cftypes = throtl_legacy_files, .pd_alloc_fn = throtl_pd_alloc, .pd_init_fn = throtl_pd_init, .pd_online_fn = throtl_pd_online, .pd_free_fn = throtl_pd_free, }; void blk_throtl_cancel_bios(struct gendisk *disk) { struct request_queue *q = disk->queue; struct cgroup_subsys_state *pos_css; struct blkcg_gq *blkg; if (!blk_throtl_activated(q)) return; spin_lock_irq(&q->queue_lock); /* * queue_lock is held, rcu lock is not needed here technically. * However, rcu lock is still held to emphasize that following * path need RCU protection and to prevent warning from lockdep. */ rcu_read_lock(); blkg_for_each_descendant_post(blkg, pos_css, q->root_blkg) { struct throtl_grp *tg = blkg_to_tg(blkg); struct throtl_service_queue *sq = &tg->service_queue; /* * Set the flag to make sure throtl_pending_timer_fn() won't * stop until all throttled bios are dispatched. */ tg->flags |= THROTL_TG_CANCELING; /* * Do not dispatch cgroup without THROTL_TG_PENDING or cgroup * will be inserted to service queue without THROTL_TG_PENDING * set in tg_update_disptime below. Then IO dispatched from * child in tg_dispatch_one_bio will trigger double insertion * and corrupt the tree. */ if (!(tg->flags & THROTL_TG_PENDING)) continue; /* * Update disptime after setting the above flag to make sure * throtl_select_dispatch() won't exit without dispatching. */ tg_update_disptime(tg); throtl_schedule_pending_timer(sq, jiffies + 1); } rcu_read_unlock(); spin_unlock_irq(&q->queue_lock); } bool __blk_throtl_bio(struct bio *bio) { struct request_queue *q = bdev_get_queue(bio->bi_bdev); struct blkcg_gq *blkg = bio->bi_blkg; struct throtl_qnode *qn = NULL; struct throtl_grp *tg = blkg_to_tg(blkg); struct throtl_service_queue *sq; bool rw = bio_data_dir(bio); bool throttled = false; struct throtl_data *td = tg->td; rcu_read_lock(); spin_lock_irq(&q->queue_lock); sq = &tg->service_queue; while (true) { if (tg->last_low_overflow_time[rw] == 0) tg->last_low_overflow_time[rw] = jiffies; /* throtl is FIFO - if bios are already queued, should queue */ if (sq->nr_queued[rw]) break; /* if above limits, break to queue */ if (!tg_may_dispatch(tg, bio, NULL)) { tg->last_low_overflow_time[rw] = jiffies; break; } /* within limits, let's charge and dispatch directly */ throtl_charge_bio(tg, bio); /* * We need to trim slice even when bios are not being queued * otherwise it might happen that a bio is not queued for * a long time and slice keeps on extending and trim is not * called for a long time. Now if limits are reduced suddenly * we take into account all the IO dispatched so far at new * low rate and * newly queued IO gets a really long dispatch * time. * * So keep on trimming slice even if bio is not queued. */ throtl_trim_slice(tg, rw); /* * @bio passed through this layer without being throttled. * Climb up the ladder. If we're already at the top, it * can be executed directly. */ qn = &tg->qnode_on_parent[rw]; sq = sq->parent_sq; tg = sq_to_tg(sq); if (!tg) { bio_set_flag(bio, BIO_BPS_THROTTLED); goto out_unlock; } } /* out-of-limit, queue to @tg */ throtl_log(sq, "[%c] bio. bdisp=%llu sz=%u bps=%llu iodisp=%u iops=%u queued=%d/%d", rw == READ ? 'R' : 'W', tg->bytes_disp[rw], bio->bi_iter.bi_size, tg_bps_limit(tg, rw), tg->io_disp[rw], tg_iops_limit(tg, rw), sq->nr_queued[READ], sq->nr_queued[WRITE]); tg->last_low_overflow_time[rw] = jiffies; td->nr_queued[rw]++; throtl_add_bio_tg(bio, qn, tg); throttled = true; /* * Update @tg's dispatch time and force schedule dispatch if @tg * was empty before @bio. The forced scheduling isn't likely to * cause undue delay as @bio is likely to be dispatched directly if * its @tg's disptime is not in the future. */ if (tg->flags & THROTL_TG_WAS_EMPTY) { tg_update_disptime(tg); throtl_schedule_next_dispatch(tg->service_queue.parent_sq, true); } out_unlock: spin_unlock_irq(&q->queue_lock); rcu_read_unlock(); return throttled; } void blk_throtl_exit(struct gendisk *disk) { struct request_queue *q = disk->queue; if (!blk_throtl_activated(q)) return; del_timer_sync(&q->td->service_queue.pending_timer); throtl_shutdown_wq(q); blkcg_deactivate_policy(disk, &blkcg_policy_throtl); kfree(q->td); } static int __init throtl_init(void) { kthrotld_workqueue = alloc_workqueue("kthrotld", WQ_MEM_RECLAIM, 0); if (!kthrotld_workqueue) panic("Failed to create kthrotld\n"); return blkcg_policy_register(&blkcg_policy_throtl); } module_init(throtl_init); |
8 8 8 8 5 2 1 8 1 6 4 2 4 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 | // SPDX-License-Identifier: GPL-2.0-or-later /* ASN.1 Object identifier (OID) registry * * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #include <linux/module.h> #include <linux/export.h> #include <linux/oid_registry.h> #include <linux/kernel.h> #include <linux/errno.h> #include <linux/bug.h> #include <linux/asn1.h> #include "oid_registry_data.c" MODULE_DESCRIPTION("OID Registry"); MODULE_AUTHOR("Red Hat, Inc."); MODULE_LICENSE("GPL"); /** * look_up_OID - Find an OID registration for the specified data * @data: Binary representation of the OID * @datasize: Size of the binary representation */ enum OID look_up_OID(const void *data, size_t datasize) { const unsigned char *octets = data; enum OID oid; unsigned char xhash; unsigned i, j, k, hash; size_t len; /* Hash the OID data */ hash = datasize - 1; for (i = 0; i < datasize; i++) hash += octets[i] * 33; hash = (hash >> 24) ^ (hash >> 16) ^ (hash >> 8) ^ hash; hash &= 0xff; /* Binary search the OID registry. OIDs are stored in ascending order * of hash value then ascending order of size and then in ascending * order of reverse value. */ i = 0; k = OID__NR; while (i < k) { j = (i + k) / 2; xhash = oid_search_table[j].hash; if (xhash > hash) { k = j; continue; } if (xhash < hash) { i = j + 1; continue; } oid = oid_search_table[j].oid; len = oid_index[oid + 1] - oid_index[oid]; if (len > datasize) { k = j; continue; } if (len < datasize) { i = j + 1; continue; } /* Variation is most likely to be at the tail end of the * OID, so do the comparison in reverse. */ while (len > 0) { unsigned char a = oid_data[oid_index[oid] + --len]; unsigned char b = octets[len]; if (a > b) { k = j; goto next; } if (a < b) { i = j + 1; goto next; } } return oid; next: ; } return OID__NR; } EXPORT_SYMBOL_GPL(look_up_OID); /** * parse_OID - Parse an OID from a bytestream * @data: Binary representation of the header + OID * @datasize: Size of the binary representation * @oid: Pointer to oid to return result * * Parse an OID from a bytestream that holds the OID in the format * ASN1_OID | length | oid. The length indicator must equal to datasize - 2. * -EBADMSG is returned if the bytestream is too short. */ int parse_OID(const void *data, size_t datasize, enum OID *oid) { const unsigned char *v = data; /* we need 2 bytes of header and at least 1 byte for oid */ if (datasize < 3 || v[0] != ASN1_OID || v[1] != datasize - 2) return -EBADMSG; *oid = look_up_OID(data + 2, datasize - 2); return 0; } EXPORT_SYMBOL_GPL(parse_OID); /* * sprint_OID - Print an Object Identifier into a buffer * @data: The encoded OID to print * @datasize: The size of the encoded OID * @buffer: The buffer to render into * @bufsize: The size of the buffer * * The OID is rendered into the buffer in "a.b.c.d" format and the number of * bytes is returned. -EBADMSG is returned if the data could not be interpreted * and -ENOBUFS if the buffer was too small. */ int sprint_oid(const void *data, size_t datasize, char *buffer, size_t bufsize) { const unsigned char *v = data, *end = v + datasize; unsigned long num; unsigned char n; size_t ret; int count; if (v >= end) goto bad; n = *v++; ret = count = snprintf(buffer, bufsize, "%u.%u", n / 40, n % 40); if (count >= bufsize) return -ENOBUFS; buffer += count; bufsize -= count; while (v < end) { n = *v++; if (!(n & 0x80)) { num = n; } else { num = n & 0x7f; do { if (v >= end) goto bad; n = *v++; num <<= 7; num |= n & 0x7f; } while (n & 0x80); } ret += count = snprintf(buffer, bufsize, ".%lu", num); if (count >= bufsize) return -ENOBUFS; buffer += count; bufsize -= count; } return ret; bad: snprintf(buffer, bufsize, "(bad)"); return -EBADMSG; } EXPORT_SYMBOL_GPL(sprint_oid); /** * sprint_OID - Print an Object Identifier into a buffer * @oid: The OID to print * @buffer: The buffer to render into * @bufsize: The size of the buffer * * The OID is rendered into the buffer in "a.b.c.d" format and the number of * bytes is returned. */ int sprint_OID(enum OID oid, char *buffer, size_t bufsize) { int ret; BUG_ON(oid >= OID__NR); ret = sprint_oid(oid_data + oid_index[oid], oid_index[oid + 1] - oid_index[oid], buffer, bufsize); BUG_ON(ret == -EBADMSG); return ret; } EXPORT_SYMBOL_GPL(sprint_OID); |
5 99 95 95 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 | // SPDX-License-Identifier: GPL-2.0 or MIT /* * Copyright (C) 2016 Noralf Trønnes * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. */ #include <linux/io.h> #include <linux/iosys-map.h> #include <linux/module.h> #include <linux/slab.h> #include <drm/drm_device.h> #include <drm/drm_format_helper.h> #include <drm/drm_framebuffer.h> #include <drm/drm_fourcc.h> #include <drm/drm_print.h> #include <drm/drm_rect.h> /** * drm_format_conv_state_init - Initialize format-conversion state * @state: The state to initialize * * Clears all fields in struct drm_format_conv_state. The state will * be empty with no preallocated resources. */ void drm_format_conv_state_init(struct drm_format_conv_state *state) { state->tmp.mem = NULL; state->tmp.size = 0; state->tmp.preallocated = false; } EXPORT_SYMBOL(drm_format_conv_state_init); /** * drm_format_conv_state_copy - Copy format-conversion state * @state: Destination state * @old_state: Source state * * Copies format-conversion state from @old_state to @state; except for * temporary storage. */ void drm_format_conv_state_copy(struct drm_format_conv_state *state, const struct drm_format_conv_state *old_state) { /* * So far, there's only temporary storage here, which we don't * duplicate. Just clear the fields. */ state->tmp.mem = NULL; state->tmp.size = 0; state->tmp.preallocated = false; } EXPORT_SYMBOL(drm_format_conv_state_copy); /** * drm_format_conv_state_reserve - Allocates storage for format conversion * @state: The format-conversion state * @new_size: The minimum allocation size * @flags: Flags for kmalloc() * * Allocates at least @new_size bytes and returns a pointer to the memory * range. After calling this function, previously returned memory blocks * are invalid. It's best to collect all memory requirements of a format * conversion and call this function once to allocate the range. * * Returns: * A pointer to the allocated memory range, or NULL otherwise. */ void *drm_format_conv_state_reserve(struct drm_format_conv_state *state, size_t new_size, gfp_t flags) { void *mem; if (new_size <= state->tmp.size) goto out; else if (state->tmp.preallocated) return NULL; mem = krealloc(state->tmp.mem, new_size, flags); if (!mem) return NULL; state->tmp.mem = mem; state->tmp.size = new_size; out: return state->tmp.mem; } EXPORT_SYMBOL(drm_format_conv_state_reserve); /** * drm_format_conv_state_release - Releases an format-conversion storage * @state: The format-conversion state * * Releases the memory range references by the format-conversion state. * After this call, all pointers to the memory are invalid. Prefer * drm_format_conv_state_init() for cleaning up and unloading a driver. */ void drm_format_conv_state_release(struct drm_format_conv_state *state) { if (state->tmp.preallocated) return; kfree(state->tmp.mem); state->tmp.mem = NULL; state->tmp.size = 0; } EXPORT_SYMBOL(drm_format_conv_state_release); static unsigned int clip_offset(const struct drm_rect *clip, unsigned int pitch, unsigned int cpp) { return clip->y1 * pitch + clip->x1 * cpp; } /** * drm_fb_clip_offset - Returns the clipping rectangles byte-offset in a framebuffer * @pitch: Framebuffer line pitch in byte * @format: Framebuffer format * @clip: Clip rectangle * * Returns: * The byte offset of the clip rectangle's top-left corner within the framebuffer. */ unsigned int drm_fb_clip_offset(unsigned int pitch, const struct drm_format_info *format, const struct drm_rect *clip) { return clip_offset(clip, pitch, format->cpp[0]); } EXPORT_SYMBOL(drm_fb_clip_offset); /* TODO: Make this function work with multi-plane formats. */ static int __drm_fb_xfrm(void *dst, unsigned long dst_pitch, unsigned long dst_pixsize, const void *vaddr, const struct drm_framebuffer *fb, const struct drm_rect *clip, bool vaddr_cached_hint, struct drm_format_conv_state *state, void (*xfrm_line)(void *dbuf, const void *sbuf, unsigned int npixels)) { unsigned long linepixels = drm_rect_width(clip); unsigned long lines = drm_rect_height(clip); size_t sbuf_len = linepixels * fb->format->cpp[0]; void *stmp = NULL; unsigned long i; const void *sbuf; /* * Some source buffers, such as DMA memory, use write-combine * caching, so reads are uncached. Speed up access by fetching * one line at a time. */ if (!vaddr_cached_hint) { stmp = drm_format_conv_state_reserve(state, sbuf_len, GFP_KERNEL); if (!stmp) return -ENOMEM; } if (!dst_pitch) dst_pitch = drm_rect_width(clip) * dst_pixsize; vaddr += clip_offset(clip, fb->pitches[0], fb->format->cpp[0]); for (i = 0; i < lines; ++i) { if (stmp) sbuf = memcpy(stmp, vaddr, sbuf_len); else sbuf = vaddr; xfrm_line(dst, sbuf, linepixels); vaddr += fb->pitches[0]; dst += dst_pitch; } return 0; } /* TODO: Make this function work with multi-plane formats. */ static int __drm_fb_xfrm_toio(void __iomem *dst, unsigned long dst_pitch, unsigned long dst_pixsize, const void *vaddr, const struct drm_framebuffer *fb, const struct drm_rect *clip, bool vaddr_cached_hint, struct drm_format_conv_state *state, void (*xfrm_line)(void *dbuf, const void *sbuf, unsigned int npixels)) { unsigned long linepixels = drm_rect_width(clip); unsigned long lines = drm_rect_height(clip); size_t dbuf_len = linepixels * dst_pixsize; size_t stmp_off = round_up(dbuf_len, ARCH_KMALLOC_MINALIGN); /* for sbuf alignment */ size_t sbuf_len = linepixels * fb->format->cpp[0]; void *stmp = NULL; unsigned long i; const void *sbuf; void *dbuf; if (vaddr_cached_hint) { dbuf = drm_format_conv_state_reserve(state, dbuf_len, GFP_KERNEL); } else { dbuf = drm_format_conv_state_reserve(state, stmp_off + sbuf_len, GFP_KERNEL); stmp = dbuf + stmp_off; } if (!dbuf) return -ENOMEM; if (!dst_pitch) dst_pitch = linepixels * dst_pixsize; vaddr += clip_offset(clip, fb->pitches[0], fb->format->cpp[0]); for (i = 0; i < lines; ++i) { if (stmp) sbuf = memcpy(stmp, vaddr, sbuf_len); else sbuf = vaddr; xfrm_line(dbuf, sbuf, linepixels); memcpy_toio(dst, dbuf, dbuf_len); vaddr += fb->pitches[0]; dst += dst_pitch; } return 0; } /* TODO: Make this function work with multi-plane formats. */ static int drm_fb_xfrm(struct iosys_map *dst, const unsigned int *dst_pitch, const u8 *dst_pixsize, const struct iosys_map *src, const struct drm_framebuffer *fb, const struct drm_rect *clip, bool vaddr_cached_hint, struct drm_format_conv_state *state, void (*xfrm_line)(void *dbuf, const void *sbuf, unsigned int npixels)) { static const unsigned int default_dst_pitch[DRM_FORMAT_MAX_PLANES] = { 0, 0, 0, 0 }; if (!dst_pitch) dst_pitch = default_dst_pitch; /* TODO: handle src in I/O memory here */ if (dst[0].is_iomem) return __drm_fb_xfrm_toio(dst[0].vaddr_iomem, dst_pitch[0], dst_pixsize[0], src[0].vaddr, fb, clip, vaddr_cached_hint, state, xfrm_line); else return __drm_fb_xfrm(dst[0].vaddr, dst_pitch[0], dst_pixsize[0], src[0].vaddr, fb, clip, vaddr_cached_hint, state, xfrm_line); } /** * drm_fb_memcpy - Copy clip buffer * @dst: Array of destination buffers * @dst_pitch: Array of numbers of bytes between the start of two consecutive scanlines * within @dst; can be NULL if scanlines are stored next to each other. * @src: Array of source buffers * @fb: DRM framebuffer * @clip: Clip rectangle area to copy * * This function copies parts of a framebuffer to display memory. Destination and * framebuffer formats must match. No conversion takes place. The parameters @dst, * @dst_pitch and @src refer to arrays. Each array must have at least as many entries * as there are planes in @fb's format. Each entry stores the value for the format's * respective color plane at the same index. * * This function does not apply clipping on @dst (i.e. the destination is at the * top-left corner). */ void drm_fb_memcpy(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, const struct drm_rect *clip) { static const unsigned int default_dst_pitch[DRM_FORMAT_MAX_PLANES] = { 0, 0, 0, 0 }; const struct drm_format_info *format = fb->format; unsigned int i, y, lines = drm_rect_height(clip); if (!dst_pitch) dst_pitch = default_dst_pitch; for (i = 0; i < format->num_planes; ++i) { unsigned int bpp_i = drm_format_info_bpp(format, i); unsigned int cpp_i = DIV_ROUND_UP(bpp_i, 8); size_t len_i = DIV_ROUND_UP(drm_rect_width(clip) * bpp_i, 8); unsigned int dst_pitch_i = dst_pitch[i]; struct iosys_map dst_i = dst[i]; struct iosys_map src_i = src[i]; if (!dst_pitch_i) dst_pitch_i = len_i; iosys_map_incr(&src_i, clip_offset(clip, fb->pitches[i], cpp_i)); for (y = 0; y < lines; y++) { /* TODO: handle src_i in I/O memory here */ iosys_map_memcpy_to(&dst_i, 0, src_i.vaddr, len_i); iosys_map_incr(&src_i, fb->pitches[i]); iosys_map_incr(&dst_i, dst_pitch_i); } } } EXPORT_SYMBOL(drm_fb_memcpy); static void drm_fb_swab16_line(void *dbuf, const void *sbuf, unsigned int pixels) { u16 *dbuf16 = dbuf; const u16 *sbuf16 = sbuf; const u16 *send16 = sbuf16 + pixels; while (sbuf16 < send16) *dbuf16++ = swab16(*sbuf16++); } static void drm_fb_swab32_line(void *dbuf, const void *sbuf, unsigned int pixels) { u32 *dbuf32 = dbuf; const u32 *sbuf32 = sbuf; const u32 *send32 = sbuf32 + pixels; while (sbuf32 < send32) *dbuf32++ = swab32(*sbuf32++); } /** * drm_fb_swab - Swap bytes into clip buffer * @dst: Array of destination buffers * @dst_pitch: Array of numbers of bytes between the start of two consecutive scanlines * within @dst; can be NULL if scanlines are stored next to each other. * @src: Array of source buffers * @fb: DRM framebuffer * @clip: Clip rectangle area to copy * @cached: Source buffer is mapped cached (eg. not write-combined) * @state: Transform and conversion state * * This function copies parts of a framebuffer to display memory and swaps per-pixel * bytes during the process. Destination and framebuffer formats must match. The * parameters @dst, @dst_pitch and @src refer to arrays. Each array must have at * least as many entries as there are planes in @fb's format. Each entry stores the * value for the format's respective color plane at the same index. If @cached is * false a temporary buffer is used to cache one pixel line at a time to speed up * slow uncached reads. * * This function does not apply clipping on @dst (i.e. the destination is at the * top-left corner). */ void drm_fb_swab(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, const struct drm_rect *clip, bool cached, struct drm_format_conv_state *state) { const struct drm_format_info *format = fb->format; u8 cpp = DIV_ROUND_UP(drm_format_info_bpp(format, 0), 8); void (*swab_line)(void *dbuf, const void *sbuf, unsigned int npixels); switch (cpp) { case 4: swab_line = drm_fb_swab32_line; break; case 2: swab_line = drm_fb_swab16_line; break; default: drm_warn_once(fb->dev, "Format %p4cc has unsupported pixel size.\n", &format->format); return; } drm_fb_xfrm(dst, dst_pitch, &cpp, src, fb, clip, cached, state, swab_line); } EXPORT_SYMBOL(drm_fb_swab); static void drm_fb_xrgb8888_to_rgb332_line(void *dbuf, const void *sbuf, unsigned int pixels) { u8 *dbuf8 = dbuf; const __le32 *sbuf32 = sbuf; unsigned int x; u32 pix; for (x = 0; x < pixels; x++) { pix = le32_to_cpu(sbuf32[x]); dbuf8[x] = ((pix & 0x00e00000) >> 16) | ((pix & 0x0000e000) >> 11) | ((pix & 0x000000c0) >> 6); } } /** * drm_fb_xrgb8888_to_rgb332 - Convert XRGB8888 to RGB332 clip buffer * @dst: Array of RGB332 destination buffers * @dst_pitch: Array of numbers of bytes between the start of two consecutive scanlines * within @dst; can be NULL if scanlines are stored next to each other. * @src: Array of XRGB8888 source buffers * @fb: DRM framebuffer * @clip: Clip rectangle area to copy * @state: Transform and conversion state * * This function copies parts of a framebuffer to display memory and converts the * color format during the process. Destination and framebuffer formats must match. The * parameters @dst, @dst_pitch and @src refer to arrays. Each array must have at * least as many entries as there are planes in @fb's format. Each entry stores the * value for the format's respective color plane at the same index. * * This function does not apply clipping on @dst (i.e. the destination is at the * top-left corner). * * Drivers can use this function for RGB332 devices that don't support XRGB8888 natively. */ void drm_fb_xrgb8888_to_rgb332(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, const struct drm_rect *clip, struct drm_format_conv_state *state) { static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 1, }; drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, drm_fb_xrgb8888_to_rgb332_line); } EXPORT_SYMBOL(drm_fb_xrgb8888_to_rgb332); static void drm_fb_xrgb8888_to_rgb565_line(void *dbuf, const void *sbuf, unsigned int pixels) { __le16 *dbuf16 = dbuf; const __le32 *sbuf32 = sbuf; unsigned int x; u16 val16; u32 pix; for (x = 0; x < pixels; x++) { pix = le32_to_cpu(sbuf32[x]); val16 = ((pix & 0x00F80000) >> 8) | ((pix & 0x0000FC00) >> 5) | ((pix & 0x000000F8) >> 3); dbuf16[x] = cpu_to_le16(val16); } } /* TODO: implement this helper as conversion to RGB565|BIG_ENDIAN */ static void drm_fb_xrgb8888_to_rgb565_swab_line(void *dbuf, const void *sbuf, unsigned int pixels) { __le16 *dbuf16 = dbuf; const __le32 *sbuf32 = sbuf; unsigned int x; u16 val16; u32 pix; for (x = 0; x < pixels; x++) { pix = le32_to_cpu(sbuf32[x]); val16 = ((pix & 0x00F80000) >> 8) | ((pix & 0x0000FC00) >> 5) | ((pix & 0x000000F8) >> 3); dbuf16[x] = cpu_to_le16(swab16(val16)); } } /** * drm_fb_xrgb8888_to_rgb565 - Convert XRGB8888 to RGB565 clip buffer * @dst: Array of RGB565 destination buffers * @dst_pitch: Array of numbers of bytes between the start of two consecutive scanlines * within @dst; can be NULL if scanlines are stored next to each other. * @src: Array of XRGB8888 source buffer * @fb: DRM framebuffer * @clip: Clip rectangle area to copy * @state: Transform and conversion state * @swab: Swap bytes * * This function copies parts of a framebuffer to display memory and converts the * color format during the process. Destination and framebuffer formats must match. The * parameters @dst, @dst_pitch and @src refer to arrays. Each array must have at * least as many entries as there are planes in @fb's format. Each entry stores the * value for the format's respective color plane at the same index. * * This function does not apply clipping on @dst (i.e. the destination is at the * top-left corner). * * Drivers can use this function for RGB565 devices that don't support XRGB8888 natively. */ void drm_fb_xrgb8888_to_rgb565(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, const struct drm_rect *clip, struct drm_format_conv_state *state, bool swab) { static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 2, }; void (*xfrm_line)(void *dbuf, const void *sbuf, unsigned int npixels); if (swab) xfrm_line = drm_fb_xrgb8888_to_rgb565_swab_line; else xfrm_line = drm_fb_xrgb8888_to_rgb565_line; drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, xfrm_line); } EXPORT_SYMBOL(drm_fb_xrgb8888_to_rgb565); static void drm_fb_xrgb8888_to_xrgb1555_line(void *dbuf, const void *sbuf, unsigned int pixels) { __le16 *dbuf16 = dbuf; const __le32 *sbuf32 = sbuf; unsigned int x; u16 val16; u32 pix; for (x = 0; x < pixels; x++) { pix = le32_to_cpu(sbuf32[x]); val16 = ((pix & 0x00f80000) >> 9) | ((pix & 0x0000f800) >> 6) | ((pix & 0x000000f8) >> 3); dbuf16[x] = cpu_to_le16(val16); } } /** * drm_fb_xrgb8888_to_xrgb1555 - Convert XRGB8888 to XRGB1555 clip buffer * @dst: Array of XRGB1555 destination buffers * @dst_pitch: Array of numbers of bytes between the start of two consecutive scanlines * within @dst; can be NULL if scanlines are stored next to each other. * @src: Array of XRGB8888 source buffer * @fb: DRM framebuffer * @clip: Clip rectangle area to copy * @state: Transform and conversion state * * This function copies parts of a framebuffer to display memory and converts * the color format during the process. The parameters @dst, @dst_pitch and * @src refer to arrays. Each array must have at least as many entries as * there are planes in @fb's format. Each entry stores the value for the * format's respective color plane at the same index. * * This function does not apply clipping on @dst (i.e. the destination is at the * top-left corner). * * Drivers can use this function for XRGB1555 devices that don't support * XRGB8888 natively. */ void drm_fb_xrgb8888_to_xrgb1555(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, const struct drm_rect *clip, struct drm_format_conv_state *state) { static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 2, }; drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, drm_fb_xrgb8888_to_xrgb1555_line); } EXPORT_SYMBOL(drm_fb_xrgb8888_to_xrgb1555); static void drm_fb_xrgb8888_to_argb1555_line(void *dbuf, const void *sbuf, unsigned int pixels) { __le16 *dbuf16 = dbuf; const __le32 *sbuf32 = sbuf; unsigned int x; u16 val16; u32 pix; for (x = 0; x < pixels; x++) { pix = le32_to_cpu(sbuf32[x]); val16 = BIT(15) | /* set alpha bit */ ((pix & 0x00f80000) >> 9) | ((pix & 0x0000f800) >> 6) | ((pix & 0x000000f8) >> 3); dbuf16[x] = cpu_to_le16(val16); } } /** * drm_fb_xrgb8888_to_argb1555 - Convert XRGB8888 to ARGB1555 clip buffer * @dst: Array of ARGB1555 destination buffers * @dst_pitch: Array of numbers of bytes between the start of two consecutive scanlines * within @dst; can be NULL if scanlines are stored next to each other. * @src: Array of XRGB8888 source buffer * @fb: DRM framebuffer * @clip: Clip rectangle area to copy * @state: Transform and conversion state * * This function copies parts of a framebuffer to display memory and converts * the color format during the process. The parameters @dst, @dst_pitch and * @src refer to arrays. Each array must have at least as many entries as * there are planes in @fb's format. Each entry stores the value for the * format's respective color plane at the same index. * * This function does not apply clipping on @dst (i.e. the destination is at the * top-left corner). * * Drivers can use this function for ARGB1555 devices that don't support * XRGB8888 natively. It sets an opaque alpha channel as part of the conversion. */ void drm_fb_xrgb8888_to_argb1555(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, const struct drm_rect *clip, struct drm_format_conv_state *state) { static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 2, }; drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, drm_fb_xrgb8888_to_argb1555_line); } EXPORT_SYMBOL(drm_fb_xrgb8888_to_argb1555); static void drm_fb_xrgb8888_to_rgba5551_line(void *dbuf, const void *sbuf, unsigned int pixels) { __le16 *dbuf16 = dbuf; const __le32 *sbuf32 = sbuf; unsigned int x; u16 val16; u32 pix; for (x = 0; x < pixels; x++) { pix = le32_to_cpu(sbuf32[x]); val16 = ((pix & 0x00f80000) >> 8) | ((pix & 0x0000f800) >> 5) | ((pix & 0x000000f8) >> 2) | BIT(0); /* set alpha bit */ dbuf16[x] = cpu_to_le16(val16); } } /** * drm_fb_xrgb8888_to_rgba5551 - Convert XRGB8888 to RGBA5551 clip buffer * @dst: Array of RGBA5551 destination buffers * @dst_pitch: Array of numbers of bytes between the start of two consecutive scanlines * within @dst; can be NULL if scanlines are stored next to each other. * @src: Array of XRGB8888 source buffer * @fb: DRM framebuffer * @clip: Clip rectangle area to copy * @state: Transform and conversion state * * This function copies parts of a framebuffer to display memory and converts * the color format during the process. The parameters @dst, @dst_pitch and * @src refer to arrays. Each array must have at least as many entries as * there are planes in @fb's format. Each entry stores the value for the * format's respective color plane at the same index. * * This function does not apply clipping on @dst (i.e. the destination is at the * top-left corner). * * Drivers can use this function for RGBA5551 devices that don't support * XRGB8888 natively. It sets an opaque alpha channel as part of the conversion. */ void drm_fb_xrgb8888_to_rgba5551(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, const struct drm_rect *clip, struct drm_format_conv_state *state) { static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 2, }; drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, drm_fb_xrgb8888_to_rgba5551_line); } EXPORT_SYMBOL(drm_fb_xrgb8888_to_rgba5551); static void drm_fb_xrgb8888_to_rgb888_line(void *dbuf, const void *sbuf, unsigned int pixels) { u8 *dbuf8 = dbuf; const __le32 *sbuf32 = sbuf; unsigned int x; u32 pix; for (x = 0; x < pixels; x++) { pix = le32_to_cpu(sbuf32[x]); /* write blue-green-red to output in little endianness */ *dbuf8++ = (pix & 0x000000FF) >> 0; *dbuf8++ = (pix & 0x0000FF00) >> 8; *dbuf8++ = (pix & 0x00FF0000) >> 16; } } /** * drm_fb_xrgb8888_to_rgb888 - Convert XRGB8888 to RGB888 clip buffer * @dst: Array of RGB888 destination buffers * @dst_pitch: Array of numbers of bytes between the start of two consecutive scanlines * within @dst; can be NULL if scanlines are stored next to each other. * @src: Array of XRGB8888 source buffers * @fb: DRM framebuffer * @clip: Clip rectangle area to copy * @state: Transform and conversion state * * This function copies parts of a framebuffer to display memory and converts the * color format during the process. Destination and framebuffer formats must match. The * parameters @dst, @dst_pitch and @src refer to arrays. Each array must have at * least as many entries as there are planes in @fb's format. Each entry stores the * value for the format's respective color plane at the same index. * * This function does not apply clipping on @dst (i.e. the destination is at the * top-left corner). * * Drivers can use this function for RGB888 devices that don't natively * support XRGB8888. */ void drm_fb_xrgb8888_to_rgb888(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, const struct drm_rect *clip, struct drm_format_conv_state *state) { static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 3, }; drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, drm_fb_xrgb8888_to_rgb888_line); } EXPORT_SYMBOL(drm_fb_xrgb8888_to_rgb888); static void drm_fb_xrgb8888_to_argb8888_line(void *dbuf, const void *sbuf, unsigned int pixels) { __le32 *dbuf32 = dbuf; const __le32 *sbuf32 = sbuf; unsigned int x; u32 pix; for (x = 0; x < pixels; x++) { pix = le32_to_cpu(sbuf32[x]); pix |= GENMASK(31, 24); /* fill alpha bits */ dbuf32[x] = cpu_to_le32(pix); } } /** * drm_fb_xrgb8888_to_argb8888 - Convert XRGB8888 to ARGB8888 clip buffer * @dst: Array of ARGB8888 destination buffers * @dst_pitch: Array of numbers of bytes between the start of two consecutive scanlines * within @dst; can be NULL if scanlines are stored next to each other. * @src: Array of XRGB8888 source buffer * @fb: DRM framebuffer * @clip: Clip rectangle area to copy * @state: Transform and conversion state * * This function copies parts of a framebuffer to display memory and converts the * color format during the process. The parameters @dst, @dst_pitch and @src refer * to arrays. Each array must have at least as many entries as there are planes in * @fb's format. Each entry stores the value for the format's respective color plane * at the same index. * * This function does not apply clipping on @dst (i.e. the destination is at the * top-left corner). * * Drivers can use this function for ARGB8888 devices that don't support XRGB8888 * natively. It sets an opaque alpha channel as part of the conversion. */ void drm_fb_xrgb8888_to_argb8888(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, const struct drm_rect *clip, struct drm_format_conv_state *state) { static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 4, }; drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, drm_fb_xrgb8888_to_argb8888_line); } EXPORT_SYMBOL(drm_fb_xrgb8888_to_argb8888); static void drm_fb_xrgb8888_to_abgr8888_line(void *dbuf, const void *sbuf, unsigned int pixels) { __le32 *dbuf32 = dbuf; const __le32 *sbuf32 = sbuf; unsigned int x; u32 pix; for (x = 0; x < pixels; x++) { pix = le32_to_cpu(sbuf32[x]); pix = ((pix & 0x00ff0000) >> 16) << 0 | ((pix & 0x0000ff00) >> 8) << 8 | ((pix & 0x000000ff) >> 0) << 16 | GENMASK(31, 24); /* fill alpha bits */ *dbuf32++ = cpu_to_le32(pix); } } static void drm_fb_xrgb8888_to_abgr8888(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, const struct drm_rect *clip, struct drm_format_conv_state *state) { static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 4, }; drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, drm_fb_xrgb8888_to_abgr8888_line); } static void drm_fb_xrgb8888_to_xbgr8888_line(void *dbuf, const void *sbuf, unsigned int pixels) { __le32 *dbuf32 = dbuf; const __le32 *sbuf32 = sbuf; unsigned int x; u32 pix; for (x = 0; x < pixels; x++) { pix = le32_to_cpu(sbuf32[x]); pix = ((pix & 0x00ff0000) >> 16) << 0 | ((pix & 0x0000ff00) >> 8) << 8 | ((pix & 0x000000ff) >> 0) << 16 | ((pix & 0xff000000) >> 24) << 24; *dbuf32++ = cpu_to_le32(pix); } } static void drm_fb_xrgb8888_to_xbgr8888(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, const struct drm_rect *clip, struct drm_format_conv_state *state) { static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 4, }; drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, drm_fb_xrgb8888_to_xbgr8888_line); } static void drm_fb_xrgb8888_to_xrgb2101010_line(void *dbuf, const void *sbuf, unsigned int pixels) { __le32 *dbuf32 = dbuf; const __le32 *sbuf32 = sbuf; unsigned int x; u32 val32; u32 pix; for (x = 0; x < pixels; x++) { pix = le32_to_cpu(sbuf32[x]); val32 = ((pix & 0x000000FF) << 2) | ((pix & 0x0000FF00) << 4) | ((pix & 0x00FF0000) << 6); pix = val32 | ((val32 >> 8) & 0x00300C03); *dbuf32++ = cpu_to_le32(pix); } } /** * drm_fb_xrgb8888_to_xrgb2101010 - Convert XRGB8888 to XRGB2101010 clip buffer * @dst: Array of XRGB2101010 destination buffers * @dst_pitch: Array of numbers of bytes between the start of two consecutive scanlines * within @dst; can be NULL if scanlines are stored next to each other. * @src: Array of XRGB8888 source buffers * @fb: DRM framebuffer * @clip: Clip rectangle area to copy * @state: Transform and conversion state * * This function copies parts of a framebuffer to display memory and converts the * color format during the process. Destination and framebuffer formats must match. The * parameters @dst, @dst_pitch and @src refer to arrays. Each array must have at * least as many entries as there are planes in @fb's format. Each entry stores the * value for the format's respective color plane at the same index. * * This function does not apply clipping on @dst (i.e. the destination is at the * top-left corner). * * Drivers can use this function for XRGB2101010 devices that don't support XRGB8888 * natively. */ void drm_fb_xrgb8888_to_xrgb2101010(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, const struct drm_rect *clip, struct drm_format_conv_state *state) { static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 4, }; drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, drm_fb_xrgb8888_to_xrgb2101010_line); } EXPORT_SYMBOL(drm_fb_xrgb8888_to_xrgb2101010); static void drm_fb_xrgb8888_to_argb2101010_line(void *dbuf, const void *sbuf, unsigned int pixels) { __le32 *dbuf32 = dbuf; const __le32 *sbuf32 = sbuf; unsigned int x; u32 val32; u32 pix; for (x = 0; x < pixels; x++) { pix = le32_to_cpu(sbuf32[x]); val32 = ((pix & 0x000000ff) << 2) | ((pix & 0x0000ff00) << 4) | ((pix & 0x00ff0000) << 6); pix = GENMASK(31, 30) | /* set alpha bits */ val32 | ((val32 >> 8) & 0x00300c03); *dbuf32++ = cpu_to_le32(pix); } } /** * drm_fb_xrgb8888_to_argb2101010 - Convert XRGB8888 to ARGB2101010 clip buffer * @dst: Array of ARGB2101010 destination buffers * @dst_pitch: Array of numbers of bytes between the start of two consecutive scanlines * within @dst; can be NULL if scanlines are stored next to each other. * @src: Array of XRGB8888 source buffers * @fb: DRM framebuffer * @clip: Clip rectangle area to copy * @state: Transform and conversion state * * This function copies parts of a framebuffer to display memory and converts * the color format during the process. The parameters @dst, @dst_pitch and * @src refer to arrays. Each array must have at least as many entries as * there are planes in @fb's format. Each entry stores the value for the * format's respective color plane at the same index. * * This function does not apply clipping on @dst (i.e. the destination is at the * top-left corner). * * Drivers can use this function for ARGB2101010 devices that don't support XRGB8888 * natively. */ void drm_fb_xrgb8888_to_argb2101010(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, const struct drm_rect *clip, struct drm_format_conv_state *state) { static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 4, }; drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, drm_fb_xrgb8888_to_argb2101010_line); } EXPORT_SYMBOL(drm_fb_xrgb8888_to_argb2101010); static void drm_fb_xrgb8888_to_gray8_line(void *dbuf, const void *sbuf, unsigned int pixels) { u8 *dbuf8 = dbuf; const __le32 *sbuf32 = sbuf; unsigned int x; for (x = 0; x < pixels; x++) { u32 pix = le32_to_cpu(sbuf32[x]); u8 r = (pix & 0x00ff0000) >> 16; u8 g = (pix & 0x0000ff00) >> 8; u8 b = pix & 0x000000ff; /* ITU BT.601: Y = 0.299 R + 0.587 G + 0.114 B */ *dbuf8++ = (3 * r + 6 * g + b) / 10; } } /** * drm_fb_xrgb8888_to_gray8 - Convert XRGB8888 to grayscale * @dst: Array of 8-bit grayscale destination buffers * @dst_pitch: Array of numbers of bytes between the start of two consecutive scanlines * within @dst; can be NULL if scanlines are stored next to each other. * @src: Array of XRGB8888 source buffers * @fb: DRM framebuffer * @clip: Clip rectangle area to copy * @state: Transform and conversion state * * This function copies parts of a framebuffer to display memory and converts the * color format during the process. Destination and framebuffer formats must match. The * parameters @dst, @dst_pitch and @src refer to arrays. Each array must have at * least as many entries as there are planes in @fb's format. Each entry stores the * value for the format's respective color plane at the same index. * * This function does not apply clipping on @dst (i.e. the destination is at the * top-left corner). * * DRM doesn't have native monochrome or grayscale support. Drivers can use this * function for grayscale devices that don't support XRGB8888 natively.Such * drivers can announce the commonly supported XR24 format to userspace and use * this function to convert to the native format. Monochrome drivers will use the * most significant bit, where 1 means foreground color and 0 background color. * ITU BT.601 is being used for the RGB -> luma (brightness) conversion. */ void drm_fb_xrgb8888_to_gray8(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, const struct drm_rect *clip, struct drm_format_conv_state *state) { static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 1, }; drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, drm_fb_xrgb8888_to_gray8_line); } EXPORT_SYMBOL(drm_fb_xrgb8888_to_gray8); /** * drm_fb_blit - Copy parts of a framebuffer to display memory * @dst: Array of display-memory addresses to copy to * @dst_pitch: Array of numbers of bytes between the start of two consecutive scanlines * within @dst; can be NULL if scanlines are stored next to each other. * @dst_format: FOURCC code of the display's color format * @src: The framebuffer memory to copy from * @fb: The framebuffer to copy from * @clip: Clip rectangle area to copy * @state: Transform and conversion state * * This function copies parts of a framebuffer to display memory. If the * formats of the display and the framebuffer mismatch, the blit function * will attempt to convert between them during the process. The parameters @dst, * @dst_pitch and @src refer to arrays. Each array must have at least as many * entries as there are planes in @dst_format's format. Each entry stores the * value for the format's respective color plane at the same index. * * This function does not apply clipping on @dst (i.e. the destination is at the * top-left corner). * * Returns: * 0 on success, or * -EINVAL if the color-format conversion failed, or * a negative error code otherwise. */ int drm_fb_blit(struct iosys_map *dst, const unsigned int *dst_pitch, uint32_t dst_format, const struct iosys_map *src, const struct drm_framebuffer *fb, const struct drm_rect *clip, struct drm_format_conv_state *state) { uint32_t fb_format = fb->format->format; if (fb_format == dst_format) { drm_fb_memcpy(dst, dst_pitch, src, fb, clip); return 0; } else if (fb_format == (dst_format | DRM_FORMAT_BIG_ENDIAN)) { drm_fb_swab(dst, dst_pitch, src, fb, clip, false, state); return 0; } else if (fb_format == (dst_format & ~DRM_FORMAT_BIG_ENDIAN)) { drm_fb_swab(dst, dst_pitch, src, fb, clip, false, state); return 0; } else if (fb_format == DRM_FORMAT_XRGB8888) { if (dst_format == DRM_FORMAT_RGB565) { drm_fb_xrgb8888_to_rgb565(dst, dst_pitch, src, fb, clip, state, false); return 0; } else if (dst_format == DRM_FORMAT_XRGB1555) { drm_fb_xrgb8888_to_xrgb1555(dst, dst_pitch, src, fb, clip, state); return 0; } else if (dst_format == DRM_FORMAT_ARGB1555) { drm_fb_xrgb8888_to_argb1555(dst, dst_pitch, src, fb, clip, state); return 0; } else if (dst_format == DRM_FORMAT_RGBA5551) { drm_fb_xrgb8888_to_rgba5551(dst, dst_pitch, src, fb, clip, state); return 0; } else if (dst_format == DRM_FORMAT_RGB888) { drm_fb_xrgb8888_to_rgb888(dst, dst_pitch, src, fb, clip, state); return 0; } else if (dst_format == DRM_FORMAT_ARGB8888) { drm_fb_xrgb8888_to_argb8888(dst, dst_pitch, src, fb, clip, state); return 0; } else if (dst_format == DRM_FORMAT_XBGR8888) { drm_fb_xrgb8888_to_xbgr8888(dst, dst_pitch, src, fb, clip, state); return 0; } else if (dst_format == DRM_FORMAT_ABGR8888) { drm_fb_xrgb8888_to_abgr8888(dst, dst_pitch, src, fb, clip, state); return 0; } else if (dst_format == DRM_FORMAT_XRGB2101010) { drm_fb_xrgb8888_to_xrgb2101010(dst, dst_pitch, src, fb, clip, state); return 0; } else if (dst_format == DRM_FORMAT_ARGB2101010) { drm_fb_xrgb8888_to_argb2101010(dst, dst_pitch, src, fb, clip, state); return 0; } else if (dst_format == DRM_FORMAT_BGRX8888) { drm_fb_swab(dst, dst_pitch, src, fb, clip, false, state); return 0; } } drm_warn_once(fb->dev, "No conversion helper from %p4cc to %p4cc found.\n", &fb_format, &dst_format); return -EINVAL; } EXPORT_SYMBOL(drm_fb_blit); static void drm_fb_gray8_to_mono_line(void *dbuf, const void *sbuf, unsigned int pixels) { u8 *dbuf8 = dbuf; const u8 *sbuf8 = sbuf; while (pixels) { unsigned int i, bits = min(pixels, 8U); u8 byte = 0; for (i = 0; i < bits; i++, pixels--) { if (*sbuf8++ >= 128) byte |= BIT(i); } *dbuf8++ = byte; } } /** * drm_fb_xrgb8888_to_mono - Convert XRGB8888 to monochrome * @dst: Array of monochrome destination buffers (0=black, 1=white) * @dst_pitch: Array of numbers of bytes between the start of two consecutive scanlines * within @dst; can be NULL if scanlines are stored next to each other. * @src: Array of XRGB8888 source buffers * @fb: DRM framebuffer * @clip: Clip rectangle area to copy * @state: Transform and conversion state * * This function copies parts of a framebuffer to display memory and converts the * color format during the process. Destination and framebuffer formats must match. The * parameters @dst, @dst_pitch and @src refer to arrays. Each array must have at * least as many entries as there are planes in @fb's format. Each entry stores the * value for the format's respective color plane at the same index. * * This function does not apply clipping on @dst (i.e. the destination is at the * top-left corner). The first pixel (upper left corner of the clip rectangle) will * be converted and copied to the first bit (LSB) in the first byte of the monochrome * destination buffer. If the caller requires that the first pixel in a byte must * be located at an x-coordinate that is a multiple of 8, then the caller must take * care itself of supplying a suitable clip rectangle. * * DRM doesn't have native monochrome support. Drivers can use this function for * monochrome devices that don't support XRGB8888 natively. Such drivers can * announce the commonly supported XR24 format to userspace and use this function * to convert to the native format. * * This function uses drm_fb_xrgb8888_to_gray8() to convert to grayscale and * then the result is converted from grayscale to monochrome. */ void drm_fb_xrgb8888_to_mono(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, const struct drm_rect *clip, struct drm_format_conv_state *state) { static const unsigned int default_dst_pitch[DRM_FORMAT_MAX_PLANES] = { 0, 0, 0, 0 }; unsigned int linepixels = drm_rect_width(clip); unsigned int lines = drm_rect_height(clip); unsigned int cpp = fb->format->cpp[0]; unsigned int len_src32 = linepixels * cpp; struct drm_device *dev = fb->dev; void *vaddr = src[0].vaddr; unsigned int dst_pitch_0; unsigned int y; u8 *mono = dst[0].vaddr, *gray8; u32 *src32; if (drm_WARN_ON(dev, fb->format->format != DRM_FORMAT_XRGB8888)) return; if (!dst_pitch) dst_pitch = default_dst_pitch; dst_pitch_0 = dst_pitch[0]; /* * The mono destination buffer contains 1 bit per pixel */ if (!dst_pitch_0) dst_pitch_0 = DIV_ROUND_UP(linepixels, 8); /* * The dma memory is write-combined so reads are uncached. * Speed up by fetching one line at a time. * * Also, format conversion from XR24 to monochrome are done * line-by-line but are converted to 8-bit grayscale as an * intermediate step. * * Allocate a buffer to be used for both copying from the cma * memory and to store the intermediate grayscale line pixels. */ src32 = drm_format_conv_state_reserve(state, len_src32 + linepixels, GFP_KERNEL); if (!src32) return; gray8 = (u8 *)src32 + len_src32; vaddr += clip_offset(clip, fb->pitches[0], cpp); for (y = 0; y < lines; y++) { src32 = memcpy(src32, vaddr, len_src32); drm_fb_xrgb8888_to_gray8_line(gray8, src32, linepixels); drm_fb_gray8_to_mono_line(mono, gray8, linepixels); vaddr += fb->pitches[0]; mono += dst_pitch_0; } } EXPORT_SYMBOL(drm_fb_xrgb8888_to_mono); static uint32_t drm_fb_nonalpha_fourcc(uint32_t fourcc) { /* only handle formats with depth != 0 and alpha channel */ switch (fourcc) { case DRM_FORMAT_ARGB1555: return DRM_FORMAT_XRGB1555; case DRM_FORMAT_ABGR1555: return DRM_FORMAT_XBGR1555; case DRM_FORMAT_RGBA5551: return DRM_FORMAT_RGBX5551; case DRM_FORMAT_BGRA5551: return DRM_FORMAT_BGRX5551; case DRM_FORMAT_ARGB8888: return DRM_FORMAT_XRGB8888; case DRM_FORMAT_ABGR8888: return DRM_FORMAT_XBGR8888; case DRM_FORMAT_RGBA8888: return DRM_FORMAT_RGBX8888; case DRM_FORMAT_BGRA8888: return DRM_FORMAT_BGRX8888; case DRM_FORMAT_ARGB2101010: return DRM_FORMAT_XRGB2101010; case DRM_FORMAT_ABGR2101010: return DRM_FORMAT_XBGR2101010; case DRM_FORMAT_RGBA1010102: return DRM_FORMAT_RGBX1010102; case DRM_FORMAT_BGRA1010102: return DRM_FORMAT_BGRX1010102; } return fourcc; } static bool is_listed_fourcc(const uint32_t *fourccs, size_t nfourccs, uint32_t fourcc) { const uint32_t *fourccs_end = fourccs + nfourccs; while (fourccs < fourccs_end) { if (*fourccs == fourcc) return true; ++fourccs; } return false; } /** * drm_fb_build_fourcc_list - Filters a list of supported color formats against * the device's native formats * @dev: DRM device * @native_fourccs: 4CC codes of natively supported color formats * @native_nfourccs: The number of entries in @native_fourccs * @fourccs_out: Returns 4CC codes of supported color formats * @nfourccs_out: The number of available entries in @fourccs_out * * This function create a list of supported color format from natively * supported formats and additional emulated formats. * At a minimum, most userspace programs expect at least support for * XRGB8888 on the primary plane. Devices that have to emulate the * format, and possibly others, can use drm_fb_build_fourcc_list() to * create a list of supported color formats. The returned list can * be handed over to drm_universal_plane_init() et al. Native formats * will go before emulated formats. Native formats with alpha channel * will be replaced by such without, as primary planes usually don't * support alpha. Other heuristics might be applied * to optimize the order. Formats near the beginning of the list are * usually preferred over formats near the end of the list. * * Returns: * The number of color-formats 4CC codes returned in @fourccs_out. */ size_t drm_fb_build_fourcc_list(struct drm_device *dev, const u32 *native_fourccs, size_t native_nfourccs, u32 *fourccs_out, size_t nfourccs_out) { /* * XRGB8888 is the default fallback format for most of userspace * and it's currently the only format that should be emulated for * the primary plane. Only if there's ever another default fallback, * it should be added here. */ static const uint32_t extra_fourccs[] = { DRM_FORMAT_XRGB8888, }; static const size_t extra_nfourccs = ARRAY_SIZE(extra_fourccs); u32 *fourccs = fourccs_out; const u32 *fourccs_end = fourccs_out + nfourccs_out; size_t i; /* * The device's native formats go first. */ for (i = 0; i < native_nfourccs; ++i) { /* * Several DTs, boot loaders and firmware report native * alpha formats that are non-alpha formats instead. So * replace alpha formats by non-alpha formats. */ u32 fourcc = drm_fb_nonalpha_fourcc(native_fourccs[i]); if (is_listed_fourcc(fourccs_out, fourccs - fourccs_out, fourcc)) { continue; /* skip duplicate entries */ } else if (fourccs == fourccs_end) { drm_warn(dev, "Ignoring native format %p4cc\n", &fourcc); continue; /* end of available output buffer */ } drm_dbg_kms(dev, "adding native format %p4cc\n", &fourcc); *fourccs = fourcc; ++fourccs; } /* * The extra formats, emulated by the driver, go second. */ for (i = 0; (i < extra_nfourccs) && (fourccs < fourccs_end); ++i) { u32 fourcc = extra_fourccs[i]; if (is_listed_fourcc(fourccs_out, fourccs - fourccs_out, fourcc)) { continue; /* skip duplicate and native entries */ } else if (fourccs == fourccs_end) { drm_warn(dev, "Ignoring emulated format %p4cc\n", &fourcc); continue; /* end of available output buffer */ } drm_dbg_kms(dev, "adding emulated format %p4cc\n", &fourcc); *fourccs = fourcc; ++fourccs; } return fourccs - fourccs_out; } EXPORT_SYMBOL(drm_fb_build_fourcc_list); |
1 20 10 15 11 16 3 2 2 39 4 12 4 1 24 16 3 35 49 60 80 64 4 12 2 2 43 35 2 28 23 3 17 20 56 25 447 317 173 91 34 65 160 28 36 10 9 3 2 8 10 10 26 26 26 26 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 | // SPDX-License-Identifier: GPL-2.0-or-later /* scm.c - Socket level control messages processing. * * Author: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * Alignment and value checking mods by Craig Metz */ #include <linux/module.h> #include <linux/signal.h> #include <linux/capability.h> #include <linux/errno.h> #include <linux/sched.h> #include <linux/sched/user.h> #include <linux/mm.h> #include <linux/kernel.h> #include <linux/stat.h> #include <linux/socket.h> #include <linux/file.h> #include <linux/fcntl.h> #include <linux/net.h> #include <linux/interrupt.h> #include <linux/netdevice.h> #include <linux/security.h> #include <linux/pid_namespace.h> #include <linux/pid.h> #include <linux/nsproxy.h> #include <linux/slab.h> #include <linux/errqueue.h> #include <linux/io_uring.h> #include <linux/uaccess.h> #include <net/protocol.h> #include <linux/skbuff.h> #include <net/sock.h> #include <net/compat.h> #include <net/scm.h> #include <net/cls_cgroup.h> #include <net/af_unix.h> /* * Only allow a user to send credentials, that they could set with * setu(g)id. */ static __inline__ int scm_check_creds(struct ucred *creds) { const struct cred *cred = current_cred(); kuid_t uid = make_kuid(cred->user_ns, creds->uid); kgid_t gid = make_kgid(cred->user_ns, creds->gid); if (!uid_valid(uid) || !gid_valid(gid)) return -EINVAL; if ((creds->pid == task_tgid_vnr(current) || ns_capable(task_active_pid_ns(current)->user_ns, CAP_SYS_ADMIN)) && ((uid_eq(uid, cred->uid) || uid_eq(uid, cred->euid) || uid_eq(uid, cred->suid)) || ns_capable(cred->user_ns, CAP_SETUID)) && ((gid_eq(gid, cred->gid) || gid_eq(gid, cred->egid) || gid_eq(gid, cred->sgid)) || ns_capable(cred->user_ns, CAP_SETGID))) { return 0; } return -EPERM; } static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) { int *fdp = (int*)CMSG_DATA(cmsg); struct scm_fp_list *fpl = *fplp; struct file **fpp; int i, num; num = (cmsg->cmsg_len - sizeof(struct cmsghdr))/sizeof(int); if (num <= 0) return 0; if (num > SCM_MAX_FD) return -EINVAL; if (!fpl) { fpl = kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL_ACCOUNT); if (!fpl) return -ENOMEM; *fplp = fpl; fpl->count = 0; fpl->count_unix = 0; fpl->max = SCM_MAX_FD; fpl->user = NULL; #if IS_ENABLED(CONFIG_UNIX) fpl->inflight = false; fpl->dead = false; fpl->edges = NULL; INIT_LIST_HEAD(&fpl->vertices); #endif } fpp = &fpl->fp[fpl->count]; if (fpl->count + num > fpl->max) return -EINVAL; /* * Verify the descriptors and increment the usage count. */ for (i=0; i< num; i++) { int fd = fdp[i]; struct file *file; if (fd < 0 || !(file = fget_raw(fd))) return -EBADF; /* don't allow io_uring files */ if (io_is_uring_fops(file)) { fput(file); return -EINVAL; } if (unix_get_socket(file)) fpl->count_unix++; *fpp++ = file; fpl->count++; } if (!fpl->user) fpl->user = get_uid(current_user()); return num; } void __scm_destroy(struct scm_cookie *scm) { struct scm_fp_list *fpl = scm->fp; int i; if (fpl) { scm->fp = NULL; for (i=fpl->count-1; i>=0; i--) fput(fpl->fp[i]); free_uid(fpl->user); kfree(fpl); } } EXPORT_SYMBOL(__scm_destroy); int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) { const struct proto_ops *ops = READ_ONCE(sock->ops); struct cmsghdr *cmsg; int err; for_each_cmsghdr(cmsg, msg) { err = -EINVAL; /* Verify that cmsg_len is at least sizeof(struct cmsghdr) */ /* The first check was omitted in <= 2.2.5. The reasoning was that parser checks cmsg_len in any case, so that additional check would be work duplication. But if cmsg_level is not SOL_SOCKET, we do not check for too short ancillary data object at all! Oops. OK, let's add it... */ if (!CMSG_OK(msg, cmsg)) goto error; if (cmsg->cmsg_level != SOL_SOCKET) continue; switch (cmsg->cmsg_type) { case SCM_RIGHTS: if (!ops || ops->family != PF_UNIX) goto error; err=scm_fp_copy(cmsg, &p->fp); if (err<0) goto error; break; case SCM_CREDENTIALS: { struct ucred creds; kuid_t uid; kgid_t gid; if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct ucred))) goto error; memcpy(&creds, CMSG_DATA(cmsg), sizeof(struct ucred)); err = scm_check_creds(&creds); if (err) goto error; p->creds.pid = creds.pid; if (!p->pid || pid_vnr(p->pid) != creds.pid) { struct pid *pid; err = -ESRCH; pid = find_get_pid(creds.pid); if (!pid) goto error; put_pid(p->pid); p->pid = pid; } err = -EINVAL; uid = make_kuid(current_user_ns(), creds.uid); gid = make_kgid(current_user_ns(), creds.gid); if (!uid_valid(uid) || !gid_valid(gid)) goto error; p->creds.uid = uid; p->creds.gid = gid; break; } default: goto error; } } if (p->fp && !p->fp->count) { kfree(p->fp); p->fp = NULL; } return 0; error: scm_destroy(p); return err; } EXPORT_SYMBOL(__scm_send); int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data) { int cmlen = CMSG_LEN(len); if (msg->msg_flags & MSG_CMSG_COMPAT) return put_cmsg_compat(msg, level, type, len, data); if (!msg->msg_control || msg->msg_controllen < sizeof(struct cmsghdr)) { msg->msg_flags |= MSG_CTRUNC; return 0; /* XXX: return error? check spec. */ } if (msg->msg_controllen < cmlen) { msg->msg_flags |= MSG_CTRUNC; cmlen = msg->msg_controllen; } if (msg->msg_control_is_user) { struct cmsghdr __user *cm = msg->msg_control_user; check_object_size(data, cmlen - sizeof(*cm), true); if (!user_write_access_begin(cm, cmlen)) goto efault; unsafe_put_user(cmlen, &cm->cmsg_len, efault_end); unsafe_put_user(level, &cm->cmsg_level, efault_end); unsafe_put_user(type, &cm->cmsg_type, efault_end); unsafe_copy_to_user(CMSG_USER_DATA(cm), data, cmlen - sizeof(*cm), efault_end); user_write_access_end(); } else { struct cmsghdr *cm = msg->msg_control; cm->cmsg_level = level; cm->cmsg_type = type; cm->cmsg_len = cmlen; memcpy(CMSG_DATA(cm), data, cmlen - sizeof(*cm)); } cmlen = min(CMSG_SPACE(len), msg->msg_controllen); if (msg->msg_control_is_user) msg->msg_control_user += cmlen; else msg->msg_control += cmlen; msg->msg_controllen -= cmlen; return 0; efault_end: user_write_access_end(); efault: return -EFAULT; } EXPORT_SYMBOL(put_cmsg); void put_cmsg_scm_timestamping64(struct msghdr *msg, struct scm_timestamping_internal *tss_internal) { struct scm_timestamping64 tss; int i; for (i = 0; i < ARRAY_SIZE(tss.ts); i++) { tss.ts[i].tv_sec = tss_internal->ts[i].tv_sec; tss.ts[i].tv_nsec = tss_internal->ts[i].tv_nsec; } put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPING_NEW, sizeof(tss), &tss); } EXPORT_SYMBOL(put_cmsg_scm_timestamping64); void put_cmsg_scm_timestamping(struct msghdr *msg, struct scm_timestamping_internal *tss_internal) { struct scm_timestamping tss; int i; for (i = 0; i < ARRAY_SIZE(tss.ts); i++) { tss.ts[i].tv_sec = tss_internal->ts[i].tv_sec; tss.ts[i].tv_nsec = tss_internal->ts[i].tv_nsec; } put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPING_OLD, sizeof(tss), &tss); } EXPORT_SYMBOL(put_cmsg_scm_timestamping); static int scm_max_fds(struct msghdr *msg) { if (msg->msg_controllen <= sizeof(struct cmsghdr)) return 0; return (msg->msg_controllen - sizeof(struct cmsghdr)) / sizeof(int); } void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) { struct cmsghdr __user *cm = (__force struct cmsghdr __user *)msg->msg_control_user; unsigned int o_flags = (msg->msg_flags & MSG_CMSG_CLOEXEC) ? O_CLOEXEC : 0; int fdmax = min_t(int, scm_max_fds(msg), scm->fp->count); int __user *cmsg_data = CMSG_USER_DATA(cm); int err = 0, i; /* no use for FD passing from kernel space callers */ if (WARN_ON_ONCE(!msg->msg_control_is_user)) return; if (msg->msg_flags & MSG_CMSG_COMPAT) { scm_detach_fds_compat(msg, scm); return; } for (i = 0; i < fdmax; i++) { err = scm_recv_one_fd(scm->fp->fp[i], cmsg_data + i, o_flags); if (err < 0) break; } if (i > 0) { int cmlen = CMSG_LEN(i * sizeof(int)); err = put_user(SOL_SOCKET, &cm->cmsg_level); if (!err) err = put_user(SCM_RIGHTS, &cm->cmsg_type); if (!err) err = put_user(cmlen, &cm->cmsg_len); if (!err) { cmlen = CMSG_SPACE(i * sizeof(int)); if (msg->msg_controllen < cmlen) cmlen = msg->msg_controllen; msg->msg_control_user += cmlen; msg->msg_controllen -= cmlen; } } if (i < scm->fp->count || (scm->fp->count && fdmax <= 0)) msg->msg_flags |= MSG_CTRUNC; /* * All of the files that fit in the message have had their usage counts * incremented, so we just free the list. */ __scm_destroy(scm); } EXPORT_SYMBOL(scm_detach_fds); struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl) { struct scm_fp_list *new_fpl; int i; if (!fpl) return NULL; new_fpl = kmemdup(fpl, offsetof(struct scm_fp_list, fp[fpl->count]), GFP_KERNEL_ACCOUNT); if (new_fpl) { for (i = 0; i < fpl->count; i++) get_file(fpl->fp[i]); new_fpl->max = new_fpl->count; new_fpl->user = get_uid(fpl->user); #if IS_ENABLED(CONFIG_UNIX) new_fpl->inflight = false; new_fpl->edges = NULL; INIT_LIST_HEAD(&new_fpl->vertices); #endif } return new_fpl; } EXPORT_SYMBOL(scm_fp_dup); |
3 1 1 1 3 3 3 3 1 1 3 3 4 4 1 1 1 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 | // SPDX-License-Identifier: GPL-2.0-or-later /* xfrm6_protocol.c - Generic xfrm protocol multiplexer for ipv6. * * Copyright (C) 2013 secunet Security Networks AG * * Author: * Steffen Klassert <steffen.klassert@secunet.com> * * Based on: * net/ipv4/xfrm4_protocol.c */ #include <linux/init.h> #include <linux/mutex.h> #include <linux/skbuff.h> #include <linux/icmpv6.h> #include <net/ip6_route.h> #include <net/ipv6.h> #include <net/protocol.h> #include <net/xfrm.h> static struct xfrm6_protocol __rcu *esp6_handlers __read_mostly; static struct xfrm6_protocol __rcu *ah6_handlers __read_mostly; static struct xfrm6_protocol __rcu *ipcomp6_handlers __read_mostly; static DEFINE_MUTEX(xfrm6_protocol_mutex); static inline struct xfrm6_protocol __rcu **proto_handlers(u8 protocol) { switch (protocol) { case IPPROTO_ESP: return &esp6_handlers; case IPPROTO_AH: return &ah6_handlers; case IPPROTO_COMP: return &ipcomp6_handlers; } return NULL; } #define for_each_protocol_rcu(head, handler) \ for (handler = rcu_dereference(head); \ handler != NULL; \ handler = rcu_dereference(handler->next)) \ static int xfrm6_rcv_cb(struct sk_buff *skb, u8 protocol, int err) { int ret; struct xfrm6_protocol *handler; struct xfrm6_protocol __rcu **head = proto_handlers(protocol); if (!head) return 0; for_each_protocol_rcu(*proto_handlers(protocol), handler) if ((ret = handler->cb_handler(skb, err)) <= 0) return ret; return 0; } int xfrm6_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) { int ret; struct xfrm6_protocol *handler; struct xfrm6_protocol __rcu **head = proto_handlers(nexthdr); XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL; XFRM_SPI_SKB_CB(skb)->family = AF_INET6; XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr); if (!head) goto out; if (!skb_dst(skb)) { const struct ipv6hdr *ip6h = ipv6_hdr(skb); int flags = RT6_LOOKUP_F_HAS_SADDR; struct dst_entry *dst; struct flowi6 fl6 = { .flowi6_iif = skb->dev->ifindex, .daddr = ip6h->daddr, .saddr = ip6h->saddr, .flowlabel = ip6_flowinfo(ip6h), .flowi6_mark = skb->mark, .flowi6_proto = ip6h->nexthdr, }; dst = ip6_route_input_lookup(dev_net(skb->dev), skb->dev, &fl6, skb, flags); if (dst->error) goto drop; skb_dst_set(skb, dst); } for_each_protocol_rcu(*head, handler) if ((ret = handler->input_handler(skb, nexthdr, spi, encap_type)) != -EINVAL) return ret; out: icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); drop: kfree_skb(skb); return 0; } EXPORT_SYMBOL(xfrm6_rcv_encap); static int xfrm6_esp_rcv(struct sk_buff *skb) { int ret; struct xfrm6_protocol *handler; XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL; for_each_protocol_rcu(esp6_handlers, handler) if ((ret = handler->handler(skb)) != -EINVAL) return ret; icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); kfree_skb(skb); return 0; } static int xfrm6_esp_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { struct xfrm6_protocol *handler; for_each_protocol_rcu(esp6_handlers, handler) if (!handler->err_handler(skb, opt, type, code, offset, info)) return 0; return -ENOENT; } static int xfrm6_ah_rcv(struct sk_buff *skb) { int ret; struct xfrm6_protocol *handler; XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL; for_each_protocol_rcu(ah6_handlers, handler) if ((ret = handler->handler(skb)) != -EINVAL) return ret; icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); kfree_skb(skb); return 0; } static int xfrm6_ah_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { struct xfrm6_protocol *handler; for_each_protocol_rcu(ah6_handlers, handler) if (!handler->err_handler(skb, opt, type, code, offset, info)) return 0; return -ENOENT; } static int xfrm6_ipcomp_rcv(struct sk_buff *skb) { int ret; struct xfrm6_protocol *handler; XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL; for_each_protocol_rcu(ipcomp6_handlers, handler) if ((ret = handler->handler(skb)) != -EINVAL) return ret; icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); kfree_skb(skb); return 0; } static int xfrm6_ipcomp_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { struct xfrm6_protocol *handler; for_each_protocol_rcu(ipcomp6_handlers, handler) if (!handler->err_handler(skb, opt, type, code, offset, info)) return 0; return -ENOENT; } static const struct inet6_protocol esp6_protocol = { .handler = xfrm6_esp_rcv, .err_handler = xfrm6_esp_err, .flags = INET6_PROTO_NOPOLICY, }; static const struct inet6_protocol ah6_protocol = { .handler = xfrm6_ah_rcv, .err_handler = xfrm6_ah_err, .flags = INET6_PROTO_NOPOLICY, }; static const struct inet6_protocol ipcomp6_protocol = { .handler = xfrm6_ipcomp_rcv, .err_handler = xfrm6_ipcomp_err, .flags = INET6_PROTO_NOPOLICY, }; static const struct xfrm_input_afinfo xfrm6_input_afinfo = { .family = AF_INET6, .callback = xfrm6_rcv_cb, }; static inline const struct inet6_protocol *netproto(unsigned char protocol) { switch (protocol) { case IPPROTO_ESP: return &esp6_protocol; case IPPROTO_AH: return &ah6_protocol; case IPPROTO_COMP: return &ipcomp6_protocol; } return NULL; } int xfrm6_protocol_register(struct xfrm6_protocol *handler, unsigned char protocol) { struct xfrm6_protocol __rcu **pprev; struct xfrm6_protocol *t; bool add_netproto = false; int ret = -EEXIST; int priority = handler->priority; if (!proto_handlers(protocol) || !netproto(protocol)) return -EINVAL; mutex_lock(&xfrm6_protocol_mutex); if (!rcu_dereference_protected(*proto_handlers(protocol), lockdep_is_held(&xfrm6_protocol_mutex))) add_netproto = true; for (pprev = proto_handlers(protocol); (t = rcu_dereference_protected(*pprev, lockdep_is_held(&xfrm6_protocol_mutex))) != NULL; pprev = &t->next) { if (t->priority < priority) break; if (t->priority == priority) goto err; } handler->next = *pprev; rcu_assign_pointer(*pprev, handler); ret = 0; err: mutex_unlock(&xfrm6_protocol_mutex); if (add_netproto) { if (inet6_add_protocol(netproto(protocol), protocol)) { pr_err("%s: can't add protocol\n", __func__); ret = -EAGAIN; } } return ret; } EXPORT_SYMBOL(xfrm6_protocol_register); int xfrm6_protocol_deregister(struct xfrm6_protocol *handler, unsigned char protocol) { struct xfrm6_protocol __rcu **pprev; struct xfrm6_protocol *t; int ret = -ENOENT; if (!proto_handlers(protocol) || !netproto(protocol)) return -EINVAL; mutex_lock(&xfrm6_protocol_mutex); for (pprev = proto_handlers(protocol); (t = rcu_dereference_protected(*pprev, lockdep_is_held(&xfrm6_protocol_mutex))) != NULL; pprev = &t->next) { if (t == handler) { *pprev = handler->next; ret = 0; break; } } if (!rcu_dereference_protected(*proto_handlers(protocol), lockdep_is_held(&xfrm6_protocol_mutex))) { if (inet6_del_protocol(netproto(protocol), protocol) < 0) { pr_err("%s: can't remove protocol\n", __func__); ret = -EAGAIN; } } mutex_unlock(&xfrm6_protocol_mutex); synchronize_net(); return ret; } EXPORT_SYMBOL(xfrm6_protocol_deregister); int __init xfrm6_protocol_init(void) { return xfrm_input_register_afinfo(&xfrm6_input_afinfo); } void xfrm6_protocol_fini(void) { xfrm_input_unregister_afinfo(&xfrm6_input_afinfo); } |
3 3 4 1 3 2 1 1 2 1 2 2 4 1 1 10 8 3 10 7 2 3 7 7 7 7 2 3 4 5 2 1 1 5 5 2 7 6 6 4 6 6 6 3 22 1 1 2 2 9 6 1 2 8 3 2 2 3 2 3 6 7 13 13 13 13 6 6 4 2 4 2 6 5 5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 | // SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/ife.c Inter-FE action based on ForCES WG InterFE LFB * * Refer to: * draft-ietf-forces-interfelfb-03 * and * netdev01 paper: * "Distributing Linux Traffic Control Classifier-Action * Subsystem" * Authors: Jamal Hadi Salim and Damascene M. Joachimpillai * * copyright Jamal Hadi Salim (2015) */ #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/skbuff.h> #include <linux/rtnetlink.h> #include <linux/module.h> #include <linux/init.h> #include <net/net_namespace.h> #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/pkt_cls.h> #include <uapi/linux/tc_act/tc_ife.h> #include <net/tc_act/tc_ife.h> #include <linux/etherdevice.h> #include <net/ife.h> #include <net/tc_wrapper.h> static int max_metacnt = IFE_META_MAX + 1; static struct tc_action_ops act_ife_ops; static const struct nla_policy ife_policy[TCA_IFE_MAX + 1] = { [TCA_IFE_PARMS] = { .len = sizeof(struct tc_ife)}, [TCA_IFE_DMAC] = { .len = ETH_ALEN}, [TCA_IFE_SMAC] = { .len = ETH_ALEN}, [TCA_IFE_TYPE] = { .type = NLA_U16}, }; int ife_encode_meta_u16(u16 metaval, void *skbdata, struct tcf_meta_info *mi) { u16 edata = 0; if (mi->metaval) edata = *(u16 *)mi->metaval; else if (metaval) edata = metaval; if (!edata) /* will not encode */ return 0; edata = htons(edata); return ife_tlv_meta_encode(skbdata, mi->metaid, 2, &edata); } EXPORT_SYMBOL_GPL(ife_encode_meta_u16); int ife_get_meta_u32(struct sk_buff *skb, struct tcf_meta_info *mi) { if (mi->metaval) return nla_put_u32(skb, mi->metaid, *(u32 *)mi->metaval); else return nla_put(skb, mi->metaid, 0, NULL); } EXPORT_SYMBOL_GPL(ife_get_meta_u32); int ife_check_meta_u32(u32 metaval, struct tcf_meta_info *mi) { if (metaval || mi->metaval) return 8; /* T+L+V == 2+2+4 */ return 0; } EXPORT_SYMBOL_GPL(ife_check_meta_u32); int ife_check_meta_u16(u16 metaval, struct tcf_meta_info *mi) { if (metaval || mi->metaval) return 8; /* T+L+(V) == 2+2+(2+2bytepad) */ return 0; } EXPORT_SYMBOL_GPL(ife_check_meta_u16); int ife_encode_meta_u32(u32 metaval, void *skbdata, struct tcf_meta_info *mi) { u32 edata = metaval; if (mi->metaval) edata = *(u32 *)mi->metaval; else if (metaval) edata = metaval; if (!edata) /* will not encode */ return 0; edata = htonl(edata); return ife_tlv_meta_encode(skbdata, mi->metaid, 4, &edata); } EXPORT_SYMBOL_GPL(ife_encode_meta_u32); int ife_get_meta_u16(struct sk_buff *skb, struct tcf_meta_info *mi) { if (mi->metaval) return nla_put_u16(skb, mi->metaid, *(u16 *)mi->metaval); else return nla_put(skb, mi->metaid, 0, NULL); } EXPORT_SYMBOL_GPL(ife_get_meta_u16); int ife_alloc_meta_u32(struct tcf_meta_info *mi, void *metaval, gfp_t gfp) { mi->metaval = kmemdup(metaval, sizeof(u32), gfp); if (!mi->metaval) return -ENOMEM; return 0; } EXPORT_SYMBOL_GPL(ife_alloc_meta_u32); int ife_alloc_meta_u16(struct tcf_meta_info *mi, void *metaval, gfp_t gfp) { mi->metaval = kmemdup(metaval, sizeof(u16), gfp); if (!mi->metaval) return -ENOMEM; return 0; } EXPORT_SYMBOL_GPL(ife_alloc_meta_u16); void ife_release_meta_gen(struct tcf_meta_info *mi) { kfree(mi->metaval); } EXPORT_SYMBOL_GPL(ife_release_meta_gen); int ife_validate_meta_u32(void *val, int len) { if (len == sizeof(u32)) return 0; return -EINVAL; } EXPORT_SYMBOL_GPL(ife_validate_meta_u32); int ife_validate_meta_u16(void *val, int len) { /* length will not include padding */ if (len == sizeof(u16)) return 0; return -EINVAL; } EXPORT_SYMBOL_GPL(ife_validate_meta_u16); static LIST_HEAD(ifeoplist); static DEFINE_RWLOCK(ife_mod_lock); static struct tcf_meta_ops *find_ife_oplist(u16 metaid) { struct tcf_meta_ops *o; read_lock(&ife_mod_lock); list_for_each_entry(o, &ifeoplist, list) { if (o->metaid == metaid) { if (!try_module_get(o->owner)) o = NULL; read_unlock(&ife_mod_lock); return o; } } read_unlock(&ife_mod_lock); return NULL; } int register_ife_op(struct tcf_meta_ops *mops) { struct tcf_meta_ops *m; if (!mops->metaid || !mops->metatype || !mops->name || !mops->check_presence || !mops->encode || !mops->decode || !mops->get || !mops->alloc) return -EINVAL; write_lock(&ife_mod_lock); list_for_each_entry(m, &ifeoplist, list) { if (m->metaid == mops->metaid || (strcmp(mops->name, m->name) == 0)) { write_unlock(&ife_mod_lock); return -EEXIST; } } if (!mops->release) mops->release = ife_release_meta_gen; list_add_tail(&mops->list, &ifeoplist); write_unlock(&ife_mod_lock); return 0; } EXPORT_SYMBOL_GPL(unregister_ife_op); int unregister_ife_op(struct tcf_meta_ops *mops) { struct tcf_meta_ops *m; int err = -ENOENT; write_lock(&ife_mod_lock); list_for_each_entry(m, &ifeoplist, list) { if (m->metaid == mops->metaid) { list_del(&mops->list); err = 0; break; } } write_unlock(&ife_mod_lock); return err; } EXPORT_SYMBOL_GPL(register_ife_op); static int ife_validate_metatype(struct tcf_meta_ops *ops, void *val, int len) { int ret = 0; /* XXX: unfortunately cant use nla_policy at this point * because a length of 0 is valid in the case of * "allow". "use" semantics do enforce for proper * length and i couldve use nla_policy but it makes it hard * to use it just for that.. */ if (ops->validate) return ops->validate(val, len); if (ops->metatype == NLA_U32) ret = ife_validate_meta_u32(val, len); else if (ops->metatype == NLA_U16) ret = ife_validate_meta_u16(val, len); return ret; } #ifdef CONFIG_MODULES static const char *ife_meta_id2name(u32 metaid) { switch (metaid) { case IFE_META_SKBMARK: return "skbmark"; case IFE_META_PRIO: return "skbprio"; case IFE_META_TCINDEX: return "tcindex"; default: return "unknown"; } } #endif /* called when adding new meta information */ static int load_metaops_and_vet(u32 metaid, void *val, int len, bool rtnl_held) { struct tcf_meta_ops *ops = find_ife_oplist(metaid); int ret = 0; if (!ops) { ret = -ENOENT; #ifdef CONFIG_MODULES if (rtnl_held) rtnl_unlock(); request_module("ife-meta-%s", ife_meta_id2name(metaid)); if (rtnl_held) rtnl_lock(); ops = find_ife_oplist(metaid); #endif } if (ops) { ret = 0; if (len) ret = ife_validate_metatype(ops, val, len); module_put(ops->owner); } return ret; } /* called when adding new meta information */ static int __add_metainfo(const struct tcf_meta_ops *ops, struct tcf_ife_info *ife, u32 metaid, void *metaval, int len, bool atomic, bool exists) { struct tcf_meta_info *mi = NULL; int ret = 0; mi = kzalloc(sizeof(*mi), atomic ? GFP_ATOMIC : GFP_KERNEL); if (!mi) return -ENOMEM; mi->metaid = metaid; mi->ops = ops; if (len > 0) { ret = ops->alloc(mi, metaval, atomic ? GFP_ATOMIC : GFP_KERNEL); if (ret != 0) { kfree(mi); return ret; } } if (exists) spin_lock_bh(&ife->tcf_lock); list_add_tail(&mi->metalist, &ife->metalist); if (exists) spin_unlock_bh(&ife->tcf_lock); return ret; } static int add_metainfo_and_get_ops(const struct tcf_meta_ops *ops, struct tcf_ife_info *ife, u32 metaid, bool exists) { int ret; if (!try_module_get(ops->owner)) return -ENOENT; ret = __add_metainfo(ops, ife, metaid, NULL, 0, true, exists); if (ret) module_put(ops->owner); return ret; } static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, int len, bool exists) { const struct tcf_meta_ops *ops = find_ife_oplist(metaid); int ret; if (!ops) return -ENOENT; ret = __add_metainfo(ops, ife, metaid, metaval, len, false, exists); if (ret) /*put back what find_ife_oplist took */ module_put(ops->owner); return ret; } static int use_all_metadata(struct tcf_ife_info *ife, bool exists) { struct tcf_meta_ops *o; int rc = 0; int installed = 0; read_lock(&ife_mod_lock); list_for_each_entry(o, &ifeoplist, list) { rc = add_metainfo_and_get_ops(o, ife, o->metaid, exists); if (rc == 0) installed += 1; } read_unlock(&ife_mod_lock); if (installed) return 0; else return -EINVAL; } static int dump_metalist(struct sk_buff *skb, struct tcf_ife_info *ife) { struct tcf_meta_info *e; struct nlattr *nest; unsigned char *b = skb_tail_pointer(skb); int total_encoded = 0; /*can only happen on decode */ if (list_empty(&ife->metalist)) return 0; nest = nla_nest_start_noflag(skb, TCA_IFE_METALST); if (!nest) goto out_nlmsg_trim; list_for_each_entry(e, &ife->metalist, metalist) { if (!e->ops->get(skb, e)) total_encoded += 1; } if (!total_encoded) goto out_nlmsg_trim; nla_nest_end(skb, nest); return 0; out_nlmsg_trim: nlmsg_trim(skb, b); return -1; } /* under ife->tcf_lock */ static void _tcf_ife_cleanup(struct tc_action *a) { struct tcf_ife_info *ife = to_ife(a); struct tcf_meta_info *e, *n; list_for_each_entry_safe(e, n, &ife->metalist, metalist) { list_del(&e->metalist); if (e->metaval) { if (e->ops->release) e->ops->release(e); else kfree(e->metaval); } module_put(e->ops->owner); kfree(e); } } static void tcf_ife_cleanup(struct tc_action *a) { struct tcf_ife_info *ife = to_ife(a); struct tcf_ife_params *p; spin_lock_bh(&ife->tcf_lock); _tcf_ife_cleanup(a); spin_unlock_bh(&ife->tcf_lock); p = rcu_dereference_protected(ife->params, 1); if (p) kfree_rcu(p, rcu); } static int load_metalist(struct nlattr **tb, bool rtnl_held) { int i; for (i = 1; i < max_metacnt; i++) { if (tb[i]) { void *val = nla_data(tb[i]); int len = nla_len(tb[i]); int rc; rc = load_metaops_and_vet(i, val, len, rtnl_held); if (rc != 0) return rc; } } return 0; } static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb, bool exists, bool rtnl_held) { int len = 0; int rc = 0; int i = 0; void *val; for (i = 1; i < max_metacnt; i++) { if (tb[i]) { val = nla_data(tb[i]); len = nla_len(tb[i]); rc = add_metainfo(ife, i, val, len, exists); if (rc) return rc; } } return rc; } static int tcf_ife_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, act_ife_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_IFE_MAX + 1]; struct nlattr *tb2[IFE_META_MAX + 1]; struct tcf_chain *goto_ch = NULL; struct tcf_ife_params *p; struct tcf_ife_info *ife; u16 ife_type = ETH_P_IFE; struct tc_ife *parm; u8 *daddr = NULL; u8 *saddr = NULL; bool exists = false; int ret = 0; u32 index; int err; if (!nla) { NL_SET_ERR_MSG_MOD(extack, "IFE requires attributes to be passed"); return -EINVAL; } err = nla_parse_nested_deprecated(tb, TCA_IFE_MAX, nla, ife_policy, NULL); if (err < 0) return err; if (!tb[TCA_IFE_PARMS]) return -EINVAL; parm = nla_data(tb[TCA_IFE_PARMS]); /* IFE_DECODE is 0 and indicates the opposite of IFE_ENCODE because * they cannot run as the same time. Check on all other values which * are not supported right now. */ if (parm->flags & ~IFE_ENCODE) return -EINVAL; p = kzalloc(sizeof(*p), GFP_KERNEL); if (!p) return -ENOMEM; if (tb[TCA_IFE_METALST]) { err = nla_parse_nested_deprecated(tb2, IFE_META_MAX, tb[TCA_IFE_METALST], NULL, NULL); if (err) { kfree(p); return err; } err = load_metalist(tb2, !(flags & TCA_ACT_FLAGS_NO_RTNL)); if (err) { kfree(p); return err; } } index = parm->index; err = tcf_idr_check_alloc(tn, &index, a, bind); if (err < 0) { kfree(p); return err; } exists = err; if (exists && bind) { kfree(p); return ACT_P_BOUND; } if (!exists) { ret = tcf_idr_create(tn, index, est, a, &act_ife_ops, bind, true, flags); if (ret) { tcf_idr_cleanup(tn, index); kfree(p); return ret; } ret = ACT_P_CREATED; } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); kfree(p); return -EEXIST; } ife = to_ife(*a); if (ret == ACT_P_CREATED) INIT_LIST_HEAD(&ife->metalist); err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); if (err < 0) goto release_idr; p->flags = parm->flags; if (parm->flags & IFE_ENCODE) { if (tb[TCA_IFE_TYPE]) ife_type = nla_get_u16(tb[TCA_IFE_TYPE]); if (tb[TCA_IFE_DMAC]) daddr = nla_data(tb[TCA_IFE_DMAC]); if (tb[TCA_IFE_SMAC]) saddr = nla_data(tb[TCA_IFE_SMAC]); } if (parm->flags & IFE_ENCODE) { if (daddr) ether_addr_copy(p->eth_dst, daddr); else eth_zero_addr(p->eth_dst); if (saddr) ether_addr_copy(p->eth_src, saddr); else eth_zero_addr(p->eth_src); p->eth_type = ife_type; } if (tb[TCA_IFE_METALST]) { err = populate_metalist(ife, tb2, exists, !(flags & TCA_ACT_FLAGS_NO_RTNL)); if (err) goto metadata_parse_err; } else { /* if no passed metadata allow list or passed allow-all * then here we process by adding as many supported metadatum * as we can. You better have at least one else we are * going to bail out */ err = use_all_metadata(ife, exists); if (err) goto metadata_parse_err; } if (exists) spin_lock_bh(&ife->tcf_lock); /* protected by tcf_lock when modifying existing action */ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); p = rcu_replace_pointer(ife->params, p, 1); if (exists) spin_unlock_bh(&ife->tcf_lock); if (goto_ch) tcf_chain_put_by_act(goto_ch); if (p) kfree_rcu(p, rcu); return ret; metadata_parse_err: if (goto_ch) tcf_chain_put_by_act(goto_ch); release_idr: kfree(p); tcf_idr_release(*a, bind); return err; } static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { unsigned char *b = skb_tail_pointer(skb); struct tcf_ife_info *ife = to_ife(a); struct tcf_ife_params *p; struct tc_ife opt = { .index = ife->tcf_index, .refcnt = refcount_read(&ife->tcf_refcnt) - ref, .bindcnt = atomic_read(&ife->tcf_bindcnt) - bind, }; struct tcf_t t; spin_lock_bh(&ife->tcf_lock); opt.action = ife->tcf_action; p = rcu_dereference_protected(ife->params, lockdep_is_held(&ife->tcf_lock)); opt.flags = p->flags; if (nla_put(skb, TCA_IFE_PARMS, sizeof(opt), &opt)) goto nla_put_failure; tcf_tm_dump(&t, &ife->tcf_tm); if (nla_put_64bit(skb, TCA_IFE_TM, sizeof(t), &t, TCA_IFE_PAD)) goto nla_put_failure; if (!is_zero_ether_addr(p->eth_dst)) { if (nla_put(skb, TCA_IFE_DMAC, ETH_ALEN, p->eth_dst)) goto nla_put_failure; } if (!is_zero_ether_addr(p->eth_src)) { if (nla_put(skb, TCA_IFE_SMAC, ETH_ALEN, p->eth_src)) goto nla_put_failure; } if (nla_put(skb, TCA_IFE_TYPE, 2, &p->eth_type)) goto nla_put_failure; if (dump_metalist(skb, ife)) { /*ignore failure to dump metalist */ pr_info("Failed to dump metalist\n"); } spin_unlock_bh(&ife->tcf_lock); return skb->len; nla_put_failure: spin_unlock_bh(&ife->tcf_lock); nlmsg_trim(skb, b); return -1; } static int find_decode_metaid(struct sk_buff *skb, struct tcf_ife_info *ife, u16 metaid, u16 mlen, void *mdata) { struct tcf_meta_info *e; /* XXX: use hash to speed up */ list_for_each_entry(e, &ife->metalist, metalist) { if (metaid == e->metaid) { if (e->ops) { /* We check for decode presence already */ return e->ops->decode(skb, mdata, mlen); } } } return -ENOENT; } static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_ife_info *ife = to_ife(a); int action = ife->tcf_action; u8 *ifehdr_end; u8 *tlv_data; u16 metalen; bstats_update(this_cpu_ptr(ife->common.cpu_bstats), skb); tcf_lastuse_update(&ife->tcf_tm); if (skb_at_tc_ingress(skb)) skb_push(skb, skb->dev->hard_header_len); tlv_data = ife_decode(skb, &metalen); if (unlikely(!tlv_data)) { qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats)); return TC_ACT_SHOT; } ifehdr_end = tlv_data + metalen; for (; tlv_data < ifehdr_end; tlv_data = ife_tlv_meta_next(tlv_data)) { u8 *curr_data; u16 mtype; u16 dlen; curr_data = ife_tlv_meta_decode(tlv_data, ifehdr_end, &mtype, &dlen, NULL); if (!curr_data) { qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats)); return TC_ACT_SHOT; } if (find_decode_metaid(skb, ife, mtype, dlen, curr_data)) { /* abuse overlimits to count when we receive metadata * but dont have an ops for it */ pr_info_ratelimited("Unknown metaid %d dlen %d\n", mtype, dlen); qstats_overlimit_inc(this_cpu_ptr(ife->common.cpu_qstats)); } } if (WARN_ON(tlv_data != ifehdr_end)) { qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats)); return TC_ACT_SHOT; } skb->protocol = eth_type_trans(skb, skb->dev); skb_reset_network_header(skb); return action; } /*XXX: check if we can do this at install time instead of current * send data path **/ static int ife_get_sz(struct sk_buff *skb, struct tcf_ife_info *ife) { struct tcf_meta_info *e, *n; int tot_run_sz = 0, run_sz = 0; list_for_each_entry_safe(e, n, &ife->metalist, metalist) { if (e->ops->check_presence) { run_sz = e->ops->check_presence(skb, e); tot_run_sz += run_sz; } } return tot_run_sz; } static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res, struct tcf_ife_params *p) { struct tcf_ife_info *ife = to_ife(a); int action = ife->tcf_action; struct ethhdr *oethh; /* outer ether header */ struct tcf_meta_info *e; /* OUTERHDR:TOTMETALEN:{TLVHDR:Metadatum:TLVHDR..}:ORIGDATA where ORIGDATA = original ethernet header ... */ u16 metalen = ife_get_sz(skb, ife); int hdrm = metalen + skb->dev->hard_header_len + IFE_METAHDRLEN; unsigned int skboff = 0; int new_len = skb->len + hdrm; bool exceed_mtu = false; void *ife_meta; int err = 0; if (!skb_at_tc_ingress(skb)) { if (new_len > skb->dev->mtu) exceed_mtu = true; } bstats_update(this_cpu_ptr(ife->common.cpu_bstats), skb); tcf_lastuse_update(&ife->tcf_tm); if (!metalen) { /* no metadata to send */ /* abuse overlimits to count when we allow packet * with no metadata */ qstats_overlimit_inc(this_cpu_ptr(ife->common.cpu_qstats)); return action; } /* could be stupid policy setup or mtu config * so lets be conservative.. */ if ((action == TC_ACT_SHOT) || exceed_mtu) { qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats)); return TC_ACT_SHOT; } if (skb_at_tc_ingress(skb)) skb_push(skb, skb->dev->hard_header_len); ife_meta = ife_encode(skb, metalen); spin_lock(&ife->tcf_lock); /* XXX: we dont have a clever way of telling encode to * not repeat some of the computations that are done by * ops->presence_check... */ list_for_each_entry(e, &ife->metalist, metalist) { if (e->ops->encode) { err = e->ops->encode(skb, (void *)(ife_meta + skboff), e); } if (err < 0) { /* too corrupt to keep around if overwritten */ spin_unlock(&ife->tcf_lock); qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats)); return TC_ACT_SHOT; } skboff += err; } spin_unlock(&ife->tcf_lock); oethh = (struct ethhdr *)skb->data; if (!is_zero_ether_addr(p->eth_src)) ether_addr_copy(oethh->h_source, p->eth_src); if (!is_zero_ether_addr(p->eth_dst)) ether_addr_copy(oethh->h_dest, p->eth_dst); oethh->h_proto = htons(p->eth_type); if (skb_at_tc_ingress(skb)) skb_pull(skb, skb->dev->hard_header_len); return action; } TC_INDIRECT_SCOPE int tcf_ife_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_ife_info *ife = to_ife(a); struct tcf_ife_params *p; int ret; p = rcu_dereference_bh(ife->params); if (p->flags & IFE_ENCODE) { ret = tcf_ife_encode(skb, a, res, p); return ret; } return tcf_ife_decode(skb, a, res); } static struct tc_action_ops act_ife_ops = { .kind = "ife", .id = TCA_ID_IFE, .owner = THIS_MODULE, .act = tcf_ife_act, .dump = tcf_ife_dump, .cleanup = tcf_ife_cleanup, .init = tcf_ife_init, .size = sizeof(struct tcf_ife_info), }; MODULE_ALIAS_NET_ACT("ife"); static __net_init int ife_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, act_ife_ops.net_id); return tc_action_net_init(net, tn, &act_ife_ops); } static void __net_exit ife_exit_net(struct list_head *net_list) { tc_action_net_exit(net_list, act_ife_ops.net_id); } static struct pernet_operations ife_net_ops = { .init = ife_init_net, .exit_batch = ife_exit_net, .id = &act_ife_ops.net_id, .size = sizeof(struct tc_action_net), }; static int __init ife_init_module(void) { return tcf_register_action(&act_ife_ops, &ife_net_ops); } static void __exit ife_cleanup_module(void) { tcf_unregister_action(&act_ife_ops, &ife_net_ops); } module_init(ife_init_module); module_exit(ife_cleanup_module); MODULE_AUTHOR("Jamal Hadi Salim(2015)"); MODULE_DESCRIPTION("Inter-FE LFB action"); MODULE_LICENSE("GPL"); |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 | /* SPDX-License-Identifier: GPL-2.0 */ /* * QNX6 file system, Linux implementation. * * Version : 1.0.0 * * History : * * 01-02-2012 by Kai Bankett (chaosman@ontika.net) : first release. * 16-02-2012 page map extension by Al Viro * */ #ifdef pr_fmt #undef pr_fmt #endif #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/fs.h> #include <linux/pagemap.h> typedef __u16 __bitwise __fs16; typedef __u32 __bitwise __fs32; typedef __u64 __bitwise __fs64; #include <linux/qnx6_fs.h> struct qnx6_sb_info { struct buffer_head *sb_buf; /* superblock buffer */ struct qnx6_super_block *sb; /* our superblock */ int s_blks_off; /* blkoffset fs-startpoint */ int s_ptrbits; /* indirect pointer bitfield */ unsigned long s_mount_opt; /* all mount options */ int s_bytesex; /* holds endianess info */ struct inode * inodes; struct inode * longfile; }; struct qnx6_inode_info { __fs32 di_block_ptr[QNX6_NO_DIRECT_POINTERS]; __u8 di_filelevels; __u32 i_dir_start_lookup; struct inode vfs_inode; }; extern struct inode *qnx6_iget(struct super_block *sb, unsigned ino); extern struct dentry *qnx6_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags); #ifdef CONFIG_QNX6FS_DEBUG extern void qnx6_superblock_debug(struct qnx6_super_block *, struct super_block *); #endif extern const struct inode_operations qnx6_dir_inode_operations; extern const struct file_operations qnx6_dir_operations; static inline struct qnx6_sb_info *QNX6_SB(struct super_block *sb) { return sb->s_fs_info; } static inline struct qnx6_inode_info *QNX6_I(struct inode *inode) { return container_of(inode, struct qnx6_inode_info, vfs_inode); } #define clear_opt(o, opt) (o &= ~(QNX6_MOUNT_##opt)) #define set_opt(o, opt) (o |= (QNX6_MOUNT_##opt)) #define test_opt(sb, opt) (QNX6_SB(sb)->s_mount_opt & \ QNX6_MOUNT_##opt) enum { BYTESEX_LE, BYTESEX_BE, }; static inline __u64 fs64_to_cpu(struct qnx6_sb_info *sbi, __fs64 n) { if (sbi->s_bytesex == BYTESEX_LE) return le64_to_cpu((__force __le64)n); else return be64_to_cpu((__force __be64)n); } static inline __fs64 cpu_to_fs64(struct qnx6_sb_info *sbi, __u64 n) { if (sbi->s_bytesex == BYTESEX_LE) return (__force __fs64)cpu_to_le64(n); else return (__force __fs64)cpu_to_be64(n); } static inline __u32 fs32_to_cpu(struct qnx6_sb_info *sbi, __fs32 n) { if (sbi->s_bytesex == BYTESEX_LE) return le32_to_cpu((__force __le32)n); else return be32_to_cpu((__force __be32)n); } static inline __fs32 cpu_to_fs32(struct qnx6_sb_info *sbi, __u32 n) { if (sbi->s_bytesex == BYTESEX_LE) return (__force __fs32)cpu_to_le32(n); else return (__force __fs32)cpu_to_be32(n); } static inline __u16 fs16_to_cpu(struct qnx6_sb_info *sbi, __fs16 n) { if (sbi->s_bytesex == BYTESEX_LE) return le16_to_cpu((__force __le16)n); else return be16_to_cpu((__force __be16)n); } static inline __fs16 cpu_to_fs16(struct qnx6_sb_info *sbi, __u16 n) { if (sbi->s_bytesex == BYTESEX_LE) return (__force __fs16)cpu_to_le16(n); else return (__force __fs16)cpu_to_be16(n); } extern struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, int silent); static inline void qnx6_put_page(struct page *page) { kunmap(page); put_page(page); } extern unsigned qnx6_find_entry(int len, struct inode *dir, const char *name, struct page **res_page); |
70 69 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 | /* * Copyright (c) 2016 Intel Corporation * * Permission to use, copy, modify, distribute, and sell this software and its * documentation for any purpose is hereby granted without fee, provided that * the above copyright notice appear in all copies and that both that copyright * notice and this permission notice appear in supporting documentation, and * that the name of the copyright holders not be used in advertising or * publicity pertaining to distribution of the software without specific, * written prior permission. The copyright holders make no representations * about the suitability of this software for any purpose. It is provided "as * is" without express or implied warranty. * * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE * OF THIS SOFTWARE. */ #ifndef __DRM_FRAMEBUFFER_H__ #define __DRM_FRAMEBUFFER_H__ #include <linux/ctype.h> #include <linux/list.h> #include <linux/sched.h> #include <drm/drm_fourcc.h> #include <drm/drm_mode_object.h> struct drm_clip_rect; struct drm_device; struct drm_file; struct drm_framebuffer; struct drm_gem_object; /** * struct drm_framebuffer_funcs - framebuffer hooks */ struct drm_framebuffer_funcs { /** * @destroy: * * Clean up framebuffer resources, specifically also unreference the * backing storage. The core guarantees to call this function for every * framebuffer successfully created by calling * &drm_mode_config_funcs.fb_create. Drivers must also call * drm_framebuffer_cleanup() to release DRM core resources for this * framebuffer. */ void (*destroy)(struct drm_framebuffer *framebuffer); /** * @create_handle: * * Create a buffer handle in the driver-specific buffer manager (either * GEM or TTM) valid for the passed-in &struct drm_file. This is used by * the core to implement the GETFB IOCTL, which returns (for * sufficiently priviledged user) also a native buffer handle. This can * be used for seamless transitions between modesetting clients by * copying the current screen contents to a private buffer and blending * between that and the new contents. * * GEM based drivers should call drm_gem_handle_create() to create the * handle. * * RETURNS: * * 0 on success or a negative error code on failure. */ int (*create_handle)(struct drm_framebuffer *fb, struct drm_file *file_priv, unsigned int *handle); /** * @dirty: * * Optional callback for the dirty fb IOCTL. * * Userspace can notify the driver via this callback that an area of the * framebuffer has changed and should be flushed to the display * hardware. This can also be used internally, e.g. by the fbdev * emulation, though that's not the case currently. * * See documentation in drm_mode.h for the struct drm_mode_fb_dirty_cmd * for more information as all the semantics and arguments have a one to * one mapping on this function. * * Atomic drivers should use drm_atomic_helper_dirtyfb() to implement * this hook. * * RETURNS: * * 0 on success or a negative error code on failure. */ int (*dirty)(struct drm_framebuffer *framebuffer, struct drm_file *file_priv, unsigned flags, unsigned color, struct drm_clip_rect *clips, unsigned num_clips); }; /** * struct drm_framebuffer - frame buffer object * * Note that the fb is refcounted for the benefit of driver internals, * for example some hw, disabling a CRTC/plane is asynchronous, and * scanout does not actually complete until the next vblank. So some * cleanup (like releasing the reference(s) on the backing GEM bo(s)) * should be deferred. In cases like this, the driver would like to * hold a ref to the fb even though it has already been removed from * userspace perspective. See drm_framebuffer_get() and * drm_framebuffer_put(). * * The refcount is stored inside the mode object @base. */ struct drm_framebuffer { /** * @dev: DRM device this framebuffer belongs to */ struct drm_device *dev; /** * @head: Place on the &drm_mode_config.fb_list, access protected by * &drm_mode_config.fb_lock. */ struct list_head head; /** * @base: base modeset object structure, contains the reference count. */ struct drm_mode_object base; /** * @comm: Name of the process allocating the fb, used for fb dumping. */ char comm[TASK_COMM_LEN]; /** * @format: framebuffer format information */ const struct drm_format_info *format; /** * @funcs: framebuffer vfunc table */ const struct drm_framebuffer_funcs *funcs; /** * @pitches: Line stride per buffer. For userspace created object this * is copied from drm_mode_fb_cmd2. */ unsigned int pitches[DRM_FORMAT_MAX_PLANES]; /** * @offsets: Offset from buffer start to the actual pixel data in bytes, * per buffer. For userspace created object this is copied from * drm_mode_fb_cmd2. * * Note that this is a linear offset and does not take into account * tiling or buffer layout per @modifier. It is meant to be used when * the actual pixel data for this framebuffer plane starts at an offset, * e.g. when multiple planes are allocated within the same backing * storage buffer object. For tiled layouts this generally means its * @offsets must at least be tile-size aligned, but hardware often has * stricter requirements. * * This should not be used to specifiy x/y pixel offsets into the buffer * data (even for linear buffers). Specifying an x/y pixel offset is * instead done through the source rectangle in &struct drm_plane_state. */ unsigned int offsets[DRM_FORMAT_MAX_PLANES]; /** * @modifier: Data layout modifier. This is used to describe * tiling, or also special layouts (like compression) of auxiliary * buffers. For userspace created object this is copied from * drm_mode_fb_cmd2. */ uint64_t modifier; /** * @width: Logical width of the visible area of the framebuffer, in * pixels. */ unsigned int width; /** * @height: Logical height of the visible area of the framebuffer, in * pixels. */ unsigned int height; /** * @flags: Framebuffer flags like DRM_MODE_FB_INTERLACED or * DRM_MODE_FB_MODIFIERS. */ int flags; /** * @filp_head: Placed on &drm_file.fbs, protected by &drm_file.fbs_lock. */ struct list_head filp_head; /** * @obj: GEM objects backing the framebuffer, one per plane (optional). * * This is used by the GEM framebuffer helpers, see e.g. * drm_gem_fb_create(). */ struct drm_gem_object *obj[DRM_FORMAT_MAX_PLANES]; }; #define obj_to_fb(x) container_of(x, struct drm_framebuffer, base) int drm_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb, const struct drm_framebuffer_funcs *funcs); struct drm_framebuffer *drm_framebuffer_lookup(struct drm_device *dev, struct drm_file *file_priv, uint32_t id); void drm_framebuffer_remove(struct drm_framebuffer *fb); void drm_framebuffer_cleanup(struct drm_framebuffer *fb); void drm_framebuffer_unregister_private(struct drm_framebuffer *fb); /** * drm_framebuffer_get - acquire a framebuffer reference * @fb: DRM framebuffer * * This function increments the framebuffer's reference count. */ static inline void drm_framebuffer_get(struct drm_framebuffer *fb) { drm_mode_object_get(&fb->base); } /** * drm_framebuffer_put - release a framebuffer reference * @fb: DRM framebuffer * * This function decrements the framebuffer's reference count and frees the * framebuffer if the reference count drops to zero. */ static inline void drm_framebuffer_put(struct drm_framebuffer *fb) { drm_mode_object_put(&fb->base); } /** * drm_framebuffer_read_refcount - read the framebuffer reference count. * @fb: framebuffer * * This functions returns the framebuffer's reference count. */ static inline uint32_t drm_framebuffer_read_refcount(const struct drm_framebuffer *fb) { return kref_read(&fb->base.refcount); } /** * drm_framebuffer_assign - store a reference to the fb * @p: location to store framebuffer * @fb: new framebuffer (maybe NULL) * * This functions sets the location to store a reference to the framebuffer, * unreferencing the framebuffer that was previously stored in that location. */ static inline void drm_framebuffer_assign(struct drm_framebuffer **p, struct drm_framebuffer *fb) { if (fb) drm_framebuffer_get(fb); if (*p) drm_framebuffer_put(*p); *p = fb; } /* * drm_for_each_fb - iterate over all framebuffers * @fb: the loop cursor * @dev: the DRM device * * Iterate over all framebuffers of @dev. User must hold * &drm_mode_config.fb_lock. */ #define drm_for_each_fb(fb, dev) \ for (WARN_ON(!mutex_is_locked(&(dev)->mode_config.fb_lock)), \ fb = list_first_entry(&(dev)->mode_config.fb_list, \ struct drm_framebuffer, head); \ &fb->head != (&(dev)->mode_config.fb_list); \ fb = list_next_entry(fb, head)) /** * struct drm_afbc_framebuffer - a special afbc frame buffer object * * A derived class of struct drm_framebuffer, dedicated for afbc use cases. */ struct drm_afbc_framebuffer { /** * @base: base framebuffer structure. */ struct drm_framebuffer base; /** * @block_width: width of a single afbc block */ u32 block_width; /** * @block_height: height of a single afbc block */ u32 block_height; /** * @aligned_width: aligned frame buffer width */ u32 aligned_width; /** * @aligned_height: aligned frame buffer height */ u32 aligned_height; /** * @offset: offset of the first afbc header */ u32 offset; /** * @afbc_size: minimum size of afbc buffer */ u32 afbc_size; }; #define fb_to_afbc_fb(x) container_of(x, struct drm_afbc_framebuffer, base) #endif |
5 5 5 1 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 | // SPDX-License-Identifier: GPL-2.0-or-later /* * cgroups support for the BFQ I/O scheduler. */ #include <linux/module.h> #include <linux/slab.h> #include <linux/blkdev.h> #include <linux/cgroup.h> #include <linux/ktime.h> #include <linux/rbtree.h> #include <linux/ioprio.h> #include <linux/sbitmap.h> #include <linux/delay.h> #include "elevator.h" #include "bfq-iosched.h" #ifdef CONFIG_BFQ_CGROUP_DEBUG static int bfq_stat_init(struct bfq_stat *stat, gfp_t gfp) { int ret; ret = percpu_counter_init(&stat->cpu_cnt, 0, gfp); if (ret) return ret; atomic64_set(&stat->aux_cnt, 0); return 0; } static void bfq_stat_exit(struct bfq_stat *stat) { percpu_counter_destroy(&stat->cpu_cnt); } /** * bfq_stat_add - add a value to a bfq_stat * @stat: target bfq_stat * @val: value to add * * Add @val to @stat. The caller must ensure that IRQ on the same CPU * don't re-enter this function for the same counter. */ static inline void bfq_stat_add(struct bfq_stat *stat, uint64_t val) { percpu_counter_add_batch(&stat->cpu_cnt, val, BLKG_STAT_CPU_BATCH); } /** * bfq_stat_read - read the current value of a bfq_stat * @stat: bfq_stat to read */ static inline uint64_t bfq_stat_read(struct bfq_stat *stat) { return percpu_counter_sum_positive(&stat->cpu_cnt); } /** * bfq_stat_reset - reset a bfq_stat * @stat: bfq_stat to reset */ static inline void bfq_stat_reset(struct bfq_stat *stat) { percpu_counter_set(&stat->cpu_cnt, 0); atomic64_set(&stat->aux_cnt, 0); } /** * bfq_stat_add_aux - add a bfq_stat into another's aux count * @to: the destination bfq_stat * @from: the source * * Add @from's count including the aux one to @to's aux count. */ static inline void bfq_stat_add_aux(struct bfq_stat *to, struct bfq_stat *from) { atomic64_add(bfq_stat_read(from) + atomic64_read(&from->aux_cnt), &to->aux_cnt); } /** * blkg_prfill_stat - prfill callback for bfq_stat * @sf: seq_file to print to * @pd: policy private data of interest * @off: offset to the bfq_stat in @pd * * prfill callback for printing a bfq_stat. */ static u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off) { return __blkg_prfill_u64(sf, pd, bfq_stat_read((void *)pd + off)); } /* bfqg stats flags */ enum bfqg_stats_flags { BFQG_stats_waiting = 0, BFQG_stats_idling, BFQG_stats_empty, }; #define BFQG_FLAG_FNS(name) \ static void bfqg_stats_mark_##name(struct bfqg_stats *stats) \ { \ stats->flags |= (1 << BFQG_stats_##name); \ } \ static void bfqg_stats_clear_##name(struct bfqg_stats *stats) \ { \ stats->flags &= ~(1 << BFQG_stats_##name); \ } \ static int bfqg_stats_##name(struct bfqg_stats *stats) \ { \ return (stats->flags & (1 << BFQG_stats_##name)) != 0; \ } \ BFQG_FLAG_FNS(waiting) BFQG_FLAG_FNS(idling) BFQG_FLAG_FNS(empty) #undef BFQG_FLAG_FNS /* This should be called with the scheduler lock held. */ static void bfqg_stats_update_group_wait_time(struct bfqg_stats *stats) { u64 now; if (!bfqg_stats_waiting(stats)) return; now = blk_time_get_ns(); if (now > stats->start_group_wait_time) bfq_stat_add(&stats->group_wait_time, now - stats->start_group_wait_time); bfqg_stats_clear_waiting(stats); } /* This should be called with the scheduler lock held. */ static void bfqg_stats_set_start_group_wait_time(struct bfq_group *bfqg, struct bfq_group *curr_bfqg) { struct bfqg_stats *stats = &bfqg->stats; if (bfqg_stats_waiting(stats)) return; if (bfqg == curr_bfqg) return; stats->start_group_wait_time = blk_time_get_ns(); bfqg_stats_mark_waiting(stats); } /* This should be called with the scheduler lock held. */ static void bfqg_stats_end_empty_time(struct bfqg_stats *stats) { u64 now; if (!bfqg_stats_empty(stats)) return; now = blk_time_get_ns(); if (now > stats->start_empty_time) bfq_stat_add(&stats->empty_time, now - stats->start_empty_time); bfqg_stats_clear_empty(stats); } void bfqg_stats_update_dequeue(struct bfq_group *bfqg) { bfq_stat_add(&bfqg->stats.dequeue, 1); } void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg) { struct bfqg_stats *stats = &bfqg->stats; if (blkg_rwstat_total(&stats->queued)) return; /* * group is already marked empty. This can happen if bfqq got new * request in parent group and moved to this group while being added * to service tree. Just ignore the event and move on. */ if (bfqg_stats_empty(stats)) return; stats->start_empty_time = blk_time_get_ns(); bfqg_stats_mark_empty(stats); } void bfqg_stats_update_idle_time(struct bfq_group *bfqg) { struct bfqg_stats *stats = &bfqg->stats; if (bfqg_stats_idling(stats)) { u64 now = blk_time_get_ns(); if (now > stats->start_idle_time) bfq_stat_add(&stats->idle_time, now - stats->start_idle_time); bfqg_stats_clear_idling(stats); } } void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg) { struct bfqg_stats *stats = &bfqg->stats; stats->start_idle_time = blk_time_get_ns(); bfqg_stats_mark_idling(stats); } void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg) { struct bfqg_stats *stats = &bfqg->stats; bfq_stat_add(&stats->avg_queue_size_sum, blkg_rwstat_total(&stats->queued)); bfq_stat_add(&stats->avg_queue_size_samples, 1); bfqg_stats_update_group_wait_time(stats); } void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq, blk_opf_t opf) { blkg_rwstat_add(&bfqg->stats.queued, opf, 1); bfqg_stats_end_empty_time(&bfqg->stats); if (!(bfqq == bfqg->bfqd->in_service_queue)) bfqg_stats_set_start_group_wait_time(bfqg, bfqq_group(bfqq)); } void bfqg_stats_update_io_remove(struct bfq_group *bfqg, blk_opf_t opf) { blkg_rwstat_add(&bfqg->stats.queued, opf, -1); } void bfqg_stats_update_io_merged(struct bfq_group *bfqg, blk_opf_t opf) { blkg_rwstat_add(&bfqg->stats.merged, opf, 1); } void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns, u64 io_start_time_ns, blk_opf_t opf) { struct bfqg_stats *stats = &bfqg->stats; u64 now = blk_time_get_ns(); if (now > io_start_time_ns) blkg_rwstat_add(&stats->service_time, opf, now - io_start_time_ns); if (io_start_time_ns > start_time_ns) blkg_rwstat_add(&stats->wait_time, opf, io_start_time_ns - start_time_ns); } #else /* CONFIG_BFQ_CGROUP_DEBUG */ void bfqg_stats_update_io_remove(struct bfq_group *bfqg, blk_opf_t opf) { } void bfqg_stats_update_io_merged(struct bfq_group *bfqg, blk_opf_t opf) { } void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns, u64 io_start_time_ns, blk_opf_t opf) { } void bfqg_stats_update_dequeue(struct bfq_group *bfqg) { } void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg) { } #endif /* CONFIG_BFQ_CGROUP_DEBUG */ #ifdef CONFIG_BFQ_GROUP_IOSCHED /* * blk-cgroup policy-related handlers * The following functions help in converting between blk-cgroup * internal structures and BFQ-specific structures. */ static struct bfq_group *pd_to_bfqg(struct blkg_policy_data *pd) { return pd ? container_of(pd, struct bfq_group, pd) : NULL; } struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg) { return pd_to_blkg(&bfqg->pd); } static struct bfq_group *blkg_to_bfqg(struct blkcg_gq *blkg) { return pd_to_bfqg(blkg_to_pd(blkg, &blkcg_policy_bfq)); } /* * bfq_group handlers * The following functions help in navigating the bfq_group hierarchy * by allowing to find the parent of a bfq_group or the bfq_group * associated to a bfq_queue. */ static struct bfq_group *bfqg_parent(struct bfq_group *bfqg) { struct blkcg_gq *pblkg = bfqg_to_blkg(bfqg)->parent; return pblkg ? blkg_to_bfqg(pblkg) : NULL; } struct bfq_group *bfqq_group(struct bfq_queue *bfqq) { struct bfq_entity *group_entity = bfqq->entity.parent; return group_entity ? container_of(group_entity, struct bfq_group, entity) : bfqq->bfqd->root_group; } /* * The following two functions handle get and put of a bfq_group by * wrapping the related blk-cgroup hooks. */ static void bfqg_get(struct bfq_group *bfqg) { refcount_inc(&bfqg->ref); } static void bfqg_put(struct bfq_group *bfqg) { if (refcount_dec_and_test(&bfqg->ref)) kfree(bfqg); } static void bfqg_and_blkg_get(struct bfq_group *bfqg) { /* see comments in bfq_bic_update_cgroup for why refcounting bfqg */ bfqg_get(bfqg); blkg_get(bfqg_to_blkg(bfqg)); } void bfqg_and_blkg_put(struct bfq_group *bfqg) { blkg_put(bfqg_to_blkg(bfqg)); bfqg_put(bfqg); } void bfqg_stats_update_legacy_io(struct request_queue *q, struct request *rq) { struct bfq_group *bfqg = blkg_to_bfqg(rq->bio->bi_blkg); if (!bfqg) return; blkg_rwstat_add(&bfqg->stats.bytes, rq->cmd_flags, blk_rq_bytes(rq)); blkg_rwstat_add(&bfqg->stats.ios, rq->cmd_flags, 1); } /* @stats = 0 */ static void bfqg_stats_reset(struct bfqg_stats *stats) { #ifdef CONFIG_BFQ_CGROUP_DEBUG /* queued stats shouldn't be cleared */ blkg_rwstat_reset(&stats->merged); blkg_rwstat_reset(&stats->service_time); blkg_rwstat_reset(&stats->wait_time); bfq_stat_reset(&stats->time); bfq_stat_reset(&stats->avg_queue_size_sum); bfq_stat_reset(&stats->avg_queue_size_samples); bfq_stat_reset(&stats->dequeue); bfq_stat_reset(&stats->group_wait_time); bfq_stat_reset(&stats->idle_time); bfq_stat_reset(&stats->empty_time); #endif } /* @to += @from */ static void bfqg_stats_add_aux(struct bfqg_stats *to, struct bfqg_stats *from) { if (!to || !from) return; #ifdef CONFIG_BFQ_CGROUP_DEBUG /* queued stats shouldn't be cleared */ blkg_rwstat_add_aux(&to->merged, &from->merged); blkg_rwstat_add_aux(&to->service_time, &from->service_time); blkg_rwstat_add_aux(&to->wait_time, &from->wait_time); bfq_stat_add_aux(&from->time, &from->time); bfq_stat_add_aux(&to->avg_queue_size_sum, &from->avg_queue_size_sum); bfq_stat_add_aux(&to->avg_queue_size_samples, &from->avg_queue_size_samples); bfq_stat_add_aux(&to->dequeue, &from->dequeue); bfq_stat_add_aux(&to->group_wait_time, &from->group_wait_time); bfq_stat_add_aux(&to->idle_time, &from->idle_time); bfq_stat_add_aux(&to->empty_time, &from->empty_time); #endif } /* * Transfer @bfqg's stats to its parent's aux counts so that the ancestors' * recursive stats can still account for the amount used by this bfqg after * it's gone. */ static void bfqg_stats_xfer_dead(struct bfq_group *bfqg) { struct bfq_group *parent; if (!bfqg) /* root_group */ return; parent = bfqg_parent(bfqg); lockdep_assert_held(&bfqg_to_blkg(bfqg)->q->queue_lock); if (unlikely(!parent)) return; bfqg_stats_add_aux(&parent->stats, &bfqg->stats); bfqg_stats_reset(&bfqg->stats); } void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg) { struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); entity->weight = entity->new_weight; entity->orig_weight = entity->new_weight; if (bfqq) { bfqq->ioprio = bfqq->new_ioprio; bfqq->ioprio_class = bfqq->new_ioprio_class; /* * Make sure that bfqg and its associated blkg do not * disappear before entity. */ bfqg_and_blkg_get(bfqg); } entity->parent = bfqg->my_entity; /* NULL for root group */ entity->sched_data = &bfqg->sched_data; } static void bfqg_stats_exit(struct bfqg_stats *stats) { blkg_rwstat_exit(&stats->bytes); blkg_rwstat_exit(&stats->ios); #ifdef CONFIG_BFQ_CGROUP_DEBUG blkg_rwstat_exit(&stats->merged); blkg_rwstat_exit(&stats->service_time); blkg_rwstat_exit(&stats->wait_time); blkg_rwstat_exit(&stats->queued); bfq_stat_exit(&stats->time); bfq_stat_exit(&stats->avg_queue_size_sum); bfq_stat_exit(&stats->avg_queue_size_samples); bfq_stat_exit(&stats->dequeue); bfq_stat_exit(&stats->group_wait_time); bfq_stat_exit(&stats->idle_time); bfq_stat_exit(&stats->empty_time); #endif } static int bfqg_stats_init(struct bfqg_stats *stats, gfp_t gfp) { if (blkg_rwstat_init(&stats->bytes, gfp) || blkg_rwstat_init(&stats->ios, gfp)) goto error; #ifdef CONFIG_BFQ_CGROUP_DEBUG if (blkg_rwstat_init(&stats->merged, gfp) || blkg_rwstat_init(&stats->service_time, gfp) || blkg_rwstat_init(&stats->wait_time, gfp) || blkg_rwstat_init(&stats->queued, gfp) || bfq_stat_init(&stats->time, gfp) || bfq_stat_init(&stats->avg_queue_size_sum, gfp) || bfq_stat_init(&stats->avg_queue_size_samples, gfp) || bfq_stat_init(&stats->dequeue, gfp) || bfq_stat_init(&stats->group_wait_time, gfp) || bfq_stat_init(&stats->idle_time, gfp) || bfq_stat_init(&stats->empty_time, gfp)) goto error; #endif return 0; error: bfqg_stats_exit(stats); return -ENOMEM; } static struct bfq_group_data *cpd_to_bfqgd(struct blkcg_policy_data *cpd) { return cpd ? container_of(cpd, struct bfq_group_data, pd) : NULL; } static struct bfq_group_data *blkcg_to_bfqgd(struct blkcg *blkcg) { return cpd_to_bfqgd(blkcg_to_cpd(blkcg, &blkcg_policy_bfq)); } static struct blkcg_policy_data *bfq_cpd_alloc(gfp_t gfp) { struct bfq_group_data *bgd; bgd = kzalloc(sizeof(*bgd), gfp); if (!bgd) return NULL; bgd->weight = CGROUP_WEIGHT_DFL; return &bgd->pd; } static void bfq_cpd_free(struct blkcg_policy_data *cpd) { kfree(cpd_to_bfqgd(cpd)); } static struct blkg_policy_data *bfq_pd_alloc(struct gendisk *disk, struct blkcg *blkcg, gfp_t gfp) { struct bfq_group *bfqg; bfqg = kzalloc_node(sizeof(*bfqg), gfp, disk->node_id); if (!bfqg) return NULL; if (bfqg_stats_init(&bfqg->stats, gfp)) { kfree(bfqg); return NULL; } /* see comments in bfq_bic_update_cgroup for why refcounting */ refcount_set(&bfqg->ref, 1); return &bfqg->pd; } static void bfq_pd_init(struct blkg_policy_data *pd) { struct blkcg_gq *blkg = pd_to_blkg(pd); struct bfq_group *bfqg = blkg_to_bfqg(blkg); struct bfq_data *bfqd = blkg->q->elevator->elevator_data; struct bfq_entity *entity = &bfqg->entity; struct bfq_group_data *d = blkcg_to_bfqgd(blkg->blkcg); entity->orig_weight = entity->weight = entity->new_weight = d->weight; entity->my_sched_data = &bfqg->sched_data; entity->last_bfqq_created = NULL; bfqg->my_entity = entity; /* * the root_group's will be set to NULL * in bfq_init_queue() */ bfqg->bfqd = bfqd; bfqg->active_entities = 0; bfqg->num_queues_with_pending_reqs = 0; bfqg->rq_pos_tree = RB_ROOT; } static void bfq_pd_free(struct blkg_policy_data *pd) { struct bfq_group *bfqg = pd_to_bfqg(pd); bfqg_stats_exit(&bfqg->stats); bfqg_put(bfqg); } static void bfq_pd_reset_stats(struct blkg_policy_data *pd) { struct bfq_group *bfqg = pd_to_bfqg(pd); bfqg_stats_reset(&bfqg->stats); } static void bfq_group_set_parent(struct bfq_group *bfqg, struct bfq_group *parent) { struct bfq_entity *entity; entity = &bfqg->entity; entity->parent = parent->my_entity; entity->sched_data = &parent->sched_data; } static void bfq_link_bfqg(struct bfq_data *bfqd, struct bfq_group *bfqg) { struct bfq_group *parent; struct bfq_entity *entity; /* * Update chain of bfq_groups as we might be handling a leaf group * which, along with some of its relatives, has not been hooked yet * to the private hierarchy of BFQ. */ entity = &bfqg->entity; for_each_entity(entity) { struct bfq_group *curr_bfqg = container_of(entity, struct bfq_group, entity); if (curr_bfqg != bfqd->root_group) { parent = bfqg_parent(curr_bfqg); if (!parent) parent = bfqd->root_group; bfq_group_set_parent(curr_bfqg, parent); } } } struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio) { struct blkcg_gq *blkg = bio->bi_blkg; struct bfq_group *bfqg; while (blkg) { if (!blkg->online) { blkg = blkg->parent; continue; } bfqg = blkg_to_bfqg(blkg); if (bfqg->pd.online) { bio_associate_blkg_from_css(bio, &blkg->blkcg->css); return bfqg; } blkg = blkg->parent; } bio_associate_blkg_from_css(bio, &bfqg_to_blkg(bfqd->root_group)->blkcg->css); return bfqd->root_group; } /** * bfq_bfqq_move - migrate @bfqq to @bfqg. * @bfqd: queue descriptor. * @bfqq: the queue to move. * @bfqg: the group to move to. * * Move @bfqq to @bfqg, deactivating it from its old group and reactivating * it on the new one. Avoid putting the entity on the old group idle tree. * * Must be called under the scheduler lock, to make sure that the blkg * owning @bfqg does not disappear (see comments in * bfq_bic_update_cgroup on guaranteeing the consistency of blkg * objects). */ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, struct bfq_group *bfqg) { struct bfq_entity *entity = &bfqq->entity; struct bfq_group *old_parent = bfqq_group(bfqq); bool has_pending_reqs = false; /* * No point to move bfqq to the same group, which can happen when * root group is offlined */ if (old_parent == bfqg) return; /* * oom_bfqq is not allowed to move, oom_bfqq will hold ref to root_group * until elevator exit. */ if (bfqq == &bfqd->oom_bfqq) return; /* * Get extra reference to prevent bfqq from being freed in * next possible expire or deactivate. */ bfqq->ref++; if (entity->in_groups_with_pending_reqs) { has_pending_reqs = true; bfq_del_bfqq_in_groups_with_pending_reqs(bfqq); } /* If bfqq is empty, then bfq_bfqq_expire also invokes * bfq_del_bfqq_busy, thereby removing bfqq and its entity * from data structures related to current group. Otherwise we * need to remove bfqq explicitly with bfq_deactivate_bfqq, as * we do below. */ if (bfqq == bfqd->in_service_queue) bfq_bfqq_expire(bfqd, bfqd->in_service_queue, false, BFQQE_PREEMPTED); if (bfq_bfqq_busy(bfqq)) bfq_deactivate_bfqq(bfqd, bfqq, false, false); else if (entity->on_st_or_in_serv) bfq_put_idle_entity(bfq_entity_service_tree(entity), entity); bfqg_and_blkg_put(old_parent); if (entity->parent && entity->parent->last_bfqq_created == bfqq) entity->parent->last_bfqq_created = NULL; else if (bfqd->last_bfqq_created == bfqq) bfqd->last_bfqq_created = NULL; entity->parent = bfqg->my_entity; entity->sched_data = &bfqg->sched_data; /* pin down bfqg and its associated blkg */ bfqg_and_blkg_get(bfqg); if (has_pending_reqs) bfq_add_bfqq_in_groups_with_pending_reqs(bfqq); if (bfq_bfqq_busy(bfqq)) { if (unlikely(!bfqd->nonrot_with_queueing)) bfq_pos_tree_add_move(bfqd, bfqq); bfq_activate_bfqq(bfqd, bfqq); } if (!bfqd->in_service_queue && !bfqd->tot_rq_in_driver) bfq_schedule_dispatch(bfqd); /* release extra ref taken above, bfqq may happen to be freed now */ bfq_put_queue(bfqq); } static void bfq_sync_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *sync_bfqq, struct bfq_io_cq *bic, struct bfq_group *bfqg, unsigned int act_idx) { struct bfq_queue *bfqq; if (!sync_bfqq->new_bfqq && !bfq_bfqq_coop(sync_bfqq)) { /* We are the only user of this bfqq, just move it */ if (sync_bfqq->entity.sched_data != &bfqg->sched_data) bfq_bfqq_move(bfqd, sync_bfqq, bfqg); return; } /* * The queue was merged to a different queue. Check * that the merge chain still belongs to the same * cgroup. */ for (bfqq = sync_bfqq; bfqq; bfqq = bfqq->new_bfqq) if (bfqq->entity.sched_data != &bfqg->sched_data) break; if (bfqq) { /* * Some queue changed cgroup so the merge is not valid * anymore. We cannot easily just cancel the merge (by * clearing new_bfqq) as there may be other processes * using this queue and holding refs to all queues * below sync_bfqq->new_bfqq. Similarly if the merge * already happened, we need to detach from bfqq now * so that we cannot merge bio to a request from the * old cgroup. */ bfq_put_cooperator(sync_bfqq); bic_set_bfqq(bic, NULL, true, act_idx); bfq_release_process_ref(bfqd, sync_bfqq); } } /** * __bfq_bic_change_cgroup - move @bic to @bfqg. * @bfqd: the queue descriptor. * @bic: the bic to move. * @bfqg: the group to move to. * * Move bic to blkcg, assuming that bfqd->lock is held; which makes * sure that the reference to cgroup is valid across the call (see * comments in bfq_bic_update_cgroup on this issue) */ static void __bfq_bic_change_cgroup(struct bfq_data *bfqd, struct bfq_io_cq *bic, struct bfq_group *bfqg) { unsigned int act_idx; for (act_idx = 0; act_idx < bfqd->num_actuators; act_idx++) { struct bfq_queue *async_bfqq = bic_to_bfqq(bic, false, act_idx); struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, true, act_idx); if (async_bfqq && async_bfqq->entity.sched_data != &bfqg->sched_data) { bic_set_bfqq(bic, NULL, false, act_idx); bfq_release_process_ref(bfqd, async_bfqq); } if (sync_bfqq) bfq_sync_bfqq_move(bfqd, sync_bfqq, bic, bfqg, act_idx); } } void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) { struct bfq_data *bfqd = bic_to_bfqd(bic); struct bfq_group *bfqg = bfq_bio_bfqg(bfqd, bio); uint64_t serial_nr; serial_nr = bfqg_to_blkg(bfqg)->blkcg->css.serial_nr; /* * Check whether blkcg has changed. The condition may trigger * spuriously on a newly created cic but there's no harm. */ if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr)) return; /* * New cgroup for this process. Make sure it is linked to bfq internal * cgroup hierarchy. */ bfq_link_bfqg(bfqd, bfqg); __bfq_bic_change_cgroup(bfqd, bic, bfqg); /* * Update blkg_path for bfq_log_* functions. We cache this * path, and update it here, for the following * reasons. Operations on blkg objects in blk-cgroup are * protected with the request_queue lock, and not with the * lock that protects the instances of this scheduler * (bfqd->lock). This exposes BFQ to the following sort of * race. * * The blkg_lookup performed in bfq_get_queue, protected * through rcu, may happen to return the address of a copy of * the original blkg. If this is the case, then the * bfqg_and_blkg_get performed in bfq_get_queue, to pin down * the blkg, is useless: it does not prevent blk-cgroup code * from destroying both the original blkg and all objects * directly or indirectly referred by the copy of the * blkg. * * On the bright side, destroy operations on a blkg invoke, as * a first step, hooks of the scheduler associated with the * blkg. And these hooks are executed with bfqd->lock held for * BFQ. As a consequence, for any blkg associated with the * request queue this instance of the scheduler is attached * to, we are guaranteed that such a blkg is not destroyed, and * that all the pointers it contains are consistent, while we * are holding bfqd->lock. A blkg_lookup performed with * bfqd->lock held then returns a fully consistent blkg, which * remains consistent until this lock is held. * * Thanks to the last fact, and to the fact that: (1) bfqg has * been obtained through a blkg_lookup in the above * assignment, and (2) bfqd->lock is being held, here we can * safely use the policy data for the involved blkg (i.e., the * field bfqg->pd) to get to the blkg associated with bfqg, * and then we can safely use any field of blkg. After we * release bfqd->lock, even just getting blkg through this * bfqg may cause dangling references to be traversed, as * bfqg->pd may not exist any more. * * In view of the above facts, here we cache, in the bfqg, any * blkg data we may need for this bic, and for its associated * bfq_queue. As of now, we need to cache only the path of the * blkg, which is used in the bfq_log_* functions. * * Finally, note that bfqg itself needs to be protected from * destruction on the blkg_free of the original blkg (which * invokes bfq_pd_free). We use an additional private * refcounter for bfqg, to let it disappear only after no * bfq_queue refers to it any longer. */ blkg_path(bfqg_to_blkg(bfqg), bfqg->blkg_path, sizeof(bfqg->blkg_path)); bic->blkcg_serial_nr = serial_nr; } /** * bfq_flush_idle_tree - deactivate any entity on the idle tree of @st. * @st: the service tree being flushed. */ static void bfq_flush_idle_tree(struct bfq_service_tree *st) { struct bfq_entity *entity = st->first_idle; for (; entity ; entity = st->first_idle) __bfq_deactivate_entity(entity, false); } /** * bfq_reparent_leaf_entity - move leaf entity to the root_group. * @bfqd: the device data structure with the root group. * @entity: the entity to move, if entity is a leaf; or the parent entity * of an active leaf entity to move, if entity is not a leaf. * @ioprio_class: I/O priority class to reparent. */ static void bfq_reparent_leaf_entity(struct bfq_data *bfqd, struct bfq_entity *entity, int ioprio_class) { struct bfq_queue *bfqq; struct bfq_entity *child_entity = entity; while (child_entity->my_sched_data) { /* leaf not reached yet */ struct bfq_sched_data *child_sd = child_entity->my_sched_data; struct bfq_service_tree *child_st = child_sd->service_tree + ioprio_class; struct rb_root *child_active = &child_st->active; child_entity = bfq_entity_of(rb_first(child_active)); if (!child_entity) child_entity = child_sd->in_service_entity; } bfqq = bfq_entity_to_bfqq(child_entity); bfq_bfqq_move(bfqd, bfqq, bfqd->root_group); } /** * bfq_reparent_active_queues - move to the root group all active queues. * @bfqd: the device data structure with the root group. * @bfqg: the group to move from. * @st: the service tree to start the search from. * @ioprio_class: I/O priority class to reparent. */ static void bfq_reparent_active_queues(struct bfq_data *bfqd, struct bfq_group *bfqg, struct bfq_service_tree *st, int ioprio_class) { struct rb_root *active = &st->active; struct bfq_entity *entity; while ((entity = bfq_entity_of(rb_first(active)))) bfq_reparent_leaf_entity(bfqd, entity, ioprio_class); if (bfqg->sched_data.in_service_entity) bfq_reparent_leaf_entity(bfqd, bfqg->sched_data.in_service_entity, ioprio_class); } /** * bfq_pd_offline - deactivate the entity associated with @pd, * and reparent its children entities. * @pd: descriptor of the policy going offline. * * blkio already grabs the queue_lock for us, so no need to use * RCU-based magic */ static void bfq_pd_offline(struct blkg_policy_data *pd) { struct bfq_service_tree *st; struct bfq_group *bfqg = pd_to_bfqg(pd); struct bfq_data *bfqd = bfqg->bfqd; struct bfq_entity *entity = bfqg->my_entity; unsigned long flags; int i; spin_lock_irqsave(&bfqd->lock, flags); if (!entity) /* root group */ goto put_async_queues; /* * Empty all service_trees belonging to this group before * deactivating the group itself. */ for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) { st = bfqg->sched_data.service_tree + i; /* * It may happen that some queues are still active * (busy) upon group destruction (if the corresponding * processes have been forced to terminate). We move * all the leaf entities corresponding to these queues * to the root_group. * Also, it may happen that the group has an entity * in service, which is disconnected from the active * tree: it must be moved, too. * There is no need to put the sync queues, as the * scheduler has taken no reference. */ bfq_reparent_active_queues(bfqd, bfqg, st, i); /* * The idle tree may still contain bfq_queues * belonging to exited task because they never * migrated to a different cgroup from the one being * destroyed now. In addition, even * bfq_reparent_active_queues() may happen to add some * entities to the idle tree. It happens if, in some * of the calls to bfq_bfqq_move() performed by * bfq_reparent_active_queues(), the queue to move is * empty and gets expired. */ bfq_flush_idle_tree(st); } __bfq_deactivate_entity(entity, false); put_async_queues: bfq_put_async_queues(bfqd, bfqg); spin_unlock_irqrestore(&bfqd->lock, flags); /* * @blkg is going offline and will be ignored by * blkg_[rw]stat_recursive_sum(). Transfer stats to the parent so * that they don't get lost. If IOs complete after this point, the * stats for them will be lost. Oh well... */ bfqg_stats_xfer_dead(bfqg); } void bfq_end_wr_async(struct bfq_data *bfqd) { struct blkcg_gq *blkg; list_for_each_entry(blkg, &bfqd->queue->blkg_list, q_node) { struct bfq_group *bfqg = blkg_to_bfqg(blkg); bfq_end_wr_async_queues(bfqd, bfqg); } bfq_end_wr_async_queues(bfqd, bfqd->root_group); } static int bfq_io_show_weight_legacy(struct seq_file *sf, void *v) { struct blkcg *blkcg = css_to_blkcg(seq_css(sf)); struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg); unsigned int val = 0; if (bfqgd) val = bfqgd->weight; seq_printf(sf, "%u\n", val); return 0; } static u64 bfqg_prfill_weight_device(struct seq_file *sf, struct blkg_policy_data *pd, int off) { struct bfq_group *bfqg = pd_to_bfqg(pd); if (!bfqg->entity.dev_weight) return 0; return __blkg_prfill_u64(sf, pd, bfqg->entity.dev_weight); } static int bfq_io_show_weight(struct seq_file *sf, void *v) { struct blkcg *blkcg = css_to_blkcg(seq_css(sf)); struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg); seq_printf(sf, "default %u\n", bfqgd->weight); blkcg_print_blkgs(sf, blkcg, bfqg_prfill_weight_device, &blkcg_policy_bfq, 0, false); return 0; } static void bfq_group_set_weight(struct bfq_group *bfqg, u64 weight, u64 dev_weight) { weight = dev_weight ?: weight; bfqg->entity.dev_weight = dev_weight; /* * Setting the prio_changed flag of the entity * to 1 with new_weight == weight would re-set * the value of the weight to its ioprio mapping. * Set the flag only if necessary. */ if ((unsigned short)weight != bfqg->entity.new_weight) { bfqg->entity.new_weight = (unsigned short)weight; /* * Make sure that the above new value has been * stored in bfqg->entity.new_weight before * setting the prio_changed flag. In fact, * this flag may be read asynchronously (in * critical sections protected by a different * lock than that held here), and finding this * flag set may cause the execution of the code * for updating parameters whose value may * depend also on bfqg->entity.new_weight (in * __bfq_entity_update_weight_prio). * This barrier makes sure that the new value * of bfqg->entity.new_weight is correctly * seen in that code. */ smp_wmb(); bfqg->entity.prio_changed = 1; } } static int bfq_io_set_weight_legacy(struct cgroup_subsys_state *css, struct cftype *cftype, u64 val) { struct blkcg *blkcg = css_to_blkcg(css); struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg); struct blkcg_gq *blkg; int ret = -ERANGE; if (val < BFQ_MIN_WEIGHT || val > BFQ_MAX_WEIGHT) return ret; ret = 0; spin_lock_irq(&blkcg->lock); bfqgd->weight = (unsigned short)val; hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) { struct bfq_group *bfqg = blkg_to_bfqg(blkg); if (bfqg) bfq_group_set_weight(bfqg, val, 0); } spin_unlock_irq(&blkcg->lock); return ret; } static ssize_t bfq_io_set_device_weight(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { int ret; struct blkg_conf_ctx ctx; struct blkcg *blkcg = css_to_blkcg(of_css(of)); struct bfq_group *bfqg; u64 v; blkg_conf_init(&ctx, buf); ret = blkg_conf_prep(blkcg, &blkcg_policy_bfq, &ctx); if (ret) goto out; if (sscanf(ctx.body, "%llu", &v) == 1) { /* require "default" on dfl */ ret = -ERANGE; if (!v) goto out; } else if (!strcmp(strim(ctx.body), "default")) { v = 0; } else { ret = -EINVAL; goto out; } bfqg = blkg_to_bfqg(ctx.blkg); ret = -ERANGE; if (!v || (v >= BFQ_MIN_WEIGHT && v <= BFQ_MAX_WEIGHT)) { bfq_group_set_weight(bfqg, bfqg->entity.weight, v); ret = 0; } out: blkg_conf_exit(&ctx); return ret ?: nbytes; } static ssize_t bfq_io_set_weight(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { char *endp; int ret; u64 v; buf = strim(buf); /* "WEIGHT" or "default WEIGHT" sets the default weight */ v = simple_strtoull(buf, &endp, 0); if (*endp == '\0' || sscanf(buf, "default %llu", &v) == 1) { ret = bfq_io_set_weight_legacy(of_css(of), NULL, v); return ret ?: nbytes; } return bfq_io_set_device_weight(of, buf, nbytes, off); } static int bfqg_print_rwstat(struct seq_file *sf, void *v) { blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_rwstat, &blkcg_policy_bfq, seq_cft(sf)->private, true); return 0; } static u64 bfqg_prfill_rwstat_recursive(struct seq_file *sf, struct blkg_policy_data *pd, int off) { struct blkg_rwstat_sample sum; blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq, off, &sum); return __blkg_prfill_rwstat(sf, pd, &sum); } static int bfqg_print_rwstat_recursive(struct seq_file *sf, void *v) { blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), bfqg_prfill_rwstat_recursive, &blkcg_policy_bfq, seq_cft(sf)->private, true); return 0; } #ifdef CONFIG_BFQ_CGROUP_DEBUG static int bfqg_print_stat(struct seq_file *sf, void *v) { blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_stat, &blkcg_policy_bfq, seq_cft(sf)->private, false); return 0; } static u64 bfqg_prfill_stat_recursive(struct seq_file *sf, struct blkg_policy_data *pd, int off) { struct blkcg_gq *blkg = pd_to_blkg(pd); struct blkcg_gq *pos_blkg; struct cgroup_subsys_state *pos_css; u64 sum = 0; lockdep_assert_held(&blkg->q->queue_lock); rcu_read_lock(); blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) { struct bfq_stat *stat; if (!pos_blkg->online) continue; stat = (void *)blkg_to_pd(pos_blkg, &blkcg_policy_bfq) + off; sum += bfq_stat_read(stat) + atomic64_read(&stat->aux_cnt); } rcu_read_unlock(); return __blkg_prfill_u64(sf, pd, sum); } static int bfqg_print_stat_recursive(struct seq_file *sf, void *v) { blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), bfqg_prfill_stat_recursive, &blkcg_policy_bfq, seq_cft(sf)->private, false); return 0; } static u64 bfqg_prfill_sectors(struct seq_file *sf, struct blkg_policy_data *pd, int off) { struct bfq_group *bfqg = blkg_to_bfqg(pd->blkg); u64 sum = blkg_rwstat_total(&bfqg->stats.bytes); return __blkg_prfill_u64(sf, pd, sum >> 9); } static int bfqg_print_stat_sectors(struct seq_file *sf, void *v) { blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), bfqg_prfill_sectors, &blkcg_policy_bfq, 0, false); return 0; } static u64 bfqg_prfill_sectors_recursive(struct seq_file *sf, struct blkg_policy_data *pd, int off) { struct blkg_rwstat_sample tmp; blkg_rwstat_recursive_sum(pd->blkg, &blkcg_policy_bfq, offsetof(struct bfq_group, stats.bytes), &tmp); return __blkg_prfill_u64(sf, pd, (tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE]) >> 9); } static int bfqg_print_stat_sectors_recursive(struct seq_file *sf, void *v) { blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), bfqg_prfill_sectors_recursive, &blkcg_policy_bfq, 0, false); return 0; } static u64 bfqg_prfill_avg_queue_size(struct seq_file *sf, struct blkg_policy_data *pd, int off) { struct bfq_group *bfqg = pd_to_bfqg(pd); u64 samples = bfq_stat_read(&bfqg->stats.avg_queue_size_samples); u64 v = 0; if (samples) { v = bfq_stat_read(&bfqg->stats.avg_queue_size_sum); v = div64_u64(v, samples); } __blkg_prfill_u64(sf, pd, v); return 0; } /* print avg_queue_size */ static int bfqg_print_avg_queue_size(struct seq_file *sf, void *v) { blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), bfqg_prfill_avg_queue_size, &blkcg_policy_bfq, 0, false); return 0; } #endif /* CONFIG_BFQ_CGROUP_DEBUG */ struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node) { int ret; ret = blkcg_activate_policy(bfqd->queue->disk, &blkcg_policy_bfq); if (ret) return NULL; return blkg_to_bfqg(bfqd->queue->root_blkg); } struct blkcg_policy blkcg_policy_bfq = { .dfl_cftypes = bfq_blkg_files, .legacy_cftypes = bfq_blkcg_legacy_files, .cpd_alloc_fn = bfq_cpd_alloc, .cpd_free_fn = bfq_cpd_free, .pd_alloc_fn = bfq_pd_alloc, .pd_init_fn = bfq_pd_init, .pd_offline_fn = bfq_pd_offline, .pd_free_fn = bfq_pd_free, .pd_reset_stats_fn = bfq_pd_reset_stats, }; struct cftype bfq_blkcg_legacy_files[] = { { .name = "bfq.weight", .flags = CFTYPE_NOT_ON_ROOT, .seq_show = bfq_io_show_weight_legacy, .write_u64 = bfq_io_set_weight_legacy, }, { .name = "bfq.weight_device", .flags = CFTYPE_NOT_ON_ROOT, .seq_show = bfq_io_show_weight, .write = bfq_io_set_weight, }, /* statistics, covers only the tasks in the bfqg */ { .name = "bfq.io_service_bytes", .private = offsetof(struct bfq_group, stats.bytes), .seq_show = bfqg_print_rwstat, }, { .name = "bfq.io_serviced", .private = offsetof(struct bfq_group, stats.ios), .seq_show = bfqg_print_rwstat, }, #ifdef CONFIG_BFQ_CGROUP_DEBUG { .name = "bfq.time", .private = offsetof(struct bfq_group, stats.time), .seq_show = bfqg_print_stat, }, { .name = "bfq.sectors", .seq_show = bfqg_print_stat_sectors, }, { .name = "bfq.io_service_time", .private = offsetof(struct bfq_group, stats.service_time), .seq_show = bfqg_print_rwstat, }, { .name = "bfq.io_wait_time", .private = offsetof(struct bfq_group, stats.wait_time), .seq_show = bfqg_print_rwstat, }, { .name = "bfq.io_merged", .private = offsetof(struct bfq_group, stats.merged), .seq_show = bfqg_print_rwstat, }, { .name = "bfq.io_queued", .private = offsetof(struct bfq_group, stats.queued), .seq_show = bfqg_print_rwstat, }, #endif /* CONFIG_BFQ_CGROUP_DEBUG */ /* the same statistics which cover the bfqg and its descendants */ { .name = "bfq.io_service_bytes_recursive", .private = offsetof(struct bfq_group, stats.bytes), .seq_show = bfqg_print_rwstat_recursive, }, { .name = "bfq.io_serviced_recursive", .private = offsetof(struct bfq_group, stats.ios), .seq_show = bfqg_print_rwstat_recursive, }, #ifdef CONFIG_BFQ_CGROUP_DEBUG { .name = "bfq.time_recursive", .private = offsetof(struct bfq_group, stats.time), .seq_show = bfqg_print_stat_recursive, }, { .name = "bfq.sectors_recursive", .seq_show = bfqg_print_stat_sectors_recursive, }, { .name = "bfq.io_service_time_recursive", .private = offsetof(struct bfq_group, stats.service_time), .seq_show = bfqg_print_rwstat_recursive, }, { .name = "bfq.io_wait_time_recursive", .private = offsetof(struct bfq_group, stats.wait_time), .seq_show = bfqg_print_rwstat_recursive, }, { .name = "bfq.io_merged_recursive", .private = offsetof(struct bfq_group, stats.merged), .seq_show = bfqg_print_rwstat_recursive, }, { .name = "bfq.io_queued_recursive", .private = offsetof(struct bfq_group, stats.queued), .seq_show = bfqg_print_rwstat_recursive, }, { .name = "bfq.avg_queue_size", .seq_show = bfqg_print_avg_queue_size, }, { .name = "bfq.group_wait_time", .private = offsetof(struct bfq_group, stats.group_wait_time), .seq_show = bfqg_print_stat, }, { .name = "bfq.idle_time", .private = offsetof(struct bfq_group, stats.idle_time), .seq_show = bfqg_print_stat, }, { .name = "bfq.empty_time", .private = offsetof(struct bfq_group, stats.empty_time), .seq_show = bfqg_print_stat, }, { .name = "bfq.dequeue", .private = offsetof(struct bfq_group, stats.dequeue), .seq_show = bfqg_print_stat, }, #endif /* CONFIG_BFQ_CGROUP_DEBUG */ { } /* terminate */ }; struct cftype bfq_blkg_files[] = { { .name = "bfq.weight", .flags = CFTYPE_NOT_ON_ROOT, .seq_show = bfq_io_show_weight, .write = bfq_io_set_weight, }, {} /* terminate */ }; #else /* CONFIG_BFQ_GROUP_IOSCHED */ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, struct bfq_group *bfqg) {} void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg) { struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); entity->weight = entity->new_weight; entity->orig_weight = entity->new_weight; if (bfqq) { bfqq->ioprio = bfqq->new_ioprio; bfqq->ioprio_class = bfqq->new_ioprio_class; } entity->sched_data = &bfqg->sched_data; } void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) {} void bfq_end_wr_async(struct bfq_data *bfqd) { bfq_end_wr_async_queues(bfqd, bfqd->root_group); } struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio) { return bfqd->root_group; } struct bfq_group *bfqq_group(struct bfq_queue *bfqq) { return bfqq->bfqd->root_group; } void bfqg_and_blkg_put(struct bfq_group *bfqg) {} struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node) { struct bfq_group *bfqg; int i; bfqg = kmalloc_node(sizeof(*bfqg), GFP_KERNEL | __GFP_ZERO, node); if (!bfqg) return NULL; for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT; return bfqg; } #endif /* CONFIG_BFQ_GROUP_IOSCHED */ |
2 2 2 2 2 2 2 2 2 2 1 2 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 | // SPDX-License-Identifier: GPL-2.0-or-later /* * DVB USB framework * * Copyright (C) 2004-6 Patrick Boettcher <patrick.boettcher@posteo.de> * Copyright (C) 2012 Antti Palosaari <crope@iki.fi> */ #include "dvb_usb_common.h" #include <media/media-device.h> static int dvb_usbv2_disable_rc_polling; module_param_named(disable_rc_polling, dvb_usbv2_disable_rc_polling, int, 0644); MODULE_PARM_DESC(disable_rc_polling, "disable remote control polling (default: 0)"); static int dvb_usb_force_pid_filter_usage; module_param_named(force_pid_filter_usage, dvb_usb_force_pid_filter_usage, int, 0444); MODULE_PARM_DESC(force_pid_filter_usage, "force all DVB USB devices to use a PID filter, if any (default: 0)"); static int dvb_usbv2_download_firmware(struct dvb_usb_device *d, const char *name) { int ret; const struct firmware *fw; dev_dbg(&d->udev->dev, "%s:\n", __func__); if (!d->props->download_firmware) { ret = -EINVAL; goto err; } ret = request_firmware(&fw, name, &d->udev->dev); if (ret < 0) { dev_err(&d->udev->dev, "%s: Did not find the firmware file '%s' (status %d). You can use <kernel_dir>/scripts/get_dvb_firmware to get the firmware\n", KBUILD_MODNAME, name, ret); goto err; } dev_info(&d->udev->dev, "%s: downloading firmware from file '%s'\n", KBUILD_MODNAME, name); ret = d->props->download_firmware(d, fw); release_firmware(fw); if (ret < 0) goto err; return ret; err: dev_dbg(&d->udev->dev, "%s: failed=%d\n", __func__, ret); return ret; } static int dvb_usbv2_i2c_init(struct dvb_usb_device *d) { int ret; dev_dbg(&d->udev->dev, "%s:\n", __func__); if (!d->props->i2c_algo) return 0; strscpy(d->i2c_adap.name, d->name, sizeof(d->i2c_adap.name)); d->i2c_adap.algo = d->props->i2c_algo; d->i2c_adap.dev.parent = &d->udev->dev; i2c_set_adapdata(&d->i2c_adap, d); ret = i2c_add_adapter(&d->i2c_adap); if (ret < 0) { d->i2c_adap.algo = NULL; goto err; } return 0; err: dev_dbg(&d->udev->dev, "%s: failed=%d\n", __func__, ret); return ret; } static int dvb_usbv2_i2c_exit(struct dvb_usb_device *d) { dev_dbg(&d->udev->dev, "%s:\n", __func__); if (d->i2c_adap.algo) i2c_del_adapter(&d->i2c_adap); return 0; } #if IS_ENABLED(CONFIG_RC_CORE) static void dvb_usb_read_remote_control(struct work_struct *work) { struct dvb_usb_device *d = container_of(work, struct dvb_usb_device, rc_query_work.work); int ret; /* * When the parameter has been set to 1 via sysfs while the * driver was running, or when bulk mode is enabled after IR init. */ if (dvb_usbv2_disable_rc_polling || d->rc.bulk_mode) { d->rc_polling_active = false; return; } ret = d->rc.query(d); if (ret < 0) { dev_err(&d->udev->dev, "%s: rc.query() failed=%d\n", KBUILD_MODNAME, ret); d->rc_polling_active = false; return; /* stop polling */ } schedule_delayed_work(&d->rc_query_work, msecs_to_jiffies(d->rc.interval)); } static int dvb_usbv2_remote_init(struct dvb_usb_device *d) { int ret; struct rc_dev *dev; dev_dbg(&d->udev->dev, "%s:\n", __func__); if (dvb_usbv2_disable_rc_polling || !d->props->get_rc_config) return 0; d->rc.map_name = d->rc_map; ret = d->props->get_rc_config(d, &d->rc); if (ret < 0) goto err; /* disable rc when there is no keymap defined */ if (!d->rc.map_name) return 0; dev = rc_allocate_device(d->rc.driver_type); if (!dev) { ret = -ENOMEM; goto err; } dev->dev.parent = &d->udev->dev; dev->device_name = d->name; usb_make_path(d->udev, d->rc_phys, sizeof(d->rc_phys)); strlcat(d->rc_phys, "/ir0", sizeof(d->rc_phys)); dev->input_phys = d->rc_phys; usb_to_input_id(d->udev, &dev->input_id); dev->driver_name = d->props->driver_name; dev->map_name = d->rc.map_name; dev->allowed_protocols = d->rc.allowed_protos; dev->change_protocol = d->rc.change_protocol; dev->timeout = d->rc.timeout; dev->priv = d; ret = rc_register_device(dev); if (ret < 0) { rc_free_device(dev); goto err; } d->rc_dev = dev; /* start polling if needed */ if (d->rc.query && !d->rc.bulk_mode) { /* initialize a work queue for handling polling */ INIT_DELAYED_WORK(&d->rc_query_work, dvb_usb_read_remote_control); dev_info(&d->udev->dev, "%s: schedule remote query interval to %d msecs\n", KBUILD_MODNAME, d->rc.interval); schedule_delayed_work(&d->rc_query_work, msecs_to_jiffies(d->rc.interval)); d->rc_polling_active = true; } return 0; err: dev_dbg(&d->udev->dev, "%s: failed=%d\n", __func__, ret); return ret; } static int dvb_usbv2_remote_exit(struct dvb_usb_device *d) { dev_dbg(&d->udev->dev, "%s:\n", __func__); if (d->rc_dev) { cancel_delayed_work_sync(&d->rc_query_work); rc_unregister_device(d->rc_dev); d->rc_dev = NULL; } return 0; } #else #define dvb_usbv2_remote_init(args...) 0 #define dvb_usbv2_remote_exit(args...) #endif static void dvb_usb_data_complete(struct usb_data_stream *stream, u8 *buf, size_t len) { struct dvb_usb_adapter *adap = stream->user_priv; dvb_dmx_swfilter(&adap->demux, buf, len); } static void dvb_usb_data_complete_204(struct usb_data_stream *stream, u8 *buf, size_t len) { struct dvb_usb_adapter *adap = stream->user_priv; dvb_dmx_swfilter_204(&adap->demux, buf, len); } static void dvb_usb_data_complete_raw(struct usb_data_stream *stream, u8 *buf, size_t len) { struct dvb_usb_adapter *adap = stream->user_priv; dvb_dmx_swfilter_raw(&adap->demux, buf, len); } static int dvb_usbv2_adapter_stream_init(struct dvb_usb_adapter *adap) { dev_dbg(&adap_to_d(adap)->udev->dev, "%s: adap=%d\n", __func__, adap->id); adap->stream.udev = adap_to_d(adap)->udev; adap->stream.user_priv = adap; adap->stream.complete = dvb_usb_data_complete; return usb_urb_initv2(&adap->stream, &adap->props->stream); } static int dvb_usbv2_adapter_stream_exit(struct dvb_usb_adapter *adap) { dev_dbg(&adap_to_d(adap)->udev->dev, "%s: adap=%d\n", __func__, adap->id); return usb_urb_exitv2(&adap->stream); } static int dvb_usb_start_feed(struct dvb_demux_feed *dvbdmxfeed) { struct dvb_usb_adapter *adap = dvbdmxfeed->demux->priv; struct dvb_usb_device *d = adap_to_d(adap); int ret = 0; struct usb_data_stream_properties stream_props; dev_dbg(&d->udev->dev, "%s: adap=%d active_fe=%d feed_type=%d setting pid [%s]: %04x (%04d) at index %d\n", __func__, adap->id, adap->active_fe, dvbdmxfeed->type, adap->pid_filtering ? "yes" : "no", dvbdmxfeed->pid, dvbdmxfeed->pid, dvbdmxfeed->index); /* wait init is done */ wait_on_bit(&adap->state_bits, ADAP_INIT, TASK_UNINTERRUPTIBLE); if (adap->active_fe == -1) return -EINVAL; /* skip feed setup if we are already feeding */ if (adap->feed_count++ > 0) goto skip_feed_start; /* set 'streaming' status bit */ set_bit(ADAP_STREAMING, &adap->state_bits); /* resolve input and output streaming parameters */ if (d->props->get_stream_config) { memcpy(&stream_props, &adap->props->stream, sizeof(struct usb_data_stream_properties)); ret = d->props->get_stream_config(adap->fe[adap->active_fe], &adap->ts_type, &stream_props); if (ret) dev_err(&d->udev->dev, "%s: get_stream_config() failed=%d\n", KBUILD_MODNAME, ret); } else { stream_props = adap->props->stream; } switch (adap->ts_type) { case DVB_USB_FE_TS_TYPE_204: adap->stream.complete = dvb_usb_data_complete_204; break; case DVB_USB_FE_TS_TYPE_RAW: adap->stream.complete = dvb_usb_data_complete_raw; break; case DVB_USB_FE_TS_TYPE_188: default: adap->stream.complete = dvb_usb_data_complete; break; } /* submit USB streaming packets */ usb_urb_submitv2(&adap->stream, &stream_props); /* enable HW PID filter */ if (adap->pid_filtering && adap->props->pid_filter_ctrl) { ret = adap->props->pid_filter_ctrl(adap, 1); if (ret) dev_err(&d->udev->dev, "%s: pid_filter_ctrl() failed=%d\n", KBUILD_MODNAME, ret); } /* ask device to start streaming */ if (d->props->streaming_ctrl) { ret = d->props->streaming_ctrl(adap->fe[adap->active_fe], 1); if (ret) dev_err(&d->udev->dev, "%s: streaming_ctrl() failed=%d\n", KBUILD_MODNAME, ret); } skip_feed_start: /* add PID to device HW PID filter */ if (adap->pid_filtering && adap->props->pid_filter) { ret = adap->props->pid_filter(adap, dvbdmxfeed->index, dvbdmxfeed->pid, 1); if (ret) dev_err(&d->udev->dev, "%s: pid_filter() failed=%d\n", KBUILD_MODNAME, ret); } if (ret) dev_dbg(&d->udev->dev, "%s: failed=%d\n", __func__, ret); return ret; } static int dvb_usb_stop_feed(struct dvb_demux_feed *dvbdmxfeed) { struct dvb_usb_adapter *adap = dvbdmxfeed->demux->priv; struct dvb_usb_device *d = adap_to_d(adap); int ret = 0; dev_dbg(&d->udev->dev, "%s: adap=%d active_fe=%d feed_type=%d setting pid [%s]: %04x (%04d) at index %d\n", __func__, adap->id, adap->active_fe, dvbdmxfeed->type, adap->pid_filtering ? "yes" : "no", dvbdmxfeed->pid, dvbdmxfeed->pid, dvbdmxfeed->index); if (adap->active_fe == -1) return -EINVAL; /* remove PID from device HW PID filter */ if (adap->pid_filtering && adap->props->pid_filter) { ret = adap->props->pid_filter(adap, dvbdmxfeed->index, dvbdmxfeed->pid, 0); if (ret) dev_err(&d->udev->dev, "%s: pid_filter() failed=%d\n", KBUILD_MODNAME, ret); } /* we cannot stop streaming until last PID is removed */ if (--adap->feed_count > 0) goto skip_feed_stop; /* ask device to stop streaming */ if (d->props->streaming_ctrl) { ret = d->props->streaming_ctrl(adap->fe[adap->active_fe], 0); if (ret) dev_err(&d->udev->dev, "%s: streaming_ctrl() failed=%d\n", KBUILD_MODNAME, ret); } /* disable HW PID filter */ if (adap->pid_filtering && adap->props->pid_filter_ctrl) { ret = adap->props->pid_filter_ctrl(adap, 0); if (ret) dev_err(&d->udev->dev, "%s: pid_filter_ctrl() failed=%d\n", KBUILD_MODNAME, ret); } /* kill USB streaming packets */ usb_urb_killv2(&adap->stream); /* clear 'streaming' status bit */ clear_bit(ADAP_STREAMING, &adap->state_bits); smp_mb__after_atomic(); wake_up_bit(&adap->state_bits, ADAP_STREAMING); skip_feed_stop: if (ret) dev_dbg(&d->udev->dev, "%s: failed=%d\n", __func__, ret); return ret; } static int dvb_usbv2_media_device_init(struct dvb_usb_adapter *adap) { #ifdef CONFIG_MEDIA_CONTROLLER_DVB struct media_device *mdev; struct dvb_usb_device *d = adap_to_d(adap); struct usb_device *udev = d->udev; mdev = kzalloc(sizeof(*mdev), GFP_KERNEL); if (!mdev) return -ENOMEM; media_device_usb_init(mdev, udev, d->name); dvb_register_media_controller(&adap->dvb_adap, mdev); dev_info(&d->udev->dev, "media controller created\n"); #endif return 0; } static int dvb_usbv2_media_device_register(struct dvb_usb_adapter *adap) { #ifdef CONFIG_MEDIA_CONTROLLER_DVB return media_device_register(adap->dvb_adap.mdev); #else return 0; #endif } static void dvb_usbv2_media_device_unregister(struct dvb_usb_adapter *adap) { #ifdef CONFIG_MEDIA_CONTROLLER_DVB if (!adap->dvb_adap.mdev) return; media_device_unregister(adap->dvb_adap.mdev); media_device_cleanup(adap->dvb_adap.mdev); kfree(adap->dvb_adap.mdev); adap->dvb_adap.mdev = NULL; #endif } static int dvb_usbv2_adapter_dvb_init(struct dvb_usb_adapter *adap) { int ret; struct dvb_usb_device *d = adap_to_d(adap); dev_dbg(&d->udev->dev, "%s: adap=%d\n", __func__, adap->id); ret = dvb_register_adapter(&adap->dvb_adap, d->name, d->props->owner, &d->udev->dev, d->props->adapter_nr); if (ret < 0) { dev_dbg(&d->udev->dev, "%s: dvb_register_adapter() failed=%d\n", __func__, ret); goto err_dvb_register_adapter; } adap->dvb_adap.priv = adap; ret = dvb_usbv2_media_device_init(adap); if (ret < 0) { dev_dbg(&d->udev->dev, "%s: dvb_usbv2_media_device_init() failed=%d\n", __func__, ret); goto err_dvb_register_mc; } if (d->props->read_mac_address) { ret = d->props->read_mac_address(adap, adap->dvb_adap.proposed_mac); if (ret < 0) goto err_dvb_dmx_init; dev_info(&d->udev->dev, "%s: MAC address: %pM\n", KBUILD_MODNAME, adap->dvb_adap.proposed_mac); } adap->demux.dmx.capabilities = DMX_TS_FILTERING | DMX_SECTION_FILTERING; adap->demux.priv = adap; adap->demux.filternum = 0; adap->demux.filternum = adap->max_feed_count; adap->demux.feednum = adap->demux.filternum; adap->demux.start_feed = dvb_usb_start_feed; adap->demux.stop_feed = dvb_usb_stop_feed; adap->demux.write_to_decoder = NULL; ret = dvb_dmx_init(&adap->demux); if (ret < 0) { dev_err(&d->udev->dev, "%s: dvb_dmx_init() failed=%d\n", KBUILD_MODNAME, ret); goto err_dvb_dmx_init; } adap->dmxdev.filternum = adap->demux.filternum; adap->dmxdev.demux = &adap->demux.dmx; adap->dmxdev.capabilities = 0; ret = dvb_dmxdev_init(&adap->dmxdev, &adap->dvb_adap); if (ret < 0) { dev_err(&d->udev->dev, "%s: dvb_dmxdev_init() failed=%d\n", KBUILD_MODNAME, ret); goto err_dvb_dmxdev_init; } ret = dvb_net_init(&adap->dvb_adap, &adap->dvb_net, &adap->demux.dmx); if (ret < 0) { dev_err(&d->udev->dev, "%s: dvb_net_init() failed=%d\n", KBUILD_MODNAME, ret); goto err_dvb_net_init; } return 0; err_dvb_net_init: dvb_dmxdev_release(&adap->dmxdev); err_dvb_dmxdev_init: dvb_dmx_release(&adap->demux); err_dvb_dmx_init: dvb_usbv2_media_device_unregister(adap); err_dvb_register_mc: dvb_unregister_adapter(&adap->dvb_adap); err_dvb_register_adapter: adap->dvb_adap.priv = NULL; return ret; } static int dvb_usbv2_adapter_dvb_exit(struct dvb_usb_adapter *adap) { dev_dbg(&adap_to_d(adap)->udev->dev, "%s: adap=%d\n", __func__, adap->id); if (adap->dvb_adap.priv) { dvb_net_release(&adap->dvb_net); adap->demux.dmx.close(&adap->demux.dmx); dvb_dmxdev_release(&adap->dmxdev); dvb_dmx_release(&adap->demux); dvb_unregister_adapter(&adap->dvb_adap); } return 0; } static int dvb_usbv2_device_power_ctrl(struct dvb_usb_device *d, int onoff) { int ret; if (onoff) d->powered++; else d->powered--; if (d->powered == 0 || (onoff && d->powered == 1)) { /* when switching from 1 to 0 or from 0 to 1 */ dev_dbg(&d->udev->dev, "%s: power=%d\n", __func__, onoff); if (d->props->power_ctrl) { ret = d->props->power_ctrl(d, onoff); if (ret < 0) goto err; } } return 0; err: dev_dbg(&d->udev->dev, "%s: failed=%d\n", __func__, ret); return ret; } static int dvb_usb_fe_init(struct dvb_frontend *fe) { int ret; struct dvb_usb_adapter *adap = fe->dvb->priv; struct dvb_usb_device *d = adap_to_d(adap); dev_dbg(&d->udev->dev, "%s: adap=%d fe=%d\n", __func__, adap->id, fe->id); if (!adap->suspend_resume_active) { adap->active_fe = fe->id; set_bit(ADAP_INIT, &adap->state_bits); } ret = dvb_usbv2_device_power_ctrl(d, 1); if (ret < 0) goto err; if (d->props->frontend_ctrl) { ret = d->props->frontend_ctrl(fe, 1); if (ret < 0) goto err; } if (adap->fe_init[fe->id]) { ret = adap->fe_init[fe->id](fe); if (ret < 0) goto err; } err: if (!adap->suspend_resume_active) { clear_bit(ADAP_INIT, &adap->state_bits); smp_mb__after_atomic(); wake_up_bit(&adap->state_bits, ADAP_INIT); } dev_dbg(&d->udev->dev, "%s: ret=%d\n", __func__, ret); return ret; } static int dvb_usb_fe_sleep(struct dvb_frontend *fe) { int ret; struct dvb_usb_adapter *adap = fe->dvb->priv; struct dvb_usb_device *d = adap_to_d(adap); dev_dbg(&d->udev->dev, "%s: adap=%d fe=%d\n", __func__, adap->id, fe->id); if (!adap->suspend_resume_active) { set_bit(ADAP_SLEEP, &adap->state_bits); wait_on_bit(&adap->state_bits, ADAP_STREAMING, TASK_UNINTERRUPTIBLE); } if (adap->fe_sleep[fe->id]) { ret = adap->fe_sleep[fe->id](fe); if (ret < 0) goto err; } if (d->props->frontend_ctrl) { ret = d->props->frontend_ctrl(fe, 0); if (ret < 0) goto err; } ret = dvb_usbv2_device_power_ctrl(d, 0); err: if (!adap->suspend_resume_active) { adap->active_fe = -1; clear_bit(ADAP_SLEEP, &adap->state_bits); smp_mb__after_atomic(); wake_up_bit(&adap->state_bits, ADAP_SLEEP); } dev_dbg(&d->udev->dev, "%s: ret=%d\n", __func__, ret); return ret; } static int dvb_usbv2_adapter_frontend_init(struct dvb_usb_adapter *adap) { int ret, i, count_registered = 0; struct dvb_usb_device *d = adap_to_d(adap); dev_dbg(&d->udev->dev, "%s: adap=%d\n", __func__, adap->id); memset(adap->fe, 0, sizeof(adap->fe)); adap->active_fe = -1; if (d->props->frontend_attach) { ret = d->props->frontend_attach(adap); if (ret < 0) { dev_dbg(&d->udev->dev, "%s: frontend_attach() failed=%d\n", __func__, ret); goto err_dvb_frontend_detach; } } else { dev_dbg(&d->udev->dev, "%s: frontend_attach() do not exists\n", __func__); ret = 0; goto err; } for (i = 0; i < MAX_NO_OF_FE_PER_ADAP && adap->fe[i]; i++) { adap->fe[i]->id = i; /* re-assign sleep and wakeup functions */ adap->fe_init[i] = adap->fe[i]->ops.init; adap->fe[i]->ops.init = dvb_usb_fe_init; adap->fe_sleep[i] = adap->fe[i]->ops.sleep; adap->fe[i]->ops.sleep = dvb_usb_fe_sleep; ret = dvb_register_frontend(&adap->dvb_adap, adap->fe[i]); if (ret < 0) { dev_err(&d->udev->dev, "%s: frontend%d registration failed\n", KBUILD_MODNAME, i); goto err_dvb_unregister_frontend; } count_registered++; } if (d->props->tuner_attach) { ret = d->props->tuner_attach(adap); if (ret < 0) { dev_dbg(&d->udev->dev, "%s: tuner_attach() failed=%d\n", __func__, ret); goto err_dvb_unregister_frontend; } } ret = dvb_create_media_graph(&adap->dvb_adap, true); if (ret < 0) goto err_dvb_unregister_frontend; ret = dvb_usbv2_media_device_register(adap); return ret; err_dvb_unregister_frontend: for (i = count_registered - 1; i >= 0; i--) dvb_unregister_frontend(adap->fe[i]); err_dvb_frontend_detach: for (i = MAX_NO_OF_FE_PER_ADAP - 1; i >= 0; i--) { if (adap->fe[i]) { dvb_frontend_detach(adap->fe[i]); adap->fe[i] = NULL; } } err: dev_dbg(&d->udev->dev, "%s: failed=%d\n", __func__, ret); return ret; } static int dvb_usbv2_adapter_frontend_exit(struct dvb_usb_adapter *adap) { int ret, i; struct dvb_usb_device *d = adap_to_d(adap); dev_dbg(&d->udev->dev, "%s: adap=%d\n", __func__, adap->id); for (i = MAX_NO_OF_FE_PER_ADAP - 1; i >= 0; i--) { if (adap->fe[i]) { dvb_unregister_frontend(adap->fe[i]); dvb_frontend_detach(adap->fe[i]); } } if (d->props->tuner_detach) { ret = d->props->tuner_detach(adap); if (ret < 0) { dev_dbg(&d->udev->dev, "%s: tuner_detach() failed=%d\n", __func__, ret); } } if (d->props->frontend_detach) { ret = d->props->frontend_detach(adap); if (ret < 0) { dev_dbg(&d->udev->dev, "%s: frontend_detach() failed=%d\n", __func__, ret); } } return 0; } static int dvb_usbv2_adapter_init(struct dvb_usb_device *d) { struct dvb_usb_adapter *adap; int ret, i, adapter_count; /* resolve adapter count */ adapter_count = d->props->num_adapters; if (d->props->get_adapter_count) { ret = d->props->get_adapter_count(d); if (ret < 0) goto err; adapter_count = ret; } for (i = 0; i < adapter_count; i++) { adap = &d->adapter[i]; adap->id = i; adap->props = &d->props->adapter[i]; /* speed - when running at FULL speed we need a HW PID filter */ if (d->udev->speed == USB_SPEED_FULL && !(adap->props->caps & DVB_USB_ADAP_HAS_PID_FILTER)) { dev_err(&d->udev->dev, "%s: this USB2.0 device cannot be run on a USB1.1 port (it lacks a hardware PID filter)\n", KBUILD_MODNAME); ret = -ENODEV; goto err; } else if ((d->udev->speed == USB_SPEED_FULL && adap->props->caps & DVB_USB_ADAP_HAS_PID_FILTER) || (adap->props->caps & DVB_USB_ADAP_NEED_PID_FILTERING)) { dev_info(&d->udev->dev, "%s: will use the device's hardware PID filter (table count: %d)\n", KBUILD_MODNAME, adap->props->pid_filter_count); adap->pid_filtering = 1; adap->max_feed_count = adap->props->pid_filter_count; } else { dev_info(&d->udev->dev, "%s: will pass the complete MPEG2 transport stream to the software demuxer\n", KBUILD_MODNAME); adap->pid_filtering = 0; adap->max_feed_count = 255; } if (!adap->pid_filtering && dvb_usb_force_pid_filter_usage && adap->props->caps & DVB_USB_ADAP_HAS_PID_FILTER) { dev_info(&d->udev->dev, "%s: PID filter enabled by module option\n", KBUILD_MODNAME); adap->pid_filtering = 1; adap->max_feed_count = adap->props->pid_filter_count; } ret = dvb_usbv2_adapter_stream_init(adap); if (ret) goto err; ret = dvb_usbv2_adapter_dvb_init(adap); if (ret) goto err; ret = dvb_usbv2_adapter_frontend_init(adap); if (ret) goto err; /* use exclusive FE lock if there is multiple shared FEs */ if (adap->fe[1]) adap->dvb_adap.mfe_shared = 1; } return 0; err: dev_dbg(&d->udev->dev, "%s: failed=%d\n", __func__, ret); return ret; } static int dvb_usbv2_adapter_exit(struct dvb_usb_device *d) { int i; dev_dbg(&d->udev->dev, "%s:\n", __func__); for (i = MAX_NO_OF_ADAPTER_PER_DEVICE - 1; i >= 0; i--) { if (d->adapter[i].props) { dvb_usbv2_adapter_dvb_exit(&d->adapter[i]); dvb_usbv2_adapter_stream_exit(&d->adapter[i]); dvb_usbv2_adapter_frontend_exit(&d->adapter[i]); dvb_usbv2_media_device_unregister(&d->adapter[i]); } } return 0; } /* general initialization functions */ static int dvb_usbv2_exit(struct dvb_usb_device *d) { dev_dbg(&d->udev->dev, "%s:\n", __func__); dvb_usbv2_remote_exit(d); dvb_usbv2_adapter_exit(d); dvb_usbv2_i2c_exit(d); return 0; } static int dvb_usbv2_init(struct dvb_usb_device *d) { int ret; dev_dbg(&d->udev->dev, "%s:\n", __func__); dvb_usbv2_device_power_ctrl(d, 1); if (d->props->read_config) { ret = d->props->read_config(d); if (ret < 0) goto err; } ret = dvb_usbv2_i2c_init(d); if (ret < 0) goto err; ret = dvb_usbv2_adapter_init(d); if (ret < 0) goto err; if (d->props->init) { ret = d->props->init(d); if (ret < 0) goto err; } ret = dvb_usbv2_remote_init(d); if (ret < 0) goto err; dvb_usbv2_device_power_ctrl(d, 0); return 0; err: dvb_usbv2_device_power_ctrl(d, 0); dev_dbg(&d->udev->dev, "%s: failed=%d\n", __func__, ret); return ret; } int dvb_usbv2_probe(struct usb_interface *intf, const struct usb_device_id *id) { int ret; struct dvb_usb_device *d; struct usb_device *udev = interface_to_usbdev(intf); struct dvb_usb_driver_info *driver_info = (struct dvb_usb_driver_info *) id->driver_info; dev_dbg(&udev->dev, "%s: bInterfaceNumber=%d\n", __func__, intf->cur_altsetting->desc.bInterfaceNumber); if (!id->driver_info) { dev_err(&udev->dev, "%s: driver_info failed\n", KBUILD_MODNAME); ret = -ENODEV; goto err; } d = kzalloc(sizeof(struct dvb_usb_device), GFP_KERNEL); if (!d) { dev_err(&udev->dev, "%s: kzalloc() failed\n", KBUILD_MODNAME); ret = -ENOMEM; goto err; } d->intf = intf; d->name = driver_info->name; d->rc_map = driver_info->rc_map; d->udev = udev; d->props = driver_info->props; if (intf->cur_altsetting->desc.bInterfaceNumber != d->props->bInterfaceNumber) { ret = -ENODEV; goto err_kfree_d; } mutex_init(&d->usb_mutex); mutex_init(&d->i2c_mutex); if (d->props->size_of_priv) { d->priv = kzalloc(d->props->size_of_priv, GFP_KERNEL); if (!d->priv) { dev_err(&d->udev->dev, "%s: kzalloc() failed\n", KBUILD_MODNAME); ret = -ENOMEM; goto err_kfree_d; } } if (d->props->probe) { ret = d->props->probe(d); if (ret) goto err_kfree_priv; } if (d->props->identify_state) { const char *name = NULL; ret = d->props->identify_state(d, &name); if (ret == COLD) { dev_info(&d->udev->dev, "%s: found a '%s' in cold state\n", KBUILD_MODNAME, d->name); if (!name) name = d->props->firmware; ret = dvb_usbv2_download_firmware(d, name); if (ret == 0) { /* device is warm, continue initialization */ ; } else if (ret == RECONNECTS_USB) { /* * USB core will call disconnect() and then * probe() as device reconnects itself from the * USB bus. disconnect() will release all driver * resources and probe() is called for 'new' * device. As 'new' device is warm we should * never go here again. */ goto exit; } else { goto err_free_all; } } else if (ret != WARM) { goto err_free_all; } } dev_info(&d->udev->dev, "%s: found a '%s' in warm state\n", KBUILD_MODNAME, d->name); ret = dvb_usbv2_init(d); if (ret < 0) goto err_free_all; dev_info(&d->udev->dev, "%s: '%s' successfully initialized and connected\n", KBUILD_MODNAME, d->name); exit: usb_set_intfdata(intf, d); return 0; err_free_all: dvb_usbv2_exit(d); if (d->props->disconnect) d->props->disconnect(d); err_kfree_priv: kfree(d->priv); err_kfree_d: kfree(d); err: dev_dbg(&udev->dev, "%s: failed=%d\n", __func__, ret); return ret; } EXPORT_SYMBOL(dvb_usbv2_probe); void dvb_usbv2_disconnect(struct usb_interface *intf) { struct dvb_usb_device *d = usb_get_intfdata(intf); const char *devname = kstrdup(dev_name(&d->udev->dev), GFP_KERNEL); const char *drvname = d->name; dev_dbg(&d->udev->dev, "%s: bInterfaceNumber=%d\n", __func__, intf->cur_altsetting->desc.bInterfaceNumber); if (d->props->exit) d->props->exit(d); dvb_usbv2_exit(d); if (d->props->disconnect) d->props->disconnect(d); kfree(d->priv); kfree(d); pr_info("%s: '%s:%s' successfully deinitialized and disconnected\n", KBUILD_MODNAME, drvname, devname); kfree(devname); } EXPORT_SYMBOL(dvb_usbv2_disconnect); int dvb_usbv2_suspend(struct usb_interface *intf, pm_message_t msg) { struct dvb_usb_device *d = usb_get_intfdata(intf); int ret = 0, i, active_fe; struct dvb_frontend *fe; dev_dbg(&d->udev->dev, "%s:\n", __func__); /* stop remote controller poll */ if (d->rc_polling_active) cancel_delayed_work_sync(&d->rc_query_work); for (i = MAX_NO_OF_ADAPTER_PER_DEVICE - 1; i >= 0; i--) { active_fe = d->adapter[i].active_fe; if (d->adapter[i].dvb_adap.priv && active_fe != -1) { fe = d->adapter[i].fe[active_fe]; d->adapter[i].suspend_resume_active = true; if (d->props->streaming_ctrl) d->props->streaming_ctrl(fe, 0); /* stop usb streaming */ usb_urb_killv2(&d->adapter[i].stream); ret = dvb_frontend_suspend(fe); } } return ret; } EXPORT_SYMBOL(dvb_usbv2_suspend); static int dvb_usbv2_resume_common(struct dvb_usb_device *d) { int ret = 0, i, active_fe; struct dvb_frontend *fe; dev_dbg(&d->udev->dev, "%s:\n", __func__); for (i = 0; i < MAX_NO_OF_ADAPTER_PER_DEVICE; i++) { active_fe = d->adapter[i].active_fe; if (d->adapter[i].dvb_adap.priv && active_fe != -1) { fe = d->adapter[i].fe[active_fe]; ret = dvb_frontend_resume(fe); /* resume usb streaming */ usb_urb_submitv2(&d->adapter[i].stream, NULL); if (d->props->streaming_ctrl) d->props->streaming_ctrl(fe, 1); d->adapter[i].suspend_resume_active = false; } } /* start remote controller poll */ if (d->rc_polling_active) schedule_delayed_work(&d->rc_query_work, msecs_to_jiffies(d->rc.interval)); return ret; } int dvb_usbv2_resume(struct usb_interface *intf) { struct dvb_usb_device *d = usb_get_intfdata(intf); dev_dbg(&d->udev->dev, "%s:\n", __func__); return dvb_usbv2_resume_common(d); } EXPORT_SYMBOL(dvb_usbv2_resume); int dvb_usbv2_reset_resume(struct usb_interface *intf) { struct dvb_usb_device *d = usb_get_intfdata(intf); int ret; dev_dbg(&d->udev->dev, "%s:\n", __func__); dvb_usbv2_device_power_ctrl(d, 1); if (d->props->init) d->props->init(d); ret = dvb_usbv2_resume_common(d); dvb_usbv2_device_power_ctrl(d, 0); return ret; } EXPORT_SYMBOL(dvb_usbv2_reset_resume); MODULE_VERSION("2.0"); MODULE_AUTHOR("Patrick Boettcher <patrick.boettcher@posteo.de>"); MODULE_AUTHOR("Antti Palosaari <crope@iki.fi>"); MODULE_DESCRIPTION("DVB USB common"); MODULE_LICENSE("GPL"); |
81 80 11 78 16 73 53 52 38 15 43 10 53 8 46 74 1 1 83 4 81 4 78 81 5 76 76 5 5 13 1 12 1 1 6 2 9 1 1 1 1 4 1 104 1 53 1480 1471 105 77 1472 28 258 35 1407 221 71 14 15 1 2 8 1 2 50 4 20 4 2 20 2 20 36 1 4 5 2 3 2 1 1 1 35 1 34 32 1 2 1 1 4 1 2 1 13 6 5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 | // SPDX-License-Identifier: GPL-2.0-or-later /* * INET 802.1Q VLAN * Ethernet-type device handling. * * Authors: Ben Greear <greearb@candelatech.com> * Please send support related email to: netdev@vger.kernel.org * VLAN Home Page: http://www.candelatech.com/~greear/vlan.html * * Fixes: * Fix for packet capture - Nick Eggleston <nick@dccinc.com>; * Add HW acceleration hooks - David S. Miller <davem@redhat.com>; * Correct all the locking - David S. Miller <davem@redhat.com>; * Use hash table for VLAN groups - David S. Miller <davem@redhat.com> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/capability.h> #include <linux/module.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/init.h> #include <linux/rculist.h> #include <net/p8022.h> #include <net/arp.h> #include <linux/rtnetlink.h> #include <linux/notifier.h> #include <net/rtnetlink.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include <linux/uaccess.h> #include <linux/if_vlan.h> #include "vlan.h" #include "vlanproc.h" #define DRV_VERSION "1.8" /* Global VLAN variables */ unsigned int vlan_net_id __read_mostly; const char vlan_fullname[] = "802.1Q VLAN Support"; const char vlan_version[] = DRV_VERSION; /* End of global variables definitions. */ static int vlan_group_prealloc_vid(struct vlan_group *vg, __be16 vlan_proto, u16 vlan_id) { struct net_device **array; unsigned int vidx; unsigned int size; int pidx; ASSERT_RTNL(); pidx = vlan_proto_idx(vlan_proto); if (pidx < 0) return -EINVAL; vidx = vlan_id / VLAN_GROUP_ARRAY_PART_LEN; array = vg->vlan_devices_arrays[pidx][vidx]; if (array != NULL) return 0; size = sizeof(struct net_device *) * VLAN_GROUP_ARRAY_PART_LEN; array = kzalloc(size, GFP_KERNEL_ACCOUNT); if (array == NULL) return -ENOBUFS; /* paired with smp_rmb() in __vlan_group_get_device() */ smp_wmb(); vg->vlan_devices_arrays[pidx][vidx] = array; return 0; } static void vlan_stacked_transfer_operstate(const struct net_device *rootdev, struct net_device *dev, struct vlan_dev_priv *vlan) { if (!(vlan->flags & VLAN_FLAG_BRIDGE_BINDING)) netif_stacked_transfer_operstate(rootdev, dev); } void unregister_vlan_dev(struct net_device *dev, struct list_head *head) { struct vlan_dev_priv *vlan = vlan_dev_priv(dev); struct net_device *real_dev = vlan->real_dev; struct vlan_info *vlan_info; struct vlan_group *grp; u16 vlan_id = vlan->vlan_id; ASSERT_RTNL(); vlan_info = rtnl_dereference(real_dev->vlan_info); BUG_ON(!vlan_info); grp = &vlan_info->grp; grp->nr_vlan_devs--; if (vlan->flags & VLAN_FLAG_MVRP) vlan_mvrp_request_leave(dev); if (vlan->flags & VLAN_FLAG_GVRP) vlan_gvrp_request_leave(dev); vlan_group_set_device(grp, vlan->vlan_proto, vlan_id, NULL); netdev_upper_dev_unlink(real_dev, dev); /* Because unregister_netdevice_queue() makes sure at least one rcu * grace period is respected before device freeing, * we dont need to call synchronize_net() here. */ unregister_netdevice_queue(dev, head); if (grp->nr_vlan_devs == 0) { vlan_mvrp_uninit_applicant(real_dev); vlan_gvrp_uninit_applicant(real_dev); } vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id); } int vlan_check_real_dev(struct net_device *real_dev, __be16 protocol, u16 vlan_id, struct netlink_ext_ack *extack) { const char *name = real_dev->name; if (real_dev->features & NETIF_F_VLAN_CHALLENGED) { pr_info("VLANs not supported on %s\n", name); NL_SET_ERR_MSG_MOD(extack, "VLANs not supported on device"); return -EOPNOTSUPP; } if (vlan_find_dev(real_dev, protocol, vlan_id) != NULL) { NL_SET_ERR_MSG_MOD(extack, "VLAN device already exists"); return -EEXIST; } return 0; } int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack) { struct vlan_dev_priv *vlan = vlan_dev_priv(dev); struct net_device *real_dev = vlan->real_dev; u16 vlan_id = vlan->vlan_id; struct vlan_info *vlan_info; struct vlan_group *grp; int err; err = vlan_vid_add(real_dev, vlan->vlan_proto, vlan_id); if (err) return err; vlan_info = rtnl_dereference(real_dev->vlan_info); /* vlan_info should be there now. vlan_vid_add took care of it */ BUG_ON(!vlan_info); grp = &vlan_info->grp; if (grp->nr_vlan_devs == 0) { err = vlan_gvrp_init_applicant(real_dev); if (err < 0) goto out_vid_del; err = vlan_mvrp_init_applicant(real_dev); if (err < 0) goto out_uninit_gvrp; } err = vlan_group_prealloc_vid(grp, vlan->vlan_proto, vlan_id); if (err < 0) goto out_uninit_mvrp; err = register_netdevice(dev); if (err < 0) goto out_uninit_mvrp; err = netdev_upper_dev_link(real_dev, dev, extack); if (err) goto out_unregister_netdev; vlan_stacked_transfer_operstate(real_dev, dev, vlan); linkwatch_fire_event(dev); /* _MUST_ call rfc2863_policy() */ /* So, got the sucker initialized, now lets place * it into our local structure. */ vlan_group_set_device(grp, vlan->vlan_proto, vlan_id, dev); grp->nr_vlan_devs++; return 0; out_unregister_netdev: unregister_netdevice(dev); out_uninit_mvrp: if (grp->nr_vlan_devs == 0) vlan_mvrp_uninit_applicant(real_dev); out_uninit_gvrp: if (grp->nr_vlan_devs == 0) vlan_gvrp_uninit_applicant(real_dev); out_vid_del: vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id); return err; } /* Attach a VLAN device to a mac address (ie Ethernet Card). * Returns 0 if the device was created or a negative error code otherwise. */ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id) { struct net_device *new_dev; struct vlan_dev_priv *vlan; struct net *net = dev_net(real_dev); struct vlan_net *vn = net_generic(net, vlan_net_id); char name[IFNAMSIZ]; int err; if (vlan_id >= VLAN_VID_MASK) return -ERANGE; err = vlan_check_real_dev(real_dev, htons(ETH_P_8021Q), vlan_id, NULL); if (err < 0) return err; /* Gotta set up the fields for the device. */ switch (vn->name_type) { case VLAN_NAME_TYPE_RAW_PLUS_VID: /* name will look like: eth1.0005 */ snprintf(name, IFNAMSIZ, "%s.%.4i", real_dev->name, vlan_id); break; case VLAN_NAME_TYPE_PLUS_VID_NO_PAD: /* Put our vlan.VID in the name. * Name will look like: vlan5 */ snprintf(name, IFNAMSIZ, "vlan%i", vlan_id); break; case VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD: /* Put our vlan.VID in the name. * Name will look like: eth0.5 */ snprintf(name, IFNAMSIZ, "%s.%i", real_dev->name, vlan_id); break; case VLAN_NAME_TYPE_PLUS_VID: /* Put our vlan.VID in the name. * Name will look like: vlan0005 */ default: snprintf(name, IFNAMSIZ, "vlan%.4i", vlan_id); } new_dev = alloc_netdev(sizeof(struct vlan_dev_priv), name, NET_NAME_UNKNOWN, vlan_setup); if (new_dev == NULL) return -ENOBUFS; dev_net_set(new_dev, net); /* need 4 bytes for extra VLAN header info, * hope the underlying device can handle it. */ new_dev->mtu = real_dev->mtu; vlan = vlan_dev_priv(new_dev); vlan->vlan_proto = htons(ETH_P_8021Q); vlan->vlan_id = vlan_id; vlan->real_dev = real_dev; vlan->dent = NULL; vlan->flags = VLAN_FLAG_REORDER_HDR; new_dev->rtnl_link_ops = &vlan_link_ops; err = register_vlan_dev(new_dev, NULL); if (err < 0) goto out_free_newdev; return 0; out_free_newdev: free_netdev(new_dev); return err; } static void vlan_sync_address(struct net_device *dev, struct net_device *vlandev) { struct vlan_dev_priv *vlan = vlan_dev_priv(vlandev); /* May be called without an actual change */ if (ether_addr_equal(vlan->real_dev_addr, dev->dev_addr)) return; /* vlan continues to inherit address of lower device */ if (vlan_dev_inherit_address(vlandev, dev)) goto out; /* vlan address was different from the old address and is equal to * the new address */ if (!ether_addr_equal(vlandev->dev_addr, vlan->real_dev_addr) && ether_addr_equal(vlandev->dev_addr, dev->dev_addr)) dev_uc_del(dev, vlandev->dev_addr); /* vlan address was equal to the old address and is different from * the new address */ if (ether_addr_equal(vlandev->dev_addr, vlan->real_dev_addr) && !ether_addr_equal(vlandev->dev_addr, dev->dev_addr)) dev_uc_add(dev, vlandev->dev_addr); out: ether_addr_copy(vlan->real_dev_addr, dev->dev_addr); } static void vlan_transfer_features(struct net_device *dev, struct net_device *vlandev) { struct vlan_dev_priv *vlan = vlan_dev_priv(vlandev); netif_inherit_tso_max(vlandev, dev); if (vlan_hw_offload_capable(dev->features, vlan->vlan_proto)) vlandev->hard_header_len = dev->hard_header_len; else vlandev->hard_header_len = dev->hard_header_len + VLAN_HLEN; #if IS_ENABLED(CONFIG_FCOE) vlandev->fcoe_ddp_xid = dev->fcoe_ddp_xid; #endif vlandev->priv_flags &= ~IFF_XMIT_DST_RELEASE; vlandev->priv_flags |= (vlan->real_dev->priv_flags & IFF_XMIT_DST_RELEASE); vlandev->hw_enc_features = vlan_tnl_features(vlan->real_dev); netdev_update_features(vlandev); } static int __vlan_device_event(struct net_device *dev, unsigned long event) { int err = 0; switch (event) { case NETDEV_CHANGENAME: vlan_proc_rem_dev(dev); err = vlan_proc_add_dev(dev); break; case NETDEV_REGISTER: err = vlan_proc_add_dev(dev); break; case NETDEV_UNREGISTER: vlan_proc_rem_dev(dev); break; } return err; } static int vlan_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr); struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct vlan_group *grp; struct vlan_info *vlan_info; int i, flgs; struct net_device *vlandev; struct vlan_dev_priv *vlan; bool last = false; LIST_HEAD(list); int err; if (is_vlan_dev(dev)) { int err = __vlan_device_event(dev, event); if (err) return notifier_from_errno(err); } if ((event == NETDEV_UP) && (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) { pr_info("adding VLAN 0 to HW filter on device %s\n", dev->name); vlan_vid_add(dev, htons(ETH_P_8021Q), 0); } if (event == NETDEV_DOWN && (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) vlan_vid_del(dev, htons(ETH_P_8021Q), 0); vlan_info = rtnl_dereference(dev->vlan_info); if (!vlan_info) goto out; grp = &vlan_info->grp; /* It is OK that we do not hold the group lock right now, * as we run under the RTNL lock. */ switch (event) { case NETDEV_CHANGE: /* Propagate real device state to vlan devices */ vlan_group_for_each_dev(grp, i, vlandev) vlan_stacked_transfer_operstate(dev, vlandev, vlan_dev_priv(vlandev)); break; case NETDEV_CHANGEADDR: /* Adjust unicast filters on underlying device */ vlan_group_for_each_dev(grp, i, vlandev) { flgs = vlandev->flags; if (!(flgs & IFF_UP)) continue; vlan_sync_address(dev, vlandev); } break; case NETDEV_CHANGEMTU: vlan_group_for_each_dev(grp, i, vlandev) { if (vlandev->mtu <= dev->mtu) continue; dev_set_mtu(vlandev, dev->mtu); } break; case NETDEV_FEAT_CHANGE: /* Propagate device features to underlying device */ vlan_group_for_each_dev(grp, i, vlandev) vlan_transfer_features(dev, vlandev); break; case NETDEV_DOWN: { struct net_device *tmp; LIST_HEAD(close_list); /* Put all VLANs for this dev in the down state too. */ vlan_group_for_each_dev(grp, i, vlandev) { flgs = vlandev->flags; if (!(flgs & IFF_UP)) continue; vlan = vlan_dev_priv(vlandev); if (!(vlan->flags & VLAN_FLAG_LOOSE_BINDING)) list_add(&vlandev->close_list, &close_list); } dev_close_many(&close_list, false); list_for_each_entry_safe(vlandev, tmp, &close_list, close_list) { vlan_stacked_transfer_operstate(dev, vlandev, vlan_dev_priv(vlandev)); list_del_init(&vlandev->close_list); } list_del(&close_list); break; } case NETDEV_UP: /* Put all VLANs for this dev in the up state too. */ vlan_group_for_each_dev(grp, i, vlandev) { flgs = dev_get_flags(vlandev); if (flgs & IFF_UP) continue; vlan = vlan_dev_priv(vlandev); if (!(vlan->flags & VLAN_FLAG_LOOSE_BINDING)) dev_change_flags(vlandev, flgs | IFF_UP, extack); vlan_stacked_transfer_operstate(dev, vlandev, vlan); } break; case NETDEV_UNREGISTER: /* twiddle thumbs on netns device moves */ if (dev->reg_state != NETREG_UNREGISTERING) break; vlan_group_for_each_dev(grp, i, vlandev) { /* removal of last vid destroys vlan_info, abort * afterwards */ if (vlan_info->nr_vids == 1) last = true; unregister_vlan_dev(vlandev, &list); if (last) break; } unregister_netdevice_many(&list); break; case NETDEV_PRE_TYPE_CHANGE: /* Forbid underlaying device to change its type. */ if (vlan_uses_dev(dev)) return NOTIFY_BAD; break; case NETDEV_NOTIFY_PEERS: case NETDEV_BONDING_FAILOVER: case NETDEV_RESEND_IGMP: /* Propagate to vlan devices */ vlan_group_for_each_dev(grp, i, vlandev) call_netdevice_notifiers(event, vlandev); break; case NETDEV_CVLAN_FILTER_PUSH_INFO: err = vlan_filter_push_vids(vlan_info, htons(ETH_P_8021Q)); if (err) return notifier_from_errno(err); break; case NETDEV_CVLAN_FILTER_DROP_INFO: vlan_filter_drop_vids(vlan_info, htons(ETH_P_8021Q)); break; case NETDEV_SVLAN_FILTER_PUSH_INFO: err = vlan_filter_push_vids(vlan_info, htons(ETH_P_8021AD)); if (err) return notifier_from_errno(err); break; case NETDEV_SVLAN_FILTER_DROP_INFO: vlan_filter_drop_vids(vlan_info, htons(ETH_P_8021AD)); break; } out: return NOTIFY_DONE; } static struct notifier_block vlan_notifier_block __read_mostly = { .notifier_call = vlan_device_event, }; /* * VLAN IOCTL handler. * o execute requested action or pass command to the device driver * arg is really a struct vlan_ioctl_args __user *. */ static int vlan_ioctl_handler(struct net *net, void __user *arg) { int err; struct vlan_ioctl_args args; struct net_device *dev = NULL; if (copy_from_user(&args, arg, sizeof(struct vlan_ioctl_args))) return -EFAULT; /* Null terminate this sucker, just in case. */ args.device1[sizeof(args.device1) - 1] = 0; args.u.device2[sizeof(args.u.device2) - 1] = 0; rtnl_lock(); switch (args.cmd) { case SET_VLAN_INGRESS_PRIORITY_CMD: case SET_VLAN_EGRESS_PRIORITY_CMD: case SET_VLAN_FLAG_CMD: case ADD_VLAN_CMD: case DEL_VLAN_CMD: case GET_VLAN_REALDEV_NAME_CMD: case GET_VLAN_VID_CMD: err = -ENODEV; dev = __dev_get_by_name(net, args.device1); if (!dev) goto out; err = -EINVAL; if (args.cmd != ADD_VLAN_CMD && !is_vlan_dev(dev)) goto out; } switch (args.cmd) { case SET_VLAN_INGRESS_PRIORITY_CMD: err = -EPERM; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) break; vlan_dev_set_ingress_priority(dev, args.u.skb_priority, args.vlan_qos); err = 0; break; case SET_VLAN_EGRESS_PRIORITY_CMD: err = -EPERM; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) break; err = vlan_dev_set_egress_priority(dev, args.u.skb_priority, args.vlan_qos); break; case SET_VLAN_FLAG_CMD: err = -EPERM; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) break; err = vlan_dev_change_flags(dev, args.vlan_qos ? args.u.flag : 0, args.u.flag); break; case SET_VLAN_NAME_TYPE_CMD: err = -EPERM; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) break; if (args.u.name_type < VLAN_NAME_TYPE_HIGHEST) { struct vlan_net *vn; vn = net_generic(net, vlan_net_id); vn->name_type = args.u.name_type; err = 0; } else { err = -EINVAL; } break; case ADD_VLAN_CMD: err = -EPERM; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) break; err = register_vlan_device(dev, args.u.VID); break; case DEL_VLAN_CMD: err = -EPERM; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) break; unregister_vlan_dev(dev, NULL); err = 0; break; case GET_VLAN_REALDEV_NAME_CMD: err = 0; vlan_dev_get_realdev_name(dev, args.u.device2, sizeof(args.u.device2)); if (copy_to_user(arg, &args, sizeof(struct vlan_ioctl_args))) err = -EFAULT; break; case GET_VLAN_VID_CMD: err = 0; args.u.VID = vlan_dev_vlan_id(dev); if (copy_to_user(arg, &args, sizeof(struct vlan_ioctl_args))) err = -EFAULT; break; default: err = -EOPNOTSUPP; break; } out: rtnl_unlock(); return err; } static int __net_init vlan_init_net(struct net *net) { struct vlan_net *vn = net_generic(net, vlan_net_id); int err; vn->name_type = VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD; err = vlan_proc_init(net); return err; } static void __net_exit vlan_exit_net(struct net *net) { vlan_proc_cleanup(net); } static struct pernet_operations vlan_net_ops = { .init = vlan_init_net, .exit = vlan_exit_net, .id = &vlan_net_id, .size = sizeof(struct vlan_net), }; static int __init vlan_proto_init(void) { int err; pr_info("%s v%s\n", vlan_fullname, vlan_version); err = register_pernet_subsys(&vlan_net_ops); if (err < 0) goto err0; err = register_netdevice_notifier(&vlan_notifier_block); if (err < 0) goto err2; err = vlan_gvrp_init(); if (err < 0) goto err3; err = vlan_mvrp_init(); if (err < 0) goto err4; err = vlan_netlink_init(); if (err < 0) goto err5; vlan_ioctl_set(vlan_ioctl_handler); return 0; err5: vlan_mvrp_uninit(); err4: vlan_gvrp_uninit(); err3: unregister_netdevice_notifier(&vlan_notifier_block); err2: unregister_pernet_subsys(&vlan_net_ops); err0: return err; } static void __exit vlan_cleanup_module(void) { vlan_ioctl_set(NULL); vlan_netlink_fini(); unregister_netdevice_notifier(&vlan_notifier_block); unregister_pernet_subsys(&vlan_net_ops); rcu_barrier(); /* Wait for completion of call_rcu()'s */ vlan_mvrp_uninit(); vlan_gvrp_uninit(); } module_init(vlan_proto_init); module_exit(vlan_cleanup_module); MODULE_DESCRIPTION("802.1Q/802.1ad VLAN Protocol"); MODULE_LICENSE("GPL"); MODULE_VERSION(DRV_VERSION); |
1 13 2 7 3 9 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 | /* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2007 Oracle. All rights reserved. */ #ifndef BTRFS_INODE_H #define BTRFS_INODE_H #include <linux/hash.h> #include <linux/refcount.h> #include <linux/spinlock.h> #include <linux/mutex.h> #include <linux/rwsem.h> #include <linux/fs.h> #include <linux/mm.h> #include <linux/compiler.h> #include <linux/fscrypt.h> #include <linux/lockdep.h> #include <uapi/linux/btrfs_tree.h> #include <trace/events/btrfs.h> #include "block-rsv.h" #include "btrfs_inode.h" #include "extent_map.h" #include "extent_io.h" #include "extent-io-tree.h" #include "ordered-data.h" #include "delayed-inode.h" struct extent_state; struct posix_acl; struct iov_iter; struct writeback_control; struct btrfs_root; struct btrfs_fs_info; struct btrfs_trans_handle; /* * Since we search a directory based on f_pos (struct dir_context::pos) we have * to start at 2 since '.' and '..' have f_pos of 0 and 1 respectively, so * everybody else has to start at 2 (see btrfs_real_readdir() and dir_emit_dots()). */ #define BTRFS_DIR_START_INDEX 2 /* * ordered_data_close is set by truncate when a file that used * to have good data has been truncated to zero. When it is set * the btrfs file release call will add this inode to the * ordered operations list so that we make sure to flush out any * new data the application may have written before commit. */ enum { BTRFS_INODE_FLUSH_ON_CLOSE, BTRFS_INODE_DUMMY, BTRFS_INODE_IN_DEFRAG, BTRFS_INODE_HAS_ASYNC_EXTENT, /* * Always set under the VFS' inode lock, otherwise it can cause races * during fsync (we start as a fast fsync and then end up in a full * fsync racing with ordered extent completion). */ BTRFS_INODE_NEEDS_FULL_SYNC, BTRFS_INODE_COPY_EVERYTHING, BTRFS_INODE_HAS_PROPS, BTRFS_INODE_SNAPSHOT_FLUSH, /* * Set and used when logging an inode and it serves to signal that an * inode does not have xattrs, so subsequent fsyncs can avoid searching * for xattrs to log. This bit must be cleared whenever a xattr is added * to an inode. */ BTRFS_INODE_NO_XATTRS, /* * Set when we are in a context where we need to start a transaction and * have dirty pages with the respective file range locked. This is to * ensure that when reserving space for the transaction, if we are low * on available space and need to flush delalloc, we will not flush * delalloc for this inode, because that could result in a deadlock (on * the file range, inode's io_tree). */ BTRFS_INODE_NO_DELALLOC_FLUSH, /* * Set when we are working on enabling verity for a file. Computing and * writing the whole Merkle tree can take a while so we want to prevent * races where two separate tasks attempt to simultaneously start verity * on the same file. */ BTRFS_INODE_VERITY_IN_PROGRESS, /* Set when this inode is a free space inode. */ BTRFS_INODE_FREE_SPACE_INODE, /* Set when there are no capabilities in XATTs for the inode. */ BTRFS_INODE_NO_CAP_XATTR, }; /* in memory btrfs inode */ struct btrfs_inode { /* which subvolume this inode belongs to */ struct btrfs_root *root; /* key used to find this inode on disk. This is used by the code * to read in roots of subvolumes */ struct btrfs_key location; /* Cached value of inode property 'compression'. */ u8 prop_compress; /* * Force compression on the file using the defrag ioctl, could be * different from prop_compress and takes precedence if set. */ u8 defrag_compress; /* * Lock for counters and all fields used to determine if the inode is in * the log or not (last_trans, last_sub_trans, last_log_commit, * logged_trans), to access/update delalloc_bytes, new_delalloc_bytes, * defrag_bytes, disk_i_size, outstanding_extents, csum_bytes and to * update the VFS' inode number of bytes used. */ spinlock_t lock; /* the extent_tree has caches of all the extent mappings to disk */ struct extent_map_tree extent_tree; /* the io_tree does range state (DIRTY, LOCKED etc) */ struct extent_io_tree io_tree; /* * Keep track of where the inode has extent items mapped in order to * make sure the i_size adjustments are accurate. Not required when the * filesystem is NO_HOLES, the status can't be set while mounted as * it's a mkfs-time feature. */ struct extent_io_tree *file_extent_tree; /* held while logging the inode in tree-log.c */ struct mutex log_mutex; /* * Counters to keep track of the number of extent item's we may use due * to delalloc and such. outstanding_extents is the number of extent * items we think we'll end up using, and reserved_extents is the number * of extent items we've reserved metadata for. Protected by 'lock'. */ unsigned outstanding_extents; /* used to order data wrt metadata */ spinlock_t ordered_tree_lock; struct rb_root ordered_tree; struct rb_node *ordered_tree_last; /* list of all the delalloc inodes in the FS. There are times we need * to write all the delalloc pages to disk, and this list is used * to walk them all. */ struct list_head delalloc_inodes; /* node for the red-black tree that links inodes in subvolume root */ struct rb_node rb_node; unsigned long runtime_flags; /* full 64 bit generation number, struct vfs_inode doesn't have a big * enough field for this. */ u64 generation; /* * ID of the transaction handle that last modified this inode. * Protected by 'lock'. */ u64 last_trans; /* * ID of the transaction that last logged this inode. * Protected by 'lock'. */ u64 logged_trans; /* * Log transaction ID when this inode was last modified. * Protected by 'lock'. */ int last_sub_trans; /* A local copy of root's last_log_commit. Protected by 'lock'. */ int last_log_commit; union { /* * Total number of bytes pending delalloc, used by stat to * calculate the real block usage of the file. This is used * only for files. Protected by 'lock'. */ u64 delalloc_bytes; /* * The lowest possible index of the next dir index key which * points to an inode that needs to be logged. * This is used only for directories. * Use the helpers btrfs_get_first_dir_index_to_log() and * btrfs_set_first_dir_index_to_log() to access this field. */ u64 first_dir_index_to_log; }; union { /* * Total number of bytes pending delalloc that fall within a file * range that is either a hole or beyond EOF (and no prealloc extent * exists in the range). This is always <= delalloc_bytes and this * is used only for files. Protected by 'lock'. */ u64 new_delalloc_bytes; /* * The offset of the last dir index key that was logged. * This is used only for directories. */ u64 last_dir_index_offset; }; /* * Total number of bytes pending defrag, used by stat to check whether * it needs COW. Protected by 'lock'. */ u64 defrag_bytes; /* * The size of the file stored in the metadata on disk. data=ordered * means the in-memory i_size might be larger than the size on disk * because not all the blocks are written yet. Protected by 'lock'. */ u64 disk_i_size; /* * If this is a directory then index_cnt is the counter for the index * number for new files that are created. For an empty directory, this * must be initialized to BTRFS_DIR_START_INDEX. */ u64 index_cnt; /* Cache the directory index number to speed the dir/file remove */ u64 dir_index; /* the fsync log has some corner cases that mean we have to check * directories to see if any unlinks have been done before * the directory was logged. See tree-log.c for all the * details */ u64 last_unlink_trans; /* * The id/generation of the last transaction where this inode was * either the source or the destination of a clone/dedupe operation. * Used when logging an inode to know if there are shared extents that * need special care when logging checksum items, to avoid duplicate * checksum items in a log (which can lead to a corruption where we end * up with missing checksum ranges after log replay). * Protected by the vfs inode lock. */ u64 last_reflink_trans; /* * Number of bytes outstanding that are going to need csums. This is * used in ENOSPC accounting. Protected by 'lock'. */ u64 csum_bytes; /* Backwards incompatible flags, lower half of inode_item::flags */ u32 flags; /* Read-only compatibility flags, upper half of inode_item::flags */ u32 ro_flags; struct btrfs_block_rsv block_rsv; struct btrfs_delayed_node *delayed_node; /* File creation time. */ u64 i_otime_sec; u32 i_otime_nsec; /* Hook into fs_info->delayed_iputs */ struct list_head delayed_iput; struct rw_semaphore i_mmap_lock; struct inode vfs_inode; }; static inline u64 btrfs_get_first_dir_index_to_log(const struct btrfs_inode *inode) { return READ_ONCE(inode->first_dir_index_to_log); } static inline void btrfs_set_first_dir_index_to_log(struct btrfs_inode *inode, u64 index) { WRITE_ONCE(inode->first_dir_index_to_log, index); } static inline struct btrfs_inode *BTRFS_I(const struct inode *inode) { return container_of(inode, struct btrfs_inode, vfs_inode); } static inline unsigned long btrfs_inode_hash(u64 objectid, const struct btrfs_root *root) { u64 h = objectid ^ (root->root_key.objectid * GOLDEN_RATIO_PRIME); #if BITS_PER_LONG == 32 h = (h >> 32) ^ (h & 0xffffffff); #endif return (unsigned long)h; } #if BITS_PER_LONG == 32 /* * On 32 bit systems the i_ino of struct inode is 32 bits (unsigned long), so * we use the inode's location objectid which is a u64 to avoid truncation. */ static inline u64 btrfs_ino(const struct btrfs_inode *inode) { u64 ino = inode->location.objectid; /* type == BTRFS_ROOT_ITEM_KEY: subvol dir */ if (inode->location.type == BTRFS_ROOT_ITEM_KEY) ino = inode->vfs_inode.i_ino; return ino; } #else static inline u64 btrfs_ino(const struct btrfs_inode *inode) { return inode->vfs_inode.i_ino; } #endif static inline void btrfs_i_size_write(struct btrfs_inode *inode, u64 size) { i_size_write(&inode->vfs_inode, size); inode->disk_i_size = size; } static inline bool btrfs_is_free_space_inode(struct btrfs_inode *inode) { return test_bit(BTRFS_INODE_FREE_SPACE_INODE, &inode->runtime_flags); } static inline bool is_data_inode(struct inode *inode) { return btrfs_ino(BTRFS_I(inode)) != BTRFS_BTREE_INODE_OBJECTID; } static inline void btrfs_mod_outstanding_extents(struct btrfs_inode *inode, int mod) { lockdep_assert_held(&inode->lock); inode->outstanding_extents += mod; if (btrfs_is_free_space_inode(inode)) return; trace_btrfs_inode_mod_outstanding_extents(inode->root, btrfs_ino(inode), mod, inode->outstanding_extents); } /* * Called every time after doing a buffered, direct IO or memory mapped write. * * This is to ensure that if we write to a file that was previously fsynced in * the current transaction, then try to fsync it again in the same transaction, * we will know that there were changes in the file and that it needs to be * logged. */ static inline void btrfs_set_inode_last_sub_trans(struct btrfs_inode *inode) { spin_lock(&inode->lock); inode->last_sub_trans = inode->root->log_transid; spin_unlock(&inode->lock); } /* * Should be called while holding the inode's VFS lock in exclusive mode, or * while holding the inode's mmap lock (struct btrfs_inode::i_mmap_lock) in * either shared or exclusive mode, or in a context where no one else can access * the inode concurrently (during inode creation or when loading an inode from * disk). */ static inline void btrfs_set_inode_full_sync(struct btrfs_inode *inode) { set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags); /* * The inode may have been part of a reflink operation in the last * transaction that modified it, and then a fsync has reset the * last_reflink_trans to avoid subsequent fsyncs in the same * transaction to do unnecessary work. So update last_reflink_trans * to the last_trans value (we have to be pessimistic and assume a * reflink happened). * * The ->last_trans is protected by the inode's spinlock and we can * have a concurrent ordered extent completion update it. Also set * last_reflink_trans to ->last_trans only if the former is less than * the later, because we can be called in a context where * last_reflink_trans was set to the current transaction generation * while ->last_trans was not yet updated in the current transaction, * and therefore has a lower value. */ spin_lock(&inode->lock); if (inode->last_reflink_trans < inode->last_trans) inode->last_reflink_trans = inode->last_trans; spin_unlock(&inode->lock); } static inline bool btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation) { bool ret = false; spin_lock(&inode->lock); if (inode->logged_trans == generation && inode->last_sub_trans <= inode->last_log_commit && inode->last_sub_trans <= btrfs_get_root_last_log_commit(inode->root)) ret = true; spin_unlock(&inode->lock); return ret; } /* * Check if the inode has flags compatible with compression */ static inline bool btrfs_inode_can_compress(const struct btrfs_inode *inode) { if (inode->flags & BTRFS_INODE_NODATACOW || inode->flags & BTRFS_INODE_NODATASUM) return false; return true; } /* Array of bytes with variable length, hexadecimal format 0x1234 */ #define CSUM_FMT "0x%*phN" #define CSUM_FMT_VALUE(size, bytes) size, bytes int btrfs_check_sector_csum(struct btrfs_fs_info *fs_info, struct page *page, u32 pgoff, u8 *csum, const u8 * const csum_expected); bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev, u32 bio_offset, struct bio_vec *bv); noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len, u64 *orig_start, u64 *orig_block_len, u64 *ram_bytes, bool nowait, bool strict); void btrfs_del_delalloc_inode(struct btrfs_inode *inode); struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry); int btrfs_set_inode_index(struct btrfs_inode *dir, u64 *index); int btrfs_unlink_inode(struct btrfs_trans_handle *trans, struct btrfs_inode *dir, struct btrfs_inode *inode, const struct fscrypt_str *name); int btrfs_add_link(struct btrfs_trans_handle *trans, struct btrfs_inode *parent_inode, struct btrfs_inode *inode, const struct fscrypt_str *name, int add_backref, u64 index); int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry); int btrfs_truncate_block(struct btrfs_inode *inode, loff_t from, loff_t len, int front); int btrfs_start_delalloc_snapshot(struct btrfs_root *root, bool in_reclaim_context); int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr, bool in_reclaim_context); int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end, unsigned int extra_bits, struct extent_state **cached_state); struct btrfs_new_inode_args { /* Input */ struct inode *dir; struct dentry *dentry; struct inode *inode; bool orphan; bool subvol; /* Output from btrfs_new_inode_prepare(), input to btrfs_create_new_inode(). */ struct posix_acl *default_acl; struct posix_acl *acl; struct fscrypt_name fname; }; int btrfs_new_inode_prepare(struct btrfs_new_inode_args *args, unsigned int *trans_num_items); int btrfs_create_new_inode(struct btrfs_trans_handle *trans, struct btrfs_new_inode_args *args); void btrfs_new_inode_args_destroy(struct btrfs_new_inode_args *args); struct inode *btrfs_new_subvol_inode(struct mnt_idmap *idmap, struct inode *dir); void btrfs_set_delalloc_extent(struct btrfs_inode *inode, struct extent_state *state, u32 bits); void btrfs_clear_delalloc_extent(struct btrfs_inode *inode, struct extent_state *state, u32 bits); void btrfs_merge_delalloc_extent(struct btrfs_inode *inode, struct extent_state *new, struct extent_state *other); void btrfs_split_delalloc_extent(struct btrfs_inode *inode, struct extent_state *orig, u64 split); void btrfs_set_range_writeback(struct btrfs_inode *inode, u64 start, u64 end); void btrfs_evict_inode(struct inode *inode); struct inode *btrfs_alloc_inode(struct super_block *sb); void btrfs_destroy_inode(struct inode *inode); void btrfs_free_inode(struct inode *inode); int btrfs_drop_inode(struct inode *inode); int __init btrfs_init_cachep(void); void __cold btrfs_destroy_cachep(void); struct inode *btrfs_iget_path(struct super_block *s, u64 ino, struct btrfs_root *root, struct btrfs_path *path); struct inode *btrfs_iget(struct super_block *s, u64 ino, struct btrfs_root *root); struct extent_map *btrfs_get_extent(struct btrfs_inode *inode, struct page *page, u64 start, u64 len); int btrfs_update_inode(struct btrfs_trans_handle *trans, struct btrfs_inode *inode); int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans, struct btrfs_inode *inode); int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct btrfs_inode *inode); int btrfs_orphan_cleanup(struct btrfs_root *root); int btrfs_cont_expand(struct btrfs_inode *inode, loff_t oldsize, loff_t size); void btrfs_add_delayed_iput(struct btrfs_inode *inode); void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info); int btrfs_wait_on_delayed_iputs(struct btrfs_fs_info *fs_info); int btrfs_prealloc_file_range(struct inode *inode, int mode, u64 start, u64 num_bytes, u64 min_size, loff_t actual_len, u64 *alloc_hint); int btrfs_prealloc_file_range_trans(struct inode *inode, struct btrfs_trans_handle *trans, int mode, u64 start, u64 num_bytes, u64 min_size, loff_t actual_len, u64 *alloc_hint); int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page, u64 start, u64 end, struct writeback_control *wbc); int btrfs_writepage_cow_fixup(struct page *page); int btrfs_encoded_io_compression_from_extent(struct btrfs_fs_info *fs_info, int compress_type); int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode, u64 file_offset, u64 disk_bytenr, u64 disk_io_size, struct page **pages); ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter, struct btrfs_ioctl_encoded_io_args *encoded); ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from, const struct btrfs_ioctl_encoded_io_args *encoded); ssize_t btrfs_dio_read(struct kiocb *iocb, struct iov_iter *iter, size_t done_before); struct iomap_dio *btrfs_dio_write(struct kiocb *iocb, struct iov_iter *iter, size_t done_before); struct btrfs_inode *btrfs_find_first_inode(struct btrfs_root *root, u64 min_ino); extern const struct dentry_operations btrfs_dentry_operations; /* Inode locking type flags, by default the exclusive lock is taken. */ enum btrfs_ilock_type { ENUM_BIT(BTRFS_ILOCK_SHARED), ENUM_BIT(BTRFS_ILOCK_TRY), ENUM_BIT(BTRFS_ILOCK_MMAP), }; int btrfs_inode_lock(struct btrfs_inode *inode, unsigned int ilock_flags); void btrfs_inode_unlock(struct btrfs_inode *inode, unsigned int ilock_flags); void btrfs_update_inode_bytes(struct btrfs_inode *inode, const u64 add_bytes, const u64 del_bytes); void btrfs_assert_inode_range_clean(struct btrfs_inode *inode, u64 start, u64 end); #endif |
67 12 1932 1343 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_BRIDGE_NETFILTER_H #define __LINUX_BRIDGE_NETFILTER_H #include <uapi/linux/netfilter_bridge.h> #include <linux/skbuff.h> struct nf_bridge_frag_data { char mac[ETH_HLEN]; bool vlan_present; u16 vlan_tci; __be16 vlan_proto; }; #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb); static inline void br_drop_fake_rtable(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); if (dst && (dst->flags & DST_FAKE_RTABLE)) skb_dst_drop(skb); } static inline struct nf_bridge_info * nf_bridge_info_get(const struct sk_buff *skb) { return skb_ext_find(skb, SKB_EXT_BRIDGE_NF); } static inline bool nf_bridge_info_exists(const struct sk_buff *skb) { return skb_ext_exist(skb, SKB_EXT_BRIDGE_NF); } static inline int nf_bridge_get_physinif(const struct sk_buff *skb) { const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); if (!nf_bridge) return 0; return nf_bridge->physinif; } static inline int nf_bridge_get_physoutif(const struct sk_buff *skb) { const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); if (!nf_bridge) return 0; return nf_bridge->physoutdev ? nf_bridge->physoutdev->ifindex : 0; } static inline struct net_device * nf_bridge_get_physindev(const struct sk_buff *skb, struct net *net) { const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); return nf_bridge ? dev_get_by_index_rcu(net, nf_bridge->physinif) : NULL; } static inline struct net_device * nf_bridge_get_physoutdev(const struct sk_buff *skb) { const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); return nf_bridge ? nf_bridge->physoutdev : NULL; } static inline bool nf_bridge_in_prerouting(const struct sk_buff *skb) { const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); return nf_bridge && nf_bridge->in_prerouting; } #else #define br_drop_fake_rtable(skb) do { } while (0) static inline bool nf_bridge_in_prerouting(const struct sk_buff *skb) { return false; } #endif /* CONFIG_BRIDGE_NETFILTER */ #endif |
7 7 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 | // SPDX-License-Identifier: GPL-2.0-only /* * The NFC Controller Interface is the communication protocol between an * NFC Controller (NFCC) and a Device Host (DH). * This is the HCI over NCI implementation, as specified in the 10.2 * section of the NCI 1.1 specification. * * Copyright (C) 2014 STMicroelectronics SAS. All rights reserved. */ #include <linux/skbuff.h> #include "../nfc.h" #include <net/nfc/nci.h> #include <net/nfc/nci_core.h> #include <linux/nfc.h> #include <linux/kcov.h> struct nci_data { u8 conn_id; u8 pipe; u8 cmd; const u8 *data; u32 data_len; } __packed; struct nci_hci_create_pipe_params { u8 src_gate; u8 dest_host; u8 dest_gate; } __packed; struct nci_hci_create_pipe_resp { u8 src_host; u8 src_gate; u8 dest_host; u8 dest_gate; u8 pipe; } __packed; struct nci_hci_delete_pipe_noti { u8 pipe; } __packed; struct nci_hci_all_pipe_cleared_noti { u8 host; } __packed; struct nci_hcp_message { u8 header; /* type -cmd,evt,rsp- + instruction */ u8 data[]; } __packed; struct nci_hcp_packet { u8 header; /* cbit+pipe */ struct nci_hcp_message message; } __packed; #define NCI_HCI_ANY_SET_PARAMETER 0x01 #define NCI_HCI_ANY_GET_PARAMETER 0x02 #define NCI_HCI_ANY_CLOSE_PIPE 0x04 #define NCI_HCI_ADM_CLEAR_ALL_PIPE 0x14 #define NCI_HFP_NO_CHAINING 0x80 #define NCI_NFCEE_ID_HCI 0x80 #define NCI_EVT_HOT_PLUG 0x03 #define NCI_HCI_ADMIN_PARAM_SESSION_IDENTITY 0x01 #define NCI_HCI_ADM_CREATE_PIPE 0x10 #define NCI_HCI_ADM_DELETE_PIPE 0x11 /* HCP headers */ #define NCI_HCI_HCP_PACKET_HEADER_LEN 1 #define NCI_HCI_HCP_MESSAGE_HEADER_LEN 1 #define NCI_HCI_HCP_HEADER_LEN 2 /* HCP types */ #define NCI_HCI_HCP_COMMAND 0x00 #define NCI_HCI_HCP_EVENT 0x01 #define NCI_HCI_HCP_RESPONSE 0x02 #define NCI_HCI_ADM_NOTIFY_PIPE_CREATED 0x12 #define NCI_HCI_ADM_NOTIFY_PIPE_DELETED 0x13 #define NCI_HCI_ADM_NOTIFY_ALL_PIPE_CLEARED 0x15 #define NCI_HCI_FRAGMENT 0x7f #define NCI_HCP_HEADER(type, instr) ((((type) & 0x03) << 6) |\ ((instr) & 0x3f)) #define NCI_HCP_MSG_GET_TYPE(header) ((header & 0xc0) >> 6) #define NCI_HCP_MSG_GET_CMD(header) (header & 0x3f) #define NCI_HCP_MSG_GET_PIPE(header) (header & 0x7f) static int nci_hci_result_to_errno(u8 result) { switch (result) { case NCI_HCI_ANY_OK: return 0; case NCI_HCI_ANY_E_REG_PAR_UNKNOWN: return -EOPNOTSUPP; case NCI_HCI_ANY_E_TIMEOUT: return -ETIME; default: return -1; } } /* HCI core */ static void nci_hci_reset_pipes(struct nci_hci_dev *hdev) { int i; for (i = 0; i < NCI_HCI_MAX_PIPES; i++) { hdev->pipes[i].gate = NCI_HCI_INVALID_GATE; hdev->pipes[i].host = NCI_HCI_INVALID_HOST; } memset(hdev->gate2pipe, NCI_HCI_INVALID_PIPE, sizeof(hdev->gate2pipe)); } static void nci_hci_reset_pipes_per_host(struct nci_dev *ndev, u8 host) { int i; for (i = 0; i < NCI_HCI_MAX_PIPES; i++) { if (ndev->hci_dev->pipes[i].host == host) { ndev->hci_dev->pipes[i].gate = NCI_HCI_INVALID_GATE; ndev->hci_dev->pipes[i].host = NCI_HCI_INVALID_HOST; } } } /* Fragment HCI data over NCI packet. * NFC Forum NCI 10.2.2 Data Exchange: * The payload of the Data Packets sent on the Logical Connection SHALL be * valid HCP packets, as defined within [ETSI_102622]. Each Data Packet SHALL * contain a single HCP packet. NCI Segmentation and Reassembly SHALL NOT be * applied to Data Messages in either direction. The HCI fragmentation mechanism * is used if required. */ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe, const u8 data_type, const u8 *data, size_t data_len) { const struct nci_conn_info *conn_info; struct sk_buff *skb; int len, i, r; u8 cb = pipe; conn_info = ndev->hci_dev->conn_info; if (!conn_info) return -EPROTO; i = 0; skb = nci_skb_alloc(ndev, conn_info->max_pkt_payload_len + NCI_DATA_HDR_SIZE, GFP_ATOMIC); if (!skb) return -ENOMEM; skb_reserve(skb, NCI_DATA_HDR_SIZE + 2); *(u8 *)skb_push(skb, 1) = data_type; do { /* If last packet add NCI_HFP_NO_CHAINING */ if (i + conn_info->max_pkt_payload_len - (skb->len + 1) >= data_len) { cb |= NCI_HFP_NO_CHAINING; len = data_len - i; } else { len = conn_info->max_pkt_payload_len - skb->len - 1; } *(u8 *)skb_push(skb, 1) = cb; if (len > 0) skb_put_data(skb, data + i, len); r = nci_send_data(ndev, conn_info->conn_id, skb); if (r < 0) return r; i += len; if (i < data_len) { skb = nci_skb_alloc(ndev, conn_info->max_pkt_payload_len + NCI_DATA_HDR_SIZE, GFP_ATOMIC); if (!skb) return -ENOMEM; skb_reserve(skb, NCI_DATA_HDR_SIZE + 1); } } while (i < data_len); return i; } static void nci_hci_send_data_req(struct nci_dev *ndev, const void *opt) { const struct nci_data *data = opt; nci_hci_send_data(ndev, data->pipe, data->cmd, data->data, data->data_len); } int nci_hci_send_event(struct nci_dev *ndev, u8 gate, u8 event, const u8 *param, size_t param_len) { u8 pipe = ndev->hci_dev->gate2pipe[gate]; if (pipe == NCI_HCI_INVALID_PIPE) return -EADDRNOTAVAIL; return nci_hci_send_data(ndev, pipe, NCI_HCP_HEADER(NCI_HCI_HCP_EVENT, event), param, param_len); } EXPORT_SYMBOL(nci_hci_send_event); int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate, u8 cmd, const u8 *param, size_t param_len, struct sk_buff **skb) { const struct nci_hcp_message *message; const struct nci_conn_info *conn_info; struct nci_data data; int r; u8 pipe = ndev->hci_dev->gate2pipe[gate]; if (pipe == NCI_HCI_INVALID_PIPE) return -EADDRNOTAVAIL; conn_info = ndev->hci_dev->conn_info; if (!conn_info) return -EPROTO; data.conn_id = conn_info->conn_id; data.pipe = pipe; data.cmd = NCI_HCP_HEADER(NCI_HCI_HCP_COMMAND, cmd); data.data = param; data.data_len = param_len; r = nci_request(ndev, nci_hci_send_data_req, &data, msecs_to_jiffies(NCI_DATA_TIMEOUT)); if (r == NCI_STATUS_OK) { message = (struct nci_hcp_message *)conn_info->rx_skb->data; r = nci_hci_result_to_errno( NCI_HCP_MSG_GET_CMD(message->header)); skb_pull(conn_info->rx_skb, NCI_HCI_HCP_MESSAGE_HEADER_LEN); if (!r && skb) *skb = conn_info->rx_skb; } return r; } EXPORT_SYMBOL(nci_hci_send_cmd); int nci_hci_clear_all_pipes(struct nci_dev *ndev) { int r; r = nci_hci_send_cmd(ndev, NCI_HCI_ADMIN_GATE, NCI_HCI_ADM_CLEAR_ALL_PIPE, NULL, 0, NULL); if (r < 0) return r; nci_hci_reset_pipes(ndev->hci_dev); return r; } EXPORT_SYMBOL(nci_hci_clear_all_pipes); static void nci_hci_event_received(struct nci_dev *ndev, u8 pipe, u8 event, struct sk_buff *skb) { if (ndev->ops->hci_event_received) ndev->ops->hci_event_received(ndev, pipe, event, skb); } static void nci_hci_cmd_received(struct nci_dev *ndev, u8 pipe, u8 cmd, struct sk_buff *skb) { u8 gate = ndev->hci_dev->pipes[pipe].gate; u8 status = NCI_HCI_ANY_OK | ~NCI_HCI_FRAGMENT; u8 dest_gate, new_pipe; struct nci_hci_create_pipe_resp *create_info; struct nci_hci_delete_pipe_noti *delete_info; struct nci_hci_all_pipe_cleared_noti *cleared_info; pr_debug("from gate %x pipe %x cmd %x\n", gate, pipe, cmd); switch (cmd) { case NCI_HCI_ADM_NOTIFY_PIPE_CREATED: if (skb->len != 5) { status = NCI_HCI_ANY_E_NOK; goto exit; } create_info = (struct nci_hci_create_pipe_resp *)skb->data; dest_gate = create_info->dest_gate; new_pipe = create_info->pipe; if (new_pipe >= NCI_HCI_MAX_PIPES) { status = NCI_HCI_ANY_E_NOK; goto exit; } /* Save the new created pipe and bind with local gate, * the description for skb->data[3] is destination gate id * but since we received this cmd from host controller, we * are the destination and it is our local gate */ ndev->hci_dev->gate2pipe[dest_gate] = new_pipe; ndev->hci_dev->pipes[new_pipe].gate = dest_gate; ndev->hci_dev->pipes[new_pipe].host = create_info->src_host; break; case NCI_HCI_ANY_OPEN_PIPE: /* If the pipe is not created report an error */ if (gate == NCI_HCI_INVALID_GATE) { status = NCI_HCI_ANY_E_NOK; goto exit; } break; case NCI_HCI_ADM_NOTIFY_PIPE_DELETED: if (skb->len != 1) { status = NCI_HCI_ANY_E_NOK; goto exit; } delete_info = (struct nci_hci_delete_pipe_noti *)skb->data; if (delete_info->pipe >= NCI_HCI_MAX_PIPES) { status = NCI_HCI_ANY_E_NOK; goto exit; } ndev->hci_dev->pipes[delete_info->pipe].gate = NCI_HCI_INVALID_GATE; ndev->hci_dev->pipes[delete_info->pipe].host = NCI_HCI_INVALID_HOST; break; case NCI_HCI_ADM_NOTIFY_ALL_PIPE_CLEARED: if (skb->len != 1) { status = NCI_HCI_ANY_E_NOK; goto exit; } cleared_info = (struct nci_hci_all_pipe_cleared_noti *)skb->data; nci_hci_reset_pipes_per_host(ndev, cleared_info->host); break; default: pr_debug("Discarded unknown cmd %x to gate %x\n", cmd, gate); break; } if (ndev->ops->hci_cmd_received) ndev->ops->hci_cmd_received(ndev, pipe, cmd, skb); exit: nci_hci_send_data(ndev, pipe, status, NULL, 0); kfree_skb(skb); } static void nci_hci_resp_received(struct nci_dev *ndev, u8 pipe, struct sk_buff *skb) { struct nci_conn_info *conn_info; conn_info = ndev->hci_dev->conn_info; if (!conn_info) goto exit; conn_info->rx_skb = skb; exit: nci_req_complete(ndev, NCI_STATUS_OK); } /* Receive hcp message for pipe, with type and cmd. * skb contains optional message data only. */ static void nci_hci_hcp_message_rx(struct nci_dev *ndev, u8 pipe, u8 type, u8 instruction, struct sk_buff *skb) { switch (type) { case NCI_HCI_HCP_RESPONSE: nci_hci_resp_received(ndev, pipe, skb); break; case NCI_HCI_HCP_COMMAND: nci_hci_cmd_received(ndev, pipe, instruction, skb); break; case NCI_HCI_HCP_EVENT: nci_hci_event_received(ndev, pipe, instruction, skb); break; default: pr_err("UNKNOWN MSG Type %d, instruction=%d\n", type, instruction); kfree_skb(skb); break; } nci_req_complete(ndev, NCI_STATUS_OK); } static void nci_hci_msg_rx_work(struct work_struct *work) { struct nci_hci_dev *hdev = container_of(work, struct nci_hci_dev, msg_rx_work); struct sk_buff *skb; const struct nci_hcp_message *message; u8 pipe, type, instruction; for (; (skb = skb_dequeue(&hdev->msg_rx_queue)); kcov_remote_stop()) { kcov_remote_start_common(skb_get_kcov_handle(skb)); pipe = NCI_HCP_MSG_GET_PIPE(skb->data[0]); skb_pull(skb, NCI_HCI_HCP_PACKET_HEADER_LEN); message = (struct nci_hcp_message *)skb->data; type = NCI_HCP_MSG_GET_TYPE(message->header); instruction = NCI_HCP_MSG_GET_CMD(message->header); skb_pull(skb, NCI_HCI_HCP_MESSAGE_HEADER_LEN); nci_hci_hcp_message_rx(hdev->ndev, pipe, type, instruction, skb); } } void nci_hci_data_received_cb(void *context, struct sk_buff *skb, int err) { struct nci_dev *ndev = (struct nci_dev *)context; struct nci_hcp_packet *packet; u8 pipe, type; struct sk_buff *hcp_skb; struct sk_buff *frag_skb; int msg_len; if (err) { nci_req_complete(ndev, err); return; } packet = (struct nci_hcp_packet *)skb->data; if ((packet->header & ~NCI_HCI_FRAGMENT) == 0) { skb_queue_tail(&ndev->hci_dev->rx_hcp_frags, skb); return; } /* it's the last fragment. Does it need re-aggregation? */ if (skb_queue_len(&ndev->hci_dev->rx_hcp_frags)) { pipe = NCI_HCP_MSG_GET_PIPE(packet->header); skb_queue_tail(&ndev->hci_dev->rx_hcp_frags, skb); msg_len = 0; skb_queue_walk(&ndev->hci_dev->rx_hcp_frags, frag_skb) { msg_len += (frag_skb->len - NCI_HCI_HCP_PACKET_HEADER_LEN); } hcp_skb = nfc_alloc_recv_skb(NCI_HCI_HCP_PACKET_HEADER_LEN + msg_len, GFP_KERNEL); if (!hcp_skb) { nci_req_complete(ndev, -ENOMEM); return; } skb_put_u8(hcp_skb, pipe); skb_queue_walk(&ndev->hci_dev->rx_hcp_frags, frag_skb) { msg_len = frag_skb->len - NCI_HCI_HCP_PACKET_HEADER_LEN; skb_put_data(hcp_skb, frag_skb->data + NCI_HCI_HCP_PACKET_HEADER_LEN, msg_len); } skb_queue_purge(&ndev->hci_dev->rx_hcp_frags); } else { packet->header &= NCI_HCI_FRAGMENT; hcp_skb = skb; } /* if this is a response, dispatch immediately to * unblock waiting cmd context. Otherwise, enqueue to dispatch * in separate context where handler can also execute command. */ packet = (struct nci_hcp_packet *)hcp_skb->data; type = NCI_HCP_MSG_GET_TYPE(packet->message.header); if (type == NCI_HCI_HCP_RESPONSE) { pipe = NCI_HCP_MSG_GET_PIPE(packet->header); skb_pull(hcp_skb, NCI_HCI_HCP_PACKET_HEADER_LEN); nci_hci_hcp_message_rx(ndev, pipe, type, NCI_STATUS_OK, hcp_skb); } else { skb_queue_tail(&ndev->hci_dev->msg_rx_queue, hcp_skb); schedule_work(&ndev->hci_dev->msg_rx_work); } } int nci_hci_open_pipe(struct nci_dev *ndev, u8 pipe) { struct nci_data data; const struct nci_conn_info *conn_info; conn_info = ndev->hci_dev->conn_info; if (!conn_info) return -EPROTO; data.conn_id = conn_info->conn_id; data.pipe = pipe; data.cmd = NCI_HCP_HEADER(NCI_HCI_HCP_COMMAND, NCI_HCI_ANY_OPEN_PIPE); data.data = NULL; data.data_len = 0; return nci_request(ndev, nci_hci_send_data_req, &data, msecs_to_jiffies(NCI_DATA_TIMEOUT)); } EXPORT_SYMBOL(nci_hci_open_pipe); static u8 nci_hci_create_pipe(struct nci_dev *ndev, u8 dest_host, u8 dest_gate, int *result) { u8 pipe; struct sk_buff *skb; struct nci_hci_create_pipe_params params; const struct nci_hci_create_pipe_resp *resp; pr_debug("gate=%d\n", dest_gate); params.src_gate = NCI_HCI_ADMIN_GATE; params.dest_host = dest_host; params.dest_gate = dest_gate; *result = nci_hci_send_cmd(ndev, NCI_HCI_ADMIN_GATE, NCI_HCI_ADM_CREATE_PIPE, (u8 *)¶ms, sizeof(params), &skb); if (*result < 0) return NCI_HCI_INVALID_PIPE; resp = (struct nci_hci_create_pipe_resp *)skb->data; pipe = resp->pipe; kfree_skb(skb); pr_debug("pipe created=%d\n", pipe); return pipe; } static int nci_hci_delete_pipe(struct nci_dev *ndev, u8 pipe) { return nci_hci_send_cmd(ndev, NCI_HCI_ADMIN_GATE, NCI_HCI_ADM_DELETE_PIPE, &pipe, 1, NULL); } int nci_hci_set_param(struct nci_dev *ndev, u8 gate, u8 idx, const u8 *param, size_t param_len) { const struct nci_hcp_message *message; const struct nci_conn_info *conn_info; struct nci_data data; int r; u8 *tmp; u8 pipe = ndev->hci_dev->gate2pipe[gate]; pr_debug("idx=%d to gate %d\n", idx, gate); if (pipe == NCI_HCI_INVALID_PIPE) return -EADDRNOTAVAIL; conn_info = ndev->hci_dev->conn_info; if (!conn_info) return -EPROTO; tmp = kmalloc(1 + param_len, GFP_KERNEL); if (!tmp) return -ENOMEM; *tmp = idx; memcpy(tmp + 1, param, param_len); data.conn_id = conn_info->conn_id; data.pipe = pipe; data.cmd = NCI_HCP_HEADER(NCI_HCI_HCP_COMMAND, NCI_HCI_ANY_SET_PARAMETER); data.data = tmp; data.data_len = param_len + 1; r = nci_request(ndev, nci_hci_send_data_req, &data, msecs_to_jiffies(NCI_DATA_TIMEOUT)); if (r == NCI_STATUS_OK) { message = (struct nci_hcp_message *)conn_info->rx_skb->data; r = nci_hci_result_to_errno( NCI_HCP_MSG_GET_CMD(message->header)); skb_pull(conn_info->rx_skb, NCI_HCI_HCP_MESSAGE_HEADER_LEN); } kfree(tmp); return r; } EXPORT_SYMBOL(nci_hci_set_param); int nci_hci_get_param(struct nci_dev *ndev, u8 gate, u8 idx, struct sk_buff **skb) { const struct nci_hcp_message *message; const struct nci_conn_info *conn_info; struct nci_data data; int r; u8 pipe = ndev->hci_dev->gate2pipe[gate]; pr_debug("idx=%d to gate %d\n", idx, gate); if (pipe == NCI_HCI_INVALID_PIPE) return -EADDRNOTAVAIL; conn_info = ndev->hci_dev->conn_info; if (!conn_info) return -EPROTO; data.conn_id = conn_info->conn_id; data.pipe = pipe; data.cmd = NCI_HCP_HEADER(NCI_HCI_HCP_COMMAND, NCI_HCI_ANY_GET_PARAMETER); data.data = &idx; data.data_len = 1; r = nci_request(ndev, nci_hci_send_data_req, &data, msecs_to_jiffies(NCI_DATA_TIMEOUT)); if (r == NCI_STATUS_OK) { message = (struct nci_hcp_message *)conn_info->rx_skb->data; r = nci_hci_result_to_errno( NCI_HCP_MSG_GET_CMD(message->header)); skb_pull(conn_info->rx_skb, NCI_HCI_HCP_MESSAGE_HEADER_LEN); if (!r && skb) *skb = conn_info->rx_skb; } return r; } EXPORT_SYMBOL(nci_hci_get_param); int nci_hci_connect_gate(struct nci_dev *ndev, u8 dest_host, u8 dest_gate, u8 pipe) { bool pipe_created = false; int r; if (pipe == NCI_HCI_DO_NOT_OPEN_PIPE) return 0; if (ndev->hci_dev->gate2pipe[dest_gate] != NCI_HCI_INVALID_PIPE) return -EADDRINUSE; if (pipe != NCI_HCI_INVALID_PIPE) goto open_pipe; switch (dest_gate) { case NCI_HCI_LINK_MGMT_GATE: pipe = NCI_HCI_LINK_MGMT_PIPE; break; case NCI_HCI_ADMIN_GATE: pipe = NCI_HCI_ADMIN_PIPE; break; default: pipe = nci_hci_create_pipe(ndev, dest_host, dest_gate, &r); if (pipe == NCI_HCI_INVALID_PIPE) return r; pipe_created = true; break; } open_pipe: r = nci_hci_open_pipe(ndev, pipe); if (r < 0) { if (pipe_created) { if (nci_hci_delete_pipe(ndev, pipe) < 0) { /* TODO: Cannot clean by deleting pipe... * -> inconsistent state */ } } return r; } ndev->hci_dev->pipes[pipe].gate = dest_gate; ndev->hci_dev->pipes[pipe].host = dest_host; ndev->hci_dev->gate2pipe[dest_gate] = pipe; return 0; } EXPORT_SYMBOL(nci_hci_connect_gate); static int nci_hci_dev_connect_gates(struct nci_dev *ndev, u8 gate_count, const struct nci_hci_gate *gates) { int r; while (gate_count--) { r = nci_hci_connect_gate(ndev, gates->dest_host, gates->gate, gates->pipe); if (r < 0) return r; gates++; } return 0; } int nci_hci_dev_session_init(struct nci_dev *ndev) { struct nci_conn_info *conn_info; struct sk_buff *skb; int r; ndev->hci_dev->count_pipes = 0; ndev->hci_dev->expected_pipes = 0; conn_info = ndev->hci_dev->conn_info; if (!conn_info) return -EPROTO; conn_info->data_exchange_cb = nci_hci_data_received_cb; conn_info->data_exchange_cb_context = ndev; nci_hci_reset_pipes(ndev->hci_dev); if (ndev->hci_dev->init_data.gates[0].gate != NCI_HCI_ADMIN_GATE) return -EPROTO; r = nci_hci_connect_gate(ndev, ndev->hci_dev->init_data.gates[0].dest_host, ndev->hci_dev->init_data.gates[0].gate, ndev->hci_dev->init_data.gates[0].pipe); if (r < 0) return r; r = nci_hci_get_param(ndev, NCI_HCI_ADMIN_GATE, NCI_HCI_ADMIN_PARAM_SESSION_IDENTITY, &skb); if (r < 0) return r; if (skb->len && skb->len == strlen(ndev->hci_dev->init_data.session_id) && !memcmp(ndev->hci_dev->init_data.session_id, skb->data, skb->len) && ndev->ops->hci_load_session) { /* Restore gate<->pipe table from some proprietary location. */ r = ndev->ops->hci_load_session(ndev); } else { r = nci_hci_clear_all_pipes(ndev); if (r < 0) goto exit; r = nci_hci_dev_connect_gates(ndev, ndev->hci_dev->init_data.gate_count, ndev->hci_dev->init_data.gates); if (r < 0) goto exit; r = nci_hci_set_param(ndev, NCI_HCI_ADMIN_GATE, NCI_HCI_ADMIN_PARAM_SESSION_IDENTITY, ndev->hci_dev->init_data.session_id, strlen(ndev->hci_dev->init_data.session_id)); } exit: kfree_skb(skb); return r; } EXPORT_SYMBOL(nci_hci_dev_session_init); struct nci_hci_dev *nci_hci_allocate(struct nci_dev *ndev) { struct nci_hci_dev *hdev; hdev = kzalloc(sizeof(*hdev), GFP_KERNEL); if (!hdev) return NULL; skb_queue_head_init(&hdev->rx_hcp_frags); INIT_WORK(&hdev->msg_rx_work, nci_hci_msg_rx_work); skb_queue_head_init(&hdev->msg_rx_queue); hdev->ndev = ndev; return hdev; } void nci_hci_deallocate(struct nci_dev *ndev) { kfree(ndev->hci_dev); } |
20 10 1 19 21 21 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 | /* * linux/fs/nls/nls_koi8-ru.c * * Charset koi8-ru translation based on charset koi8-u. * The Unicode to charset table has only exact mappings. */ #include <linux/module.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/nls.h> #include <linux/errno.h> static struct nls_table *p_nls; static int uni2char(const wchar_t uni, unsigned char *out, int boundlen) { if (boundlen <= 0) return -ENAMETOOLONG; if ((uni & 0xffaf) == 0x040e || (uni & 0xffce) == 0x254c) { /* koi8-ru and koi8-u differ only on two characters */ if (uni == 0x040e) out[0] = 0xbe; else if (uni == 0x045e) out[0] = 0xae; else if (uni == 0x255d || uni == 0x256c) return 0; else return p_nls->uni2char(uni, out, boundlen); return 1; } else /* fast path */ return p_nls->uni2char(uni, out, boundlen); } static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni) { int n; if ((*rawstring & 0xef) != 0xae) { /* koi8-ru and koi8-u differ only on two characters */ *uni = (*rawstring & 0x10) ? 0x040e : 0x045e; return 1; } n = p_nls->char2uni(rawstring, boundlen, uni); return n; } static struct nls_table table = { .charset = "koi8-ru", .uni2char = uni2char, .char2uni = char2uni, }; static int __init init_nls_koi8_ru(void) { p_nls = load_nls("koi8-u"); if (p_nls) { table.charset2upper = p_nls->charset2upper; table.charset2lower = p_nls->charset2lower; return register_nls(&table); } return -EINVAL; } static void __exit exit_nls_koi8_ru(void) { unregister_nls(&table); unload_nls(p_nls); } module_init(init_nls_koi8_ru) module_exit(exit_nls_koi8_ru) MODULE_LICENSE("Dual BSD/GPL"); |
4 4 3 1 4 4 4 4 1 1 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright(c) 1999 - 2004 Intel Corporation. All rights reserved. */ #include <linux/skbuff.h> #include <linux/if_ether.h> #include <linux/netdevice.h> #include <linux/spinlock.h> #include <linux/ethtool.h> #include <linux/etherdevice.h> #include <linux/if_bonding.h> #include <linux/pkt_sched.h> #include <net/net_namespace.h> #include <net/bonding.h> #include <net/bond_3ad.h> #include <net/netlink.h> /* General definitions */ #define AD_SHORT_TIMEOUT 1 #define AD_LONG_TIMEOUT 0 #define AD_STANDBY 0x2 #define AD_MAX_TX_IN_SECOND 3 #define AD_COLLECTOR_MAX_DELAY 0 /* Timer definitions (43.4.4 in the 802.3ad standard) */ #define AD_FAST_PERIODIC_TIME 1 #define AD_SLOW_PERIODIC_TIME 30 #define AD_SHORT_TIMEOUT_TIME (3*AD_FAST_PERIODIC_TIME) #define AD_LONG_TIMEOUT_TIME (3*AD_SLOW_PERIODIC_TIME) #define AD_CHURN_DETECTION_TIME 60 #define AD_AGGREGATE_WAIT_TIME 2 /* Port Variables definitions used by the State Machines (43.4.7 in the * 802.3ad standard) */ #define AD_PORT_BEGIN 0x1 #define AD_PORT_LACP_ENABLED 0x2 #define AD_PORT_ACTOR_CHURN 0x4 #define AD_PORT_PARTNER_CHURN 0x8 #define AD_PORT_READY 0x10 #define AD_PORT_READY_N 0x20 #define AD_PORT_MATCHED 0x40 #define AD_PORT_STANDBY 0x80 #define AD_PORT_SELECTED 0x100 #define AD_PORT_MOVED 0x200 #define AD_PORT_CHURNED (AD_PORT_ACTOR_CHURN | AD_PORT_PARTNER_CHURN) /* Port Key definitions * key is determined according to the link speed, duplex and * user key (which is yet not supported) * -------------------------------------------------------------- * Port key | User key (10 bits) | Speed (5 bits) | Duplex| * -------------------------------------------------------------- * |15 6|5 1|0 */ #define AD_DUPLEX_KEY_MASKS 0x1 #define AD_SPEED_KEY_MASKS 0x3E #define AD_USER_KEY_MASKS 0xFFC0 enum ad_link_speed_type { AD_LINK_SPEED_1MBPS = 1, AD_LINK_SPEED_10MBPS, AD_LINK_SPEED_100MBPS, AD_LINK_SPEED_1000MBPS, AD_LINK_SPEED_2500MBPS, AD_LINK_SPEED_5000MBPS, AD_LINK_SPEED_10000MBPS, AD_LINK_SPEED_14000MBPS, AD_LINK_SPEED_20000MBPS, AD_LINK_SPEED_25000MBPS, AD_LINK_SPEED_40000MBPS, AD_LINK_SPEED_50000MBPS, AD_LINK_SPEED_56000MBPS, AD_LINK_SPEED_100000MBPS, AD_LINK_SPEED_200000MBPS, AD_LINK_SPEED_400000MBPS, AD_LINK_SPEED_800000MBPS, }; /* compare MAC addresses */ #define MAC_ADDRESS_EQUAL(A, B) \ ether_addr_equal_64bits((const u8 *)A, (const u8 *)B) static const u16 ad_ticks_per_sec = 1000 / AD_TIMER_INTERVAL; static const int ad_delta_in_ticks = (AD_TIMER_INTERVAL * HZ) / 1000; const u8 lacpdu_mcast_addr[ETH_ALEN + 2] __long_aligned = { 0x01, 0x80, 0xC2, 0x00, 0x00, 0x02 }; /* ================= main 802.3ad protocol functions ================== */ static int ad_lacpdu_send(struct port *port); static int ad_marker_send(struct port *port, struct bond_marker *marker); static void ad_mux_machine(struct port *port, bool *update_slave_arr); static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port); static void ad_tx_machine(struct port *port); static void ad_periodic_machine(struct port *port, struct bond_params *bond_params); static void ad_port_selection_logic(struct port *port, bool *update_slave_arr); static void ad_agg_selection_logic(struct aggregator *aggregator, bool *update_slave_arr); static void ad_clear_agg(struct aggregator *aggregator); static void ad_initialize_agg(struct aggregator *aggregator); static void ad_initialize_port(struct port *port, int lacp_fast); static void ad_enable_collecting(struct port *port); static void ad_disable_distributing(struct port *port, bool *update_slave_arr); static void ad_enable_collecting_distributing(struct port *port, bool *update_slave_arr); static void ad_disable_collecting_distributing(struct port *port, bool *update_slave_arr); static void ad_marker_info_received(struct bond_marker *marker_info, struct port *port); static void ad_marker_response_received(struct bond_marker *marker, struct port *port); static void ad_update_actor_keys(struct port *port, bool reset); /* ================= api to bonding and kernel code ================== */ /** * __get_bond_by_port - get the port's bonding struct * @port: the port we're looking at * * Return @port's bonding struct, or %NULL if it can't be found. */ static inline struct bonding *__get_bond_by_port(struct port *port) { if (port->slave == NULL) return NULL; return bond_get_bond_by_slave(port->slave); } /** * __get_first_agg - get the first aggregator in the bond * @port: the port we're looking at * * Return the aggregator of the first slave in @bond, or %NULL if it can't be * found. * The caller must hold RCU or RTNL lock. */ static inline struct aggregator *__get_first_agg(struct port *port) { struct bonding *bond = __get_bond_by_port(port); struct slave *first_slave; struct aggregator *agg; /* If there's no bond for this port, or bond has no slaves */ if (bond == NULL) return NULL; rcu_read_lock(); first_slave = bond_first_slave_rcu(bond); agg = first_slave ? &(SLAVE_AD_INFO(first_slave)->aggregator) : NULL; rcu_read_unlock(); return agg; } /** * __agg_has_partner - see if we have a partner * @agg: the agregator we're looking at * * Return nonzero if aggregator has a partner (denoted by a non-zero ether * address for the partner). Return 0 if not. */ static inline int __agg_has_partner(struct aggregator *agg) { return !is_zero_ether_addr(agg->partner_system.mac_addr_value); } /** * __disable_distributing_port - disable the port's slave for distributing. * Port will still be able to collect. * @port: the port we're looking at * * This will disable only distributing on the port's slave. */ static void __disable_distributing_port(struct port *port) { bond_set_slave_tx_disabled_flags(port->slave, BOND_SLAVE_NOTIFY_LATER); } /** * __enable_collecting_port - enable the port's slave for collecting, * if it's up * @port: the port we're looking at * * This will enable only collecting on the port's slave. */ static void __enable_collecting_port(struct port *port) { struct slave *slave = port->slave; if (slave->link == BOND_LINK_UP && bond_slave_is_up(slave)) bond_set_slave_rx_enabled_flags(slave, BOND_SLAVE_NOTIFY_LATER); } /** * __disable_port - disable the port's slave * @port: the port we're looking at * * This will disable both collecting and distributing on the port's slave. */ static inline void __disable_port(struct port *port) { bond_set_slave_inactive_flags(port->slave, BOND_SLAVE_NOTIFY_LATER); } /** * __enable_port - enable the port's slave, if it's up * @port: the port we're looking at * * This will enable both collecting and distributing on the port's slave. */ static inline void __enable_port(struct port *port) { struct slave *slave = port->slave; if ((slave->link == BOND_LINK_UP) && bond_slave_is_up(slave)) bond_set_slave_active_flags(slave, BOND_SLAVE_NOTIFY_LATER); } /** * __port_move_to_attached_state - check if port should transition back to attached * state. * @port: the port we're looking at */ static bool __port_move_to_attached_state(struct port *port) { if (!(port->sm_vars & AD_PORT_SELECTED) || (port->sm_vars & AD_PORT_STANDBY) || !(port->partner_oper.port_state & LACP_STATE_SYNCHRONIZATION) || !(port->actor_oper_port_state & LACP_STATE_SYNCHRONIZATION)) port->sm_mux_state = AD_MUX_ATTACHED; return port->sm_mux_state == AD_MUX_ATTACHED; } /** * __port_is_collecting_distributing - check if the port's slave is in the * combined collecting/distributing state * @port: the port we're looking at */ static int __port_is_collecting_distributing(struct port *port) { return bond_is_active_slave(port->slave); } /** * __get_agg_selection_mode - get the aggregator selection mode * @port: the port we're looking at * * Get the aggregator selection mode. Can be %STABLE, %BANDWIDTH or %COUNT. */ static inline u32 __get_agg_selection_mode(struct port *port) { struct bonding *bond = __get_bond_by_port(port); if (bond == NULL) return BOND_AD_STABLE; return bond->params.ad_select; } /** * __check_agg_selection_timer - check if the selection timer has expired * @port: the port we're looking at */ static inline int __check_agg_selection_timer(struct port *port) { struct bonding *bond = __get_bond_by_port(port); if (bond == NULL) return 0; return atomic_read(&BOND_AD_INFO(bond).agg_select_timer) ? 1 : 0; } /** * __get_link_speed - get a port's speed * @port: the port we're looking at * * Return @port's speed in 802.3ad enum format. i.e. one of: * 0, * %AD_LINK_SPEED_10MBPS, * %AD_LINK_SPEED_100MBPS, * %AD_LINK_SPEED_1000MBPS, * %AD_LINK_SPEED_2500MBPS, * %AD_LINK_SPEED_5000MBPS, * %AD_LINK_SPEED_10000MBPS * %AD_LINK_SPEED_14000MBPS, * %AD_LINK_SPEED_20000MBPS * %AD_LINK_SPEED_25000MBPS * %AD_LINK_SPEED_40000MBPS * %AD_LINK_SPEED_50000MBPS * %AD_LINK_SPEED_56000MBPS * %AD_LINK_SPEED_100000MBPS * %AD_LINK_SPEED_200000MBPS * %AD_LINK_SPEED_400000MBPS * %AD_LINK_SPEED_800000MBPS */ static u16 __get_link_speed(struct port *port) { struct slave *slave = port->slave; u16 speed; /* this if covers only a special case: when the configuration starts * with link down, it sets the speed to 0. * This is done in spite of the fact that the e100 driver reports 0 * to be compatible with MVT in the future. */ if (slave->link != BOND_LINK_UP) speed = 0; else { switch (slave->speed) { case SPEED_10: speed = AD_LINK_SPEED_10MBPS; break; case SPEED_100: speed = AD_LINK_SPEED_100MBPS; break; case SPEED_1000: speed = AD_LINK_SPEED_1000MBPS; break; case SPEED_2500: speed = AD_LINK_SPEED_2500MBPS; break; case SPEED_5000: speed = AD_LINK_SPEED_5000MBPS; break; case SPEED_10000: speed = AD_LINK_SPEED_10000MBPS; break; case SPEED_14000: speed = AD_LINK_SPEED_14000MBPS; break; case SPEED_20000: speed = AD_LINK_SPEED_20000MBPS; break; case SPEED_25000: speed = AD_LINK_SPEED_25000MBPS; break; case SPEED_40000: speed = AD_LINK_SPEED_40000MBPS; break; case SPEED_50000: speed = AD_LINK_SPEED_50000MBPS; break; case SPEED_56000: speed = AD_LINK_SPEED_56000MBPS; break; case SPEED_100000: speed = AD_LINK_SPEED_100000MBPS; break; case SPEED_200000: speed = AD_LINK_SPEED_200000MBPS; break; case SPEED_400000: speed = AD_LINK_SPEED_400000MBPS; break; case SPEED_800000: speed = AD_LINK_SPEED_800000MBPS; break; default: /* unknown speed value from ethtool. shouldn't happen */ if (slave->speed != SPEED_UNKNOWN) pr_err_once("%s: (slave %s): unknown ethtool speed (%d) for port %d (set it to 0)\n", slave->bond->dev->name, slave->dev->name, slave->speed, port->actor_port_number); speed = 0; break; } } slave_dbg(slave->bond->dev, slave->dev, "Port %d Received link speed %d update from adapter\n", port->actor_port_number, speed); return speed; } /** * __get_duplex - get a port's duplex * @port: the port we're looking at * * Return @port's duplex in 802.3ad bitmask format. i.e.: * 0x01 if in full duplex * 0x00 otherwise */ static u8 __get_duplex(struct port *port) { struct slave *slave = port->slave; u8 retval = 0x0; /* handling a special case: when the configuration starts with * link down, it sets the duplex to 0. */ if (slave->link == BOND_LINK_UP) { switch (slave->duplex) { case DUPLEX_FULL: retval = 0x1; slave_dbg(slave->bond->dev, slave->dev, "Port %d Received status full duplex update from adapter\n", port->actor_port_number); break; case DUPLEX_HALF: default: retval = 0x0; slave_dbg(slave->bond->dev, slave->dev, "Port %d Received status NOT full duplex update from adapter\n", port->actor_port_number); break; } } return retval; } static void __ad_actor_update_port(struct port *port) { const struct bonding *bond = bond_get_bond_by_slave(port->slave); port->actor_system = BOND_AD_INFO(bond).system.sys_mac_addr; port->actor_system_priority = BOND_AD_INFO(bond).system.sys_priority; } /* Conversions */ /** * __ad_timer_to_ticks - convert a given timer type to AD module ticks * @timer_type: which timer to operate * @par: timer parameter. see below * * If @timer_type is %current_while_timer, @par indicates long/short timer. * If @timer_type is %periodic_timer, @par is one of %FAST_PERIODIC_TIME, * %SLOW_PERIODIC_TIME. */ static u16 __ad_timer_to_ticks(u16 timer_type, u16 par) { u16 retval = 0; /* to silence the compiler */ switch (timer_type) { case AD_CURRENT_WHILE_TIMER: /* for rx machine usage */ if (par) retval = (AD_SHORT_TIMEOUT_TIME*ad_ticks_per_sec); else retval = (AD_LONG_TIMEOUT_TIME*ad_ticks_per_sec); break; case AD_ACTOR_CHURN_TIMER: /* for local churn machine */ retval = (AD_CHURN_DETECTION_TIME*ad_ticks_per_sec); break; case AD_PERIODIC_TIMER: /* for periodic machine */ retval = (par*ad_ticks_per_sec); /* long timeout */ break; case AD_PARTNER_CHURN_TIMER: /* for remote churn machine */ retval = (AD_CHURN_DETECTION_TIME*ad_ticks_per_sec); break; case AD_WAIT_WHILE_TIMER: /* for selection machine */ retval = (AD_AGGREGATE_WAIT_TIME*ad_ticks_per_sec); break; } return retval; } /* ================= ad_rx_machine helper functions ================== */ /** * __choose_matched - update a port's matched variable from a received lacpdu * @lacpdu: the lacpdu we've received * @port: the port we're looking at * * Update the value of the matched variable, using parameter values from a * newly received lacpdu. Parameter values for the partner carried in the * received PDU are compared with the corresponding operational parameter * values for the actor. Matched is set to TRUE if all of these parameters * match and the PDU parameter partner_state.aggregation has the same value as * actor_oper_port_state.aggregation and lacp will actively maintain the link * in the aggregation. Matched is also set to TRUE if the value of * actor_state.aggregation in the received PDU is set to FALSE, i.e., indicates * an individual link and lacp will actively maintain the link. Otherwise, * matched is set to FALSE. LACP is considered to be actively maintaining the * link if either the PDU's actor_state.lacp_activity variable is TRUE or both * the actor's actor_oper_port_state.lacp_activity and the PDU's * partner_state.lacp_activity variables are TRUE. * * Note: the AD_PORT_MATCHED "variable" is not specified by 802.3ad; it is * used here to implement the language from 802.3ad 43.4.9 that requires * recordPDU to "match" the LACPDU parameters to the stored values. */ static void __choose_matched(struct lacpdu *lacpdu, struct port *port) { /* check if all parameters are alike * or this is individual link(aggregation == FALSE) * then update the state machine Matched variable. */ if (((ntohs(lacpdu->partner_port) == port->actor_port_number) && (ntohs(lacpdu->partner_port_priority) == port->actor_port_priority) && MAC_ADDRESS_EQUAL(&(lacpdu->partner_system), &(port->actor_system)) && (ntohs(lacpdu->partner_system_priority) == port->actor_system_priority) && (ntohs(lacpdu->partner_key) == port->actor_oper_port_key) && ((lacpdu->partner_state & LACP_STATE_AGGREGATION) == (port->actor_oper_port_state & LACP_STATE_AGGREGATION))) || ((lacpdu->actor_state & LACP_STATE_AGGREGATION) == 0) ) { port->sm_vars |= AD_PORT_MATCHED; } else { port->sm_vars &= ~AD_PORT_MATCHED; } } /** * __record_pdu - record parameters from a received lacpdu * @lacpdu: the lacpdu we've received * @port: the port we're looking at * * Record the parameter values for the Actor carried in a received lacpdu as * the current partner operational parameter values and sets * actor_oper_port_state.defaulted to FALSE. */ static void __record_pdu(struct lacpdu *lacpdu, struct port *port) { if (lacpdu && port) { struct port_params *partner = &port->partner_oper; __choose_matched(lacpdu, port); /* record the new parameter values for the partner * operational */ partner->port_number = ntohs(lacpdu->actor_port); partner->port_priority = ntohs(lacpdu->actor_port_priority); partner->system = lacpdu->actor_system; partner->system_priority = ntohs(lacpdu->actor_system_priority); partner->key = ntohs(lacpdu->actor_key); partner->port_state = lacpdu->actor_state; /* set actor_oper_port_state.defaulted to FALSE */ port->actor_oper_port_state &= ~LACP_STATE_DEFAULTED; /* set the partner sync. to on if the partner is sync, * and the port is matched */ if ((port->sm_vars & AD_PORT_MATCHED) && (lacpdu->actor_state & LACP_STATE_SYNCHRONIZATION)) { partner->port_state |= LACP_STATE_SYNCHRONIZATION; slave_dbg(port->slave->bond->dev, port->slave->dev, "partner sync=1\n"); } else { partner->port_state &= ~LACP_STATE_SYNCHRONIZATION; slave_dbg(port->slave->bond->dev, port->slave->dev, "partner sync=0\n"); } } } /** * __record_default - record default parameters * @port: the port we're looking at * * This function records the default parameter values for the partner carried * in the Partner Admin parameters as the current partner operational parameter * values and sets actor_oper_port_state.defaulted to TRUE. */ static void __record_default(struct port *port) { if (port) { /* record the partner admin parameters */ memcpy(&port->partner_oper, &port->partner_admin, sizeof(struct port_params)); /* set actor_oper_port_state.defaulted to true */ port->actor_oper_port_state |= LACP_STATE_DEFAULTED; } } /** * __update_selected - update a port's Selected variable from a received lacpdu * @lacpdu: the lacpdu we've received * @port: the port we're looking at * * Update the value of the selected variable, using parameter values from a * newly received lacpdu. The parameter values for the Actor carried in the * received PDU are compared with the corresponding operational parameter * values for the ports partner. If one or more of the comparisons shows that * the value(s) received in the PDU differ from the current operational values, * then selected is set to FALSE and actor_oper_port_state.synchronization is * set to out_of_sync. Otherwise, selected remains unchanged. */ static void __update_selected(struct lacpdu *lacpdu, struct port *port) { if (lacpdu && port) { const struct port_params *partner = &port->partner_oper; /* check if any parameter is different then * update the state machine selected variable. */ if (ntohs(lacpdu->actor_port) != partner->port_number || ntohs(lacpdu->actor_port_priority) != partner->port_priority || !MAC_ADDRESS_EQUAL(&lacpdu->actor_system, &partner->system) || ntohs(lacpdu->actor_system_priority) != partner->system_priority || ntohs(lacpdu->actor_key) != partner->key || (lacpdu->actor_state & LACP_STATE_AGGREGATION) != (partner->port_state & LACP_STATE_AGGREGATION)) { port->sm_vars &= ~AD_PORT_SELECTED; } } } /** * __update_default_selected - update a port's Selected variable from Partner * @port: the port we're looking at * * This function updates the value of the selected variable, using the partner * administrative parameter values. The administrative values are compared with * the corresponding operational parameter values for the partner. If one or * more of the comparisons shows that the administrative value(s) differ from * the current operational values, then Selected is set to FALSE and * actor_oper_port_state.synchronization is set to OUT_OF_SYNC. Otherwise, * Selected remains unchanged. */ static void __update_default_selected(struct port *port) { if (port) { const struct port_params *admin = &port->partner_admin; const struct port_params *oper = &port->partner_oper; /* check if any parameter is different then * update the state machine selected variable. */ if (admin->port_number != oper->port_number || admin->port_priority != oper->port_priority || !MAC_ADDRESS_EQUAL(&admin->system, &oper->system) || admin->system_priority != oper->system_priority || admin->key != oper->key || (admin->port_state & LACP_STATE_AGGREGATION) != (oper->port_state & LACP_STATE_AGGREGATION)) { port->sm_vars &= ~AD_PORT_SELECTED; } } } /** * __update_ntt - update a port's ntt variable from a received lacpdu * @lacpdu: the lacpdu we've received * @port: the port we're looking at * * Updates the value of the ntt variable, using parameter values from a newly * received lacpdu. The parameter values for the partner carried in the * received PDU are compared with the corresponding operational parameter * values for the Actor. If one or more of the comparisons shows that the * value(s) received in the PDU differ from the current operational values, * then ntt is set to TRUE. Otherwise, ntt remains unchanged. */ static void __update_ntt(struct lacpdu *lacpdu, struct port *port) { /* validate lacpdu and port */ if (lacpdu && port) { /* check if any parameter is different then * update the port->ntt. */ if ((ntohs(lacpdu->partner_port) != port->actor_port_number) || (ntohs(lacpdu->partner_port_priority) != port->actor_port_priority) || !MAC_ADDRESS_EQUAL(&(lacpdu->partner_system), &(port->actor_system)) || (ntohs(lacpdu->partner_system_priority) != port->actor_system_priority) || (ntohs(lacpdu->partner_key) != port->actor_oper_port_key) || ((lacpdu->partner_state & LACP_STATE_LACP_ACTIVITY) != (port->actor_oper_port_state & LACP_STATE_LACP_ACTIVITY)) || ((lacpdu->partner_state & LACP_STATE_LACP_TIMEOUT) != (port->actor_oper_port_state & LACP_STATE_LACP_TIMEOUT)) || ((lacpdu->partner_state & LACP_STATE_SYNCHRONIZATION) != (port->actor_oper_port_state & LACP_STATE_SYNCHRONIZATION)) || ((lacpdu->partner_state & LACP_STATE_AGGREGATION) != (port->actor_oper_port_state & LACP_STATE_AGGREGATION)) ) { port->ntt = true; } } } /** * __agg_ports_are_ready - check if all ports in an aggregator are ready * @aggregator: the aggregator we're looking at * */ static int __agg_ports_are_ready(struct aggregator *aggregator) { struct port *port; int retval = 1; if (aggregator) { /* scan all ports in this aggregator to verfy if they are * all ready. */ for (port = aggregator->lag_ports; port; port = port->next_port_in_aggregator) { if (!(port->sm_vars & AD_PORT_READY_N)) { retval = 0; break; } } } return retval; } /** * __set_agg_ports_ready - set value of Ready bit in all ports of an aggregator * @aggregator: the aggregator we're looking at * @val: Should the ports' ready bit be set on or off * */ static void __set_agg_ports_ready(struct aggregator *aggregator, int val) { struct port *port; for (port = aggregator->lag_ports; port; port = port->next_port_in_aggregator) { if (val) port->sm_vars |= AD_PORT_READY; else port->sm_vars &= ~AD_PORT_READY; } } static int __agg_active_ports(struct aggregator *agg) { struct port *port; int active = 0; for (port = agg->lag_ports; port; port = port->next_port_in_aggregator) { if (port->is_enabled) active++; } return active; } /** * __get_agg_bandwidth - get the total bandwidth of an aggregator * @aggregator: the aggregator we're looking at * */ static u32 __get_agg_bandwidth(struct aggregator *aggregator) { int nports = __agg_active_ports(aggregator); u32 bandwidth = 0; if (nports) { switch (__get_link_speed(aggregator->lag_ports)) { case AD_LINK_SPEED_1MBPS: bandwidth = nports; break; case AD_LINK_SPEED_10MBPS: bandwidth = nports * 10; break; case AD_LINK_SPEED_100MBPS: bandwidth = nports * 100; break; case AD_LINK_SPEED_1000MBPS: bandwidth = nports * 1000; break; case AD_LINK_SPEED_2500MBPS: bandwidth = nports * 2500; break; case AD_LINK_SPEED_5000MBPS: bandwidth = nports * 5000; break; case AD_LINK_SPEED_10000MBPS: bandwidth = nports * 10000; break; case AD_LINK_SPEED_14000MBPS: bandwidth = nports * 14000; break; case AD_LINK_SPEED_20000MBPS: bandwidth = nports * 20000; break; case AD_LINK_SPEED_25000MBPS: bandwidth = nports * 25000; break; case AD_LINK_SPEED_40000MBPS: bandwidth = nports * 40000; break; case AD_LINK_SPEED_50000MBPS: bandwidth = nports * 50000; break; case AD_LINK_SPEED_56000MBPS: bandwidth = nports * 56000; break; case AD_LINK_SPEED_100000MBPS: bandwidth = nports * 100000; break; case AD_LINK_SPEED_200000MBPS: bandwidth = nports * 200000; break; case AD_LINK_SPEED_400000MBPS: bandwidth = nports * 400000; break; case AD_LINK_SPEED_800000MBPS: bandwidth = nports * 800000; break; default: bandwidth = 0; /* to silence the compiler */ } } return bandwidth; } /** * __get_active_agg - get the current active aggregator * @aggregator: the aggregator we're looking at * * Caller must hold RCU lock. */ static struct aggregator *__get_active_agg(struct aggregator *aggregator) { struct bonding *bond = aggregator->slave->bond; struct list_head *iter; struct slave *slave; bond_for_each_slave_rcu(bond, slave, iter) if (SLAVE_AD_INFO(slave)->aggregator.is_active) return &(SLAVE_AD_INFO(slave)->aggregator); return NULL; } /** * __update_lacpdu_from_port - update a port's lacpdu fields * @port: the port we're looking at */ static inline void __update_lacpdu_from_port(struct port *port) { struct lacpdu *lacpdu = &port->lacpdu; const struct port_params *partner = &port->partner_oper; /* update current actual Actor parameters * lacpdu->subtype initialized * lacpdu->version_number initialized * lacpdu->tlv_type_actor_info initialized * lacpdu->actor_information_length initialized */ lacpdu->actor_system_priority = htons(port->actor_system_priority); lacpdu->actor_system = port->actor_system; lacpdu->actor_key = htons(port->actor_oper_port_key); lacpdu->actor_port_priority = htons(port->actor_port_priority); lacpdu->actor_port = htons(port->actor_port_number); lacpdu->actor_state = port->actor_oper_port_state; slave_dbg(port->slave->bond->dev, port->slave->dev, "update lacpdu: actor port state %x\n", port->actor_oper_port_state); /* lacpdu->reserved_3_1 initialized * lacpdu->tlv_type_partner_info initialized * lacpdu->partner_information_length initialized */ lacpdu->partner_system_priority = htons(partner->system_priority); lacpdu->partner_system = partner->system; lacpdu->partner_key = htons(partner->key); lacpdu->partner_port_priority = htons(partner->port_priority); lacpdu->partner_port = htons(partner->port_number); lacpdu->partner_state = partner->port_state; /* lacpdu->reserved_3_2 initialized * lacpdu->tlv_type_collector_info initialized * lacpdu->collector_information_length initialized * collector_max_delay initialized * reserved_12[12] initialized * tlv_type_terminator initialized * terminator_length initialized * reserved_50[50] initialized */ } /* ================= main 802.3ad protocol code ========================= */ /** * ad_lacpdu_send - send out a lacpdu packet on a given port * @port: the port we're looking at * * Returns: 0 on success * < 0 on error */ static int ad_lacpdu_send(struct port *port) { struct slave *slave = port->slave; struct sk_buff *skb; struct lacpdu_header *lacpdu_header; int length = sizeof(struct lacpdu_header); skb = dev_alloc_skb(length); if (!skb) return -ENOMEM; atomic64_inc(&SLAVE_AD_INFO(slave)->stats.lacpdu_tx); atomic64_inc(&BOND_AD_INFO(slave->bond).stats.lacpdu_tx); skb->dev = slave->dev; skb_reset_mac_header(skb); skb->network_header = skb->mac_header + ETH_HLEN; skb->protocol = PKT_TYPE_LACPDU; skb->priority = TC_PRIO_CONTROL; lacpdu_header = skb_put(skb, length); ether_addr_copy(lacpdu_header->hdr.h_dest, lacpdu_mcast_addr); /* Note: source address is set to be the member's PERMANENT address, * because we use it to identify loopback lacpdus in receive. */ ether_addr_copy(lacpdu_header->hdr.h_source, slave->perm_hwaddr); lacpdu_header->hdr.h_proto = PKT_TYPE_LACPDU; lacpdu_header->lacpdu = port->lacpdu; dev_queue_xmit(skb); return 0; } /** * ad_marker_send - send marker information/response on a given port * @port: the port we're looking at * @marker: marker data to send * * Returns: 0 on success * < 0 on error */ static int ad_marker_send(struct port *port, struct bond_marker *marker) { struct slave *slave = port->slave; struct sk_buff *skb; struct bond_marker_header *marker_header; int length = sizeof(struct bond_marker_header); skb = dev_alloc_skb(length + 16); if (!skb) return -ENOMEM; switch (marker->tlv_type) { case AD_MARKER_INFORMATION_SUBTYPE: atomic64_inc(&SLAVE_AD_INFO(slave)->stats.marker_tx); atomic64_inc(&BOND_AD_INFO(slave->bond).stats.marker_tx); break; case AD_MARKER_RESPONSE_SUBTYPE: atomic64_inc(&SLAVE_AD_INFO(slave)->stats.marker_resp_tx); atomic64_inc(&BOND_AD_INFO(slave->bond).stats.marker_resp_tx); break; } skb_reserve(skb, 16); skb->dev = slave->dev; skb_reset_mac_header(skb); skb->network_header = skb->mac_header + ETH_HLEN; skb->protocol = PKT_TYPE_LACPDU; marker_header = skb_put(skb, length); ether_addr_copy(marker_header->hdr.h_dest, lacpdu_mcast_addr); /* Note: source address is set to be the member's PERMANENT address, * because we use it to identify loopback MARKERs in receive. */ ether_addr_copy(marker_header->hdr.h_source, slave->perm_hwaddr); marker_header->hdr.h_proto = PKT_TYPE_LACPDU; marker_header->marker = *marker; dev_queue_xmit(skb); return 0; } /** * ad_mux_machine - handle a port's mux state machine * @port: the port we're looking at * @update_slave_arr: Does slave array need update? */ static void ad_mux_machine(struct port *port, bool *update_slave_arr) { struct bonding *bond = __get_bond_by_port(port); mux_states_t last_state; /* keep current State Machine state to compare later if it was * changed */ last_state = port->sm_mux_state; if (port->sm_vars & AD_PORT_BEGIN) { port->sm_mux_state = AD_MUX_DETACHED; } else { switch (port->sm_mux_state) { case AD_MUX_DETACHED: if ((port->sm_vars & AD_PORT_SELECTED) || (port->sm_vars & AD_PORT_STANDBY)) /* if SELECTED or STANDBY */ port->sm_mux_state = AD_MUX_WAITING; break; case AD_MUX_WAITING: /* if SELECTED == FALSE return to DETACH state */ if (!(port->sm_vars & AD_PORT_SELECTED)) { port->sm_vars &= ~AD_PORT_READY_N; /* in order to withhold the Selection Logic to * check all ports READY_N value every callback * cycle to update ready variable, we check * READY_N and update READY here */ __set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator)); port->sm_mux_state = AD_MUX_DETACHED; break; } /* check if the wait_while_timer expired */ if (port->sm_mux_timer_counter && !(--port->sm_mux_timer_counter)) port->sm_vars |= AD_PORT_READY_N; /* in order to withhold the selection logic to check * all ports READY_N value every callback cycle to * update ready variable, we check READY_N and update * READY here */ __set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator)); /* if the wait_while_timer expired, and the port is * in READY state, move to ATTACHED state */ if ((port->sm_vars & AD_PORT_READY) && !port->sm_mux_timer_counter) port->sm_mux_state = AD_MUX_ATTACHED; break; case AD_MUX_ATTACHED: /* check also if agg_select_timer expired (so the * edable port will take place only after this timer) */ if ((port->sm_vars & AD_PORT_SELECTED) && (port->partner_oper.port_state & LACP_STATE_SYNCHRONIZATION) && !__check_agg_selection_timer(port)) { if (port->aggregator->is_active) { int state = AD_MUX_COLLECTING_DISTRIBUTING; if (!bond->params.coupled_control) state = AD_MUX_COLLECTING; port->sm_mux_state = state; } } else if (!(port->sm_vars & AD_PORT_SELECTED) || (port->sm_vars & AD_PORT_STANDBY)) { /* if UNSELECTED or STANDBY */ port->sm_vars &= ~AD_PORT_READY_N; /* in order to withhold the selection logic to * check all ports READY_N value every callback * cycle to update ready variable, we check * READY_N and update READY here */ __set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator)); port->sm_mux_state = AD_MUX_DETACHED; } else if (port->aggregator->is_active) { port->actor_oper_port_state |= LACP_STATE_SYNCHRONIZATION; } break; case AD_MUX_COLLECTING_DISTRIBUTING: if (!__port_move_to_attached_state(port)) { /* if port state hasn't changed make * sure that a collecting distributing * port in an active aggregator is enabled */ if (port->aggregator->is_active && !__port_is_collecting_distributing(port)) { __enable_port(port); *update_slave_arr = true; } } break; case AD_MUX_COLLECTING: if (!__port_move_to_attached_state(port)) { if ((port->sm_vars & AD_PORT_SELECTED) && (port->partner_oper.port_state & LACP_STATE_SYNCHRONIZATION) && (port->partner_oper.port_state & LACP_STATE_COLLECTING)) { port->sm_mux_state = AD_MUX_DISTRIBUTING; } else { /* If port state hasn't changed, make sure that a collecting * port is enabled for an active aggregator. */ struct slave *slave = port->slave; if (port->aggregator->is_active && bond_is_slave_rx_disabled(slave)) { ad_enable_collecting(port); *update_slave_arr = true; } } } break; case AD_MUX_DISTRIBUTING: if (!(port->sm_vars & AD_PORT_SELECTED) || (port->sm_vars & AD_PORT_STANDBY) || !(port->partner_oper.port_state & LACP_STATE_COLLECTING) || !(port->partner_oper.port_state & LACP_STATE_SYNCHRONIZATION) || !(port->actor_oper_port_state & LACP_STATE_SYNCHRONIZATION)) { port->sm_mux_state = AD_MUX_COLLECTING; } else { /* if port state hasn't changed make * sure that a collecting distributing * port in an active aggregator is enabled */ if (port->aggregator && port->aggregator->is_active && !__port_is_collecting_distributing(port)) { __enable_port(port); *update_slave_arr = true; } } break; default: break; } } /* check if the state machine was changed */ if (port->sm_mux_state != last_state) { slave_dbg(port->slave->bond->dev, port->slave->dev, "Mux Machine: Port=%d, Last State=%d, Curr State=%d\n", port->actor_port_number, last_state, port->sm_mux_state); switch (port->sm_mux_state) { case AD_MUX_DETACHED: port->actor_oper_port_state &= ~LACP_STATE_SYNCHRONIZATION; ad_disable_collecting_distributing(port, update_slave_arr); port->actor_oper_port_state &= ~LACP_STATE_COLLECTING; port->actor_oper_port_state &= ~LACP_STATE_DISTRIBUTING; port->ntt = true; break; case AD_MUX_WAITING: port->sm_mux_timer_counter = __ad_timer_to_ticks(AD_WAIT_WHILE_TIMER, 0); break; case AD_MUX_ATTACHED: if (port->aggregator->is_active) port->actor_oper_port_state |= LACP_STATE_SYNCHRONIZATION; else port->actor_oper_port_state &= ~LACP_STATE_SYNCHRONIZATION; port->actor_oper_port_state &= ~LACP_STATE_COLLECTING; port->actor_oper_port_state &= ~LACP_STATE_DISTRIBUTING; ad_disable_collecting_distributing(port, update_slave_arr); port->ntt = true; break; case AD_MUX_COLLECTING_DISTRIBUTING: port->actor_oper_port_state |= LACP_STATE_COLLECTING; port->actor_oper_port_state |= LACP_STATE_DISTRIBUTING; port->actor_oper_port_state |= LACP_STATE_SYNCHRONIZATION; ad_enable_collecting_distributing(port, update_slave_arr); port->ntt = true; break; case AD_MUX_COLLECTING: port->actor_oper_port_state |= LACP_STATE_COLLECTING; port->actor_oper_port_state &= ~LACP_STATE_DISTRIBUTING; port->actor_oper_port_state |= LACP_STATE_SYNCHRONIZATION; ad_enable_collecting(port); ad_disable_distributing(port, update_slave_arr); port->ntt = true; break; case AD_MUX_DISTRIBUTING: port->actor_oper_port_state |= LACP_STATE_DISTRIBUTING; port->actor_oper_port_state |= LACP_STATE_SYNCHRONIZATION; ad_enable_collecting_distributing(port, update_slave_arr); break; default: break; } } } /** * ad_rx_machine - handle a port's rx State Machine * @lacpdu: the lacpdu we've received * @port: the port we're looking at * * If lacpdu arrived, stop previous timer (if exists) and set the next state as * CURRENT. If timer expired set the state machine in the proper state. * In other cases, this function checks if we need to switch to other state. */ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port) { rx_states_t last_state; /* keep current State Machine state to compare later if it was * changed */ last_state = port->sm_rx_state; if (lacpdu) { atomic64_inc(&SLAVE_AD_INFO(port->slave)->stats.lacpdu_rx); atomic64_inc(&BOND_AD_INFO(port->slave->bond).stats.lacpdu_rx); } /* check if state machine should change state */ /* first, check if port was reinitialized */ if (port->sm_vars & AD_PORT_BEGIN) { port->sm_rx_state = AD_RX_INITIALIZE; port->sm_vars |= AD_PORT_CHURNED; /* check if port is not enabled */ } else if (!(port->sm_vars & AD_PORT_BEGIN) && !port->is_enabled) port->sm_rx_state = AD_RX_PORT_DISABLED; /* check if new lacpdu arrived */ else if (lacpdu && ((port->sm_rx_state == AD_RX_EXPIRED) || (port->sm_rx_state == AD_RX_DEFAULTED) || (port->sm_rx_state == AD_RX_CURRENT))) { if (port->sm_rx_state != AD_RX_CURRENT) port->sm_vars |= AD_PORT_CHURNED; port->sm_rx_timer_counter = 0; port->sm_rx_state = AD_RX_CURRENT; } else { /* if timer is on, and if it is expired */ if (port->sm_rx_timer_counter && !(--port->sm_rx_timer_counter)) { switch (port->sm_rx_state) { case AD_RX_EXPIRED: port->sm_rx_state = AD_RX_DEFAULTED; break; case AD_RX_CURRENT: port->sm_rx_state = AD_RX_EXPIRED; break; default: break; } } else { /* if no lacpdu arrived and no timer is on */ switch (port->sm_rx_state) { case AD_RX_PORT_DISABLED: if (port->is_enabled && (port->sm_vars & AD_PORT_LACP_ENABLED)) port->sm_rx_state = AD_RX_EXPIRED; else if (port->is_enabled && ((port->sm_vars & AD_PORT_LACP_ENABLED) == 0)) port->sm_rx_state = AD_RX_LACP_DISABLED; break; default: break; } } } /* check if the State machine was changed or new lacpdu arrived */ if ((port->sm_rx_state != last_state) || (lacpdu)) { slave_dbg(port->slave->bond->dev, port->slave->dev, "Rx Machine: Port=%d, Last State=%d, Curr State=%d\n", port->actor_port_number, last_state, port->sm_rx_state); switch (port->sm_rx_state) { case AD_RX_INITIALIZE: if (!(port->actor_oper_port_key & AD_DUPLEX_KEY_MASKS)) port->sm_vars &= ~AD_PORT_LACP_ENABLED; else port->sm_vars |= AD_PORT_LACP_ENABLED; port->sm_vars &= ~AD_PORT_SELECTED; __record_default(port); port->actor_oper_port_state &= ~LACP_STATE_EXPIRED; port->sm_rx_state = AD_RX_PORT_DISABLED; fallthrough; case AD_RX_PORT_DISABLED: port->sm_vars &= ~AD_PORT_MATCHED; break; case AD_RX_LACP_DISABLED: port->sm_vars &= ~AD_PORT_SELECTED; __record_default(port); port->partner_oper.port_state &= ~LACP_STATE_AGGREGATION; port->sm_vars |= AD_PORT_MATCHED; port->actor_oper_port_state &= ~LACP_STATE_EXPIRED; break; case AD_RX_EXPIRED: /* Reset of the Synchronization flag (Standard 43.4.12) * This reset cause to disable this port in the * COLLECTING_DISTRIBUTING state of the mux machine in * case of EXPIRED even if LINK_DOWN didn't arrive for * the port. */ port->partner_oper.port_state &= ~LACP_STATE_SYNCHRONIZATION; port->sm_vars &= ~AD_PORT_MATCHED; port->partner_oper.port_state |= LACP_STATE_LACP_TIMEOUT; port->partner_oper.port_state |= LACP_STATE_LACP_ACTIVITY; port->sm_rx_timer_counter = __ad_timer_to_ticks(AD_CURRENT_WHILE_TIMER, (u16)(AD_SHORT_TIMEOUT)); port->actor_oper_port_state |= LACP_STATE_EXPIRED; port->sm_vars |= AD_PORT_CHURNED; break; case AD_RX_DEFAULTED: __update_default_selected(port); __record_default(port); port->sm_vars |= AD_PORT_MATCHED; port->actor_oper_port_state &= ~LACP_STATE_EXPIRED; break; case AD_RX_CURRENT: /* detect loopback situation */ if (MAC_ADDRESS_EQUAL(&(lacpdu->actor_system), &(port->actor_system))) { slave_err(port->slave->bond->dev, port->slave->dev, "An illegal loopback occurred on slave\n" "Check the configuration to verify that all adapters are connected to 802.3ad compliant switch ports\n"); return; } __update_selected(lacpdu, port); __update_ntt(lacpdu, port); __record_pdu(lacpdu, port); port->sm_rx_timer_counter = __ad_timer_to_ticks(AD_CURRENT_WHILE_TIMER, (u16)(port->actor_oper_port_state & LACP_STATE_LACP_TIMEOUT)); port->actor_oper_port_state &= ~LACP_STATE_EXPIRED; break; default: break; } } } /** * ad_churn_machine - handle port churn's state machine * @port: the port we're looking at * */ static void ad_churn_machine(struct port *port) { if (port->sm_vars & AD_PORT_CHURNED) { port->sm_vars &= ~AD_PORT_CHURNED; port->sm_churn_actor_state = AD_CHURN_MONITOR; port->sm_churn_partner_state = AD_CHURN_MONITOR; port->sm_churn_actor_timer_counter = __ad_timer_to_ticks(AD_ACTOR_CHURN_TIMER, 0); port->sm_churn_partner_timer_counter = __ad_timer_to_ticks(AD_PARTNER_CHURN_TIMER, 0); return; } if (port->sm_churn_actor_timer_counter && !(--port->sm_churn_actor_timer_counter) && port->sm_churn_actor_state == AD_CHURN_MONITOR) { if (port->actor_oper_port_state & LACP_STATE_SYNCHRONIZATION) { port->sm_churn_actor_state = AD_NO_CHURN; } else { port->churn_actor_count++; port->sm_churn_actor_state = AD_CHURN; } } if (port->sm_churn_partner_timer_counter && !(--port->sm_churn_partner_timer_counter) && port->sm_churn_partner_state == AD_CHURN_MONITOR) { if (port->partner_oper.port_state & LACP_STATE_SYNCHRONIZATION) { port->sm_churn_partner_state = AD_NO_CHURN; } else { port->churn_partner_count++; port->sm_churn_partner_state = AD_CHURN; } } } /** * ad_tx_machine - handle a port's tx state machine * @port: the port we're looking at */ static void ad_tx_machine(struct port *port) { /* check if tx timer expired, to verify that we do not send more than * 3 packets per second */ if (port->sm_tx_timer_counter && !(--port->sm_tx_timer_counter)) { /* check if there is something to send */ if (port->ntt && (port->sm_vars & AD_PORT_LACP_ENABLED)) { __update_lacpdu_from_port(port); if (ad_lacpdu_send(port) >= 0) { slave_dbg(port->slave->bond->dev, port->slave->dev, "Sent LACPDU on port %d\n", port->actor_port_number); /* mark ntt as false, so it will not be sent * again until demanded */ port->ntt = false; } } /* restart tx timer(to verify that we will not exceed * AD_MAX_TX_IN_SECOND */ port->sm_tx_timer_counter = ad_ticks_per_sec/AD_MAX_TX_IN_SECOND; } } /** * ad_periodic_machine - handle a port's periodic state machine * @port: the port we're looking at * @bond_params: bond parameters we will use * * Turn ntt flag on priodically to perform periodic transmission of lacpdu's. */ static void ad_periodic_machine(struct port *port, struct bond_params *bond_params) { periodic_states_t last_state; /* keep current state machine state to compare later if it was changed */ last_state = port->sm_periodic_state; /* check if port was reinitialized */ if (((port->sm_vars & AD_PORT_BEGIN) || !(port->sm_vars & AD_PORT_LACP_ENABLED) || !port->is_enabled) || (!(port->actor_oper_port_state & LACP_STATE_LACP_ACTIVITY) && !(port->partner_oper.port_state & LACP_STATE_LACP_ACTIVITY)) || !bond_params->lacp_active) { port->sm_periodic_state = AD_NO_PERIODIC; } /* check if state machine should change state */ else if (port->sm_periodic_timer_counter) { /* check if periodic state machine expired */ if (!(--port->sm_periodic_timer_counter)) { /* if expired then do tx */ port->sm_periodic_state = AD_PERIODIC_TX; } else { /* If not expired, check if there is some new timeout * parameter from the partner state */ switch (port->sm_periodic_state) { case AD_FAST_PERIODIC: if (!(port->partner_oper.port_state & LACP_STATE_LACP_TIMEOUT)) port->sm_periodic_state = AD_SLOW_PERIODIC; break; case AD_SLOW_PERIODIC: if ((port->partner_oper.port_state & LACP_STATE_LACP_TIMEOUT)) { port->sm_periodic_timer_counter = 0; port->sm_periodic_state = AD_PERIODIC_TX; } break; default: break; } } } else { switch (port->sm_periodic_state) { case AD_NO_PERIODIC: port->sm_periodic_state = AD_FAST_PERIODIC; break; case AD_PERIODIC_TX: if (!(port->partner_oper.port_state & LACP_STATE_LACP_TIMEOUT)) port->sm_periodic_state = AD_SLOW_PERIODIC; else port->sm_periodic_state = AD_FAST_PERIODIC; break; default: break; } } /* check if the state machine was changed */ if (port->sm_periodic_state != last_state) { slave_dbg(port->slave->bond->dev, port->slave->dev, "Periodic Machine: Port=%d, Last State=%d, Curr State=%d\n", port->actor_port_number, last_state, port->sm_periodic_state); switch (port->sm_periodic_state) { case AD_NO_PERIODIC: port->sm_periodic_timer_counter = 0; break; case AD_FAST_PERIODIC: /* decrement 1 tick we lost in the PERIODIC_TX cycle */ port->sm_periodic_timer_counter = __ad_timer_to_ticks(AD_PERIODIC_TIMER, (u16)(AD_FAST_PERIODIC_TIME))-1; break; case AD_SLOW_PERIODIC: /* decrement 1 tick we lost in the PERIODIC_TX cycle */ port->sm_periodic_timer_counter = __ad_timer_to_ticks(AD_PERIODIC_TIMER, (u16)(AD_SLOW_PERIODIC_TIME))-1; break; case AD_PERIODIC_TX: port->ntt = true; break; default: break; } } } /** * ad_port_selection_logic - select aggregation groups * @port: the port we're looking at * @update_slave_arr: Does slave array need update? * * Select aggregation groups, and assign each port for it's aggregetor. The * selection logic is called in the inititalization (after all the handshkes), * and after every lacpdu receive (if selected is off). */ static void ad_port_selection_logic(struct port *port, bool *update_slave_arr) { struct aggregator *aggregator, *free_aggregator = NULL, *temp_aggregator; struct port *last_port = NULL, *curr_port; struct list_head *iter; struct bonding *bond; struct slave *slave; int found = 0; /* if the port is already Selected, do nothing */ if (port->sm_vars & AD_PORT_SELECTED) return; bond = __get_bond_by_port(port); /* if the port is connected to other aggregator, detach it */ if (port->aggregator) { /* detach the port from its former aggregator */ temp_aggregator = port->aggregator; for (curr_port = temp_aggregator->lag_ports; curr_port; last_port = curr_port, curr_port = curr_port->next_port_in_aggregator) { if (curr_port == port) { temp_aggregator->num_of_ports--; /* if it is the first port attached to the * aggregator */ if (!last_port) { temp_aggregator->lag_ports = port->next_port_in_aggregator; } else { /* not the first port attached to the * aggregator */ last_port->next_port_in_aggregator = port->next_port_in_aggregator; } /* clear the port's relations to this * aggregator */ port->aggregator = NULL; port->next_port_in_aggregator = NULL; port->actor_port_aggregator_identifier = 0; slave_dbg(bond->dev, port->slave->dev, "Port %d left LAG %d\n", port->actor_port_number, temp_aggregator->aggregator_identifier); /* if the aggregator is empty, clear its * parameters, and set it ready to be attached */ if (!temp_aggregator->lag_ports) ad_clear_agg(temp_aggregator); break; } } if (!curr_port) { /* meaning: the port was related to an aggregator * but was not on the aggregator port list */ net_warn_ratelimited("%s: (slave %s): Warning: Port %d was related to aggregator %d but was not on its port list\n", port->slave->bond->dev->name, port->slave->dev->name, port->actor_port_number, port->aggregator->aggregator_identifier); } } /* search on all aggregators for a suitable aggregator for this port */ bond_for_each_slave(bond, slave, iter) { aggregator = &(SLAVE_AD_INFO(slave)->aggregator); /* keep a free aggregator for later use(if needed) */ if (!aggregator->lag_ports) { if (!free_aggregator) free_aggregator = aggregator; continue; } /* check if current aggregator suits us */ if (((aggregator->actor_oper_aggregator_key == port->actor_oper_port_key) && /* if all parameters match AND */ MAC_ADDRESS_EQUAL(&(aggregator->partner_system), &(port->partner_oper.system)) && (aggregator->partner_system_priority == port->partner_oper.system_priority) && (aggregator->partner_oper_aggregator_key == port->partner_oper.key) ) && ((__agg_has_partner(aggregator) && /* partner answers */ !aggregator->is_individual) /* but is not individual OR */ ) ) { /* attach to the founded aggregator */ port->aggregator = aggregator; port->actor_port_aggregator_identifier = port->aggregator->aggregator_identifier; port->next_port_in_aggregator = aggregator->lag_ports; port->aggregator->num_of_ports++; aggregator->lag_ports = port; slave_dbg(bond->dev, slave->dev, "Port %d joined LAG %d (existing LAG)\n", port->actor_port_number, port->aggregator->aggregator_identifier); /* mark this port as selected */ port->sm_vars |= AD_PORT_SELECTED; found = 1; break; } } /* the port couldn't find an aggregator - attach it to a new * aggregator */ if (!found) { if (free_aggregator) { /* assign port a new aggregator */ port->aggregator = free_aggregator; port->actor_port_aggregator_identifier = port->aggregator->aggregator_identifier; /* update the new aggregator's parameters * if port was responsed from the end-user */ if (port->actor_oper_port_key & AD_DUPLEX_KEY_MASKS) /* if port is full duplex */ port->aggregator->is_individual = false; else port->aggregator->is_individual = true; port->aggregator->actor_admin_aggregator_key = port->actor_admin_port_key; port->aggregator->actor_oper_aggregator_key = port->actor_oper_port_key; port->aggregator->partner_system = port->partner_oper.system; port->aggregator->partner_system_priority = port->partner_oper.system_priority; port->aggregator->partner_oper_aggregator_key = port->partner_oper.key; port->aggregator->receive_state = 1; port->aggregator->transmit_state = 1; port->aggregator->lag_ports = port; port->aggregator->num_of_ports++; /* mark this port as selected */ port->sm_vars |= AD_PORT_SELECTED; slave_dbg(bond->dev, port->slave->dev, "Port %d joined LAG %d (new LAG)\n", port->actor_port_number, port->aggregator->aggregator_identifier); } else { slave_err(bond->dev, port->slave->dev, "Port %d did not find a suitable aggregator\n", port->actor_port_number); return; } } /* if all aggregator's ports are READY_N == TRUE, set ready=TRUE * in all aggregator's ports, else set ready=FALSE in all * aggregator's ports */ __set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator)); aggregator = __get_first_agg(port); ad_agg_selection_logic(aggregator, update_slave_arr); if (!port->aggregator->is_active) port->actor_oper_port_state &= ~LACP_STATE_SYNCHRONIZATION; } /* Decide if "agg" is a better choice for the new active aggregator that * the current best, according to the ad_select policy. */ static struct aggregator *ad_agg_selection_test(struct aggregator *best, struct aggregator *curr) { /* 0. If no best, select current. * * 1. If the current agg is not individual, and the best is * individual, select current. * * 2. If current agg is individual and the best is not, keep best. * * 3. Therefore, current and best are both individual or both not * individual, so: * * 3a. If current agg partner replied, and best agg partner did not, * select current. * * 3b. If current agg partner did not reply and best agg partner * did reply, keep best. * * 4. Therefore, current and best both have partner replies or * both do not, so perform selection policy: * * BOND_AD_COUNT: Select by count of ports. If count is equal, * select by bandwidth. * * BOND_AD_STABLE, BOND_AD_BANDWIDTH: Select by bandwidth. */ if (!best) return curr; if (!curr->is_individual && best->is_individual) return curr; if (curr->is_individual && !best->is_individual) return best; if (__agg_has_partner(curr) && !__agg_has_partner(best)) return curr; if (!__agg_has_partner(curr) && __agg_has_partner(best)) return best; switch (__get_agg_selection_mode(curr->lag_ports)) { case BOND_AD_COUNT: if (__agg_active_ports(curr) > __agg_active_ports(best)) return curr; if (__agg_active_ports(curr) < __agg_active_ports(best)) return best; fallthrough; case BOND_AD_STABLE: case BOND_AD_BANDWIDTH: if (__get_agg_bandwidth(curr) > __get_agg_bandwidth(best)) return curr; break; default: net_warn_ratelimited("%s: (slave %s): Impossible agg select mode %d\n", curr->slave->bond->dev->name, curr->slave->dev->name, __get_agg_selection_mode(curr->lag_ports)); break; } return best; } static int agg_device_up(const struct aggregator *agg) { struct port *port = agg->lag_ports; if (!port) return 0; for (port = agg->lag_ports; port; port = port->next_port_in_aggregator) { if (netif_running(port->slave->dev) && netif_carrier_ok(port->slave->dev)) return 1; } return 0; } /** * ad_agg_selection_logic - select an aggregation group for a team * @agg: the aggregator we're looking at * @update_slave_arr: Does slave array need update? * * It is assumed that only one aggregator may be selected for a team. * * The logic of this function is to select the aggregator according to * the ad_select policy: * * BOND_AD_STABLE: select the aggregator with the most ports attached to * it, and to reselect the active aggregator only if the previous * aggregator has no more ports related to it. * * BOND_AD_BANDWIDTH: select the aggregator with the highest total * bandwidth, and reselect whenever a link state change takes place or the * set of slaves in the bond changes. * * BOND_AD_COUNT: select the aggregator with largest number of ports * (slaves), and reselect whenever a link state change takes place or the * set of slaves in the bond changes. * * FIXME: this function MUST be called with the first agg in the bond, or * __get_active_agg() won't work correctly. This function should be better * called with the bond itself, and retrieve the first agg from it. */ static void ad_agg_selection_logic(struct aggregator *agg, bool *update_slave_arr) { struct aggregator *best, *active, *origin; struct bonding *bond = agg->slave->bond; struct list_head *iter; struct slave *slave; struct port *port; rcu_read_lock(); origin = agg; active = __get_active_agg(agg); best = (active && agg_device_up(active)) ? active : NULL; bond_for_each_slave_rcu(bond, slave, iter) { agg = &(SLAVE_AD_INFO(slave)->aggregator); agg->is_active = 0; if (__agg_active_ports(agg) && agg_device_up(agg)) best = ad_agg_selection_test(best, agg); } if (best && __get_agg_selection_mode(best->lag_ports) == BOND_AD_STABLE) { /* For the STABLE policy, don't replace the old active * aggregator if it's still active (it has an answering * partner) or if both the best and active don't have an * answering partner. */ if (active && active->lag_ports && __agg_active_ports(active) && (__agg_has_partner(active) || (!__agg_has_partner(active) && !__agg_has_partner(best)))) { if (!(!active->actor_oper_aggregator_key && best->actor_oper_aggregator_key)) { best = NULL; active->is_active = 1; } } } if (best && (best == active)) { best = NULL; active->is_active = 1; } /* if there is new best aggregator, activate it */ if (best) { netdev_dbg(bond->dev, "(slave %s): best Agg=%d; P=%d; a k=%d; p k=%d; Ind=%d; Act=%d\n", best->slave ? best->slave->dev->name : "NULL", best->aggregator_identifier, best->num_of_ports, best->actor_oper_aggregator_key, best->partner_oper_aggregator_key, best->is_individual, best->is_active); netdev_dbg(bond->dev, "(slave %s): best ports %p slave %p\n", best->slave ? best->slave->dev->name : "NULL", best->lag_ports, best->slave); bond_for_each_slave_rcu(bond, slave, iter) { agg = &(SLAVE_AD_INFO(slave)->aggregator); slave_dbg(bond->dev, slave->dev, "Agg=%d; P=%d; a k=%d; p k=%d; Ind=%d; Act=%d\n", agg->aggregator_identifier, agg->num_of_ports, agg->actor_oper_aggregator_key, agg->partner_oper_aggregator_key, agg->is_individual, agg->is_active); } /* check if any partner replies */ if (best->is_individual) net_warn_ratelimited("%s: Warning: No 802.3ad response from the link partner for any adapters in the bond\n", bond->dev->name); best->is_active = 1; netdev_dbg(bond->dev, "(slave %s): LAG %d chosen as the active LAG\n", best->slave ? best->slave->dev->name : "NULL", best->aggregator_identifier); netdev_dbg(bond->dev, "(slave %s): Agg=%d; P=%d; a k=%d; p k=%d; Ind=%d; Act=%d\n", best->slave ? best->slave->dev->name : "NULL", best->aggregator_identifier, best->num_of_ports, best->actor_oper_aggregator_key, best->partner_oper_aggregator_key, best->is_individual, best->is_active); /* disable the ports that were related to the former * active_aggregator */ if (active) { for (port = active->lag_ports; port; port = port->next_port_in_aggregator) { __disable_port(port); } } /* Slave array needs update. */ *update_slave_arr = true; } /* if the selected aggregator is of join individuals * (partner_system is NULL), enable their ports */ active = __get_active_agg(origin); if (active) { if (!__agg_has_partner(active)) { for (port = active->lag_ports; port; port = port->next_port_in_aggregator) { __enable_port(port); } *update_slave_arr = true; } } rcu_read_unlock(); bond_3ad_set_carrier(bond); } /** * ad_clear_agg - clear a given aggregator's parameters * @aggregator: the aggregator we're looking at */ static void ad_clear_agg(struct aggregator *aggregator) { if (aggregator) { aggregator->is_individual = false; aggregator->actor_admin_aggregator_key = 0; aggregator->actor_oper_aggregator_key = 0; eth_zero_addr(aggregator->partner_system.mac_addr_value); aggregator->partner_system_priority = 0; aggregator->partner_oper_aggregator_key = 0; aggregator->receive_state = 0; aggregator->transmit_state = 0; aggregator->lag_ports = NULL; aggregator->is_active = 0; aggregator->num_of_ports = 0; pr_debug("%s: LAG %d was cleared\n", aggregator->slave ? aggregator->slave->dev->name : "NULL", aggregator->aggregator_identifier); } } /** * ad_initialize_agg - initialize a given aggregator's parameters * @aggregator: the aggregator we're looking at */ static void ad_initialize_agg(struct aggregator *aggregator) { if (aggregator) { ad_clear_agg(aggregator); eth_zero_addr(aggregator->aggregator_mac_address.mac_addr_value); aggregator->aggregator_identifier = 0; aggregator->slave = NULL; } } /** * ad_initialize_port - initialize a given port's parameters * @port: the port we're looking at * @lacp_fast: boolean. whether fast periodic should be used */ static void ad_initialize_port(struct port *port, int lacp_fast) { static const struct port_params tmpl = { .system_priority = 0xffff, .key = 1, .port_number = 1, .port_priority = 0xff, .port_state = 1, }; static const struct lacpdu lacpdu = { .subtype = 0x01, .version_number = 0x01, .tlv_type_actor_info = 0x01, .actor_information_length = 0x14, .tlv_type_partner_info = 0x02, .partner_information_length = 0x14, .tlv_type_collector_info = 0x03, .collector_information_length = 0x10, .collector_max_delay = htons(AD_COLLECTOR_MAX_DELAY), }; if (port) { port->actor_port_priority = 0xff; port->actor_port_aggregator_identifier = 0; port->ntt = false; port->actor_admin_port_state = LACP_STATE_AGGREGATION | LACP_STATE_LACP_ACTIVITY; port->actor_oper_port_state = LACP_STATE_AGGREGATION | LACP_STATE_LACP_ACTIVITY; if (lacp_fast) port->actor_oper_port_state |= LACP_STATE_LACP_TIMEOUT; memcpy(&port->partner_admin, &tmpl, sizeof(tmpl)); memcpy(&port->partner_oper, &tmpl, sizeof(tmpl)); port->is_enabled = true; /* private parameters */ port->sm_vars = AD_PORT_BEGIN | AD_PORT_LACP_ENABLED; port->sm_rx_state = 0; port->sm_rx_timer_counter = 0; port->sm_periodic_state = 0; port->sm_periodic_timer_counter = 0; port->sm_mux_state = 0; port->sm_mux_timer_counter = 0; port->sm_tx_state = 0; port->aggregator = NULL; port->next_port_in_aggregator = NULL; port->transaction_id = 0; port->sm_churn_actor_timer_counter = 0; port->sm_churn_actor_state = 0; port->churn_actor_count = 0; port->sm_churn_partner_timer_counter = 0; port->sm_churn_partner_state = 0; port->churn_partner_count = 0; memcpy(&port->lacpdu, &lacpdu, sizeof(lacpdu)); } } /** * ad_enable_collecting - enable a port's receive * @port: the port we're looking at * * Enable @port if it's in an active aggregator */ static void ad_enable_collecting(struct port *port) { if (port->aggregator->is_active) { struct slave *slave = port->slave; slave_dbg(slave->bond->dev, slave->dev, "Enabling collecting on port %d (LAG %d)\n", port->actor_port_number, port->aggregator->aggregator_identifier); __enable_collecting_port(port); } } /** * ad_disable_distributing - disable a port's transmit * @port: the port we're looking at * @update_slave_arr: Does slave array need update? */ static void ad_disable_distributing(struct port *port, bool *update_slave_arr) { if (port->aggregator && __agg_has_partner(port->aggregator)) { slave_dbg(port->slave->bond->dev, port->slave->dev, "Disabling distributing on port %d (LAG %d)\n", port->actor_port_number, port->aggregator->aggregator_identifier); __disable_distributing_port(port); /* Slave array needs an update */ *update_slave_arr = true; } } /** * ad_enable_collecting_distributing - enable a port's transmit/receive * @port: the port we're looking at * @update_slave_arr: Does slave array need update? * * Enable @port if it's in an active aggregator */ static void ad_enable_collecting_distributing(struct port *port, bool *update_slave_arr) { if (port->aggregator->is_active) { slave_dbg(port->slave->bond->dev, port->slave->dev, "Enabling port %d (LAG %d)\n", port->actor_port_number, port->aggregator->aggregator_identifier); __enable_port(port); /* Slave array needs update */ *update_slave_arr = true; } } /** * ad_disable_collecting_distributing - disable a port's transmit/receive * @port: the port we're looking at * @update_slave_arr: Does slave array need update? */ static void ad_disable_collecting_distributing(struct port *port, bool *update_slave_arr) { if (port->aggregator && __agg_has_partner(port->aggregator)) { slave_dbg(port->slave->bond->dev, port->slave->dev, "Disabling port %d (LAG %d)\n", port->actor_port_number, port->aggregator->aggregator_identifier); __disable_port(port); /* Slave array needs an update */ *update_slave_arr = true; } } /** * ad_marker_info_received - handle receive of a Marker information frame * @marker_info: Marker info received * @port: the port we're looking at */ static void ad_marker_info_received(struct bond_marker *marker_info, struct port *port) { struct bond_marker marker; atomic64_inc(&SLAVE_AD_INFO(port->slave)->stats.marker_rx); atomic64_inc(&BOND_AD_INFO(port->slave->bond).stats.marker_rx); /* copy the received marker data to the response marker */ memcpy(&marker, marker_info, sizeof(struct bond_marker)); /* change the marker subtype to marker response */ marker.tlv_type = AD_MARKER_RESPONSE_SUBTYPE; /* send the marker response */ if (ad_marker_send(port, &marker) >= 0) slave_dbg(port->slave->bond->dev, port->slave->dev, "Sent Marker Response on port %d\n", port->actor_port_number); } /** * ad_marker_response_received - handle receive of a marker response frame * @marker: marker PDU received * @port: the port we're looking at * * This function does nothing since we decided not to implement send and handle * response for marker PDU's, in this stage, but only to respond to marker * information. */ static void ad_marker_response_received(struct bond_marker *marker, struct port *port) { atomic64_inc(&SLAVE_AD_INFO(port->slave)->stats.marker_resp_rx); atomic64_inc(&BOND_AD_INFO(port->slave->bond).stats.marker_resp_rx); /* DO NOTHING, SINCE WE DECIDED NOT TO IMPLEMENT THIS FEATURE FOR NOW */ } /* ========= AD exported functions to the main bonding code ========= */ /* Check aggregators status in team every T seconds */ #define AD_AGGREGATOR_SELECTION_TIMER 8 /** * bond_3ad_initiate_agg_selection - initate aggregator selection * @bond: bonding struct * @timeout: timeout value to set * * Set the aggregation selection timer, to initiate an agg selection in * the very near future. Called during first initialization, and during * any down to up transitions of the bond. */ void bond_3ad_initiate_agg_selection(struct bonding *bond, int timeout) { atomic_set(&BOND_AD_INFO(bond).agg_select_timer, timeout); } /** * bond_3ad_initialize - initialize a bond's 802.3ad parameters and structures * @bond: bonding struct to work on * * Can be called only after the mac address of the bond is set. */ void bond_3ad_initialize(struct bonding *bond) { BOND_AD_INFO(bond).aggregator_identifier = 0; BOND_AD_INFO(bond).system.sys_priority = bond->params.ad_actor_sys_prio; if (is_zero_ether_addr(bond->params.ad_actor_system)) BOND_AD_INFO(bond).system.sys_mac_addr = *((struct mac_addr *)bond->dev->dev_addr); else BOND_AD_INFO(bond).system.sys_mac_addr = *((struct mac_addr *)bond->params.ad_actor_system); bond_3ad_initiate_agg_selection(bond, AD_AGGREGATOR_SELECTION_TIMER * ad_ticks_per_sec); } /** * bond_3ad_bind_slave - initialize a slave's port * @slave: slave struct to work on * * Returns: 0 on success * < 0 on error */ void bond_3ad_bind_slave(struct slave *slave) { struct bonding *bond = bond_get_bond_by_slave(slave); struct port *port; struct aggregator *aggregator; /* check that the slave has not been initialized yet. */ if (SLAVE_AD_INFO(slave)->port.slave != slave) { /* port initialization */ port = &(SLAVE_AD_INFO(slave)->port); ad_initialize_port(port, bond->params.lacp_fast); port->slave = slave; port->actor_port_number = SLAVE_AD_INFO(slave)->id; /* key is determined according to the link speed, duplex and * user key */ port->actor_admin_port_key = bond->params.ad_user_port_key << 6; ad_update_actor_keys(port, false); /* actor system is the bond's system */ __ad_actor_update_port(port); /* tx timer(to verify that no more than MAX_TX_IN_SECOND * lacpdu's are sent in one second) */ port->sm_tx_timer_counter = ad_ticks_per_sec/AD_MAX_TX_IN_SECOND; __disable_port(port); /* aggregator initialization */ aggregator = &(SLAVE_AD_INFO(slave)->aggregator); ad_initialize_agg(aggregator); aggregator->aggregator_mac_address = *((struct mac_addr *)bond->dev->dev_addr); aggregator->aggregator_identifier = ++BOND_AD_INFO(bond).aggregator_identifier; aggregator->slave = slave; aggregator->is_active = 0; aggregator->num_of_ports = 0; } } /** * bond_3ad_unbind_slave - deinitialize a slave's port * @slave: slave struct to work on * * Search for the aggregator that is related to this port, remove the * aggregator and assign another aggregator for other port related to it * (if any), and remove the port. */ void bond_3ad_unbind_slave(struct slave *slave) { struct port *port, *prev_port, *temp_port; struct aggregator *aggregator, *new_aggregator, *temp_aggregator; int select_new_active_agg = 0; struct bonding *bond = slave->bond; struct slave *slave_iter; struct list_head *iter; bool dummy_slave_update; /* Ignore this value as caller updates array */ /* Sync against bond_3ad_state_machine_handler() */ spin_lock_bh(&bond->mode_lock); aggregator = &(SLAVE_AD_INFO(slave)->aggregator); port = &(SLAVE_AD_INFO(slave)->port); /* if slave is null, the whole port is not initialized */ if (!port->slave) { slave_warn(bond->dev, slave->dev, "Trying to unbind an uninitialized port\n"); goto out; } slave_dbg(bond->dev, slave->dev, "Unbinding Link Aggregation Group %d\n", aggregator->aggregator_identifier); /* Tell the partner that this port is not suitable for aggregation */ port->actor_oper_port_state &= ~LACP_STATE_SYNCHRONIZATION; port->actor_oper_port_state &= ~LACP_STATE_COLLECTING; port->actor_oper_port_state &= ~LACP_STATE_DISTRIBUTING; port->actor_oper_port_state &= ~LACP_STATE_AGGREGATION; __update_lacpdu_from_port(port); ad_lacpdu_send(port); /* check if this aggregator is occupied */ if (aggregator->lag_ports) { /* check if there are other ports related to this aggregator * except the port related to this slave(thats ensure us that * there is a reason to search for new aggregator, and that we * will find one */ if ((aggregator->lag_ports != port) || (aggregator->lag_ports->next_port_in_aggregator)) { /* find new aggregator for the related port(s) */ bond_for_each_slave(bond, slave_iter, iter) { new_aggregator = &(SLAVE_AD_INFO(slave_iter)->aggregator); /* if the new aggregator is empty, or it is * connected to our port only */ if (!new_aggregator->lag_ports || ((new_aggregator->lag_ports == port) && !new_aggregator->lag_ports->next_port_in_aggregator)) break; } if (!slave_iter) new_aggregator = NULL; /* if new aggregator found, copy the aggregator's * parameters and connect the related lag_ports to the * new aggregator */ if ((new_aggregator) && ((!new_aggregator->lag_ports) || ((new_aggregator->lag_ports == port) && !new_aggregator->lag_ports->next_port_in_aggregator))) { slave_dbg(bond->dev, slave->dev, "Some port(s) related to LAG %d - replacing with LAG %d\n", aggregator->aggregator_identifier, new_aggregator->aggregator_identifier); if ((new_aggregator->lag_ports == port) && new_aggregator->is_active) { slave_info(bond->dev, slave->dev, "Removing an active aggregator\n"); select_new_active_agg = 1; } new_aggregator->is_individual = aggregator->is_individual; new_aggregator->actor_admin_aggregator_key = aggregator->actor_admin_aggregator_key; new_aggregator->actor_oper_aggregator_key = aggregator->actor_oper_aggregator_key; new_aggregator->partner_system = aggregator->partner_system; new_aggregator->partner_system_priority = aggregator->partner_system_priority; new_aggregator->partner_oper_aggregator_key = aggregator->partner_oper_aggregator_key; new_aggregator->receive_state = aggregator->receive_state; new_aggregator->transmit_state = aggregator->transmit_state; new_aggregator->lag_ports = aggregator->lag_ports; new_aggregator->is_active = aggregator->is_active; new_aggregator->num_of_ports = aggregator->num_of_ports; /* update the information that is written on * the ports about the aggregator */ for (temp_port = aggregator->lag_ports; temp_port; temp_port = temp_port->next_port_in_aggregator) { temp_port->aggregator = new_aggregator; temp_port->actor_port_aggregator_identifier = new_aggregator->aggregator_identifier; } ad_clear_agg(aggregator); if (select_new_active_agg) ad_agg_selection_logic(__get_first_agg(port), &dummy_slave_update); } else { slave_warn(bond->dev, slave->dev, "unbinding aggregator, and could not find a new aggregator for its ports\n"); } } else { /* in case that the only port related to this * aggregator is the one we want to remove */ select_new_active_agg = aggregator->is_active; ad_clear_agg(aggregator); if (select_new_active_agg) { slave_info(bond->dev, slave->dev, "Removing an active aggregator\n"); /* select new active aggregator */ temp_aggregator = __get_first_agg(port); if (temp_aggregator) ad_agg_selection_logic(temp_aggregator, &dummy_slave_update); } } } slave_dbg(bond->dev, slave->dev, "Unbinding port %d\n", port->actor_port_number); /* find the aggregator that this port is connected to */ bond_for_each_slave(bond, slave_iter, iter) { temp_aggregator = &(SLAVE_AD_INFO(slave_iter)->aggregator); prev_port = NULL; /* search the port in the aggregator's related ports */ for (temp_port = temp_aggregator->lag_ports; temp_port; prev_port = temp_port, temp_port = temp_port->next_port_in_aggregator) { if (temp_port == port) { /* the aggregator found - detach the port from * this aggregator */ if (prev_port) prev_port->next_port_in_aggregator = temp_port->next_port_in_aggregator; else temp_aggregator->lag_ports = temp_port->next_port_in_aggregator; temp_aggregator->num_of_ports--; if (__agg_active_ports(temp_aggregator) == 0) { select_new_active_agg = temp_aggregator->is_active; if (temp_aggregator->num_of_ports == 0) ad_clear_agg(temp_aggregator); if (select_new_active_agg) { slave_info(bond->dev, slave->dev, "Removing an active aggregator\n"); /* select new active aggregator */ ad_agg_selection_logic(__get_first_agg(port), &dummy_slave_update); } } break; } } } port->slave = NULL; out: spin_unlock_bh(&bond->mode_lock); } /** * bond_3ad_update_ad_actor_settings - reflect change of actor settings to ports * @bond: bonding struct to work on * * If an ad_actor setting gets changed we need to update the individual port * settings so the bond device will use the new values when it gets upped. */ void bond_3ad_update_ad_actor_settings(struct bonding *bond) { struct list_head *iter; struct slave *slave; ASSERT_RTNL(); BOND_AD_INFO(bond).system.sys_priority = bond->params.ad_actor_sys_prio; if (is_zero_ether_addr(bond->params.ad_actor_system)) BOND_AD_INFO(bond).system.sys_mac_addr = *((struct mac_addr *)bond->dev->dev_addr); else BOND_AD_INFO(bond).system.sys_mac_addr = *((struct mac_addr *)bond->params.ad_actor_system); spin_lock_bh(&bond->mode_lock); bond_for_each_slave(bond, slave, iter) { struct port *port = &(SLAVE_AD_INFO(slave))->port; __ad_actor_update_port(port); port->ntt = true; } spin_unlock_bh(&bond->mode_lock); } /** * bond_agg_timer_advance - advance agg_select_timer * @bond: bonding structure * * Return true when agg_select_timer reaches 0. */ static bool bond_agg_timer_advance(struct bonding *bond) { int val, nval; while (1) { val = atomic_read(&BOND_AD_INFO(bond).agg_select_timer); if (!val) return false; nval = val - 1; if (atomic_cmpxchg(&BOND_AD_INFO(bond).agg_select_timer, val, nval) == val) break; } return nval == 0; } /** * bond_3ad_state_machine_handler - handle state machines timeout * @work: work context to fetch bonding struct to work on from * * The state machine handling concept in this module is to check every tick * which state machine should operate any function. The execution order is * round robin, so when we have an interaction between state machines, the * reply of one to each other might be delayed until next tick. * * This function also complete the initialization when the agg_select_timer * times out, and it selects an aggregator for the ports that are yet not * related to any aggregator, and selects the active aggregator for a bond. */ void bond_3ad_state_machine_handler(struct work_struct *work) { struct bonding *bond = container_of(work, struct bonding, ad_work.work); struct aggregator *aggregator; struct list_head *iter; struct slave *slave; struct port *port; bool should_notify_rtnl = BOND_SLAVE_NOTIFY_LATER; bool update_slave_arr = false; /* Lock to protect data accessed by all (e.g., port->sm_vars) and * against running with bond_3ad_unbind_slave. ad_rx_machine may run * concurrently due to incoming LACPDU as well. */ spin_lock_bh(&bond->mode_lock); rcu_read_lock(); /* check if there are any slaves */ if (!bond_has_slaves(bond)) goto re_arm; if (bond_agg_timer_advance(bond)) { slave = bond_first_slave_rcu(bond); port = slave ? &(SLAVE_AD_INFO(slave)->port) : NULL; /* select the active aggregator for the bond */ if (port) { if (!port->slave) { net_warn_ratelimited("%s: Warning: bond's first port is uninitialized\n", bond->dev->name); goto re_arm; } aggregator = __get_first_agg(port); ad_agg_selection_logic(aggregator, &update_slave_arr); } bond_3ad_set_carrier(bond); } /* for each port run the state machines */ bond_for_each_slave_rcu(bond, slave, iter) { port = &(SLAVE_AD_INFO(slave)->port); if (!port->slave) { net_warn_ratelimited("%s: Warning: Found an uninitialized port\n", bond->dev->name); goto re_arm; } ad_rx_machine(NULL, port); ad_periodic_machine(port, &bond->params); ad_port_selection_logic(port, &update_slave_arr); ad_mux_machine(port, &update_slave_arr); ad_tx_machine(port); ad_churn_machine(port); /* turn off the BEGIN bit, since we already handled it */ if (port->sm_vars & AD_PORT_BEGIN) port->sm_vars &= ~AD_PORT_BEGIN; } re_arm: bond_for_each_slave_rcu(bond, slave, iter) { if (slave->should_notify) { should_notify_rtnl = BOND_SLAVE_NOTIFY_NOW; break; } } rcu_read_unlock(); spin_unlock_bh(&bond->mode_lock); if (update_slave_arr) bond_slave_arr_work_rearm(bond, 0); if (should_notify_rtnl && rtnl_trylock()) { bond_slave_state_notify(bond); rtnl_unlock(); } queue_delayed_work(bond->wq, &bond->ad_work, ad_delta_in_ticks); } /** * bond_3ad_rx_indication - handle a received frame * @lacpdu: received lacpdu * @slave: slave struct to work on * * It is assumed that frames that were sent on this NIC don't returned as new * received frames (loopback). Since only the payload is given to this * function, it check for loopback. */ static int bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave) { struct bonding *bond = slave->bond; int ret = RX_HANDLER_ANOTHER; struct bond_marker *marker; struct port *port; atomic64_t *stat; port = &(SLAVE_AD_INFO(slave)->port); if (!port->slave) { net_warn_ratelimited("%s: Warning: port of slave %s is uninitialized\n", slave->dev->name, slave->bond->dev->name); return ret; } switch (lacpdu->subtype) { case AD_TYPE_LACPDU: ret = RX_HANDLER_CONSUMED; slave_dbg(slave->bond->dev, slave->dev, "Received LACPDU on port %d\n", port->actor_port_number); /* Protect against concurrent state machines */ spin_lock(&slave->bond->mode_lock); ad_rx_machine(lacpdu, port); spin_unlock(&slave->bond->mode_lock); break; case AD_TYPE_MARKER: ret = RX_HANDLER_CONSUMED; /* No need to convert fields to Little Endian since we * don't use the marker's fields. */ marker = (struct bond_marker *)lacpdu; switch (marker->tlv_type) { case AD_MARKER_INFORMATION_SUBTYPE: slave_dbg(slave->bond->dev, slave->dev, "Received Marker Information on port %d\n", port->actor_port_number); ad_marker_info_received(marker, port); break; case AD_MARKER_RESPONSE_SUBTYPE: slave_dbg(slave->bond->dev, slave->dev, "Received Marker Response on port %d\n", port->actor_port_number); ad_marker_response_received(marker, port); break; default: slave_dbg(slave->bond->dev, slave->dev, "Received an unknown Marker subtype on port %d\n", port->actor_port_number); stat = &SLAVE_AD_INFO(slave)->stats.marker_unknown_rx; atomic64_inc(stat); stat = &BOND_AD_INFO(bond).stats.marker_unknown_rx; atomic64_inc(stat); } break; default: atomic64_inc(&SLAVE_AD_INFO(slave)->stats.lacpdu_unknown_rx); atomic64_inc(&BOND_AD_INFO(bond).stats.lacpdu_unknown_rx); } return ret; } /** * ad_update_actor_keys - Update the oper / admin keys for a port based on * its current speed and duplex settings. * * @port: the port we'are looking at * @reset: Boolean to just reset the speed and the duplex part of the key * * The logic to change the oper / admin keys is: * (a) A full duplex port can participate in LACP with partner. * (b) When the speed is changed, LACP need to be reinitiated. */ static void ad_update_actor_keys(struct port *port, bool reset) { u8 duplex = 0; u16 ospeed = 0, speed = 0; u16 old_oper_key = port->actor_oper_port_key; port->actor_admin_port_key &= ~(AD_SPEED_KEY_MASKS|AD_DUPLEX_KEY_MASKS); if (!reset) { speed = __get_link_speed(port); ospeed = (old_oper_key & AD_SPEED_KEY_MASKS) >> 1; duplex = __get_duplex(port); port->actor_admin_port_key |= (speed << 1) | duplex; } port->actor_oper_port_key = port->actor_admin_port_key; if (old_oper_key != port->actor_oper_port_key) { /* Only 'duplex' port participates in LACP */ if (duplex) port->sm_vars |= AD_PORT_LACP_ENABLED; else port->sm_vars &= ~AD_PORT_LACP_ENABLED; if (!reset) { if (!speed) { slave_err(port->slave->bond->dev, port->slave->dev, "speed changed to 0 on port %d\n", port->actor_port_number); } else if (duplex && ospeed != speed) { /* Speed change restarts LACP state-machine */ port->sm_vars |= AD_PORT_BEGIN; } } } } /** * bond_3ad_adapter_speed_duplex_changed - handle a slave's speed / duplex * change indication * * @slave: slave struct to work on * * Handle reselection of aggregator (if needed) for this port. */ void bond_3ad_adapter_speed_duplex_changed(struct slave *slave) { struct port *port; port = &(SLAVE_AD_INFO(slave)->port); /* if slave is null, the whole port is not initialized */ if (!port->slave) { slave_warn(slave->bond->dev, slave->dev, "speed/duplex changed for uninitialized port\n"); return; } spin_lock_bh(&slave->bond->mode_lock); ad_update_actor_keys(port, false); spin_unlock_bh(&slave->bond->mode_lock); slave_dbg(slave->bond->dev, slave->dev, "Port %d changed speed/duplex\n", port->actor_port_number); } /** * bond_3ad_handle_link_change - handle a slave's link status change indication * @slave: slave struct to work on * @link: whether the link is now up or down * * Handle reselection of aggregator (if needed) for this port. */ void bond_3ad_handle_link_change(struct slave *slave, char link) { struct aggregator *agg; struct port *port; bool dummy; port = &(SLAVE_AD_INFO(slave)->port); /* if slave is null, the whole port is not initialized */ if (!port->slave) { slave_warn(slave->bond->dev, slave->dev, "link status changed for uninitialized port\n"); return; } spin_lock_bh(&slave->bond->mode_lock); /* on link down we are zeroing duplex and speed since * some of the adaptors(ce1000.lan) report full duplex/speed * instead of N/A(duplex) / 0(speed). * * on link up we are forcing recheck on the duplex and speed since * some of he adaptors(ce1000.lan) report. */ if (link == BOND_LINK_UP) { port->is_enabled = true; ad_update_actor_keys(port, false); } else { /* link has failed */ port->is_enabled = false; ad_update_actor_keys(port, true); } agg = __get_first_agg(port); ad_agg_selection_logic(agg, &dummy); spin_unlock_bh(&slave->bond->mode_lock); slave_dbg(slave->bond->dev, slave->dev, "Port %d changed link status to %s\n", port->actor_port_number, link == BOND_LINK_UP ? "UP" : "DOWN"); /* RTNL is held and mode_lock is released so it's safe * to update slave_array here. */ bond_update_slave_arr(slave->bond, NULL); } /** * bond_3ad_set_carrier - set link state for bonding master * @bond: bonding structure * * if we have an active aggregator, we're up, if not, we're down. * Presumes that we cannot have an active aggregator if there are * no slaves with link up. * * This behavior complies with IEEE 802.3 section 43.3.9. * * Called by bond_set_carrier(). Return zero if carrier state does not * change, nonzero if it does. */ int bond_3ad_set_carrier(struct bonding *bond) { struct aggregator *active; struct slave *first_slave; int ret = 1; rcu_read_lock(); first_slave = bond_first_slave_rcu(bond); if (!first_slave) { ret = 0; goto out; } active = __get_active_agg(&(SLAVE_AD_INFO(first_slave)->aggregator)); if (active) { /* are enough slaves available to consider link up? */ if (__agg_active_ports(active) < bond->params.min_links) { if (netif_carrier_ok(bond->dev)) { netif_carrier_off(bond->dev); goto out; } } else if (!netif_carrier_ok(bond->dev)) { netif_carrier_on(bond->dev); goto out; } } else if (netif_carrier_ok(bond->dev)) { netif_carrier_off(bond->dev); } out: rcu_read_unlock(); return ret; } /** * __bond_3ad_get_active_agg_info - get information of the active aggregator * @bond: bonding struct to work on * @ad_info: ad_info struct to fill with the bond's info * * Returns: 0 on success * < 0 on error */ int __bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info) { struct aggregator *aggregator = NULL; struct list_head *iter; struct slave *slave; struct port *port; bond_for_each_slave_rcu(bond, slave, iter) { port = &(SLAVE_AD_INFO(slave)->port); if (port->aggregator && port->aggregator->is_active) { aggregator = port->aggregator; break; } } if (!aggregator) return -1; ad_info->aggregator_id = aggregator->aggregator_identifier; ad_info->ports = __agg_active_ports(aggregator); ad_info->actor_key = aggregator->actor_oper_aggregator_key; ad_info->partner_key = aggregator->partner_oper_aggregator_key; ether_addr_copy(ad_info->partner_system, aggregator->partner_system.mac_addr_value); return 0; } int bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info) { int ret; rcu_read_lock(); ret = __bond_3ad_get_active_agg_info(bond, ad_info); rcu_read_unlock(); return ret; } int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond, struct slave *slave) { struct lacpdu *lacpdu, _lacpdu; if (skb->protocol != PKT_TYPE_LACPDU) return RX_HANDLER_ANOTHER; if (!MAC_ADDRESS_EQUAL(eth_hdr(skb)->h_dest, lacpdu_mcast_addr)) return RX_HANDLER_ANOTHER; lacpdu = skb_header_pointer(skb, 0, sizeof(_lacpdu), &_lacpdu); if (!lacpdu) { atomic64_inc(&SLAVE_AD_INFO(slave)->stats.lacpdu_illegal_rx); atomic64_inc(&BOND_AD_INFO(bond).stats.lacpdu_illegal_rx); return RX_HANDLER_ANOTHER; } return bond_3ad_rx_indication(lacpdu, slave); } /** * bond_3ad_update_lacp_rate - change the lacp rate * @bond: bonding struct * * When modify lacp_rate parameter via sysfs, * update actor_oper_port_state of each port. * * Hold bond->mode_lock, * so we can modify port->actor_oper_port_state, * no matter bond is up or down. */ void bond_3ad_update_lacp_rate(struct bonding *bond) { struct port *port = NULL; struct list_head *iter; struct slave *slave; int lacp_fast; lacp_fast = bond->params.lacp_fast; spin_lock_bh(&bond->mode_lock); bond_for_each_slave(bond, slave, iter) { port = &(SLAVE_AD_INFO(slave)->port); if (lacp_fast) port->actor_oper_port_state |= LACP_STATE_LACP_TIMEOUT; else port->actor_oper_port_state &= ~LACP_STATE_LACP_TIMEOUT; } spin_unlock_bh(&bond->mode_lock); } size_t bond_3ad_stats_size(void) { return nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_LACPDU_RX */ nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_LACPDU_TX */ nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_LACPDU_UNKNOWN_RX */ nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_LACPDU_ILLEGAL_RX */ nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_MARKER_RX */ nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_MARKER_TX */ nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_MARKER_RESP_RX */ nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_MARKER_RESP_TX */ nla_total_size_64bit(sizeof(u64)); /* BOND_3AD_STAT_MARKER_UNKNOWN_RX */ } int bond_3ad_stats_fill(struct sk_buff *skb, struct bond_3ad_stats *stats) { u64 val; val = atomic64_read(&stats->lacpdu_rx); if (nla_put_u64_64bit(skb, BOND_3AD_STAT_LACPDU_RX, val, BOND_3AD_STAT_PAD)) return -EMSGSIZE; val = atomic64_read(&stats->lacpdu_tx); if (nla_put_u64_64bit(skb, BOND_3AD_STAT_LACPDU_TX, val, BOND_3AD_STAT_PAD)) return -EMSGSIZE; val = atomic64_read(&stats->lacpdu_unknown_rx); if (nla_put_u64_64bit(skb, BOND_3AD_STAT_LACPDU_UNKNOWN_RX, val, BOND_3AD_STAT_PAD)) return -EMSGSIZE; val = atomic64_read(&stats->lacpdu_illegal_rx); if (nla_put_u64_64bit(skb, BOND_3AD_STAT_LACPDU_ILLEGAL_RX, val, BOND_3AD_STAT_PAD)) return -EMSGSIZE; val = atomic64_read(&stats->marker_rx); if (nla_put_u64_64bit(skb, BOND_3AD_STAT_MARKER_RX, val, BOND_3AD_STAT_PAD)) return -EMSGSIZE; val = atomic64_read(&stats->marker_tx); if (nla_put_u64_64bit(skb, BOND_3AD_STAT_MARKER_TX, val, BOND_3AD_STAT_PAD)) return -EMSGSIZE; val = atomic64_read(&stats->marker_resp_rx); if (nla_put_u64_64bit(skb, BOND_3AD_STAT_MARKER_RESP_RX, val, BOND_3AD_STAT_PAD)) return -EMSGSIZE; val = atomic64_read(&stats->marker_resp_tx); if (nla_put_u64_64bit(skb, BOND_3AD_STAT_MARKER_RESP_TX, val, BOND_3AD_STAT_PAD)) return -EMSGSIZE; val = atomic64_read(&stats->marker_unknown_rx); if (nla_put_u64_64bit(skb, BOND_3AD_STAT_MARKER_UNKNOWN_RX, val, BOND_3AD_STAT_PAD)) return -EMSGSIZE; return 0; } |
10 10 10 10 10 10 10 10 10 10 25 26 26 22 18 18 18 5 5 5 5 5 2 2 26 26 26 6 6 2 6 10 14 2 2 16 16 16 16 8 8 8 3 33 1481 21 20 1 21 1 5 8 9 14 14 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 | // SPDX-License-Identifier: GPL-2.0 /* Copyright 2011-2014 Autronica Fire and Security AS * * Author(s): * 2011-2014 Arvid Brodin, arvid.brodin@alten.se * This file contains device methods for creating, using and destroying * virtual HSR or PRP devices. */ #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/etherdevice.h> #include <linux/rtnetlink.h> #include <linux/pkt_sched.h> #include "hsr_device.h" #include "hsr_slave.h" #include "hsr_framereg.h" #include "hsr_main.h" #include "hsr_forward.h" static bool is_admin_up(struct net_device *dev) { return dev && (dev->flags & IFF_UP); } static bool is_slave_up(struct net_device *dev) { return dev && is_admin_up(dev) && netif_oper_up(dev); } static void hsr_set_operstate(struct hsr_port *master, bool has_carrier) { struct net_device *dev = master->dev; if (!is_admin_up(dev)) { netdev_set_operstate(dev, IF_OPER_DOWN); return; } if (has_carrier) netdev_set_operstate(dev, IF_OPER_UP); else netdev_set_operstate(dev, IF_OPER_LOWERLAYERDOWN); } static bool hsr_check_carrier(struct hsr_port *master) { struct hsr_port *port; ASSERT_RTNL(); hsr_for_each_port(master->hsr, port) { if (port->type != HSR_PT_MASTER && is_slave_up(port->dev)) { netif_carrier_on(master->dev); return true; } } netif_carrier_off(master->dev); return false; } static void hsr_check_announce(struct net_device *hsr_dev) { struct hsr_priv *hsr; hsr = netdev_priv(hsr_dev); if (netif_running(hsr_dev) && netif_oper_up(hsr_dev)) { /* Enable announce timer and start sending supervisory frames */ if (!timer_pending(&hsr->announce_timer)) { hsr->announce_count = 0; mod_timer(&hsr->announce_timer, jiffies + msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL)); } } else { /* Deactivate the announce timer */ timer_delete(&hsr->announce_timer); } } void hsr_check_carrier_and_operstate(struct hsr_priv *hsr) { struct hsr_port *master; bool has_carrier; master = hsr_port_get_hsr(hsr, HSR_PT_MASTER); /* netif_stacked_transfer_operstate() cannot be used here since * it doesn't set IF_OPER_LOWERLAYERDOWN (?) */ has_carrier = hsr_check_carrier(master); hsr_set_operstate(master, has_carrier); hsr_check_announce(master->dev); } int hsr_get_max_mtu(struct hsr_priv *hsr) { unsigned int mtu_max; struct hsr_port *port; mtu_max = ETH_DATA_LEN; hsr_for_each_port(hsr, port) if (port->type != HSR_PT_MASTER) mtu_max = min(port->dev->mtu, mtu_max); if (mtu_max < HSR_HLEN) return 0; return mtu_max - HSR_HLEN; } static int hsr_dev_change_mtu(struct net_device *dev, int new_mtu) { struct hsr_priv *hsr; hsr = netdev_priv(dev); if (new_mtu > hsr_get_max_mtu(hsr)) { netdev_info(dev, "A HSR master's MTU cannot be greater than the smallest MTU of its slaves minus the HSR Tag length (%d octets).\n", HSR_HLEN); return -EINVAL; } WRITE_ONCE(dev->mtu, new_mtu); return 0; } static int hsr_dev_open(struct net_device *dev) { struct hsr_priv *hsr; struct hsr_port *port; const char *designation = NULL; hsr = netdev_priv(dev); hsr_for_each_port(hsr, port) { if (port->type == HSR_PT_MASTER) continue; switch (port->type) { case HSR_PT_SLAVE_A: designation = "Slave A"; break; case HSR_PT_SLAVE_B: designation = "Slave B"; break; case HSR_PT_INTERLINK: designation = "Interlink"; break; default: designation = "Unknown"; } if (!is_slave_up(port->dev)) netdev_warn(dev, "%s (%s) is not up; please bring it up to get a fully working HSR network\n", designation, port->dev->name); } if (!designation) netdev_warn(dev, "No slave devices configured\n"); return 0; } static int hsr_dev_close(struct net_device *dev) { struct hsr_port *port; struct hsr_priv *hsr; hsr = netdev_priv(dev); hsr_for_each_port(hsr, port) { if (port->type == HSR_PT_MASTER) continue; switch (port->type) { case HSR_PT_SLAVE_A: case HSR_PT_SLAVE_B: dev_uc_unsync(port->dev, dev); dev_mc_unsync(port->dev, dev); break; default: break; } } return 0; } static netdev_features_t hsr_features_recompute(struct hsr_priv *hsr, netdev_features_t features) { netdev_features_t mask; struct hsr_port *port; mask = features; /* Mask out all features that, if supported by one device, should be * enabled for all devices (see NETIF_F_ONE_FOR_ALL). * * Anything that's off in mask will not be enabled - so only things * that were in features originally, and also is in NETIF_F_ONE_FOR_ALL, * may become enabled. */ features &= ~NETIF_F_ONE_FOR_ALL; hsr_for_each_port(hsr, port) features = netdev_increment_features(features, port->dev->features, mask); return features; } static netdev_features_t hsr_fix_features(struct net_device *dev, netdev_features_t features) { struct hsr_priv *hsr = netdev_priv(dev); return hsr_features_recompute(hsr, features); } static netdev_tx_t hsr_dev_xmit(struct sk_buff *skb, struct net_device *dev) { struct hsr_priv *hsr = netdev_priv(dev); struct hsr_port *master; master = hsr_port_get_hsr(hsr, HSR_PT_MASTER); if (master) { skb->dev = master->dev; skb_reset_mac_header(skb); skb_reset_mac_len(skb); spin_lock_bh(&hsr->seqnr_lock); hsr_forward_skb(skb, master); spin_unlock_bh(&hsr->seqnr_lock); } else { dev_core_stats_tx_dropped_inc(dev); dev_kfree_skb_any(skb); } return NETDEV_TX_OK; } static const struct header_ops hsr_header_ops = { .create = eth_header, .parse = eth_header_parse, }; static struct sk_buff *hsr_init_skb(struct hsr_port *master) { struct hsr_priv *hsr = master->hsr; struct sk_buff *skb; int hlen, tlen; hlen = LL_RESERVED_SPACE(master->dev); tlen = master->dev->needed_tailroom; /* skb size is same for PRP/HSR frames, only difference * being, for PRP it is a trailer and for HSR it is a * header */ skb = dev_alloc_skb(sizeof(struct hsr_sup_tag) + sizeof(struct hsr_sup_payload) + hlen + tlen); if (!skb) return skb; skb_reserve(skb, hlen); skb->dev = master->dev; skb->priority = TC_PRIO_CONTROL; if (dev_hard_header(skb, skb->dev, ETH_P_PRP, hsr->sup_multicast_addr, skb->dev->dev_addr, skb->len) <= 0) goto out; skb_reset_mac_header(skb); skb_reset_mac_len(skb); skb_reset_network_header(skb); skb_reset_transport_header(skb); return skb; out: kfree_skb(skb); return NULL; } static void send_hsr_supervision_frame(struct hsr_port *master, unsigned long *interval) { struct hsr_priv *hsr = master->hsr; __u8 type = HSR_TLV_LIFE_CHECK; struct hsr_sup_payload *hsr_sp; struct hsr_sup_tlv *hsr_stlv; struct hsr_sup_tag *hsr_stag; struct sk_buff *skb; *interval = msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL); if (hsr->announce_count < 3 && hsr->prot_version == 0) { type = HSR_TLV_ANNOUNCE; *interval = msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL); hsr->announce_count++; } skb = hsr_init_skb(master); if (!skb) { netdev_warn_once(master->dev, "HSR: Could not send supervision frame\n"); return; } hsr_stag = skb_put(skb, sizeof(struct hsr_sup_tag)); set_hsr_stag_path(hsr_stag, (hsr->prot_version ? 0x0 : 0xf)); set_hsr_stag_HSR_ver(hsr_stag, hsr->prot_version); /* From HSRv1 on we have separate supervision sequence numbers. */ spin_lock_bh(&hsr->seqnr_lock); if (hsr->prot_version > 0) { hsr_stag->sequence_nr = htons(hsr->sup_sequence_nr); hsr->sup_sequence_nr++; } else { hsr_stag->sequence_nr = htons(hsr->sequence_nr); hsr->sequence_nr++; } hsr_stag->tlv.HSR_TLV_type = type; /* TODO: Why 12 in HSRv0? */ hsr_stag->tlv.HSR_TLV_length = hsr->prot_version ? sizeof(struct hsr_sup_payload) : 12; /* Payload: MacAddressA */ hsr_sp = skb_put(skb, sizeof(struct hsr_sup_payload)); ether_addr_copy(hsr_sp->macaddress_A, master->dev->dev_addr); if (hsr->redbox) { hsr_stlv = skb_put(skb, sizeof(struct hsr_sup_tlv)); hsr_stlv->HSR_TLV_type = PRP_TLV_REDBOX_MAC; hsr_stlv->HSR_TLV_length = sizeof(struct hsr_sup_payload); /* Payload: MacAddressRedBox */ hsr_sp = skb_put(skb, sizeof(struct hsr_sup_payload)); ether_addr_copy(hsr_sp->macaddress_A, hsr->macaddress_redbox); } if (skb_put_padto(skb, ETH_ZLEN)) { spin_unlock_bh(&hsr->seqnr_lock); return; } hsr_forward_skb(skb, master); spin_unlock_bh(&hsr->seqnr_lock); return; } static void send_prp_supervision_frame(struct hsr_port *master, unsigned long *interval) { struct hsr_priv *hsr = master->hsr; struct hsr_sup_payload *hsr_sp; struct hsr_sup_tag *hsr_stag; struct sk_buff *skb; skb = hsr_init_skb(master); if (!skb) { netdev_warn_once(master->dev, "PRP: Could not send supervision frame\n"); return; } *interval = msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL); hsr_stag = skb_put(skb, sizeof(struct hsr_sup_tag)); set_hsr_stag_path(hsr_stag, (hsr->prot_version ? 0x0 : 0xf)); set_hsr_stag_HSR_ver(hsr_stag, (hsr->prot_version ? 1 : 0)); /* From HSRv1 on we have separate supervision sequence numbers. */ spin_lock_bh(&hsr->seqnr_lock); hsr_stag->sequence_nr = htons(hsr->sup_sequence_nr); hsr->sup_sequence_nr++; hsr_stag->tlv.HSR_TLV_type = PRP_TLV_LIFE_CHECK_DD; hsr_stag->tlv.HSR_TLV_length = sizeof(struct hsr_sup_payload); /* Payload: MacAddressA */ hsr_sp = skb_put(skb, sizeof(struct hsr_sup_payload)); ether_addr_copy(hsr_sp->macaddress_A, master->dev->dev_addr); if (skb_put_padto(skb, ETH_ZLEN)) { spin_unlock_bh(&hsr->seqnr_lock); return; } hsr_forward_skb(skb, master); spin_unlock_bh(&hsr->seqnr_lock); } /* Announce (supervision frame) timer function */ static void hsr_announce(struct timer_list *t) { struct hsr_priv *hsr; struct hsr_port *master; unsigned long interval; hsr = from_timer(hsr, t, announce_timer); rcu_read_lock(); master = hsr_port_get_hsr(hsr, HSR_PT_MASTER); hsr->proto_ops->send_sv_frame(master, &interval); if (is_admin_up(master->dev)) mod_timer(&hsr->announce_timer, jiffies + interval); rcu_read_unlock(); } void hsr_del_ports(struct hsr_priv *hsr) { struct hsr_port *port; port = hsr_port_get_hsr(hsr, HSR_PT_SLAVE_A); if (port) hsr_del_port(port); port = hsr_port_get_hsr(hsr, HSR_PT_SLAVE_B); if (port) hsr_del_port(port); port = hsr_port_get_hsr(hsr, HSR_PT_INTERLINK); if (port) hsr_del_port(port); port = hsr_port_get_hsr(hsr, HSR_PT_MASTER); if (port) hsr_del_port(port); } static void hsr_set_rx_mode(struct net_device *dev) { struct hsr_port *port; struct hsr_priv *hsr; hsr = netdev_priv(dev); hsr_for_each_port(hsr, port) { if (port->type == HSR_PT_MASTER) continue; switch (port->type) { case HSR_PT_SLAVE_A: case HSR_PT_SLAVE_B: dev_mc_sync_multiple(port->dev, dev); dev_uc_sync_multiple(port->dev, dev); break; default: break; } } } static void hsr_change_rx_flags(struct net_device *dev, int change) { struct hsr_port *port; struct hsr_priv *hsr; hsr = netdev_priv(dev); hsr_for_each_port(hsr, port) { if (port->type == HSR_PT_MASTER) continue; switch (port->type) { case HSR_PT_SLAVE_A: case HSR_PT_SLAVE_B: if (change & IFF_ALLMULTI) dev_set_allmulti(port->dev, dev->flags & IFF_ALLMULTI ? 1 : -1); break; default: break; } } } static const struct net_device_ops hsr_device_ops = { .ndo_change_mtu = hsr_dev_change_mtu, .ndo_open = hsr_dev_open, .ndo_stop = hsr_dev_close, .ndo_start_xmit = hsr_dev_xmit, .ndo_change_rx_flags = hsr_change_rx_flags, .ndo_fix_features = hsr_fix_features, .ndo_set_rx_mode = hsr_set_rx_mode, }; static const struct device_type hsr_type = { .name = "hsr", }; static struct hsr_proto_ops hsr_ops = { .send_sv_frame = send_hsr_supervision_frame, .create_tagged_frame = hsr_create_tagged_frame, .get_untagged_frame = hsr_get_untagged_frame, .drop_frame = hsr_drop_frame, .fill_frame_info = hsr_fill_frame_info, .invalid_dan_ingress_frame = hsr_invalid_dan_ingress_frame, }; static struct hsr_proto_ops prp_ops = { .send_sv_frame = send_prp_supervision_frame, .create_tagged_frame = prp_create_tagged_frame, .get_untagged_frame = prp_get_untagged_frame, .drop_frame = prp_drop_frame, .fill_frame_info = prp_fill_frame_info, .handle_san_frame = prp_handle_san_frame, .update_san_info = prp_update_san_info, }; void hsr_dev_setup(struct net_device *dev) { eth_hw_addr_random(dev); ether_setup(dev); dev->min_mtu = 0; dev->header_ops = &hsr_header_ops; dev->netdev_ops = &hsr_device_ops; SET_NETDEV_DEVTYPE(dev, &hsr_type); dev->priv_flags |= IFF_NO_QUEUE | IFF_DISABLE_NETPOLL; dev->needs_free_netdev = true; dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | NETIF_F_GSO_MASK | NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_TX; dev->features = dev->hw_features; /* Prevent recursive tx locking */ dev->features |= NETIF_F_LLTX; /* VLAN on top of HSR needs testing and probably some work on * hsr_header_create() etc. */ dev->features |= NETIF_F_VLAN_CHALLENGED; /* Not sure about this. Taken from bridge code. netdev_features.h says * it means "Does not change network namespaces". */ dev->features |= NETIF_F_NETNS_LOCAL; } /* Return true if dev is a HSR master; return false otherwise. */ bool is_hsr_master(struct net_device *dev) { return (dev->netdev_ops->ndo_start_xmit == hsr_dev_xmit); } EXPORT_SYMBOL(is_hsr_master); /* Default multicast address for HSR Supervision frames */ static const unsigned char def_multicast_addr[ETH_ALEN] __aligned(2) = { 0x01, 0x15, 0x4e, 0x00, 0x01, 0x00 }; int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], struct net_device *interlink, unsigned char multicast_spec, u8 protocol_version, struct netlink_ext_ack *extack) { bool unregister = false; struct hsr_priv *hsr; int res; hsr = netdev_priv(hsr_dev); INIT_LIST_HEAD(&hsr->ports); INIT_LIST_HEAD(&hsr->node_db); INIT_LIST_HEAD(&hsr->proxy_node_db); spin_lock_init(&hsr->list_lock); eth_hw_addr_set(hsr_dev, slave[0]->dev_addr); /* initialize protocol specific functions */ if (protocol_version == PRP_V1) { /* For PRP, lan_id has most significant 3 bits holding * the net_id of PRP_LAN_ID */ hsr->net_id = PRP_LAN_ID << 1; hsr->proto_ops = &prp_ops; } else { hsr->proto_ops = &hsr_ops; } /* Make sure we recognize frames from ourselves in hsr_rcv() */ res = hsr_create_self_node(hsr, hsr_dev->dev_addr, slave[1]->dev_addr); if (res < 0) return res; spin_lock_init(&hsr->seqnr_lock); /* Overflow soon to find bugs easier: */ hsr->sequence_nr = HSR_SEQNR_START; hsr->sup_sequence_nr = HSR_SUP_SEQNR_START; hsr->interlink_sequence_nr = HSR_SEQNR_START; timer_setup(&hsr->announce_timer, hsr_announce, 0); timer_setup(&hsr->prune_timer, hsr_prune_nodes, 0); timer_setup(&hsr->prune_proxy_timer, hsr_prune_proxy_nodes, 0); ether_addr_copy(hsr->sup_multicast_addr, def_multicast_addr); hsr->sup_multicast_addr[ETH_ALEN - 1] = multicast_spec; hsr->prot_version = protocol_version; /* Make sure the 1st call to netif_carrier_on() gets through */ netif_carrier_off(hsr_dev); res = hsr_add_port(hsr, hsr_dev, HSR_PT_MASTER, extack); if (res) goto err_add_master; /* HSR forwarding offload supported in lower device? */ if ((slave[0]->features & NETIF_F_HW_HSR_FWD) && (slave[1]->features & NETIF_F_HW_HSR_FWD)) hsr->fwd_offloaded = true; res = register_netdevice(hsr_dev); if (res) goto err_unregister; unregister = true; res = hsr_add_port(hsr, slave[0], HSR_PT_SLAVE_A, extack); if (res) goto err_unregister; res = hsr_add_port(hsr, slave[1], HSR_PT_SLAVE_B, extack); if (res) goto err_unregister; if (interlink) { res = hsr_add_port(hsr, interlink, HSR_PT_INTERLINK, extack); if (res) goto err_unregister; hsr->redbox = true; ether_addr_copy(hsr->macaddress_redbox, interlink->dev_addr); mod_timer(&hsr->prune_proxy_timer, jiffies + msecs_to_jiffies(PRUNE_PROXY_PERIOD)); } hsr_debugfs_init(hsr, hsr_dev); mod_timer(&hsr->prune_timer, jiffies + msecs_to_jiffies(PRUNE_PERIOD)); return 0; err_unregister: hsr_del_ports(hsr); err_add_master: hsr_del_self_node(hsr); if (unregister) unregister_netdevice(hsr_dev); return res; } |
2 7 2 1 1 1 1 1 1 1 1 2 2 2 5 5 5 1 2 2 2 1 1 2 2 3 3 8 1 1 2 1 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 | // SPDX-License-Identifier: GPL-2.0-or-later /* * taskstats.c - Export per-task statistics to userland * * Copyright (C) Shailabh Nagar, IBM Corp. 2006 * (C) Balbir Singh, IBM Corp. 2006 */ #include <linux/kernel.h> #include <linux/taskstats_kern.h> #include <linux/tsacct_kern.h> #include <linux/acct.h> #include <linux/delayacct.h> #include <linux/cpumask.h> #include <linux/percpu.h> #include <linux/slab.h> #include <linux/cgroupstats.h> #include <linux/cgroup.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/pid_namespace.h> #include <net/genetlink.h> #include <linux/atomic.h> #include <linux/sched/cputime.h> /* * Maximum length of a cpumask that can be specified in * the TASKSTATS_CMD_ATTR_REGISTER/DEREGISTER_CPUMASK attribute */ #define TASKSTATS_CPUMASK_MAXLEN (100+6*NR_CPUS) static DEFINE_PER_CPU(__u32, taskstats_seqnum); static int family_registered; struct kmem_cache *taskstats_cache; static struct genl_family family; static const struct nla_policy taskstats_cmd_get_policy[] = { [TASKSTATS_CMD_ATTR_PID] = { .type = NLA_U32 }, [TASKSTATS_CMD_ATTR_TGID] = { .type = NLA_U32 }, [TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING }, [TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },}; static const struct nla_policy cgroupstats_cmd_get_policy[] = { [CGROUPSTATS_CMD_ATTR_FD] = { .type = NLA_U32 }, }; struct listener { struct list_head list; pid_t pid; char valid; }; struct listener_list { struct rw_semaphore sem; struct list_head list; }; static DEFINE_PER_CPU(struct listener_list, listener_array); enum actions { REGISTER, DEREGISTER, CPU_DONT_CARE }; static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp, size_t size) { struct sk_buff *skb; void *reply; /* * If new attributes are added, please revisit this allocation */ skb = genlmsg_new(size, GFP_KERNEL); if (!skb) return -ENOMEM; if (!info) { int seq = this_cpu_inc_return(taskstats_seqnum) - 1; reply = genlmsg_put(skb, 0, seq, &family, 0, cmd); } else reply = genlmsg_put_reply(skb, info, &family, 0, cmd); if (reply == NULL) { nlmsg_free(skb); return -EINVAL; } *skbp = skb; return 0; } /* * Send taskstats data in @skb to listener with nl_pid @pid */ static int send_reply(struct sk_buff *skb, struct genl_info *info) { struct genlmsghdr *genlhdr = nlmsg_data(nlmsg_hdr(skb)); void *reply = genlmsg_data(genlhdr); genlmsg_end(skb, reply); return genlmsg_reply(skb, info); } /* * Send taskstats data in @skb to listeners registered for @cpu's exit data */ static void send_cpu_listeners(struct sk_buff *skb, struct listener_list *listeners) { struct genlmsghdr *genlhdr = nlmsg_data(nlmsg_hdr(skb)); struct listener *s, *tmp; struct sk_buff *skb_next, *skb_cur = skb; void *reply = genlmsg_data(genlhdr); int delcount = 0; genlmsg_end(skb, reply); down_read(&listeners->sem); list_for_each_entry(s, &listeners->list, list) { int rc; skb_next = NULL; if (!list_is_last(&s->list, &listeners->list)) { skb_next = skb_clone(skb_cur, GFP_KERNEL); if (!skb_next) break; } rc = genlmsg_unicast(&init_net, skb_cur, s->pid); if (rc == -ECONNREFUSED) { s->valid = 0; delcount++; } skb_cur = skb_next; } up_read(&listeners->sem); if (skb_cur) nlmsg_free(skb_cur); if (!delcount) return; /* Delete invalidated entries */ down_write(&listeners->sem); list_for_each_entry_safe(s, tmp, &listeners->list, list) { if (!s->valid) { list_del(&s->list); kfree(s); } } up_write(&listeners->sem); } static void exe_add_tsk(struct taskstats *stats, struct task_struct *tsk) { /* No idea if I'm allowed to access that here, now. */ struct file *exe_file = get_task_exe_file(tsk); if (exe_file) { /* Following cp_new_stat64() in stat.c . */ stats->ac_exe_dev = huge_encode_dev(exe_file->f_inode->i_sb->s_dev); stats->ac_exe_inode = exe_file->f_inode->i_ino; fput(exe_file); } else { stats->ac_exe_dev = 0; stats->ac_exe_inode = 0; } } static void fill_stats(struct user_namespace *user_ns, struct pid_namespace *pid_ns, struct task_struct *tsk, struct taskstats *stats) { memset(stats, 0, sizeof(*stats)); /* * Each accounting subsystem adds calls to its functions to * fill in relevant parts of struct taskstsats as follows * * per-task-foo(stats, tsk); */ delayacct_add_tsk(stats, tsk); /* fill in basic acct fields */ stats->version = TASKSTATS_VERSION; stats->nvcsw = tsk->nvcsw; stats->nivcsw = tsk->nivcsw; bacct_add_tsk(user_ns, pid_ns, stats, tsk); /* fill in extended acct fields */ xacct_add_tsk(stats, tsk); /* add executable info */ exe_add_tsk(stats, tsk); } static int fill_stats_for_pid(pid_t pid, struct taskstats *stats) { struct task_struct *tsk; tsk = find_get_task_by_vpid(pid); if (!tsk) return -ESRCH; fill_stats(current_user_ns(), task_active_pid_ns(current), tsk, stats); put_task_struct(tsk); return 0; } static int fill_stats_for_tgid(pid_t tgid, struct taskstats *stats) { struct task_struct *tsk, *first; unsigned long flags; int rc = -ESRCH; u64 delta, utime, stime; u64 start_time; /* * Add additional stats from live tasks except zombie thread group * leaders who are already counted with the dead tasks */ rcu_read_lock(); first = find_task_by_vpid(tgid); if (!first || !lock_task_sighand(first, &flags)) goto out; if (first->signal->stats) memcpy(stats, first->signal->stats, sizeof(*stats)); else memset(stats, 0, sizeof(*stats)); start_time = ktime_get_ns(); for_each_thread(first, tsk) { if (tsk->exit_state) continue; /* * Accounting subsystem can call its functions here to * fill in relevant parts of struct taskstsats as follows * * per-task-foo(stats, tsk); */ delayacct_add_tsk(stats, tsk); /* calculate task elapsed time in nsec */ delta = start_time - tsk->start_time; /* Convert to micro seconds */ do_div(delta, NSEC_PER_USEC); stats->ac_etime += delta; task_cputime(tsk, &utime, &stime); stats->ac_utime += div_u64(utime, NSEC_PER_USEC); stats->ac_stime += div_u64(stime, NSEC_PER_USEC); stats->nvcsw += tsk->nvcsw; stats->nivcsw += tsk->nivcsw; } unlock_task_sighand(first, &flags); rc = 0; out: rcu_read_unlock(); stats->version = TASKSTATS_VERSION; /* * Accounting subsystems can also add calls here to modify * fields of taskstats. */ return rc; } static void fill_tgid_exit(struct task_struct *tsk) { unsigned long flags; spin_lock_irqsave(&tsk->sighand->siglock, flags); if (!tsk->signal->stats) goto ret; /* * Each accounting subsystem calls its functions here to * accumalate its per-task stats for tsk, into the per-tgid structure * * per-task-foo(tsk->signal->stats, tsk); */ delayacct_add_tsk(tsk->signal->stats, tsk); ret: spin_unlock_irqrestore(&tsk->sighand->siglock, flags); return; } static int add_del_listener(pid_t pid, const struct cpumask *mask, int isadd) { struct listener_list *listeners; struct listener *s, *tmp, *s2; unsigned int cpu; int ret = 0; if (!cpumask_subset(mask, cpu_possible_mask)) return -EINVAL; if (current_user_ns() != &init_user_ns) return -EINVAL; if (task_active_pid_ns(current) != &init_pid_ns) return -EINVAL; if (isadd == REGISTER) { for_each_cpu(cpu, mask) { s = kmalloc_node(sizeof(struct listener), GFP_KERNEL, cpu_to_node(cpu)); if (!s) { ret = -ENOMEM; goto cleanup; } s->pid = pid; s->valid = 1; listeners = &per_cpu(listener_array, cpu); down_write(&listeners->sem); list_for_each_entry(s2, &listeners->list, list) { if (s2->pid == pid && s2->valid) goto exists; } list_add(&s->list, &listeners->list); s = NULL; exists: up_write(&listeners->sem); kfree(s); /* nop if NULL */ } return 0; } /* Deregister or cleanup */ cleanup: for_each_cpu(cpu, mask) { listeners = &per_cpu(listener_array, cpu); down_write(&listeners->sem); list_for_each_entry_safe(s, tmp, &listeners->list, list) { if (s->pid == pid) { list_del(&s->list); kfree(s); break; } } up_write(&listeners->sem); } return ret; } static int parse(struct nlattr *na, struct cpumask *mask) { char *data; int len; int ret; if (na == NULL) return 1; len = nla_len(na); if (len > TASKSTATS_CPUMASK_MAXLEN) return -E2BIG; if (len < 1) return -EINVAL; data = kmalloc(len, GFP_KERNEL); if (!data) return -ENOMEM; nla_strscpy(data, na, len); ret = cpulist_parse(data, mask); kfree(data); return ret; } static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid) { struct nlattr *na, *ret; int aggr; aggr = (type == TASKSTATS_TYPE_PID) ? TASKSTATS_TYPE_AGGR_PID : TASKSTATS_TYPE_AGGR_TGID; na = nla_nest_start_noflag(skb, aggr); if (!na) goto err; if (nla_put(skb, type, sizeof(pid), &pid) < 0) { nla_nest_cancel(skb, na); goto err; } ret = nla_reserve_64bit(skb, TASKSTATS_TYPE_STATS, sizeof(struct taskstats), TASKSTATS_TYPE_NULL); if (!ret) { nla_nest_cancel(skb, na); goto err; } nla_nest_end(skb, na); return nla_data(ret); err: return NULL; } static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info) { int rc = 0; struct sk_buff *rep_skb; struct cgroupstats *stats; struct nlattr *na; size_t size; u32 fd; struct fd f; na = info->attrs[CGROUPSTATS_CMD_ATTR_FD]; if (!na) return -EINVAL; fd = nla_get_u32(info->attrs[CGROUPSTATS_CMD_ATTR_FD]); f = fdget(fd); if (!f.file) return 0; size = nla_total_size(sizeof(struct cgroupstats)); rc = prepare_reply(info, CGROUPSTATS_CMD_NEW, &rep_skb, size); if (rc < 0) goto err; na = nla_reserve(rep_skb, CGROUPSTATS_TYPE_CGROUP_STATS, sizeof(struct cgroupstats)); if (na == NULL) { nlmsg_free(rep_skb); rc = -EMSGSIZE; goto err; } stats = nla_data(na); memset(stats, 0, sizeof(*stats)); rc = cgroupstats_build(stats, f.file->f_path.dentry); if (rc < 0) { nlmsg_free(rep_skb); goto err; } rc = send_reply(rep_skb, info); err: fdput(f); return rc; } static int cmd_attr_register_cpumask(struct genl_info *info) { cpumask_var_t mask; int rc; if (!alloc_cpumask_var(&mask, GFP_KERNEL)) return -ENOMEM; rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], mask); if (rc < 0) goto out; rc = add_del_listener(info->snd_portid, mask, REGISTER); out: free_cpumask_var(mask); return rc; } static int cmd_attr_deregister_cpumask(struct genl_info *info) { cpumask_var_t mask; int rc; if (!alloc_cpumask_var(&mask, GFP_KERNEL)) return -ENOMEM; rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], mask); if (rc < 0) goto out; rc = add_del_listener(info->snd_portid, mask, DEREGISTER); out: free_cpumask_var(mask); return rc; } static size_t taskstats_packet_size(void) { size_t size; size = nla_total_size(sizeof(u32)) + nla_total_size_64bit(sizeof(struct taskstats)) + nla_total_size(0); return size; } static int cmd_attr_pid(struct genl_info *info) { struct taskstats *stats; struct sk_buff *rep_skb; size_t size; u32 pid; int rc; size = taskstats_packet_size(); rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size); if (rc < 0) return rc; rc = -EINVAL; pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]); stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, pid); if (!stats) goto err; rc = fill_stats_for_pid(pid, stats); if (rc < 0) goto err; return send_reply(rep_skb, info); err: nlmsg_free(rep_skb); return rc; } static int cmd_attr_tgid(struct genl_info *info) { struct taskstats *stats; struct sk_buff *rep_skb; size_t size; u32 tgid; int rc; size = taskstats_packet_size(); rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size); if (rc < 0) return rc; rc = -EINVAL; tgid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_TGID]); stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tgid); if (!stats) goto err; rc = fill_stats_for_tgid(tgid, stats); if (rc < 0) goto err; return send_reply(rep_skb, info); err: nlmsg_free(rep_skb); return rc; } static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info) { if (info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK]) return cmd_attr_register_cpumask(info); else if (info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK]) return cmd_attr_deregister_cpumask(info); else if (info->attrs[TASKSTATS_CMD_ATTR_PID]) return cmd_attr_pid(info); else if (info->attrs[TASKSTATS_CMD_ATTR_TGID]) return cmd_attr_tgid(info); else return -EINVAL; } static struct taskstats *taskstats_tgid_alloc(struct task_struct *tsk) { struct signal_struct *sig = tsk->signal; struct taskstats *stats_new, *stats; /* Pairs with smp_store_release() below. */ stats = smp_load_acquire(&sig->stats); if (stats || thread_group_empty(tsk)) return stats; /* No problem if kmem_cache_zalloc() fails */ stats_new = kmem_cache_zalloc(taskstats_cache, GFP_KERNEL); spin_lock_irq(&tsk->sighand->siglock); stats = sig->stats; if (!stats) { /* * Pairs with smp_store_release() above and order the * kmem_cache_zalloc(). */ smp_store_release(&sig->stats, stats_new); stats = stats_new; stats_new = NULL; } spin_unlock_irq(&tsk->sighand->siglock); if (stats_new) kmem_cache_free(taskstats_cache, stats_new); return stats; } /* Send pid data out on exit */ void taskstats_exit(struct task_struct *tsk, int group_dead) { int rc; struct listener_list *listeners; struct taskstats *stats; struct sk_buff *rep_skb; size_t size; int is_thread_group; if (!family_registered) return; /* * Size includes space for nested attributes */ size = taskstats_packet_size(); is_thread_group = !!taskstats_tgid_alloc(tsk); if (is_thread_group) { /* PID + STATS + TGID + STATS */ size = 2 * size; /* fill the tsk->signal->stats structure */ fill_tgid_exit(tsk); } listeners = raw_cpu_ptr(&listener_array); if (list_empty(&listeners->list)) return; rc = prepare_reply(NULL, TASKSTATS_CMD_NEW, &rep_skb, size); if (rc < 0) return; stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, task_pid_nr_ns(tsk, &init_pid_ns)); if (!stats) goto err; fill_stats(&init_user_ns, &init_pid_ns, tsk, stats); if (group_dead) stats->ac_flag |= AGROUP; /* * Doesn't matter if tsk is the leader or the last group member leaving */ if (!is_thread_group || !group_dead) goto send; stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, task_tgid_nr_ns(tsk, &init_pid_ns)); if (!stats) goto err; memcpy(stats, tsk->signal->stats, sizeof(*stats)); send: send_cpu_listeners(rep_skb, listeners); return; err: nlmsg_free(rep_skb); } static const struct genl_ops taskstats_ops[] = { { .cmd = TASKSTATS_CMD_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = taskstats_user_cmd, .policy = taskstats_cmd_get_policy, .maxattr = ARRAY_SIZE(taskstats_cmd_get_policy) - 1, .flags = GENL_ADMIN_PERM, }, { .cmd = CGROUPSTATS_CMD_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = cgroupstats_user_cmd, .policy = cgroupstats_cmd_get_policy, .maxattr = ARRAY_SIZE(cgroupstats_cmd_get_policy) - 1, }, }; static struct genl_family family __ro_after_init = { .name = TASKSTATS_GENL_NAME, .version = TASKSTATS_GENL_VERSION, .module = THIS_MODULE, .ops = taskstats_ops, .n_ops = ARRAY_SIZE(taskstats_ops), .resv_start_op = CGROUPSTATS_CMD_GET + 1, .netnsok = true, }; /* Needed early in initialization */ void __init taskstats_init_early(void) { unsigned int i; taskstats_cache = KMEM_CACHE(taskstats, SLAB_PANIC); for_each_possible_cpu(i) { INIT_LIST_HEAD(&(per_cpu(listener_array, i).list)); init_rwsem(&(per_cpu(listener_array, i).sem)); } } static int __init taskstats_init(void) { int rc; rc = genl_register_family(&family); if (rc) return rc; family_registered = 1; pr_info("registered taskstats version %d\n", TASKSTATS_GENL_VERSION); return 0; } /* * late initcall ensures initialization of statistics collection * mechanisms precedes initialization of the taskstats interface */ late_initcall(taskstats_init); |
2 2 3 1 2 1 2 2 3 1 3 2 1 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 | // SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> #include <linux/errno.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/mm.h> #include <linux/slab.h> #include <linux/namei.h> #include <linux/io_uring.h> #include <linux/xattr.h> #include <uapi/linux/io_uring.h> #include "../fs/internal.h" #include "io_uring.h" #include "xattr.h" struct io_xattr { struct file *file; struct xattr_ctx ctx; struct filename *filename; }; void io_xattr_cleanup(struct io_kiocb *req) { struct io_xattr *ix = io_kiocb_to_cmd(req, struct io_xattr); if (ix->filename) putname(ix->filename); kfree(ix->ctx.kname); kvfree(ix->ctx.kvalue); } static void io_xattr_finish(struct io_kiocb *req, int ret) { req->flags &= ~REQ_F_NEED_CLEANUP; io_xattr_cleanup(req); io_req_set_res(req, ret, 0); } static int __io_getxattr_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_xattr *ix = io_kiocb_to_cmd(req, struct io_xattr); const char __user *name; int ret; if (unlikely(req->flags & REQ_F_FIXED_FILE)) return -EBADF; ix->filename = NULL; ix->ctx.kvalue = NULL; name = u64_to_user_ptr(READ_ONCE(sqe->addr)); ix->ctx.cvalue = u64_to_user_ptr(READ_ONCE(sqe->addr2)); ix->ctx.size = READ_ONCE(sqe->len); ix->ctx.flags = READ_ONCE(sqe->xattr_flags); if (ix->ctx.flags) return -EINVAL; ix->ctx.kname = kmalloc(sizeof(*ix->ctx.kname), GFP_KERNEL); if (!ix->ctx.kname) return -ENOMEM; ret = strncpy_from_user(ix->ctx.kname->name, name, sizeof(ix->ctx.kname->name)); if (!ret || ret == sizeof(ix->ctx.kname->name)) ret = -ERANGE; if (ret < 0) { kfree(ix->ctx.kname); return ret; } req->flags |= REQ_F_NEED_CLEANUP; req->flags |= REQ_F_FORCE_ASYNC; return 0; } int io_fgetxattr_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { return __io_getxattr_prep(req, sqe); } int io_getxattr_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_xattr *ix = io_kiocb_to_cmd(req, struct io_xattr); const char __user *path; int ret; ret = __io_getxattr_prep(req, sqe); if (ret) return ret; path = u64_to_user_ptr(READ_ONCE(sqe->addr3)); ix->filename = getname_flags(path, LOOKUP_FOLLOW, NULL); if (IS_ERR(ix->filename)) { ret = PTR_ERR(ix->filename); ix->filename = NULL; } return ret; } int io_fgetxattr(struct io_kiocb *req, unsigned int issue_flags) { struct io_xattr *ix = io_kiocb_to_cmd(req, struct io_xattr); int ret; WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK); ret = do_getxattr(file_mnt_idmap(req->file), req->file->f_path.dentry, &ix->ctx); io_xattr_finish(req, ret); return IOU_OK; } int io_getxattr(struct io_kiocb *req, unsigned int issue_flags) { struct io_xattr *ix = io_kiocb_to_cmd(req, struct io_xattr); unsigned int lookup_flags = LOOKUP_FOLLOW; struct path path; int ret; WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK); retry: ret = filename_lookup(AT_FDCWD, ix->filename, lookup_flags, &path, NULL); if (!ret) { ret = do_getxattr(mnt_idmap(path.mnt), path.dentry, &ix->ctx); path_put(&path); if (retry_estale(ret, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } } io_xattr_finish(req, ret); return IOU_OK; } static int __io_setxattr_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_xattr *ix = io_kiocb_to_cmd(req, struct io_xattr); const char __user *name; int ret; if (unlikely(req->flags & REQ_F_FIXED_FILE)) return -EBADF; ix->filename = NULL; name = u64_to_user_ptr(READ_ONCE(sqe->addr)); ix->ctx.cvalue = u64_to_user_ptr(READ_ONCE(sqe->addr2)); ix->ctx.kvalue = NULL; ix->ctx.size = READ_ONCE(sqe->len); ix->ctx.flags = READ_ONCE(sqe->xattr_flags); ix->ctx.kname = kmalloc(sizeof(*ix->ctx.kname), GFP_KERNEL); if (!ix->ctx.kname) return -ENOMEM; ret = setxattr_copy(name, &ix->ctx); if (ret) { kfree(ix->ctx.kname); return ret; } req->flags |= REQ_F_NEED_CLEANUP; req->flags |= REQ_F_FORCE_ASYNC; return 0; } int io_setxattr_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_xattr *ix = io_kiocb_to_cmd(req, struct io_xattr); const char __user *path; int ret; ret = __io_setxattr_prep(req, sqe); if (ret) return ret; path = u64_to_user_ptr(READ_ONCE(sqe->addr3)); ix->filename = getname_flags(path, LOOKUP_FOLLOW, NULL); if (IS_ERR(ix->filename)) { ret = PTR_ERR(ix->filename); ix->filename = NULL; } return ret; } int io_fsetxattr_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { return __io_setxattr_prep(req, sqe); } static int __io_setxattr(struct io_kiocb *req, unsigned int issue_flags, const struct path *path) { struct io_xattr *ix = io_kiocb_to_cmd(req, struct io_xattr); int ret; ret = mnt_want_write(path->mnt); if (!ret) { ret = do_setxattr(mnt_idmap(path->mnt), path->dentry, &ix->ctx); mnt_drop_write(path->mnt); } return ret; } int io_fsetxattr(struct io_kiocb *req, unsigned int issue_flags) { int ret; WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK); ret = __io_setxattr(req, issue_flags, &req->file->f_path); io_xattr_finish(req, ret); return IOU_OK; } int io_setxattr(struct io_kiocb *req, unsigned int issue_flags) { struct io_xattr *ix = io_kiocb_to_cmd(req, struct io_xattr); unsigned int lookup_flags = LOOKUP_FOLLOW; struct path path; int ret; WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK); retry: ret = filename_lookup(AT_FDCWD, ix->filename, lookup_flags, &path, NULL); if (!ret) { ret = __io_setxattr(req, issue_flags, &path); path_put(&path); if (retry_estale(ret, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } } io_xattr_finish(req, ret); return IOU_OK; } |
25 25 20 40 41 12 12 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 | // SPDX-License-Identifier: GPL-2.0-or-later /* * ALSA timer back-end using hrtimer * Copyright (C) 2008 Takashi Iwai */ #include <linux/init.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/hrtimer.h> #include <sound/core.h> #include <sound/timer.h> MODULE_AUTHOR("Takashi Iwai <tiwai@suse.de>"); MODULE_DESCRIPTION("ALSA hrtimer backend"); MODULE_LICENSE("GPL"); MODULE_ALIAS("snd-timer-" __stringify(SNDRV_TIMER_GLOBAL_HRTIMER)); #define NANO_SEC 1000000000UL /* 10^9 in sec */ static unsigned int resolution; struct snd_hrtimer { struct snd_timer *timer; struct hrtimer hrt; bool in_callback; }; static enum hrtimer_restart snd_hrtimer_callback(struct hrtimer *hrt) { struct snd_hrtimer *stime = container_of(hrt, struct snd_hrtimer, hrt); struct snd_timer *t = stime->timer; ktime_t delta; unsigned long ticks; enum hrtimer_restart ret = HRTIMER_NORESTART; scoped_guard(spinlock, &t->lock) { if (!t->running) return HRTIMER_NORESTART; /* fast path */ stime->in_callback = true; ticks = t->sticks; } /* calculate the drift */ delta = ktime_sub(hrt->base->get_time(), hrtimer_get_expires(hrt)); if (delta > 0) ticks += ktime_divns(delta, ticks * resolution); snd_timer_interrupt(stime->timer, ticks); guard(spinlock)(&t->lock); if (t->running) { hrtimer_add_expires_ns(hrt, t->sticks * resolution); ret = HRTIMER_RESTART; } stime->in_callback = false; return ret; } static int snd_hrtimer_open(struct snd_timer *t) { struct snd_hrtimer *stime; stime = kzalloc(sizeof(*stime), GFP_KERNEL); if (!stime) return -ENOMEM; hrtimer_init(&stime->hrt, CLOCK_MONOTONIC, HRTIMER_MODE_REL); stime->timer = t; stime->hrt.function = snd_hrtimer_callback; t->private_data = stime; return 0; } static int snd_hrtimer_close(struct snd_timer *t) { struct snd_hrtimer *stime = t->private_data; if (stime) { scoped_guard(spinlock_irq, &t->lock) { t->running = 0; /* just to be sure */ stime->in_callback = 1; /* skip start/stop */ } hrtimer_cancel(&stime->hrt); kfree(stime); t->private_data = NULL; } return 0; } static int snd_hrtimer_start(struct snd_timer *t) { struct snd_hrtimer *stime = t->private_data; if (stime->in_callback) return 0; hrtimer_start(&stime->hrt, ns_to_ktime(t->sticks * resolution), HRTIMER_MODE_REL); return 0; } static int snd_hrtimer_stop(struct snd_timer *t) { struct snd_hrtimer *stime = t->private_data; if (stime->in_callback) return 0; hrtimer_try_to_cancel(&stime->hrt); return 0; } static const struct snd_timer_hardware hrtimer_hw __initconst = { .flags = SNDRV_TIMER_HW_AUTO | SNDRV_TIMER_HW_WORK, .open = snd_hrtimer_open, .close = snd_hrtimer_close, .start = snd_hrtimer_start, .stop = snd_hrtimer_stop, }; /* * entry functions */ static struct snd_timer *mytimer; static int __init snd_hrtimer_init(void) { struct snd_timer *timer; int err; resolution = hrtimer_resolution; /* Create a new timer and set up the fields */ err = snd_timer_global_new("hrtimer", SNDRV_TIMER_GLOBAL_HRTIMER, &timer); if (err < 0) return err; timer->module = THIS_MODULE; strcpy(timer->name, "HR timer"); timer->hw = hrtimer_hw; timer->hw.resolution = resolution; timer->hw.ticks = NANO_SEC / resolution; timer->max_instances = 100; /* lower the limit */ err = snd_timer_global_register(timer); if (err < 0) { snd_timer_global_free(timer); return err; } mytimer = timer; /* remember this */ return 0; } static void __exit snd_hrtimer_exit(void) { if (mytimer) { snd_timer_global_free(mytimer); mytimer = NULL; } } module_init(snd_hrtimer_init); module_exit(snd_hrtimer_exit); |
31 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 | // SPDX-License-Identifier: GPL-2.0+ /* * drivers/of/property.c - Procedures for accessing and interpreting * Devicetree properties and graphs. * * Initially created by copying procedures from drivers/of/base.c. This * file contains the OF property as well as the OF graph interface * functions. * * Paul Mackerras August 1996. * Copyright (C) 1996-2005 Paul Mackerras. * * Adapted for 64bit PowerPC by Dave Engebretsen and Peter Bergner. * {engebret|bergner}@us.ibm.com * * Adapted for sparc and sparc64 by David S. Miller davem@davemloft.net * * Reconsolidated from arch/x/kernel/prom.c by Stephen Rothwell and * Grant Likely. */ #define pr_fmt(fmt) "OF: " fmt #include <linux/of.h> #include <linux/of_address.h> #include <linux/of_device.h> #include <linux/of_graph.h> #include <linux/of_irq.h> #include <linux/string.h> #include <linux/moduleparam.h> #include "of_private.h" /** * of_graph_is_present() - check graph's presence * @node: pointer to device_node containing graph port * * Return: True if @node has a port or ports (with a port) sub-node, * false otherwise. */ bool of_graph_is_present(const struct device_node *node) { struct device_node *ports, *port; ports = of_get_child_by_name(node, "ports"); if (ports) node = ports; port = of_get_child_by_name(node, "port"); of_node_put(ports); of_node_put(port); return !!port; } EXPORT_SYMBOL(of_graph_is_present); /** * of_property_count_elems_of_size - Count the number of elements in a property * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @elem_size: size of the individual element * * Search for a property in a device node and count the number of elements of * size elem_size in it. * * Return: The number of elements on sucess, -EINVAL if the property does not * exist or its length does not match a multiple of elem_size and -ENODATA if * the property does not have a value. */ int of_property_count_elems_of_size(const struct device_node *np, const char *propname, int elem_size) { struct property *prop = of_find_property(np, propname, NULL); if (!prop) return -EINVAL; if (!prop->value) return -ENODATA; if (prop->length % elem_size != 0) { pr_err("size of %s in node %pOF is not a multiple of %d\n", propname, np, elem_size); return -EINVAL; } return prop->length / elem_size; } EXPORT_SYMBOL_GPL(of_property_count_elems_of_size); /** * of_find_property_value_of_size * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @min: minimum allowed length of property value * @max: maximum allowed length of property value (0 means unlimited) * @len: if !=NULL, actual length is written to here * * Search for a property in a device node and valid the requested size. * * Return: The property value on success, -EINVAL if the property does not * exist, -ENODATA if property does not have a value, and -EOVERFLOW if the * property data is too small or too large. * */ static void *of_find_property_value_of_size(const struct device_node *np, const char *propname, u32 min, u32 max, size_t *len) { struct property *prop = of_find_property(np, propname, NULL); if (!prop) return ERR_PTR(-EINVAL); if (!prop->value) return ERR_PTR(-ENODATA); if (prop->length < min) return ERR_PTR(-EOVERFLOW); if (max && prop->length > max) return ERR_PTR(-EOVERFLOW); if (len) *len = prop->length; return prop->value; } /** * of_property_read_u32_index - Find and read a u32 from a multi-value property. * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @index: index of the u32 in the list of values * @out_value: pointer to return value, modified only if no error. * * Search for a property in a device node and read nth 32-bit value from * it. * * Return: 0 on success, -EINVAL if the property does not exist, * -ENODATA if property does not have a value, and -EOVERFLOW if the * property data isn't large enough. * * The out_value is modified only if a valid u32 value can be decoded. */ int of_property_read_u32_index(const struct device_node *np, const char *propname, u32 index, u32 *out_value) { const u32 *val = of_find_property_value_of_size(np, propname, ((index + 1) * sizeof(*out_value)), 0, NULL); if (IS_ERR(val)) return PTR_ERR(val); *out_value = be32_to_cpup(((__be32 *)val) + index); return 0; } EXPORT_SYMBOL_GPL(of_property_read_u32_index); /** * of_property_read_u64_index - Find and read a u64 from a multi-value property. * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @index: index of the u64 in the list of values * @out_value: pointer to return value, modified only if no error. * * Search for a property in a device node and read nth 64-bit value from * it. * * Return: 0 on success, -EINVAL if the property does not exist, * -ENODATA if property does not have a value, and -EOVERFLOW if the * property data isn't large enough. * * The out_value is modified only if a valid u64 value can be decoded. */ int of_property_read_u64_index(const struct device_node *np, const char *propname, u32 index, u64 *out_value) { const u64 *val = of_find_property_value_of_size(np, propname, ((index + 1) * sizeof(*out_value)), 0, NULL); if (IS_ERR(val)) return PTR_ERR(val); *out_value = be64_to_cpup(((__be64 *)val) + index); return 0; } EXPORT_SYMBOL_GPL(of_property_read_u64_index); /** * of_property_read_variable_u8_array - Find and read an array of u8 from a * property, with bounds on the minimum and maximum array size. * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @out_values: pointer to found values. * @sz_min: minimum number of array elements to read * @sz_max: maximum number of array elements to read, if zero there is no * upper limit on the number of elements in the dts entry but only * sz_min will be read. * * Search for a property in a device node and read 8-bit value(s) from * it. * * dts entry of array should be like: * ``property = /bits/ 8 <0x50 0x60 0x70>;`` * * Return: The number of elements read on success, -EINVAL if the property * does not exist, -ENODATA if property does not have a value, and -EOVERFLOW * if the property data is smaller than sz_min or longer than sz_max. * * The out_values is modified only if a valid u8 value can be decoded. */ int of_property_read_variable_u8_array(const struct device_node *np, const char *propname, u8 *out_values, size_t sz_min, size_t sz_max) { size_t sz, count; const u8 *val = of_find_property_value_of_size(np, propname, (sz_min * sizeof(*out_values)), (sz_max * sizeof(*out_values)), &sz); if (IS_ERR(val)) return PTR_ERR(val); if (!sz_max) sz = sz_min; else sz /= sizeof(*out_values); count = sz; while (count--) *out_values++ = *val++; return sz; } EXPORT_SYMBOL_GPL(of_property_read_variable_u8_array); /** * of_property_read_variable_u16_array - Find and read an array of u16 from a * property, with bounds on the minimum and maximum array size. * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @out_values: pointer to found values. * @sz_min: minimum number of array elements to read * @sz_max: maximum number of array elements to read, if zero there is no * upper limit on the number of elements in the dts entry but only * sz_min will be read. * * Search for a property in a device node and read 16-bit value(s) from * it. * * dts entry of array should be like: * ``property = /bits/ 16 <0x5000 0x6000 0x7000>;`` * * Return: The number of elements read on success, -EINVAL if the property * does not exist, -ENODATA if property does not have a value, and -EOVERFLOW * if the property data is smaller than sz_min or longer than sz_max. * * The out_values is modified only if a valid u16 value can be decoded. */ int of_property_read_variable_u16_array(const struct device_node *np, const char *propname, u16 *out_values, size_t sz_min, size_t sz_max) { size_t sz, count; const __be16 *val = of_find_property_value_of_size(np, propname, (sz_min * sizeof(*out_values)), (sz_max * sizeof(*out_values)), &sz); if (IS_ERR(val)) return PTR_ERR(val); if (!sz_max) sz = sz_min; else sz /= sizeof(*out_values); count = sz; while (count--) *out_values++ = be16_to_cpup(val++); return sz; } EXPORT_SYMBOL_GPL(of_property_read_variable_u16_array); /** * of_property_read_variable_u32_array - Find and read an array of 32 bit * integers from a property, with bounds on the minimum and maximum array size. * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @out_values: pointer to return found values. * @sz_min: minimum number of array elements to read * @sz_max: maximum number of array elements to read, if zero there is no * upper limit on the number of elements in the dts entry but only * sz_min will be read. * * Search for a property in a device node and read 32-bit value(s) from * it. * * Return: The number of elements read on success, -EINVAL if the property * does not exist, -ENODATA if property does not have a value, and -EOVERFLOW * if the property data is smaller than sz_min or longer than sz_max. * * The out_values is modified only if a valid u32 value can be decoded. */ int of_property_read_variable_u32_array(const struct device_node *np, const char *propname, u32 *out_values, size_t sz_min, size_t sz_max) { size_t sz, count; const __be32 *val = of_find_property_value_of_size(np, propname, (sz_min * sizeof(*out_values)), (sz_max * sizeof(*out_values)), &sz); if (IS_ERR(val)) return PTR_ERR(val); if (!sz_max) sz = sz_min; else sz /= sizeof(*out_values); count = sz; while (count--) *out_values++ = be32_to_cpup(val++); return sz; } EXPORT_SYMBOL_GPL(of_property_read_variable_u32_array); /** * of_property_read_u64 - Find and read a 64 bit integer from a property * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @out_value: pointer to return value, modified only if return value is 0. * * Search for a property in a device node and read a 64-bit value from * it. * * Return: 0 on success, -EINVAL if the property does not exist, * -ENODATA if property does not have a value, and -EOVERFLOW if the * property data isn't large enough. * * The out_value is modified only if a valid u64 value can be decoded. */ int of_property_read_u64(const struct device_node *np, const char *propname, u64 *out_value) { const __be32 *val = of_find_property_value_of_size(np, propname, sizeof(*out_value), 0, NULL); if (IS_ERR(val)) return PTR_ERR(val); *out_value = of_read_number(val, 2); return 0; } EXPORT_SYMBOL_GPL(of_property_read_u64); /** * of_property_read_variable_u64_array - Find and read an array of 64 bit * integers from a property, with bounds on the minimum and maximum array size. * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @out_values: pointer to found values. * @sz_min: minimum number of array elements to read * @sz_max: maximum number of array elements to read, if zero there is no * upper limit on the number of elements in the dts entry but only * sz_min will be read. * * Search for a property in a device node and read 64-bit value(s) from * it. * * Return: The number of elements read on success, -EINVAL if the property * does not exist, -ENODATA if property does not have a value, and -EOVERFLOW * if the property data is smaller than sz_min or longer than sz_max. * * The out_values is modified only if a valid u64 value can be decoded. */ int of_property_read_variable_u64_array(const struct device_node *np, const char *propname, u64 *out_values, size_t sz_min, size_t sz_max) { size_t sz, count; const __be32 *val = of_find_property_value_of_size(np, propname, (sz_min * sizeof(*out_values)), (sz_max * sizeof(*out_values)), &sz); if (IS_ERR(val)) return PTR_ERR(val); if (!sz_max) sz = sz_min; else sz /= sizeof(*out_values); count = sz; while (count--) { *out_values++ = of_read_number(val, 2); val += 2; } return sz; } EXPORT_SYMBOL_GPL(of_property_read_variable_u64_array); /** * of_property_read_string - Find and read a string from a property * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @out_string: pointer to null terminated return string, modified only if * return value is 0. * * Search for a property in a device tree node and retrieve a null * terminated string value (pointer to data, not a copy). * * Return: 0 on success, -EINVAL if the property does not exist, -ENODATA if * property does not have a value, and -EILSEQ if the string is not * null-terminated within the length of the property data. * * Note that the empty string "" has length of 1, thus -ENODATA cannot * be interpreted as an empty string. * * The out_string pointer is modified only if a valid string can be decoded. */ int of_property_read_string(const struct device_node *np, const char *propname, const char **out_string) { const struct property *prop = of_find_property(np, propname, NULL); if (!prop) return -EINVAL; if (!prop->length) return -ENODATA; if (strnlen(prop->value, prop->length) >= prop->length) return -EILSEQ; *out_string = prop->value; return 0; } EXPORT_SYMBOL_GPL(of_property_read_string); /** * of_property_match_string() - Find string in a list and return index * @np: pointer to node containing string list property * @propname: string list property name * @string: pointer to string to search for in string list * * This function searches a string list property and returns the index * of a specific string value. */ int of_property_match_string(const struct device_node *np, const char *propname, const char *string) { const struct property *prop = of_find_property(np, propname, NULL); size_t l; int i; const char *p, *end; if (!prop) return -EINVAL; if (!prop->value) return -ENODATA; p = prop->value; end = p + prop->length; for (i = 0; p < end; i++, p += l) { l = strnlen(p, end - p) + 1; if (p + l > end) return -EILSEQ; pr_debug("comparing %s with %s\n", string, p); if (strcmp(string, p) == 0) return i; /* Found it; return index */ } return -ENODATA; } EXPORT_SYMBOL_GPL(of_property_match_string); /** * of_property_read_string_helper() - Utility helper for parsing string properties * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @out_strs: output array of string pointers. * @sz: number of array elements to read. * @skip: Number of strings to skip over at beginning of list. * * Don't call this function directly. It is a utility helper for the * of_property_read_string*() family of functions. */ int of_property_read_string_helper(const struct device_node *np, const char *propname, const char **out_strs, size_t sz, int skip) { const struct property *prop = of_find_property(np, propname, NULL); int l = 0, i = 0; const char *p, *end; if (!prop) return -EINVAL; if (!prop->value) return -ENODATA; p = prop->value; end = p + prop->length; for (i = 0; p < end && (!out_strs || i < skip + sz); i++, p += l) { l = strnlen(p, end - p) + 1; if (p + l > end) return -EILSEQ; if (out_strs && i >= skip) *out_strs++ = p; } i -= skip; return i <= 0 ? -ENODATA : i; } EXPORT_SYMBOL_GPL(of_property_read_string_helper); const __be32 *of_prop_next_u32(struct property *prop, const __be32 *cur, u32 *pu) { const void *curv = cur; if (!prop) return NULL; if (!cur) { curv = prop->value; goto out_val; } curv += sizeof(*cur); if (curv >= prop->value + prop->length) return NULL; out_val: *pu = be32_to_cpup(curv); return curv; } EXPORT_SYMBOL_GPL(of_prop_next_u32); const char *of_prop_next_string(struct property *prop, const char *cur) { const void *curv = cur; if (!prop) return NULL; if (!cur) return prop->value; curv += strlen(cur) + 1; if (curv >= prop->value + prop->length) return NULL; return curv; } EXPORT_SYMBOL_GPL(of_prop_next_string); /** * of_graph_parse_endpoint() - parse common endpoint node properties * @node: pointer to endpoint device_node * @endpoint: pointer to the OF endpoint data structure * * The caller should hold a reference to @node. */ int of_graph_parse_endpoint(const struct device_node *node, struct of_endpoint *endpoint) { struct device_node *port_node = of_get_parent(node); WARN_ONCE(!port_node, "%s(): endpoint %pOF has no parent node\n", __func__, node); memset(endpoint, 0, sizeof(*endpoint)); endpoint->local_node = node; /* * It doesn't matter whether the two calls below succeed. * If they don't then the default value 0 is used. */ of_property_read_u32(port_node, "reg", &endpoint->port); of_property_read_u32(node, "reg", &endpoint->id); of_node_put(port_node); return 0; } EXPORT_SYMBOL(of_graph_parse_endpoint); /** * of_graph_get_port_by_id() - get the port matching a given id * @parent: pointer to the parent device node * @id: id of the port * * Return: A 'port' node pointer with refcount incremented. The caller * has to use of_node_put() on it when done. */ struct device_node *of_graph_get_port_by_id(struct device_node *parent, u32 id) { struct device_node *node, *port; node = of_get_child_by_name(parent, "ports"); if (node) parent = node; for_each_child_of_node(parent, port) { u32 port_id = 0; if (!of_node_name_eq(port, "port")) continue; of_property_read_u32(port, "reg", &port_id); if (id == port_id) break; } of_node_put(node); return port; } EXPORT_SYMBOL(of_graph_get_port_by_id); /** * of_graph_get_next_endpoint() - get next endpoint node * @parent: pointer to the parent device node * @prev: previous endpoint node, or NULL to get first * * Return: An 'endpoint' node pointer with refcount incremented. Refcount * of the passed @prev node is decremented. */ struct device_node *of_graph_get_next_endpoint(const struct device_node *parent, struct device_node *prev) { struct device_node *endpoint; struct device_node *port; if (!parent) return NULL; /* * Start by locating the port node. If no previous endpoint is specified * search for the first port node, otherwise get the previous endpoint * parent port node. */ if (!prev) { struct device_node *node; node = of_get_child_by_name(parent, "ports"); if (node) parent = node; port = of_get_child_by_name(parent, "port"); of_node_put(node); if (!port) { pr_debug("graph: no port node found in %pOF\n", parent); return NULL; } } else { port = of_get_parent(prev); if (WARN_ONCE(!port, "%s(): endpoint %pOF has no parent node\n", __func__, prev)) return NULL; } while (1) { /* * Now that we have a port node, get the next endpoint by * getting the next child. If the previous endpoint is NULL this * will return the first child. */ endpoint = of_get_next_child(port, prev); if (endpoint) { of_node_put(port); return endpoint; } /* No more endpoints under this port, try the next one. */ prev = NULL; do { port = of_get_next_child(parent, port); if (!port) return NULL; } while (!of_node_name_eq(port, "port")); } } EXPORT_SYMBOL(of_graph_get_next_endpoint); /** * of_graph_get_endpoint_by_regs() - get endpoint node of specific identifiers * @parent: pointer to the parent device node * @port_reg: identifier (value of reg property) of the parent port node * @reg: identifier (value of reg property) of the endpoint node * * Return: An 'endpoint' node pointer which is identified by reg and at the same * is the child of a port node identified by port_reg. reg and port_reg are * ignored when they are -1. Use of_node_put() on the pointer when done. */ struct device_node *of_graph_get_endpoint_by_regs( const struct device_node *parent, int port_reg, int reg) { struct of_endpoint endpoint; struct device_node *node = NULL; for_each_endpoint_of_node(parent, node) { of_graph_parse_endpoint(node, &endpoint); if (((port_reg == -1) || (endpoint.port == port_reg)) && ((reg == -1) || (endpoint.id == reg))) return node; } return NULL; } EXPORT_SYMBOL(of_graph_get_endpoint_by_regs); /** * of_graph_get_remote_endpoint() - get remote endpoint node * @node: pointer to a local endpoint device_node * * Return: Remote endpoint node associated with remote endpoint node linked * to @node. Use of_node_put() on it when done. */ struct device_node *of_graph_get_remote_endpoint(const struct device_node *node) { /* Get remote endpoint node. */ return of_parse_phandle(node, "remote-endpoint", 0); } EXPORT_SYMBOL(of_graph_get_remote_endpoint); /** * of_graph_get_port_parent() - get port's parent node * @node: pointer to a local endpoint device_node * * Return: device node associated with endpoint node linked * to @node. Use of_node_put() on it when done. */ struct device_node *of_graph_get_port_parent(struct device_node *node) { unsigned int depth; if (!node) return NULL; /* * Preserve usecount for passed in node as of_get_next_parent() * will do of_node_put() on it. */ of_node_get(node); /* Walk 3 levels up only if there is 'ports' node. */ for (depth = 3; depth && node; depth--) { node = of_get_next_parent(node); if (depth == 2 && !of_node_name_eq(node, "ports") && !of_node_name_eq(node, "in-ports") && !of_node_name_eq(node, "out-ports")) break; } return node; } EXPORT_SYMBOL(of_graph_get_port_parent); /** * of_graph_get_remote_port_parent() - get remote port's parent node * @node: pointer to a local endpoint device_node * * Return: Remote device node associated with remote endpoint node linked * to @node. Use of_node_put() on it when done. */ struct device_node *of_graph_get_remote_port_parent( const struct device_node *node) { struct device_node *np, *pp; /* Get remote endpoint node. */ np = of_graph_get_remote_endpoint(node); pp = of_graph_get_port_parent(np); of_node_put(np); return pp; } EXPORT_SYMBOL(of_graph_get_remote_port_parent); /** * of_graph_get_remote_port() - get remote port node * @node: pointer to a local endpoint device_node * * Return: Remote port node associated with remote endpoint node linked * to @node. Use of_node_put() on it when done. */ struct device_node *of_graph_get_remote_port(const struct device_node *node) { struct device_node *np; /* Get remote endpoint node. */ np = of_graph_get_remote_endpoint(node); if (!np) return NULL; return of_get_next_parent(np); } EXPORT_SYMBOL(of_graph_get_remote_port); /** * of_graph_get_endpoint_count() - get the number of endpoints in a device node * @np: parent device node containing ports and endpoints * * Return: count of endpoint of this device node */ unsigned int of_graph_get_endpoint_count(const struct device_node *np) { struct device_node *endpoint; unsigned int num = 0; for_each_endpoint_of_node(np, endpoint) num++; return num; } EXPORT_SYMBOL(of_graph_get_endpoint_count); /** * of_graph_get_remote_node() - get remote parent device_node for given port/endpoint * @node: pointer to parent device_node containing graph port/endpoint * @port: identifier (value of reg property) of the parent port node * @endpoint: identifier (value of reg property) of the endpoint node * * Return: Remote device node associated with remote endpoint node linked * to @node. Use of_node_put() on it when done. */ struct device_node *of_graph_get_remote_node(const struct device_node *node, u32 port, u32 endpoint) { struct device_node *endpoint_node, *remote; endpoint_node = of_graph_get_endpoint_by_regs(node, port, endpoint); if (!endpoint_node) { pr_debug("no valid endpoint (%d, %d) for node %pOF\n", port, endpoint, node); return NULL; } remote = of_graph_get_remote_port_parent(endpoint_node); of_node_put(endpoint_node); if (!remote) { pr_debug("no valid remote node\n"); return NULL; } if (!of_device_is_available(remote)) { pr_debug("not available for remote node\n"); of_node_put(remote); return NULL; } return remote; } EXPORT_SYMBOL(of_graph_get_remote_node); static struct fwnode_handle *of_fwnode_get(struct fwnode_handle *fwnode) { return of_fwnode_handle(of_node_get(to_of_node(fwnode))); } static void of_fwnode_put(struct fwnode_handle *fwnode) { of_node_put(to_of_node(fwnode)); } static bool of_fwnode_device_is_available(const struct fwnode_handle *fwnode) { return of_device_is_available(to_of_node(fwnode)); } static bool of_fwnode_device_dma_supported(const struct fwnode_handle *fwnode) { return true; } static enum dev_dma_attr of_fwnode_device_get_dma_attr(const struct fwnode_handle *fwnode) { if (of_dma_is_coherent(to_of_node(fwnode))) return DEV_DMA_COHERENT; else return DEV_DMA_NON_COHERENT; } static bool of_fwnode_property_present(const struct fwnode_handle *fwnode, const char *propname) { return of_property_read_bool(to_of_node(fwnode), propname); } static int of_fwnode_property_read_int_array(const struct fwnode_handle *fwnode, const char *propname, unsigned int elem_size, void *val, size_t nval) { const struct device_node *node = to_of_node(fwnode); if (!val) return of_property_count_elems_of_size(node, propname, elem_size); switch (elem_size) { case sizeof(u8): return of_property_read_u8_array(node, propname, val, nval); case sizeof(u16): return of_property_read_u16_array(node, propname, val, nval); case sizeof(u32): return of_property_read_u32_array(node, propname, val, nval); case sizeof(u64): return of_property_read_u64_array(node, propname, val, nval); } return -ENXIO; } static int of_fwnode_property_read_string_array(const struct fwnode_handle *fwnode, const char *propname, const char **val, size_t nval) { const struct device_node *node = to_of_node(fwnode); return val ? of_property_read_string_array(node, propname, val, nval) : of_property_count_strings(node, propname); } static const char *of_fwnode_get_name(const struct fwnode_handle *fwnode) { return kbasename(to_of_node(fwnode)->full_name); } static const char *of_fwnode_get_name_prefix(const struct fwnode_handle *fwnode) { /* Root needs no prefix here (its name is "/"). */ if (!to_of_node(fwnode)->parent) return ""; return "/"; } static struct fwnode_handle * of_fwnode_get_parent(const struct fwnode_handle *fwnode) { return of_fwnode_handle(of_get_parent(to_of_node(fwnode))); } static struct fwnode_handle * of_fwnode_get_next_child_node(const struct fwnode_handle *fwnode, struct fwnode_handle *child) { return of_fwnode_handle(of_get_next_available_child(to_of_node(fwnode), to_of_node(child))); } static struct fwnode_handle * of_fwnode_get_named_child_node(const struct fwnode_handle *fwnode, const char *childname) { const struct device_node *node = to_of_node(fwnode); struct device_node *child; for_each_available_child_of_node(node, child) if (of_node_name_eq(child, childname)) return of_fwnode_handle(child); return NULL; } static int of_fwnode_get_reference_args(const struct fwnode_handle *fwnode, const char *prop, const char *nargs_prop, unsigned int nargs, unsigned int index, struct fwnode_reference_args *args) { struct of_phandle_args of_args; unsigned int i; int ret; if (nargs_prop) ret = of_parse_phandle_with_args(to_of_node(fwnode), prop, nargs_prop, index, &of_args); else ret = of_parse_phandle_with_fixed_args(to_of_node(fwnode), prop, nargs, index, &of_args); if (ret < 0) return ret; if (!args) { of_node_put(of_args.np); return 0; } args->nargs = of_args.args_count; args->fwnode = of_fwnode_handle(of_args.np); for (i = 0; i < NR_FWNODE_REFERENCE_ARGS; i++) args->args[i] = i < of_args.args_count ? of_args.args[i] : 0; return 0; } static struct fwnode_handle * of_fwnode_graph_get_next_endpoint(const struct fwnode_handle *fwnode, struct fwnode_handle *prev) { return of_fwnode_handle(of_graph_get_next_endpoint(to_of_node(fwnode), to_of_node(prev))); } static struct fwnode_handle * of_fwnode_graph_get_remote_endpoint(const struct fwnode_handle *fwnode) { return of_fwnode_handle( of_graph_get_remote_endpoint(to_of_node(fwnode))); } static struct fwnode_handle * of_fwnode_graph_get_port_parent(struct fwnode_handle *fwnode) { struct device_node *np; /* Get the parent of the port */ np = of_get_parent(to_of_node(fwnode)); if (!np) return NULL; /* Is this the "ports" node? If not, it's the port parent. */ if (!of_node_name_eq(np, "ports")) return of_fwnode_handle(np); return of_fwnode_handle(of_get_next_parent(np)); } static int of_fwnode_graph_parse_endpoint(const struct fwnode_handle *fwnode, struct fwnode_endpoint *endpoint) { const struct device_node *node = to_of_node(fwnode); struct device_node *port_node = of_get_parent(node); endpoint->local_fwnode = fwnode; of_property_read_u32(port_node, "reg", &endpoint->port); of_property_read_u32(node, "reg", &endpoint->id); of_node_put(port_node); return 0; } static const void * of_fwnode_device_get_match_data(const struct fwnode_handle *fwnode, const struct device *dev) { return of_device_get_match_data(dev); } static void of_link_to_phandle(struct device_node *con_np, struct device_node *sup_np, u8 flags) { struct device_node *tmp_np = of_node_get(sup_np); /* Check that sup_np and its ancestors are available. */ while (tmp_np) { if (of_fwnode_handle(tmp_np)->dev) { of_node_put(tmp_np); break; } if (!of_device_is_available(tmp_np)) { of_node_put(tmp_np); return; } tmp_np = of_get_next_parent(tmp_np); } fwnode_link_add(of_fwnode_handle(con_np), of_fwnode_handle(sup_np), flags); } /** * parse_prop_cells - Property parsing function for suppliers * * @np: Pointer to device tree node containing a list * @prop_name: Name of property to be parsed. Expected to hold phandle values * @index: For properties holding a list of phandles, this is the index * into the list. * @list_name: Property name that is known to contain list of phandle(s) to * supplier(s) * @cells_name: property name that specifies phandles' arguments count * * This is a helper function to parse properties that have a known fixed name * and are a list of phandles and phandle arguments. * * Returns: * - phandle node pointer with refcount incremented. Caller must of_node_put() * on it when done. * - NULL if no phandle found at index */ static struct device_node *parse_prop_cells(struct device_node *np, const char *prop_name, int index, const char *list_name, const char *cells_name) { struct of_phandle_args sup_args; if (strcmp(prop_name, list_name)) return NULL; if (__of_parse_phandle_with_args(np, list_name, cells_name, 0, index, &sup_args)) return NULL; return sup_args.np; } #define DEFINE_SIMPLE_PROP(fname, name, cells) \ static struct device_node *parse_##fname(struct device_node *np, \ const char *prop_name, int index) \ { \ return parse_prop_cells(np, prop_name, index, name, cells); \ } static int strcmp_suffix(const char *str, const char *suffix) { unsigned int len, suffix_len; len = strlen(str); suffix_len = strlen(suffix); if (len <= suffix_len) return -1; return strcmp(str + len - suffix_len, suffix); } /** * parse_suffix_prop_cells - Suffix property parsing function for suppliers * * @np: Pointer to device tree node containing a list * @prop_name: Name of property to be parsed. Expected to hold phandle values * @index: For properties holding a list of phandles, this is the index * into the list. * @suffix: Property suffix that is known to contain list of phandle(s) to * supplier(s) * @cells_name: property name that specifies phandles' arguments count * * This is a helper function to parse properties that have a known fixed suffix * and are a list of phandles and phandle arguments. * * Returns: * - phandle node pointer with refcount incremented. Caller must of_node_put() * on it when done. * - NULL if no phandle found at index */ static struct device_node *parse_suffix_prop_cells(struct device_node *np, const char *prop_name, int index, const char *suffix, const char *cells_name) { struct of_phandle_args sup_args; if (strcmp_suffix(prop_name, suffix)) return NULL; if (of_parse_phandle_with_args(np, prop_name, cells_name, index, &sup_args)) return NULL; return sup_args.np; } #define DEFINE_SUFFIX_PROP(fname, suffix, cells) \ static struct device_node *parse_##fname(struct device_node *np, \ const char *prop_name, int index) \ { \ return parse_suffix_prop_cells(np, prop_name, index, suffix, cells); \ } /** * struct supplier_bindings - Property parsing functions for suppliers * * @parse_prop: function name * parse_prop() finds the node corresponding to a supplier phandle * parse_prop.np: Pointer to device node holding supplier phandle property * parse_prop.prop_name: Name of property holding a phandle value * parse_prop.index: For properties holding a list of phandles, this is the * index into the list * @get_con_dev: If the consumer node containing the property is never converted * to a struct device, implement this ops so fw_devlink can use it * to find the true consumer. * @optional: Describes whether a supplier is mandatory or not * @fwlink_flags: Optional fwnode link flags to use when creating a fwnode link * for this property. * * Returns: * parse_prop() return values are * - phandle node pointer with refcount incremented. Caller must of_node_put() * on it when done. * - NULL if no phandle found at index */ struct supplier_bindings { struct device_node *(*parse_prop)(struct device_node *np, const char *prop_name, int index); struct device_node *(*get_con_dev)(struct device_node *np); bool optional; u8 fwlink_flags; }; DEFINE_SIMPLE_PROP(clocks, "clocks", "#clock-cells") DEFINE_SIMPLE_PROP(interconnects, "interconnects", "#interconnect-cells") DEFINE_SIMPLE_PROP(iommus, "iommus", "#iommu-cells") DEFINE_SIMPLE_PROP(mboxes, "mboxes", "#mbox-cells") DEFINE_SIMPLE_PROP(io_channels, "io-channels", "#io-channel-cells") DEFINE_SIMPLE_PROP(io_backends, "io-backends", "#io-backend-cells") DEFINE_SIMPLE_PROP(interrupt_parent, "interrupt-parent", NULL) DEFINE_SIMPLE_PROP(dmas, "dmas", "#dma-cells") DEFINE_SIMPLE_PROP(power_domains, "power-domains", "#power-domain-cells") DEFINE_SIMPLE_PROP(hwlocks, "hwlocks", "#hwlock-cells") DEFINE_SIMPLE_PROP(extcon, "extcon", NULL) DEFINE_SIMPLE_PROP(nvmem_cells, "nvmem-cells", "#nvmem-cell-cells") DEFINE_SIMPLE_PROP(phys, "phys", "#phy-cells") DEFINE_SIMPLE_PROP(wakeup_parent, "wakeup-parent", NULL) DEFINE_SIMPLE_PROP(pinctrl0, "pinctrl-0", NULL) DEFINE_SIMPLE_PROP(pinctrl1, "pinctrl-1", NULL) DEFINE_SIMPLE_PROP(pinctrl2, "pinctrl-2", NULL) DEFINE_SIMPLE_PROP(pinctrl3, "pinctrl-3", NULL) DEFINE_SIMPLE_PROP(pinctrl4, "pinctrl-4", NULL) DEFINE_SIMPLE_PROP(pinctrl5, "pinctrl-5", NULL) DEFINE_SIMPLE_PROP(pinctrl6, "pinctrl-6", NULL) DEFINE_SIMPLE_PROP(pinctrl7, "pinctrl-7", NULL) DEFINE_SIMPLE_PROP(pinctrl8, "pinctrl-8", NULL) DEFINE_SIMPLE_PROP(pwms, "pwms", "#pwm-cells") DEFINE_SIMPLE_PROP(resets, "resets", "#reset-cells") DEFINE_SIMPLE_PROP(leds, "leds", NULL) DEFINE_SIMPLE_PROP(backlight, "backlight", NULL) DEFINE_SIMPLE_PROP(panel, "panel", NULL) DEFINE_SIMPLE_PROP(msi_parent, "msi-parent", "#msi-cells") DEFINE_SIMPLE_PROP(post_init_providers, "post-init-providers", NULL) DEFINE_SIMPLE_PROP(access_controllers, "access-controllers", "#access-controller-cells") DEFINE_SIMPLE_PROP(pses, "pses", "#pse-cells") DEFINE_SUFFIX_PROP(regulators, "-supply", NULL) DEFINE_SUFFIX_PROP(gpio, "-gpio", "#gpio-cells") static struct device_node *parse_gpios(struct device_node *np, const char *prop_name, int index) { if (!strcmp_suffix(prop_name, ",nr-gpios")) return NULL; return parse_suffix_prop_cells(np, prop_name, index, "-gpios", "#gpio-cells"); } static struct device_node *parse_iommu_maps(struct device_node *np, const char *prop_name, int index) { if (strcmp(prop_name, "iommu-map")) return NULL; return of_parse_phandle(np, prop_name, (index * 4) + 1); } static struct device_node *parse_gpio_compat(struct device_node *np, const char *prop_name, int index) { struct of_phandle_args sup_args; if (strcmp(prop_name, "gpio") && strcmp(prop_name, "gpios")) return NULL; /* * Ignore node with gpio-hog property since its gpios are all provided * by its parent. */ if (of_property_read_bool(np, "gpio-hog")) return NULL; if (of_parse_phandle_with_args(np, prop_name, "#gpio-cells", index, &sup_args)) return NULL; return sup_args.np; } static struct device_node *parse_interrupts(struct device_node *np, const char *prop_name, int index) { struct of_phandle_args sup_args; if (!IS_ENABLED(CONFIG_OF_IRQ) || IS_ENABLED(CONFIG_PPC)) return NULL; if (strcmp(prop_name, "interrupts") && strcmp(prop_name, "interrupts-extended")) return NULL; return of_irq_parse_one(np, index, &sup_args) ? NULL : sup_args.np; } static struct device_node *parse_remote_endpoint(struct device_node *np, const char *prop_name, int index) { /* Return NULL for index > 0 to signify end of remote-endpoints. */ if (index > 0 || strcmp(prop_name, "remote-endpoint")) return NULL; return of_graph_get_remote_port_parent(np); } static const struct supplier_bindings of_supplier_bindings[] = { { .parse_prop = parse_clocks, }, { .parse_prop = parse_interconnects, }, { .parse_prop = parse_iommus, .optional = true, }, { .parse_prop = parse_iommu_maps, .optional = true, }, { .parse_prop = parse_mboxes, }, { .parse_prop = parse_io_channels, }, { .parse_prop = parse_io_backends, }, { .parse_prop = parse_interrupt_parent, }, { .parse_prop = parse_dmas, .optional = true, }, { .parse_prop = parse_power_domains, }, { .parse_prop = parse_hwlocks, }, { .parse_prop = parse_extcon, }, { .parse_prop = parse_nvmem_cells, }, { .parse_prop = parse_phys, }, { .parse_prop = parse_wakeup_parent, }, { .parse_prop = parse_pinctrl0, }, { .parse_prop = parse_pinctrl1, }, { .parse_prop = parse_pinctrl2, }, { .parse_prop = parse_pinctrl3, }, { .parse_prop = parse_pinctrl4, }, { .parse_prop = parse_pinctrl5, }, { .parse_prop = parse_pinctrl6, }, { .parse_prop = parse_pinctrl7, }, { .parse_prop = parse_pinctrl8, }, { .parse_prop = parse_remote_endpoint, .get_con_dev = of_graph_get_port_parent, }, { .parse_prop = parse_pwms, }, { .parse_prop = parse_resets, }, { .parse_prop = parse_leds, }, { .parse_prop = parse_backlight, }, { .parse_prop = parse_panel, }, { .parse_prop = parse_msi_parent, }, { .parse_prop = parse_pses, }, { .parse_prop = parse_gpio_compat, }, { .parse_prop = parse_interrupts, }, { .parse_prop = parse_access_controllers, }, { .parse_prop = parse_regulators, }, { .parse_prop = parse_gpio, }, { .parse_prop = parse_gpios, }, { .parse_prop = parse_post_init_providers, .fwlink_flags = FWLINK_FLAG_IGNORE, }, {} }; /** * of_link_property - Create device links to suppliers listed in a property * @con_np: The consumer device tree node which contains the property * @prop_name: Name of property to be parsed * * This function checks if the property @prop_name that is present in the * @con_np device tree node is one of the known common device tree bindings * that list phandles to suppliers. If @prop_name isn't one, this function * doesn't do anything. * * If @prop_name is one, this function attempts to create fwnode links from the * consumer device tree node @con_np to all the suppliers device tree nodes * listed in @prop_name. * * Any failed attempt to create a fwnode link will NOT result in an immediate * return. of_link_property() must create links to all the available supplier * device tree nodes even when attempts to create a link to one or more * suppliers fail. */ static int of_link_property(struct device_node *con_np, const char *prop_name) { struct device_node *phandle; const struct supplier_bindings *s = of_supplier_bindings; unsigned int i = 0; bool matched = false; /* Do not stop at first failed link, link all available suppliers. */ while (!matched && s->parse_prop) { if (s->optional && !fw_devlink_is_strict()) { s++; continue; } while ((phandle = s->parse_prop(con_np, prop_name, i))) { struct device_node *con_dev_np; con_dev_np = s->get_con_dev ? s->get_con_dev(con_np) : of_node_get(con_np); matched = true; i++; of_link_to_phandle(con_dev_np, phandle, s->fwlink_flags); of_node_put(phandle); of_node_put(con_dev_np); } s++; } return 0; } static void __iomem *of_fwnode_iomap(struct fwnode_handle *fwnode, int index) { #ifdef CONFIG_OF_ADDRESS return of_iomap(to_of_node(fwnode), index); #else return NULL; #endif } static int of_fwnode_irq_get(const struct fwnode_handle *fwnode, unsigned int index) { return of_irq_get(to_of_node(fwnode), index); } static int of_fwnode_add_links(struct fwnode_handle *fwnode) { struct property *p; struct device_node *con_np = to_of_node(fwnode); if (IS_ENABLED(CONFIG_X86)) return 0; if (!con_np) return -EINVAL; for_each_property_of_node(con_np, p) of_link_property(con_np, p->name); return 0; } const struct fwnode_operations of_fwnode_ops = { .get = of_fwnode_get, .put = of_fwnode_put, .device_is_available = of_fwnode_device_is_available, .device_get_match_data = of_fwnode_device_get_match_data, .device_dma_supported = of_fwnode_device_dma_supported, .device_get_dma_attr = of_fwnode_device_get_dma_attr, .property_present = of_fwnode_property_present, .property_read_int_array = of_fwnode_property_read_int_array, .property_read_string_array = of_fwnode_property_read_string_array, .get_name = of_fwnode_get_name, .get_name_prefix = of_fwnode_get_name_prefix, .get_parent = of_fwnode_get_parent, .get_next_child_node = of_fwnode_get_next_child_node, .get_named_child_node = of_fwnode_get_named_child_node, .get_reference_args = of_fwnode_get_reference_args, .graph_get_next_endpoint = of_fwnode_graph_get_next_endpoint, .graph_get_remote_endpoint = of_fwnode_graph_get_remote_endpoint, .graph_get_port_parent = of_fwnode_graph_get_port_parent, .graph_parse_endpoint = of_fwnode_graph_parse_endpoint, .iomap = of_fwnode_iomap, .irq_get = of_fwnode_irq_get, .add_links = of_fwnode_add_links, }; EXPORT_SYMBOL_GPL(of_fwnode_ops); |
16 16 16 2 8 4 17 12 8 5 7 9 11 8 2 2 2 1 2 6 1 15 15 25 25 25 15 1 3 2 2 2 15 15 21 21 6 15 21 16 1 14 1 11 6 38 2 4 2 1 7 21 31 12 4 1 7 8 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 | // SPDX-License-Identifier: GPL-2.0-or-later /* * fs/inotify_user.c - inotify support for userspace * * Authors: * John McCutchan <ttb@tentacle.dhs.org> * Robert Love <rml@novell.com> * * Copyright (C) 2005 John McCutchan * Copyright 2006 Hewlett-Packard Development Company, L.P. * * Copyright (C) 2009 Eric Paris <Red Hat Inc> * inotify was largely rewriten to make use of the fsnotify infrastructure */ #include <linux/file.h> #include <linux/fs.h> /* struct inode */ #include <linux/fsnotify_backend.h> #include <linux/idr.h> #include <linux/init.h> /* fs_initcall */ #include <linux/inotify.h> #include <linux/kernel.h> /* roundup() */ #include <linux/namei.h> /* LOOKUP_FOLLOW */ #include <linux/sched/signal.h> #include <linux/slab.h> /* struct kmem_cache */ #include <linux/syscalls.h> #include <linux/types.h> #include <linux/anon_inodes.h> #include <linux/uaccess.h> #include <linux/poll.h> #include <linux/wait.h> #include <linux/memcontrol.h> #include <linux/security.h> #include "inotify.h" #include "../fdinfo.h" #include <asm/ioctls.h> /* * An inotify watch requires allocating an inotify_inode_mark structure as * well as pinning the watched inode. Doubling the size of a VFS inode * should be more than enough to cover the additional filesystem inode * size increase. */ #define INOTIFY_WATCH_COST (sizeof(struct inotify_inode_mark) + \ 2 * sizeof(struct inode)) /* configurable via /proc/sys/fs/inotify/ */ static int inotify_max_queued_events __read_mostly; struct kmem_cache *inotify_inode_mark_cachep __ro_after_init; #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> static long it_zero = 0; static long it_int_max = INT_MAX; static struct ctl_table inotify_table[] = { { .procname = "max_user_instances", .data = &init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES], .maxlen = sizeof(long), .mode = 0644, .proc_handler = proc_doulongvec_minmax, .extra1 = &it_zero, .extra2 = &it_int_max, }, { .procname = "max_user_watches", .data = &init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES], .maxlen = sizeof(long), .mode = 0644, .proc_handler = proc_doulongvec_minmax, .extra1 = &it_zero, .extra2 = &it_int_max, }, { .procname = "max_queued_events", .data = &inotify_max_queued_events, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO }, }; static void __init inotify_sysctls_init(void) { register_sysctl("fs/inotify", inotify_table); } #else #define inotify_sysctls_init() do { } while (0) #endif /* CONFIG_SYSCTL */ static inline __u32 inotify_arg_to_mask(struct inode *inode, u32 arg) { __u32 mask; /* * Everything should receive events when the inode is unmounted. * All directories care about children. */ mask = (FS_UNMOUNT); if (S_ISDIR(inode->i_mode)) mask |= FS_EVENT_ON_CHILD; /* mask off the flags used to open the fd */ mask |= (arg & INOTIFY_USER_MASK); return mask; } #define INOTIFY_MARK_FLAGS \ (FSNOTIFY_MARK_FLAG_EXCL_UNLINK | FSNOTIFY_MARK_FLAG_IN_ONESHOT) static inline unsigned int inotify_arg_to_flags(u32 arg) { unsigned int flags = 0; if (arg & IN_EXCL_UNLINK) flags |= FSNOTIFY_MARK_FLAG_EXCL_UNLINK; if (arg & IN_ONESHOT) flags |= FSNOTIFY_MARK_FLAG_IN_ONESHOT; return flags; } static inline u32 inotify_mask_to_arg(__u32 mask) { return mask & (IN_ALL_EVENTS | IN_ISDIR | IN_UNMOUNT | IN_IGNORED | IN_Q_OVERFLOW); } /* inotify userspace file descriptor functions */ static __poll_t inotify_poll(struct file *file, poll_table *wait) { struct fsnotify_group *group = file->private_data; __poll_t ret = 0; poll_wait(file, &group->notification_waitq, wait); spin_lock(&group->notification_lock); if (!fsnotify_notify_queue_is_empty(group)) ret = EPOLLIN | EPOLLRDNORM; spin_unlock(&group->notification_lock); return ret; } static int round_event_name_len(struct fsnotify_event *fsn_event) { struct inotify_event_info *event; event = INOTIFY_E(fsn_event); if (!event->name_len) return 0; return roundup(event->name_len + 1, sizeof(struct inotify_event)); } /* * Get an inotify_kernel_event if one exists and is small * enough to fit in "count". Return an error pointer if * not large enough. * * Called with the group->notification_lock held. */ static struct fsnotify_event *get_one_event(struct fsnotify_group *group, size_t count) { size_t event_size = sizeof(struct inotify_event); struct fsnotify_event *event; event = fsnotify_peek_first_event(group); if (!event) return NULL; pr_debug("%s: group=%p event=%p\n", __func__, group, event); event_size += round_event_name_len(event); if (event_size > count) return ERR_PTR(-EINVAL); /* held the notification_lock the whole time, so this is the * same event we peeked above */ fsnotify_remove_first_event(group); return event; } /* * Copy an event to user space, returning how much we copied. * * We already checked that the event size is smaller than the * buffer we had in "get_one_event()" above. */ static ssize_t copy_event_to_user(struct fsnotify_group *group, struct fsnotify_event *fsn_event, char __user *buf) { struct inotify_event inotify_event; struct inotify_event_info *event; size_t event_size = sizeof(struct inotify_event); size_t name_len; size_t pad_name_len; pr_debug("%s: group=%p event=%p\n", __func__, group, fsn_event); event = INOTIFY_E(fsn_event); name_len = event->name_len; /* * round up name length so it is a multiple of event_size * plus an extra byte for the terminating '\0'. */ pad_name_len = round_event_name_len(fsn_event); inotify_event.len = pad_name_len; inotify_event.mask = inotify_mask_to_arg(event->mask); inotify_event.wd = event->wd; inotify_event.cookie = event->sync_cookie; /* send the main event */ if (copy_to_user(buf, &inotify_event, event_size)) return -EFAULT; buf += event_size; /* * fsnotify only stores the pathname, so here we have to send the pathname * and then pad that pathname out to a multiple of sizeof(inotify_event) * with zeros. */ if (pad_name_len) { /* copy the path name */ if (copy_to_user(buf, event->name, name_len)) return -EFAULT; buf += name_len; /* fill userspace with 0's */ if (clear_user(buf, pad_name_len - name_len)) return -EFAULT; event_size += pad_name_len; } return event_size; } static ssize_t inotify_read(struct file *file, char __user *buf, size_t count, loff_t *pos) { struct fsnotify_group *group; struct fsnotify_event *kevent; char __user *start; int ret; DEFINE_WAIT_FUNC(wait, woken_wake_function); start = buf; group = file->private_data; add_wait_queue(&group->notification_waitq, &wait); while (1) { spin_lock(&group->notification_lock); kevent = get_one_event(group, count); spin_unlock(&group->notification_lock); pr_debug("%s: group=%p kevent=%p\n", __func__, group, kevent); if (kevent) { ret = PTR_ERR(kevent); if (IS_ERR(kevent)) break; ret = copy_event_to_user(group, kevent, buf); fsnotify_destroy_event(group, kevent); if (ret < 0) break; buf += ret; count -= ret; continue; } ret = -EAGAIN; if (file->f_flags & O_NONBLOCK) break; ret = -ERESTARTSYS; if (signal_pending(current)) break; if (start != buf) break; wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); } remove_wait_queue(&group->notification_waitq, &wait); if (start != buf && ret != -EFAULT) ret = buf - start; return ret; } static int inotify_release(struct inode *ignored, struct file *file) { struct fsnotify_group *group = file->private_data; pr_debug("%s: group=%p\n", __func__, group); /* free this group, matching get was inotify_init->fsnotify_obtain_group */ fsnotify_destroy_group(group); return 0; } static long inotify_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct fsnotify_group *group; struct fsnotify_event *fsn_event; void __user *p; int ret = -ENOTTY; size_t send_len = 0; group = file->private_data; p = (void __user *) arg; pr_debug("%s: group=%p cmd=%u\n", __func__, group, cmd); switch (cmd) { case FIONREAD: spin_lock(&group->notification_lock); list_for_each_entry(fsn_event, &group->notification_list, list) { send_len += sizeof(struct inotify_event); send_len += round_event_name_len(fsn_event); } spin_unlock(&group->notification_lock); ret = put_user(send_len, (int __user *) p); break; #ifdef CONFIG_CHECKPOINT_RESTORE case INOTIFY_IOC_SETNEXTWD: ret = -EINVAL; if (arg >= 1 && arg <= INT_MAX) { struct inotify_group_private_data *data; data = &group->inotify_data; spin_lock(&data->idr_lock); idr_set_cursor(&data->idr, (unsigned int)arg); spin_unlock(&data->idr_lock); ret = 0; } break; #endif /* CONFIG_CHECKPOINT_RESTORE */ } return ret; } static const struct file_operations inotify_fops = { .show_fdinfo = inotify_show_fdinfo, .poll = inotify_poll, .read = inotify_read, .fasync = fsnotify_fasync, .release = inotify_release, .unlocked_ioctl = inotify_ioctl, .compat_ioctl = inotify_ioctl, .llseek = noop_llseek, }; /* * find_inode - resolve a user-given path to a specific inode */ static int inotify_find_inode(const char __user *dirname, struct path *path, unsigned int flags, __u64 mask) { int error; error = user_path_at(AT_FDCWD, dirname, flags, path); if (error) return error; /* you can only watch an inode if you have read permissions on it */ error = path_permission(path, MAY_READ); if (error) { path_put(path); return error; } error = security_path_notify(path, mask, FSNOTIFY_OBJ_TYPE_INODE); if (error) path_put(path); return error; } static int inotify_add_to_idr(struct idr *idr, spinlock_t *idr_lock, struct inotify_inode_mark *i_mark) { int ret; idr_preload(GFP_KERNEL); spin_lock(idr_lock); ret = idr_alloc_cyclic(idr, i_mark, 1, 0, GFP_NOWAIT); if (ret >= 0) { /* we added the mark to the idr, take a reference */ i_mark->wd = ret; fsnotify_get_mark(&i_mark->fsn_mark); } spin_unlock(idr_lock); idr_preload_end(); return ret < 0 ? ret : 0; } static struct inotify_inode_mark *inotify_idr_find_locked(struct fsnotify_group *group, int wd) { struct idr *idr = &group->inotify_data.idr; spinlock_t *idr_lock = &group->inotify_data.idr_lock; struct inotify_inode_mark *i_mark; assert_spin_locked(idr_lock); i_mark = idr_find(idr, wd); if (i_mark) { struct fsnotify_mark *fsn_mark = &i_mark->fsn_mark; fsnotify_get_mark(fsn_mark); /* One ref for being in the idr, one ref we just took */ BUG_ON(refcount_read(&fsn_mark->refcnt) < 2); } return i_mark; } static struct inotify_inode_mark *inotify_idr_find(struct fsnotify_group *group, int wd) { struct inotify_inode_mark *i_mark; spinlock_t *idr_lock = &group->inotify_data.idr_lock; spin_lock(idr_lock); i_mark = inotify_idr_find_locked(group, wd); spin_unlock(idr_lock); return i_mark; } /* * Remove the mark from the idr (if present) and drop the reference * on the mark because it was in the idr. */ static void inotify_remove_from_idr(struct fsnotify_group *group, struct inotify_inode_mark *i_mark) { struct idr *idr = &group->inotify_data.idr; spinlock_t *idr_lock = &group->inotify_data.idr_lock; struct inotify_inode_mark *found_i_mark = NULL; int wd; spin_lock(idr_lock); wd = i_mark->wd; /* * does this i_mark think it is in the idr? we shouldn't get called * if it wasn't.... */ if (wd == -1) { WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p\n", __func__, i_mark, i_mark->wd, i_mark->fsn_mark.group); goto out; } /* Lets look in the idr to see if we find it */ found_i_mark = inotify_idr_find_locked(group, wd); if (unlikely(!found_i_mark)) { WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p\n", __func__, i_mark, i_mark->wd, i_mark->fsn_mark.group); goto out; } /* * We found an mark in the idr at the right wd, but it's * not the mark we were told to remove. eparis seriously * fucked up somewhere. */ if (unlikely(found_i_mark != i_mark)) { WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p " "found_i_mark=%p found_i_mark->wd=%d " "found_i_mark->group=%p\n", __func__, i_mark, i_mark->wd, i_mark->fsn_mark.group, found_i_mark, found_i_mark->wd, found_i_mark->fsn_mark.group); goto out; } /* * One ref for being in the idr * one ref grabbed by inotify_idr_find */ if (unlikely(refcount_read(&i_mark->fsn_mark.refcnt) < 2)) { printk(KERN_ERR "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p\n", __func__, i_mark, i_mark->wd, i_mark->fsn_mark.group); /* we can't really recover with bad ref cnting.. */ BUG(); } idr_remove(idr, wd); /* Removed from the idr, drop that ref. */ fsnotify_put_mark(&i_mark->fsn_mark); out: i_mark->wd = -1; spin_unlock(idr_lock); /* match the ref taken by inotify_idr_find_locked() */ if (found_i_mark) fsnotify_put_mark(&found_i_mark->fsn_mark); } /* * Send IN_IGNORED for this wd, remove this wd from the idr. */ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark, struct fsnotify_group *group) { struct inotify_inode_mark *i_mark; /* Queue ignore event for the watch */ inotify_handle_inode_event(fsn_mark, FS_IN_IGNORED, NULL, NULL, NULL, 0); i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); /* remove this mark from the idr */ inotify_remove_from_idr(group, i_mark); dec_inotify_watches(group->inotify_data.ucounts); } static int inotify_update_existing_watch(struct fsnotify_group *group, struct inode *inode, u32 arg) { struct fsnotify_mark *fsn_mark; struct inotify_inode_mark *i_mark; __u32 old_mask, new_mask; int replace = !(arg & IN_MASK_ADD); int create = (arg & IN_MASK_CREATE); int ret; fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group); if (!fsn_mark) return -ENOENT; else if (create) { ret = -EEXIST; goto out; } i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); spin_lock(&fsn_mark->lock); old_mask = fsn_mark->mask; if (replace) { fsn_mark->mask = 0; fsn_mark->flags &= ~INOTIFY_MARK_FLAGS; } fsn_mark->mask |= inotify_arg_to_mask(inode, arg); fsn_mark->flags |= inotify_arg_to_flags(arg); new_mask = fsn_mark->mask; spin_unlock(&fsn_mark->lock); if (old_mask != new_mask) { /* more bits in old than in new? */ int dropped = (old_mask & ~new_mask); /* more bits in this fsn_mark than the inode's mask? */ int do_inode = (new_mask & ~inode->i_fsnotify_mask); /* update the inode with this new fsn_mark */ if (dropped || do_inode) fsnotify_recalc_mask(inode->i_fsnotify_marks); } /* return the wd */ ret = i_mark->wd; out: /* match the get from fsnotify_find_mark() */ fsnotify_put_mark(fsn_mark); return ret; } static int inotify_new_watch(struct fsnotify_group *group, struct inode *inode, u32 arg) { struct inotify_inode_mark *tmp_i_mark; int ret; struct idr *idr = &group->inotify_data.idr; spinlock_t *idr_lock = &group->inotify_data.idr_lock; tmp_i_mark = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL); if (unlikely(!tmp_i_mark)) return -ENOMEM; fsnotify_init_mark(&tmp_i_mark->fsn_mark, group); tmp_i_mark->fsn_mark.mask = inotify_arg_to_mask(inode, arg); tmp_i_mark->fsn_mark.flags = inotify_arg_to_flags(arg); tmp_i_mark->wd = -1; ret = inotify_add_to_idr(idr, idr_lock, tmp_i_mark); if (ret) goto out_err; /* increment the number of watches the user has */ if (!inc_inotify_watches(group->inotify_data.ucounts)) { inotify_remove_from_idr(group, tmp_i_mark); ret = -ENOSPC; goto out_err; } /* we are on the idr, now get on the inode */ ret = fsnotify_add_inode_mark_locked(&tmp_i_mark->fsn_mark, inode, 0); if (ret) { /* we failed to get on the inode, get off the idr */ inotify_remove_from_idr(group, tmp_i_mark); goto out_err; } /* return the watch descriptor for this new mark */ ret = tmp_i_mark->wd; out_err: /* match the ref from fsnotify_init_mark() */ fsnotify_put_mark(&tmp_i_mark->fsn_mark); return ret; } static int inotify_update_watch(struct fsnotify_group *group, struct inode *inode, u32 arg) { int ret = 0; fsnotify_group_lock(group); /* try to update and existing watch with the new arg */ ret = inotify_update_existing_watch(group, inode, arg); /* no mark present, try to add a new one */ if (ret == -ENOENT) ret = inotify_new_watch(group, inode, arg); fsnotify_group_unlock(group); return ret; } static struct fsnotify_group *inotify_new_group(unsigned int max_events) { struct fsnotify_group *group; struct inotify_event_info *oevent; group = fsnotify_alloc_group(&inotify_fsnotify_ops, FSNOTIFY_GROUP_USER); if (IS_ERR(group)) return group; oevent = kmalloc(sizeof(struct inotify_event_info), GFP_KERNEL_ACCOUNT); if (unlikely(!oevent)) { fsnotify_destroy_group(group); return ERR_PTR(-ENOMEM); } group->overflow_event = &oevent->fse; fsnotify_init_event(group->overflow_event); oevent->mask = FS_Q_OVERFLOW; oevent->wd = -1; oevent->sync_cookie = 0; oevent->name_len = 0; group->max_events = max_events; group->memcg = get_mem_cgroup_from_mm(current->mm); spin_lock_init(&group->inotify_data.idr_lock); idr_init(&group->inotify_data.idr); group->inotify_data.ucounts = inc_ucount(current_user_ns(), current_euid(), UCOUNT_INOTIFY_INSTANCES); if (!group->inotify_data.ucounts) { fsnotify_destroy_group(group); return ERR_PTR(-EMFILE); } return group; } /* inotify syscalls */ static int do_inotify_init(int flags) { struct fsnotify_group *group; int ret; /* Check the IN_* constants for consistency. */ BUILD_BUG_ON(IN_CLOEXEC != O_CLOEXEC); BUILD_BUG_ON(IN_NONBLOCK != O_NONBLOCK); if (flags & ~(IN_CLOEXEC | IN_NONBLOCK)) return -EINVAL; /* fsnotify_obtain_group took a reference to group, we put this when we kill the file in the end */ group = inotify_new_group(inotify_max_queued_events); if (IS_ERR(group)) return PTR_ERR(group); ret = anon_inode_getfd("inotify", &inotify_fops, group, O_RDONLY | flags); if (ret < 0) fsnotify_destroy_group(group); return ret; } SYSCALL_DEFINE1(inotify_init1, int, flags) { return do_inotify_init(flags); } SYSCALL_DEFINE0(inotify_init) { return do_inotify_init(0); } SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname, u32, mask) { struct fsnotify_group *group; struct inode *inode; struct path path; struct fd f; int ret; unsigned flags = 0; /* * We share a lot of code with fs/dnotify. We also share * the bit layout between inotify's IN_* and the fsnotify * FS_*. This check ensures that only the inotify IN_* * bits get passed in and set in watches/events. */ if (unlikely(mask & ~ALL_INOTIFY_BITS)) return -EINVAL; /* * Require at least one valid bit set in the mask. * Without _something_ set, we would have no events to * watch for. */ if (unlikely(!(mask & ALL_INOTIFY_BITS))) return -EINVAL; f = fdget(fd); if (unlikely(!f.file)) return -EBADF; /* IN_MASK_ADD and IN_MASK_CREATE don't make sense together */ if (unlikely((mask & IN_MASK_ADD) && (mask & IN_MASK_CREATE))) { ret = -EINVAL; goto fput_and_out; } /* verify that this is indeed an inotify instance */ if (unlikely(f.file->f_op != &inotify_fops)) { ret = -EINVAL; goto fput_and_out; } if (!(mask & IN_DONT_FOLLOW)) flags |= LOOKUP_FOLLOW; if (mask & IN_ONLYDIR) flags |= LOOKUP_DIRECTORY; ret = inotify_find_inode(pathname, &path, flags, (mask & IN_ALL_EVENTS)); if (ret) goto fput_and_out; /* inode held in place by reference to path; group by fget on fd */ inode = path.dentry->d_inode; group = f.file->private_data; /* create/update an inode mark */ ret = inotify_update_watch(group, inode, mask); path_put(&path); fput_and_out: fdput(f); return ret; } SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd) { struct fsnotify_group *group; struct inotify_inode_mark *i_mark; struct fd f; int ret = -EINVAL; f = fdget(fd); if (unlikely(!f.file)) return -EBADF; /* verify that this is indeed an inotify instance */ if (unlikely(f.file->f_op != &inotify_fops)) goto out; group = f.file->private_data; i_mark = inotify_idr_find(group, wd); if (unlikely(!i_mark)) goto out; ret = 0; fsnotify_destroy_mark(&i_mark->fsn_mark, group); /* match ref taken by inotify_idr_find */ fsnotify_put_mark(&i_mark->fsn_mark); out: fdput(f); return ret; } /* * inotify_user_setup - Our initialization function. Note that we cannot return * error because we have compiled-in VFS hooks. So an (unlikely) failure here * must result in panic(). */ static int __init inotify_user_setup(void) { unsigned long watches_max; struct sysinfo si; si_meminfo(&si); /* * Allow up to 1% of addressable memory to be allocated for inotify * watches (per user) limited to the range [8192, 1048576]. */ watches_max = (((si.totalram - si.totalhigh) / 100) << PAGE_SHIFT) / INOTIFY_WATCH_COST; watches_max = clamp(watches_max, 8192UL, 1048576UL); BUILD_BUG_ON(IN_ACCESS != FS_ACCESS); BUILD_BUG_ON(IN_MODIFY != FS_MODIFY); BUILD_BUG_ON(IN_ATTRIB != FS_ATTRIB); BUILD_BUG_ON(IN_CLOSE_WRITE != FS_CLOSE_WRITE); BUILD_BUG_ON(IN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE); BUILD_BUG_ON(IN_OPEN != FS_OPEN); BUILD_BUG_ON(IN_MOVED_FROM != FS_MOVED_FROM); BUILD_BUG_ON(IN_MOVED_TO != FS_MOVED_TO); BUILD_BUG_ON(IN_CREATE != FS_CREATE); BUILD_BUG_ON(IN_DELETE != FS_DELETE); BUILD_BUG_ON(IN_DELETE_SELF != FS_DELETE_SELF); BUILD_BUG_ON(IN_MOVE_SELF != FS_MOVE_SELF); BUILD_BUG_ON(IN_UNMOUNT != FS_UNMOUNT); BUILD_BUG_ON(IN_Q_OVERFLOW != FS_Q_OVERFLOW); BUILD_BUG_ON(IN_IGNORED != FS_IN_IGNORED); BUILD_BUG_ON(IN_ISDIR != FS_ISDIR); BUILD_BUG_ON(HWEIGHT32(ALL_INOTIFY_BITS) != 22); inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, SLAB_PANIC|SLAB_ACCOUNT); inotify_max_queued_events = 16384; init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES] = 128; init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES] = watches_max; inotify_sysctls_init(); return 0; } fs_initcall(inotify_user_setup); |
3 1 1 2 2 2 3 1 2 3 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 | // SPDX-License-Identifier: GPL-2.0-or-later /* Driver for Philips webcam Functions that send various control messages to the webcam, including video modes. (C) 1999-2003 Nemosoft Unv. (C) 2004-2006 Luc Saillard (luc@saillard.org) (C) 2011 Hans de Goede <hdegoede@redhat.com> NOTE: this version of pwc is an unofficial (modified) release of pwc & pcwx driver and thus may have bugs that are not present in the original version. Please send bug reports and support requests to <luc@saillard.org>. NOTE: this version of pwc is an unofficial (modified) release of pwc & pcwx driver and thus may have bugs that are not present in the original version. Please send bug reports and support requests to <luc@saillard.org>. The decompression routines have been implemented by reverse-engineering the Nemosoft binary pwcx module. Caveat emptor. */ /* Changes 2001/08/03 Alvarado Added methods for changing white balance and red/green gains */ /* Control functions for the cam; brightness, contrast, video mode, etc. */ #ifdef __KERNEL__ #include <linux/uaccess.h> #endif #include <asm/errno.h> #include "pwc.h" #include "pwc-kiara.h" #include "pwc-timon.h" #include "pwc-dec1.h" #include "pwc-dec23.h" /* Selectors for status controls used only in this file */ #define GET_STATUS_B00 0x0B00 #define SENSOR_TYPE_FORMATTER1 0x0C00 #define GET_STATUS_3000 0x3000 #define READ_RAW_Y_MEAN_FORMATTER 0x3100 #define SET_POWER_SAVE_MODE_FORMATTER 0x3200 #define MIRROR_IMAGE_FORMATTER 0x3300 #define LED_FORMATTER 0x3400 #define LOWLIGHT 0x3500 #define GET_STATUS_3600 0x3600 #define SENSOR_TYPE_FORMATTER2 0x3700 #define GET_STATUS_3800 0x3800 #define GET_STATUS_4000 0x4000 #define GET_STATUS_4100 0x4100 /* Get */ #define CTL_STATUS_4200 0x4200 /* [GS] 1 */ /* Formatters for the Video Endpoint controls [GS]ET_EP_STREAM_CTL */ #define VIDEO_OUTPUT_CONTROL_FORMATTER 0x0100 static const char *size2name[PSZ_MAX] = { "subQCIF", "QSIF", "QCIF", "SIF", "CIF", "VGA", }; /********/ /* Entries for the Nala (645/646) camera; the Nala doesn't have compression preferences, so you either get compressed or non-compressed streams. An alternate value of 0 means this mode is not available at all. */ #define PWC_FPS_MAX_NALA 8 struct Nala_table_entry { char alternate; /* USB alternate setting */ int compressed; /* Compressed yes/no */ unsigned char mode[3]; /* precomputed mode table */ }; static unsigned int Nala_fps_vector[PWC_FPS_MAX_NALA] = { 4, 5, 7, 10, 12, 15, 20, 24 }; static struct Nala_table_entry Nala_table[PSZ_MAX][PWC_FPS_MAX_NALA] = { #include "pwc-nala.h" }; /****************************************************************************/ static int recv_control_msg(struct pwc_device *pdev, u8 request, u16 value, int recv_count) { int rc; rc = usb_control_msg(pdev->udev, usb_rcvctrlpipe(pdev->udev, 0), request, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, pdev->vcinterface, pdev->ctrl_buf, recv_count, USB_CTRL_GET_TIMEOUT); if (rc < 0) PWC_ERROR("recv_control_msg error %d req %02x val %04x\n", rc, request, value); return rc; } static inline int send_video_command(struct pwc_device *pdev, int index, const unsigned char *buf, int buflen) { int rc; memcpy(pdev->ctrl_buf, buf, buflen); rc = usb_control_msg(pdev->udev, usb_sndctrlpipe(pdev->udev, 0), SET_EP_STREAM_CTL, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, VIDEO_OUTPUT_CONTROL_FORMATTER, index, pdev->ctrl_buf, buflen, USB_CTRL_SET_TIMEOUT); if (rc >= 0) memcpy(pdev->cmd_buf, buf, buflen); else PWC_ERROR("send_video_command error %d\n", rc); return rc; } int send_control_msg(struct pwc_device *pdev, u8 request, u16 value, void *buf, int buflen) { return usb_control_msg(pdev->udev, usb_sndctrlpipe(pdev->udev, 0), request, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, pdev->vcinterface, buf, buflen, USB_CTRL_SET_TIMEOUT); } static int set_video_mode_Nala(struct pwc_device *pdev, int size, int pixfmt, int frames, int *compression, int send_to_cam) { int fps, ret = 0; struct Nala_table_entry *pEntry; int frames2frames[31] = { /* closest match of framerate */ 0, 0, 0, 0, 4, /* 0-4 */ 5, 5, 7, 7, 10, /* 5-9 */ 10, 10, 12, 12, 15, /* 10-14 */ 15, 15, 15, 20, 20, /* 15-19 */ 20, 20, 20, 24, 24, /* 20-24 */ 24, 24, 24, 24, 24, /* 25-29 */ 24 /* 30 */ }; int frames2table[31] = { 0, 0, 0, 0, 0, /* 0-4 */ 1, 1, 1, 2, 2, /* 5-9 */ 3, 3, 4, 4, 4, /* 10-14 */ 5, 5, 5, 5, 5, /* 15-19 */ 6, 6, 6, 6, 7, /* 20-24 */ 7, 7, 7, 7, 7, /* 25-29 */ 7 /* 30 */ }; if (size < 0 || size > PSZ_CIF) return -EINVAL; if (frames < 4) frames = 4; else if (size > PSZ_QCIF && frames > 15) frames = 15; else if (frames > 25) frames = 25; frames = frames2frames[frames]; fps = frames2table[frames]; pEntry = &Nala_table[size][fps]; if (pEntry->alternate == 0) return -EINVAL; if (send_to_cam) ret = send_video_command(pdev, pdev->vendpoint, pEntry->mode, 3); if (ret < 0) return ret; if (pEntry->compressed && pixfmt == V4L2_PIX_FMT_YUV420) pwc_dec1_init(pdev, pEntry->mode); /* Set various parameters */ pdev->pixfmt = pixfmt; pdev->vframes = frames; pdev->valternate = pEntry->alternate; pdev->width = pwc_image_sizes[size][0]; pdev->height = pwc_image_sizes[size][1]; pdev->frame_size = (pdev->width * pdev->height * 3) / 2; if (pEntry->compressed) { if (pdev->release < 5) { /* 4 fold compression */ pdev->vbandlength = 528; pdev->frame_size /= 4; } else { pdev->vbandlength = 704; pdev->frame_size /= 3; } } else pdev->vbandlength = 0; /* Let pwc-if.c:isoc_init know we don't support higher compression */ *compression = 3; return 0; } static int set_video_mode_Timon(struct pwc_device *pdev, int size, int pixfmt, int frames, int *compression, int send_to_cam) { const struct Timon_table_entry *pChoose; int fps, ret = 0; if (size >= PSZ_MAX || *compression < 0 || *compression > 3) return -EINVAL; if (frames < 5) frames = 5; else if (size == PSZ_VGA && frames > 15) frames = 15; else if (frames > 30) frames = 30; fps = (frames / 5) - 1; /* Find a supported framerate with progressively higher compression */ do { pChoose = &Timon_table[size][fps][*compression]; if (pChoose->alternate != 0) break; (*compression)++; } while (*compression <= 3); if (pChoose->alternate == 0) return -ENOENT; /* Not supported. */ if (send_to_cam) ret = send_video_command(pdev, pdev->vendpoint, pChoose->mode, 13); if (ret < 0) return ret; if (pChoose->bandlength > 0 && pixfmt == V4L2_PIX_FMT_YUV420) pwc_dec23_init(pdev, pChoose->mode); /* Set various parameters */ pdev->pixfmt = pixfmt; pdev->vframes = (fps + 1) * 5; pdev->valternate = pChoose->alternate; pdev->width = pwc_image_sizes[size][0]; pdev->height = pwc_image_sizes[size][1]; pdev->vbandlength = pChoose->bandlength; if (pChoose->bandlength > 0) pdev->frame_size = (pChoose->bandlength * pdev->height) / 4; else pdev->frame_size = (pdev->width * pdev->height * 12) / 8; return 0; } static int set_video_mode_Kiara(struct pwc_device *pdev, int size, int pixfmt, int frames, int *compression, int send_to_cam) { const struct Kiara_table_entry *pChoose; int fps, ret = 0; if (size >= PSZ_MAX || *compression < 0 || *compression > 3) return -EINVAL; if (frames < 5) frames = 5; else if (size == PSZ_VGA && frames > 15) frames = 15; else if (frames > 30) frames = 30; fps = (frames / 5) - 1; /* Find a supported framerate with progressively higher compression */ do { pChoose = &Kiara_table[size][fps][*compression]; if (pChoose->alternate != 0) break; (*compression)++; } while (*compression <= 3); if (pChoose->alternate == 0) return -ENOENT; /* Not supported. */ /* Firmware bug: video endpoint is 5, but commands are sent to endpoint 4 */ if (send_to_cam) ret = send_video_command(pdev, 4, pChoose->mode, 12); if (ret < 0) return ret; if (pChoose->bandlength > 0 && pixfmt == V4L2_PIX_FMT_YUV420) pwc_dec23_init(pdev, pChoose->mode); /* All set and go */ pdev->pixfmt = pixfmt; pdev->vframes = (fps + 1) * 5; pdev->valternate = pChoose->alternate; pdev->width = pwc_image_sizes[size][0]; pdev->height = pwc_image_sizes[size][1]; pdev->vbandlength = pChoose->bandlength; if (pdev->vbandlength > 0) pdev->frame_size = (pdev->vbandlength * pdev->height) / 4; else pdev->frame_size = (pdev->width * pdev->height * 12) / 8; PWC_TRACE("frame_size=%d, vframes=%d, vsize=%d, vbandlength=%d\n", pdev->frame_size, pdev->vframes, size, pdev->vbandlength); return 0; } int pwc_set_video_mode(struct pwc_device *pdev, int width, int height, int pixfmt, int frames, int *compression, int send_to_cam) { int ret, size; PWC_DEBUG_FLOW("set_video_mode(%dx%d @ %d, pixfmt %08x).\n", width, height, frames, pixfmt); size = pwc_get_size(pdev, width, height); PWC_TRACE("decode_size = %d.\n", size); if (DEVICE_USE_CODEC1(pdev->type)) { ret = set_video_mode_Nala(pdev, size, pixfmt, frames, compression, send_to_cam); } else if (DEVICE_USE_CODEC3(pdev->type)) { ret = set_video_mode_Kiara(pdev, size, pixfmt, frames, compression, send_to_cam); } else { ret = set_video_mode_Timon(pdev, size, pixfmt, frames, compression, send_to_cam); } if (ret < 0) { PWC_ERROR("Failed to set video mode %s@%d fps; return code = %d\n", size2name[size], frames, ret); return ret; } pdev->frame_total_size = pdev->frame_size + pdev->frame_header_size + pdev->frame_trailer_size; PWC_DEBUG_SIZE("Set resolution to %dx%d\n", pdev->width, pdev->height); return 0; } static unsigned int pwc_get_fps_Nala(struct pwc_device *pdev, unsigned int index, unsigned int size) { unsigned int i; for (i = 0; i < PWC_FPS_MAX_NALA; i++) { if (Nala_table[size][i].alternate) { if (index--==0) return Nala_fps_vector[i]; } } return 0; } static unsigned int pwc_get_fps_Kiara(struct pwc_device *pdev, unsigned int index, unsigned int size) { unsigned int i; for (i = 0; i < PWC_FPS_MAX_KIARA; i++) { if (Kiara_table[size][i][3].alternate) { if (index--==0) return Kiara_fps_vector[i]; } } return 0; } static unsigned int pwc_get_fps_Timon(struct pwc_device *pdev, unsigned int index, unsigned int size) { unsigned int i; for (i=0; i < PWC_FPS_MAX_TIMON; i++) { if (Timon_table[size][i][3].alternate) { if (index--==0) return Timon_fps_vector[i]; } } return 0; } unsigned int pwc_get_fps(struct pwc_device *pdev, unsigned int index, unsigned int size) { unsigned int ret; if (DEVICE_USE_CODEC1(pdev->type)) { ret = pwc_get_fps_Nala(pdev, index, size); } else if (DEVICE_USE_CODEC3(pdev->type)) { ret = pwc_get_fps_Kiara(pdev, index, size); } else { ret = pwc_get_fps_Timon(pdev, index, size); } return ret; } int pwc_get_u8_ctrl(struct pwc_device *pdev, u8 request, u16 value, int *data) { int ret; ret = recv_control_msg(pdev, request, value, 1); if (ret < 0) return ret; *data = pdev->ctrl_buf[0]; return 0; } int pwc_set_u8_ctrl(struct pwc_device *pdev, u8 request, u16 value, u8 data) { int ret; pdev->ctrl_buf[0] = data; ret = send_control_msg(pdev, request, value, pdev->ctrl_buf, 1); if (ret < 0) return ret; return 0; } int pwc_get_s8_ctrl(struct pwc_device *pdev, u8 request, u16 value, int *data) { int ret; ret = recv_control_msg(pdev, request, value, 1); if (ret < 0) return ret; *data = ((s8 *)pdev->ctrl_buf)[0]; return 0; } int pwc_get_u16_ctrl(struct pwc_device *pdev, u8 request, u16 value, int *data) { int ret; ret = recv_control_msg(pdev, request, value, 2); if (ret < 0) return ret; *data = (pdev->ctrl_buf[1] << 8) | pdev->ctrl_buf[0]; return 0; } int pwc_set_u16_ctrl(struct pwc_device *pdev, u8 request, u16 value, u16 data) { int ret; pdev->ctrl_buf[0] = data & 0xff; pdev->ctrl_buf[1] = data >> 8; ret = send_control_msg(pdev, request, value, pdev->ctrl_buf, 2); if (ret < 0) return ret; return 0; } int pwc_button_ctrl(struct pwc_device *pdev, u16 value) { int ret; ret = send_control_msg(pdev, SET_STATUS_CTL, value, NULL, 0); if (ret < 0) return ret; return 0; } /* POWER */ void pwc_camera_power(struct pwc_device *pdev, int power) { int r; if (!pdev->power_save) return; if (pdev->type < 675 || (pdev->type < 730 && pdev->release < 6)) return; /* Not supported by Nala or Timon < release 6 */ if (power) pdev->ctrl_buf[0] = 0x00; /* active */ else pdev->ctrl_buf[0] = 0xFF; /* power save */ r = send_control_msg(pdev, SET_STATUS_CTL, SET_POWER_SAVE_MODE_FORMATTER, pdev->ctrl_buf, 1); if (r < 0) PWC_ERROR("Failed to power %s camera (%d)\n", power ? "on" : "off", r); } int pwc_set_leds(struct pwc_device *pdev, int on_value, int off_value) { int r; if (pdev->type < 730) return 0; on_value /= 100; off_value /= 100; if (on_value < 0) on_value = 0; if (on_value > 0xff) on_value = 0xff; if (off_value < 0) off_value = 0; if (off_value > 0xff) off_value = 0xff; pdev->ctrl_buf[0] = on_value; pdev->ctrl_buf[1] = off_value; r = send_control_msg(pdev, SET_STATUS_CTL, LED_FORMATTER, pdev->ctrl_buf, 2); if (r < 0) PWC_ERROR("Failed to set LED on/off time (%d)\n", r); return r; } #ifdef CONFIG_USB_PWC_DEBUG int pwc_get_cmos_sensor(struct pwc_device *pdev, int *sensor) { int ret, request; if (pdev->type < 675) request = SENSOR_TYPE_FORMATTER1; else if (pdev->type < 730) return -1; /* The Vesta series doesn't have this call */ else request = SENSOR_TYPE_FORMATTER2; ret = recv_control_msg(pdev, GET_STATUS_CTL, request, 1); if (ret < 0) return ret; if (pdev->type < 675) *sensor = pdev->ctrl_buf[0] | 0x100; else *sensor = pdev->ctrl_buf[0]; return 0; } #endif |
4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 | /* SPDX-License-Identifier: GPL-2.0 */ /* * win_minmax.h: windowed min/max tracker by Kathleen Nichols. * */ #ifndef MINMAX_H #define MINMAX_H #include <linux/types.h> /* A single data point for our parameterized min-max tracker */ struct minmax_sample { u32 t; /* time measurement was taken */ u32 v; /* value measured */ }; /* State for the parameterized min-max tracker */ struct minmax { struct minmax_sample s[3]; }; static inline u32 minmax_get(const struct minmax *m) { return m->s[0].v; } static inline u32 minmax_reset(struct minmax *m, u32 t, u32 meas) { struct minmax_sample val = { .t = t, .v = meas }; m->s[2] = m->s[1] = m->s[0] = val; return m->s[0].v; } u32 minmax_running_max(struct minmax *m, u32 win, u32 t, u32 meas); u32 minmax_running_min(struct minmax *m, u32 win, u32 t, u32 meas); #endif |
34 34 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Contiguous Memory Allocator * * Copyright (c) 2010-2011 by Samsung Electronics. * Copyright IBM Corporation, 2013 * Copyright LG Electronics Inc., 2014 * Written by: * Marek Szyprowski <m.szyprowski@samsung.com> * Michal Nazarewicz <mina86@mina86.com> * Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> * Joonsoo Kim <iamjoonsoo.kim@lge.com> */ #define pr_fmt(fmt) "cma: " fmt #define CREATE_TRACE_POINTS #include <linux/memblock.h> #include <linux/err.h> #include <linux/mm.h> #include <linux/sizes.h> #include <linux/slab.h> #include <linux/log2.h> #include <linux/cma.h> #include <linux/highmem.h> #include <linux/io.h> #include <linux/kmemleak.h> #include <trace/events/cma.h> #include "internal.h" #include "cma.h" struct cma cma_areas[MAX_CMA_AREAS]; unsigned cma_area_count; static DEFINE_MUTEX(cma_mutex); phys_addr_t cma_get_base(const struct cma *cma) { return PFN_PHYS(cma->base_pfn); } unsigned long cma_get_size(const struct cma *cma) { return cma->count << PAGE_SHIFT; } const char *cma_get_name(const struct cma *cma) { return cma->name; } static unsigned long cma_bitmap_aligned_mask(const struct cma *cma, unsigned int align_order) { if (align_order <= cma->order_per_bit) return 0; return (1UL << (align_order - cma->order_per_bit)) - 1; } /* * Find the offset of the base PFN from the specified align_order. * The value returned is represented in order_per_bits. */ static unsigned long cma_bitmap_aligned_offset(const struct cma *cma, unsigned int align_order) { return (cma->base_pfn & ((1UL << align_order) - 1)) >> cma->order_per_bit; } static unsigned long cma_bitmap_pages_to_bits(const struct cma *cma, unsigned long pages) { return ALIGN(pages, 1UL << cma->order_per_bit) >> cma->order_per_bit; } static void cma_clear_bitmap(struct cma *cma, unsigned long pfn, unsigned long count) { unsigned long bitmap_no, bitmap_count; unsigned long flags; bitmap_no = (pfn - cma->base_pfn) >> cma->order_per_bit; bitmap_count = cma_bitmap_pages_to_bits(cma, count); spin_lock_irqsave(&cma->lock, flags); bitmap_clear(cma->bitmap, bitmap_no, bitmap_count); spin_unlock_irqrestore(&cma->lock, flags); } static void __init cma_activate_area(struct cma *cma) { unsigned long base_pfn = cma->base_pfn, pfn; struct zone *zone; cma->bitmap = bitmap_zalloc(cma_bitmap_maxno(cma), GFP_KERNEL); if (!cma->bitmap) goto out_error; /* * alloc_contig_range() requires the pfn range specified to be in the * same zone. Simplify by forcing the entire CMA resv range to be in the * same zone. */ WARN_ON_ONCE(!pfn_valid(base_pfn)); zone = page_zone(pfn_to_page(base_pfn)); for (pfn = base_pfn + 1; pfn < base_pfn + cma->count; pfn++) { WARN_ON_ONCE(!pfn_valid(pfn)); if (page_zone(pfn_to_page(pfn)) != zone) goto not_in_zone; } for (pfn = base_pfn; pfn < base_pfn + cma->count; pfn += pageblock_nr_pages) init_cma_reserved_pageblock(pfn_to_page(pfn)); spin_lock_init(&cma->lock); #ifdef CONFIG_CMA_DEBUGFS INIT_HLIST_HEAD(&cma->mem_head); spin_lock_init(&cma->mem_head_lock); #endif return; not_in_zone: bitmap_free(cma->bitmap); out_error: /* Expose all pages to the buddy, they are useless for CMA. */ if (!cma->reserve_pages_on_error) { for (pfn = base_pfn; pfn < base_pfn + cma->count; pfn++) free_reserved_page(pfn_to_page(pfn)); } totalcma_pages -= cma->count; cma->count = 0; pr_err("CMA area %s could not be activated\n", cma->name); return; } static int __init cma_init_reserved_areas(void) { int i; for (i = 0; i < cma_area_count; i++) cma_activate_area(&cma_areas[i]); return 0; } core_initcall(cma_init_reserved_areas); void __init cma_reserve_pages_on_error(struct cma *cma) { cma->reserve_pages_on_error = true; } /** * cma_init_reserved_mem() - create custom contiguous area from reserved memory * @base: Base address of the reserved area * @size: Size of the reserved area (in bytes), * @order_per_bit: Order of pages represented by one bit on bitmap. * @name: The name of the area. If this parameter is NULL, the name of * the area will be set to "cmaN", where N is a running counter of * used areas. * @res_cma: Pointer to store the created cma region. * * This function creates custom contiguous area from already reserved memory. */ int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, unsigned int order_per_bit, const char *name, struct cma **res_cma) { struct cma *cma; /* Sanity checks */ if (cma_area_count == ARRAY_SIZE(cma_areas)) { pr_err("Not enough slots for CMA reserved regions!\n"); return -ENOSPC; } if (!size || !memblock_is_region_reserved(base, size)) return -EINVAL; /* alignment should be aligned with order_per_bit */ if (!IS_ALIGNED(CMA_MIN_ALIGNMENT_PAGES, 1 << order_per_bit)) return -EINVAL; /* ensure minimal alignment required by mm core */ if (!IS_ALIGNED(base | size, CMA_MIN_ALIGNMENT_BYTES)) return -EINVAL; /* * Each reserved area must be initialised later, when more kernel * subsystems (like slab allocator) are available. */ cma = &cma_areas[cma_area_count]; if (name) snprintf(cma->name, CMA_MAX_NAME, name); else snprintf(cma->name, CMA_MAX_NAME, "cma%d\n", cma_area_count); cma->base_pfn = PFN_DOWN(base); cma->count = size >> PAGE_SHIFT; cma->order_per_bit = order_per_bit; *res_cma = cma; cma_area_count++; totalcma_pages += (size / PAGE_SIZE); return 0; } /** * cma_declare_contiguous_nid() - reserve custom contiguous area * @base: Base address of the reserved area optional, use 0 for any * @size: Size of the reserved area (in bytes), * @limit: End address of the reserved memory (optional, 0 for any). * @alignment: Alignment for the CMA area, should be power of 2 or zero * @order_per_bit: Order of pages represented by one bit on bitmap. * @fixed: hint about where to place the reserved area * @name: The name of the area. See function cma_init_reserved_mem() * @res_cma: Pointer to store the created cma region. * @nid: nid of the free area to find, %NUMA_NO_NODE for any node * * This function reserves memory from early allocator. It should be * called by arch specific code once the early allocator (memblock or bootmem) * has been activated and all other subsystems have already allocated/reserved * memory. This function allows to create custom reserved areas. * * If @fixed is true, reserve contiguous area at exactly @base. If false, * reserve in range from @base to @limit. */ int __init cma_declare_contiguous_nid(phys_addr_t base, phys_addr_t size, phys_addr_t limit, phys_addr_t alignment, unsigned int order_per_bit, bool fixed, const char *name, struct cma **res_cma, int nid) { phys_addr_t memblock_end = memblock_end_of_DRAM(); phys_addr_t highmem_start; int ret; /* * We can't use __pa(high_memory) directly, since high_memory * isn't a valid direct map VA, and DEBUG_VIRTUAL will (validly) * complain. Find the boundary by adding one to the last valid * address. */ highmem_start = __pa(high_memory - 1) + 1; pr_debug("%s(size %pa, base %pa, limit %pa alignment %pa)\n", __func__, &size, &base, &limit, &alignment); if (cma_area_count == ARRAY_SIZE(cma_areas)) { pr_err("Not enough slots for CMA reserved regions!\n"); return -ENOSPC; } if (!size) return -EINVAL; if (alignment && !is_power_of_2(alignment)) return -EINVAL; if (!IS_ENABLED(CONFIG_NUMA)) nid = NUMA_NO_NODE; /* Sanitise input arguments. */ alignment = max_t(phys_addr_t, alignment, CMA_MIN_ALIGNMENT_BYTES); if (fixed && base & (alignment - 1)) { ret = -EINVAL; pr_err("Region at %pa must be aligned to %pa bytes\n", &base, &alignment); goto err; } base = ALIGN(base, alignment); size = ALIGN(size, alignment); limit &= ~(alignment - 1); if (!base) fixed = false; /* size should be aligned with order_per_bit */ if (!IS_ALIGNED(size >> PAGE_SHIFT, 1 << order_per_bit)) return -EINVAL; /* * If allocating at a fixed base the request region must not cross the * low/high memory boundary. */ if (fixed && base < highmem_start && base + size > highmem_start) { ret = -EINVAL; pr_err("Region at %pa defined on low/high memory boundary (%pa)\n", &base, &highmem_start); goto err; } /* * If the limit is unspecified or above the memblock end, its effective * value will be the memblock end. Set it explicitly to simplify further * checks. */ if (limit == 0 || limit > memblock_end) limit = memblock_end; if (base + size > limit) { ret = -EINVAL; pr_err("Size (%pa) of region at %pa exceeds limit (%pa)\n", &size, &base, &limit); goto err; } /* Reserve memory */ if (fixed) { if (memblock_is_region_reserved(base, size) || memblock_reserve(base, size) < 0) { ret = -EBUSY; goto err; } } else { phys_addr_t addr = 0; /* * If there is enough memory, try a bottom-up allocation first. * It will place the new cma area close to the start of the node * and guarantee that the compaction is moving pages out of the * cma area and not into it. * Avoid using first 4GB to not interfere with constrained zones * like DMA/DMA32. */ #ifdef CONFIG_PHYS_ADDR_T_64BIT if (!memblock_bottom_up() && memblock_end >= SZ_4G + size) { memblock_set_bottom_up(true); addr = memblock_alloc_range_nid(size, alignment, SZ_4G, limit, nid, true); memblock_set_bottom_up(false); } #endif /* * All pages in the reserved area must come from the same zone. * If the requested region crosses the low/high memory boundary, * try allocating from high memory first and fall back to low * memory in case of failure. */ if (!addr && base < highmem_start && limit > highmem_start) { addr = memblock_alloc_range_nid(size, alignment, highmem_start, limit, nid, true); limit = highmem_start; } if (!addr) { addr = memblock_alloc_range_nid(size, alignment, base, limit, nid, true); if (!addr) { ret = -ENOMEM; goto err; } } /* * kmemleak scans/reads tracked objects for pointers to other * objects but this address isn't mapped and accessible */ kmemleak_ignore_phys(addr); base = addr; } ret = cma_init_reserved_mem(base, size, order_per_bit, name, res_cma); if (ret) goto free_mem; pr_info("Reserved %ld MiB at %pa on node %d\n", (unsigned long)size / SZ_1M, &base, nid); return 0; free_mem: memblock_phys_free(base, size); err: pr_err("Failed to reserve %ld MiB on node %d\n", (unsigned long)size / SZ_1M, nid); return ret; } static void cma_debug_show_areas(struct cma *cma) { unsigned long next_zero_bit, next_set_bit, nr_zero; unsigned long start = 0; unsigned long nr_part, nr_total = 0; unsigned long nbits = cma_bitmap_maxno(cma); spin_lock_irq(&cma->lock); pr_info("number of available pages: "); for (;;) { next_zero_bit = find_next_zero_bit(cma->bitmap, nbits, start); if (next_zero_bit >= nbits) break; next_set_bit = find_next_bit(cma->bitmap, nbits, next_zero_bit); nr_zero = next_set_bit - next_zero_bit; nr_part = nr_zero << cma->order_per_bit; pr_cont("%s%lu@%lu", nr_total ? "+" : "", nr_part, next_zero_bit); nr_total += nr_part; start = next_zero_bit + nr_zero; } pr_cont("=> %lu free of %lu total pages\n", nr_total, cma->count); spin_unlock_irq(&cma->lock); } /** * cma_alloc() - allocate pages from contiguous area * @cma: Contiguous memory region for which the allocation is performed. * @count: Requested number of pages. * @align: Requested alignment of pages (in PAGE_SIZE order). * @no_warn: Avoid printing message about failed allocation * * This function allocates part of contiguous memory on specific * contiguous memory area. */ struct page *cma_alloc(struct cma *cma, unsigned long count, unsigned int align, bool no_warn) { unsigned long mask, offset; unsigned long pfn = -1; unsigned long start = 0; unsigned long bitmap_maxno, bitmap_no, bitmap_count; unsigned long i; struct page *page = NULL; int ret = -ENOMEM; const char *name = cma ? cma->name : NULL; trace_cma_alloc_start(name, count, align); if (!cma || !cma->count || !cma->bitmap) return page; pr_debug("%s(cma %p, name: %s, count %lu, align %d)\n", __func__, (void *)cma, cma->name, count, align); if (!count) return page; mask = cma_bitmap_aligned_mask(cma, align); offset = cma_bitmap_aligned_offset(cma, align); bitmap_maxno = cma_bitmap_maxno(cma); bitmap_count = cma_bitmap_pages_to_bits(cma, count); if (bitmap_count > bitmap_maxno) return page; for (;;) { spin_lock_irq(&cma->lock); bitmap_no = bitmap_find_next_zero_area_off(cma->bitmap, bitmap_maxno, start, bitmap_count, mask, offset); if (bitmap_no >= bitmap_maxno) { spin_unlock_irq(&cma->lock); break; } bitmap_set(cma->bitmap, bitmap_no, bitmap_count); /* * It's safe to drop the lock here. We've marked this region for * our exclusive use. If the migration fails we will take the * lock again and unmark it. */ spin_unlock_irq(&cma->lock); pfn = cma->base_pfn + (bitmap_no << cma->order_per_bit); mutex_lock(&cma_mutex); ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA, GFP_KERNEL | (no_warn ? __GFP_NOWARN : 0)); mutex_unlock(&cma_mutex); if (ret == 0) { page = pfn_to_page(pfn); break; } cma_clear_bitmap(cma, pfn, count); if (ret != -EBUSY) break; pr_debug("%s(): memory range at pfn 0x%lx %p is busy, retrying\n", __func__, pfn, pfn_to_page(pfn)); trace_cma_alloc_busy_retry(cma->name, pfn, pfn_to_page(pfn), count, align); /* try again with a bit different memory target */ start = bitmap_no + mask + 1; } /* * CMA can allocate multiple page blocks, which results in different * blocks being marked with different tags. Reset the tags to ignore * those page blocks. */ if (page) { for (i = 0; i < count; i++) page_kasan_tag_reset(nth_page(page, i)); } if (ret && !no_warn) { pr_err_ratelimited("%s: %s: alloc failed, req-size: %lu pages, ret: %d\n", __func__, cma->name, count, ret); cma_debug_show_areas(cma); } pr_debug("%s(): returned %p\n", __func__, page); trace_cma_alloc_finish(name, pfn, page, count, align, ret); if (page) { count_vm_event(CMA_ALLOC_SUCCESS); cma_sysfs_account_success_pages(cma, count); } else { count_vm_event(CMA_ALLOC_FAIL); cma_sysfs_account_fail_pages(cma, count); } return page; } bool cma_pages_valid(struct cma *cma, const struct page *pages, unsigned long count) { unsigned long pfn; if (!cma || !pages) return false; pfn = page_to_pfn(pages); if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count) { pr_debug("%s(page %p, count %lu)\n", __func__, (void *)pages, count); return false; } return true; } /** * cma_release() - release allocated pages * @cma: Contiguous memory region for which the allocation is performed. * @pages: Allocated pages. * @count: Number of allocated pages. * * This function releases memory allocated by cma_alloc(). * It returns false when provided pages do not belong to contiguous area and * true otherwise. */ bool cma_release(struct cma *cma, const struct page *pages, unsigned long count) { unsigned long pfn; if (!cma_pages_valid(cma, pages, count)) return false; pr_debug("%s(page %p, count %lu)\n", __func__, (void *)pages, count); pfn = page_to_pfn(pages); VM_BUG_ON(pfn + count > cma->base_pfn + cma->count); free_contig_range(pfn, count); cma_clear_bitmap(cma, pfn, count); cma_sysfs_account_release_pages(cma, count); trace_cma_release(cma->name, pfn, pages, count); return true; } int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data) { int i; for (i = 0; i < cma_area_count; i++) { int ret = it(&cma_areas[i], data); if (ret) return ret; } return 0; } |
6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 | // SPDX-License-Identifier: GPL-2.0-or-later /* * x86 SMP booting functions * * (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk> * (c) 1998, 1999, 2000, 2009 Ingo Molnar <mingo@redhat.com> * Copyright 2001 Andi Kleen, SuSE Labs. * * Much of the core SMP work is based on previous work by Thomas Radke, to * whom a great many thanks are extended. * * Thanks to Intel for making available several different Pentium, * Pentium Pro and Pentium-II/Xeon MP machines. * Original development of Linux SMP code supported by Caldera. * * Fixes * Felix Koop : NR_CPUS used properly * Jose Renau : Handle single CPU case. * Alan Cox : By repeated request 8) - Total BogoMIPS report. * Greg Wright : Fix for kernel stacks panic. * Erich Boleyn : MP v1.4 and additional changes. * Matthias Sattler : Changes for 2.1 kernel map. * Michel Lespinasse : Changes for 2.1 kernel map. * Michael Chastain : Change trampoline.S to gnu as. * Alan Cox : Dumb bug: 'B' step PPro's are fine * Ingo Molnar : Added APIC timers, based on code * from Jose Renau * Ingo Molnar : various cleanups and rewrites * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug. * Maciej W. Rozycki : Bits for genuine 82489DX APICs * Andi Kleen : Changed for SMP boot into long mode. * Martin J. Bligh : Added support for multi-quad systems * Dave Jones : Report invalid combinations of Athlon CPUs. * Rusty Russell : Hacked into shape for new "hotplug" boot process. * Andi Kleen : Converted to new state machine. * Ashok Raj : CPU hotplug support * Glauber Costa : i386 and x86_64 integration */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/init.h> #include <linux/smp.h> #include <linux/export.h> #include <linux/sched.h> #include <linux/sched/topology.h> #include <linux/sched/hotplug.h> #include <linux/sched/task_stack.h> #include <linux/percpu.h> #include <linux/memblock.h> #include <linux/err.h> #include <linux/nmi.h> #include <linux/tboot.h> #include <linux/gfp.h> #include <linux/cpuidle.h> #include <linux/kexec.h> #include <linux/numa.h> #include <linux/pgtable.h> #include <linux/overflow.h> #include <linux/stackprotector.h> #include <linux/cpuhotplug.h> #include <linux/mc146818rtc.h> #include <asm/acpi.h> #include <asm/cacheinfo.h> #include <asm/desc.h> #include <asm/nmi.h> #include <asm/irq.h> #include <asm/realmode.h> #include <asm/cpu.h> #include <asm/numa.h> #include <asm/tlbflush.h> #include <asm/mtrr.h> #include <asm/mwait.h> #include <asm/apic.h> #include <asm/io_apic.h> #include <asm/fpu/api.h> #include <asm/setup.h> #include <asm/uv/uv.h> #include <asm/microcode.h> #include <asm/i8259.h> #include <asm/misc.h> #include <asm/qspinlock.h> #include <asm/intel-family.h> #include <asm/cpu_device_id.h> #include <asm/spec-ctrl.h> #include <asm/hw_irq.h> #include <asm/stackprotector.h> #include <asm/sev.h> #include <asm/spec-ctrl.h> /* representing HT siblings of each logical CPU */ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map); EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); /* representing HT and core siblings of each logical CPU */ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map); EXPORT_PER_CPU_SYMBOL(cpu_core_map); /* representing HT, core, and die siblings of each logical CPU */ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map); EXPORT_PER_CPU_SYMBOL(cpu_die_map); /* CPUs which are the primary SMT threads */ struct cpumask __cpu_primary_thread_mask __read_mostly; /* Representing CPUs for which sibling maps can be computed */ static cpumask_var_t cpu_sibling_setup_mask; struct mwait_cpu_dead { unsigned int control; unsigned int status; }; #define CPUDEAD_MWAIT_WAIT 0xDEADBEEF #define CPUDEAD_MWAIT_KEXEC_HLT 0x4A17DEAD /* * Cache line aligned data for mwait_play_dead(). Separate on purpose so * that it's unlikely to be touched by other CPUs. */ static DEFINE_PER_CPU_ALIGNED(struct mwait_cpu_dead, mwait_cpu_dead); /* Maximum number of SMT threads on any online core */ int __read_mostly __max_smt_threads = 1; /* Flag to indicate if a complete sched domain rebuild is required */ bool x86_topology_update; int arch_update_cpu_topology(void) { int retval = x86_topology_update; x86_topology_update = false; return retval; } static unsigned int smpboot_warm_reset_vector_count; static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) { unsigned long flags; spin_lock_irqsave(&rtc_lock, flags); if (!smpboot_warm_reset_vector_count++) { CMOS_WRITE(0xa, 0xf); *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) = start_eip >> 4; *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = start_eip & 0xf; } spin_unlock_irqrestore(&rtc_lock, flags); } static inline void smpboot_restore_warm_reset_vector(void) { unsigned long flags; /* * Paranoid: Set warm reset code and vector here back * to default values. */ spin_lock_irqsave(&rtc_lock, flags); if (!--smpboot_warm_reset_vector_count) { CMOS_WRITE(0, 0xf); *((volatile u32 *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0; } spin_unlock_irqrestore(&rtc_lock, flags); } /* Run the next set of setup steps for the upcoming CPU */ static void ap_starting(void) { int cpuid = smp_processor_id(); /* Mop up eventual mwait_play_dead() wreckage */ this_cpu_write(mwait_cpu_dead.status, 0); this_cpu_write(mwait_cpu_dead.control, 0); /* * If woken up by an INIT in an 82489DX configuration the alive * synchronization guarantees that the CPU does not reach this * point before an INIT_deassert IPI reaches the local APIC, so it * is now safe to touch the local APIC. * * Set up this CPU, first the APIC, which is probably redundant on * most boards. */ apic_ap_setup(); /* Save the processor parameters. */ smp_store_cpu_info(cpuid); /* * The topology information must be up to date before * notify_cpu_starting(). */ set_cpu_sibling_map(cpuid); ap_init_aperfmperf(); pr_debug("Stack at about %p\n", &cpuid); wmb(); /* * This runs the AP through all the cpuhp states to its target * state CPUHP_ONLINE. */ notify_cpu_starting(cpuid); } static void ap_calibrate_delay(void) { /* * Calibrate the delay loop and update loops_per_jiffy in cpu_data. * smp_store_cpu_info() stored a value that is close but not as * accurate as the value just calculated. * * As this is invoked after the TSC synchronization check, * calibrate_delay_is_known() will skip the calibration routine * when TSC is synchronized across sockets. */ calibrate_delay(); cpu_data(smp_processor_id()).loops_per_jiffy = loops_per_jiffy; } /* * Activate a secondary processor. */ static void notrace start_secondary(void *unused) { /* * Don't put *anything* except direct CPU state initialization * before cpu_init(), SMP booting is too fragile that we want to * limit the things done here to the most necessary things. */ cr4_init(); /* * 32-bit specific. 64-bit reaches this code with the correct page * table established. Yet another historical divergence. */ if (IS_ENABLED(CONFIG_X86_32)) { /* switch away from the initial page table */ load_cr3(swapper_pg_dir); __flush_tlb_all(); } cpu_init_exception_handling(); /* * Load the microcode before reaching the AP alive synchronization * point below so it is not part of the full per CPU serialized * bringup part when "parallel" bringup is enabled. * * That's even safe when hyperthreading is enabled in the CPU as * the core code starts the primary threads first and leaves the * secondary threads waiting for SIPI. Loading microcode on * physical cores concurrently is a safe operation. * * This covers both the Intel specific issue that concurrent * microcode loading on SMT siblings must be prohibited and the * vendor independent issue`that microcode loading which changes * CPUID, MSRs etc. must be strictly serialized to maintain * software state correctness. */ load_ucode_ap(); /* * Synchronization point with the hotplug core. Sets this CPUs * synchronization state to ALIVE and spin-waits for the control CPU to * release this CPU for further bringup. */ cpuhp_ap_sync_alive(); cpu_init(); fpu__init_cpu(); rcutree_report_cpu_starting(raw_smp_processor_id()); x86_cpuinit.early_percpu_clock_init(); ap_starting(); /* Check TSC synchronization with the control CPU. */ check_tsc_sync_target(); /* * Calibrate the delay loop after the TSC synchronization check. * This allows to skip the calibration when TSC is synchronized * across sockets. */ ap_calibrate_delay(); speculative_store_bypass_ht_init(); /* * Lock vector_lock, set CPU online and bring the vector * allocator online. Online must be set with vector_lock held * to prevent a concurrent irq setup/teardown from seeing a * half valid vector space. */ lock_vector_lock(); set_cpu_online(smp_processor_id(), true); lapic_online(); unlock_vector_lock(); x86_platform.nmi_init(); /* enable local interrupts */ local_irq_enable(); x86_cpuinit.setup_percpu_clockev(); wmb(); cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } /* * The bootstrap kernel entry code has set these up. Save them for * a given CPU */ void smp_store_cpu_info(int id) { struct cpuinfo_x86 *c = &cpu_data(id); /* Copy boot_cpu_data only on the first bringup */ if (!c->initialized) *c = boot_cpu_data; c->cpu_index = id; /* * During boot time, CPU0 has this setup already. Save the info when * bringing up an AP. */ identify_secondary_cpu(c); c->initialized = true; } static bool topology_same_node(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) { int cpu1 = c->cpu_index, cpu2 = o->cpu_index; return (cpu_to_node(cpu1) == cpu_to_node(cpu2)); } static bool topology_sane(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o, const char *name) { int cpu1 = c->cpu_index, cpu2 = o->cpu_index; return !WARN_ONCE(!topology_same_node(c, o), "sched: CPU #%d's %s-sibling CPU #%d is not on the same node! " "[node: %d != %d]. Ignoring dependency.\n", cpu1, name, cpu2, cpu_to_node(cpu1), cpu_to_node(cpu2)); } #define link_mask(mfunc, c1, c2) \ do { \ cpumask_set_cpu((c1), mfunc(c2)); \ cpumask_set_cpu((c2), mfunc(c1)); \ } while (0) static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) { if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { int cpu1 = c->cpu_index, cpu2 = o->cpu_index; if (c->topo.pkg_id == o->topo.pkg_id && c->topo.die_id == o->topo.die_id && c->topo.amd_node_id == o->topo.amd_node_id && per_cpu_llc_id(cpu1) == per_cpu_llc_id(cpu2)) { if (c->topo.core_id == o->topo.core_id) return topology_sane(c, o, "smt"); if ((c->topo.cu_id != 0xff) && (o->topo.cu_id != 0xff) && (c->topo.cu_id == o->topo.cu_id)) return topology_sane(c, o, "smt"); } } else if (c->topo.pkg_id == o->topo.pkg_id && c->topo.die_id == o->topo.die_id && c->topo.core_id == o->topo.core_id) { return topology_sane(c, o, "smt"); } return false; } static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) { if (c->topo.pkg_id != o->topo.pkg_id || c->topo.die_id != o->topo.die_id) return false; if (cpu_feature_enabled(X86_FEATURE_TOPOEXT) && topology_amd_nodes_per_pkg() > 1) return c->topo.amd_node_id == o->topo.amd_node_id; return true; } static bool match_l2c(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) { int cpu1 = c->cpu_index, cpu2 = o->cpu_index; /* If the arch didn't set up l2c_id, fall back to SMT */ if (per_cpu_l2c_id(cpu1) == BAD_APICID) return match_smt(c, o); /* Do not match if L2 cache id does not match: */ if (per_cpu_l2c_id(cpu1) != per_cpu_l2c_id(cpu2)) return false; return topology_sane(c, o, "l2c"); } /* * Unlike the other levels, we do not enforce keeping a * multicore group inside a NUMA node. If this happens, we will * discard the MC level of the topology later. */ static bool match_pkg(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) { if (c->topo.pkg_id == o->topo.pkg_id) return true; return false; } /* * Define intel_cod_cpu[] for Intel COD (Cluster-on-Die) CPUs. * * Any Intel CPU that has multiple nodes per package and does not * match intel_cod_cpu[] has the SNC (Sub-NUMA Cluster) topology. * * When in SNC mode, these CPUs enumerate an LLC that is shared * by multiple NUMA nodes. The LLC is shared for off-package data * access but private to the NUMA node (half of the package) for * on-package access. CPUID (the source of the information about * the LLC) can only enumerate the cache as shared or unshared, * but not this particular configuration. */ static const struct x86_cpu_id intel_cod_cpu[] = { X86_MATCH_VFM(INTEL_HASWELL_X, 0), /* COD */ X86_MATCH_VFM(INTEL_BROADWELL_X, 0), /* COD */ X86_MATCH_VFM(INTEL_ANY, 1), /* SNC */ {} }; static bool match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) { const struct x86_cpu_id *id = x86_match_cpu(intel_cod_cpu); int cpu1 = c->cpu_index, cpu2 = o->cpu_index; bool intel_snc = id && id->driver_data; /* Do not match if we do not have a valid APICID for cpu: */ if (per_cpu_llc_id(cpu1) == BAD_APICID) return false; /* Do not match if LLC id does not match: */ if (per_cpu_llc_id(cpu1) != per_cpu_llc_id(cpu2)) return false; /* * Allow the SNC topology without warning. Return of false * means 'c' does not share the LLC of 'o'. This will be * reflected to userspace. */ if (match_pkg(c, o) && !topology_same_node(c, o) && intel_snc) return false; return topology_sane(c, o, "llc"); } static inline int x86_sched_itmt_flags(void) { return sysctl_sched_itmt_enabled ? SD_ASYM_PACKING : 0; } #ifdef CONFIG_SCHED_MC static int x86_core_flags(void) { return cpu_core_flags() | x86_sched_itmt_flags(); } #endif #ifdef CONFIG_SCHED_SMT static int x86_smt_flags(void) { return cpu_smt_flags(); } #endif #ifdef CONFIG_SCHED_CLUSTER static int x86_cluster_flags(void) { return cpu_cluster_flags() | x86_sched_itmt_flags(); } #endif static int x86_die_flags(void) { if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) return x86_sched_itmt_flags(); return 0; } /* * Set if a package/die has multiple NUMA nodes inside. * AMD Magny-Cours, Intel Cluster-on-Die, and Intel * Sub-NUMA Clustering have this. */ static bool x86_has_numa_in_package; static struct sched_domain_topology_level x86_topology[6]; static void __init build_sched_topology(void) { int i = 0; #ifdef CONFIG_SCHED_SMT x86_topology[i++] = (struct sched_domain_topology_level){ cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) }; #endif #ifdef CONFIG_SCHED_CLUSTER x86_topology[i++] = (struct sched_domain_topology_level){ cpu_clustergroup_mask, x86_cluster_flags, SD_INIT_NAME(CLS) }; #endif #ifdef CONFIG_SCHED_MC x86_topology[i++] = (struct sched_domain_topology_level){ cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) }; #endif /* * When there is NUMA topology inside the package skip the PKG domain * since the NUMA domains will auto-magically create the right spanning * domains based on the SLIT. */ if (!x86_has_numa_in_package) { x86_topology[i++] = (struct sched_domain_topology_level){ cpu_cpu_mask, x86_die_flags, SD_INIT_NAME(PKG) }; } /* * There must be one trailing NULL entry left. */ BUG_ON(i >= ARRAY_SIZE(x86_topology)-1); set_sched_topology(x86_topology); } void set_cpu_sibling_map(int cpu) { bool has_smt = __max_threads_per_core > 1; bool has_mp = has_smt || topology_num_cores_per_package() > 1; struct cpuinfo_x86 *c = &cpu_data(cpu); struct cpuinfo_x86 *o; int i, threads; cpumask_set_cpu(cpu, cpu_sibling_setup_mask); if (!has_mp) { cpumask_set_cpu(cpu, topology_sibling_cpumask(cpu)); cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu)); cpumask_set_cpu(cpu, cpu_l2c_shared_mask(cpu)); cpumask_set_cpu(cpu, topology_core_cpumask(cpu)); cpumask_set_cpu(cpu, topology_die_cpumask(cpu)); c->booted_cores = 1; return; } for_each_cpu(i, cpu_sibling_setup_mask) { o = &cpu_data(i); if (match_pkg(c, o) && !topology_same_node(c, o)) x86_has_numa_in_package = true; if ((i == cpu) || (has_smt && match_smt(c, o))) link_mask(topology_sibling_cpumask, cpu, i); if ((i == cpu) || (has_mp && match_llc(c, o))) link_mask(cpu_llc_shared_mask, cpu, i); if ((i == cpu) || (has_mp && match_l2c(c, o))) link_mask(cpu_l2c_shared_mask, cpu, i); if ((i == cpu) || (has_mp && match_die(c, o))) link_mask(topology_die_cpumask, cpu, i); } threads = cpumask_weight(topology_sibling_cpumask(cpu)); if (threads > __max_smt_threads) __max_smt_threads = threads; for_each_cpu(i, topology_sibling_cpumask(cpu)) cpu_data(i).smt_active = threads > 1; /* * This needs a separate iteration over the cpus because we rely on all * topology_sibling_cpumask links to be set-up. */ for_each_cpu(i, cpu_sibling_setup_mask) { o = &cpu_data(i); if ((i == cpu) || (has_mp && match_pkg(c, o))) { link_mask(topology_core_cpumask, cpu, i); /* * Does this new cpu bringup a new core? */ if (threads == 1) { /* * for each core in package, increment * the booted_cores for this new cpu */ if (cpumask_first( topology_sibling_cpumask(i)) == i) c->booted_cores++; /* * increment the core count for all * the other cpus in this package */ if (i != cpu) cpu_data(i).booted_cores++; } else if (i != cpu && !c->booted_cores) c->booted_cores = cpu_data(i).booted_cores; } } } /* maps the cpu to the sched domain representing multi-core */ const struct cpumask *cpu_coregroup_mask(int cpu) { return cpu_llc_shared_mask(cpu); } const struct cpumask *cpu_clustergroup_mask(int cpu) { return cpu_l2c_shared_mask(cpu); } EXPORT_SYMBOL_GPL(cpu_clustergroup_mask); static void impress_friends(void) { int cpu; unsigned long bogosum = 0; /* * Allow the user to impress friends. */ pr_debug("Before bogomips\n"); for_each_online_cpu(cpu) bogosum += cpu_data(cpu).loops_per_jiffy; pr_info("Total of %d processors activated (%lu.%02lu BogoMIPS)\n", num_online_cpus(), bogosum/(500000/HZ), (bogosum/(5000/HZ))%100); pr_debug("Before bogocount - setting activated=1\n"); } /* * The Multiprocessor Specification 1.4 (1997) example code suggests * that there should be a 10ms delay between the BSP asserting INIT * and de-asserting INIT, when starting a remote processor. * But that slows boot and resume on modern processors, which include * many cores and don't require that delay. * * Cmdline "init_cpu_udelay=" is available to over-ride this delay. * Modern processor families are quirked to remove the delay entirely. */ #define UDELAY_10MS_DEFAULT 10000 static unsigned int init_udelay = UINT_MAX; static int __init cpu_init_udelay(char *str) { get_option(&str, &init_udelay); return 0; } early_param("cpu_init_udelay", cpu_init_udelay); static void __init smp_quirk_init_udelay(void) { /* if cmdline changed it from default, leave it alone */ if (init_udelay != UINT_MAX) return; /* if modern processor, use no delay */ if (((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 == 6)) || ((boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) && (boot_cpu_data.x86 >= 0x18)) || ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && (boot_cpu_data.x86 >= 0xF))) { init_udelay = 0; return; } /* else, use legacy delay */ init_udelay = UDELAY_10MS_DEFAULT; } /* * Wake up AP by INIT, INIT, STARTUP sequence. */ static void send_init_sequence(u32 phys_apicid) { int maxlvt = lapic_get_maxlvt(); /* Be paranoid about clearing APIC errors. */ if (APIC_INTEGRATED(boot_cpu_apic_version)) { /* Due to the Pentium erratum 3AP. */ if (maxlvt > 3) apic_write(APIC_ESR, 0); apic_read(APIC_ESR); } /* Assert INIT on the target CPU */ apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT, phys_apicid); safe_apic_wait_icr_idle(); udelay(init_udelay); /* Deassert INIT on the target CPU */ apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid); safe_apic_wait_icr_idle(); } /* * Wake up AP by INIT, INIT, STARTUP sequence. */ static int wakeup_secondary_cpu_via_init(u32 phys_apicid, unsigned long start_eip) { unsigned long send_status = 0, accept_status = 0; int num_starts, j, maxlvt; preempt_disable(); maxlvt = lapic_get_maxlvt(); send_init_sequence(phys_apicid); mb(); /* * Should we send STARTUP IPIs ? * * Determine this based on the APIC version. * If we don't have an integrated APIC, don't send the STARTUP IPIs. */ if (APIC_INTEGRATED(boot_cpu_apic_version)) num_starts = 2; else num_starts = 0; /* * Run STARTUP IPI loop. */ pr_debug("#startup loops: %d\n", num_starts); for (j = 1; j <= num_starts; j++) { pr_debug("Sending STARTUP #%d\n", j); if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ apic_write(APIC_ESR, 0); apic_read(APIC_ESR); pr_debug("After apic_write\n"); /* * STARTUP IPI */ /* Target chip */ /* Boot on the stack */ /* Kick the second */ apic_icr_write(APIC_DM_STARTUP | (start_eip >> 12), phys_apicid); /* * Give the other CPU some time to accept the IPI. */ if (init_udelay == 0) udelay(10); else udelay(300); pr_debug("Startup point 1\n"); pr_debug("Waiting for send to finish...\n"); send_status = safe_apic_wait_icr_idle(); /* * Give the other CPU some time to accept the IPI. */ if (init_udelay == 0) udelay(10); else udelay(200); if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ apic_write(APIC_ESR, 0); accept_status = (apic_read(APIC_ESR) & 0xEF); if (send_status || accept_status) break; } pr_debug("After Startup\n"); if (send_status) pr_err("APIC never delivered???\n"); if (accept_status) pr_err("APIC delivery error (%lx)\n", accept_status); preempt_enable(); return (send_status | accept_status); } /* reduce the number of lines printed when booting a large cpu count system */ static void announce_cpu(int cpu, int apicid) { static int width, node_width, first = 1; static int current_node = NUMA_NO_NODE; int node = early_cpu_to_node(cpu); if (!width) width = num_digits(num_possible_cpus()) + 1; /* + '#' sign */ if (!node_width) node_width = num_digits(num_possible_nodes()) + 1; /* + '#' */ if (system_state < SYSTEM_RUNNING) { if (first) pr_info("x86: Booting SMP configuration:\n"); if (node != current_node) { if (current_node > (-1)) pr_cont("\n"); current_node = node; printk(KERN_INFO ".... node %*s#%d, CPUs: ", node_width - num_digits(node), " ", node); } /* Add padding for the BSP */ if (first) pr_cont("%*s", width + 1, " "); first = 0; pr_cont("%*s#%d", width - num_digits(cpu), " ", cpu); } else pr_info("Booting Node %d Processor %d APIC 0x%x\n", node, cpu, apicid); } int common_cpu_up(unsigned int cpu, struct task_struct *idle) { int ret; /* Just in case we booted with a single CPU. */ alternatives_enable_smp(); per_cpu(pcpu_hot.current_task, cpu) = idle; cpu_init_stack_canary(cpu, idle); /* Initialize the interrupt stack(s) */ ret = irq_init_percpu_irqstack(cpu); if (ret) return ret; #ifdef CONFIG_X86_32 /* Stack for startup_32 can be just as for start_secondary onwards */ per_cpu(pcpu_hot.top_of_stack, cpu) = task_top_of_stack(idle); #endif return 0; } /* * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad * (ie clustered apic addressing mode), this is a LOGICAL apic ID. * Returns zero if startup was successfully sent, else error code from * ->wakeup_secondary_cpu. */ static int do_boot_cpu(u32 apicid, int cpu, struct task_struct *idle) { unsigned long start_ip = real_mode_header->trampoline_start; int ret; #ifdef CONFIG_X86_64 /* If 64-bit wakeup method exists, use the 64-bit mode trampoline IP */ if (apic->wakeup_secondary_cpu_64) start_ip = real_mode_header->trampoline_start64; #endif idle->thread.sp = (unsigned long)task_pt_regs(idle); initial_code = (unsigned long)start_secondary; if (IS_ENABLED(CONFIG_X86_32)) { early_gdt_descr.address = (unsigned long)get_cpu_gdt_rw(cpu); initial_stack = idle->thread.sp; } else if (!(smpboot_control & STARTUP_PARALLEL_MASK)) { smpboot_control = cpu; } /* Enable the espfix hack for this CPU */ init_espfix_ap(cpu); /* So we see what's up */ announce_cpu(cpu, apicid); /* * This grunge runs the startup process for * the targeted processor. */ if (x86_platform.legacy.warm_reset) { pr_debug("Setting warm reset code and vector.\n"); smpboot_setup_warm_reset_vector(start_ip); /* * Be paranoid about clearing APIC errors. */ if (APIC_INTEGRATED(boot_cpu_apic_version)) { apic_write(APIC_ESR, 0); apic_read(APIC_ESR); } } smp_mb(); /* * Wake up a CPU in difference cases: * - Use a method from the APIC driver if one defined, with wakeup * straight to 64-bit mode preferred over wakeup to RM. * Otherwise, * - Use an INIT boot APIC message */ if (apic->wakeup_secondary_cpu_64) ret = apic->wakeup_secondary_cpu_64(apicid, start_ip); else if (apic->wakeup_secondary_cpu) ret = apic->wakeup_secondary_cpu(apicid, start_ip); else ret = wakeup_secondary_cpu_via_init(apicid, start_ip); /* If the wakeup mechanism failed, cleanup the warm reset vector */ if (ret) arch_cpuhp_cleanup_kick_cpu(cpu); return ret; } int native_kick_ap(unsigned int cpu, struct task_struct *tidle) { u32 apicid = apic->cpu_present_to_apicid(cpu); int err; lockdep_assert_irqs_enabled(); pr_debug("++++++++++++++++++++=_---CPU UP %u\n", cpu); if (apicid == BAD_APICID || !apic_id_valid(apicid)) { pr_err("CPU %u has invalid APIC ID %x. Aborting bringup\n", cpu, apicid); return -EINVAL; } if (!test_bit(apicid, phys_cpu_present_map)) { pr_err("CPU %u APIC ID %x is not present. Aborting bringup\n", cpu, apicid); return -EINVAL; } /* * Save current MTRR state in case it was changed since early boot * (e.g. by the ACPI SMI) to initialize new CPUs with MTRRs in sync: */ mtrr_save_state(); /* the FPU context is blank, nobody can own it */ per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL; err = common_cpu_up(cpu, tidle); if (err) return err; err = do_boot_cpu(apicid, cpu, tidle); if (err) pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu); return err; } int arch_cpuhp_kick_ap_alive(unsigned int cpu, struct task_struct *tidle) { return smp_ops.kick_ap_alive(cpu, tidle); } void arch_cpuhp_cleanup_kick_cpu(unsigned int cpu) { /* Cleanup possible dangling ends... */ if (smp_ops.kick_ap_alive == native_kick_ap && x86_platform.legacy.warm_reset) smpboot_restore_warm_reset_vector(); } void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) { if (smp_ops.cleanup_dead_cpu) smp_ops.cleanup_dead_cpu(cpu); if (system_state == SYSTEM_RUNNING) pr_info("CPU %u is now offline\n", cpu); } void arch_cpuhp_sync_state_poll(void) { if (smp_ops.poll_sync_state) smp_ops.poll_sync_state(); } /** * arch_disable_smp_support() - Disables SMP support for x86 at boottime */ void __init arch_disable_smp_support(void) { disable_ioapic_support(); } /* * Fall back to non SMP mode after errors. * * RED-PEN audit/test this more. I bet there is more state messed up here. */ static __init void disable_smp(void) { pr_info("SMP disabled\n"); disable_ioapic_support(); topology_reset_possible_cpus_up(); cpumask_set_cpu(0, topology_sibling_cpumask(0)); cpumask_set_cpu(0, topology_core_cpumask(0)); cpumask_set_cpu(0, topology_die_cpumask(0)); } void __init smp_prepare_cpus_common(void) { unsigned int cpu, node; /* Mark all except the boot CPU as hotpluggable */ for_each_possible_cpu(cpu) { if (cpu) per_cpu(cpu_info.cpu_index, cpu) = nr_cpu_ids; } for_each_possible_cpu(cpu) { node = cpu_to_node(cpu); zalloc_cpumask_var_node(&per_cpu(cpu_sibling_map, cpu), GFP_KERNEL, node); zalloc_cpumask_var_node(&per_cpu(cpu_core_map, cpu), GFP_KERNEL, node); zalloc_cpumask_var_node(&per_cpu(cpu_die_map, cpu), GFP_KERNEL, node); zalloc_cpumask_var_node(&per_cpu(cpu_llc_shared_map, cpu), GFP_KERNEL, node); zalloc_cpumask_var_node(&per_cpu(cpu_l2c_shared_map, cpu), GFP_KERNEL, node); } set_cpu_sibling_map(0); } void __init smp_prepare_boot_cpu(void) { smp_ops.smp_prepare_boot_cpu(); } #ifdef CONFIG_X86_64 /* Establish whether parallel bringup can be supported. */ bool __init arch_cpuhp_init_parallel_bringup(void) { if (!x86_cpuinit.parallel_bringup) { pr_info("Parallel CPU startup disabled by the platform\n"); return false; } smpboot_control = STARTUP_READ_APICID; pr_debug("Parallel CPU startup enabled: 0x%08x\n", smpboot_control); return true; } #endif /* * Prepare for SMP bootup. * @max_cpus: configured maximum number of CPUs, It is a legacy parameter * for common interface support. */ void __init native_smp_prepare_cpus(unsigned int max_cpus) { smp_prepare_cpus_common(); switch (apic_intr_mode) { case APIC_PIC: case APIC_VIRTUAL_WIRE_NO_CONFIG: disable_smp(); return; case APIC_SYMMETRIC_IO_NO_ROUTING: disable_smp(); /* Setup local timer */ x86_init.timers.setup_percpu_clockev(); return; case APIC_VIRTUAL_WIRE: case APIC_SYMMETRIC_IO: break; } /* Setup local timer */ x86_init.timers.setup_percpu_clockev(); pr_info("CPU0: "); print_cpu_info(&cpu_data(0)); uv_system_init(); smp_quirk_init_udelay(); speculative_store_bypass_ht_init(); snp_set_wakeup_secondary_cpu(); } void arch_thaw_secondary_cpus_begin(void) { set_cache_aps_delayed_init(true); } void arch_thaw_secondary_cpus_end(void) { cache_aps_init(); } /* * Early setup to make printk work. */ void __init native_smp_prepare_boot_cpu(void) { int me = smp_processor_id(); /* SMP handles this from setup_per_cpu_areas() */ if (!IS_ENABLED(CONFIG_SMP)) switch_gdt_and_percpu_base(me); native_pv_lock_init(); } void __init native_smp_cpus_done(unsigned int max_cpus) { pr_debug("Boot done\n"); build_sched_topology(); nmi_selftest(); impress_friends(); cache_aps_init(); } /* correctly size the local cpu masks */ void __init setup_cpu_local_masks(void) { alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); } #ifdef CONFIG_HOTPLUG_CPU /* Recompute SMT state for all CPUs on offline */ static void recompute_smt_state(void) { int max_threads, cpu; max_threads = 0; for_each_online_cpu (cpu) { int threads = cpumask_weight(topology_sibling_cpumask(cpu)); if (threads > max_threads) max_threads = threads; } __max_smt_threads = max_threads; } static void remove_siblinginfo(int cpu) { int sibling; struct cpuinfo_x86 *c = &cpu_data(cpu); for_each_cpu(sibling, topology_core_cpumask(cpu)) { cpumask_clear_cpu(cpu, topology_core_cpumask(sibling)); /*/ * last thread sibling in this cpu core going down */ if (cpumask_weight(topology_sibling_cpumask(cpu)) == 1) cpu_data(sibling).booted_cores--; } for_each_cpu(sibling, topology_die_cpumask(cpu)) cpumask_clear_cpu(cpu, topology_die_cpumask(sibling)); for_each_cpu(sibling, topology_sibling_cpumask(cpu)) { cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling)); if (cpumask_weight(topology_sibling_cpumask(sibling)) == 1) cpu_data(sibling).smt_active = false; } for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) cpumask_clear_cpu(cpu, cpu_llc_shared_mask(sibling)); for_each_cpu(sibling, cpu_l2c_shared_mask(cpu)) cpumask_clear_cpu(cpu, cpu_l2c_shared_mask(sibling)); cpumask_clear(cpu_llc_shared_mask(cpu)); cpumask_clear(cpu_l2c_shared_mask(cpu)); cpumask_clear(topology_sibling_cpumask(cpu)); cpumask_clear(topology_core_cpumask(cpu)); cpumask_clear(topology_die_cpumask(cpu)); c->topo.core_id = 0; c->booted_cores = 0; cpumask_clear_cpu(cpu, cpu_sibling_setup_mask); recompute_smt_state(); } static void remove_cpu_from_maps(int cpu) { set_cpu_online(cpu, false); numa_remove_cpu(cpu); } void cpu_disable_common(void) { int cpu = smp_processor_id(); remove_siblinginfo(cpu); /* It's now safe to remove this processor from the online map */ lock_vector_lock(); remove_cpu_from_maps(cpu); unlock_vector_lock(); fixup_irqs(); lapic_offline(); } int native_cpu_disable(void) { int ret; ret = lapic_can_unplug_cpu(); if (ret) return ret; cpu_disable_common(); /* * Disable the local APIC. Otherwise IPI broadcasts will reach * it. It still responds normally to INIT, NMI, SMI, and SIPI * messages. * * Disabling the APIC must happen after cpu_disable_common() * which invokes fixup_irqs(). * * Disabling the APIC preserves already set bits in IRR, but * an interrupt arriving after disabling the local APIC does not * set the corresponding IRR bit. * * fixup_irqs() scans IRR for set bits so it can raise a not * yet handled interrupt on the new destination CPU via an IPI * but obviously it can't do so for IRR bits which are not set. * IOW, interrupts arriving after disabling the local APIC will * be lost. */ apic_soft_disable(); return 0; } void play_dead_common(void) { idle_task_exit(); cpuhp_ap_report_dead(); local_irq_disable(); } /* * We need to flush the caches before going to sleep, lest we have * dirty data in our caches when we come back up. */ static inline void mwait_play_dead(void) { struct mwait_cpu_dead *md = this_cpu_ptr(&mwait_cpu_dead); unsigned int eax, ebx, ecx, edx; unsigned int highest_cstate = 0; unsigned int highest_subcstate = 0; int i; if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) return; if (!this_cpu_has(X86_FEATURE_MWAIT)) return; if (!this_cpu_has(X86_FEATURE_CLFLUSH)) return; if (__this_cpu_read(cpu_info.cpuid_level) < CPUID_MWAIT_LEAF) return; eax = CPUID_MWAIT_LEAF; ecx = 0; native_cpuid(&eax, &ebx, &ecx, &edx); /* * eax will be 0 if EDX enumeration is not valid. * Initialized below to cstate, sub_cstate value when EDX is valid. */ if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) { eax = 0; } else { edx >>= MWAIT_SUBSTATE_SIZE; for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) { if (edx & MWAIT_SUBSTATE_MASK) { highest_cstate = i; highest_subcstate = edx & MWAIT_SUBSTATE_MASK; } } eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) | (highest_subcstate - 1); } /* Set up state for the kexec() hack below */ md->status = CPUDEAD_MWAIT_WAIT; md->control = CPUDEAD_MWAIT_WAIT; wbinvd(); while (1) { /* * The CLFLUSH is a workaround for erratum AAI65 for * the Xeon 7400 series. It's not clear it is actually * needed, but it should be harmless in either case. * The WBINVD is insufficient due to the spurious-wakeup * case where we return around the loop. */ mb(); clflush(md); mb(); __monitor(md, 0, 0); mb(); __mwait(eax, 0); if (READ_ONCE(md->control) == CPUDEAD_MWAIT_KEXEC_HLT) { /* * Kexec is about to happen. Don't go back into mwait() as * the kexec kernel might overwrite text and data including * page tables and stack. So mwait() would resume when the * monitor cache line is written to and then the CPU goes * south due to overwritten text, page tables and stack. * * Note: This does _NOT_ protect against a stray MCE, NMI, * SMI. They will resume execution at the instruction * following the HLT instruction and run into the problem * which this is trying to prevent. */ WRITE_ONCE(md->status, CPUDEAD_MWAIT_KEXEC_HLT); while(1) native_halt(); } } } /* * Kick all "offline" CPUs out of mwait on kexec(). See comment in * mwait_play_dead(). */ void smp_kick_mwait_play_dead(void) { u32 newstate = CPUDEAD_MWAIT_KEXEC_HLT; struct mwait_cpu_dead *md; unsigned int cpu, i; for_each_cpu_andnot(cpu, cpu_present_mask, cpu_online_mask) { md = per_cpu_ptr(&mwait_cpu_dead, cpu); /* Does it sit in mwait_play_dead() ? */ if (READ_ONCE(md->status) != CPUDEAD_MWAIT_WAIT) continue; /* Wait up to 5ms */ for (i = 0; READ_ONCE(md->status) != newstate && i < 1000; i++) { /* Bring it out of mwait */ WRITE_ONCE(md->control, newstate); udelay(5); } if (READ_ONCE(md->status) != newstate) pr_err_once("CPU%u is stuck in mwait_play_dead()\n", cpu); } } void __noreturn hlt_play_dead(void) { if (__this_cpu_read(cpu_info.x86) >= 4) wbinvd(); while (1) native_halt(); } /* * native_play_dead() is essentially a __noreturn function, but it can't * be marked as such as the compiler may complain about it. */ void native_play_dead(void) { if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS)) __update_spec_ctrl(0); play_dead_common(); tboot_shutdown(TB_SHUTDOWN_WFS); mwait_play_dead(); if (cpuidle_play_dead()) hlt_play_dead(); } #else /* ... !CONFIG_HOTPLUG_CPU */ int native_cpu_disable(void) { return -ENOSYS; } void native_play_dead(void) { BUG(); } #endif |
18 17 18 3 17 18 18 17 18 18 18 96 62 31 69 59 53 53 96 96 30 28 12 31 13 8 53 51 63 2 96 93 18 18 2 2 6 2 19 19 5 5 5 5 5 19 19 19 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 | // SPDX-License-Identifier: GPL-2.0+ /* * Meta data file for NILFS * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * * Written by Ryusuke Konishi. */ #include <linux/buffer_head.h> #include <linux/mpage.h> #include <linux/mm.h> #include <linux/writeback.h> #include <linux/backing-dev.h> #include <linux/swap.h> #include <linux/slab.h> #include "nilfs.h" #include "btnode.h" #include "segment.h" #include "page.h" #include "mdt.h" #include "alloc.h" /* nilfs_palloc_destroy_cache() */ #include <trace/events/nilfs2.h> #define NILFS_MDT_MAX_RA_BLOCKS (16 - 1) static int nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block, struct buffer_head *bh, void (*init_block)(struct inode *, struct buffer_head *, void *)) { struct nilfs_inode_info *ii = NILFS_I(inode); void *kaddr; int ret; /* Caller exclude read accesses using page lock */ /* set_buffer_new(bh); */ bh->b_blocknr = 0; ret = nilfs_bmap_insert(ii->i_bmap, block, (unsigned long)bh); if (unlikely(ret)) return ret; set_buffer_mapped(bh); kaddr = kmap_local_page(bh->b_page); memset(kaddr + bh_offset(bh), 0, i_blocksize(inode)); if (init_block) init_block(inode, bh, kaddr); flush_dcache_page(bh->b_page); kunmap_local(kaddr); set_buffer_uptodate(bh); mark_buffer_dirty(bh); nilfs_mdt_mark_dirty(inode); trace_nilfs2_mdt_insert_new_block(inode, inode->i_ino, block); return 0; } static int nilfs_mdt_create_block(struct inode *inode, unsigned long block, struct buffer_head **out_bh, void (*init_block)(struct inode *, struct buffer_head *, void *)) { struct super_block *sb = inode->i_sb; struct nilfs_transaction_info ti; struct buffer_head *bh; int err; nilfs_transaction_begin(sb, &ti, 0); err = -ENOMEM; bh = nilfs_grab_buffer(inode, inode->i_mapping, block, 0); if (unlikely(!bh)) goto failed_unlock; err = -EEXIST; if (buffer_uptodate(bh)) goto failed_bh; wait_on_buffer(bh); if (buffer_uptodate(bh)) goto failed_bh; bh->b_bdev = sb->s_bdev; err = nilfs_mdt_insert_new_block(inode, block, bh, init_block); if (likely(!err)) { get_bh(bh); *out_bh = bh; } failed_bh: folio_unlock(bh->b_folio); folio_put(bh->b_folio); brelse(bh); failed_unlock: if (likely(!err)) err = nilfs_transaction_commit(sb); else nilfs_transaction_abort(sb); return err; } static int nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff, blk_opf_t opf, struct buffer_head **out_bh) { struct buffer_head *bh; __u64 blknum = 0; int ret = -ENOMEM; bh = nilfs_grab_buffer(inode, inode->i_mapping, blkoff, 0); if (unlikely(!bh)) goto failed; ret = -EEXIST; /* internal code */ if (buffer_uptodate(bh)) goto out; if (opf & REQ_RAHEAD) { if (!trylock_buffer(bh)) { ret = -EBUSY; goto failed_bh; } } else /* opf == REQ_OP_READ */ lock_buffer(bh); if (buffer_uptodate(bh)) { unlock_buffer(bh); goto out; } ret = nilfs_bmap_lookup(NILFS_I(inode)->i_bmap, blkoff, &blknum); if (unlikely(ret)) { unlock_buffer(bh); goto failed_bh; } map_bh(bh, inode->i_sb, (sector_t)blknum); bh->b_end_io = end_buffer_read_sync; get_bh(bh); submit_bh(opf, bh); ret = 0; trace_nilfs2_mdt_submit_block(inode, inode->i_ino, blkoff, opf & REQ_OP_MASK); out: get_bh(bh); *out_bh = bh; failed_bh: folio_unlock(bh->b_folio); folio_put(bh->b_folio); brelse(bh); failed: return ret; } static int nilfs_mdt_read_block(struct inode *inode, unsigned long block, int readahead, struct buffer_head **out_bh) { struct buffer_head *first_bh, *bh; unsigned long blkoff; int i, nr_ra_blocks = NILFS_MDT_MAX_RA_BLOCKS; int err; err = nilfs_mdt_submit_block(inode, block, REQ_OP_READ, &first_bh); if (err == -EEXIST) /* internal code */ goto out; if (unlikely(err)) goto failed; if (readahead) { blkoff = block + 1; for (i = 0; i < nr_ra_blocks; i++, blkoff++) { err = nilfs_mdt_submit_block(inode, blkoff, REQ_OP_READ | REQ_RAHEAD, &bh); if (likely(!err || err == -EEXIST)) brelse(bh); else if (err != -EBUSY) break; /* abort readahead if bmap lookup failed */ if (!buffer_locked(first_bh)) goto out_no_wait; } } wait_on_buffer(first_bh); out_no_wait: err = -EIO; if (!buffer_uptodate(first_bh)) { nilfs_err(inode->i_sb, "I/O error reading meta-data file (ino=%lu, block-offset=%lu)", inode->i_ino, block); goto failed_bh; } out: *out_bh = first_bh; return 0; failed_bh: brelse(first_bh); failed: return err; } /** * nilfs_mdt_get_block - read or create a buffer on meta data file. * @inode: inode of the meta data file * @blkoff: block offset * @create: create flag * @init_block: initializer used for newly allocated block * @out_bh: output of a pointer to the buffer_head * * nilfs_mdt_get_block() looks up the specified buffer and tries to create * a new buffer if @create is not zero. On success, the returned buffer is * assured to be either existing or formatted using a buffer lock on success. * @out_bh is substituted only when zero is returned. * * Return Value: On success, it returns 0. On error, the following negative * error code is returned. * * %-ENOMEM - Insufficient memory available. * * %-EIO - I/O error * * %-ENOENT - the specified block does not exist (hole block) * * %-EROFS - Read only filesystem (for create mode) */ int nilfs_mdt_get_block(struct inode *inode, unsigned long blkoff, int create, void (*init_block)(struct inode *, struct buffer_head *, void *), struct buffer_head **out_bh) { int ret; /* Should be rewritten with merging nilfs_mdt_read_block() */ retry: ret = nilfs_mdt_read_block(inode, blkoff, !create, out_bh); if (!create || ret != -ENOENT) return ret; ret = nilfs_mdt_create_block(inode, blkoff, out_bh, init_block); if (unlikely(ret == -EEXIST)) { /* create = 0; */ /* limit read-create loop retries */ goto retry; } return ret; } /** * nilfs_mdt_find_block - find and get a buffer on meta data file. * @inode: inode of the meta data file * @start: start block offset (inclusive) * @end: end block offset (inclusive) * @blkoff: block offset * @out_bh: place to store a pointer to buffer_head struct * * nilfs_mdt_find_block() looks up an existing block in range of * [@start, @end] and stores pointer to a buffer head of the block to * @out_bh, and block offset to @blkoff, respectively. @out_bh and * @blkoff are substituted only when zero is returned. * * Return Value: On success, it returns 0. On error, the following negative * error code is returned. * * %-ENOMEM - Insufficient memory available. * * %-EIO - I/O error * * %-ENOENT - no block was found in the range */ int nilfs_mdt_find_block(struct inode *inode, unsigned long start, unsigned long end, unsigned long *blkoff, struct buffer_head **out_bh) { __u64 next; int ret; if (unlikely(start > end)) return -ENOENT; ret = nilfs_mdt_read_block(inode, start, true, out_bh); if (!ret) { *blkoff = start; goto out; } if (unlikely(ret != -ENOENT || start == ULONG_MAX)) goto out; ret = nilfs_bmap_seek_key(NILFS_I(inode)->i_bmap, start + 1, &next); if (!ret) { if (next <= end) { ret = nilfs_mdt_read_block(inode, next, true, out_bh); if (!ret) *blkoff = next; } else { ret = -ENOENT; } } out: return ret; } /** * nilfs_mdt_delete_block - make a hole on the meta data file. * @inode: inode of the meta data file * @block: block offset * * Return Value: On success, zero is returned. * On error, one of the following negative error code is returned. * * %-ENOMEM - Insufficient memory available. * * %-EIO - I/O error */ int nilfs_mdt_delete_block(struct inode *inode, unsigned long block) { struct nilfs_inode_info *ii = NILFS_I(inode); int err; err = nilfs_bmap_delete(ii->i_bmap, block); if (!err || err == -ENOENT) { nilfs_mdt_mark_dirty(inode); nilfs_mdt_forget_block(inode, block); } return err; } /** * nilfs_mdt_forget_block - discard dirty state and try to remove the page * @inode: inode of the meta data file * @block: block offset * * nilfs_mdt_forget_block() clears a dirty flag of the specified buffer, and * tries to release the page including the buffer from a page cache. * * Return Value: On success, 0 is returned. On error, one of the following * negative error code is returned. * * %-EBUSY - page has an active buffer. * * %-ENOENT - page cache has no page addressed by the offset. */ int nilfs_mdt_forget_block(struct inode *inode, unsigned long block) { pgoff_t index = block >> (PAGE_SHIFT - inode->i_blkbits); struct folio *folio; struct buffer_head *bh; int ret = 0; int still_dirty; folio = filemap_lock_folio(inode->i_mapping, index); if (IS_ERR(folio)) return -ENOENT; folio_wait_writeback(folio); bh = folio_buffers(folio); if (bh) { unsigned long first_block = index << (PAGE_SHIFT - inode->i_blkbits); bh = get_nth_bh(bh, block - first_block); nilfs_forget_buffer(bh); } still_dirty = folio_test_dirty(folio); folio_unlock(folio); folio_put(folio); if (still_dirty || invalidate_inode_pages2_range(inode->i_mapping, index, index) != 0) ret = -EBUSY; return ret; } int nilfs_mdt_fetch_dirty(struct inode *inode) { struct nilfs_inode_info *ii = NILFS_I(inode); if (nilfs_bmap_test_and_clear_dirty(ii->i_bmap)) { set_bit(NILFS_I_DIRTY, &ii->i_state); return 1; } return test_bit(NILFS_I_DIRTY, &ii->i_state); } static int nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc) { struct folio *folio = page_folio(page); struct inode *inode = folio->mapping->host; struct super_block *sb; int err = 0; if (inode && sb_rdonly(inode->i_sb)) { /* * It means that filesystem was remounted in read-only * mode because of error or metadata corruption. But we * have dirty folios that try to be flushed in background. * So, here we simply discard this dirty folio. */ nilfs_clear_folio_dirty(folio, false); folio_unlock(folio); return -EROFS; } folio_redirty_for_writepage(wbc, folio); folio_unlock(folio); if (!inode) return 0; sb = inode->i_sb; if (wbc->sync_mode == WB_SYNC_ALL) err = nilfs_construct_segment(sb); else if (wbc->for_reclaim) nilfs_flush_segment(sb, inode->i_ino); return err; } static const struct address_space_operations def_mdt_aops = { .dirty_folio = block_dirty_folio, .invalidate_folio = block_invalidate_folio, .writepage = nilfs_mdt_write_page, }; static const struct inode_operations def_mdt_iops; static const struct file_operations def_mdt_fops; int nilfs_mdt_init(struct inode *inode, gfp_t gfp_mask, size_t objsz) { struct nilfs_mdt_info *mi; mi = kzalloc(max(sizeof(*mi), objsz), GFP_NOFS); if (!mi) return -ENOMEM; init_rwsem(&mi->mi_sem); inode->i_private = mi; inode->i_mode = S_IFREG; mapping_set_gfp_mask(inode->i_mapping, gfp_mask); inode->i_op = &def_mdt_iops; inode->i_fop = &def_mdt_fops; inode->i_mapping->a_ops = &def_mdt_aops; return 0; } /** * nilfs_mdt_clear - do cleanup for the metadata file * @inode: inode of the metadata file */ void nilfs_mdt_clear(struct inode *inode) { struct nilfs_mdt_info *mdi = NILFS_MDT(inode); struct nilfs_shadow_map *shadow = mdi->mi_shadow; if (mdi->mi_palloc_cache) nilfs_palloc_destroy_cache(inode); if (shadow) { struct inode *s_inode = shadow->inode; shadow->inode = NULL; iput(s_inode); mdi->mi_shadow = NULL; } } /** * nilfs_mdt_destroy - release resources used by the metadata file * @inode: inode of the metadata file */ void nilfs_mdt_destroy(struct inode *inode) { struct nilfs_mdt_info *mdi = NILFS_MDT(inode); kfree(mdi->mi_bgl); /* kfree(NULL) is safe */ kfree(mdi); } void nilfs_mdt_set_entry_size(struct inode *inode, unsigned int entry_size, unsigned int header_size) { struct nilfs_mdt_info *mi = NILFS_MDT(inode); mi->mi_entry_size = entry_size; mi->mi_entries_per_block = i_blocksize(inode) / entry_size; mi->mi_first_entry_offset = DIV_ROUND_UP(header_size, entry_size); } /** * nilfs_mdt_setup_shadow_map - setup shadow map and bind it to metadata file * @inode: inode of the metadata file * @shadow: shadow mapping */ int nilfs_mdt_setup_shadow_map(struct inode *inode, struct nilfs_shadow_map *shadow) { struct nilfs_mdt_info *mi = NILFS_MDT(inode); struct inode *s_inode; INIT_LIST_HEAD(&shadow->frozen_buffers); s_inode = nilfs_iget_for_shadow(inode); if (IS_ERR(s_inode)) return PTR_ERR(s_inode); shadow->inode = s_inode; mi->mi_shadow = shadow; return 0; } /** * nilfs_mdt_save_to_shadow_map - copy bmap and dirty pages to shadow map * @inode: inode of the metadata file */ int nilfs_mdt_save_to_shadow_map(struct inode *inode) { struct nilfs_mdt_info *mi = NILFS_MDT(inode); struct nilfs_inode_info *ii = NILFS_I(inode); struct nilfs_shadow_map *shadow = mi->mi_shadow; struct inode *s_inode = shadow->inode; int ret; ret = nilfs_copy_dirty_pages(s_inode->i_mapping, inode->i_mapping); if (ret) goto out; ret = nilfs_copy_dirty_pages(NILFS_I(s_inode)->i_assoc_inode->i_mapping, ii->i_assoc_inode->i_mapping); if (ret) goto out; nilfs_bmap_save(ii->i_bmap, &shadow->bmap_store); out: return ret; } int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh) { struct nilfs_shadow_map *shadow = NILFS_MDT(inode)->mi_shadow; struct buffer_head *bh_frozen; struct folio *folio; int blkbits = inode->i_blkbits; folio = filemap_grab_folio(shadow->inode->i_mapping, bh->b_folio->index); if (IS_ERR(folio)) return PTR_ERR(folio); bh_frozen = folio_buffers(folio); if (!bh_frozen) bh_frozen = create_empty_buffers(folio, 1 << blkbits, 0); bh_frozen = get_nth_bh(bh_frozen, bh_offset(bh) >> blkbits); if (!buffer_uptodate(bh_frozen)) nilfs_copy_buffer(bh_frozen, bh); if (list_empty(&bh_frozen->b_assoc_buffers)) { list_add_tail(&bh_frozen->b_assoc_buffers, &shadow->frozen_buffers); set_buffer_nilfs_redirected(bh); } else { brelse(bh_frozen); /* already frozen */ } folio_unlock(folio); folio_put(folio); return 0; } struct buffer_head * nilfs_mdt_get_frozen_buffer(struct inode *inode, struct buffer_head *bh) { struct nilfs_shadow_map *shadow = NILFS_MDT(inode)->mi_shadow; struct buffer_head *bh_frozen = NULL; struct folio *folio; int n; folio = filemap_lock_folio(shadow->inode->i_mapping, bh->b_folio->index); if (!IS_ERR(folio)) { bh_frozen = folio_buffers(folio); if (bh_frozen) { n = bh_offset(bh) >> inode->i_blkbits; bh_frozen = get_nth_bh(bh_frozen, n); } folio_unlock(folio); folio_put(folio); } return bh_frozen; } static void nilfs_release_frozen_buffers(struct nilfs_shadow_map *shadow) { struct list_head *head = &shadow->frozen_buffers; struct buffer_head *bh; while (!list_empty(head)) { bh = list_first_entry(head, struct buffer_head, b_assoc_buffers); list_del_init(&bh->b_assoc_buffers); brelse(bh); /* drop ref-count to make it releasable */ } } /** * nilfs_mdt_restore_from_shadow_map - restore dirty pages and bmap state * @inode: inode of the metadata file */ void nilfs_mdt_restore_from_shadow_map(struct inode *inode) { struct nilfs_mdt_info *mi = NILFS_MDT(inode); struct nilfs_inode_info *ii = NILFS_I(inode); struct nilfs_shadow_map *shadow = mi->mi_shadow; down_write(&mi->mi_sem); if (mi->mi_palloc_cache) nilfs_palloc_clear_cache(inode); nilfs_clear_dirty_pages(inode->i_mapping, true); nilfs_copy_back_pages(inode->i_mapping, shadow->inode->i_mapping); nilfs_clear_dirty_pages(ii->i_assoc_inode->i_mapping, true); nilfs_copy_back_pages(ii->i_assoc_inode->i_mapping, NILFS_I(shadow->inode)->i_assoc_inode->i_mapping); nilfs_bmap_restore(ii->i_bmap, &shadow->bmap_store); up_write(&mi->mi_sem); } /** * nilfs_mdt_clear_shadow_map - truncate pages in shadow map caches * @inode: inode of the metadata file */ void nilfs_mdt_clear_shadow_map(struct inode *inode) { struct nilfs_mdt_info *mi = NILFS_MDT(inode); struct nilfs_shadow_map *shadow = mi->mi_shadow; struct inode *shadow_btnc_inode = NILFS_I(shadow->inode)->i_assoc_inode; down_write(&mi->mi_sem); nilfs_release_frozen_buffers(shadow); truncate_inode_pages(shadow->inode->i_mapping, 0); truncate_inode_pages(shadow_btnc_inode->i_mapping, 0); up_write(&mi->mi_sem); } |
16 16 16 16 16 16 16 16 16 15 16 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 | // SPDX-License-Identifier: GPL-2.0-only /* * * Authors: * Alexander Aring <aar@pengutronix.de> * * Based on: net/mac80211/util.c */ #include "ieee802154_i.h" #include "driver-ops.h" /* privid for wpan_phys to determine whether they belong to us or not */ const void *const mac802154_wpan_phy_privid = &mac802154_wpan_phy_privid; /** * ieee802154_wake_queue - wake ieee802154 queue * @hw: main hardware object * * Tranceivers usually have either one transmit framebuffer or one framebuffer * for both transmitting and receiving. Hence, the core currently only handles * one frame at a time for each phy, which means we had to stop the queue to * avoid new skb to come during the transmission. The queue then needs to be * woken up after the operation. */ static void ieee802154_wake_queue(struct ieee802154_hw *hw) { struct ieee802154_local *local = hw_to_local(hw); struct ieee802154_sub_if_data *sdata; rcu_read_lock(); clear_bit(WPAN_PHY_FLAG_STATE_QUEUE_STOPPED, &local->phy->flags); list_for_each_entry_rcu(sdata, &local->interfaces, list) { if (!sdata->dev) continue; netif_wake_queue(sdata->dev); } rcu_read_unlock(); } /** * ieee802154_stop_queue - stop ieee802154 queue * @hw: main hardware object * * Tranceivers usually have either one transmit framebuffer or one framebuffer * for both transmitting and receiving. Hence, the core currently only handles * one frame at a time for each phy, which means we need to tell upper layers to * stop giving us new skbs while we are busy with the transmitted one. The queue * must then be stopped before transmitting. */ static void ieee802154_stop_queue(struct ieee802154_hw *hw) { struct ieee802154_local *local = hw_to_local(hw); struct ieee802154_sub_if_data *sdata; rcu_read_lock(); list_for_each_entry_rcu(sdata, &local->interfaces, list) { if (!sdata->dev) continue; netif_stop_queue(sdata->dev); } rcu_read_unlock(); } void ieee802154_hold_queue(struct ieee802154_local *local) { unsigned long flags; spin_lock_irqsave(&local->phy->queue_lock, flags); if (!atomic_fetch_inc(&local->phy->hold_txs)) ieee802154_stop_queue(&local->hw); spin_unlock_irqrestore(&local->phy->queue_lock, flags); } void ieee802154_release_queue(struct ieee802154_local *local) { unsigned long flags; spin_lock_irqsave(&local->phy->queue_lock, flags); if (atomic_dec_and_test(&local->phy->hold_txs)) ieee802154_wake_queue(&local->hw); spin_unlock_irqrestore(&local->phy->queue_lock, flags); } void ieee802154_disable_queue(struct ieee802154_local *local) { struct ieee802154_sub_if_data *sdata; rcu_read_lock(); list_for_each_entry_rcu(sdata, &local->interfaces, list) { if (!sdata->dev) continue; netif_tx_disable(sdata->dev); } rcu_read_unlock(); } enum hrtimer_restart ieee802154_xmit_ifs_timer(struct hrtimer *timer) { struct ieee802154_local *local = container_of(timer, struct ieee802154_local, ifs_timer); ieee802154_release_queue(local); return HRTIMER_NORESTART; } void ieee802154_xmit_complete(struct ieee802154_hw *hw, struct sk_buff *skb, bool ifs_handling) { struct ieee802154_local *local = hw_to_local(hw); local->tx_result = IEEE802154_SUCCESS; if (ifs_handling) { u8 max_sifs_size; /* If transceiver sets CRC on his own we need to use lifs * threshold len above 16 otherwise 18, because it's not * part of skb->len. */ if (hw->flags & IEEE802154_HW_TX_OMIT_CKSUM) max_sifs_size = IEEE802154_MAX_SIFS_FRAME_SIZE - IEEE802154_FCS_LEN; else max_sifs_size = IEEE802154_MAX_SIFS_FRAME_SIZE; if (skb->len > max_sifs_size) hrtimer_start(&local->ifs_timer, hw->phy->lifs_period * NSEC_PER_USEC, HRTIMER_MODE_REL); else hrtimer_start(&local->ifs_timer, hw->phy->sifs_period * NSEC_PER_USEC, HRTIMER_MODE_REL); } else { ieee802154_release_queue(local); } dev_consume_skb_any(skb); if (atomic_dec_and_test(&hw->phy->ongoing_txs)) wake_up(&hw->phy->sync_txq); } EXPORT_SYMBOL(ieee802154_xmit_complete); void ieee802154_xmit_error(struct ieee802154_hw *hw, struct sk_buff *skb, int reason) { struct ieee802154_local *local = hw_to_local(hw); local->tx_result = reason; ieee802154_release_queue(local); dev_kfree_skb_any(skb); if (atomic_dec_and_test(&hw->phy->ongoing_txs)) wake_up(&hw->phy->sync_txq); } EXPORT_SYMBOL(ieee802154_xmit_error); void ieee802154_xmit_hw_error(struct ieee802154_hw *hw, struct sk_buff *skb) { ieee802154_xmit_error(hw, skb, IEEE802154_SYSTEM_ERROR); } EXPORT_SYMBOL(ieee802154_xmit_hw_error); void ieee802154_stop_device(struct ieee802154_local *local) { flush_workqueue(local->workqueue); hrtimer_cancel(&local->ifs_timer); drv_stop(local); } |
29 29 29 2 2 29 29 29 29 29 8 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 | // SPDX-License-Identifier: GPL-2.0-or-later /* Request key authorisation token key definition. * * Copyright (C) 2005 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * See Documentation/security/keys/request-key.rst */ #include <linux/sched.h> #include <linux/err.h> #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/uaccess.h> #include "internal.h" #include <keys/request_key_auth-type.h> static int request_key_auth_preparse(struct key_preparsed_payload *); static void request_key_auth_free_preparse(struct key_preparsed_payload *); static int request_key_auth_instantiate(struct key *, struct key_preparsed_payload *); static void request_key_auth_describe(const struct key *, struct seq_file *); static void request_key_auth_revoke(struct key *); static void request_key_auth_destroy(struct key *); static long request_key_auth_read(const struct key *, char *, size_t); /* * The request-key authorisation key type definition. */ struct key_type key_type_request_key_auth = { .name = ".request_key_auth", .def_datalen = sizeof(struct request_key_auth), .preparse = request_key_auth_preparse, .free_preparse = request_key_auth_free_preparse, .instantiate = request_key_auth_instantiate, .describe = request_key_auth_describe, .revoke = request_key_auth_revoke, .destroy = request_key_auth_destroy, .read = request_key_auth_read, }; static int request_key_auth_preparse(struct key_preparsed_payload *prep) { return 0; } static void request_key_auth_free_preparse(struct key_preparsed_payload *prep) { } /* * Instantiate a request-key authorisation key. */ static int request_key_auth_instantiate(struct key *key, struct key_preparsed_payload *prep) { rcu_assign_keypointer(key, (struct request_key_auth *)prep->data); return 0; } /* * Describe an authorisation token. */ static void request_key_auth_describe(const struct key *key, struct seq_file *m) { struct request_key_auth *rka = dereference_key_rcu(key); if (!rka) return; seq_puts(m, "key:"); seq_puts(m, key->description); if (key_is_positive(key)) seq_printf(m, " pid:%d ci:%zu", rka->pid, rka->callout_len); } /* * Read the callout_info data (retrieves the callout information). * - the key's semaphore is read-locked */ static long request_key_auth_read(const struct key *key, char *buffer, size_t buflen) { struct request_key_auth *rka = dereference_key_locked(key); size_t datalen; long ret; if (!rka) return -EKEYREVOKED; datalen = rka->callout_len; ret = datalen; /* we can return the data as is */ if (buffer && buflen > 0) { if (buflen > datalen) buflen = datalen; memcpy(buffer, rka->callout_info, buflen); } return ret; } static void free_request_key_auth(struct request_key_auth *rka) { if (!rka) return; key_put(rka->target_key); key_put(rka->dest_keyring); if (rka->cred) put_cred(rka->cred); kfree(rka->callout_info); kfree(rka); } /* * Dispose of the request_key_auth record under RCU conditions */ static void request_key_auth_rcu_disposal(struct rcu_head *rcu) { struct request_key_auth *rka = container_of(rcu, struct request_key_auth, rcu); free_request_key_auth(rka); } /* * Handle revocation of an authorisation token key. * * Called with the key sem write-locked. */ static void request_key_auth_revoke(struct key *key) { struct request_key_auth *rka = dereference_key_locked(key); kenter("{%d}", key->serial); rcu_assign_keypointer(key, NULL); call_rcu(&rka->rcu, request_key_auth_rcu_disposal); } /* * Destroy an instantiation authorisation token key. */ static void request_key_auth_destroy(struct key *key) { struct request_key_auth *rka = rcu_access_pointer(key->payload.rcu_data0); kenter("{%d}", key->serial); if (rka) { rcu_assign_keypointer(key, NULL); call_rcu(&rka->rcu, request_key_auth_rcu_disposal); } } /* * Create an authorisation token for /sbin/request-key or whoever to gain * access to the caller's security data. */ struct key *request_key_auth_new(struct key *target, const char *op, const void *callout_info, size_t callout_len, struct key *dest_keyring) { struct request_key_auth *rka, *irka; const struct cred *cred = current_cred(); struct key *authkey = NULL; char desc[20]; int ret = -ENOMEM; kenter("%d,", target->serial); /* allocate a auth record */ rka = kzalloc(sizeof(*rka), GFP_KERNEL); if (!rka) goto error; rka->callout_info = kmemdup(callout_info, callout_len, GFP_KERNEL); if (!rka->callout_info) goto error_free_rka; rka->callout_len = callout_len; strscpy(rka->op, op, sizeof(rka->op)); /* see if the calling process is already servicing the key request of * another process */ if (cred->request_key_auth) { /* it is - use that instantiation context here too */ down_read(&cred->request_key_auth->sem); /* if the auth key has been revoked, then the key we're * servicing is already instantiated */ if (test_bit(KEY_FLAG_REVOKED, &cred->request_key_auth->flags)) { up_read(&cred->request_key_auth->sem); ret = -EKEYREVOKED; goto error_free_rka; } irka = cred->request_key_auth->payload.data[0]; rka->cred = get_cred(irka->cred); rka->pid = irka->pid; up_read(&cred->request_key_auth->sem); } else { /* it isn't - use this process as the context */ rka->cred = get_cred(cred); rka->pid = current->pid; } rka->target_key = key_get(target); rka->dest_keyring = key_get(dest_keyring); /* allocate the auth key */ sprintf(desc, "%x", target->serial); authkey = key_alloc(&key_type_request_key_auth, desc, cred->fsuid, cred->fsgid, cred, KEY_POS_VIEW | KEY_POS_READ | KEY_POS_SEARCH | KEY_POS_LINK | KEY_USR_VIEW, KEY_ALLOC_NOT_IN_QUOTA, NULL); if (IS_ERR(authkey)) { ret = PTR_ERR(authkey); goto error_free_rka; } /* construct the auth key */ ret = key_instantiate_and_link(authkey, rka, 0, NULL, NULL); if (ret < 0) goto error_put_authkey; kleave(" = {%d,%d}", authkey->serial, refcount_read(&authkey->usage)); return authkey; error_put_authkey: key_put(authkey); error_free_rka: free_request_key_auth(rka); error: kleave("= %d", ret); return ERR_PTR(ret); } /* * Search the current process's keyrings for the authorisation key for * instantiation of a key. */ struct key *key_get_instantiation_authkey(key_serial_t target_id) { char description[16]; struct keyring_search_context ctx = { .index_key.type = &key_type_request_key_auth, .index_key.description = description, .cred = current_cred(), .match_data.cmp = key_default_cmp, .match_data.raw_data = description, .match_data.lookup_type = KEYRING_SEARCH_LOOKUP_DIRECT, .flags = (KEYRING_SEARCH_DO_STATE_CHECK | KEYRING_SEARCH_RECURSE), }; struct key *authkey; key_ref_t authkey_ref; ctx.index_key.desc_len = sprintf(description, "%x", target_id); rcu_read_lock(); authkey_ref = search_process_keyrings_rcu(&ctx); rcu_read_unlock(); if (IS_ERR(authkey_ref)) { authkey = ERR_CAST(authkey_ref); if (authkey == ERR_PTR(-EAGAIN)) authkey = ERR_PTR(-ENOKEY); goto error; } authkey = key_ref_to_ptr(authkey_ref); if (test_bit(KEY_FLAG_REVOKED, &authkey->flags)) { key_put(authkey); authkey = ERR_PTR(-EKEYREVOKED); } error: return authkey; } |
17 55 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 | /* SPDX-License-Identifier: GPL-2.0-or-later */ /* V4L2 device support header. Copyright (C) 2008 Hans Verkuil <hverkuil@xs4all.nl> */ #ifndef _V4L2_DEVICE_H #define _V4L2_DEVICE_H #include <media/media-device.h> #include <media/v4l2-subdev.h> #include <media/v4l2-dev.h> struct v4l2_ctrl_handler; /** * struct v4l2_device - main struct to for V4L2 device drivers * * @dev: pointer to struct device. * @mdev: pointer to struct media_device, may be NULL. * @subdevs: used to keep track of the registered subdevs * @lock: lock this struct; can be used by the driver as well * if this struct is embedded into a larger struct. * @name: unique device name, by default the driver name + bus ID * @notify: notify operation called by some sub-devices. * @ctrl_handler: The control handler. May be %NULL. * @prio: Device's priority state * @ref: Keep track of the references to this struct. * @release: Release function that is called when the ref count * goes to 0. * * Each instance of a V4L2 device should create the v4l2_device struct, * either stand-alone or embedded in a larger struct. * * It allows easy access to sub-devices (see v4l2-subdev.h) and provides * basic V4L2 device-level support. * * .. note:: * * #) @dev->driver_data points to this struct. * #) @dev might be %NULL if there is no parent device */ struct v4l2_device { struct device *dev; struct media_device *mdev; struct list_head subdevs; spinlock_t lock; char name[36]; void (*notify)(struct v4l2_subdev *sd, unsigned int notification, void *arg); struct v4l2_ctrl_handler *ctrl_handler; struct v4l2_prio_state prio; struct kref ref; void (*release)(struct v4l2_device *v4l2_dev); }; /** * v4l2_device_get - gets a V4L2 device reference * * @v4l2_dev: pointer to struct &v4l2_device * * This is an ancillary routine meant to increment the usage for the * struct &v4l2_device pointed by @v4l2_dev. */ static inline void v4l2_device_get(struct v4l2_device *v4l2_dev) { kref_get(&v4l2_dev->ref); } /** * v4l2_device_put - puts a V4L2 device reference * * @v4l2_dev: pointer to struct &v4l2_device * * This is an ancillary routine meant to decrement the usage for the * struct &v4l2_device pointed by @v4l2_dev. */ int v4l2_device_put(struct v4l2_device *v4l2_dev); /** * v4l2_device_register - Initialize v4l2_dev and make @dev->driver_data * point to @v4l2_dev. * * @dev: pointer to struct &device * @v4l2_dev: pointer to struct &v4l2_device * * .. note:: * @dev may be %NULL in rare cases (ISA devices). * In such case the caller must fill in the @v4l2_dev->name field * before calling this function. */ int __must_check v4l2_device_register(struct device *dev, struct v4l2_device *v4l2_dev); /** * v4l2_device_set_name - Optional function to initialize the * name field of struct &v4l2_device * * @v4l2_dev: pointer to struct &v4l2_device * @basename: base name for the device name * @instance: pointer to a static atomic_t var with the instance usage for * the device driver. * * v4l2_device_set_name() initializes the name field of struct &v4l2_device * using the driver name and a driver-global atomic_t instance. * * This function will increment the instance counter and returns the * instance value used in the name. * * Example: * * static atomic_t drv_instance = ATOMIC_INIT(0); * * ... * * instance = v4l2_device_set_name(&\ v4l2_dev, "foo", &\ drv_instance); * * The first time this is called the name field will be set to foo0 and * this function returns 0. If the name ends with a digit (e.g. cx18), * then the name will be set to cx18-0 since cx180 would look really odd. */ int v4l2_device_set_name(struct v4l2_device *v4l2_dev, const char *basename, atomic_t *instance); /** * v4l2_device_disconnect - Change V4L2 device state to disconnected. * * @v4l2_dev: pointer to struct v4l2_device * * Should be called when the USB parent disconnects. * Since the parent disappears, this ensures that @v4l2_dev doesn't have * an invalid parent pointer. * * .. note:: This function sets @v4l2_dev->dev to NULL. */ void v4l2_device_disconnect(struct v4l2_device *v4l2_dev); /** * v4l2_device_unregister - Unregister all sub-devices and any other * resources related to @v4l2_dev. * * @v4l2_dev: pointer to struct v4l2_device */ void v4l2_device_unregister(struct v4l2_device *v4l2_dev); /** * v4l2_device_register_subdev - Registers a subdev with a v4l2 device. * * @v4l2_dev: pointer to struct &v4l2_device * @sd: pointer to &struct v4l2_subdev * * While registered, the subdev module is marked as in-use. * * An error is returned if the module is no longer loaded on any attempts * to register it. */ #define v4l2_device_register_subdev(v4l2_dev, sd) \ __v4l2_device_register_subdev(v4l2_dev, sd, THIS_MODULE) int __must_check __v4l2_device_register_subdev(struct v4l2_device *v4l2_dev, struct v4l2_subdev *sd, struct module *module); /** * v4l2_device_unregister_subdev - Unregisters a subdev with a v4l2 device. * * @sd: pointer to &struct v4l2_subdev * * .. note :: * * Can also be called if the subdev wasn't registered. In such * case, it will do nothing. */ void v4l2_device_unregister_subdev(struct v4l2_subdev *sd); /** * __v4l2_device_register_subdev_nodes - Registers device nodes for * all subdevs of the v4l2 device that are marked with the * %V4L2_SUBDEV_FL_HAS_DEVNODE flag. * * @v4l2_dev: pointer to struct v4l2_device * @read_only: subdevices read-only flag. True to register the subdevices * device nodes in read-only mode, false to allow full access to the * subdevice userspace API. */ int __must_check __v4l2_device_register_subdev_nodes(struct v4l2_device *v4l2_dev, bool read_only); /** * v4l2_device_register_subdev_nodes - Registers subdevices device nodes with * unrestricted access to the subdevice userspace operations * * Internally calls __v4l2_device_register_subdev_nodes(). See its documentation * for more details. * * @v4l2_dev: pointer to struct v4l2_device */ static inline int __must_check v4l2_device_register_subdev_nodes(struct v4l2_device *v4l2_dev) { #if defined(CONFIG_VIDEO_V4L2_SUBDEV_API) return __v4l2_device_register_subdev_nodes(v4l2_dev, false); #else return 0; #endif } /** * v4l2_device_register_ro_subdev_nodes - Registers subdevices device nodes * in read-only mode * * Internally calls __v4l2_device_register_subdev_nodes(). See its documentation * for more details. * * @v4l2_dev: pointer to struct v4l2_device */ static inline int __must_check v4l2_device_register_ro_subdev_nodes(struct v4l2_device *v4l2_dev) { #if defined(CONFIG_VIDEO_V4L2_SUBDEV_API) return __v4l2_device_register_subdev_nodes(v4l2_dev, true); #else return 0; #endif } /** * v4l2_subdev_notify - Sends a notification to v4l2_device. * * @sd: pointer to &struct v4l2_subdev * @notification: type of notification. Please notice that the notification * type is driver-specific. * @arg: arguments for the notification. Those are specific to each * notification type. */ static inline void v4l2_subdev_notify(struct v4l2_subdev *sd, unsigned int notification, void *arg) { if (sd && sd->v4l2_dev && sd->v4l2_dev->notify) sd->v4l2_dev->notify(sd, notification, arg); } /** * v4l2_device_supports_requests - Test if requests are supported. * * @v4l2_dev: pointer to struct v4l2_device */ static inline bool v4l2_device_supports_requests(struct v4l2_device *v4l2_dev) { return v4l2_dev->mdev && v4l2_dev->mdev->ops && v4l2_dev->mdev->ops->req_queue; } /* Helper macros to iterate over all subdevs. */ /** * v4l2_device_for_each_subdev - Helper macro that interates over all * sub-devices of a given &v4l2_device. * * @sd: pointer that will be filled by the macro with all * &struct v4l2_subdev pointer used as an iterator by the loop. * @v4l2_dev: &struct v4l2_device owning the sub-devices to iterate over. * * This macro iterates over all sub-devices owned by the @v4l2_dev device. * It acts as a for loop iterator and executes the next statement with * the @sd variable pointing to each sub-device in turn. */ #define v4l2_device_for_each_subdev(sd, v4l2_dev) \ list_for_each_entry(sd, &(v4l2_dev)->subdevs, list) /** * __v4l2_device_call_subdevs_p - Calls the specified operation for * all subdevs matching the condition. * * @v4l2_dev: &struct v4l2_device owning the sub-devices to iterate over. * @sd: pointer that will be filled by the macro with all * &struct v4l2_subdev pointer used as an iterator by the loop. * @cond: condition to be match * @o: name of the element at &struct v4l2_subdev_ops that contains @f. * Each element there groups a set of operations functions. * @f: operation function that will be called if @cond matches. * The operation functions are defined in groups, according to * each element at &struct v4l2_subdev_ops. * @args: arguments for @f. * * Ignore any errors. * * Note: subdevs cannot be added or deleted while walking * the subdevs list. */ #define __v4l2_device_call_subdevs_p(v4l2_dev, sd, cond, o, f, args...) \ do { \ list_for_each_entry((sd), &(v4l2_dev)->subdevs, list) \ if ((cond) && (sd)->ops->o && (sd)->ops->o->f) \ (sd)->ops->o->f((sd) , ##args); \ } while (0) /** * __v4l2_device_call_subdevs - Calls the specified operation for * all subdevs matching the condition. * * @v4l2_dev: &struct v4l2_device owning the sub-devices to iterate over. * @cond: condition to be match * @o: name of the element at &struct v4l2_subdev_ops that contains @f. * Each element there groups a set of operations functions. * @f: operation function that will be called if @cond matches. * The operation functions are defined in groups, according to * each element at &struct v4l2_subdev_ops. * @args: arguments for @f. * * Ignore any errors. * * Note: subdevs cannot be added or deleted while walking * the subdevs list. */ #define __v4l2_device_call_subdevs(v4l2_dev, cond, o, f, args...) \ do { \ struct v4l2_subdev *__sd; \ \ __v4l2_device_call_subdevs_p(v4l2_dev, __sd, cond, o, \ f , ##args); \ } while (0) /** * __v4l2_device_call_subdevs_until_err_p - Calls the specified operation for * all subdevs matching the condition. * * @v4l2_dev: &struct v4l2_device owning the sub-devices to iterate over. * @sd: pointer that will be filled by the macro with all * &struct v4l2_subdev sub-devices associated with @v4l2_dev. * @cond: condition to be match * @o: name of the element at &struct v4l2_subdev_ops that contains @f. * Each element there groups a set of operations functions. * @f: operation function that will be called if @cond matches. * The operation functions are defined in groups, according to * each element at &struct v4l2_subdev_ops. * @args: arguments for @f. * * Return: * * If the operation returns an error other than 0 or ``-ENOIOCTLCMD`` * for any subdevice, then abort and return with that error code, zero * otherwise. * * Note: subdevs cannot be added or deleted while walking * the subdevs list. */ #define __v4l2_device_call_subdevs_until_err_p(v4l2_dev, sd, cond, o, f, args...) \ ({ \ long __err = 0; \ \ list_for_each_entry((sd), &(v4l2_dev)->subdevs, list) { \ if ((cond) && (sd)->ops->o && (sd)->ops->o->f) \ __err = (sd)->ops->o->f((sd) , ##args); \ if (__err && __err != -ENOIOCTLCMD) \ break; \ } \ (__err == -ENOIOCTLCMD) ? 0 : __err; \ }) /** * __v4l2_device_call_subdevs_until_err - Calls the specified operation for * all subdevs matching the condition. * * @v4l2_dev: &struct v4l2_device owning the sub-devices to iterate over. * @cond: condition to be match * @o: name of the element at &struct v4l2_subdev_ops that contains @f. * Each element there groups a set of operations functions. * @f: operation function that will be called if @cond matches. * The operation functions are defined in groups, according to * each element at &struct v4l2_subdev_ops. * @args: arguments for @f. * * Return: * * If the operation returns an error other than 0 or ``-ENOIOCTLCMD`` * for any subdevice, then abort and return with that error code, * zero otherwise. * * Note: subdevs cannot be added or deleted while walking * the subdevs list. */ #define __v4l2_device_call_subdevs_until_err(v4l2_dev, cond, o, f, args...) \ ({ \ struct v4l2_subdev *__sd; \ __v4l2_device_call_subdevs_until_err_p(v4l2_dev, __sd, cond, o, \ f , ##args); \ }) /** * v4l2_device_call_all - Calls the specified operation for * all subdevs matching the &v4l2_subdev.grp_id, as assigned * by the bridge driver. * * @v4l2_dev: &struct v4l2_device owning the sub-devices to iterate over. * @grpid: &struct v4l2_subdev->grp_id group ID to match. * Use 0 to match them all. * @o: name of the element at &struct v4l2_subdev_ops that contains @f. * Each element there groups a set of operations functions. * @f: operation function that will be called if @cond matches. * The operation functions are defined in groups, according to * each element at &struct v4l2_subdev_ops. * @args: arguments for @f. * * Ignore any errors. * * Note: subdevs cannot be added or deleted while walking * the subdevs list. */ #define v4l2_device_call_all(v4l2_dev, grpid, o, f, args...) \ do { \ struct v4l2_subdev *__sd; \ \ __v4l2_device_call_subdevs_p(v4l2_dev, __sd, \ (grpid) == 0 || __sd->grp_id == (grpid), o, f , \ ##args); \ } while (0) /** * v4l2_device_call_until_err - Calls the specified operation for * all subdevs matching the &v4l2_subdev.grp_id, as assigned * by the bridge driver, until an error occurs. * * @v4l2_dev: &struct v4l2_device owning the sub-devices to iterate over. * @grpid: &struct v4l2_subdev->grp_id group ID to match. * Use 0 to match them all. * @o: name of the element at &struct v4l2_subdev_ops that contains @f. * Each element there groups a set of operations functions. * @f: operation function that will be called if @cond matches. * The operation functions are defined in groups, according to * each element at &struct v4l2_subdev_ops. * @args: arguments for @f. * * Return: * * If the operation returns an error other than 0 or ``-ENOIOCTLCMD`` * for any subdevice, then abort and return with that error code, * zero otherwise. * * Note: subdevs cannot be added or deleted while walking * the subdevs list. */ #define v4l2_device_call_until_err(v4l2_dev, grpid, o, f, args...) \ ({ \ struct v4l2_subdev *__sd; \ __v4l2_device_call_subdevs_until_err_p(v4l2_dev, __sd, \ (grpid) == 0 || __sd->grp_id == (grpid), o, f , \ ##args); \ }) /** * v4l2_device_mask_call_all - Calls the specified operation for * all subdevices where a group ID matches a specified bitmask. * * @v4l2_dev: &struct v4l2_device owning the sub-devices to iterate over. * @grpmsk: bitmask to be checked against &struct v4l2_subdev->grp_id * group ID to be matched. Use 0 to match them all. * @o: name of the element at &struct v4l2_subdev_ops that contains @f. * Each element there groups a set of operations functions. * @f: operation function that will be called if @cond matches. * The operation functions are defined in groups, according to * each element at &struct v4l2_subdev_ops. * @args: arguments for @f. * * Ignore any errors. * * Note: subdevs cannot be added or deleted while walking * the subdevs list. */ #define v4l2_device_mask_call_all(v4l2_dev, grpmsk, o, f, args...) \ do { \ struct v4l2_subdev *__sd; \ \ __v4l2_device_call_subdevs_p(v4l2_dev, __sd, \ (grpmsk) == 0 || (__sd->grp_id & (grpmsk)), o, \ f , ##args); \ } while (0) /** * v4l2_device_mask_call_until_err - Calls the specified operation for * all subdevices where a group ID matches a specified bitmask. * * @v4l2_dev: &struct v4l2_device owning the sub-devices to iterate over. * @grpmsk: bitmask to be checked against &struct v4l2_subdev->grp_id * group ID to be matched. Use 0 to match them all. * @o: name of the element at &struct v4l2_subdev_ops that contains @f. * Each element there groups a set of operations functions. * @f: operation function that will be called if @cond matches. * The operation functions are defined in groups, according to * each element at &struct v4l2_subdev_ops. * @args: arguments for @f. * * Return: * * If the operation returns an error other than 0 or ``-ENOIOCTLCMD`` * for any subdevice, then abort and return with that error code, * zero otherwise. * * Note: subdevs cannot be added or deleted while walking * the subdevs list. */ #define v4l2_device_mask_call_until_err(v4l2_dev, grpmsk, o, f, args...) \ ({ \ struct v4l2_subdev *__sd; \ __v4l2_device_call_subdevs_until_err_p(v4l2_dev, __sd, \ (grpmsk) == 0 || (__sd->grp_id & (grpmsk)), o, \ f , ##args); \ }) /** * v4l2_device_has_op - checks if any subdev with matching grpid has a * given ops. * * @v4l2_dev: &struct v4l2_device owning the sub-devices to iterate over. * @grpid: &struct v4l2_subdev->grp_id group ID to match. * Use 0 to match them all. * @o: name of the element at &struct v4l2_subdev_ops that contains @f. * Each element there groups a set of operations functions. * @f: operation function that will be called if @cond matches. * The operation functions are defined in groups, according to * each element at &struct v4l2_subdev_ops. */ #define v4l2_device_has_op(v4l2_dev, grpid, o, f) \ ({ \ struct v4l2_subdev *__sd; \ bool __result = false; \ list_for_each_entry(__sd, &(v4l2_dev)->subdevs, list) { \ if ((grpid) && __sd->grp_id != (grpid)) \ continue; \ if (v4l2_subdev_has_op(__sd, o, f)) { \ __result = true; \ break; \ } \ } \ __result; \ }) /** * v4l2_device_mask_has_op - checks if any subdev with matching group * mask has a given ops. * * @v4l2_dev: &struct v4l2_device owning the sub-devices to iterate over. * @grpmsk: bitmask to be checked against &struct v4l2_subdev->grp_id * group ID to be matched. Use 0 to match them all. * @o: name of the element at &struct v4l2_subdev_ops that contains @f. * Each element there groups a set of operations functions. * @f: operation function that will be called if @cond matches. * The operation functions are defined in groups, according to * each element at &struct v4l2_subdev_ops. */ #define v4l2_device_mask_has_op(v4l2_dev, grpmsk, o, f) \ ({ \ struct v4l2_subdev *__sd; \ bool __result = false; \ list_for_each_entry(__sd, &(v4l2_dev)->subdevs, list) { \ if ((grpmsk) && !(__sd->grp_id & (grpmsk))) \ continue; \ if (v4l2_subdev_has_op(__sd, o, f)) { \ __result = true; \ break; \ } \ } \ __result; \ }) #endif |
68 68 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 | // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2020 Facebook * Copyright 2020 Google LLC. */ #include <linux/pid.h> #include <linux/sched.h> #include <linux/rculist.h> #include <linux/list.h> #include <linux/hash.h> #include <linux/types.h> #include <linux/spinlock.h> #include <linux/bpf.h> #include <linux/bpf_local_storage.h> #include <linux/filter.h> #include <uapi/linux/btf.h> #include <linux/btf_ids.h> #include <linux/fdtable.h> #include <linux/rcupdate_trace.h> DEFINE_BPF_STORAGE_CACHE(task_cache); static DEFINE_PER_CPU(int, bpf_task_storage_busy); static void bpf_task_storage_lock(void) { migrate_disable(); this_cpu_inc(bpf_task_storage_busy); } static void bpf_task_storage_unlock(void) { this_cpu_dec(bpf_task_storage_busy); migrate_enable(); } static bool bpf_task_storage_trylock(void) { migrate_disable(); if (unlikely(this_cpu_inc_return(bpf_task_storage_busy) != 1)) { this_cpu_dec(bpf_task_storage_busy); migrate_enable(); return false; } return true; } static struct bpf_local_storage __rcu **task_storage_ptr(void *owner) { struct task_struct *task = owner; return &task->bpf_storage; } static struct bpf_local_storage_data * task_storage_lookup(struct task_struct *task, struct bpf_map *map, bool cacheit_lockit) { struct bpf_local_storage *task_storage; struct bpf_local_storage_map *smap; task_storage = rcu_dereference_check(task->bpf_storage, bpf_rcu_lock_held()); if (!task_storage) return NULL; smap = (struct bpf_local_storage_map *)map; return bpf_local_storage_lookup(task_storage, smap, cacheit_lockit); } void bpf_task_storage_free(struct task_struct *task) { struct bpf_local_storage *local_storage; rcu_read_lock(); local_storage = rcu_dereference(task->bpf_storage); if (!local_storage) { rcu_read_unlock(); return; } bpf_task_storage_lock(); bpf_local_storage_destroy(local_storage); bpf_task_storage_unlock(); rcu_read_unlock(); } static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key) { struct bpf_local_storage_data *sdata; struct task_struct *task; unsigned int f_flags; struct pid *pid; int fd, err; fd = *(int *)key; pid = pidfd_get_pid(fd, &f_flags); if (IS_ERR(pid)) return ERR_CAST(pid); /* We should be in an RCU read side critical section, it should be safe * to call pid_task. */ WARN_ON_ONCE(!rcu_read_lock_held()); task = pid_task(pid, PIDTYPE_PID); if (!task) { err = -ENOENT; goto out; } bpf_task_storage_lock(); sdata = task_storage_lookup(task, map, true); bpf_task_storage_unlock(); put_pid(pid); return sdata ? sdata->data : NULL; out: put_pid(pid); return ERR_PTR(err); } static long bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key, void *value, u64 map_flags) { struct bpf_local_storage_data *sdata; struct task_struct *task; unsigned int f_flags; struct pid *pid; int fd, err; fd = *(int *)key; pid = pidfd_get_pid(fd, &f_flags); if (IS_ERR(pid)) return PTR_ERR(pid); /* We should be in an RCU read side critical section, it should be safe * to call pid_task. */ WARN_ON_ONCE(!rcu_read_lock_held()); task = pid_task(pid, PIDTYPE_PID); if (!task) { err = -ENOENT; goto out; } bpf_task_storage_lock(); sdata = bpf_local_storage_update( task, (struct bpf_local_storage_map *)map, value, map_flags, GFP_ATOMIC); bpf_task_storage_unlock(); err = PTR_ERR_OR_ZERO(sdata); out: put_pid(pid); return err; } static int task_storage_delete(struct task_struct *task, struct bpf_map *map, bool nobusy) { struct bpf_local_storage_data *sdata; sdata = task_storage_lookup(task, map, false); if (!sdata) return -ENOENT; if (!nobusy) return -EBUSY; bpf_selem_unlink(SELEM(sdata), false); return 0; } static long bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key) { struct task_struct *task; unsigned int f_flags; struct pid *pid; int fd, err; fd = *(int *)key; pid = pidfd_get_pid(fd, &f_flags); if (IS_ERR(pid)) return PTR_ERR(pid); /* We should be in an RCU read side critical section, it should be safe * to call pid_task. */ WARN_ON_ONCE(!rcu_read_lock_held()); task = pid_task(pid, PIDTYPE_PID); if (!task) { err = -ENOENT; goto out; } bpf_task_storage_lock(); err = task_storage_delete(task, map, true); bpf_task_storage_unlock(); out: put_pid(pid); return err; } /* Called by bpf_task_storage_get*() helpers */ static void *__bpf_task_storage_get(struct bpf_map *map, struct task_struct *task, void *value, u64 flags, gfp_t gfp_flags, bool nobusy) { struct bpf_local_storage_data *sdata; sdata = task_storage_lookup(task, map, nobusy); if (sdata) return sdata->data; /* only allocate new storage, when the task is refcounted */ if (refcount_read(&task->usage) && (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) && nobusy) { sdata = bpf_local_storage_update( task, (struct bpf_local_storage_map *)map, value, BPF_NOEXIST, gfp_flags); return IS_ERR(sdata) ? NULL : sdata->data; } return NULL; } /* *gfp_flags* is a hidden argument provided by the verifier */ BPF_CALL_5(bpf_task_storage_get_recur, struct bpf_map *, map, struct task_struct *, task, void *, value, u64, flags, gfp_t, gfp_flags) { bool nobusy; void *data; WARN_ON_ONCE(!bpf_rcu_lock_held()); if (flags & ~BPF_LOCAL_STORAGE_GET_F_CREATE || !task) return (unsigned long)NULL; nobusy = bpf_task_storage_trylock(); data = __bpf_task_storage_get(map, task, value, flags, gfp_flags, nobusy); if (nobusy) bpf_task_storage_unlock(); return (unsigned long)data; } /* *gfp_flags* is a hidden argument provided by the verifier */ BPF_CALL_5(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *, task, void *, value, u64, flags, gfp_t, gfp_flags) { void *data; WARN_ON_ONCE(!bpf_rcu_lock_held()); if (flags & ~BPF_LOCAL_STORAGE_GET_F_CREATE || !task) return (unsigned long)NULL; bpf_task_storage_lock(); data = __bpf_task_storage_get(map, task, value, flags, gfp_flags, true); bpf_task_storage_unlock(); return (unsigned long)data; } BPF_CALL_2(bpf_task_storage_delete_recur, struct bpf_map *, map, struct task_struct *, task) { bool nobusy; int ret; WARN_ON_ONCE(!bpf_rcu_lock_held()); if (!task) return -EINVAL; nobusy = bpf_task_storage_trylock(); /* This helper must only be called from places where the lifetime of the task * is guaranteed. Either by being refcounted or by being protected * by an RCU read-side critical section. */ ret = task_storage_delete(task, map, nobusy); if (nobusy) bpf_task_storage_unlock(); return ret; } BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *, task) { int ret; WARN_ON_ONCE(!bpf_rcu_lock_held()); if (!task) return -EINVAL; bpf_task_storage_lock(); /* This helper must only be called from places where the lifetime of the task * is guaranteed. Either by being refcounted or by being protected * by an RCU read-side critical section. */ ret = task_storage_delete(task, map, true); bpf_task_storage_unlock(); return ret; } static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key) { return -ENOTSUPP; } static struct bpf_map *task_storage_map_alloc(union bpf_attr *attr) { return bpf_local_storage_map_alloc(attr, &task_cache, true); } static void task_storage_map_free(struct bpf_map *map) { bpf_local_storage_map_free(map, &task_cache, &bpf_task_storage_busy); } BTF_ID_LIST_GLOBAL_SINGLE(bpf_local_storage_map_btf_id, struct, bpf_local_storage_map) const struct bpf_map_ops task_storage_map_ops = { .map_meta_equal = bpf_map_meta_equal, .map_alloc_check = bpf_local_storage_map_alloc_check, .map_alloc = task_storage_map_alloc, .map_free = task_storage_map_free, .map_get_next_key = notsupp_get_next_key, .map_lookup_elem = bpf_pid_task_storage_lookup_elem, .map_update_elem = bpf_pid_task_storage_update_elem, .map_delete_elem = bpf_pid_task_storage_delete_elem, .map_check_btf = bpf_local_storage_map_check_btf, .map_mem_usage = bpf_local_storage_map_mem_usage, .map_btf_id = &bpf_local_storage_map_btf_id[0], .map_owner_storage_ptr = task_storage_ptr, }; const struct bpf_func_proto bpf_task_storage_get_recur_proto = { .func = bpf_task_storage_get_recur, .gpl_only = false, .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL, .arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, .arg4_type = ARG_ANYTHING, }; const struct bpf_func_proto bpf_task_storage_get_proto = { .func = bpf_task_storage_get, .gpl_only = false, .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL, .arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, .arg4_type = ARG_ANYTHING, }; const struct bpf_func_proto bpf_task_storage_delete_recur_proto = { .func = bpf_task_storage_delete_recur, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL, .arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], }; const struct bpf_func_proto bpf_task_storage_delete_proto = { .func = bpf_task_storage_delete, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL, .arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], }; |
5 5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 | /* * Copyright © 2008 Intel Corporation * Copyright © 2016 Collabora Ltd * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. * * Based on code from the i915 driver. * Original author: Damien Lespiau <damien.lespiau@intel.com> * */ #include <linux/circ_buf.h> #include <linux/ctype.h> #include <linux/debugfs.h> #include <linux/poll.h> #include <linux/uaccess.h> #include <drm/drm_crtc.h> #include <drm/drm_debugfs_crc.h> #include <drm/drm_drv.h> #include <drm/drm_print.h> #include "drm_internal.h" /** * DOC: CRC ABI * * DRM device drivers can provide to userspace CRC information of each frame as * it reached a given hardware component (a CRC sampling "source"). * * Userspace can control generation of CRCs in a given CRTC by writing to the * file dri/0/crtc-N/crc/control in debugfs, with N being the :ref:`index of * the CRTC<crtc_index>`. Accepted values are source names (which are * driver-specific) and the "auto" keyword, which will let the driver select a * default source of frame CRCs for this CRTC. * * Once frame CRC generation is enabled, userspace can capture them by reading * the dri/0/crtc-N/crc/data file. Each line in that file contains the frame * number in the first field and then a number of unsigned integer fields * containing the CRC data. Fields are separated by a single space and the number * of CRC fields is source-specific. * * Note that though in some cases the CRC is computed in a specified way and on * the frame contents as supplied by userspace (eDP 1.3), in general the CRC * computation is performed in an unspecified way and on frame contents that have * been already processed in also an unspecified way and thus userspace cannot * rely on being able to generate matching CRC values for the frame contents that * it submits. In this general case, the maximum userspace can do is to compare * the reported CRCs of frames that should have the same contents. * * On the driver side the implementation effort is minimal, drivers only need to * implement &drm_crtc_funcs.set_crc_source and &drm_crtc_funcs.verify_crc_source. * The debugfs files are automatically set up if those vfuncs are set. CRC samples * need to be captured in the driver by calling drm_crtc_add_crc_entry(). * Depending on the driver and HW requirements, &drm_crtc_funcs.set_crc_source * may result in a commit (even a full modeset). * * CRC results must be reliable across non-full-modeset atomic commits, so if a * commit via DRM_IOCTL_MODE_ATOMIC would disable or otherwise interfere with * CRC generation, then the driver must mark that commit as a full modeset * (drm_atomic_crtc_needs_modeset() should return true). As a result, to ensure * consistent results, generic userspace must re-setup CRC generation after a * legacy SETCRTC or an atomic commit with DRM_MODE_ATOMIC_ALLOW_MODESET. */ static int crc_control_show(struct seq_file *m, void *data) { struct drm_crtc *crtc = m->private; if (crtc->funcs->get_crc_sources) { size_t count; const char *const *sources = crtc->funcs->get_crc_sources(crtc, &count); size_t values_cnt; int i; if (count == 0 || !sources) goto out; for (i = 0; i < count; i++) if (!crtc->funcs->verify_crc_source(crtc, sources[i], &values_cnt)) { if (strcmp(sources[i], crtc->crc.source)) seq_printf(m, "%s\n", sources[i]); else seq_printf(m, "%s*\n", sources[i]); } } return 0; out: seq_printf(m, "%s*\n", crtc->crc.source); return 0; } static int crc_control_open(struct inode *inode, struct file *file) { struct drm_crtc *crtc = inode->i_private; return single_open(file, crc_control_show, crtc); } static ssize_t crc_control_write(struct file *file, const char __user *ubuf, size_t len, loff_t *offp) { struct seq_file *m = file->private_data; struct drm_crtc *crtc = m->private; struct drm_crtc_crc *crc = &crtc->crc; char *source; size_t values_cnt; int ret; if (len == 0) return 0; if (len > PAGE_SIZE - 1) { DRM_DEBUG_KMS("Expected < %lu bytes into crtc crc control\n", PAGE_SIZE); return -E2BIG; } source = memdup_user_nul(ubuf, len); if (IS_ERR(source)) return PTR_ERR(source); if (source[len - 1] == '\n') source[len - 1] = '\0'; ret = crtc->funcs->verify_crc_source(crtc, source, &values_cnt); if (ret) { kfree(source); return ret; } spin_lock_irq(&crc->lock); if (crc->opened) { spin_unlock_irq(&crc->lock); kfree(source); return -EBUSY; } kfree(crc->source); crc->source = source; spin_unlock_irq(&crc->lock); *offp += len; return len; } static const struct file_operations drm_crtc_crc_control_fops = { .owner = THIS_MODULE, .open = crc_control_open, .read = seq_read, .llseek = seq_lseek, .release = single_release, .write = crc_control_write }; static int crtc_crc_data_count(struct drm_crtc_crc *crc) { assert_spin_locked(&crc->lock); return CIRC_CNT(crc->head, crc->tail, DRM_CRC_ENTRIES_NR); } static void crtc_crc_cleanup(struct drm_crtc_crc *crc) { kfree(crc->entries); crc->overflow = false; crc->entries = NULL; crc->head = 0; crc->tail = 0; crc->values_cnt = 0; crc->opened = false; } static int crtc_crc_open(struct inode *inode, struct file *filep) { struct drm_crtc *crtc = inode->i_private; struct drm_crtc_crc *crc = &crtc->crc; struct drm_crtc_crc_entry *entries = NULL; size_t values_cnt; int ret = 0; if (drm_drv_uses_atomic_modeset(crtc->dev)) { ret = drm_modeset_lock_single_interruptible(&crtc->mutex); if (ret) return ret; if (!crtc->state->active) ret = -EIO; drm_modeset_unlock(&crtc->mutex); if (ret) return ret; } ret = crtc->funcs->verify_crc_source(crtc, crc->source, &values_cnt); if (ret) return ret; if (WARN_ON(values_cnt > DRM_MAX_CRC_NR)) return -EINVAL; if (WARN_ON(values_cnt == 0)) return -EINVAL; entries = kcalloc(DRM_CRC_ENTRIES_NR, sizeof(*entries), GFP_KERNEL); if (!entries) return -ENOMEM; spin_lock_irq(&crc->lock); if (!crc->opened) { crc->opened = true; crc->entries = entries; crc->values_cnt = values_cnt; } else { ret = -EBUSY; } spin_unlock_irq(&crc->lock); if (ret) { kfree(entries); return ret; } ret = crtc->funcs->set_crc_source(crtc, crc->source); if (ret) goto err; return 0; err: spin_lock_irq(&crc->lock); crtc_crc_cleanup(crc); spin_unlock_irq(&crc->lock); return ret; } static int crtc_crc_release(struct inode *inode, struct file *filep) { struct drm_crtc *crtc = filep->f_inode->i_private; struct drm_crtc_crc *crc = &crtc->crc; /* terminate the infinite while loop if 'drm_dp_aux_crc_work' running */ spin_lock_irq(&crc->lock); crc->opened = false; spin_unlock_irq(&crc->lock); crtc->funcs->set_crc_source(crtc, NULL); spin_lock_irq(&crc->lock); crtc_crc_cleanup(crc); spin_unlock_irq(&crc->lock); return 0; } /* * 1 frame field of 10 chars plus a number of CRC fields of 10 chars each, space * separated, with a newline at the end and null-terminated. */ #define LINE_LEN(values_cnt) (10 + 11 * values_cnt + 1 + 1) #define MAX_LINE_LEN (LINE_LEN(DRM_MAX_CRC_NR)) static ssize_t crtc_crc_read(struct file *filep, char __user *user_buf, size_t count, loff_t *pos) { struct drm_crtc *crtc = filep->f_inode->i_private; struct drm_crtc_crc *crc = &crtc->crc; struct drm_crtc_crc_entry *entry; char buf[MAX_LINE_LEN]; int ret, i; spin_lock_irq(&crc->lock); if (!crc->source) { spin_unlock_irq(&crc->lock); return 0; } /* Nothing to read? */ while (crtc_crc_data_count(crc) == 0) { if (filep->f_flags & O_NONBLOCK) { spin_unlock_irq(&crc->lock); return -EAGAIN; } ret = wait_event_interruptible_lock_irq(crc->wq, crtc_crc_data_count(crc), crc->lock); if (ret) { spin_unlock_irq(&crc->lock); return ret; } } /* We know we have an entry to be read */ entry = &crc->entries[crc->tail]; if (count < LINE_LEN(crc->values_cnt)) { spin_unlock_irq(&crc->lock); return -EINVAL; } BUILD_BUG_ON_NOT_POWER_OF_2(DRM_CRC_ENTRIES_NR); crc->tail = (crc->tail + 1) & (DRM_CRC_ENTRIES_NR - 1); spin_unlock_irq(&crc->lock); if (entry->has_frame_counter) sprintf(buf, "0x%08x", entry->frame); else sprintf(buf, "XXXXXXXXXX"); for (i = 0; i < crc->values_cnt; i++) sprintf(buf + 10 + i * 11, " 0x%08x", entry->crcs[i]); sprintf(buf + 10 + crc->values_cnt * 11, "\n"); if (copy_to_user(user_buf, buf, LINE_LEN(crc->values_cnt))) return -EFAULT; return LINE_LEN(crc->values_cnt); } static __poll_t crtc_crc_poll(struct file *file, poll_table *wait) { struct drm_crtc *crtc = file->f_inode->i_private; struct drm_crtc_crc *crc = &crtc->crc; __poll_t ret = 0; poll_wait(file, &crc->wq, wait); spin_lock_irq(&crc->lock); if (crc->source && crtc_crc_data_count(crc)) ret |= EPOLLIN | EPOLLRDNORM; spin_unlock_irq(&crc->lock); return ret; } static const struct file_operations drm_crtc_crc_data_fops = { .owner = THIS_MODULE, .open = crtc_crc_open, .read = crtc_crc_read, .poll = crtc_crc_poll, .release = crtc_crc_release, }; void drm_debugfs_crtc_crc_add(struct drm_crtc *crtc) { struct dentry *crc_ent; if (!crtc->funcs->set_crc_source || !crtc->funcs->verify_crc_source) return; crc_ent = debugfs_create_dir("crc", crtc->debugfs_entry); debugfs_create_file("control", S_IRUGO | S_IWUSR, crc_ent, crtc, &drm_crtc_crc_control_fops); debugfs_create_file("data", S_IRUGO, crc_ent, crtc, &drm_crtc_crc_data_fops); } /** * drm_crtc_add_crc_entry - Add entry with CRC information for a frame * @crtc: CRTC to which the frame belongs * @has_frame: whether this entry has a frame number to go with * @frame: number of the frame these CRCs are about * @crcs: array of CRC values, with length matching #drm_crtc_crc.values_cnt * * For each frame, the driver polls the source of CRCs for new data and calls * this function to add them to the buffer from where userspace reads. */ int drm_crtc_add_crc_entry(struct drm_crtc *crtc, bool has_frame, uint32_t frame, uint32_t *crcs) { struct drm_crtc_crc *crc = &crtc->crc; struct drm_crtc_crc_entry *entry; int head, tail; unsigned long flags; spin_lock_irqsave(&crc->lock, flags); /* Caller may not have noticed yet that userspace has stopped reading */ if (!crc->entries) { spin_unlock_irqrestore(&crc->lock, flags); return -EINVAL; } head = crc->head; tail = crc->tail; if (CIRC_SPACE(head, tail, DRM_CRC_ENTRIES_NR) < 1) { bool was_overflow = crc->overflow; crc->overflow = true; spin_unlock_irqrestore(&crc->lock, flags); if (!was_overflow) DRM_ERROR("Overflow of CRC buffer, userspace reads too slow.\n"); return -ENOBUFS; } entry = &crc->entries[head]; entry->frame = frame; entry->has_frame_counter = has_frame; memcpy(&entry->crcs, crcs, sizeof(*crcs) * crc->values_cnt); head = (head + 1) & (DRM_CRC_ENTRIES_NR - 1); crc->head = head; spin_unlock_irqrestore(&crc->lock, flags); wake_up_interruptible(&crc->wq); return 0; } EXPORT_SYMBOL_GPL(drm_crtc_add_crc_entry); |
49 95 9 49 96 98 34 66 17 41 208 108 214 801 722 253 274 274 274 6659 2 56 2 6 6 263 7 5 8 93 20 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 | /* SPDX-License-Identifier: GPL-2.0 */ /* * Filesystem access notification for Linux * * Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com> */ #ifndef __LINUX_FSNOTIFY_BACKEND_H #define __LINUX_FSNOTIFY_BACKEND_H #ifdef __KERNEL__ #include <linux/idr.h> /* inotify uses this */ #include <linux/fs.h> /* struct inode */ #include <linux/list.h> #include <linux/path.h> /* struct path */ #include <linux/spinlock.h> #include <linux/types.h> #include <linux/atomic.h> #include <linux/user_namespace.h> #include <linux/refcount.h> #include <linux/mempool.h> #include <linux/sched/mm.h> /* * IN_* from inotfy.h lines up EXACTLY with FS_*, this is so we can easily * convert between them. dnotify only needs conversion at watch creation * so no perf loss there. fanotify isn't defined yet, so it can use the * wholes if it needs more events. */ #define FS_ACCESS 0x00000001 /* File was accessed */ #define FS_MODIFY 0x00000002 /* File was modified */ #define FS_ATTRIB 0x00000004 /* Metadata changed */ #define FS_CLOSE_WRITE 0x00000008 /* Writable file was closed */ #define FS_CLOSE_NOWRITE 0x00000010 /* Unwritable file closed */ #define FS_OPEN 0x00000020 /* File was opened */ #define FS_MOVED_FROM 0x00000040 /* File was moved from X */ #define FS_MOVED_TO 0x00000080 /* File was moved to Y */ #define FS_CREATE 0x00000100 /* Subfile was created */ #define FS_DELETE 0x00000200 /* Subfile was deleted */ #define FS_DELETE_SELF 0x00000400 /* Self was deleted */ #define FS_MOVE_SELF 0x00000800 /* Self was moved */ #define FS_OPEN_EXEC 0x00001000 /* File was opened for exec */ #define FS_UNMOUNT 0x00002000 /* inode on umount fs */ #define FS_Q_OVERFLOW 0x00004000 /* Event queued overflowed */ #define FS_ERROR 0x00008000 /* Filesystem Error (fanotify) */ /* * FS_IN_IGNORED overloads FS_ERROR. It is only used internally by inotify * which does not support FS_ERROR. */ #define FS_IN_IGNORED 0x00008000 /* last inotify event here */ #define FS_OPEN_PERM 0x00010000 /* open event in an permission hook */ #define FS_ACCESS_PERM 0x00020000 /* access event in a permissions hook */ #define FS_OPEN_EXEC_PERM 0x00040000 /* open/exec event in a permission hook */ /* * Set on inode mark that cares about things that happen to its children. * Always set for dnotify and inotify. * Set on inode/sb/mount marks that care about parent/name info. */ #define FS_EVENT_ON_CHILD 0x08000000 #define FS_RENAME 0x10000000 /* File was renamed */ #define FS_DN_MULTISHOT 0x20000000 /* dnotify multishot */ #define FS_ISDIR 0x40000000 /* event occurred against dir */ #define FS_MOVE (FS_MOVED_FROM | FS_MOVED_TO) /* * Directory entry modification events - reported only to directory * where entry is modified and not to a watching parent. * The watching parent may get an FS_ATTRIB|FS_EVENT_ON_CHILD event * when a directory entry inside a child subdir changes. */ #define ALL_FSNOTIFY_DIRENT_EVENTS (FS_CREATE | FS_DELETE | FS_MOVE | FS_RENAME) #define ALL_FSNOTIFY_PERM_EVENTS (FS_OPEN_PERM | FS_ACCESS_PERM | \ FS_OPEN_EXEC_PERM) /* * This is a list of all events that may get sent to a parent that is watching * with flag FS_EVENT_ON_CHILD based on fs event on a child of that directory. */ #define FS_EVENTS_POSS_ON_CHILD (ALL_FSNOTIFY_PERM_EVENTS | \ FS_ACCESS | FS_MODIFY | FS_ATTRIB | \ FS_CLOSE_WRITE | FS_CLOSE_NOWRITE | \ FS_OPEN | FS_OPEN_EXEC) /* * This is a list of all events that may get sent with the parent inode as the * @to_tell argument of fsnotify(). * It may include events that can be sent to an inode/sb/mount mark, but cannot * be sent to a parent watching children. */ #define FS_EVENTS_POSS_TO_PARENT (FS_EVENTS_POSS_ON_CHILD) /* Events that can be reported to backends */ #define ALL_FSNOTIFY_EVENTS (ALL_FSNOTIFY_DIRENT_EVENTS | \ FS_EVENTS_POSS_ON_CHILD | \ FS_DELETE_SELF | FS_MOVE_SELF | \ FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED | \ FS_ERROR) /* Extra flags that may be reported with event or control handling of events */ #define ALL_FSNOTIFY_FLAGS (FS_ISDIR | FS_EVENT_ON_CHILD | FS_DN_MULTISHOT) #define ALL_FSNOTIFY_BITS (ALL_FSNOTIFY_EVENTS | ALL_FSNOTIFY_FLAGS) struct fsnotify_group; struct fsnotify_event; struct fsnotify_mark; struct fsnotify_event_private_data; struct fsnotify_fname; struct fsnotify_iter_info; struct mem_cgroup; /* * Each group much define these ops. The fsnotify infrastructure will call * these operations for each relevant group. * * handle_event - main call for a group to handle an fs event * @group: group to notify * @mask: event type and flags * @data: object that event happened on * @data_type: type of object for fanotify_data_XXX() accessors * @dir: optional directory associated with event - * if @file_name is not NULL, this is the directory that * @file_name is relative to * @file_name: optional file name associated with event * @cookie: inotify rename cookie * @iter_info: array of marks from this group that are interested in the event * * handle_inode_event - simple variant of handle_event() for groups that only * have inode marks and don't have ignore mask * @mark: mark to notify * @mask: event type and flags * @inode: inode that event happened on * @dir: optional directory associated with event - * if @file_name is not NULL, this is the directory that * @file_name is relative to. * Either @inode or @dir must be non-NULL. * @file_name: optional file name associated with event * @cookie: inotify rename cookie * * free_group_priv - called when a group refcnt hits 0 to clean up the private union * freeing_mark - called when a mark is being destroyed for some reason. The group * MUST be holding a reference on each mark and that reference must be * dropped in this function. inotify uses this function to send * userspace messages that marks have been removed. */ struct fsnotify_ops { int (*handle_event)(struct fsnotify_group *group, u32 mask, const void *data, int data_type, struct inode *dir, const struct qstr *file_name, u32 cookie, struct fsnotify_iter_info *iter_info); int (*handle_inode_event)(struct fsnotify_mark *mark, u32 mask, struct inode *inode, struct inode *dir, const struct qstr *file_name, u32 cookie); void (*free_group_priv)(struct fsnotify_group *group); void (*freeing_mark)(struct fsnotify_mark *mark, struct fsnotify_group *group); void (*free_event)(struct fsnotify_group *group, struct fsnotify_event *event); /* called on final put+free to free memory */ void (*free_mark)(struct fsnotify_mark *mark); }; /* * all of the information about the original object we want to now send to * a group. If you want to carry more info from the accessing task to the * listener this structure is where you need to be adding fields. */ struct fsnotify_event { struct list_head list; }; /* * A group is a "thing" that wants to receive notification about filesystem * events. The mask holds the subset of event types this group cares about. * refcnt on a group is up to the implementor and at any moment if it goes 0 * everything will be cleaned up. */ struct fsnotify_group { const struct fsnotify_ops *ops; /* how this group handles things */ /* * How the refcnt is used is up to each group. When the refcnt hits 0 * fsnotify will clean up all of the resources associated with this group. * As an example, the dnotify group will always have a refcnt=1 and that * will never change. Inotify, on the other hand, has a group per * inotify_init() and the refcnt will hit 0 only when that fd has been * closed. */ refcount_t refcnt; /* things with interest in this group */ /* needed to send notification to userspace */ spinlock_t notification_lock; /* protect the notification_list */ struct list_head notification_list; /* list of event_holder this group needs to send to userspace */ wait_queue_head_t notification_waitq; /* read() on the notification file blocks on this waitq */ unsigned int q_len; /* events on the queue */ unsigned int max_events; /* maximum events allowed on the list */ /* * Valid fsnotify group priorities. Events are send in order from highest * priority to lowest priority. We default to the lowest priority. */ #define FS_PRIO_0 0 /* normal notifiers, no permissions */ #define FS_PRIO_1 1 /* fanotify content based access control */ #define FS_PRIO_2 2 /* fanotify pre-content access */ unsigned int priority; bool shutdown; /* group is being shut down, don't queue more events */ #define FSNOTIFY_GROUP_USER 0x01 /* user allocated group */ #define FSNOTIFY_GROUP_DUPS 0x02 /* allow multiple marks per object */ #define FSNOTIFY_GROUP_NOFS 0x04 /* group lock is not direct reclaim safe */ int flags; unsigned int owner_flags; /* stored flags of mark_mutex owner */ /* stores all fastpath marks assoc with this group so they can be cleaned on unregister */ struct mutex mark_mutex; /* protect marks_list */ atomic_t user_waits; /* Number of tasks waiting for user * response */ struct list_head marks_list; /* all inode marks for this group */ struct fasync_struct *fsn_fa; /* async notification */ struct fsnotify_event *overflow_event; /* Event we queue when the * notification list is too * full */ struct mem_cgroup *memcg; /* memcg to charge allocations */ /* groups can define private fields here or use the void *private */ union { void *private; #ifdef CONFIG_INOTIFY_USER struct inotify_group_private_data { spinlock_t idr_lock; struct idr idr; struct ucounts *ucounts; } inotify_data; #endif #ifdef CONFIG_FANOTIFY struct fanotify_group_private_data { /* Hash table of events for merge */ struct hlist_head *merge_hash; /* allows a group to block waiting for a userspace response */ struct list_head access_list; wait_queue_head_t access_waitq; int flags; /* flags from fanotify_init() */ int f_flags; /* event_f_flags from fanotify_init() */ struct ucounts *ucounts; mempool_t error_events_pool; } fanotify_data; #endif /* CONFIG_FANOTIFY */ }; }; /* * These helpers are used to prevent deadlock when reclaiming inodes with * evictable marks of the same group that is allocating a new mark. */ static inline void fsnotify_group_lock(struct fsnotify_group *group) { mutex_lock(&group->mark_mutex); if (group->flags & FSNOTIFY_GROUP_NOFS) group->owner_flags = memalloc_nofs_save(); } static inline void fsnotify_group_unlock(struct fsnotify_group *group) { if (group->flags & FSNOTIFY_GROUP_NOFS) memalloc_nofs_restore(group->owner_flags); mutex_unlock(&group->mark_mutex); } static inline void fsnotify_group_assert_locked(struct fsnotify_group *group) { WARN_ON_ONCE(!mutex_is_locked(&group->mark_mutex)); if (group->flags & FSNOTIFY_GROUP_NOFS) WARN_ON_ONCE(!(current->flags & PF_MEMALLOC_NOFS)); } /* When calling fsnotify tell it if the data is a path or inode */ enum fsnotify_data_type { FSNOTIFY_EVENT_NONE, FSNOTIFY_EVENT_PATH, FSNOTIFY_EVENT_INODE, FSNOTIFY_EVENT_DENTRY, FSNOTIFY_EVENT_ERROR, }; struct fs_error_report { int error; struct inode *inode; struct super_block *sb; }; static inline struct inode *fsnotify_data_inode(const void *data, int data_type) { switch (data_type) { case FSNOTIFY_EVENT_INODE: return (struct inode *)data; case FSNOTIFY_EVENT_DENTRY: return d_inode(data); case FSNOTIFY_EVENT_PATH: return d_inode(((const struct path *)data)->dentry); case FSNOTIFY_EVENT_ERROR: return ((struct fs_error_report *)data)->inode; default: return NULL; } } static inline struct dentry *fsnotify_data_dentry(const void *data, int data_type) { switch (data_type) { case FSNOTIFY_EVENT_DENTRY: /* Non const is needed for dget() */ return (struct dentry *)data; case FSNOTIFY_EVENT_PATH: return ((const struct path *)data)->dentry; default: return NULL; } } static inline const struct path *fsnotify_data_path(const void *data, int data_type) { switch (data_type) { case FSNOTIFY_EVENT_PATH: return data; default: return NULL; } } static inline struct super_block *fsnotify_data_sb(const void *data, int data_type) { switch (data_type) { case FSNOTIFY_EVENT_INODE: return ((struct inode *)data)->i_sb; case FSNOTIFY_EVENT_DENTRY: return ((struct dentry *)data)->d_sb; case FSNOTIFY_EVENT_PATH: return ((const struct path *)data)->dentry->d_sb; case FSNOTIFY_EVENT_ERROR: return ((struct fs_error_report *) data)->sb; default: return NULL; } } static inline struct fs_error_report *fsnotify_data_error_report( const void *data, int data_type) { switch (data_type) { case FSNOTIFY_EVENT_ERROR: return (struct fs_error_report *) data; default: return NULL; } } /* * Index to merged marks iterator array that correlates to a type of watch. * The type of watched object can be deduced from the iterator type, but not * the other way around, because an event can match different watched objects * of the same object type. * For example, both parent and child are watching an object of type inode. */ enum fsnotify_iter_type { FSNOTIFY_ITER_TYPE_INODE, FSNOTIFY_ITER_TYPE_VFSMOUNT, FSNOTIFY_ITER_TYPE_SB, FSNOTIFY_ITER_TYPE_PARENT, FSNOTIFY_ITER_TYPE_INODE2, FSNOTIFY_ITER_TYPE_COUNT }; /* The type of object that a mark is attached to */ enum fsnotify_obj_type { FSNOTIFY_OBJ_TYPE_ANY = -1, FSNOTIFY_OBJ_TYPE_INODE, FSNOTIFY_OBJ_TYPE_VFSMOUNT, FSNOTIFY_OBJ_TYPE_SB, FSNOTIFY_OBJ_TYPE_COUNT, FSNOTIFY_OBJ_TYPE_DETACHED = FSNOTIFY_OBJ_TYPE_COUNT }; static inline bool fsnotify_valid_obj_type(unsigned int obj_type) { return (obj_type < FSNOTIFY_OBJ_TYPE_COUNT); } struct fsnotify_iter_info { struct fsnotify_mark *marks[FSNOTIFY_ITER_TYPE_COUNT]; struct fsnotify_group *current_group; unsigned int report_mask; int srcu_idx; }; static inline bool fsnotify_iter_should_report_type( struct fsnotify_iter_info *iter_info, int iter_type) { return (iter_info->report_mask & (1U << iter_type)); } static inline void fsnotify_iter_set_report_type( struct fsnotify_iter_info *iter_info, int iter_type) { iter_info->report_mask |= (1U << iter_type); } static inline struct fsnotify_mark *fsnotify_iter_mark( struct fsnotify_iter_info *iter_info, int iter_type) { if (fsnotify_iter_should_report_type(iter_info, iter_type)) return iter_info->marks[iter_type]; return NULL; } static inline int fsnotify_iter_step(struct fsnotify_iter_info *iter, int type, struct fsnotify_mark **markp) { while (type < FSNOTIFY_ITER_TYPE_COUNT) { *markp = fsnotify_iter_mark(iter, type); if (*markp) break; type++; } return type; } #define FSNOTIFY_ITER_FUNCS(name, NAME) \ static inline struct fsnotify_mark *fsnotify_iter_##name##_mark( \ struct fsnotify_iter_info *iter_info) \ { \ return fsnotify_iter_mark(iter_info, FSNOTIFY_ITER_TYPE_##NAME); \ } FSNOTIFY_ITER_FUNCS(inode, INODE) FSNOTIFY_ITER_FUNCS(parent, PARENT) FSNOTIFY_ITER_FUNCS(vfsmount, VFSMOUNT) FSNOTIFY_ITER_FUNCS(sb, SB) #define fsnotify_foreach_iter_type(type) \ for (type = 0; type < FSNOTIFY_ITER_TYPE_COUNT; type++) #define fsnotify_foreach_iter_mark_type(iter, mark, type) \ for (type = 0; \ type = fsnotify_iter_step(iter, type, &mark), \ type < FSNOTIFY_ITER_TYPE_COUNT; \ type++) /* * fsnotify_connp_t is what we embed in objects which connector can be attached * to. fsnotify_connp_t * is how we refer from connector back to object. */ struct fsnotify_mark_connector; typedef struct fsnotify_mark_connector __rcu *fsnotify_connp_t; /* * Inode/vfsmount/sb point to this structure which tracks all marks attached to * the inode/vfsmount/sb. The reference to inode/vfsmount/sb is held by this * structure. We destroy this structure when there are no more marks attached * to it. The structure is protected by fsnotify_mark_srcu. */ struct fsnotify_mark_connector { spinlock_t lock; unsigned short type; /* Type of object [lock] */ #define FSNOTIFY_CONN_FLAG_HAS_IREF 0x02 unsigned short flags; /* flags [lock] */ union { /* Object pointer [lock] */ fsnotify_connp_t *obj; /* Used listing heads to free after srcu period expires */ struct fsnotify_mark_connector *destroy_next; }; struct hlist_head list; }; /* * A mark is simply an object attached to an in core inode which allows an * fsnotify listener to indicate they are either no longer interested in events * of a type matching mask or only interested in those events. * * These are flushed when an inode is evicted from core and may be flushed * when the inode is modified (as seen by fsnotify_access). Some fsnotify * users (such as dnotify) will flush these when the open fd is closed and not * at inode eviction or modification. * * Text in brackets is showing the lock(s) protecting modifications of a * particular entry. obj_lock means either inode->i_lock or * mnt->mnt_root->d_lock depending on the mark type. */ struct fsnotify_mark { /* Mask this mark is for [mark->lock, group->mark_mutex] */ __u32 mask; /* We hold one for presence in g_list. Also one ref for each 'thing' * in kernel that found and may be using this mark. */ refcount_t refcnt; /* Group this mark is for. Set on mark creation, stable until last ref * is dropped */ struct fsnotify_group *group; /* List of marks by group->marks_list. Also reused for queueing * mark into destroy_list when it's waiting for the end of SRCU period * before it can be freed. [group->mark_mutex] */ struct list_head g_list; /* Protects inode / mnt pointers, flags, masks */ spinlock_t lock; /* List of marks for inode / vfsmount [connector->lock, mark ref] */ struct hlist_node obj_list; /* Head of list of marks for an object [mark ref] */ struct fsnotify_mark_connector *connector; /* Events types and flags to ignore [mark->lock, group->mark_mutex] */ __u32 ignore_mask; /* General fsnotify mark flags */ #define FSNOTIFY_MARK_FLAG_ALIVE 0x0001 #define FSNOTIFY_MARK_FLAG_ATTACHED 0x0002 /* inotify mark flags */ #define FSNOTIFY_MARK_FLAG_EXCL_UNLINK 0x0010 #define FSNOTIFY_MARK_FLAG_IN_ONESHOT 0x0020 /* fanotify mark flags */ #define FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY 0x0100 #define FSNOTIFY_MARK_FLAG_NO_IREF 0x0200 #define FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS 0x0400 #define FSNOTIFY_MARK_FLAG_HAS_FSID 0x0800 #define FSNOTIFY_MARK_FLAG_WEAK_FSID 0x1000 unsigned int flags; /* flags [mark->lock] */ }; #ifdef CONFIG_FSNOTIFY /* called from the vfs helpers */ /* main fsnotify call to send events */ extern int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, const struct qstr *name, struct inode *inode, u32 cookie); extern int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data, int data_type); extern void __fsnotify_inode_delete(struct inode *inode); extern void __fsnotify_vfsmount_delete(struct vfsmount *mnt); extern void fsnotify_sb_delete(struct super_block *sb); extern u32 fsnotify_get_cookie(void); static inline __u32 fsnotify_parent_needed_mask(__u32 mask) { /* FS_EVENT_ON_CHILD is set on marks that want parent/name info */ if (!(mask & FS_EVENT_ON_CHILD)) return 0; /* * This object might be watched by a mark that cares about parent/name * info, does it care about the specific set of events that can be * reported with parent/name info? */ return mask & FS_EVENTS_POSS_TO_PARENT; } static inline int fsnotify_inode_watches_children(struct inode *inode) { /* FS_EVENT_ON_CHILD is set if the inode may care */ if (!(inode->i_fsnotify_mask & FS_EVENT_ON_CHILD)) return 0; /* this inode might care about child events, does it care about the * specific set of events that can happen on a child? */ return inode->i_fsnotify_mask & FS_EVENTS_POSS_ON_CHILD; } /* * Update the dentry with a flag indicating the interest of its parent to receive * filesystem events when those events happens to this dentry->d_inode. */ static inline void fsnotify_update_flags(struct dentry *dentry) { assert_spin_locked(&dentry->d_lock); /* * Serialisation of setting PARENT_WATCHED on the dentries is provided * by d_lock. If inotify_inode_watched changes after we have taken * d_lock, the following __fsnotify_update_child_dentry_flags call will * find our entry, so it will spin until we complete here, and update * us with the new state. */ if (fsnotify_inode_watches_children(dentry->d_parent->d_inode)) dentry->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED; else dentry->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED; } /* called from fsnotify listeners, such as fanotify or dnotify */ /* create a new group */ extern struct fsnotify_group *fsnotify_alloc_group( const struct fsnotify_ops *ops, int flags); /* get reference to a group */ extern void fsnotify_get_group(struct fsnotify_group *group); /* drop reference on a group from fsnotify_alloc_group */ extern void fsnotify_put_group(struct fsnotify_group *group); /* group destruction begins, stop queuing new events */ extern void fsnotify_group_stop_queueing(struct fsnotify_group *group); /* destroy group */ extern void fsnotify_destroy_group(struct fsnotify_group *group); /* fasync handler function */ extern int fsnotify_fasync(int fd, struct file *file, int on); /* Free event from memory */ extern void fsnotify_destroy_event(struct fsnotify_group *group, struct fsnotify_event *event); /* attach the event to the group notification queue */ extern int fsnotify_insert_event(struct fsnotify_group *group, struct fsnotify_event *event, int (*merge)(struct fsnotify_group *, struct fsnotify_event *), void (*insert)(struct fsnotify_group *, struct fsnotify_event *)); static inline int fsnotify_add_event(struct fsnotify_group *group, struct fsnotify_event *event, int (*merge)(struct fsnotify_group *, struct fsnotify_event *)) { return fsnotify_insert_event(group, event, merge, NULL); } /* Queue overflow event to a notification group */ static inline void fsnotify_queue_overflow(struct fsnotify_group *group) { fsnotify_add_event(group, group->overflow_event, NULL); } static inline bool fsnotify_is_overflow_event(u32 mask) { return mask & FS_Q_OVERFLOW; } static inline bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group) { assert_spin_locked(&group->notification_lock); return list_empty(&group->notification_list); } extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group); /* return, but do not dequeue the first event on the notification queue */ extern struct fsnotify_event *fsnotify_peek_first_event(struct fsnotify_group *group); /* return AND dequeue the first event on the notification queue */ extern struct fsnotify_event *fsnotify_remove_first_event(struct fsnotify_group *group); /* Remove event queued in the notification list */ extern void fsnotify_remove_queued_event(struct fsnotify_group *group, struct fsnotify_event *event); /* functions used to manipulate the marks attached to inodes */ /* * Canonical "ignore mask" including event flags. * * Note the subtle semantic difference from the legacy ->ignored_mask. * ->ignored_mask traditionally only meant which events should be ignored, * while ->ignore_mask also includes flags regarding the type of objects on * which events should be ignored. */ static inline __u32 fsnotify_ignore_mask(struct fsnotify_mark *mark) { __u32 ignore_mask = mark->ignore_mask; /* The event flags in ignore mask take effect */ if (mark->flags & FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS) return ignore_mask; /* * Legacy behavior: * - Always ignore events on dir * - Ignore events on child if parent is watching children */ ignore_mask |= FS_ISDIR; ignore_mask &= ~FS_EVENT_ON_CHILD; ignore_mask |= mark->mask & FS_EVENT_ON_CHILD; return ignore_mask; } /* Legacy ignored_mask - only event types to ignore */ static inline __u32 fsnotify_ignored_events(struct fsnotify_mark *mark) { return mark->ignore_mask & ALL_FSNOTIFY_EVENTS; } /* * Check if mask (or ignore mask) should be applied depending if victim is a * directory and whether it is reported to a watching parent. */ static inline bool fsnotify_mask_applicable(__u32 mask, bool is_dir, int iter_type) { /* Should mask be applied to a directory? */ if (is_dir && !(mask & FS_ISDIR)) return false; /* Should mask be applied to a child? */ if (iter_type == FSNOTIFY_ITER_TYPE_PARENT && !(mask & FS_EVENT_ON_CHILD)) return false; return true; } /* * Effective ignore mask taking into account if event victim is a * directory and whether it is reported to a watching parent. */ static inline __u32 fsnotify_effective_ignore_mask(struct fsnotify_mark *mark, bool is_dir, int iter_type) { __u32 ignore_mask = fsnotify_ignored_events(mark); if (!ignore_mask) return 0; /* For non-dir and non-child, no need to consult the event flags */ if (!is_dir && iter_type != FSNOTIFY_ITER_TYPE_PARENT) return ignore_mask; ignore_mask = fsnotify_ignore_mask(mark); if (!fsnotify_mask_applicable(ignore_mask, is_dir, iter_type)) return 0; return ignore_mask & ALL_FSNOTIFY_EVENTS; } /* Get mask for calculating object interest taking ignore mask into account */ static inline __u32 fsnotify_calc_mask(struct fsnotify_mark *mark) { __u32 mask = mark->mask; if (!fsnotify_ignored_events(mark)) return mask; /* Interest in FS_MODIFY may be needed for clearing ignore mask */ if (!(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) mask |= FS_MODIFY; /* * If mark is interested in ignoring events on children, the object must * show interest in those events for fsnotify_parent() to notice it. */ return mask | mark->ignore_mask; } /* Get mask of events for a list of marks */ extern __u32 fsnotify_conn_mask(struct fsnotify_mark_connector *conn); /* Calculate mask of events for a list of marks */ extern void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn); extern void fsnotify_init_mark(struct fsnotify_mark *mark, struct fsnotify_group *group); /* Find mark belonging to given group in the list of marks */ extern struct fsnotify_mark *fsnotify_find_mark(fsnotify_connp_t *connp, struct fsnotify_group *group); /* attach the mark to the object */ extern int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp, unsigned int obj_type, int add_flags); extern int fsnotify_add_mark_locked(struct fsnotify_mark *mark, fsnotify_connp_t *connp, unsigned int obj_type, int add_flags); /* attach the mark to the inode */ static inline int fsnotify_add_inode_mark(struct fsnotify_mark *mark, struct inode *inode, int add_flags) { return fsnotify_add_mark(mark, &inode->i_fsnotify_marks, FSNOTIFY_OBJ_TYPE_INODE, add_flags); } static inline int fsnotify_add_inode_mark_locked(struct fsnotify_mark *mark, struct inode *inode, int add_flags) { return fsnotify_add_mark_locked(mark, &inode->i_fsnotify_marks, FSNOTIFY_OBJ_TYPE_INODE, add_flags); } /* given a group and a mark, flag mark to be freed when all references are dropped */ extern void fsnotify_destroy_mark(struct fsnotify_mark *mark, struct fsnotify_group *group); /* detach mark from inode / mount list, group list, drop inode reference */ extern void fsnotify_detach_mark(struct fsnotify_mark *mark); /* free mark */ extern void fsnotify_free_mark(struct fsnotify_mark *mark); /* Wait until all marks queued for destruction are destroyed */ extern void fsnotify_wait_marks_destroyed(void); /* Clear all of the marks of a group attached to a given object type */ extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group, unsigned int obj_type); /* run all the marks in a group, and clear all of the vfsmount marks */ static inline void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group) { fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_VFSMOUNT); } /* run all the marks in a group, and clear all of the inode marks */ static inline void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *group) { fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_INODE); } /* run all the marks in a group, and clear all of the sn marks */ static inline void fsnotify_clear_sb_marks_by_group(struct fsnotify_group *group) { fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_SB); } extern void fsnotify_get_mark(struct fsnotify_mark *mark); extern void fsnotify_put_mark(struct fsnotify_mark *mark); extern void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info); extern bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info); static inline void fsnotify_init_event(struct fsnotify_event *event) { INIT_LIST_HEAD(&event->list); } #else static inline int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, const struct qstr *name, struct inode *inode, u32 cookie) { return 0; } static inline int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data, int data_type) { return 0; } static inline void __fsnotify_inode_delete(struct inode *inode) {} static inline void __fsnotify_vfsmount_delete(struct vfsmount *mnt) {} static inline void fsnotify_sb_delete(struct super_block *sb) {} static inline void fsnotify_update_flags(struct dentry *dentry) {} static inline u32 fsnotify_get_cookie(void) { return 0; } static inline void fsnotify_unmount_inodes(struct super_block *sb) {} #endif /* CONFIG_FSNOTIFY */ #endif /* __KERNEL __ */ #endif /* __LINUX_FSNOTIFY_BACKEND_H */ |
41295 859 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_CPUFEATURE_H #define _ASM_X86_CPUFEATURE_H #include <asm/processor.h> #if defined(__KERNEL__) && !defined(__ASSEMBLY__) #include <asm/asm.h> #include <linux/bitops.h> #include <asm/alternative.h> enum cpuid_leafs { CPUID_1_EDX = 0, CPUID_8000_0001_EDX, CPUID_8086_0001_EDX, CPUID_LNX_1, CPUID_1_ECX, CPUID_C000_0001_EDX, CPUID_8000_0001_ECX, CPUID_LNX_2, CPUID_LNX_3, CPUID_7_0_EBX, CPUID_D_1_EAX, CPUID_LNX_4, CPUID_7_1_EAX, CPUID_8000_0008_EBX, CPUID_6_EAX, CPUID_8000_000A_EDX, CPUID_7_ECX, CPUID_8000_0007_EBX, CPUID_7_EDX, CPUID_8000_001F_EAX, CPUID_8000_0021_EAX, CPUID_LNX_5, NR_CPUID_WORDS, }; #define X86_CAP_FMT_NUM "%d:%d" #define x86_cap_flag_num(flag) ((flag) >> 5), ((flag) & 31) extern const char * const x86_cap_flags[NCAPINTS*32]; extern const char * const x86_power_flags[32]; #define X86_CAP_FMT "%s" #define x86_cap_flag(flag) x86_cap_flags[flag] /* * In order to save room, we index into this array by doing * X86_BUG_<name> - NCAPINTS*32. */ extern const char * const x86_bug_flags[NBUGINTS*32]; #define test_cpu_cap(c, bit) \ arch_test_bit(bit, (unsigned long *)((c)->x86_capability)) /* * There are 32 bits/features in each mask word. The high bits * (selected with (bit>>5) give us the word number and the low 5 * bits give us the bit/feature number inside the word. * (1UL<<((bit)&31) gives us a mask for the feature_bit so we can * see if it is set in the mask word. */ #define CHECK_BIT_IN_MASK_WORD(maskname, word, bit) \ (((bit)>>5)==(word) && (1UL<<((bit)&31) & maskname##word )) /* * {REQUIRED,DISABLED}_MASK_CHECK below may seem duplicated with the * following BUILD_BUG_ON_ZERO() check but when NCAPINTS gets changed, all * header macros which use NCAPINTS need to be changed. The duplicated macro * use causes the compiler to issue errors for all headers so that all usage * sites can be corrected. */ #define REQUIRED_MASK_BIT_SET(feature_bit) \ ( CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 0, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 1, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 2, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 3, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 4, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 5, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 6, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 7, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 8, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 9, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 10, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 11, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 12, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 13, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 14, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 15, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 16, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 19, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 20, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 21, feature_bit) || \ REQUIRED_MASK_CHECK || \ BUILD_BUG_ON_ZERO(NCAPINTS != 22)) #define DISABLED_MASK_BIT_SET(feature_bit) \ ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 1, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 2, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 3, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 4, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 5, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 6, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 7, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 8, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 9, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 10, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 11, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 12, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 13, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 14, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 15, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 16, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 19, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 20, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 21, feature_bit) || \ DISABLED_MASK_CHECK || \ BUILD_BUG_ON_ZERO(NCAPINTS != 22)) #define cpu_has(c, bit) \ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ test_cpu_cap(c, bit)) #define this_cpu_has(bit) \ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ x86_this_cpu_test_bit(bit, cpu_info.x86_capability)) /* * This macro is for detection of features which need kernel * infrastructure to be used. It may *not* directly test the CPU * itself. Use the cpu_has() family if you want true runtime * testing of CPU features, like in hypervisor code where you are * supporting a possible guest feature where host support for it * is not relevant. */ #define cpu_feature_enabled(bit) \ (__builtin_constant_p(bit) && DISABLED_MASK_BIT_SET(bit) ? 0 : static_cpu_has(bit)) #define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit) #define set_cpu_cap(c, bit) set_bit(bit, (unsigned long *)((c)->x86_capability)) extern void setup_clear_cpu_cap(unsigned int bit); extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit); #define setup_force_cpu_cap(bit) do { \ \ if (!boot_cpu_has(bit)) \ WARN_ON(alternatives_patched); \ \ set_cpu_cap(&boot_cpu_data, bit); \ set_bit(bit, (unsigned long *)cpu_caps_set); \ } while (0) #define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit) /* * Static testing of CPU features. Used the same as boot_cpu_has(). It * statically patches the target code for additional performance. Use * static_cpu_has() only in fast paths, where every cycle counts. Which * means that the boot_cpu_has() variant is already fast enough for the * majority of cases and you should stick to using it as it is generally * only two instructions: a RIP-relative MOV and a TEST. * * Do not use an "m" constraint for [cap_byte] here: gcc doesn't know * that this is only used on a fallback path and will sometimes cause * it to manifest the address of boot_cpu_data in a register, fouling * the mainline (post-initialization) code. */ static __always_inline bool _static_cpu_has(u16 bit) { asm goto(ALTERNATIVE_TERNARY("jmp 6f", %c[feature], "", "jmp %l[t_no]") ".pushsection .altinstr_aux,\"ax\"\n" "6:\n" " testb %[bitnum], %a[cap_byte]\n" " jnz %l[t_yes]\n" " jmp %l[t_no]\n" ".popsection\n" : : [feature] "i" (bit), [bitnum] "i" (1 << (bit & 7)), [cap_byte] "i" (&((const char *)boot_cpu_data.x86_capability)[bit >> 3]) : : t_yes, t_no); t_yes: return true; t_no: return false; } #define static_cpu_has(bit) \ ( \ __builtin_constant_p(boot_cpu_has(bit)) ? \ boot_cpu_has(bit) : \ _static_cpu_has(bit) \ ) #define cpu_has_bug(c, bit) cpu_has(c, (bit)) #define set_cpu_bug(c, bit) set_cpu_cap(c, (bit)) #define clear_cpu_bug(c, bit) clear_cpu_cap(c, (bit)) #define static_cpu_has_bug(bit) static_cpu_has((bit)) #define boot_cpu_has_bug(bit) cpu_has_bug(&boot_cpu_data, (bit)) #define boot_cpu_set_bug(bit) set_cpu_cap(&boot_cpu_data, (bit)) #define MAX_CPU_FEATURES (NCAPINTS * 32) #define cpu_have_feature boot_cpu_has #define CPU_FEATURE_TYPEFMT "x86,ven%04Xfam%04Xmod%04X" #define CPU_FEATURE_TYPEVAL boot_cpu_data.x86_vendor, boot_cpu_data.x86, \ boot_cpu_data.x86_model #endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */ #endif /* _ASM_X86_CPUFEATURE_H */ |
4 5 1 5 4 1 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 | // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/hpfs/map.c * * Mikulas Patocka (mikulas@artax.karlin.mff.cuni.cz), 1998-1999 * * mapping structures to memory with some minimal checks */ #include "hpfs_fn.h" __le32 *hpfs_map_dnode_bitmap(struct super_block *s, struct quad_buffer_head *qbh) { return hpfs_map_4sectors(s, hpfs_sb(s)->sb_dmap, qbh, 0); } __le32 *hpfs_map_bitmap(struct super_block *s, unsigned bmp_block, struct quad_buffer_head *qbh, char *id) { secno sec; __le32 *ret; unsigned n_bands = (hpfs_sb(s)->sb_fs_size + 0x3fff) >> 14; if (hpfs_sb(s)->sb_chk) if (bmp_block >= n_bands) { hpfs_error(s, "hpfs_map_bitmap called with bad parameter: %08x at %s", bmp_block, id); return NULL; } sec = le32_to_cpu(hpfs_sb(s)->sb_bmp_dir[bmp_block]); if (!sec || sec > hpfs_sb(s)->sb_fs_size-4) { hpfs_error(s, "invalid bitmap block pointer %08x -> %08x at %s", bmp_block, sec, id); return NULL; } ret = hpfs_map_4sectors(s, sec, qbh, 4); if (ret) hpfs_prefetch_bitmap(s, bmp_block + 1); return ret; } void hpfs_prefetch_bitmap(struct super_block *s, unsigned bmp_block) { unsigned to_prefetch, next_prefetch; unsigned n_bands = (hpfs_sb(s)->sb_fs_size + 0x3fff) >> 14; if (unlikely(bmp_block >= n_bands)) return; to_prefetch = le32_to_cpu(hpfs_sb(s)->sb_bmp_dir[bmp_block]); if (unlikely(bmp_block + 1 >= n_bands)) next_prefetch = 0; else next_prefetch = le32_to_cpu(hpfs_sb(s)->sb_bmp_dir[bmp_block + 1]); hpfs_prefetch_sectors(s, to_prefetch, 4 + 4 * (to_prefetch + 4 == next_prefetch)); } /* * Load first code page into kernel memory, return pointer to 256-byte array, * first 128 bytes are uppercasing table for chars 128-255, next 128 bytes are * lowercasing table */ unsigned char *hpfs_load_code_page(struct super_block *s, secno cps) { struct buffer_head *bh; secno cpds; unsigned cpi; unsigned char *ptr; unsigned char *cp_table; int i; struct code_page_data *cpd; struct code_page_directory *cp = hpfs_map_sector(s, cps, &bh, 0); if (!cp) return NULL; if (le32_to_cpu(cp->magic) != CP_DIR_MAGIC) { pr_err("Code page directory magic doesn't match (magic = %08x)\n", le32_to_cpu(cp->magic)); brelse(bh); return NULL; } if (!le32_to_cpu(cp->n_code_pages)) { pr_err("n_code_pages == 0\n"); brelse(bh); return NULL; } cpds = le32_to_cpu(cp->array[0].code_page_data); cpi = le16_to_cpu(cp->array[0].index); brelse(bh); if (cpi >= 3) { pr_err("Code page index out of array\n"); return NULL; } if (!(cpd = hpfs_map_sector(s, cpds, &bh, 0))) return NULL; if (le16_to_cpu(cpd->offs[cpi]) > 0x178) { pr_err("Code page index out of sector\n"); brelse(bh); return NULL; } ptr = (unsigned char *)cpd + le16_to_cpu(cpd->offs[cpi]) + 6; if (!(cp_table = kmalloc(256, GFP_KERNEL))) { pr_err("out of memory for code page table\n"); brelse(bh); return NULL; } memcpy(cp_table, ptr, 128); brelse(bh); /* Try to build lowercasing table from uppercasing one */ for (i=128; i<256; i++) cp_table[i]=i; for (i=128; i<256; i++) if (cp_table[i-128]!=i && cp_table[i-128]>=128) cp_table[cp_table[i-128]] = i; return cp_table; } __le32 *hpfs_load_bitmap_directory(struct super_block *s, secno bmp) { struct buffer_head *bh; int n = (hpfs_sb(s)->sb_fs_size + 0x200000 - 1) >> 21; int i; __le32 *b; if (!(b = kmalloc_array(n, 512, GFP_KERNEL))) { pr_err("can't allocate memory for bitmap directory\n"); return NULL; } for (i=0;i<n;i++) { __le32 *d = hpfs_map_sector(s, bmp+i, &bh, n - i - 1); if (!d) { kfree(b); return NULL; } memcpy((char *)b + 512 * i, d, 512); brelse(bh); } return b; } void hpfs_load_hotfix_map(struct super_block *s, struct hpfs_spare_block *spareblock) { struct quad_buffer_head qbh; __le32 *directory; u32 n_hotfixes, n_used_hotfixes; unsigned i; n_hotfixes = le32_to_cpu(spareblock->n_spares); n_used_hotfixes = le32_to_cpu(spareblock->n_spares_used); if (n_hotfixes > 256 || n_used_hotfixes > n_hotfixes) { hpfs_error(s, "invalid number of hotfixes: %u, used: %u", n_hotfixes, n_used_hotfixes); return; } if (!(directory = hpfs_map_4sectors(s, le32_to_cpu(spareblock->hotfix_map), &qbh, 0))) { hpfs_error(s, "can't load hotfix map"); return; } for (i = 0; i < n_used_hotfixes; i++) { hpfs_sb(s)->hotfix_from[i] = le32_to_cpu(directory[i]); hpfs_sb(s)->hotfix_to[i] = le32_to_cpu(directory[n_hotfixes + i]); } hpfs_sb(s)->n_hotfixes = n_used_hotfixes; hpfs_brelse4(&qbh); } /* * Load fnode to memory */ struct fnode *hpfs_map_fnode(struct super_block *s, ino_t ino, struct buffer_head **bhp) { struct fnode *fnode; if (hpfs_sb(s)->sb_chk) if (hpfs_chk_sectors(s, ino, 1, "fnode")) { return NULL; } if ((fnode = hpfs_map_sector(s, ino, bhp, FNODE_RD_AHEAD))) { if (hpfs_sb(s)->sb_chk) { struct extended_attribute *ea; struct extended_attribute *ea_end; if (le32_to_cpu(fnode->magic) != FNODE_MAGIC) { hpfs_error(s, "bad magic on fnode %08lx", (unsigned long)ino); goto bail; } if (!fnode_is_dir(fnode)) { if ((unsigned)fnode->btree.n_used_nodes + (unsigned)fnode->btree.n_free_nodes != (bp_internal(&fnode->btree) ? 12 : 8)) { hpfs_error(s, "bad number of nodes in fnode %08lx", (unsigned long)ino); goto bail; } if (le16_to_cpu(fnode->btree.first_free) != 8 + fnode->btree.n_used_nodes * (bp_internal(&fnode->btree) ? 8 : 12)) { hpfs_error(s, "bad first_free pointer in fnode %08lx", (unsigned long)ino); goto bail; } } if (le16_to_cpu(fnode->ea_size_s) && (le16_to_cpu(fnode->ea_offs) < 0xc4 || le16_to_cpu(fnode->ea_offs) + le16_to_cpu(fnode->acl_size_s) + le16_to_cpu(fnode->ea_size_s) > 0x200)) { hpfs_error(s, "bad EA info in fnode %08lx: ea_offs == %04x ea_size_s == %04x", (unsigned long)ino, le16_to_cpu(fnode->ea_offs), le16_to_cpu(fnode->ea_size_s)); goto bail; } ea = fnode_ea(fnode); ea_end = fnode_end_ea(fnode); while (ea != ea_end) { if (ea > ea_end) { hpfs_error(s, "bad EA in fnode %08lx", (unsigned long)ino); goto bail; } ea = next_ea(ea); } } } return fnode; bail: brelse(*bhp); return NULL; } struct anode *hpfs_map_anode(struct super_block *s, anode_secno ano, struct buffer_head **bhp) { struct anode *anode; if (hpfs_sb(s)->sb_chk) if (hpfs_chk_sectors(s, ano, 1, "anode")) return NULL; if ((anode = hpfs_map_sector(s, ano, bhp, ANODE_RD_AHEAD))) if (hpfs_sb(s)->sb_chk) { if (le32_to_cpu(anode->magic) != ANODE_MAGIC) { hpfs_error(s, "bad magic on anode %08x", ano); goto bail; } if (le32_to_cpu(anode->self) != ano) { hpfs_error(s, "self pointer invalid on anode %08x", ano); goto bail; } if ((unsigned)anode->btree.n_used_nodes + (unsigned)anode->btree.n_free_nodes != (bp_internal(&anode->btree) ? 60 : 40)) { hpfs_error(s, "bad number of nodes in anode %08x", ano); goto bail; } if (le16_to_cpu(anode->btree.first_free) != 8 + anode->btree.n_used_nodes * (bp_internal(&anode->btree) ? 8 : 12)) { hpfs_error(s, "bad first_free pointer in anode %08x", ano); goto bail; } } return anode; bail: brelse(*bhp); return NULL; } /* * Load dnode to memory and do some checks */ struct dnode *hpfs_map_dnode(struct super_block *s, unsigned secno, struct quad_buffer_head *qbh) { struct dnode *dnode; if (hpfs_sb(s)->sb_chk) { if (hpfs_chk_sectors(s, secno, 4, "dnode")) return NULL; if (secno & 3) { hpfs_error(s, "dnode %08x not byte-aligned", secno); return NULL; } } if ((dnode = hpfs_map_4sectors(s, secno, qbh, DNODE_RD_AHEAD))) if (hpfs_sb(s)->sb_chk) { unsigned p, pp = 0; unsigned char *d = (unsigned char *)dnode; int b = 0; if (le32_to_cpu(dnode->magic) != DNODE_MAGIC) { hpfs_error(s, "bad magic on dnode %08x", secno); goto bail; } if (le32_to_cpu(dnode->self) != secno) hpfs_error(s, "bad self pointer on dnode %08x self = %08x", secno, le32_to_cpu(dnode->self)); /* Check dirents - bad dirents would cause infinite loops or shooting to memory */ if (le32_to_cpu(dnode->first_free) > 2048) { hpfs_error(s, "dnode %08x has first_free == %08x", secno, le32_to_cpu(dnode->first_free)); goto bail; } for (p = 20; p < le32_to_cpu(dnode->first_free); p += d[p] + (d[p+1] << 8)) { struct hpfs_dirent *de = (struct hpfs_dirent *)((char *)dnode + p); if (le16_to_cpu(de->length) > 292 || (le16_to_cpu(de->length) < 32) || (le16_to_cpu(de->length) & 3) || p + le16_to_cpu(de->length) > 2048) { hpfs_error(s, "bad dirent size in dnode %08x, dirent %03x, last %03x", secno, p, pp); goto bail; } if (((31 + de->namelen + de->down*4 + 3) & ~3) != le16_to_cpu(de->length)) { if (((31 + de->namelen + de->down*4 + 3) & ~3) < le16_to_cpu(de->length) && s->s_flags & SB_RDONLY) goto ok; hpfs_error(s, "namelen does not match dirent size in dnode %08x, dirent %03x, last %03x", secno, p, pp); goto bail; } ok: if (hpfs_sb(s)->sb_chk >= 2) b |= 1 << de->down; if (de->down) if (de_down_pointer(de) < 0x10) { hpfs_error(s, "bad down pointer in dnode %08x, dirent %03x, last %03x", secno, p, pp); goto bail; } pp = p; } if (p != le32_to_cpu(dnode->first_free)) { hpfs_error(s, "size on last dirent does not match first_free; dnode %08x", secno); goto bail; } if (d[pp + 30] != 1 || d[pp + 31] != 255) { hpfs_error(s, "dnode %08x does not end with \\377 entry", secno); goto bail; } if (b == 3) pr_err("unbalanced dnode tree, dnode %08x; see hpfs.txt 4 more info\n", secno); } return dnode; bail: hpfs_brelse4(qbh); return NULL; } dnode_secno hpfs_fnode_dno(struct super_block *s, ino_t ino) { struct buffer_head *bh; struct fnode *fnode; dnode_secno dno; fnode = hpfs_map_fnode(s, ino, &bh); if (!fnode) return 0; dno = le32_to_cpu(fnode->u.external[0].disk_secno); brelse(bh); return dno; } |
3 3 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 | // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" #include "btree_cache.h" #include "btree_iter.h" #include "btree_key_cache.h" #include "btree_locking.h" #include "btree_update.h" #include "errcode.h" #include "error.h" #include "journal.h" #include "journal_reclaim.h" #include "trace.h" #include <linux/sched/mm.h> static inline bool btree_uses_pcpu_readers(enum btree_id id) { return id == BTREE_ID_subvolumes; } static struct kmem_cache *bch2_key_cache; static int bch2_btree_key_cache_cmp_fn(struct rhashtable_compare_arg *arg, const void *obj) { const struct bkey_cached *ck = obj; const struct bkey_cached_key *key = arg->key; return ck->key.btree_id != key->btree_id || !bpos_eq(ck->key.pos, key->pos); } static const struct rhashtable_params bch2_btree_key_cache_params = { .head_offset = offsetof(struct bkey_cached, hash), .key_offset = offsetof(struct bkey_cached, key), .key_len = sizeof(struct bkey_cached_key), .obj_cmpfn = bch2_btree_key_cache_cmp_fn, }; __flatten inline struct bkey_cached * bch2_btree_key_cache_find(struct bch_fs *c, enum btree_id btree_id, struct bpos pos) { struct bkey_cached_key key = { .btree_id = btree_id, .pos = pos, }; return rhashtable_lookup_fast(&c->btree_key_cache.table, &key, bch2_btree_key_cache_params); } static bool bkey_cached_lock_for_evict(struct bkey_cached *ck) { if (!six_trylock_intent(&ck->c.lock)) return false; if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { six_unlock_intent(&ck->c.lock); return false; } if (!six_trylock_write(&ck->c.lock)) { six_unlock_intent(&ck->c.lock); return false; } return true; } static void bkey_cached_evict(struct btree_key_cache *c, struct bkey_cached *ck) { BUG_ON(rhashtable_remove_fast(&c->table, &ck->hash, bch2_btree_key_cache_params)); memset(&ck->key, ~0, sizeof(ck->key)); atomic_long_dec(&c->nr_keys); } static void bkey_cached_free(struct btree_key_cache *bc, struct bkey_cached *ck) { struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache); BUG_ON(test_bit(BKEY_CACHED_DIRTY, &ck->flags)); ck->btree_trans_barrier_seq = start_poll_synchronize_srcu(&c->btree_trans_barrier); if (ck->c.lock.readers) { list_move_tail(&ck->list, &bc->freed_pcpu); bc->nr_freed_pcpu++; } else { list_move_tail(&ck->list, &bc->freed_nonpcpu); bc->nr_freed_nonpcpu++; } atomic_long_inc(&bc->nr_freed); kfree(ck->k); ck->k = NULL; ck->u64s = 0; six_unlock_write(&ck->c.lock); six_unlock_intent(&ck->c.lock); } #ifdef __KERNEL__ static void __bkey_cached_move_to_freelist_ordered(struct btree_key_cache *bc, struct bkey_cached *ck) { struct bkey_cached *pos; bc->nr_freed_nonpcpu++; list_for_each_entry_reverse(pos, &bc->freed_nonpcpu, list) { if (ULONG_CMP_GE(ck->btree_trans_barrier_seq, pos->btree_trans_barrier_seq)) { list_move(&ck->list, &pos->list); return; } } list_move(&ck->list, &bc->freed_nonpcpu); } #endif static void bkey_cached_move_to_freelist(struct btree_key_cache *bc, struct bkey_cached *ck) { BUG_ON(test_bit(BKEY_CACHED_DIRTY, &ck->flags)); if (!ck->c.lock.readers) { #ifdef __KERNEL__ struct btree_key_cache_freelist *f; bool freed = false; preempt_disable(); f = this_cpu_ptr(bc->pcpu_freed); if (f->nr < ARRAY_SIZE(f->objs)) { f->objs[f->nr++] = ck; freed = true; } preempt_enable(); if (!freed) { mutex_lock(&bc->lock); preempt_disable(); f = this_cpu_ptr(bc->pcpu_freed); while (f->nr > ARRAY_SIZE(f->objs) / 2) { struct bkey_cached *ck2 = f->objs[--f->nr]; __bkey_cached_move_to_freelist_ordered(bc, ck2); } preempt_enable(); __bkey_cached_move_to_freelist_ordered(bc, ck); mutex_unlock(&bc->lock); } #else mutex_lock(&bc->lock); list_move_tail(&ck->list, &bc->freed_nonpcpu); bc->nr_freed_nonpcpu++; mutex_unlock(&bc->lock); #endif } else { mutex_lock(&bc->lock); list_move_tail(&ck->list, &bc->freed_pcpu); bc->nr_freed_pcpu++; mutex_unlock(&bc->lock); } } static void bkey_cached_free_fast(struct btree_key_cache *bc, struct bkey_cached *ck) { struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache); ck->btree_trans_barrier_seq = start_poll_synchronize_srcu(&c->btree_trans_barrier); list_del_init(&ck->list); atomic_long_inc(&bc->nr_freed); kfree(ck->k); ck->k = NULL; ck->u64s = 0; bkey_cached_move_to_freelist(bc, ck); six_unlock_write(&ck->c.lock); six_unlock_intent(&ck->c.lock); } static struct bkey_cached * bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path, bool *was_new) { struct bch_fs *c = trans->c; struct btree_key_cache *bc = &c->btree_key_cache; struct bkey_cached *ck = NULL; bool pcpu_readers = btree_uses_pcpu_readers(path->btree_id); int ret; if (!pcpu_readers) { #ifdef __KERNEL__ struct btree_key_cache_freelist *f; preempt_disable(); f = this_cpu_ptr(bc->pcpu_freed); if (f->nr) ck = f->objs[--f->nr]; preempt_enable(); if (!ck) { mutex_lock(&bc->lock); preempt_disable(); f = this_cpu_ptr(bc->pcpu_freed); while (!list_empty(&bc->freed_nonpcpu) && f->nr < ARRAY_SIZE(f->objs) / 2) { ck = list_last_entry(&bc->freed_nonpcpu, struct bkey_cached, list); list_del_init(&ck->list); bc->nr_freed_nonpcpu--; f->objs[f->nr++] = ck; } ck = f->nr ? f->objs[--f->nr] : NULL; preempt_enable(); mutex_unlock(&bc->lock); } #else mutex_lock(&bc->lock); if (!list_empty(&bc->freed_nonpcpu)) { ck = list_last_entry(&bc->freed_nonpcpu, struct bkey_cached, list); list_del_init(&ck->list); bc->nr_freed_nonpcpu--; } mutex_unlock(&bc->lock); #endif } else { mutex_lock(&bc->lock); if (!list_empty(&bc->freed_pcpu)) { ck = list_last_entry(&bc->freed_pcpu, struct bkey_cached, list); list_del_init(&ck->list); bc->nr_freed_pcpu--; } mutex_unlock(&bc->lock); } if (ck) { ret = btree_node_lock_nopath(trans, &ck->c, SIX_LOCK_intent, _THIS_IP_); if (unlikely(ret)) { bkey_cached_move_to_freelist(bc, ck); return ERR_PTR(ret); } path->l[0].b = (void *) ck; path->l[0].lock_seq = six_lock_seq(&ck->c.lock); mark_btree_node_locked(trans, path, 0, BTREE_NODE_INTENT_LOCKED); ret = bch2_btree_node_lock_write(trans, path, &ck->c); if (unlikely(ret)) { btree_node_unlock(trans, path, 0); bkey_cached_move_to_freelist(bc, ck); return ERR_PTR(ret); } return ck; } ck = allocate_dropping_locks(trans, ret, kmem_cache_zalloc(bch2_key_cache, _gfp)); if (ret) { kmem_cache_free(bch2_key_cache, ck); return ERR_PTR(ret); } if (!ck) return NULL; INIT_LIST_HEAD(&ck->list); bch2_btree_lock_init(&ck->c, pcpu_readers ? SIX_LOCK_INIT_PCPU : 0); ck->c.cached = true; BUG_ON(!six_trylock_intent(&ck->c.lock)); BUG_ON(!six_trylock_write(&ck->c.lock)); *was_new = true; return ck; } static struct bkey_cached * bkey_cached_reuse(struct btree_key_cache *c) { struct bucket_table *tbl; struct rhash_head *pos; struct bkey_cached *ck; unsigned i; mutex_lock(&c->lock); rcu_read_lock(); tbl = rht_dereference_rcu(c->table.tbl, &c->table); for (i = 0; i < tbl->size; i++) rht_for_each_entry_rcu(ck, pos, tbl, i, hash) { if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags) && bkey_cached_lock_for_evict(ck)) { bkey_cached_evict(c, ck); goto out; } } ck = NULL; out: rcu_read_unlock(); mutex_unlock(&c->lock); return ck; } static struct bkey_cached * btree_key_cache_create(struct btree_trans *trans, struct btree_path *path) { struct bch_fs *c = trans->c; struct btree_key_cache *bc = &c->btree_key_cache; struct bkey_cached *ck; bool was_new = false; ck = bkey_cached_alloc(trans, path, &was_new); if (IS_ERR(ck)) return ck; if (unlikely(!ck)) { ck = bkey_cached_reuse(bc); if (unlikely(!ck)) { bch_err(c, "error allocating memory for key cache item, btree %s", bch2_btree_id_str(path->btree_id)); return ERR_PTR(-BCH_ERR_ENOMEM_btree_key_cache_create); } mark_btree_node_locked(trans, path, 0, BTREE_NODE_INTENT_LOCKED); } ck->c.level = 0; ck->c.btree_id = path->btree_id; ck->key.btree_id = path->btree_id; ck->key.pos = path->pos; ck->valid = false; ck->flags = 1U << BKEY_CACHED_ACCESSED; if (unlikely(rhashtable_lookup_insert_fast(&bc->table, &ck->hash, bch2_btree_key_cache_params))) { /* We raced with another fill: */ if (likely(was_new)) { six_unlock_write(&ck->c.lock); six_unlock_intent(&ck->c.lock); kfree(ck); } else { bkey_cached_free_fast(bc, ck); } mark_btree_node_locked(trans, path, 0, BTREE_NODE_UNLOCKED); return NULL; } atomic_long_inc(&bc->nr_keys); six_unlock_write(&ck->c.lock); return ck; } static int btree_key_cache_fill(struct btree_trans *trans, struct btree_path *ck_path, struct bkey_cached *ck) { struct btree_iter iter; struct bkey_s_c k; unsigned new_u64s = 0; struct bkey_i *new_k = NULL; int ret; bch2_trans_iter_init(trans, &iter, ck->key.btree_id, ck->key.pos, BTREE_ITER_KEY_CACHE_FILL| BTREE_ITER_CACHED_NOFILL); iter.flags &= ~BTREE_ITER_WITH_JOURNAL; k = bch2_btree_iter_peek_slot(&iter); ret = bkey_err(k); if (ret) goto err; if (!bch2_btree_node_relock(trans, ck_path, 0)) { trace_and_count(trans->c, trans_restart_relock_key_cache_fill, trans, _THIS_IP_, ck_path); ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_fill); goto err; } /* * bch2_varint_decode can read past the end of the buffer by at * most 7 bytes (it won't be used): */ new_u64s = k.k->u64s + 1; /* * Allocate some extra space so that the transaction commit path is less * likely to have to reallocate, since that requires a transaction * restart: */ new_u64s = min(256U, (new_u64s * 3) / 2); if (new_u64s > ck->u64s) { new_u64s = roundup_pow_of_two(new_u64s); new_k = kmalloc(new_u64s * sizeof(u64), GFP_NOWAIT|__GFP_NOWARN); if (!new_k) { bch2_trans_unlock(trans); new_k = kmalloc(new_u64s * sizeof(u64), GFP_KERNEL); if (!new_k) { bch_err(trans->c, "error allocating memory for key cache key, btree %s u64s %u", bch2_btree_id_str(ck->key.btree_id), new_u64s); ret = -BCH_ERR_ENOMEM_btree_key_cache_fill; goto err; } if (!bch2_btree_node_relock(trans, ck_path, 0)) { kfree(new_k); trace_and_count(trans->c, trans_restart_relock_key_cache_fill, trans, _THIS_IP_, ck_path); ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_fill); goto err; } ret = bch2_trans_relock(trans); if (ret) { kfree(new_k); goto err; } } } ret = bch2_btree_node_lock_write(trans, ck_path, &ck_path->l[0].b->c); if (ret) { kfree(new_k); goto err; } if (new_k) { kfree(ck->k); ck->u64s = new_u64s; ck->k = new_k; } bkey_reassemble(ck->k, k); ck->valid = true; bch2_btree_node_unlock_write(trans, ck_path, ck_path->l[0].b); /* We're not likely to need this iterator again: */ set_btree_iter_dontneed(&iter); err: bch2_trans_iter_exit(trans, &iter); return ret; } static noinline int bch2_btree_path_traverse_cached_slowpath(struct btree_trans *trans, struct btree_path *path, unsigned flags) { struct bch_fs *c = trans->c; struct bkey_cached *ck; int ret = 0; BUG_ON(path->level); path->l[1].b = NULL; if (bch2_btree_node_relock_notrace(trans, path, 0)) { ck = (void *) path->l[0].b; goto fill; } retry: ck = bch2_btree_key_cache_find(c, path->btree_id, path->pos); if (!ck) { ck = btree_key_cache_create(trans, path); ret = PTR_ERR_OR_ZERO(ck); if (ret) goto err; if (!ck) goto retry; mark_btree_node_locked(trans, path, 0, BTREE_NODE_INTENT_LOCKED); path->locks_want = 1; } else { enum six_lock_type lock_want = __btree_lock_want(path, 0); ret = btree_node_lock(trans, path, (void *) ck, 0, lock_want, _THIS_IP_); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto err; BUG_ON(ret); if (ck->key.btree_id != path->btree_id || !bpos_eq(ck->key.pos, path->pos)) { six_unlock_type(&ck->c.lock, lock_want); goto retry; } mark_btree_node_locked(trans, path, 0, (enum btree_node_locked_type) lock_want); } path->l[0].lock_seq = six_lock_seq(&ck->c.lock); path->l[0].b = (void *) ck; fill: path->uptodate = BTREE_ITER_UPTODATE; if (!ck->valid && !(flags & BTREE_ITER_CACHED_NOFILL)) { /* * Using the underscore version because we haven't set * path->uptodate yet: */ if (!path->locks_want && !__bch2_btree_path_upgrade(trans, path, 1, NULL)) { trace_and_count(trans->c, trans_restart_key_cache_upgrade, trans, _THIS_IP_); ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_upgrade); goto err; } ret = btree_key_cache_fill(trans, path, ck); if (ret) goto err; ret = bch2_btree_path_relock(trans, path, _THIS_IP_); if (ret) goto err; path->uptodate = BTREE_ITER_UPTODATE; } if (!test_bit(BKEY_CACHED_ACCESSED, &ck->flags)) set_bit(BKEY_CACHED_ACCESSED, &ck->flags); BUG_ON(btree_node_locked_type(path, 0) != btree_lock_want(path, 0)); BUG_ON(path->uptodate); return ret; err: path->uptodate = BTREE_ITER_NEED_TRAVERSE; if (!bch2_err_matches(ret, BCH_ERR_transaction_restart)) { btree_node_unlock(trans, path, 0); path->l[0].b = ERR_PTR(ret); } return ret; } int bch2_btree_path_traverse_cached(struct btree_trans *trans, struct btree_path *path, unsigned flags) { struct bch_fs *c = trans->c; struct bkey_cached *ck; int ret = 0; EBUG_ON(path->level); path->l[1].b = NULL; if (bch2_btree_node_relock_notrace(trans, path, 0)) { ck = (void *) path->l[0].b; goto fill; } retry: ck = bch2_btree_key_cache_find(c, path->btree_id, path->pos); if (!ck) { return bch2_btree_path_traverse_cached_slowpath(trans, path, flags); } else { enum six_lock_type lock_want = __btree_lock_want(path, 0); ret = btree_node_lock(trans, path, (void *) ck, 0, lock_want, _THIS_IP_); EBUG_ON(ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)); if (ret) return ret; if (ck->key.btree_id != path->btree_id || !bpos_eq(ck->key.pos, path->pos)) { six_unlock_type(&ck->c.lock, lock_want); goto retry; } mark_btree_node_locked(trans, path, 0, (enum btree_node_locked_type) lock_want); } path->l[0].lock_seq = six_lock_seq(&ck->c.lock); path->l[0].b = (void *) ck; fill: if (!ck->valid) return bch2_btree_path_traverse_cached_slowpath(trans, path, flags); if (!test_bit(BKEY_CACHED_ACCESSED, &ck->flags)) set_bit(BKEY_CACHED_ACCESSED, &ck->flags); path->uptodate = BTREE_ITER_UPTODATE; EBUG_ON(!ck->valid); EBUG_ON(btree_node_locked_type(path, 0) != btree_lock_want(path, 0)); return ret; } static int btree_key_cache_flush_pos(struct btree_trans *trans, struct bkey_cached_key key, u64 journal_seq, unsigned commit_flags, bool evict) { struct bch_fs *c = trans->c; struct journal *j = &c->journal; struct btree_iter c_iter, b_iter; struct bkey_cached *ck = NULL; int ret; bch2_trans_iter_init(trans, &b_iter, key.btree_id, key.pos, BTREE_ITER_SLOTS| BTREE_ITER_INTENT| BTREE_ITER_ALL_SNAPSHOTS); bch2_trans_iter_init(trans, &c_iter, key.btree_id, key.pos, BTREE_ITER_CACHED| BTREE_ITER_INTENT); b_iter.flags &= ~BTREE_ITER_WITH_KEY_CACHE; ret = bch2_btree_iter_traverse(&c_iter); if (ret) goto out; ck = (void *) btree_iter_path(trans, &c_iter)->l[0].b; if (!ck) goto out; if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { if (evict) goto evict; goto out; } BUG_ON(!ck->valid); if (journal_seq && ck->journal.seq != journal_seq) goto out; trans->journal_res.seq = ck->journal.seq; /* * If we're at the end of the journal, we really want to free up space * in the journal right away - we don't want to pin that old journal * sequence number with a new btree node write, we want to re-journal * the update */ if (ck->journal.seq == journal_last_seq(j)) commit_flags |= BCH_WATERMARK_reclaim; if (ck->journal.seq != journal_last_seq(j) || !test_bit(JOURNAL_SPACE_LOW, &c->journal.flags)) commit_flags |= BCH_TRANS_COMMIT_no_journal_res; ret = bch2_btree_iter_traverse(&b_iter) ?: bch2_trans_update(trans, &b_iter, ck->k, BTREE_UPDATE_KEY_CACHE_RECLAIM| BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE| BTREE_TRIGGER_NORUN) ?: bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_check_rw| BCH_TRANS_COMMIT_no_enospc| commit_flags); bch2_fs_fatal_err_on(ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart) && !bch2_err_matches(ret, BCH_ERR_journal_reclaim_would_deadlock) && !bch2_journal_error(j), c, "flushing key cache: %s", bch2_err_str(ret)); if (ret) goto out; bch2_journal_pin_drop(j, &ck->journal); struct btree_path *path = btree_iter_path(trans, &c_iter); BUG_ON(!btree_node_locked(path, 0)); if (!evict) { if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { clear_bit(BKEY_CACHED_DIRTY, &ck->flags); atomic_long_dec(&c->btree_key_cache.nr_dirty); } } else { struct btree_path *path2; unsigned i; evict: trans_for_each_path(trans, path2, i) if (path2 != path) __bch2_btree_path_unlock(trans, path2); bch2_btree_node_lock_write_nofail(trans, path, &ck->c); if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { clear_bit(BKEY_CACHED_DIRTY, &ck->flags); atomic_long_dec(&c->btree_key_cache.nr_dirty); } mark_btree_node_locked_noreset(path, 0, BTREE_NODE_UNLOCKED); bkey_cached_evict(&c->btree_key_cache, ck); bkey_cached_free_fast(&c->btree_key_cache, ck); } out: bch2_trans_iter_exit(trans, &b_iter); bch2_trans_iter_exit(trans, &c_iter); return ret; } int bch2_btree_key_cache_journal_flush(struct journal *j, struct journal_entry_pin *pin, u64 seq) { struct bch_fs *c = container_of(j, struct bch_fs, journal); struct bkey_cached *ck = container_of(pin, struct bkey_cached, journal); struct bkey_cached_key key; struct btree_trans *trans = bch2_trans_get(c); int srcu_idx = srcu_read_lock(&c->btree_trans_barrier); int ret = 0; btree_node_lock_nopath_nofail(trans, &ck->c, SIX_LOCK_read); key = ck->key; if (ck->journal.seq != seq || !test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { six_unlock_read(&ck->c.lock); goto unlock; } if (ck->seq != seq) { bch2_journal_pin_update(&c->journal, ck->seq, &ck->journal, bch2_btree_key_cache_journal_flush); six_unlock_read(&ck->c.lock); goto unlock; } six_unlock_read(&ck->c.lock); ret = lockrestart_do(trans, btree_key_cache_flush_pos(trans, key, seq, BCH_TRANS_COMMIT_journal_reclaim, false)); unlock: srcu_read_unlock(&c->btree_trans_barrier, srcu_idx); bch2_trans_put(trans); return ret; } bool bch2_btree_insert_key_cached(struct btree_trans *trans, unsigned flags, struct btree_insert_entry *insert_entry) { struct bch_fs *c = trans->c; struct bkey_cached *ck = (void *) (trans->paths + insert_entry->path)->l[0].b; struct bkey_i *insert = insert_entry->k; bool kick_reclaim = false; BUG_ON(insert->k.u64s > ck->u64s); bkey_copy(ck->k, insert); ck->valid = true; if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { EBUG_ON(test_bit(BCH_FS_clean_shutdown, &c->flags)); set_bit(BKEY_CACHED_DIRTY, &ck->flags); atomic_long_inc(&c->btree_key_cache.nr_dirty); if (bch2_nr_btree_keys_need_flush(c)) kick_reclaim = true; } /* * To minimize lock contention, we only add the journal pin here and * defer pin updates to the flush callback via ->seq. Be careful not to * update ->seq on nojournal commits because we don't want to update the * pin to a seq that doesn't include journal updates on disk. Otherwise * we risk losing the update after a crash. * * The only exception is if the pin is not active in the first place. We * have to add the pin because journal reclaim drives key cache * flushing. The flush callback will not proceed unless ->seq matches * the latest pin, so make sure it starts with a consistent value. */ if (!(insert_entry->flags & BTREE_UPDATE_NOJOURNAL) || !journal_pin_active(&ck->journal)) { ck->seq = trans->journal_res.seq; } bch2_journal_pin_add(&c->journal, trans->journal_res.seq, &ck->journal, bch2_btree_key_cache_journal_flush); if (kick_reclaim) journal_reclaim_kick(&c->journal); return true; } void bch2_btree_key_cache_drop(struct btree_trans *trans, struct btree_path *path) { struct bch_fs *c = trans->c; struct bkey_cached *ck = (void *) path->l[0].b; BUG_ON(!ck->valid); /* * We just did an update to the btree, bypassing the key cache: the key * cache key is now stale and must be dropped, even if dirty: */ if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { clear_bit(BKEY_CACHED_DIRTY, &ck->flags); atomic_long_dec(&c->btree_key_cache.nr_dirty); bch2_journal_pin_drop(&c->journal, &ck->journal); } ck->valid = false; } static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink, struct shrink_control *sc) { struct bch_fs *c = shrink->private_data; struct btree_key_cache *bc = &c->btree_key_cache; struct bucket_table *tbl; struct bkey_cached *ck, *t; size_t scanned = 0, freed = 0, nr = sc->nr_to_scan; unsigned start, flags; int srcu_idx; mutex_lock(&bc->lock); srcu_idx = srcu_read_lock(&c->btree_trans_barrier); flags = memalloc_nofs_save(); /* * Newest freed entries are at the end of the list - once we hit one * that's too new to be freed, we can bail out: */ list_for_each_entry_safe(ck, t, &bc->freed_nonpcpu, list) { if (!poll_state_synchronize_srcu(&c->btree_trans_barrier, ck->btree_trans_barrier_seq)) break; list_del(&ck->list); six_lock_exit(&ck->c.lock); kmem_cache_free(bch2_key_cache, ck); atomic_long_dec(&bc->nr_freed); freed++; bc->nr_freed_nonpcpu--; } list_for_each_entry_safe(ck, t, &bc->freed_pcpu, list) { if (!poll_state_synchronize_srcu(&c->btree_trans_barrier, ck->btree_trans_barrier_seq)) break; list_del(&ck->list); six_lock_exit(&ck->c.lock); kmem_cache_free(bch2_key_cache, ck); atomic_long_dec(&bc->nr_freed); freed++; bc->nr_freed_pcpu--; } rcu_read_lock(); tbl = rht_dereference_rcu(bc->table.tbl, &bc->table); if (bc->shrink_iter >= tbl->size) bc->shrink_iter = 0; start = bc->shrink_iter; do { struct rhash_head *pos, *next; pos = rht_ptr_rcu(rht_bucket(tbl, bc->shrink_iter)); while (!rht_is_a_nulls(pos)) { next = rht_dereference_bucket_rcu(pos->next, tbl, bc->shrink_iter); ck = container_of(pos, struct bkey_cached, hash); if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { goto next; } else if (test_bit(BKEY_CACHED_ACCESSED, &ck->flags)) { clear_bit(BKEY_CACHED_ACCESSED, &ck->flags); goto next; } else if (bkey_cached_lock_for_evict(ck)) { bkey_cached_evict(bc, ck); bkey_cached_free(bc, ck); } scanned++; if (scanned >= nr) break; next: pos = next; } bc->shrink_iter++; if (bc->shrink_iter >= tbl->size) bc->shrink_iter = 0; } while (scanned < nr && bc->shrink_iter != start); rcu_read_unlock(); memalloc_nofs_restore(flags); srcu_read_unlock(&c->btree_trans_barrier, srcu_idx); mutex_unlock(&bc->lock); return freed; } static unsigned long bch2_btree_key_cache_count(struct shrinker *shrink, struct shrink_control *sc) { struct bch_fs *c = shrink->private_data; struct btree_key_cache *bc = &c->btree_key_cache; long nr = atomic_long_read(&bc->nr_keys) - atomic_long_read(&bc->nr_dirty); return max(0L, nr); } void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc) { struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache); struct bucket_table *tbl; struct bkey_cached *ck, *n; struct rhash_head *pos; LIST_HEAD(items); unsigned i; #ifdef __KERNEL__ int cpu; #endif shrinker_free(bc->shrink); mutex_lock(&bc->lock); /* * The loop is needed to guard against racing with rehash: */ while (atomic_long_read(&bc->nr_keys)) { rcu_read_lock(); tbl = rht_dereference_rcu(bc->table.tbl, &bc->table); if (tbl) for (i = 0; i < tbl->size; i++) rht_for_each_entry_rcu(ck, pos, tbl, i, hash) { bkey_cached_evict(bc, ck); list_add(&ck->list, &items); } rcu_read_unlock(); } #ifdef __KERNEL__ if (bc->pcpu_freed) { for_each_possible_cpu(cpu) { struct btree_key_cache_freelist *f = per_cpu_ptr(bc->pcpu_freed, cpu); for (i = 0; i < f->nr; i++) { ck = f->objs[i]; list_add(&ck->list, &items); } } } #endif BUG_ON(list_count_nodes(&bc->freed_pcpu) != bc->nr_freed_pcpu); BUG_ON(list_count_nodes(&bc->freed_nonpcpu) != bc->nr_freed_nonpcpu); list_splice(&bc->freed_pcpu, &items); list_splice(&bc->freed_nonpcpu, &items); mutex_unlock(&bc->lock); list_for_each_entry_safe(ck, n, &items, list) { cond_resched(); list_del(&ck->list); kfree(ck->k); six_lock_exit(&ck->c.lock); kmem_cache_free(bch2_key_cache, ck); } if (atomic_long_read(&bc->nr_dirty) && !bch2_journal_error(&c->journal) && test_bit(BCH_FS_was_rw, &c->flags)) panic("btree key cache shutdown error: nr_dirty nonzero (%li)\n", atomic_long_read(&bc->nr_dirty)); if (atomic_long_read(&bc->nr_keys)) panic("btree key cache shutdown error: nr_keys nonzero (%li)\n", atomic_long_read(&bc->nr_keys)); if (bc->table_init_done) rhashtable_destroy(&bc->table); free_percpu(bc->pcpu_freed); } void bch2_fs_btree_key_cache_init_early(struct btree_key_cache *c) { mutex_init(&c->lock); INIT_LIST_HEAD(&c->freed_pcpu); INIT_LIST_HEAD(&c->freed_nonpcpu); } int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc) { struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache); struct shrinker *shrink; #ifdef __KERNEL__ bc->pcpu_freed = alloc_percpu(struct btree_key_cache_freelist); if (!bc->pcpu_freed) return -BCH_ERR_ENOMEM_fs_btree_cache_init; #endif if (rhashtable_init(&bc->table, &bch2_btree_key_cache_params)) return -BCH_ERR_ENOMEM_fs_btree_cache_init; bc->table_init_done = true; shrink = shrinker_alloc(0, "%s-btree_key_cache", c->name); if (!shrink) return -BCH_ERR_ENOMEM_fs_btree_cache_init; bc->shrink = shrink; shrink->seeks = 0; shrink->count_objects = bch2_btree_key_cache_count; shrink->scan_objects = bch2_btree_key_cache_scan; shrink->private_data = c; shrinker_register(shrink); return 0; } void bch2_btree_key_cache_to_text(struct printbuf *out, struct btree_key_cache *c) { prt_printf(out, "nr_freed:\t%lu", atomic_long_read(&c->nr_freed)); prt_newline(out); prt_printf(out, "nr_keys:\t%lu", atomic_long_read(&c->nr_keys)); prt_newline(out); prt_printf(out, "nr_dirty:\t%lu", atomic_long_read(&c->nr_dirty)); prt_newline(out); } void bch2_btree_key_cache_exit(void) { kmem_cache_destroy(bch2_key_cache); } int __init bch2_btree_key_cache_init(void) { bch2_key_cache = KMEM_CACHE(bkey_cached, SLAB_RECLAIM_ACCOUNT); if (!bch2_key_cache) return -ENOMEM; return 0; } |
5 1 2 2 2 1 1 1 11 11 19 23 23 5 23 23 18 18 18 5 5 12 12 18 18 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 | /* * Copyright (c) 2016 Laurent Pinchart <laurent.pinchart@ideasonboard.com> * * DRM core format related functions * * Permission to use, copy, modify, distribute, and sell this software and its * documentation for any purpose is hereby granted without fee, provided that * the above copyright notice appear in all copies and that both that copyright * notice and this permission notice appear in supporting documentation, and * that the name of the copyright holders not be used in advertising or * publicity pertaining to distribution of the software without specific, * written prior permission. The copyright holders make no representations * about the suitability of this software for any purpose. It is provided "as * is" without express or implied warranty. * * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE * OF THIS SOFTWARE. */ #include <linux/bug.h> #include <linux/ctype.h> #include <linux/export.h> #include <linux/kernel.h> #include <drm/drm_device.h> #include <drm/drm_fourcc.h> /** * drm_mode_legacy_fb_format - compute drm fourcc code from legacy description * @bpp: bits per pixels * @depth: bit depth per pixel * * Computes a drm fourcc pixel format code for the given @bpp/@depth values. * Useful in fbdev emulation code, since that deals in those values. */ uint32_t drm_mode_legacy_fb_format(uint32_t bpp, uint32_t depth) { uint32_t fmt = DRM_FORMAT_INVALID; switch (bpp) { case 1: if (depth == 1) fmt = DRM_FORMAT_C1; break; case 2: if (depth == 2) fmt = DRM_FORMAT_C2; break; case 4: if (depth == 4) fmt = DRM_FORMAT_C4; break; case 8: if (depth == 8) fmt = DRM_FORMAT_C8; break; case 16: switch (depth) { case 15: fmt = DRM_FORMAT_XRGB1555; break; case 16: fmt = DRM_FORMAT_RGB565; break; default: break; } break; case 24: if (depth == 24) fmt = DRM_FORMAT_RGB888; break; case 32: switch (depth) { case 24: fmt = DRM_FORMAT_XRGB8888; break; case 30: fmt = DRM_FORMAT_XRGB2101010; break; case 32: fmt = DRM_FORMAT_ARGB8888; break; default: break; } break; default: break; } return fmt; } EXPORT_SYMBOL(drm_mode_legacy_fb_format); /** * drm_driver_legacy_fb_format - compute drm fourcc code from legacy description * @dev: DRM device * @bpp: bits per pixels * @depth: bit depth per pixel * * Computes a drm fourcc pixel format code for the given @bpp/@depth values. * Unlike drm_mode_legacy_fb_format() this looks at the drivers mode_config, * and depending on the &drm_mode_config.quirk_addfb_prefer_host_byte_order flag * it returns little endian byte order or host byte order framebuffer formats. */ uint32_t drm_driver_legacy_fb_format(struct drm_device *dev, uint32_t bpp, uint32_t depth) { uint32_t fmt = drm_mode_legacy_fb_format(bpp, depth); if (dev->mode_config.quirk_addfb_prefer_host_byte_order) { if (fmt == DRM_FORMAT_XRGB8888) fmt = DRM_FORMAT_HOST_XRGB8888; if (fmt == DRM_FORMAT_ARGB8888) fmt = DRM_FORMAT_HOST_ARGB8888; if (fmt == DRM_FORMAT_RGB565) fmt = DRM_FORMAT_HOST_RGB565; if (fmt == DRM_FORMAT_XRGB1555) fmt = DRM_FORMAT_HOST_XRGB1555; } if (dev->mode_config.quirk_addfb_prefer_xbgr_30bpp && fmt == DRM_FORMAT_XRGB2101010) fmt = DRM_FORMAT_XBGR2101010; return fmt; } EXPORT_SYMBOL(drm_driver_legacy_fb_format); /* * Internal function to query information for a given format. See * drm_format_info() for the public API. */ const struct drm_format_info *__drm_format_info(u32 format) { static const struct drm_format_info formats[] = { { .format = DRM_FORMAT_C1, .depth = 1, .num_planes = 1, .char_per_block = { 1, }, .block_w = { 8, }, .block_h = { 1, }, .hsub = 1, .vsub = 1, .is_color_indexed = true }, { .format = DRM_FORMAT_C2, .depth = 2, .num_planes = 1, .char_per_block = { 1, }, .block_w = { 4, }, .block_h = { 1, }, .hsub = 1, .vsub = 1, .is_color_indexed = true }, { .format = DRM_FORMAT_C4, .depth = 4, .num_planes = 1, .char_per_block = { 1, }, .block_w = { 2, }, .block_h = { 1, }, .hsub = 1, .vsub = 1, .is_color_indexed = true }, { .format = DRM_FORMAT_C8, .depth = 8, .num_planes = 1, .cpp = { 1, 0, 0 }, .hsub = 1, .vsub = 1, .is_color_indexed = true }, { .format = DRM_FORMAT_D1, .depth = 1, .num_planes = 1, .char_per_block = { 1, }, .block_w = { 8, }, .block_h = { 1, }, .hsub = 1, .vsub = 1 }, { .format = DRM_FORMAT_D2, .depth = 2, .num_planes = 1, .char_per_block = { 1, }, .block_w = { 4, }, .block_h = { 1, }, .hsub = 1, .vsub = 1 }, { .format = DRM_FORMAT_D4, .depth = 4, .num_planes = 1, .char_per_block = { 1, }, .block_w = { 2, }, .block_h = { 1, }, .hsub = 1, .vsub = 1 }, { .format = DRM_FORMAT_D8, .depth = 8, .num_planes = 1, .cpp = { 1, 0, 0 }, .hsub = 1, .vsub = 1 }, { .format = DRM_FORMAT_R1, .depth = 1, .num_planes = 1, .char_per_block = { 1, }, .block_w = { 8, }, .block_h = { 1, }, .hsub = 1, .vsub = 1 }, { .format = DRM_FORMAT_R2, .depth = 2, .num_planes = 1, .char_per_block = { 1, }, .block_w = { 4, }, .block_h = { 1, }, .hsub = 1, .vsub = 1 }, { .format = DRM_FORMAT_R4, .depth = 4, .num_planes = 1, .char_per_block = { 1, }, .block_w = { 2, }, .block_h = { 1, }, .hsub = 1, .vsub = 1 }, { .format = DRM_FORMAT_R8, .depth = 8, .num_planes = 1, .cpp = { 1, 0, 0 }, .hsub = 1, .vsub = 1 }, { .format = DRM_FORMAT_R10, .depth = 10, .num_planes = 1, .cpp = { 2, 0, 0 }, .hsub = 1, .vsub = 1 }, { .format = DRM_FORMAT_R12, .depth = 12, .num_planes = 1, .cpp = { 2, 0, 0 }, .hsub = 1, .vsub = 1 }, { .format = DRM_FORMAT_RGB332, .depth = 8, .num_planes = 1, .cpp = { 1, 0, 0 }, .hsub = 1, .vsub = 1 }, { .format = DRM_FORMAT_BGR233, .depth = 8, .num_planes = 1, .cpp = { 1, 0, 0 }, .hsub = 1, .vsub = 1 }, { .format = DRM_FORMAT_XRGB4444, .depth = 0, .num_planes = 1, .cpp = { 2, 0, 0 }, .hsub = 1, .vsub = 1 }, { .format = DRM_FORMAT_XBGR4444, .depth = 0, .num_planes = 1, .cpp = { 2, 0, 0 }, .hsub = 1, .vsub = 1 }, { .format = DRM_FORMAT_RGBX4444, .depth = 0, .num_planes = 1, .cpp = { 2, 0, 0 }, .hsub = 1, .vsub = 1 }, { .format = DRM_FORMAT_BGRX4444, .depth = 0, .num_planes = 1, .cpp = { 2, 0, 0 }, .hsub = 1, .vsub = 1 }, { .format = DRM_FORMAT_ARGB4444, .depth = 0, .num_planes = 1, .cpp = { 2, 0, 0 }, .hsub = 1, .vsub = 1, .has_alpha = true }, { .format = DRM_FORMAT_ABGR4444, .depth = 0, .num_planes = 1, .cpp = { 2, 0, 0 }, .hsub = 1, .vsub = 1, .has_alpha = true }, { .format = DRM_FORMAT_RGBA4444, .depth = 0, .num_planes = 1, .cpp = { 2, 0, 0 }, .hsub = 1, .vsub = 1, .has_alpha = true }, { .format = DRM_FORMAT_BGRA4444, .depth = 0, .num_planes = 1, .cpp = { 2, 0, 0 }, .hsub = 1, .vsub = 1, .has_alpha = true }, { .format = DRM_FORMAT_XRGB1555, .depth = 15, .num_planes = 1, .cpp = { 2, 0, 0 }, .hsub = 1, .vsub = 1 }, { .format = DRM_FORMAT_XBGR1555, .depth = 15, .num_planes = 1, .cpp = { 2, 0, 0 }, .hsub = 1, .vsub = 1 }, { .format = DRM_FORMAT_RGBX5551, .depth = 15, .num_planes = 1, .cpp = { 2, 0, 0 }, .hsub = 1, .vsub = 1 }, { .format = DRM_FORMAT_BGRX5551, .depth = 15, .num_planes = 1, .cpp = { 2, 0, 0 }, .hsub = 1, .vsub = 1 }, { .format = DRM_FORMAT_ARGB1555, .depth = 15, .num_planes = 1, .cpp = { 2, 0, 0 }, .hsub = 1, .vsub = 1, .has_alpha = true }, { .format = DRM_FORMAT_ABGR1555, .depth = 15, .num_planes = 1, .cpp = { 2, 0, 0 }, .hsub = 1, .vsub = 1, .has_alpha = true }, { .format = DRM_FORMAT_RGBA5551, .depth = 15, .num_planes = 1, .cpp = { 2, 0, 0 }, .hsub = 1, .vsub = 1, .has_alpha = true }, { .format = DRM_FORMAT_BGRA5551, .depth = 15, .num_planes = 1, .cpp = { 2, 0, 0 }, .hsub = 1, .vsub = 1, .has_alpha = true }, { .format = DRM_FORMAT_RGB565, .depth = 16, .num_planes = 1, .cpp = { 2, 0, 0 }, .hsub = 1, .vsub = 1 }, { .format = DRM_FORMAT_BGR565, .depth = 16, .num_planes = 1, .cpp = { 2, 0, 0 }, .hsub = 1, .vsub = 1 }, #ifdef __BIG_ENDIAN { .format = DRM_FORMAT_XRGB1555 | DRM_FORMAT_BIG_ENDIAN, .depth = 15, .num_planes = 1, .cpp = { 2, 0, 0 }, .hsub = 1, .vsub = 1 }, { .format = DRM_FORMAT_RGB565 | DRM_FORMAT_BIG_ENDIAN, .depth = 16, .num_planes = 1, .cpp = { 2, 0, 0 }, .hsub = 1, .vsub = 1 }, #endif { .format = DRM_FORMAT_RGB888, .depth = 24, .num_planes = 1, .cpp = { 3, 0, 0 }, .hsub = 1, .vsub = 1 }, { .format = DRM_FORMAT_BGR888, .depth = 24, .num_planes = 1, .cpp = { 3, 0, 0 }, .hsub = 1, .vsub = 1 }, { .format = DRM_FORMAT_XRGB8888, .depth = 24, .num_planes = 1, .cpp = { 4, 0, 0 }, .hsub = 1, .vsub = 1 }, { .format = DRM_FORMAT_XBGR8888, .depth = 24, .num_planes = 1, .cpp = { 4, 0, 0 }, .hsub = 1, .vsub = 1 }, { .format = DRM_FORMAT_RGBX8888, .depth = 24, .num_planes = 1, .cpp = { 4, 0, 0 }, .hsub = 1, .vsub = 1 }, { .format = DRM_FORMAT_BGRX8888, .depth = 24, .num_planes = 1, .cpp = { 4, 0, 0 }, .hsub = 1, .vsub = 1 }, { .format = DRM_FORMAT_RGB565_A8, .depth = 24, .num_planes = 2, .cpp = { 2, 1, 0 }, .hsub = 1, .vsub = 1, .has_alpha = true }, { .format = DRM_FORMAT_BGR565_A8, .depth = 24, .num_planes = 2, .cpp = { 2, 1, 0 }, .hsub = 1, .vsub = 1, .has_alpha = true }, { .format = DRM_FORMAT_XRGB2101010, .depth = 30, .num_planes = 1, .cpp = { 4, 0, 0 }, .hsub = 1, .vsub = 1 }, { .format = DRM_FORMAT_XBGR2101010, .depth = 30, .num_planes = 1, .cpp = { 4, 0, 0 }, .hsub = 1, .vsub = 1 }, { .format = DRM_FORMAT_RGBX1010102, .depth = 30, .num_planes = 1, .cpp = { 4, 0, 0 }, .hsub = 1, .vsub = 1 }, { .format = DRM_FORMAT_BGRX1010102, .depth = 30, .num_planes = 1, .cpp = { 4, 0, 0 }, .hsub = 1, .vsub = 1 }, { .format = DRM_FORMAT_ARGB2101010, .depth = 30, .num_planes = 1, .cpp = { 4, 0, 0 }, .hsub = 1, .vsub = 1, .has_alpha = true }, { .format = DRM_FORMAT_ABGR2101010, .depth = 30, .num_planes = 1, .cpp = { 4, 0, 0 }, .hsub = 1, .vsub = 1, .has_alpha = true }, { .format = DRM_FORMAT_RGBA1010102, .depth = 30, .num_planes = 1, .cpp = { 4, 0, 0 }, .hsub = 1, .vsub = 1, .has_alpha = true }, { .format = DRM_FORMAT_BGRA1010102, .depth = 30, .num_planes = 1, .cpp = { 4, 0, 0 }, .hsub = 1, .vsub = 1, .has_alpha = true }, { .format = DRM_FORMAT_ARGB8888, .depth = 32, .num_planes = 1, .cpp = { 4, 0, 0 }, .hsub = 1, .vsub = 1, .has_alpha = true }, { .format = DRM_FORMAT_ABGR8888, .depth = 32, .num_planes = 1, .cpp = { 4, 0, 0 }, .hsub = 1, .vsub = 1, .has_alpha = true }, { .format = DRM_FORMAT_RGBA8888, .depth = 32, .num_planes = 1, .cpp = { 4, 0, 0 }, .hsub = 1, .vsub = 1, .has_alpha = true }, { .format = DRM_FORMAT_BGRA8888, .depth = 32, .num_planes = 1, .cpp = { 4, 0, 0 }, .hsub = 1, .vsub = 1, .has_alpha = true }, { .format = DRM_FORMAT_XRGB16161616F, .depth = 0, .num_planes = 1, .cpp = { 8, 0, 0 }, .hsub = 1, .vsub = 1 }, { .format = DRM_FORMAT_XBGR16161616F, .depth = 0, .num_planes = 1, .cpp = { 8, 0, 0 }, .hsub = 1, .vsub = 1 }, { .format = DRM_FORMAT_ARGB16161616F, .depth = 0, .num_planes = 1, .cpp = { 8, 0, 0 }, .hsub = 1, .vsub = 1, .has_alpha = true }, { .format = DRM_FORMAT_ABGR16161616F, .depth = 0, .num_planes = 1, .cpp = { 8, 0, 0 }, .hsub = 1, .vsub = 1, .has_alpha = true }, { .format = DRM_FORMAT_AXBXGXRX106106106106, .depth = 0, .num_planes = 1, .cpp = { 8, 0, 0 }, .hsub = 1, .vsub = 1, .has_alpha = true }, { .format = DRM_FORMAT_XRGB16161616, .depth = 0, .num_planes = 1, .cpp = { 8, 0, 0 }, .hsub = 1, .vsub = 1 }, { .format = DRM_FORMAT_XBGR16161616, .depth = 0, .num_planes = 1, .cpp = { 8, 0, 0 }, .hsub = 1, .vsub = 1 }, { .format = DRM_FORMAT_ARGB16161616, .depth = 0, .num_planes = 1, .cpp = { 8, 0, 0 }, .hsub = 1, .vsub = 1, .has_alpha = true }, { .format = DRM_FORMAT_ABGR16161616, .depth = 0, .num_planes = 1, .cpp = { 8, 0, 0 }, .hsub = 1, .vsub = 1, .has_alpha = true }, { .format = DRM_FORMAT_RGB888_A8, .depth = 32, .num_planes = 2, .cpp = { 3, 1, 0 }, .hsub = 1, .vsub = 1, .has_alpha = true }, { .format = DRM_FORMAT_BGR888_A8, .depth = 32, .num_planes = 2, .cpp = { 3, 1, 0 }, .hsub = 1, .vsub = 1, .has_alpha = true }, { .format = DRM_FORMAT_XRGB8888_A8, .depth = 32, .num_planes = 2, .cpp = { 4, 1, 0 }, .hsub = 1, .vsub = 1, .has_alpha = true }, { .format = DRM_FORMAT_XBGR8888_A8, .depth = 32, .num_planes = 2, .cpp = { 4, 1, 0 }, .hsub = 1, .vsub = 1, .has_alpha = true }, { .format = DRM_FORMAT_RGBX8888_A8, .depth = 32, .num_planes = 2, .cpp = { 4, 1, 0 }, .hsub = 1, .vsub = 1, .has_alpha = true }, { .format = DRM_FORMAT_BGRX8888_A8, .depth = 32, .num_planes = 2, .cpp = { 4, 1, 0 }, .hsub = 1, .vsub = 1, .has_alpha = true }, { .format = DRM_FORMAT_YUV410, .depth = 0, .num_planes = 3, .cpp = { 1, 1, 1 }, .hsub = 4, .vsub = 4, .is_yuv = true }, { .format = DRM_FORMAT_YVU410, .depth = 0, .num_planes = 3, .cpp = { 1, 1, 1 }, .hsub = 4, .vsub = 4, .is_yuv = true }, { .format = DRM_FORMAT_YUV411, .depth = 0, .num_planes = 3, .cpp = { 1, 1, 1 }, .hsub = 4, .vsub = 1, .is_yuv = true }, { .format = DRM_FORMAT_YVU411, .depth = 0, .num_planes = 3, .cpp = { 1, 1, 1 }, .hsub = 4, .vsub = 1, .is_yuv = true }, { .format = DRM_FORMAT_YUV420, .depth = 0, .num_planes = 3, .cpp = { 1, 1, 1 }, .hsub = 2, .vsub = 2, .is_yuv = true }, { .format = DRM_FORMAT_YVU420, .depth = 0, .num_planes = 3, .cpp = { 1, 1, 1 }, .hsub = 2, .vsub = 2, .is_yuv = true }, { .format = DRM_FORMAT_YUV422, .depth = 0, .num_planes = 3, .cpp = { 1, 1, 1 }, .hsub = 2, .vsub = 1, .is_yuv = true }, { .format = DRM_FORMAT_YVU422, .depth = 0, .num_planes = 3, .cpp = { 1, 1, 1 }, .hsub = 2, .vsub = 1, .is_yuv = true }, { .format = DRM_FORMAT_YUV444, .depth = 0, .num_planes = 3, .cpp = { 1, 1, 1 }, .hsub = 1, .vsub = 1, .is_yuv = true }, { .format = DRM_FORMAT_YVU444, .depth = 0, .num_planes = 3, .cpp = { 1, 1, 1 }, .hsub = 1, .vsub = 1, .is_yuv = true }, { .format = DRM_FORMAT_NV12, .depth = 0, .num_planes = 2, .cpp = { 1, 2, 0 }, .hsub = 2, .vsub = 2, .is_yuv = true }, { .format = DRM_FORMAT_NV21, .depth = 0, .num_planes = 2, .cpp = { 1, 2, 0 }, .hsub = 2, .vsub = 2, .is_yuv = true }, { .format = DRM_FORMAT_NV16, .depth = 0, .num_planes = 2, .cpp = { 1, 2, 0 }, .hsub = 2, .vsub = 1, .is_yuv = true }, { .format = DRM_FORMAT_NV61, .depth = 0, .num_planes = 2, .cpp = { 1, 2, 0 }, .hsub = 2, .vsub = 1, .is_yuv = true }, { .format = DRM_FORMAT_NV24, .depth = 0, .num_planes = 2, .cpp = { 1, 2, 0 }, .hsub = 1, .vsub = 1, .is_yuv = true }, { .format = DRM_FORMAT_NV42, .depth = 0, .num_planes = 2, .cpp = { 1, 2, 0 }, .hsub = 1, .vsub = 1, .is_yuv = true }, { .format = DRM_FORMAT_YUYV, .depth = 0, .num_planes = 1, .cpp = { 2, 0, 0 }, .hsub = 2, .vsub = 1, .is_yuv = true }, { .format = DRM_FORMAT_YVYU, .depth = 0, .num_planes = 1, .cpp = { 2, 0, 0 }, .hsub = 2, .vsub = 1, .is_yuv = true }, { .format = DRM_FORMAT_UYVY, .depth = 0, .num_planes = 1, .cpp = { 2, 0, 0 }, .hsub = 2, .vsub = 1, .is_yuv = true }, { .format = DRM_FORMAT_VYUY, .depth = 0, .num_planes = 1, .cpp = { 2, 0, 0 }, .hsub = 2, .vsub = 1, .is_yuv = true }, { .format = DRM_FORMAT_XYUV8888, .depth = 0, .num_planes = 1, .cpp = { 4, 0, 0 }, .hsub = 1, .vsub = 1, .is_yuv = true }, { .format = DRM_FORMAT_VUY888, .depth = 0, .num_planes = 1, .cpp = { 3, 0, 0 }, .hsub = 1, .vsub = 1, .is_yuv = true }, { .format = DRM_FORMAT_AYUV, .depth = 0, .num_planes = 1, .cpp = { 4, 0, 0 }, .hsub = 1, .vsub = 1, .has_alpha = true, .is_yuv = true }, { .format = DRM_FORMAT_Y210, .depth = 0, .num_planes = 1, .cpp = { 4, 0, 0 }, .hsub = 2, .vsub = 1, .is_yuv = true }, { .format = DRM_FORMAT_Y212, .depth = 0, .num_planes = 1, .cpp = { 4, 0, 0 }, .hsub = 2, .vsub = 1, .is_yuv = true }, { .format = DRM_FORMAT_Y216, .depth = 0, .num_planes = 1, .cpp = { 4, 0, 0 }, .hsub = 2, .vsub = 1, .is_yuv = true }, { .format = DRM_FORMAT_Y410, .depth = 0, .num_planes = 1, .cpp = { 4, 0, 0 }, .hsub = 1, .vsub = 1, .has_alpha = true, .is_yuv = true }, { .format = DRM_FORMAT_Y412, .depth = 0, .num_planes = 1, .cpp = { 8, 0, 0 }, .hsub = 1, .vsub = 1, .has_alpha = true, .is_yuv = true }, { .format = DRM_FORMAT_Y416, .depth = 0, .num_planes = 1, .cpp = { 8, 0, 0 }, .hsub = 1, .vsub = 1, .has_alpha = true, .is_yuv = true }, { .format = DRM_FORMAT_XVYU2101010, .depth = 0, .num_planes = 1, .cpp = { 4, 0, 0 }, .hsub = 1, .vsub = 1, .is_yuv = true }, { .format = DRM_FORMAT_XVYU12_16161616, .depth = 0, .num_planes = 1, .cpp = { 8, 0, 0 }, .hsub = 1, .vsub = 1, .is_yuv = true }, { .format = DRM_FORMAT_XVYU16161616, .depth = 0, .num_planes = 1, .cpp = { 8, 0, 0 }, .hsub = 1, .vsub = 1, .is_yuv = true }, { .format = DRM_FORMAT_Y0L0, .depth = 0, .num_planes = 1, .char_per_block = { 8, 0, 0 }, .block_w = { 2, 0, 0 }, .block_h = { 2, 0, 0 }, .hsub = 2, .vsub = 2, .has_alpha = true, .is_yuv = true }, { .format = DRM_FORMAT_X0L0, .depth = 0, .num_planes = 1, .char_per_block = { 8, 0, 0 }, .block_w = { 2, 0, 0 }, .block_h = { 2, 0, 0 }, .hsub = 2, .vsub = 2, .is_yuv = true }, { .format = DRM_FORMAT_Y0L2, .depth = 0, .num_planes = 1, .char_per_block = { 8, 0, 0 }, .block_w = { 2, 0, 0 }, .block_h = { 2, 0, 0 }, .hsub = 2, .vsub = 2, .has_alpha = true, .is_yuv = true }, { .format = DRM_FORMAT_X0L2, .depth = 0, .num_planes = 1, .char_per_block = { 8, 0, 0 }, .block_w = { 2, 0, 0 }, .block_h = { 2, 0, 0 }, .hsub = 2, .vsub = 2, .is_yuv = true }, { .format = DRM_FORMAT_P010, .depth = 0, .num_planes = 2, .char_per_block = { 2, 4, 0 }, .block_w = { 1, 1, 0 }, .block_h = { 1, 1, 0 }, .hsub = 2, .vsub = 2, .is_yuv = true}, { .format = DRM_FORMAT_P012, .depth = 0, .num_planes = 2, .char_per_block = { 2, 4, 0 }, .block_w = { 1, 1, 0 }, .block_h = { 1, 1, 0 }, .hsub = 2, .vsub = 2, .is_yuv = true}, { .format = DRM_FORMAT_P016, .depth = 0, .num_planes = 2, .char_per_block = { 2, 4, 0 }, .block_w = { 1, 1, 0 }, .block_h = { 1, 1, 0 }, .hsub = 2, .vsub = 2, .is_yuv = true}, { .format = DRM_FORMAT_P210, .depth = 0, .num_planes = 2, .char_per_block = { 2, 4, 0 }, .block_w = { 1, 1, 0 }, .block_h = { 1, 1, 0 }, .hsub = 2, .vsub = 1, .is_yuv = true }, { .format = DRM_FORMAT_VUY101010, .depth = 0, .num_planes = 1, .cpp = { 0, 0, 0 }, .hsub = 1, .vsub = 1, .is_yuv = true }, { .format = DRM_FORMAT_YUV420_8BIT, .depth = 0, .num_planes = 1, .cpp = { 0, 0, 0 }, .hsub = 2, .vsub = 2, .is_yuv = true }, { .format = DRM_FORMAT_YUV420_10BIT, .depth = 0, .num_planes = 1, .cpp = { 0, 0, 0 }, .hsub = 2, .vsub = 2, .is_yuv = true }, { .format = DRM_FORMAT_NV15, .depth = 0, .num_planes = 2, .char_per_block = { 5, 5, 0 }, .block_w = { 4, 2, 0 }, .block_h = { 1, 1, 0 }, .hsub = 2, .vsub = 2, .is_yuv = true }, { .format = DRM_FORMAT_NV20, .depth = 0, .num_planes = 2, .char_per_block = { 5, 5, 0 }, .block_w = { 4, 2, 0 }, .block_h = { 1, 1, 0 }, .hsub = 2, .vsub = 1, .is_yuv = true }, { .format = DRM_FORMAT_NV30, .depth = 0, .num_planes = 2, .char_per_block = { 5, 5, 0 }, .block_w = { 4, 2, 0 }, .block_h = { 1, 1, 0 }, .hsub = 1, .vsub = 1, .is_yuv = true }, { .format = DRM_FORMAT_Q410, .depth = 0, .num_planes = 3, .char_per_block = { 2, 2, 2 }, .block_w = { 1, 1, 1 }, .block_h = { 1, 1, 1 }, .hsub = 1, .vsub = 1, .is_yuv = true }, { .format = DRM_FORMAT_Q401, .depth = 0, .num_planes = 3, .char_per_block = { 2, 2, 2 }, .block_w = { 1, 1, 1 }, .block_h = { 1, 1, 1 }, .hsub = 1, .vsub = 1, .is_yuv = true }, { .format = DRM_FORMAT_P030, .depth = 0, .num_planes = 2, .char_per_block = { 4, 8, 0 }, .block_w = { 3, 3, 0 }, .block_h = { 1, 1, 0 }, .hsub = 2, .vsub = 2, .is_yuv = true}, }; unsigned int i; for (i = 0; i < ARRAY_SIZE(formats); ++i) { if (formats[i].format == format) return &formats[i]; } return NULL; } /** * drm_format_info - query information for a given format * @format: pixel format (DRM_FORMAT_*) * * The caller should only pass a supported pixel format to this function. * Unsupported pixel formats will generate a warning in the kernel log. * * Returns: * The instance of struct drm_format_info that describes the pixel format, or * NULL if the format is unsupported. */ const struct drm_format_info *drm_format_info(u32 format) { const struct drm_format_info *info; info = __drm_format_info(format); WARN_ON(!info); return info; } EXPORT_SYMBOL(drm_format_info); /** * drm_get_format_info - query information for a given framebuffer configuration * @dev: DRM device * @mode_cmd: metadata from the userspace fb creation request * * Returns: * The instance of struct drm_format_info that describes the pixel format, or * NULL if the format is unsupported. */ const struct drm_format_info * drm_get_format_info(struct drm_device *dev, const struct drm_mode_fb_cmd2 *mode_cmd) { const struct drm_format_info *info = NULL; if (dev->mode_config.funcs->get_format_info) info = dev->mode_config.funcs->get_format_info(mode_cmd); if (!info) info = drm_format_info(mode_cmd->pixel_format); return info; } EXPORT_SYMBOL(drm_get_format_info); /** * drm_format_info_block_width - width in pixels of block. * @info: pixel format info * @plane: plane index * * Returns: * The width in pixels of a block, depending on the plane index. */ unsigned int drm_format_info_block_width(const struct drm_format_info *info, int plane) { if (!info || plane < 0 || plane >= info->num_planes) return 0; if (!info->block_w[plane]) return 1; return info->block_w[plane]; } EXPORT_SYMBOL(drm_format_info_block_width); /** * drm_format_info_block_height - height in pixels of a block * @info: pixel format info * @plane: plane index * * Returns: * The height in pixels of a block, depending on the plane index. */ unsigned int drm_format_info_block_height(const struct drm_format_info *info, int plane) { if (!info || plane < 0 || plane >= info->num_planes) return 0; if (!info->block_h[plane]) return 1; return info->block_h[plane]; } EXPORT_SYMBOL(drm_format_info_block_height); /** * drm_format_info_bpp - number of bits per pixel * @info: pixel format info * @plane: plane index * * Returns: * The actual number of bits per pixel, depending on the plane index. */ unsigned int drm_format_info_bpp(const struct drm_format_info *info, int plane) { if (!info || plane < 0 || plane >= info->num_planes) return 0; return info->char_per_block[plane] * 8 / (drm_format_info_block_width(info, plane) * drm_format_info_block_height(info, plane)); } EXPORT_SYMBOL(drm_format_info_bpp); /** * drm_format_info_min_pitch - computes the minimum required pitch in bytes * @info: pixel format info * @plane: plane index * @buffer_width: buffer width in pixels * * Returns: * The minimum required pitch in bytes for a buffer by taking into consideration * the pixel format information and the buffer width. */ uint64_t drm_format_info_min_pitch(const struct drm_format_info *info, int plane, unsigned int buffer_width) { if (!info || plane < 0 || plane >= info->num_planes) return 0; return DIV_ROUND_UP_ULL((u64)buffer_width * info->char_per_block[plane], drm_format_info_block_width(info, plane) * drm_format_info_block_height(info, plane)); } EXPORT_SYMBOL(drm_format_info_min_pitch); |
7 7 7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 | // SPDX-License-Identifier: GPL-2.0-only /* iptables module to match on related connections */ /* * (C) 2001 Martin Josefsson <gandalf@wlug.westbo.se> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/skbuff.h> #include <linux/netfilter.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_helper.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_helper.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Martin Josefsson <gandalf@netfilter.org>"); MODULE_DESCRIPTION("Xtables: Related connection matching"); MODULE_ALIAS("ipt_helper"); MODULE_ALIAS("ip6t_helper"); static bool helper_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_helper_info *info = par->matchinfo; const struct nf_conn *ct; const struct nf_conn_help *master_help; const struct nf_conntrack_helper *helper; enum ip_conntrack_info ctinfo; bool ret = info->invert; ct = nf_ct_get(skb, &ctinfo); if (!ct || !ct->master) return ret; master_help = nfct_help(ct->master); if (!master_help) return ret; /* rcu_read_lock()ed by nf_hook_thresh */ helper = rcu_dereference(master_help->helper); if (!helper) return ret; if (info->name[0] == '\0') ret = !ret; else ret ^= !strncmp(helper->name, info->name, strlen(helper->name)); return ret; } static int helper_mt_check(const struct xt_mtchk_param *par) { struct xt_helper_info *info = par->matchinfo; int ret; ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) { pr_info_ratelimited("cannot load conntrack support for proto=%u\n", par->family); return ret; } info->name[sizeof(info->name) - 1] = '\0'; return 0; } static void helper_mt_destroy(const struct xt_mtdtor_param *par) { nf_ct_netns_put(par->net, par->family); } static struct xt_match helper_mt_reg __read_mostly = { .name = "helper", .revision = 0, .family = NFPROTO_UNSPEC, .checkentry = helper_mt_check, .match = helper_mt, .destroy = helper_mt_destroy, .matchsize = sizeof(struct xt_helper_info), .me = THIS_MODULE, }; static int __init helper_mt_init(void) { return xt_register_match(&helper_mt_reg); } static void __exit helper_mt_exit(void) { xt_unregister_match(&helper_mt_reg); } module_init(helper_mt_init); module_exit(helper_mt_exit); |
13 13 13 13 13 20 20 1 3 12 6 15 2 12 4 13 3 3 13 13 20 13 13 6 13 13 5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 | // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2006 Patrick McHardy <kaber@trash.net> * Copyright © CC Computer Consultants GmbH, 2007 - 2008 * * This is a replacement of the old ipt_recent module, which carried the * following copyright notice: * * Author: Stephen Frost <sfrost@snowman.net> * Copyright 2002-2003, Stephen Frost, 2.5.x port by laforge@netfilter.org */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/init.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/string.h> #include <linux/ctype.h> #include <linux/list.h> #include <linux/random.h> #include <linux/jhash.h> #include <linux/bitops.h> #include <linux/skbuff.h> #include <linux/inet.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_recent.h> MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>"); MODULE_DESCRIPTION("Xtables: \"recently-seen\" host matching"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_recent"); MODULE_ALIAS("ip6t_recent"); static unsigned int ip_list_tot __read_mostly = 100; static unsigned int ip_list_hash_size __read_mostly; static unsigned int ip_list_perms __read_mostly = 0644; static unsigned int ip_list_uid __read_mostly; static unsigned int ip_list_gid __read_mostly; module_param(ip_list_tot, uint, 0400); module_param(ip_list_hash_size, uint, 0400); module_param(ip_list_perms, uint, 0400); module_param(ip_list_uid, uint, 0644); module_param(ip_list_gid, uint, 0644); MODULE_PARM_DESC(ip_list_tot, "number of IPs to remember per list"); MODULE_PARM_DESC(ip_list_hash_size, "size of hash table used to look up IPs"); MODULE_PARM_DESC(ip_list_perms, "permissions on /proc/net/xt_recent/* files"); MODULE_PARM_DESC(ip_list_uid, "default owner of /proc/net/xt_recent/* files"); MODULE_PARM_DESC(ip_list_gid, "default owning group of /proc/net/xt_recent/* files"); /* retained for backwards compatibility */ static unsigned int ip_pkt_list_tot __read_mostly; module_param(ip_pkt_list_tot, uint, 0400); MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP address to remember (max. 255)"); #define XT_RECENT_MAX_NSTAMPS 256 struct recent_entry { struct list_head list; struct list_head lru_list; union nf_inet_addr addr; u_int16_t family; u_int8_t ttl; u_int8_t index; u_int16_t nstamps; unsigned long stamps[]; }; struct recent_table { struct list_head list; char name[XT_RECENT_NAME_LEN]; union nf_inet_addr mask; unsigned int refcnt; unsigned int entries; u8 nstamps_max_mask; struct list_head lru_list; struct list_head iphash[]; }; struct recent_net { struct list_head tables; #ifdef CONFIG_PROC_FS struct proc_dir_entry *xt_recent; #endif }; static unsigned int recent_net_id __read_mostly; static inline struct recent_net *recent_pernet(struct net *net) { return net_generic(net, recent_net_id); } static DEFINE_SPINLOCK(recent_lock); static DEFINE_MUTEX(recent_mutex); #ifdef CONFIG_PROC_FS static const struct proc_ops recent_mt_proc_ops; #endif static u_int32_t hash_rnd __read_mostly; static inline unsigned int recent_entry_hash4(const union nf_inet_addr *addr) { return jhash_1word((__force u32)addr->ip, hash_rnd) & (ip_list_hash_size - 1); } static inline unsigned int recent_entry_hash6(const union nf_inet_addr *addr) { return jhash2((u32 *)addr->ip6, ARRAY_SIZE(addr->ip6), hash_rnd) & (ip_list_hash_size - 1); } static struct recent_entry * recent_entry_lookup(const struct recent_table *table, const union nf_inet_addr *addrp, u_int16_t family, u_int8_t ttl) { struct recent_entry *e; unsigned int h; if (family == NFPROTO_IPV4) h = recent_entry_hash4(addrp); else h = recent_entry_hash6(addrp); list_for_each_entry(e, &table->iphash[h], list) if (e->family == family && memcmp(&e->addr, addrp, sizeof(e->addr)) == 0 && (ttl == e->ttl || ttl == 0 || e->ttl == 0)) return e; return NULL; } static void recent_entry_remove(struct recent_table *t, struct recent_entry *e) { list_del(&e->list); list_del(&e->lru_list); kfree(e); t->entries--; } /* * Drop entries with timestamps older then 'time'. */ static void recent_entry_reap(struct recent_table *t, unsigned long time, struct recent_entry *working, bool update) { struct recent_entry *e; /* * The head of the LRU list is always the oldest entry. */ e = list_entry(t->lru_list.next, struct recent_entry, lru_list); /* * Do not reap the entry which are going to be updated. */ if (e == working && update) return; /* * The last time stamp is the most recent. */ if (time_after(time, e->stamps[e->index-1])) recent_entry_remove(t, e); } static struct recent_entry * recent_entry_init(struct recent_table *t, const union nf_inet_addr *addr, u_int16_t family, u_int8_t ttl) { struct recent_entry *e; unsigned int nstamps_max = t->nstamps_max_mask; if (t->entries >= ip_list_tot) { e = list_entry(t->lru_list.next, struct recent_entry, lru_list); recent_entry_remove(t, e); } nstamps_max += 1; e = kmalloc(struct_size(e, stamps, nstamps_max), GFP_ATOMIC); if (e == NULL) return NULL; memcpy(&e->addr, addr, sizeof(e->addr)); e->ttl = ttl; e->stamps[0] = jiffies; e->nstamps = 1; e->index = 1; e->family = family; if (family == NFPROTO_IPV4) list_add_tail(&e->list, &t->iphash[recent_entry_hash4(addr)]); else list_add_tail(&e->list, &t->iphash[recent_entry_hash6(addr)]); list_add_tail(&e->lru_list, &t->lru_list); t->entries++; return e; } static void recent_entry_update(struct recent_table *t, struct recent_entry *e) { e->index &= t->nstamps_max_mask; e->stamps[e->index++] = jiffies; if (e->index > e->nstamps) e->nstamps = e->index; list_move_tail(&e->lru_list, &t->lru_list); } static struct recent_table *recent_table_lookup(struct recent_net *recent_net, const char *name) { struct recent_table *t; list_for_each_entry(t, &recent_net->tables, list) if (!strcmp(t->name, name)) return t; return NULL; } static void recent_table_flush(struct recent_table *t) { struct recent_entry *e, *next; unsigned int i; for (i = 0; i < ip_list_hash_size; i++) list_for_each_entry_safe(e, next, &t->iphash[i], list) recent_entry_remove(t, e); } static bool recent_mt(const struct sk_buff *skb, struct xt_action_param *par) { struct net *net = xt_net(par); struct recent_net *recent_net = recent_pernet(net); const struct xt_recent_mtinfo_v1 *info = par->matchinfo; struct recent_table *t; struct recent_entry *e; union nf_inet_addr addr = {}, addr_mask; u_int8_t ttl; bool ret = info->invert; if (xt_family(par) == NFPROTO_IPV4) { const struct iphdr *iph = ip_hdr(skb); if (info->side == XT_RECENT_DEST) addr.ip = iph->daddr; else addr.ip = iph->saddr; ttl = iph->ttl; } else { const struct ipv6hdr *iph = ipv6_hdr(skb); if (info->side == XT_RECENT_DEST) memcpy(&addr.in6, &iph->daddr, sizeof(addr.in6)); else memcpy(&addr.in6, &iph->saddr, sizeof(addr.in6)); ttl = iph->hop_limit; } /* use TTL as seen before forwarding */ if (xt_out(par) != NULL && (!skb->sk || !net_eq(net, sock_net(skb->sk)))) ttl++; spin_lock_bh(&recent_lock); t = recent_table_lookup(recent_net, info->name); nf_inet_addr_mask(&addr, &addr_mask, &t->mask); e = recent_entry_lookup(t, &addr_mask, xt_family(par), (info->check_set & XT_RECENT_TTL) ? ttl : 0); if (e == NULL) { if (!(info->check_set & XT_RECENT_SET)) goto out; e = recent_entry_init(t, &addr_mask, xt_family(par), ttl); if (e == NULL) par->hotdrop = true; ret = !ret; goto out; } if (info->check_set & XT_RECENT_SET) ret = !ret; else if (info->check_set & XT_RECENT_REMOVE) { recent_entry_remove(t, e); ret = !ret; } else if (info->check_set & (XT_RECENT_CHECK | XT_RECENT_UPDATE)) { unsigned long time = jiffies - info->seconds * HZ; unsigned int i, hits = 0; for (i = 0; i < e->nstamps; i++) { if (info->seconds && time_after(time, e->stamps[i])) continue; if (!info->hit_count || ++hits >= info->hit_count) { ret = !ret; break; } } /* info->seconds must be non-zero */ if (info->check_set & XT_RECENT_REAP) recent_entry_reap(t, time, e, info->check_set & XT_RECENT_UPDATE && ret); } if (info->check_set & XT_RECENT_SET || (info->check_set & XT_RECENT_UPDATE && ret)) { recent_entry_update(t, e); e->ttl = ttl; } out: spin_unlock_bh(&recent_lock); return ret; } static void recent_table_free(void *addr) { kvfree(addr); } static int recent_mt_check(const struct xt_mtchk_param *par, const struct xt_recent_mtinfo_v1 *info) { struct recent_net *recent_net = recent_pernet(par->net); struct recent_table *t; #ifdef CONFIG_PROC_FS struct proc_dir_entry *pde; kuid_t uid; kgid_t gid; #endif unsigned int nstamp_mask; unsigned int i; int ret = -EINVAL; net_get_random_once(&hash_rnd, sizeof(hash_rnd)); if (info->check_set & ~XT_RECENT_VALID_FLAGS) { pr_info_ratelimited("Unsupported userspace flags (%08x)\n", info->check_set); return -EINVAL; } if (hweight8(info->check_set & (XT_RECENT_SET | XT_RECENT_REMOVE | XT_RECENT_CHECK | XT_RECENT_UPDATE)) != 1) return -EINVAL; if ((info->check_set & (XT_RECENT_SET | XT_RECENT_REMOVE)) && (info->seconds || info->hit_count || (info->check_set & XT_RECENT_MODIFIERS))) return -EINVAL; if ((info->check_set & XT_RECENT_REAP) && !info->seconds) return -EINVAL; if (info->hit_count >= XT_RECENT_MAX_NSTAMPS) { pr_info_ratelimited("hitcount (%u) is larger than allowed maximum (%u)\n", info->hit_count, XT_RECENT_MAX_NSTAMPS - 1); return -EINVAL; } ret = xt_check_proc_name(info->name, sizeof(info->name)); if (ret) return ret; if (ip_pkt_list_tot && info->hit_count < ip_pkt_list_tot) nstamp_mask = roundup_pow_of_two(ip_pkt_list_tot) - 1; else if (info->hit_count) nstamp_mask = roundup_pow_of_two(info->hit_count) - 1; else nstamp_mask = 32 - 1; mutex_lock(&recent_mutex); t = recent_table_lookup(recent_net, info->name); if (t != NULL) { if (nstamp_mask > t->nstamps_max_mask) { spin_lock_bh(&recent_lock); recent_table_flush(t); t->nstamps_max_mask = nstamp_mask; spin_unlock_bh(&recent_lock); } t->refcnt++; ret = 0; goto out; } t = kvzalloc(struct_size(t, iphash, ip_list_hash_size), GFP_KERNEL); if (t == NULL) { ret = -ENOMEM; goto out; } t->refcnt = 1; t->nstamps_max_mask = nstamp_mask; memcpy(&t->mask, &info->mask, sizeof(t->mask)); strcpy(t->name, info->name); INIT_LIST_HEAD(&t->lru_list); for (i = 0; i < ip_list_hash_size; i++) INIT_LIST_HEAD(&t->iphash[i]); #ifdef CONFIG_PROC_FS uid = make_kuid(&init_user_ns, ip_list_uid); gid = make_kgid(&init_user_ns, ip_list_gid); if (!uid_valid(uid) || !gid_valid(gid)) { recent_table_free(t); ret = -EINVAL; goto out; } pde = proc_create_data(t->name, ip_list_perms, recent_net->xt_recent, &recent_mt_proc_ops, t); if (pde == NULL) { recent_table_free(t); ret = -ENOMEM; goto out; } proc_set_user(pde, uid, gid); #endif spin_lock_bh(&recent_lock); list_add_tail(&t->list, &recent_net->tables); spin_unlock_bh(&recent_lock); ret = 0; out: mutex_unlock(&recent_mutex); return ret; } static int recent_mt_check_v0(const struct xt_mtchk_param *par) { const struct xt_recent_mtinfo_v0 *info_v0 = par->matchinfo; struct xt_recent_mtinfo_v1 info_v1; /* Copy revision 0 structure to revision 1 */ memcpy(&info_v1, info_v0, sizeof(struct xt_recent_mtinfo)); /* Set default mask to ensure backward compatible behaviour */ memset(info_v1.mask.all, 0xFF, sizeof(info_v1.mask.all)); return recent_mt_check(par, &info_v1); } static int recent_mt_check_v1(const struct xt_mtchk_param *par) { return recent_mt_check(par, par->matchinfo); } static void recent_mt_destroy(const struct xt_mtdtor_param *par) { struct recent_net *recent_net = recent_pernet(par->net); const struct xt_recent_mtinfo_v1 *info = par->matchinfo; struct recent_table *t; mutex_lock(&recent_mutex); t = recent_table_lookup(recent_net, info->name); if (--t->refcnt == 0) { spin_lock_bh(&recent_lock); list_del(&t->list); spin_unlock_bh(&recent_lock); #ifdef CONFIG_PROC_FS if (recent_net->xt_recent != NULL) remove_proc_entry(t->name, recent_net->xt_recent); #endif recent_table_flush(t); recent_table_free(t); } mutex_unlock(&recent_mutex); } #ifdef CONFIG_PROC_FS struct recent_iter_state { const struct recent_table *table; unsigned int bucket; }; static void *recent_seq_start(struct seq_file *seq, loff_t *pos) __acquires(recent_lock) { struct recent_iter_state *st = seq->private; const struct recent_table *t = st->table; struct recent_entry *e; loff_t p = *pos; spin_lock_bh(&recent_lock); for (st->bucket = 0; st->bucket < ip_list_hash_size; st->bucket++) list_for_each_entry(e, &t->iphash[st->bucket], list) if (p-- == 0) return e; return NULL; } static void *recent_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct recent_iter_state *st = seq->private; const struct recent_table *t = st->table; const struct recent_entry *e = v; const struct list_head *head = e->list.next; (*pos)++; while (head == &t->iphash[st->bucket]) { if (++st->bucket >= ip_list_hash_size) return NULL; head = t->iphash[st->bucket].next; } return list_entry(head, struct recent_entry, list); } static void recent_seq_stop(struct seq_file *s, void *v) __releases(recent_lock) { spin_unlock_bh(&recent_lock); } static int recent_seq_show(struct seq_file *seq, void *v) { const struct recent_entry *e = v; struct recent_iter_state *st = seq->private; const struct recent_table *t = st->table; unsigned int i; i = (e->index - 1) & t->nstamps_max_mask; if (e->family == NFPROTO_IPV4) seq_printf(seq, "src=%pI4 ttl: %u last_seen: %lu oldest_pkt: %u", &e->addr.ip, e->ttl, e->stamps[i], e->index); else seq_printf(seq, "src=%pI6 ttl: %u last_seen: %lu oldest_pkt: %u", &e->addr.in6, e->ttl, e->stamps[i], e->index); for (i = 0; i < e->nstamps; i++) seq_printf(seq, "%s %lu", i ? "," : "", e->stamps[i]); seq_putc(seq, '\n'); return 0; } static const struct seq_operations recent_seq_ops = { .start = recent_seq_start, .next = recent_seq_next, .stop = recent_seq_stop, .show = recent_seq_show, }; static int recent_seq_open(struct inode *inode, struct file *file) { struct recent_iter_state *st; st = __seq_open_private(file, &recent_seq_ops, sizeof(*st)); if (st == NULL) return -ENOMEM; st->table = pde_data(inode); return 0; } static ssize_t recent_mt_proc_write(struct file *file, const char __user *input, size_t size, loff_t *loff) { struct recent_table *t = pde_data(file_inode(file)); struct recent_entry *e; char buf[sizeof("+b335:1d35:1e55:dead:c0de:1715:255.255.255.255")]; const char *c = buf; union nf_inet_addr addr = {}; u_int16_t family; bool add, succ; if (size == 0) return 0; if (size > sizeof(buf)) size = sizeof(buf); if (copy_from_user(buf, input, size) != 0) return -EFAULT; /* Strict protocol! */ if (*loff != 0) return -ESPIPE; switch (*c) { case '/': /* flush table */ spin_lock_bh(&recent_lock); recent_table_flush(t); spin_unlock_bh(&recent_lock); return size; case '-': /* remove address */ add = false; break; case '+': /* add address */ add = true; break; default: pr_info_ratelimited("Need \"+ip\", \"-ip\" or \"/\"\n"); return -EINVAL; } ++c; --size; if (strnchr(c, size, ':') != NULL) { family = NFPROTO_IPV6; succ = in6_pton(c, size, (void *)&addr, '\n', NULL); } else { family = NFPROTO_IPV4; succ = in4_pton(c, size, (void *)&addr, '\n', NULL); } if (!succ) return -EINVAL; spin_lock_bh(&recent_lock); e = recent_entry_lookup(t, &addr, family, 0); if (e == NULL) { if (add) recent_entry_init(t, &addr, family, 0); } else { if (add) recent_entry_update(t, e); else recent_entry_remove(t, e); } spin_unlock_bh(&recent_lock); /* Note we removed one above */ *loff += size + 1; return size + 1; } static const struct proc_ops recent_mt_proc_ops = { .proc_open = recent_seq_open, .proc_read = seq_read, .proc_write = recent_mt_proc_write, .proc_release = seq_release_private, .proc_lseek = seq_lseek, }; static int __net_init recent_proc_net_init(struct net *net) { struct recent_net *recent_net = recent_pernet(net); recent_net->xt_recent = proc_mkdir("xt_recent", net->proc_net); if (!recent_net->xt_recent) return -ENOMEM; return 0; } static void __net_exit recent_proc_net_exit(struct net *net) { struct recent_net *recent_net = recent_pernet(net); struct recent_table *t; /* recent_net_exit() is called before recent_mt_destroy(). Make sure * that the parent xt_recent proc entry is empty before trying to * remove it. */ spin_lock_bh(&recent_lock); list_for_each_entry(t, &recent_net->tables, list) remove_proc_entry(t->name, recent_net->xt_recent); recent_net->xt_recent = NULL; spin_unlock_bh(&recent_lock); remove_proc_entry("xt_recent", net->proc_net); } #else static inline int recent_proc_net_init(struct net *net) { return 0; } static inline void recent_proc_net_exit(struct net *net) { } #endif /* CONFIG_PROC_FS */ static int __net_init recent_net_init(struct net *net) { struct recent_net *recent_net = recent_pernet(net); INIT_LIST_HEAD(&recent_net->tables); return recent_proc_net_init(net); } static void __net_exit recent_net_exit(struct net *net) { recent_proc_net_exit(net); } static struct pernet_operations recent_net_ops = { .init = recent_net_init, .exit = recent_net_exit, .id = &recent_net_id, .size = sizeof(struct recent_net), }; static struct xt_match recent_mt_reg[] __read_mostly = { { .name = "recent", .revision = 0, .family = NFPROTO_IPV4, .match = recent_mt, .matchsize = sizeof(struct xt_recent_mtinfo), .checkentry = recent_mt_check_v0, .destroy = recent_mt_destroy, .me = THIS_MODULE, }, { .name = "recent", .revision = 0, .family = NFPROTO_IPV6, .match = recent_mt, .matchsize = sizeof(struct xt_recent_mtinfo), .checkentry = recent_mt_check_v0, .destroy = recent_mt_destroy, .me = THIS_MODULE, }, { .name = "recent", .revision = 1, .family = NFPROTO_IPV4, .match = recent_mt, .matchsize = sizeof(struct xt_recent_mtinfo_v1), .checkentry = recent_mt_check_v1, .destroy = recent_mt_destroy, .me = THIS_MODULE, }, { .name = "recent", .revision = 1, .family = NFPROTO_IPV6, .match = recent_mt, .matchsize = sizeof(struct xt_recent_mtinfo_v1), .checkentry = recent_mt_check_v1, .destroy = recent_mt_destroy, .me = THIS_MODULE, } }; static int __init recent_mt_init(void) { int err; BUILD_BUG_ON_NOT_POWER_OF_2(XT_RECENT_MAX_NSTAMPS); if (!ip_list_tot || ip_pkt_list_tot >= XT_RECENT_MAX_NSTAMPS) return -EINVAL; ip_list_hash_size = 1 << fls(ip_list_tot); err = register_pernet_subsys(&recent_net_ops); if (err) return err; err = xt_register_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg)); if (err) unregister_pernet_subsys(&recent_net_ops); return err; } static void __exit recent_mt_exit(void) { xt_unregister_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg)); unregister_pernet_subsys(&recent_net_ops); } module_init(recent_mt_init); module_exit(recent_mt_exit); |
39224 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __ASM_X86_XSAVE_H #define __ASM_X86_XSAVE_H #include <linux/uaccess.h> #include <linux/types.h> #include <asm/processor.h> #include <asm/fpu/api.h> #include <asm/user.h> /* Bit 63 of XCR0 is reserved for future expansion */ #define XFEATURE_MASK_EXTEND (~(XFEATURE_MASK_FPSSE | (1ULL << 63))) #define XSTATE_CPUID 0x0000000d #define TILE_CPUID 0x0000001d #define FXSAVE_SIZE 512 #define XSAVE_HDR_SIZE 64 #define XSAVE_HDR_OFFSET FXSAVE_SIZE #define XSAVE_YMM_SIZE 256 #define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET) #define XSAVE_ALIGNMENT 64 /* All currently supported user features */ #define XFEATURE_MASK_USER_SUPPORTED (XFEATURE_MASK_FP | \ XFEATURE_MASK_SSE | \ XFEATURE_MASK_YMM | \ XFEATURE_MASK_OPMASK | \ XFEATURE_MASK_ZMM_Hi256 | \ XFEATURE_MASK_Hi16_ZMM | \ XFEATURE_MASK_PKRU | \ XFEATURE_MASK_BNDREGS | \ XFEATURE_MASK_BNDCSR | \ XFEATURE_MASK_XTILE) /* * Features which are restored when returning to user space. * PKRU is not restored on return to user space because PKRU * is switched eagerly in switch_to() and flush_thread() */ #define XFEATURE_MASK_USER_RESTORE \ (XFEATURE_MASK_USER_SUPPORTED & ~XFEATURE_MASK_PKRU) /* Features which are dynamically enabled for a process on request */ #define XFEATURE_MASK_USER_DYNAMIC XFEATURE_MASK_XTILE_DATA /* All currently supported supervisor features */ #define XFEATURE_MASK_SUPERVISOR_SUPPORTED (XFEATURE_MASK_PASID | \ XFEATURE_MASK_CET_USER) /* * A supervisor state component may not always contain valuable information, * and its size may be huge. Saving/restoring such supervisor state components * at each context switch can cause high CPU and space overhead, which should * be avoided. Such supervisor state components should only be saved/restored * on demand. The on-demand supervisor features are set in this mask. * * Unlike the existing supported supervisor features, an independent supervisor * feature does not allocate a buffer in task->fpu, and the corresponding * supervisor state component cannot be saved/restored at each context switch. * * To support an independent supervisor feature, a developer should follow the * dos and don'ts as below: * - Do dynamically allocate a buffer for the supervisor state component. * - Do manually invoke the XSAVES/XRSTORS instruction to save/restore the * state component to/from the buffer. * - Don't set the bit corresponding to the independent supervisor feature in * IA32_XSS at run time, since it has been set at boot time. */ #define XFEATURE_MASK_INDEPENDENT (XFEATURE_MASK_LBR) /* * Unsupported supervisor features. When a supervisor feature in this mask is * supported in the future, move it to the supported supervisor feature mask. */ #define XFEATURE_MASK_SUPERVISOR_UNSUPPORTED (XFEATURE_MASK_PT | \ XFEATURE_MASK_CET_KERNEL) /* All supervisor states including supported and unsupported states. */ #define XFEATURE_MASK_SUPERVISOR_ALL (XFEATURE_MASK_SUPERVISOR_SUPPORTED | \ XFEATURE_MASK_INDEPENDENT | \ XFEATURE_MASK_SUPERVISOR_UNSUPPORTED) /* * The feature mask required to restore FPU state: * - All user states which are not eagerly switched in switch_to()/exec() * - The suporvisor states */ #define XFEATURE_MASK_FPSTATE (XFEATURE_MASK_USER_RESTORE | \ XFEATURE_MASK_SUPERVISOR_SUPPORTED) /* * Features in this mask have space allocated in the signal frame, but may not * have that space initialized when the feature is in its init state. */ #define XFEATURE_MASK_SIGFRAME_INITOPT (XFEATURE_MASK_XTILE | \ XFEATURE_MASK_USER_DYNAMIC) extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; extern void __init update_regset_xstate_info(unsigned int size, u64 xstate_mask); int xfeature_size(int xfeature_nr); void xsaves(struct xregs_state *xsave, u64 mask); void xrstors(struct xregs_state *xsave, u64 mask); int xfd_enable_feature(u64 xfd_err); #ifdef CONFIG_X86_64 DECLARE_STATIC_KEY_FALSE(__fpu_state_size_dynamic); #endif #ifdef CONFIG_X86_64 DECLARE_STATIC_KEY_FALSE(__fpu_state_size_dynamic); static __always_inline __pure bool fpu_state_size_dynamic(void) { return static_branch_unlikely(&__fpu_state_size_dynamic); } #else static __always_inline __pure bool fpu_state_size_dynamic(void) { return false; } #endif #endif |
8 6 1 6 2 1 1 1 7 1 5 1 1 1 33 1 1 5 4 3 3 3 1 3 4 4 4 3 6 1 1 4 4 3 3 2 2 1 1 5 11 16 321 1 495 23 373 39 19 9 1 1 1 1 5 2 1 1 11 1 2 337 6 4 5 16 1035 993 62 46 815 812 3 777 35 223 3 45 40 5 241 1 15 1 9 134 133 348 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 | // SPDX-License-Identifier: GPL-2.0 #include <linux/kmod.h> #include <linux/netdevice.h> #include <linux/inetdevice.h> #include <linux/etherdevice.h> #include <linux/rtnetlink.h> #include <linux/net_tstamp.h> #include <linux/phylib_stubs.h> #include <linux/wireless.h> #include <linux/if_bridge.h> #include <net/dsa_stubs.h> #include <net/wext.h> #include "dev.h" /* * Map an interface index to its name (SIOCGIFNAME) */ /* * We need this ioctl for efficient implementation of the * if_indextoname() function required by the IPv6 API. Without * it, we would have to search all the interfaces to find a * match. --pb */ static int dev_ifname(struct net *net, struct ifreq *ifr) { ifr->ifr_name[IFNAMSIZ-1] = 0; return netdev_get_name(net, ifr->ifr_name, ifr->ifr_ifindex); } /* * Perform a SIOCGIFCONF call. This structure will change * size eventually, and there is nothing I can do about it. * Thus we will need a 'compatibility mode'. */ int dev_ifconf(struct net *net, struct ifconf __user *uifc) { struct net_device *dev; void __user *pos; size_t size; int len, total = 0, done; /* both the ifconf and the ifreq structures are slightly different */ if (in_compat_syscall()) { struct compat_ifconf ifc32; if (copy_from_user(&ifc32, uifc, sizeof(struct compat_ifconf))) return -EFAULT; pos = compat_ptr(ifc32.ifcbuf); len = ifc32.ifc_len; size = sizeof(struct compat_ifreq); } else { struct ifconf ifc; if (copy_from_user(&ifc, uifc, sizeof(struct ifconf))) return -EFAULT; pos = ifc.ifc_buf; len = ifc.ifc_len; size = sizeof(struct ifreq); } /* Loop over the interfaces, and write an info block for each. */ rtnl_lock(); for_each_netdev(net, dev) { if (!pos) done = inet_gifconf(dev, NULL, 0, size); else done = inet_gifconf(dev, pos + total, len - total, size); if (done < 0) { rtnl_unlock(); return -EFAULT; } total += done; } rtnl_unlock(); return put_user(total, &uifc->ifc_len); } static int dev_getifmap(struct net_device *dev, struct ifreq *ifr) { struct ifmap *ifmap = &ifr->ifr_map; if (in_compat_syscall()) { struct compat_ifmap *cifmap = (struct compat_ifmap *)ifmap; cifmap->mem_start = dev->mem_start; cifmap->mem_end = dev->mem_end; cifmap->base_addr = dev->base_addr; cifmap->irq = dev->irq; cifmap->dma = dev->dma; cifmap->port = dev->if_port; return 0; } ifmap->mem_start = dev->mem_start; ifmap->mem_end = dev->mem_end; ifmap->base_addr = dev->base_addr; ifmap->irq = dev->irq; ifmap->dma = dev->dma; ifmap->port = dev->if_port; return 0; } static int dev_setifmap(struct net_device *dev, struct ifreq *ifr) { struct compat_ifmap *cifmap = (struct compat_ifmap *)&ifr->ifr_map; if (!dev->netdev_ops->ndo_set_config) return -EOPNOTSUPP; if (in_compat_syscall()) { struct ifmap ifmap = { .mem_start = cifmap->mem_start, .mem_end = cifmap->mem_end, .base_addr = cifmap->base_addr, .irq = cifmap->irq, .dma = cifmap->dma, .port = cifmap->port, }; return dev->netdev_ops->ndo_set_config(dev, &ifmap); } return dev->netdev_ops->ndo_set_config(dev, &ifr->ifr_map); } /* * Perform the SIOCxIFxxx calls, inside rcu_read_lock() */ static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd) { int err; struct net_device *dev = dev_get_by_name_rcu(net, ifr->ifr_name); if (!dev) return -ENODEV; switch (cmd) { case SIOCGIFFLAGS: /* Get interface flags */ ifr->ifr_flags = (short) dev_get_flags(dev); return 0; case SIOCGIFMETRIC: /* Get the metric on the interface (currently unused) */ ifr->ifr_metric = 0; return 0; case SIOCGIFMTU: /* Get the MTU of a device */ ifr->ifr_mtu = dev->mtu; return 0; case SIOCGIFSLAVE: err = -EINVAL; break; case SIOCGIFMAP: return dev_getifmap(dev, ifr); case SIOCGIFINDEX: ifr->ifr_ifindex = dev->ifindex; return 0; case SIOCGIFTXQLEN: ifr->ifr_qlen = dev->tx_queue_len; return 0; default: /* dev_ioctl() should ensure this case * is never reached */ WARN_ON(1); err = -ENOTTY; break; } return err; } static int net_hwtstamp_validate(const struct kernel_hwtstamp_config *cfg) { enum hwtstamp_tx_types tx_type; enum hwtstamp_rx_filters rx_filter; int tx_type_valid = 0; int rx_filter_valid = 0; if (cfg->flags & ~HWTSTAMP_FLAG_MASK) return -EINVAL; tx_type = cfg->tx_type; rx_filter = cfg->rx_filter; switch (tx_type) { case HWTSTAMP_TX_OFF: case HWTSTAMP_TX_ON: case HWTSTAMP_TX_ONESTEP_SYNC: case HWTSTAMP_TX_ONESTEP_P2P: tx_type_valid = 1; break; case __HWTSTAMP_TX_CNT: /* not a real value */ break; } switch (rx_filter) { case HWTSTAMP_FILTER_NONE: case HWTSTAMP_FILTER_ALL: case HWTSTAMP_FILTER_SOME: case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: case HWTSTAMP_FILTER_PTP_V2_EVENT: case HWTSTAMP_FILTER_PTP_V2_SYNC: case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: case HWTSTAMP_FILTER_NTP_ALL: rx_filter_valid = 1; break; case __HWTSTAMP_FILTER_CNT: /* not a real value */ break; } if (!tx_type_valid || !rx_filter_valid) return -ERANGE; return 0; } static int dev_eth_ioctl(struct net_device *dev, struct ifreq *ifr, unsigned int cmd) { const struct net_device_ops *ops = dev->netdev_ops; if (!ops->ndo_eth_ioctl) return -EOPNOTSUPP; if (!netif_device_present(dev)) return -ENODEV; return ops->ndo_eth_ioctl(dev, ifr, cmd); } /** * dev_get_hwtstamp_phylib() - Get hardware timestamping settings of NIC * or of attached phylib PHY * @dev: Network device * @cfg: Timestamping configuration structure * * Helper for enforcing a common policy that phylib timestamping, if available, * should take precedence in front of hardware timestamping provided by the * netdev. * * Note: phy_mii_ioctl() only handles SIOCSHWTSTAMP (not SIOCGHWTSTAMP), and * there only exists a phydev->mii_ts->hwtstamp() method. So this will return * -EOPNOTSUPP for phylib for now, which is still more accurate than letting * the netdev handle the GET request. */ static int dev_get_hwtstamp_phylib(struct net_device *dev, struct kernel_hwtstamp_config *cfg) { if (phy_has_hwtstamp(dev->phydev)) return phy_hwtstamp_get(dev->phydev, cfg); return dev->netdev_ops->ndo_hwtstamp_get(dev, cfg); } static int dev_get_hwtstamp(struct net_device *dev, struct ifreq *ifr) { const struct net_device_ops *ops = dev->netdev_ops; struct kernel_hwtstamp_config kernel_cfg = {}; struct hwtstamp_config cfg; int err; if (!ops->ndo_hwtstamp_get) return dev_eth_ioctl(dev, ifr, SIOCGHWTSTAMP); /* legacy */ if (!netif_device_present(dev)) return -ENODEV; kernel_cfg.ifr = ifr; err = dev_get_hwtstamp_phylib(dev, &kernel_cfg); if (err) return err; /* If the request was resolved through an unconverted driver, omit * the copy_to_user(), since the implementation has already done that */ if (!kernel_cfg.copied_to_user) { hwtstamp_config_from_kernel(&cfg, &kernel_cfg); if (copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg))) return -EFAULT; } return 0; } /** * dev_set_hwtstamp_phylib() - Change hardware timestamping of NIC * or of attached phylib PHY * @dev: Network device * @cfg: Timestamping configuration structure * @extack: Netlink extended ack message structure, for error reporting * * Helper for enforcing a common policy that phylib timestamping, if available, * should take precedence in front of hardware timestamping provided by the * netdev. If the netdev driver needs to perform specific actions even for PHY * timestamping to work properly (a switch port must trap the timestamped * frames and not forward them), it must set IFF_SEE_ALL_HWTSTAMP_REQUESTS in * dev->priv_flags. */ int dev_set_hwtstamp_phylib(struct net_device *dev, struct kernel_hwtstamp_config *cfg, struct netlink_ext_ack *extack) { const struct net_device_ops *ops = dev->netdev_ops; bool phy_ts = phy_has_hwtstamp(dev->phydev); struct kernel_hwtstamp_config old_cfg = {}; bool changed = false; int err; cfg->source = phy_ts ? HWTSTAMP_SOURCE_PHYLIB : HWTSTAMP_SOURCE_NETDEV; if (phy_ts && (dev->priv_flags & IFF_SEE_ALL_HWTSTAMP_REQUESTS)) { err = ops->ndo_hwtstamp_get(dev, &old_cfg); if (err) return err; } if (!phy_ts || (dev->priv_flags & IFF_SEE_ALL_HWTSTAMP_REQUESTS)) { err = ops->ndo_hwtstamp_set(dev, cfg, extack); if (err) { if (extack->_msg) netdev_err(dev, "%s\n", extack->_msg); return err; } } if (phy_ts && (dev->priv_flags & IFF_SEE_ALL_HWTSTAMP_REQUESTS)) changed = kernel_hwtstamp_config_changed(&old_cfg, cfg); if (phy_ts) { err = phy_hwtstamp_set(dev->phydev, cfg, extack); if (err) { if (changed) ops->ndo_hwtstamp_set(dev, &old_cfg, NULL); return err; } } return 0; } EXPORT_SYMBOL_GPL(dev_set_hwtstamp_phylib); static int dev_set_hwtstamp(struct net_device *dev, struct ifreq *ifr) { const struct net_device_ops *ops = dev->netdev_ops; struct kernel_hwtstamp_config kernel_cfg = {}; struct netlink_ext_ack extack = {}; struct hwtstamp_config cfg; int err; if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) return -EFAULT; hwtstamp_config_to_kernel(&kernel_cfg, &cfg); kernel_cfg.ifr = ifr; err = net_hwtstamp_validate(&kernel_cfg); if (err) return err; err = dsa_conduit_hwtstamp_validate(dev, &kernel_cfg, &extack); if (err) { if (extack._msg) netdev_err(dev, "%s\n", extack._msg); return err; } if (!ops->ndo_hwtstamp_set) return dev_eth_ioctl(dev, ifr, SIOCSHWTSTAMP); /* legacy */ if (!netif_device_present(dev)) return -ENODEV; err = dev_set_hwtstamp_phylib(dev, &kernel_cfg, &extack); if (err) return err; /* The driver may have modified the configuration, so copy the * updated version of it back to user space */ if (!kernel_cfg.copied_to_user) { hwtstamp_config_from_kernel(&cfg, &kernel_cfg); if (copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg))) return -EFAULT; } return 0; } static int generic_hwtstamp_ioctl_lower(struct net_device *dev, int cmd, struct kernel_hwtstamp_config *kernel_cfg) { struct ifreq ifrr; int err; strscpy_pad(ifrr.ifr_name, dev->name, IFNAMSIZ); ifrr.ifr_ifru = kernel_cfg->ifr->ifr_ifru; err = dev_eth_ioctl(dev, &ifrr, cmd); if (err) return err; kernel_cfg->ifr->ifr_ifru = ifrr.ifr_ifru; kernel_cfg->copied_to_user = true; return 0; } int generic_hwtstamp_get_lower(struct net_device *dev, struct kernel_hwtstamp_config *kernel_cfg) { const struct net_device_ops *ops = dev->netdev_ops; if (!netif_device_present(dev)) return -ENODEV; if (ops->ndo_hwtstamp_get) return dev_get_hwtstamp_phylib(dev, kernel_cfg); /* Legacy path: unconverted lower driver */ return generic_hwtstamp_ioctl_lower(dev, SIOCGHWTSTAMP, kernel_cfg); } EXPORT_SYMBOL(generic_hwtstamp_get_lower); int generic_hwtstamp_set_lower(struct net_device *dev, struct kernel_hwtstamp_config *kernel_cfg, struct netlink_ext_ack *extack) { const struct net_device_ops *ops = dev->netdev_ops; if (!netif_device_present(dev)) return -ENODEV; if (ops->ndo_hwtstamp_set) return dev_set_hwtstamp_phylib(dev, kernel_cfg, extack); /* Legacy path: unconverted lower driver */ return generic_hwtstamp_ioctl_lower(dev, SIOCSHWTSTAMP, kernel_cfg); } EXPORT_SYMBOL(generic_hwtstamp_set_lower); static int dev_siocbond(struct net_device *dev, struct ifreq *ifr, unsigned int cmd) { const struct net_device_ops *ops = dev->netdev_ops; if (ops->ndo_siocbond) { if (netif_device_present(dev)) return ops->ndo_siocbond(dev, ifr, cmd); else return -ENODEV; } return -EOPNOTSUPP; } static int dev_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __user *data, unsigned int cmd) { const struct net_device_ops *ops = dev->netdev_ops; if (ops->ndo_siocdevprivate) { if (netif_device_present(dev)) return ops->ndo_siocdevprivate(dev, ifr, data, cmd); else return -ENODEV; } return -EOPNOTSUPP; } static int dev_siocwandev(struct net_device *dev, struct if_settings *ifs) { const struct net_device_ops *ops = dev->netdev_ops; if (ops->ndo_siocwandev) { if (netif_device_present(dev)) return ops->ndo_siocwandev(dev, ifs); else return -ENODEV; } return -EOPNOTSUPP; } /* * Perform the SIOCxIFxxx calls, inside rtnl_lock() */ static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data, unsigned int cmd) { int err; struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); const struct net_device_ops *ops; netdevice_tracker dev_tracker; if (!dev) return -ENODEV; ops = dev->netdev_ops; switch (cmd) { case SIOCSIFFLAGS: /* Set interface flags */ return dev_change_flags(dev, ifr->ifr_flags, NULL); case SIOCSIFMETRIC: /* Set the metric on the interface (currently unused) */ return -EOPNOTSUPP; case SIOCSIFMTU: /* Set the MTU of a device */ return dev_set_mtu(dev, ifr->ifr_mtu); case SIOCSIFHWADDR: if (dev->addr_len > sizeof(struct sockaddr)) return -EINVAL; return dev_set_mac_address_user(dev, &ifr->ifr_hwaddr, NULL); case SIOCSIFHWBROADCAST: if (ifr->ifr_hwaddr.sa_family != dev->type) return -EINVAL; memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, min(sizeof(ifr->ifr_hwaddr.sa_data_min), (size_t)dev->addr_len)); call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); return 0; case SIOCSIFMAP: return dev_setifmap(dev, ifr); case SIOCADDMULTI: if (!ops->ndo_set_rx_mode || ifr->ifr_hwaddr.sa_family != AF_UNSPEC) return -EINVAL; if (!netif_device_present(dev)) return -ENODEV; return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data); case SIOCDELMULTI: if (!ops->ndo_set_rx_mode || ifr->ifr_hwaddr.sa_family != AF_UNSPEC) return -EINVAL; if (!netif_device_present(dev)) return -ENODEV; return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data); case SIOCSIFTXQLEN: if (ifr->ifr_qlen < 0) return -EINVAL; return dev_change_tx_queue_len(dev, ifr->ifr_qlen); case SIOCSIFNAME: ifr->ifr_newname[IFNAMSIZ-1] = '\0'; return dev_change_name(dev, ifr->ifr_newname); case SIOCWANDEV: return dev_siocwandev(dev, &ifr->ifr_settings); case SIOCBRADDIF: case SIOCBRDELIF: if (!netif_device_present(dev)) return -ENODEV; if (!netif_is_bridge_master(dev)) return -EOPNOTSUPP; netdev_hold(dev, &dev_tracker, GFP_KERNEL); rtnl_unlock(); err = br_ioctl_call(net, netdev_priv(dev), cmd, ifr, NULL); netdev_put(dev, &dev_tracker); rtnl_lock(); return err; case SIOCDEVPRIVATE ... SIOCDEVPRIVATE + 15: return dev_siocdevprivate(dev, ifr, data, cmd); case SIOCSHWTSTAMP: return dev_set_hwtstamp(dev, ifr); case SIOCGHWTSTAMP: return dev_get_hwtstamp(dev, ifr); case SIOCGMIIPHY: case SIOCGMIIREG: case SIOCSMIIREG: return dev_eth_ioctl(dev, ifr, cmd); case SIOCBONDENSLAVE: case SIOCBONDRELEASE: case SIOCBONDSETHWADDR: case SIOCBONDSLAVEINFOQUERY: case SIOCBONDINFOQUERY: case SIOCBONDCHANGEACTIVE: return dev_siocbond(dev, ifr, cmd); /* Unknown ioctl */ default: err = -EINVAL; } return err; } /** * dev_load - load a network module * @net: the applicable net namespace * @name: name of interface * * If a network interface is not present and the process has suitable * privileges this function loads the module. If module loading is not * available in this kernel then it becomes a nop. */ void dev_load(struct net *net, const char *name) { struct net_device *dev; int no_module; rcu_read_lock(); dev = dev_get_by_name_rcu(net, name); rcu_read_unlock(); no_module = !dev; if (no_module && capable(CAP_NET_ADMIN)) no_module = request_module("netdev-%s", name); if (no_module && capable(CAP_SYS_MODULE)) request_module("%s", name); } EXPORT_SYMBOL(dev_load); /* * This function handles all "interface"-type I/O control requests. The actual * 'doing' part of this is dev_ifsioc above. */ /** * dev_ioctl - network device ioctl * @net: the applicable net namespace * @cmd: command to issue * @ifr: pointer to a struct ifreq in user space * @data: data exchanged with userspace * @need_copyout: whether or not copy_to_user() should be called * * Issue ioctl functions to devices. This is normally called by the * user space syscall interfaces but can sometimes be useful for * other purposes. The return value is the return from the syscall if * positive or a negative errno code on error. */ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, void __user *data, bool *need_copyout) { int ret; char *colon; if (need_copyout) *need_copyout = true; if (cmd == SIOCGIFNAME) return dev_ifname(net, ifr); ifr->ifr_name[IFNAMSIZ-1] = 0; colon = strchr(ifr->ifr_name, ':'); if (colon) *colon = 0; /* * See which interface the caller is talking about. */ switch (cmd) { case SIOCGIFHWADDR: dev_load(net, ifr->ifr_name); ret = dev_get_mac_address(&ifr->ifr_hwaddr, net, ifr->ifr_name); if (colon) *colon = ':'; return ret; /* * These ioctl calls: * - can be done by all. * - atomic and do not require locking. * - return a value */ case SIOCGIFFLAGS: case SIOCGIFMETRIC: case SIOCGIFMTU: case SIOCGIFSLAVE: case SIOCGIFMAP: case SIOCGIFINDEX: case SIOCGIFTXQLEN: dev_load(net, ifr->ifr_name); rcu_read_lock(); ret = dev_ifsioc_locked(net, ifr, cmd); rcu_read_unlock(); if (colon) *colon = ':'; return ret; case SIOCETHTOOL: dev_load(net, ifr->ifr_name); ret = dev_ethtool(net, ifr, data); if (colon) *colon = ':'; return ret; /* * These ioctl calls: * - require superuser power. * - require strict serialization. * - return a value */ case SIOCGMIIPHY: case SIOCGMIIREG: case SIOCSIFNAME: dev_load(net, ifr->ifr_name); if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; rtnl_lock(); ret = dev_ifsioc(net, ifr, data, cmd); rtnl_unlock(); if (colon) *colon = ':'; return ret; /* * These ioctl calls: * - require superuser power. * - require strict serialization. * - do not return a value */ case SIOCSIFMAP: case SIOCSIFTXQLEN: if (!capable(CAP_NET_ADMIN)) return -EPERM; fallthrough; /* * These ioctl calls: * - require local superuser power. * - require strict serialization. * - do not return a value */ case SIOCSIFFLAGS: case SIOCSIFMETRIC: case SIOCSIFMTU: case SIOCSIFHWADDR: case SIOCSIFSLAVE: case SIOCADDMULTI: case SIOCDELMULTI: case SIOCSIFHWBROADCAST: case SIOCSMIIREG: case SIOCBONDENSLAVE: case SIOCBONDRELEASE: case SIOCBONDSETHWADDR: case SIOCBONDCHANGEACTIVE: case SIOCBRADDIF: case SIOCBRDELIF: case SIOCSHWTSTAMP: if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; fallthrough; case SIOCBONDSLAVEINFOQUERY: case SIOCBONDINFOQUERY: dev_load(net, ifr->ifr_name); rtnl_lock(); ret = dev_ifsioc(net, ifr, data, cmd); rtnl_unlock(); if (need_copyout) *need_copyout = false; return ret; case SIOCGIFMEM: /* Get the per device memory space. We can add this but * currently do not support it */ case SIOCSIFMEM: /* Set the per device memory buffer space. * Not applicable in our case */ case SIOCSIFLINK: return -ENOTTY; /* * Unknown or private ioctl. */ default: if (cmd == SIOCWANDEV || cmd == SIOCGHWTSTAMP || (cmd >= SIOCDEVPRIVATE && cmd <= SIOCDEVPRIVATE + 15)) { dev_load(net, ifr->ifr_name); rtnl_lock(); ret = dev_ifsioc(net, ifr, data, cmd); rtnl_unlock(); return ret; } return -ENOTTY; } } |
67 286 40 271 282 49 2 264 264 35 174 1 6 171 262 169 54 2 227 196 108 6 98 2 2 37 101 50 87 178 48 131 291 276 135 135 29 281 280 1 284 370 2 492 17 138 34 46 302 284 6 4 280 223 418 20 337 135 282 188 170 18 363 80 21 381 116 25 414 309 51 104 101 13 125 126 15 15 483 170 353 352 35 279 50 679 11 672 644 517 494 185 277 6 1 22 23 19 1 19 1 20 24 2 23 1 6 130 145 14 4 34 5 5 5 13 27 8 35 10 19 492 47 38 126 437 23 449 451 38 136 347 411 179 362 27 17 280 51 65 137 42 7 1 7 3 2 2 11 9 2 8 3 9 2 9 2 10 1 10 1 7 3 10 1 10 1 10 1 10 1 1 1 5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 | // SPDX-License-Identifier: GPL-2.0-only /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> * (C) 2002-2013 Jozsef Kadlecsik <kadlec@netfilter.org> * (C) 2006-2012 Patrick McHardy <kaber@trash.net> */ #include <linux/types.h> #include <linux/timer.h> #include <linux/module.h> #include <linux/in.h> #include <linux/tcp.h> #include <linux/spinlock.h> #include <linux/skbuff.h> #include <linux/ipv6.h> #include <net/ip6_checksum.h> #include <asm/unaligned.h> #include <net/tcp.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv4.h> #include <linux/netfilter_ipv6.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_conntrack_seqadj.h> #include <net/netfilter/nf_conntrack_synproxy.h> #include <net/netfilter/nf_conntrack_timeout.h> #include <net/netfilter/nf_log.h> #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> #include <net/netfilter/ipv6/nf_conntrack_ipv6.h> /* FIXME: Examine ipfilter's timeouts and conntrack transitions more closely. They're more complex. --RR */ static const char *const tcp_conntrack_names[] = { "NONE", "SYN_SENT", "SYN_RECV", "ESTABLISHED", "FIN_WAIT", "CLOSE_WAIT", "LAST_ACK", "TIME_WAIT", "CLOSE", "SYN_SENT2", }; enum nf_ct_tcp_action { NFCT_TCP_IGNORE, NFCT_TCP_INVALID, NFCT_TCP_ACCEPT, }; #define SECS * HZ #define MINS * 60 SECS #define HOURS * 60 MINS #define DAYS * 24 HOURS static const unsigned int tcp_timeouts[TCP_CONNTRACK_TIMEOUT_MAX] = { [TCP_CONNTRACK_SYN_SENT] = 2 MINS, [TCP_CONNTRACK_SYN_RECV] = 60 SECS, [TCP_CONNTRACK_ESTABLISHED] = 5 DAYS, [TCP_CONNTRACK_FIN_WAIT] = 2 MINS, [TCP_CONNTRACK_CLOSE_WAIT] = 60 SECS, [TCP_CONNTRACK_LAST_ACK] = 30 SECS, [TCP_CONNTRACK_TIME_WAIT] = 2 MINS, [TCP_CONNTRACK_CLOSE] = 10 SECS, [TCP_CONNTRACK_SYN_SENT2] = 2 MINS, /* RFC1122 says the R2 limit should be at least 100 seconds. Linux uses 15 packets as limit, which corresponds to ~13-30min depending on RTO. */ [TCP_CONNTRACK_RETRANS] = 5 MINS, [TCP_CONNTRACK_UNACK] = 5 MINS, }; #define sNO TCP_CONNTRACK_NONE #define sSS TCP_CONNTRACK_SYN_SENT #define sSR TCP_CONNTRACK_SYN_RECV #define sES TCP_CONNTRACK_ESTABLISHED #define sFW TCP_CONNTRACK_FIN_WAIT #define sCW TCP_CONNTRACK_CLOSE_WAIT #define sLA TCP_CONNTRACK_LAST_ACK #define sTW TCP_CONNTRACK_TIME_WAIT #define sCL TCP_CONNTRACK_CLOSE #define sS2 TCP_CONNTRACK_SYN_SENT2 #define sIV TCP_CONNTRACK_MAX #define sIG TCP_CONNTRACK_IGNORE /* What TCP flags are set from RST/SYN/FIN/ACK. */ enum tcp_bit_set { TCP_SYN_SET, TCP_SYNACK_SET, TCP_FIN_SET, TCP_ACK_SET, TCP_RST_SET, TCP_NONE_SET, }; /* * The TCP state transition table needs a few words... * * We are the man in the middle. All the packets go through us * but might get lost in transit to the destination. * It is assumed that the destinations can't receive segments * we haven't seen. * * The checked segment is in window, but our windows are *not* * equivalent with the ones of the sender/receiver. We always * try to guess the state of the current sender. * * The meaning of the states are: * * NONE: initial state * SYN_SENT: SYN-only packet seen * SYN_SENT2: SYN-only packet seen from reply dir, simultaneous open * SYN_RECV: SYN-ACK packet seen * ESTABLISHED: ACK packet seen * FIN_WAIT: FIN packet seen * CLOSE_WAIT: ACK seen (after FIN) * LAST_ACK: FIN seen (after FIN) * TIME_WAIT: last ACK seen * CLOSE: closed connection (RST) * * Packets marked as IGNORED (sIG): * if they may be either invalid or valid * and the receiver may send back a connection * closing RST or a SYN/ACK. * * Packets marked as INVALID (sIV): * if we regard them as truly invalid packets */ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { { /* ORIGINAL */ /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ /*syn*/ { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sS2 }, /* * sNO -> sSS Initialize a new connection * sSS -> sSS Retransmitted SYN * sS2 -> sS2 Late retransmitted SYN * sSR -> sIG * sES -> sIG Error: SYNs in window outside the SYN_SENT state * are errors. Receiver will reply with RST * and close the connection. * Or we are not in sync and hold a dead connection. * sFW -> sIG * sCW -> sIG * sLA -> sIG * sTW -> sSS Reopened connection (RFC 1122). * sCL -> sSS */ /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ /*synack*/ { sIV, sIV, sSR, sIV, sIV, sIV, sIV, sIV, sIV, sSR }, /* * sNO -> sIV Too late and no reason to do anything * sSS -> sIV Client can't send SYN and then SYN/ACK * sS2 -> sSR SYN/ACK sent to SYN2 in simultaneous open * sSR -> sSR Late retransmitted SYN/ACK in simultaneous open * sES -> sIV Invalid SYN/ACK packets sent by the client * sFW -> sIV * sCW -> sIV * sLA -> sIV * sTW -> sIV * sCL -> sIV */ /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ /*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV }, /* * sNO -> sIV Too late and no reason to do anything... * sSS -> sIV Client migth not send FIN in this state: * we enforce waiting for a SYN/ACK reply first. * sS2 -> sIV * sSR -> sFW Close started. * sES -> sFW * sFW -> sLA FIN seen in both directions, waiting for * the last ACK. * Migth be a retransmitted FIN as well... * sCW -> sLA * sLA -> sLA Retransmitted FIN. Remain in the same state. * sTW -> sTW * sCL -> sCL */ /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ /*ack*/ { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV }, /* * sNO -> sES Assumed. * sSS -> sIV ACK is invalid: we haven't seen a SYN/ACK yet. * sS2 -> sIV * sSR -> sES Established state is reached. * sES -> sES :-) * sFW -> sCW Normal close request answered by ACK. * sCW -> sCW * sLA -> sTW Last ACK detected (RFC5961 challenged) * sTW -> sTW Retransmitted last ACK. Remain in the same state. * sCL -> sCL */ /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ /*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL }, /*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV } }, { /* REPLY */ /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ /*syn*/ { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sSS, sIV, sS2 }, /* * sNO -> sIV Never reached. * sSS -> sS2 Simultaneous open * sS2 -> sS2 Retransmitted simultaneous SYN * sSR -> sIV Invalid SYN packets sent by the server * sES -> sIV * sFW -> sIV * sCW -> sIV * sLA -> sIV * sTW -> sSS Reopened connection, but server may have switched role * sCL -> sIV */ /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ /*synack*/ { sIV, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR }, /* * sSS -> sSR Standard open. * sS2 -> sSR Simultaneous open * sSR -> sIG Retransmitted SYN/ACK, ignore it. * sES -> sIG Late retransmitted SYN/ACK? * sFW -> sIG Might be SYN/ACK answering ignored SYN * sCW -> sIG * sLA -> sIG * sTW -> sIG * sCL -> sIG */ /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ /*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV }, /* * sSS -> sIV Server might not send FIN in this state. * sS2 -> sIV * sSR -> sFW Close started. * sES -> sFW * sFW -> sLA FIN seen in both directions. * sCW -> sLA * sLA -> sLA Retransmitted FIN. * sTW -> sTW * sCL -> sCL */ /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ /*ack*/ { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIG }, /* * sSS -> sIG Might be a half-open connection. * sS2 -> sIG * sSR -> sSR Might answer late resent SYN. * sES -> sES :-) * sFW -> sCW Normal close request answered by ACK. * sCW -> sCW * sLA -> sTW Last ACK detected (RFC5961 challenged) * sTW -> sTW Retransmitted last ACK. * sCL -> sCL */ /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ /*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL }, /*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV } } }; #ifdef CONFIG_NF_CONNTRACK_PROCFS /* Print out the private part of the conntrack. */ static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct) { if (test_bit(IPS_OFFLOAD_BIT, &ct->status)) return; seq_printf(s, "%s ", tcp_conntrack_names[ct->proto.tcp.state]); } #endif static unsigned int get_conntrack_index(const struct tcphdr *tcph) { if (tcph->rst) return TCP_RST_SET; else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET); else if (tcph->fin) return TCP_FIN_SET; else if (tcph->ack) return TCP_ACK_SET; else return TCP_NONE_SET; } /* TCP connection tracking based on 'Real Stateful TCP Packet Filtering in IP Filter' by Guido van Rooij. http://www.sane.nl/events/sane2000/papers.html http://www.darkart.com/mirrors/www.obfuscation.org/ipf/ The boundaries and the conditions are changed according to RFC793: the packet must intersect the window (i.e. segments may be after the right or before the left edge) and thus receivers may ACK segments after the right edge of the window. td_maxend = max(sack + max(win,1)) seen in reply packets td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets td_maxwin += seq + len - sender.td_maxend if seq + len > sender.td_maxend td_end = max(seq + len) seen in sent packets I. Upper bound for valid data: seq <= sender.td_maxend II. Lower bound for valid data: seq + len >= sender.td_end - receiver.td_maxwin III. Upper bound for valid (s)ack: sack <= receiver.td_end IV. Lower bound for valid (s)ack: sack >= receiver.td_end - MAXACKWINDOW where sack is the highest right edge of sack block found in the packet or ack in the case of packet without SACK option. The upper bound limit for a valid (s)ack is not ignored - we doesn't have to deal with fragments. */ static inline __u32 segment_seq_plus_len(__u32 seq, size_t len, unsigned int dataoff, const struct tcphdr *tcph) { /* XXX Should I use payload length field in IP/IPv6 header ? * - YK */ return (seq + len - dataoff - tcph->doff*4 + (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0)); } /* Fixme: what about big packets? */ #define MAXACKWINCONST 66000 #define MAXACKWINDOW(sender) \ ((sender)->td_maxwin > MAXACKWINCONST ? (sender)->td_maxwin \ : MAXACKWINCONST) /* * Simplified tcp_parse_options routine from tcp_input.c */ static void tcp_options(const struct sk_buff *skb, unsigned int dataoff, const struct tcphdr *tcph, struct ip_ct_tcp_state *state) { unsigned char buff[(15 * 4) - sizeof(struct tcphdr)]; const unsigned char *ptr; int length = (tcph->doff*4) - sizeof(struct tcphdr); if (!length) return; ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr), length, buff); if (!ptr) return; state->td_scale = 0; state->flags &= IP_CT_TCP_FLAG_BE_LIBERAL; while (length > 0) { int opcode=*ptr++; int opsize; switch (opcode) { case TCPOPT_EOL: return; case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */ length--; continue; default: if (length < 2) return; opsize=*ptr++; if (opsize < 2) /* "silly options" */ return; if (opsize > length) return; /* don't parse partial options */ if (opcode == TCPOPT_SACK_PERM && opsize == TCPOLEN_SACK_PERM) state->flags |= IP_CT_TCP_FLAG_SACK_PERM; else if (opcode == TCPOPT_WINDOW && opsize == TCPOLEN_WINDOW) { state->td_scale = *(u_int8_t *)ptr; if (state->td_scale > TCP_MAX_WSCALE) state->td_scale = TCP_MAX_WSCALE; state->flags |= IP_CT_TCP_FLAG_WINDOW_SCALE; } ptr += opsize - 2; length -= opsize; } } } static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff, const struct tcphdr *tcph, __u32 *sack) { unsigned char buff[(15 * 4) - sizeof(struct tcphdr)]; const unsigned char *ptr; int length = (tcph->doff*4) - sizeof(struct tcphdr); __u32 tmp; if (!length) return; ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr), length, buff); if (!ptr) return; /* Fast path for timestamp-only option */ if (length == TCPOLEN_TSTAMP_ALIGNED && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) return; while (length > 0) { int opcode = *ptr++; int opsize, i; switch (opcode) { case TCPOPT_EOL: return; case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */ length--; continue; default: if (length < 2) return; opsize = *ptr++; if (opsize < 2) /* "silly options" */ return; if (opsize > length) return; /* don't parse partial options */ if (opcode == TCPOPT_SACK && opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK) && !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK)) { for (i = 0; i < (opsize - TCPOLEN_SACK_BASE); i += TCPOLEN_SACK_PERBLOCK) { tmp = get_unaligned_be32((__be32 *)(ptr+i)+1); if (after(tmp, *sack)) *sack = tmp; } return; } ptr += opsize - 2; length -= opsize; } } } static void tcp_init_sender(struct ip_ct_tcp_state *sender, struct ip_ct_tcp_state *receiver, const struct sk_buff *skb, unsigned int dataoff, const struct tcphdr *tcph, u32 end, u32 win, enum ip_conntrack_dir dir) { /* SYN-ACK in reply to a SYN * or SYN from reply direction in simultaneous open. */ sender->td_end = sender->td_maxend = end; sender->td_maxwin = (win == 0 ? 1 : win); tcp_options(skb, dataoff, tcph, sender); /* RFC 1323: * Both sides must send the Window Scale option * to enable window scaling in either direction. */ if (dir == IP_CT_DIR_REPLY && !(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE)) { sender->td_scale = 0; receiver->td_scale = 0; } } __printf(6, 7) static enum nf_ct_tcp_action nf_tcp_log_invalid(const struct sk_buff *skb, const struct nf_conn *ct, const struct nf_hook_state *state, const struct ip_ct_tcp_state *sender, enum nf_ct_tcp_action ret, const char *fmt, ...) { const struct nf_tcp_net *tn = nf_tcp_pernet(nf_ct_net(ct)); struct va_format vaf; va_list args; bool be_liberal; be_liberal = sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL || tn->tcp_be_liberal; if (be_liberal) return NFCT_TCP_ACCEPT; va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; nf_ct_l4proto_log_invalid(skb, ct, state, "%pV", &vaf); va_end(args); return ret; } static enum nf_ct_tcp_action tcp_in_window(struct nf_conn *ct, enum ip_conntrack_dir dir, unsigned int index, const struct sk_buff *skb, unsigned int dataoff, const struct tcphdr *tcph, const struct nf_hook_state *hook_state) { struct ip_ct_tcp *state = &ct->proto.tcp; struct ip_ct_tcp_state *sender = &state->seen[dir]; struct ip_ct_tcp_state *receiver = &state->seen[!dir]; __u32 seq, ack, sack, end, win, swin; bool in_recv_win, seq_ok; s32 receiver_offset; u16 win_raw; /* * Get the required data from the packet. */ seq = ntohl(tcph->seq); ack = sack = ntohl(tcph->ack_seq); win_raw = ntohs(tcph->window); win = win_raw; end = segment_seq_plus_len(seq, skb->len, dataoff, tcph); if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM) tcp_sack(skb, dataoff, tcph, &sack); /* Take into account NAT sequence number mangling */ receiver_offset = nf_ct_seq_offset(ct, !dir, ack - 1); ack -= receiver_offset; sack -= receiver_offset; if (sender->td_maxwin == 0) { /* * Initialize sender data. */ if (tcph->syn) { tcp_init_sender(sender, receiver, skb, dataoff, tcph, end, win, dir); if (!tcph->ack) /* Simultaneous open */ return NFCT_TCP_ACCEPT; } else { /* * We are in the middle of a connection, * its history is lost for us. * Let's try to use the data from the packet. */ sender->td_end = end; swin = win << sender->td_scale; sender->td_maxwin = (swin == 0 ? 1 : swin); sender->td_maxend = end + sender->td_maxwin; if (receiver->td_maxwin == 0) { /* We haven't seen traffic in the other * direction yet but we have to tweak window * tracking to pass III and IV until that * happens. */ receiver->td_end = receiver->td_maxend = sack; } else if (sack == receiver->td_end + 1) { /* Likely a reply to a keepalive. * Needed for III. */ receiver->td_end++; } } } else if (tcph->syn && after(end, sender->td_end) && (state->state == TCP_CONNTRACK_SYN_SENT || state->state == TCP_CONNTRACK_SYN_RECV)) { /* * RFC 793: "if a TCP is reinitialized ... then it need * not wait at all; it must only be sure to use sequence * numbers larger than those recently used." * * Re-init state for this direction, just like for the first * syn(-ack) reply, it might differ in seq, ack or tcp options. */ tcp_init_sender(sender, receiver, skb, dataoff, tcph, end, win, dir); if (dir == IP_CT_DIR_REPLY && !tcph->ack) return NFCT_TCP_ACCEPT; } if (!(tcph->ack)) { /* * If there is no ACK, just pretend it was set and OK. */ ack = sack = receiver->td_end; } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) == (TCP_FLAG_ACK|TCP_FLAG_RST)) && (ack == 0)) { /* * Broken TCP stacks, that set ACK in RST packets as well * with zero ack value. */ ack = sack = receiver->td_end; } if (tcph->rst && seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT) /* * RST sent answering SYN. */ seq = end = sender->td_end; seq_ok = before(seq, sender->td_maxend + 1); if (!seq_ok) { u32 overshot = end - sender->td_maxend + 1; bool ack_ok; ack_ok = after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1); in_recv_win = receiver->td_maxwin && after(end, sender->td_end - receiver->td_maxwin - 1); if (in_recv_win && ack_ok && overshot <= receiver->td_maxwin && before(sack, receiver->td_end + 1)) { /* Work around TCPs that send more bytes than allowed by * the receive window. * * If the (marked as invalid) packet is allowed to pass by * the ruleset and the peer acks this data, then its possible * all future packets will trigger 'ACK is over upper bound' check. * * Thus if only the sequence check fails then do update td_end so * possible ACK for this data can update internal state. */ sender->td_end = end; sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED; return nf_tcp_log_invalid(skb, ct, hook_state, sender, NFCT_TCP_IGNORE, "%u bytes more than expected", overshot); } return nf_tcp_log_invalid(skb, ct, hook_state, sender, NFCT_TCP_INVALID, "SEQ is over upper bound %u (over the window of the receiver)", sender->td_maxend + 1); } if (!before(sack, receiver->td_end + 1)) return nf_tcp_log_invalid(skb, ct, hook_state, sender, NFCT_TCP_INVALID, "ACK is over upper bound %u (ACKed data not seen yet)", receiver->td_end + 1); /* Is the ending sequence in the receive window (if available)? */ in_recv_win = !receiver->td_maxwin || after(end, sender->td_end - receiver->td_maxwin - 1); if (!in_recv_win) return nf_tcp_log_invalid(skb, ct, hook_state, sender, NFCT_TCP_IGNORE, "SEQ is under lower bound %u (already ACKed data retransmitted)", sender->td_end - receiver->td_maxwin - 1); if (!after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) return nf_tcp_log_invalid(skb, ct, hook_state, sender, NFCT_TCP_IGNORE, "ignored ACK under lower bound %u (possible overly delayed)", receiver->td_end - MAXACKWINDOW(sender) - 1); /* Take into account window scaling (RFC 1323). */ if (!tcph->syn) win <<= sender->td_scale; /* Update sender data. */ swin = win + (sack - ack); if (sender->td_maxwin < swin) sender->td_maxwin = swin; if (after(end, sender->td_end)) { sender->td_end = end; sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED; } if (tcph->ack) { if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) { sender->td_maxack = ack; sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET; } else if (after(ack, sender->td_maxack)) { sender->td_maxack = ack; } } /* Update receiver data. */ if (receiver->td_maxwin != 0 && after(end, sender->td_maxend)) receiver->td_maxwin += end - sender->td_maxend; if (after(sack + win, receiver->td_maxend - 1)) { receiver->td_maxend = sack + win; if (win == 0) receiver->td_maxend++; } if (ack == receiver->td_end) receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED; /* Check retransmissions. */ if (index == TCP_ACK_SET) { if (state->last_dir == dir && state->last_seq == seq && state->last_ack == ack && state->last_end == end && state->last_win == win_raw) { state->retrans++; } else { state->last_dir = dir; state->last_seq = seq; state->last_ack = ack; state->last_end = end; state->last_win = win_raw; state->retrans = 0; } } return NFCT_TCP_ACCEPT; } static void __cold nf_tcp_handle_invalid(struct nf_conn *ct, enum ip_conntrack_dir dir, int index, const struct sk_buff *skb, const struct nf_hook_state *hook_state) { const unsigned int *timeouts; const struct nf_tcp_net *tn; unsigned int timeout; u32 expires; if (!test_bit(IPS_ASSURED_BIT, &ct->status) || test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) return; /* We don't want to have connections hanging around in ESTABLISHED * state for long time 'just because' conntrack deemed a FIN/RST * out-of-window. * * Shrink the timeout just like when there is unacked data. * This speeds up eviction of 'dead' connections where the * connection and conntracks internal state are out of sync. */ switch (index) { case TCP_RST_SET: case TCP_FIN_SET: break; default: return; } if (ct->proto.tcp.last_dir != dir && (ct->proto.tcp.last_index == TCP_FIN_SET || ct->proto.tcp.last_index == TCP_RST_SET)) { expires = nf_ct_expires(ct); if (expires < 120 * HZ) return; tn = nf_tcp_pernet(nf_ct_net(ct)); timeouts = nf_ct_timeout_lookup(ct); if (!timeouts) timeouts = tn->timeouts; timeout = READ_ONCE(timeouts[TCP_CONNTRACK_UNACK]); if (expires > timeout) { nf_ct_l4proto_log_invalid(skb, ct, hook_state, "packet (index %d, dir %d) response for index %d lower timeout to %u", index, dir, ct->proto.tcp.last_index, timeout); WRITE_ONCE(ct->timeout, timeout + nfct_time_stamp); } } else { ct->proto.tcp.last_index = index; ct->proto.tcp.last_dir = dir; } } /* table of valid flag combinations - PUSH, ECE and CWR are always valid */ static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK| TCPHDR_URG) + 1] = { [TCPHDR_SYN] = 1, [TCPHDR_SYN|TCPHDR_URG] = 1, [TCPHDR_SYN|TCPHDR_ACK] = 1, [TCPHDR_RST] = 1, [TCPHDR_RST|TCPHDR_ACK] = 1, [TCPHDR_FIN|TCPHDR_ACK] = 1, [TCPHDR_FIN|TCPHDR_ACK|TCPHDR_URG] = 1, [TCPHDR_ACK] = 1, [TCPHDR_ACK|TCPHDR_URG] = 1, }; static void tcp_error_log(const struct sk_buff *skb, const struct nf_hook_state *state, const char *msg) { nf_l4proto_log_invalid(skb, state, IPPROTO_TCP, "%s", msg); } /* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */ static bool tcp_error(const struct tcphdr *th, struct sk_buff *skb, unsigned int dataoff, const struct nf_hook_state *state) { unsigned int tcplen = skb->len - dataoff; u8 tcpflags; /* Not whole TCP header or malformed packet */ if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) { tcp_error_log(skb, state, "truncated packet"); return true; } /* Checksum invalid? Ignore. * We skip checking packets on the outgoing path * because the checksum is assumed to be correct. */ /* FIXME: Source route IP option packets --RR */ if (state->net->ct.sysctl_checksum && state->hook == NF_INET_PRE_ROUTING && nf_checksum(skb, state->hook, dataoff, IPPROTO_TCP, state->pf)) { tcp_error_log(skb, state, "bad checksum"); return true; } /* Check TCP flags. */ tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH)); if (!tcp_valid_flags[tcpflags]) { tcp_error_log(skb, state, "invalid tcp flag combination"); return true; } return false; } static noinline bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, const struct tcphdr *th, const struct nf_hook_state *state) { enum tcp_conntrack new_state; struct net *net = nf_ct_net(ct); const struct nf_tcp_net *tn = nf_tcp_pernet(net); /* Don't need lock here: this conntrack not in circulation yet */ new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE]; /* Invalid: delete conntrack */ if (new_state >= TCP_CONNTRACK_MAX) { tcp_error_log(skb, state, "invalid new"); return false; } if (new_state == TCP_CONNTRACK_SYN_SENT) { memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp)); /* SYN packet */ ct->proto.tcp.seen[0].td_end = segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th); ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window); if (ct->proto.tcp.seen[0].td_maxwin == 0) ct->proto.tcp.seen[0].td_maxwin = 1; ct->proto.tcp.seen[0].td_maxend = ct->proto.tcp.seen[0].td_end; tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]); } else if (tn->tcp_loose == 0) { /* Don't try to pick up connections. */ return false; } else { memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp)); /* * We are in the middle of a connection, * its history is lost for us. * Let's try to use the data from the packet. */ ct->proto.tcp.seen[0].td_end = segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th); ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window); if (ct->proto.tcp.seen[0].td_maxwin == 0) ct->proto.tcp.seen[0].td_maxwin = 1; ct->proto.tcp.seen[0].td_maxend = ct->proto.tcp.seen[0].td_end + ct->proto.tcp.seen[0].td_maxwin; /* We assume SACK and liberal window checking to handle * window scaling */ ct->proto.tcp.seen[0].flags = ct->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM | IP_CT_TCP_FLAG_BE_LIBERAL; } /* tcp_packet will set them */ ct->proto.tcp.last_index = TCP_NONE_SET; return true; } static bool tcp_can_early_drop(const struct nf_conn *ct) { switch (ct->proto.tcp.state) { case TCP_CONNTRACK_FIN_WAIT: case TCP_CONNTRACK_LAST_ACK: case TCP_CONNTRACK_TIME_WAIT: case TCP_CONNTRACK_CLOSE: case TCP_CONNTRACK_CLOSE_WAIT: return true; default: break; } return false; } void nf_conntrack_tcp_set_closing(struct nf_conn *ct) { enum tcp_conntrack old_state; const unsigned int *timeouts; u32 timeout; if (!nf_ct_is_confirmed(ct)) return; spin_lock_bh(&ct->lock); old_state = ct->proto.tcp.state; ct->proto.tcp.state = TCP_CONNTRACK_CLOSE; if (old_state == TCP_CONNTRACK_CLOSE || test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) { spin_unlock_bh(&ct->lock); return; } timeouts = nf_ct_timeout_lookup(ct); if (!timeouts) { const struct nf_tcp_net *tn; tn = nf_tcp_pernet(nf_ct_net(ct)); timeouts = tn->timeouts; } timeout = timeouts[TCP_CONNTRACK_CLOSE]; WRITE_ONCE(ct->timeout, timeout + nfct_time_stamp); spin_unlock_bh(&ct->lock); nf_conntrack_event_cache(IPCT_PROTOINFO, ct); } static void nf_ct_tcp_state_reset(struct ip_ct_tcp_state *state) { state->td_end = 0; state->td_maxend = 0; state->td_maxwin = 0; state->td_maxack = 0; state->td_scale = 0; state->flags &= IP_CT_TCP_FLAG_BE_LIBERAL; } /* Returns verdict for packet, or -1 for invalid. */ int nf_conntrack_tcp_packet(struct nf_conn *ct, struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, const struct nf_hook_state *state) { struct net *net = nf_ct_net(ct); struct nf_tcp_net *tn = nf_tcp_pernet(net); enum tcp_conntrack new_state, old_state; unsigned int index, *timeouts; enum nf_ct_tcp_action res; enum ip_conntrack_dir dir; const struct tcphdr *th; struct tcphdr _tcph; unsigned long timeout; th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); if (th == NULL) return -NF_ACCEPT; if (tcp_error(th, skb, dataoff, state)) return -NF_ACCEPT; if (!nf_ct_is_confirmed(ct) && !tcp_new(ct, skb, dataoff, th, state)) return -NF_ACCEPT; spin_lock_bh(&ct->lock); old_state = ct->proto.tcp.state; dir = CTINFO2DIR(ctinfo); index = get_conntrack_index(th); new_state = tcp_conntracks[dir][index][old_state]; switch (new_state) { case TCP_CONNTRACK_SYN_SENT: if (old_state < TCP_CONNTRACK_TIME_WAIT) break; /* RFC 1122: "When a connection is closed actively, * it MUST linger in TIME-WAIT state for a time 2xMSL * (Maximum Segment Lifetime). However, it MAY accept * a new SYN from the remote TCP to reopen the connection * directly from TIME-WAIT state, if..." * We ignore the conditions because we are in the * TIME-WAIT state anyway. * * Handle aborted connections: we and the server * think there is an existing connection but the client * aborts it and starts a new one. */ if (((ct->proto.tcp.seen[dir].flags | ct->proto.tcp.seen[!dir].flags) & IP_CT_TCP_FLAG_CLOSE_INIT) || (ct->proto.tcp.last_dir == dir && ct->proto.tcp.last_index == TCP_RST_SET)) { /* Attempt to reopen a closed/aborted connection. * Delete this connection and look up again. */ spin_unlock_bh(&ct->lock); /* Only repeat if we can actually remove the timer. * Destruction may already be in progress in process * context and we must give it a chance to terminate. */ if (nf_ct_kill(ct)) return -NF_REPEAT; return NF_DROP; } fallthrough; case TCP_CONNTRACK_IGNORE: /* Ignored packets: * * Our connection entry may be out of sync, so ignore * packets which may signal the real connection between * the client and the server. * * a) SYN in ORIGINAL * b) SYN/ACK in REPLY * c) ACK in reply direction after initial SYN in original. * * If the ignored packet is invalid, the receiver will send * a RST we'll catch below. */ if (index == TCP_SYNACK_SET && ct->proto.tcp.last_index == TCP_SYN_SET && ct->proto.tcp.last_dir != dir && ntohl(th->ack_seq) == ct->proto.tcp.last_end) { /* b) This SYN/ACK acknowledges a SYN that we earlier * ignored as invalid. This means that the client and * the server are both in sync, while the firewall is * not. We get in sync from the previously annotated * values. */ old_state = TCP_CONNTRACK_SYN_SENT; new_state = TCP_CONNTRACK_SYN_RECV; ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_end = ct->proto.tcp.last_end; ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxend = ct->proto.tcp.last_end; ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxwin = ct->proto.tcp.last_win == 0 ? 1 : ct->proto.tcp.last_win; ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_scale = ct->proto.tcp.last_wscale; ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK; ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags = ct->proto.tcp.last_flags; nf_ct_tcp_state_reset(&ct->proto.tcp.seen[dir]); break; } ct->proto.tcp.last_index = index; ct->proto.tcp.last_dir = dir; ct->proto.tcp.last_seq = ntohl(th->seq); ct->proto.tcp.last_end = segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th); ct->proto.tcp.last_win = ntohs(th->window); /* a) This is a SYN in ORIGINAL. The client and the server * may be in sync but we are not. In that case, we annotate * the TCP options and let the packet go through. If it is a * valid SYN packet, the server will reply with a SYN/ACK, and * then we'll get in sync. Otherwise, the server potentially * responds with a challenge ACK if implementing RFC5961. */ if (index == TCP_SYN_SET && dir == IP_CT_DIR_ORIGINAL) { struct ip_ct_tcp_state seen = {}; ct->proto.tcp.last_flags = ct->proto.tcp.last_wscale = 0; tcp_options(skb, dataoff, th, &seen); if (seen.flags & IP_CT_TCP_FLAG_WINDOW_SCALE) { ct->proto.tcp.last_flags |= IP_CT_TCP_FLAG_WINDOW_SCALE; ct->proto.tcp.last_wscale = seen.td_scale; } if (seen.flags & IP_CT_TCP_FLAG_SACK_PERM) { ct->proto.tcp.last_flags |= IP_CT_TCP_FLAG_SACK_PERM; } /* Mark the potential for RFC5961 challenge ACK, * this pose a special problem for LAST_ACK state * as ACK is intrepretated as ACKing last FIN. */ if (old_state == TCP_CONNTRACK_LAST_ACK) ct->proto.tcp.last_flags |= IP_CT_EXP_CHALLENGE_ACK; } /* possible challenge ack reply to syn */ if (old_state == TCP_CONNTRACK_SYN_SENT && index == TCP_ACK_SET && dir == IP_CT_DIR_REPLY) ct->proto.tcp.last_ack = ntohl(th->ack_seq); spin_unlock_bh(&ct->lock); nf_ct_l4proto_log_invalid(skb, ct, state, "packet (index %d) in dir %d ignored, state %s", index, dir, tcp_conntrack_names[old_state]); return NF_ACCEPT; case TCP_CONNTRACK_MAX: /* Special case for SYN proxy: when the SYN to the server or * the SYN/ACK from the server is lost, the client may transmit * a keep-alive packet while in SYN_SENT state. This needs to * be associated with the original conntrack entry in order to * generate a new SYN with the correct sequence number. */ if (nfct_synproxy(ct) && old_state == TCP_CONNTRACK_SYN_SENT && index == TCP_ACK_SET && dir == IP_CT_DIR_ORIGINAL && ct->proto.tcp.last_dir == IP_CT_DIR_ORIGINAL && ct->proto.tcp.seen[dir].td_end - 1 == ntohl(th->seq)) { pr_debug("nf_ct_tcp: SYN proxy client keep alive\n"); spin_unlock_bh(&ct->lock); return NF_ACCEPT; } /* Invalid packet */ spin_unlock_bh(&ct->lock); nf_ct_l4proto_log_invalid(skb, ct, state, "packet (index %d) in dir %d invalid, state %s", index, dir, tcp_conntrack_names[old_state]); return -NF_ACCEPT; case TCP_CONNTRACK_TIME_WAIT: /* RFC5961 compliance cause stack to send "challenge-ACK" * e.g. in response to spurious SYNs. Conntrack MUST * not believe this ACK is acking last FIN. */ if (old_state == TCP_CONNTRACK_LAST_ACK && index == TCP_ACK_SET && ct->proto.tcp.last_dir != dir && ct->proto.tcp.last_index == TCP_SYN_SET && (ct->proto.tcp.last_flags & IP_CT_EXP_CHALLENGE_ACK)) { /* Detected RFC5961 challenge ACK */ ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK; spin_unlock_bh(&ct->lock); nf_ct_l4proto_log_invalid(skb, ct, state, "challenge-ack ignored"); return NF_ACCEPT; /* Don't change state */ } break; case TCP_CONNTRACK_SYN_SENT2: /* tcp_conntracks table is not smart enough to handle * simultaneous open. */ ct->proto.tcp.last_flags |= IP_CT_TCP_SIMULTANEOUS_OPEN; break; case TCP_CONNTRACK_SYN_RECV: if (dir == IP_CT_DIR_REPLY && index == TCP_ACK_SET && ct->proto.tcp.last_flags & IP_CT_TCP_SIMULTANEOUS_OPEN) new_state = TCP_CONNTRACK_ESTABLISHED; break; case TCP_CONNTRACK_CLOSE: if (index != TCP_RST_SET) break; /* If we are closing, tuple might have been re-used already. * last_index, last_ack, and all other ct fields used for * sequence/window validation are outdated in that case. * * As the conntrack can already be expired by GC under pressure, * just skip validation checks. */ if (tcp_can_early_drop(ct)) goto in_window; /* td_maxack might be outdated if we let a SYN through earlier */ if ((ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET) && ct->proto.tcp.last_index != TCP_SYN_SET) { u32 seq = ntohl(th->seq); /* If we are not in established state and SEQ=0 this is most * likely an answer to a SYN we let go through above (last_index * can be updated due to out-of-order ACKs). */ if (seq == 0 && !nf_conntrack_tcp_established(ct)) break; if (before(seq, ct->proto.tcp.seen[!dir].td_maxack) && !tn->tcp_ignore_invalid_rst) { /* Invalid RST */ spin_unlock_bh(&ct->lock); nf_ct_l4proto_log_invalid(skb, ct, state, "invalid rst"); return -NF_ACCEPT; } if (!nf_conntrack_tcp_established(ct) || seq == ct->proto.tcp.seen[!dir].td_maxack) break; /* Check if rst is part of train, such as * foo:80 > bar:4379: P, 235946583:235946602(19) ack 42 * foo:80 > bar:4379: R, 235946602:235946602(0) ack 42 */ if (ct->proto.tcp.last_index == TCP_ACK_SET && ct->proto.tcp.last_dir == dir && seq == ct->proto.tcp.last_end) break; /* ... RST sequence number doesn't match exactly, keep * established state to allow a possible challenge ACK. */ new_state = old_state; } if (((test_bit(IPS_SEEN_REPLY_BIT, &ct->status) && ct->proto.tcp.last_index == TCP_SYN_SET) || (!test_bit(IPS_ASSURED_BIT, &ct->status) && ct->proto.tcp.last_index == TCP_ACK_SET)) && ntohl(th->ack_seq) == ct->proto.tcp.last_end) { /* RST sent to invalid SYN or ACK we had let through * at a) and c) above: * * a) SYN was in window then * c) we hold a half-open connection. * * Delete our connection entry. * We skip window checking, because packet might ACK * segments we ignored. */ goto in_window; } /* Reset in response to a challenge-ack we let through earlier */ if (old_state == TCP_CONNTRACK_SYN_SENT && ct->proto.tcp.last_index == TCP_ACK_SET && ct->proto.tcp.last_dir == IP_CT_DIR_REPLY && ntohl(th->seq) == ct->proto.tcp.last_ack) goto in_window; break; default: /* Keep compilers happy. */ break; } res = tcp_in_window(ct, dir, index, skb, dataoff, th, state); switch (res) { case NFCT_TCP_IGNORE: spin_unlock_bh(&ct->lock); return NF_ACCEPT; case NFCT_TCP_INVALID: nf_tcp_handle_invalid(ct, dir, index, skb, state); spin_unlock_bh(&ct->lock); return -NF_ACCEPT; case NFCT_TCP_ACCEPT: break; } in_window: /* From now on we have got in-window packets */ ct->proto.tcp.last_index = index; ct->proto.tcp.last_dir = dir; ct->proto.tcp.state = new_state; if (old_state != new_state && new_state == TCP_CONNTRACK_FIN_WAIT) ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT; timeouts = nf_ct_timeout_lookup(ct); if (!timeouts) timeouts = tn->timeouts; if (ct->proto.tcp.retrans >= tn->tcp_max_retrans && timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS]) timeout = timeouts[TCP_CONNTRACK_RETRANS]; else if (unlikely(index == TCP_RST_SET)) timeout = timeouts[TCP_CONNTRACK_CLOSE]; else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) & IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED && timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK]) timeout = timeouts[TCP_CONNTRACK_UNACK]; else if (ct->proto.tcp.last_win == 0 && timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS]) timeout = timeouts[TCP_CONNTRACK_RETRANS]; else timeout = timeouts[new_state]; spin_unlock_bh(&ct->lock); if (new_state != old_state) nf_conntrack_event_cache(IPCT_PROTOINFO, ct); if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { /* If only reply is a RST, we can consider ourselves not to have an established connection: this is a fairly common problem case, so we can delete the conntrack immediately. --RR */ if (th->rst) { nf_ct_kill_acct(ct, ctinfo, skb); return NF_ACCEPT; } if (index == TCP_SYN_SET && old_state == TCP_CONNTRACK_SYN_SENT) { /* do not renew timeout on SYN retransmit. * * Else port reuse by client or NAT middlebox can keep * entry alive indefinitely (including nat info). */ return NF_ACCEPT; } /* ESTABLISHED without SEEN_REPLY, i.e. mid-connection * pickup with loose=1. Avoid large ESTABLISHED timeout. */ if (new_state == TCP_CONNTRACK_ESTABLISHED && timeout > timeouts[TCP_CONNTRACK_UNACK]) timeout = timeouts[TCP_CONNTRACK_UNACK]; } else if (!test_bit(IPS_ASSURED_BIT, &ct->status) && (old_state == TCP_CONNTRACK_SYN_RECV || old_state == TCP_CONNTRACK_ESTABLISHED) && new_state == TCP_CONNTRACK_ESTABLISHED) { /* Set ASSURED if we see valid ack in ESTABLISHED after SYN_RECV or a valid answer for a picked up connection. */ set_bit(IPS_ASSURED_BIT, &ct->status); nf_conntrack_event_cache(IPCT_ASSURED, ct); } nf_ct_refresh_acct(ct, ctinfo, skb, timeout); return NF_ACCEPT; } #if IS_ENABLED(CONFIG_NF_CT_NETLINK) #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_conntrack.h> static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, struct nf_conn *ct, bool destroy) { struct nlattr *nest_parms; struct nf_ct_tcp_flags tmp = {}; spin_lock_bh(&ct->lock); nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP); if (!nest_parms) goto nla_put_failure; if (nla_put_u8(skb, CTA_PROTOINFO_TCP_STATE, ct->proto.tcp.state)) goto nla_put_failure; if (destroy) goto skip_state; if (nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL, ct->proto.tcp.seen[0].td_scale) || nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY, ct->proto.tcp.seen[1].td_scale)) goto nla_put_failure; tmp.flags = ct->proto.tcp.seen[0].flags; if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL, sizeof(struct nf_ct_tcp_flags), &tmp)) goto nla_put_failure; tmp.flags = ct->proto.tcp.seen[1].flags; if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY, sizeof(struct nf_ct_tcp_flags), &tmp)) goto nla_put_failure; skip_state: spin_unlock_bh(&ct->lock); nla_nest_end(skb, nest_parms); return 0; nla_put_failure: spin_unlock_bh(&ct->lock); return -1; } static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = { [CTA_PROTOINFO_TCP_STATE] = { .type = NLA_U8 }, [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 }, [CTA_PROTOINFO_TCP_WSCALE_REPLY] = { .type = NLA_U8 }, [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL] = { .len = sizeof(struct nf_ct_tcp_flags) }, [CTA_PROTOINFO_TCP_FLAGS_REPLY] = { .len = sizeof(struct nf_ct_tcp_flags) }, }; #define TCP_NLATTR_SIZE ( \ NLA_ALIGN(NLA_HDRLEN + 1) + \ NLA_ALIGN(NLA_HDRLEN + 1) + \ NLA_ALIGN(NLA_HDRLEN + sizeof(struct nf_ct_tcp_flags)) + \ NLA_ALIGN(NLA_HDRLEN + sizeof(struct nf_ct_tcp_flags))) static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct) { struct nlattr *pattr = cda[CTA_PROTOINFO_TCP]; struct nlattr *tb[CTA_PROTOINFO_TCP_MAX+1]; int err; /* updates could not contain anything about the private * protocol info, in that case skip the parsing */ if (!pattr) return 0; err = nla_parse_nested_deprecated(tb, CTA_PROTOINFO_TCP_MAX, pattr, tcp_nla_policy, NULL); if (err < 0) return err; if (tb[CTA_PROTOINFO_TCP_STATE] && nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX) return -EINVAL; spin_lock_bh(&ct->lock); if (tb[CTA_PROTOINFO_TCP_STATE]) ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]); if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]) { struct nf_ct_tcp_flags *attr = nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]); ct->proto.tcp.seen[0].flags &= ~attr->mask; ct->proto.tcp.seen[0].flags |= attr->flags & attr->mask; } if (tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]) { struct nf_ct_tcp_flags *attr = nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]); ct->proto.tcp.seen[1].flags &= ~attr->mask; ct->proto.tcp.seen[1].flags |= attr->flags & attr->mask; } if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] && tb[CTA_PROTOINFO_TCP_WSCALE_REPLY] && ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE && ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) { ct->proto.tcp.seen[0].td_scale = nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]); ct->proto.tcp.seen[1].td_scale = nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]); } spin_unlock_bh(&ct->lock); return 0; } static unsigned int tcp_nlattr_tuple_size(void) { static unsigned int size __read_mostly; if (!size) size = nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1); return size; } #endif #ifdef CONFIG_NF_CONNTRACK_TIMEOUT #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_cttimeout.h> static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[], struct net *net, void *data) { struct nf_tcp_net *tn = nf_tcp_pernet(net); unsigned int *timeouts = data; int i; if (!timeouts) timeouts = tn->timeouts; /* set default TCP timeouts. */ for (i=0; i<TCP_CONNTRACK_TIMEOUT_MAX; i++) timeouts[i] = tn->timeouts[i]; if (tb[CTA_TIMEOUT_TCP_SYN_SENT]) { timeouts[TCP_CONNTRACK_SYN_SENT] = ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT]))*HZ; } if (tb[CTA_TIMEOUT_TCP_SYN_RECV]) { timeouts[TCP_CONNTRACK_SYN_RECV] = ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_RECV]))*HZ; } if (tb[CTA_TIMEOUT_TCP_ESTABLISHED]) { timeouts[TCP_CONNTRACK_ESTABLISHED] = ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_ESTABLISHED]))*HZ; } if (tb[CTA_TIMEOUT_TCP_FIN_WAIT]) { timeouts[TCP_CONNTRACK_FIN_WAIT] = ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_FIN_WAIT]))*HZ; } if (tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]) { timeouts[TCP_CONNTRACK_CLOSE_WAIT] = ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]))*HZ; } if (tb[CTA_TIMEOUT_TCP_LAST_ACK]) { timeouts[TCP_CONNTRACK_LAST_ACK] = ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_LAST_ACK]))*HZ; } if (tb[CTA_TIMEOUT_TCP_TIME_WAIT]) { timeouts[TCP_CONNTRACK_TIME_WAIT] = ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_TIME_WAIT]))*HZ; } if (tb[CTA_TIMEOUT_TCP_CLOSE]) { timeouts[TCP_CONNTRACK_CLOSE] = ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE]))*HZ; } if (tb[CTA_TIMEOUT_TCP_SYN_SENT2]) { timeouts[TCP_CONNTRACK_SYN_SENT2] = ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT2]))*HZ; } if (tb[CTA_TIMEOUT_TCP_RETRANS]) { timeouts[TCP_CONNTRACK_RETRANS] = ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_RETRANS]))*HZ; } if (tb[CTA_TIMEOUT_TCP_UNACK]) { timeouts[TCP_CONNTRACK_UNACK] = ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_UNACK]))*HZ; } timeouts[CTA_TIMEOUT_TCP_UNSPEC] = timeouts[CTA_TIMEOUT_TCP_SYN_SENT]; return 0; } static int tcp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) { const unsigned int *timeouts = data; if (nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT, htonl(timeouts[TCP_CONNTRACK_SYN_SENT] / HZ)) || nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_RECV, htonl(timeouts[TCP_CONNTRACK_SYN_RECV] / HZ)) || nla_put_be32(skb, CTA_TIMEOUT_TCP_ESTABLISHED, htonl(timeouts[TCP_CONNTRACK_ESTABLISHED] / HZ)) || nla_put_be32(skb, CTA_TIMEOUT_TCP_FIN_WAIT, htonl(timeouts[TCP_CONNTRACK_FIN_WAIT] / HZ)) || nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE_WAIT, htonl(timeouts[TCP_CONNTRACK_CLOSE_WAIT] / HZ)) || nla_put_be32(skb, CTA_TIMEOUT_TCP_LAST_ACK, htonl(timeouts[TCP_CONNTRACK_LAST_ACK] / HZ)) || nla_put_be32(skb, CTA_TIMEOUT_TCP_TIME_WAIT, htonl(timeouts[TCP_CONNTRACK_TIME_WAIT] / HZ)) || nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE, htonl(timeouts[TCP_CONNTRACK_CLOSE] / HZ)) || nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT2, htonl(timeouts[TCP_CONNTRACK_SYN_SENT2] / HZ)) || nla_put_be32(skb, CTA_TIMEOUT_TCP_RETRANS, htonl(timeouts[TCP_CONNTRACK_RETRANS] / HZ)) || nla_put_be32(skb, CTA_TIMEOUT_TCP_UNACK, htonl(timeouts[TCP_CONNTRACK_UNACK] / HZ))) goto nla_put_failure; return 0; nla_put_failure: return -ENOSPC; } static const struct nla_policy tcp_timeout_nla_policy[CTA_TIMEOUT_TCP_MAX+1] = { [CTA_TIMEOUT_TCP_SYN_SENT] = { .type = NLA_U32 }, [CTA_TIMEOUT_TCP_SYN_RECV] = { .type = NLA_U32 }, [CTA_TIMEOUT_TCP_ESTABLISHED] = { .type = NLA_U32 }, [CTA_TIMEOUT_TCP_FIN_WAIT] = { .type = NLA_U32 }, [CTA_TIMEOUT_TCP_CLOSE_WAIT] = { .type = NLA_U32 }, [CTA_TIMEOUT_TCP_LAST_ACK] = { .type = NLA_U32 }, [CTA_TIMEOUT_TCP_TIME_WAIT] = { .type = NLA_U32 }, [CTA_TIMEOUT_TCP_CLOSE] = { .type = NLA_U32 }, [CTA_TIMEOUT_TCP_SYN_SENT2] = { .type = NLA_U32 }, [CTA_TIMEOUT_TCP_RETRANS] = { .type = NLA_U32 }, [CTA_TIMEOUT_TCP_UNACK] = { .type = NLA_U32 }, }; #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ void nf_conntrack_tcp_init_net(struct net *net) { struct nf_tcp_net *tn = nf_tcp_pernet(net); int i; for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++) tn->timeouts[i] = tcp_timeouts[i]; /* timeouts[0] is unused, make it same as SYN_SENT so * ->timeouts[0] contains 'new' timeout, like udp or icmp. */ tn->timeouts[0] = tcp_timeouts[TCP_CONNTRACK_SYN_SENT]; /* If it is set to zero, we disable picking up already established * connections. */ tn->tcp_loose = 1; /* "Be conservative in what you do, * be liberal in what you accept from others." * If it's non-zero, we mark only out of window RST segments as INVALID. */ tn->tcp_be_liberal = 0; /* If it's non-zero, we turn off RST sequence number check */ tn->tcp_ignore_invalid_rst = 0; /* Max number of the retransmitted packets without receiving an (acceptable) * ACK from the destination. If this number is reached, a shorter timer * will be started. */ tn->tcp_max_retrans = 3; #if IS_ENABLED(CONFIG_NF_FLOW_TABLE) tn->offload_timeout = 30 * HZ; #endif } const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp = { .l4proto = IPPROTO_TCP, #ifdef CONFIG_NF_CONNTRACK_PROCFS .print_conntrack = tcp_print_conntrack, #endif .can_early_drop = tcp_can_early_drop, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .to_nlattr = tcp_to_nlattr, .from_nlattr = nlattr_to_tcp, .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, .nlattr_tuple_size = tcp_nlattr_tuple_size, .nlattr_size = TCP_NLATTR_SIZE, .nla_policy = nf_ct_port_nla_policy, #endif #ifdef CONFIG_NF_CONNTRACK_TIMEOUT .ctnl_timeout = { .nlattr_to_obj = tcp_timeout_nlattr_to_obj, .obj_to_nlattr = tcp_timeout_obj_to_nlattr, .nlattr_max = CTA_TIMEOUT_TCP_MAX, .obj_size = sizeof(unsigned int) * TCP_CONNTRACK_TIMEOUT_MAX, .nla_policy = tcp_timeout_nla_policy, }, #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ }; |
7 6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 | /* * net/tipc/eth_media.c: Ethernet bearer support for TIPC * * Copyright (c) 2001-2007, 2013-2014, Ericsson AB * Copyright (c) 2005-2008, 2011-2013, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the names of the copyright holders nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * Alternatively, this software may be distributed under the terms of the * GNU General Public License ("GPL") version 2 as published by the Free * Software Foundation. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include "core.h" #include "bearer.h" /* Convert Ethernet address (media address format) to string */ static int tipc_eth_addr2str(struct tipc_media_addr *addr, char *strbuf, int bufsz) { if (bufsz < 18) /* 18 = strlen("aa:bb:cc:dd:ee:ff\0") */ return 1; sprintf(strbuf, "%pM", addr->value); return 0; } /* Convert from media address format to discovery message addr format */ static int tipc_eth_addr2msg(char *msg, struct tipc_media_addr *addr) { memset(msg, 0, TIPC_MEDIA_INFO_SIZE); msg[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_ETH; memcpy(msg + TIPC_MEDIA_ADDR_OFFSET, addr->value, ETH_ALEN); return 0; } /* Convert raw mac address format to media addr format */ static int tipc_eth_raw2addr(struct tipc_bearer *b, struct tipc_media_addr *addr, const char *msg) { memset(addr, 0, sizeof(*addr)); ether_addr_copy(addr->value, msg); addr->media_id = TIPC_MEDIA_TYPE_ETH; addr->broadcast = is_broadcast_ether_addr(addr->value); return 0; } /* Convert discovery msg addr format to Ethernet media addr format */ static int tipc_eth_msg2addr(struct tipc_bearer *b, struct tipc_media_addr *addr, char *msg) { /* Skip past preamble: */ msg += TIPC_MEDIA_ADDR_OFFSET; return tipc_eth_raw2addr(b, addr, msg); } /* Ethernet media registration info */ struct tipc_media eth_media_info = { .send_msg = tipc_l2_send_msg, .enable_media = tipc_enable_l2_media, .disable_media = tipc_disable_l2_media, .addr2str = tipc_eth_addr2str, .addr2msg = tipc_eth_addr2msg, .msg2addr = tipc_eth_msg2addr, .raw2addr = tipc_eth_raw2addr, .priority = TIPC_DEF_LINK_PRI, .tolerance = TIPC_DEF_LINK_TOL, .min_win = TIPC_DEF_LINK_WIN, .max_win = TIPC_MAX_LINK_WIN, .type_id = TIPC_MEDIA_TYPE_ETH, .hwaddr_len = ETH_ALEN, .name = "eth" }; |
18 18 18 18 10 10 10 10 10 10 10 10 10 15 9 10 10 10 15 10 10 10 10 10 8 8 8 8 8 8 8 8 8 8 8 15 15 15 15 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 | // SPDX-License-Identifier: GPL-2.0 /* Copyright (C) B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich */ #include "send.h" #include "main.h" #include <linux/atomic.h> #include <linux/bug.h> #include <linux/byteorder/generic.h> #include <linux/container_of.h> #include <linux/errno.h> #include <linux/etherdevice.h> #include <linux/gfp.h> #include <linux/if.h> #include <linux/if_ether.h> #include <linux/jiffies.h> #include <linux/kref.h> #include <linux/list.h> #include <linux/netdevice.h> #include <linux/printk.h> #include <linux/rculist.h> #include <linux/rcupdate.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/stddef.h> #include <linux/workqueue.h> #include "distributed-arp-table.h" #include "fragmentation.h" #include "gateway_client.h" #include "hard-interface.h" #include "log.h" #include "network-coding.h" #include "originator.h" #include "routing.h" #include "soft-interface.h" #include "translation-table.h" static void batadv_send_outstanding_bcast_packet(struct work_struct *work); /** * batadv_send_skb_packet() - send an already prepared packet * @skb: the packet to send * @hard_iface: the interface to use to send the broadcast packet * @dst_addr: the payload destination * * Send out an already prepared packet to the given neighbor or broadcast it * using the specified interface. Either hard_iface or neigh_node must be not * NULL. * If neigh_node is NULL, then the packet is broadcasted using hard_iface, * otherwise it is sent as unicast to the given neighbor. * * Regardless of the return value, the skb is consumed. * * Return: A negative errno code is returned on a failure. A success does not * guarantee the frame will be transmitted as it may be dropped due * to congestion or traffic shaping. */ int batadv_send_skb_packet(struct sk_buff *skb, struct batadv_hard_iface *hard_iface, const u8 *dst_addr) { struct batadv_priv *bat_priv; struct ethhdr *ethhdr; int ret; bat_priv = netdev_priv(hard_iface->soft_iface); if (hard_iface->if_status != BATADV_IF_ACTIVE) goto send_skb_err; if (unlikely(!hard_iface->net_dev)) goto send_skb_err; if (!(hard_iface->net_dev->flags & IFF_UP)) { pr_warn("Interface %s is not up - can't send packet via that interface!\n", hard_iface->net_dev->name); goto send_skb_err; } /* push to the ethernet header. */ if (batadv_skb_head_push(skb, ETH_HLEN) < 0) goto send_skb_err; skb_reset_mac_header(skb); ethhdr = eth_hdr(skb); ether_addr_copy(ethhdr->h_source, hard_iface->net_dev->dev_addr); ether_addr_copy(ethhdr->h_dest, dst_addr); ethhdr->h_proto = htons(ETH_P_BATMAN); skb_set_network_header(skb, ETH_HLEN); skb->protocol = htons(ETH_P_BATMAN); skb->dev = hard_iface->net_dev; /* Save a clone of the skb to use when decoding coded packets */ batadv_nc_skb_store_for_decoding(bat_priv, skb); /* dev_queue_xmit() returns a negative result on error. However on * congestion and traffic shaping, it drops and returns NET_XMIT_DROP * (which is > 0). This will not be treated as an error. */ ret = dev_queue_xmit(skb); return net_xmit_eval(ret); send_skb_err: kfree_skb(skb); return NET_XMIT_DROP; } /** * batadv_send_broadcast_skb() - Send broadcast packet via hard interface * @skb: packet to be transmitted (with batadv header and no outer eth header) * @hard_iface: outgoing interface * * Return: A negative errno code is returned on a failure. A success does not * guarantee the frame will be transmitted as it may be dropped due * to congestion or traffic shaping. */ int batadv_send_broadcast_skb(struct sk_buff *skb, struct batadv_hard_iface *hard_iface) { return batadv_send_skb_packet(skb, hard_iface, batadv_broadcast_addr); } /** * batadv_send_unicast_skb() - Send unicast packet to neighbor * @skb: packet to be transmitted (with batadv header and no outer eth header) * @neigh: neighbor which is used as next hop to destination * * Return: A negative errno code is returned on a failure. A success does not * guarantee the frame will be transmitted as it may be dropped due * to congestion or traffic shaping. */ int batadv_send_unicast_skb(struct sk_buff *skb, struct batadv_neigh_node *neigh) { #ifdef CONFIG_BATMAN_ADV_BATMAN_V struct batadv_hardif_neigh_node *hardif_neigh; #endif int ret; ret = batadv_send_skb_packet(skb, neigh->if_incoming, neigh->addr); #ifdef CONFIG_BATMAN_ADV_BATMAN_V hardif_neigh = batadv_hardif_neigh_get(neigh->if_incoming, neigh->addr); if (hardif_neigh && ret != NET_XMIT_DROP) hardif_neigh->bat_v.last_unicast_tx = jiffies; batadv_hardif_neigh_put(hardif_neigh); #endif return ret; } /** * batadv_send_skb_to_orig() - Lookup next-hop and transmit skb. * @skb: Packet to be transmitted. * @orig_node: Final destination of the packet. * @recv_if: Interface used when receiving the packet (can be NULL). * * Looks up the best next-hop towards the passed originator and passes the * skb on for preparation of MAC header. If the packet originated from this * host, NULL can be passed as recv_if and no interface alternating is * attempted. * * Return: negative errno code on a failure, -EINPROGRESS if the skb is * buffered for later transmit or the NET_XMIT status returned by the * lower routine if the packet has been passed down. */ int batadv_send_skb_to_orig(struct sk_buff *skb, struct batadv_orig_node *orig_node, struct batadv_hard_iface *recv_if) { struct batadv_priv *bat_priv = orig_node->bat_priv; struct batadv_neigh_node *neigh_node; int ret; /* batadv_find_router() increases neigh_nodes refcount if found. */ neigh_node = batadv_find_router(bat_priv, orig_node, recv_if); if (!neigh_node) { ret = -EINVAL; goto free_skb; } /* Check if the skb is too large to send in one piece and fragment * it if needed. */ if (atomic_read(&bat_priv->fragmentation) && skb->len > neigh_node->if_incoming->net_dev->mtu) { /* Fragment and send packet. */ ret = batadv_frag_send_packet(skb, orig_node, neigh_node); /* skb was consumed */ skb = NULL; goto put_neigh_node; } /* try to network code the packet, if it is received on an interface * (i.e. being forwarded). If the packet originates from this node or if * network coding fails, then send the packet as usual. */ if (recv_if && batadv_nc_skb_forward(skb, neigh_node)) ret = -EINPROGRESS; else ret = batadv_send_unicast_skb(skb, neigh_node); /* skb was consumed */ skb = NULL; put_neigh_node: batadv_neigh_node_put(neigh_node); free_skb: kfree_skb(skb); return ret; } /** * batadv_send_skb_push_fill_unicast() - extend the buffer and initialize the * common fields for unicast packets * @skb: the skb carrying the unicast header to initialize * @hdr_size: amount of bytes to push at the beginning of the skb * @orig_node: the destination node * * Return: false if the buffer extension was not possible or true otherwise. */ static bool batadv_send_skb_push_fill_unicast(struct sk_buff *skb, int hdr_size, struct batadv_orig_node *orig_node) { struct batadv_unicast_packet *unicast_packet; u8 ttvn = (u8)atomic_read(&orig_node->last_ttvn); if (batadv_skb_head_push(skb, hdr_size) < 0) return false; unicast_packet = (struct batadv_unicast_packet *)skb->data; unicast_packet->version = BATADV_COMPAT_VERSION; /* batman packet type: unicast */ unicast_packet->packet_type = BATADV_UNICAST; /* set unicast ttl */ unicast_packet->ttl = BATADV_TTL; /* copy the destination for faster routing */ ether_addr_copy(unicast_packet->dest, orig_node->orig); /* set the destination tt version number */ unicast_packet->ttvn = ttvn; return true; } /** * batadv_send_skb_prepare_unicast() - encapsulate an skb with a unicast header * @skb: the skb containing the payload to encapsulate * @orig_node: the destination node * * Return: false if the payload could not be encapsulated or true otherwise. */ static bool batadv_send_skb_prepare_unicast(struct sk_buff *skb, struct batadv_orig_node *orig_node) { size_t uni_size = sizeof(struct batadv_unicast_packet); return batadv_send_skb_push_fill_unicast(skb, uni_size, orig_node); } /** * batadv_send_skb_prepare_unicast_4addr() - encapsulate an skb with a * unicast 4addr header * @bat_priv: the bat priv with all the soft interface information * @skb: the skb containing the payload to encapsulate * @orig: the destination node * @packet_subtype: the unicast 4addr packet subtype to use * * Return: false if the payload could not be encapsulated or true otherwise. */ bool batadv_send_skb_prepare_unicast_4addr(struct batadv_priv *bat_priv, struct sk_buff *skb, struct batadv_orig_node *orig, int packet_subtype) { struct batadv_hard_iface *primary_if; struct batadv_unicast_4addr_packet *uc_4addr_packet; bool ret = false; primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if) goto out; /* Pull the header space and fill the unicast_packet substructure. * We can do that because the first member of the uc_4addr_packet * is of type struct unicast_packet */ if (!batadv_send_skb_push_fill_unicast(skb, sizeof(*uc_4addr_packet), orig)) goto out; uc_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data; uc_4addr_packet->u.packet_type = BATADV_UNICAST_4ADDR; ether_addr_copy(uc_4addr_packet->src, primary_if->net_dev->dev_addr); uc_4addr_packet->subtype = packet_subtype; uc_4addr_packet->reserved = 0; ret = true; out: batadv_hardif_put(primary_if); return ret; } /** * batadv_send_skb_unicast() - encapsulate and send an skb via unicast * @bat_priv: the bat priv with all the soft interface information * @skb: payload to send * @packet_type: the batman unicast packet type to use * @packet_subtype: the unicast 4addr packet subtype (only relevant for unicast * 4addr packets) * @orig_node: the originator to send the packet to * @vid: the vid to be used to search the translation table * * Wrap the given skb into a batman-adv unicast or unicast-4addr header * depending on whether BATADV_UNICAST or BATADV_UNICAST_4ADDR was supplied * as packet_type. Then send this frame to the given orig_node. * * Return: NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise. */ int batadv_send_skb_unicast(struct batadv_priv *bat_priv, struct sk_buff *skb, int packet_type, int packet_subtype, struct batadv_orig_node *orig_node, unsigned short vid) { struct batadv_unicast_packet *unicast_packet; struct ethhdr *ethhdr; int ret = NET_XMIT_DROP; if (!orig_node) goto out; switch (packet_type) { case BATADV_UNICAST: if (!batadv_send_skb_prepare_unicast(skb, orig_node)) goto out; break; case BATADV_UNICAST_4ADDR: if (!batadv_send_skb_prepare_unicast_4addr(bat_priv, skb, orig_node, packet_subtype)) goto out; break; default: /* this function supports UNICAST and UNICAST_4ADDR only. It * should never be invoked with any other packet type */ goto out; } /* skb->data might have been reallocated by * batadv_send_skb_prepare_unicast{,_4addr}() */ ethhdr = eth_hdr(skb); unicast_packet = (struct batadv_unicast_packet *)skb->data; /* inform the destination node that we are still missing a correct route * for this client. The destination will receive this packet and will * try to reroute it because the ttvn contained in the header is less * than the current one */ if (batadv_tt_global_client_is_roaming(bat_priv, ethhdr->h_dest, vid)) unicast_packet->ttvn = unicast_packet->ttvn - 1; ret = batadv_send_skb_to_orig(skb, orig_node, NULL); /* skb was consumed */ skb = NULL; out: kfree_skb(skb); return ret; } /** * batadv_send_skb_via_tt_generic() - send an skb via TT lookup * @bat_priv: the bat priv with all the soft interface information * @skb: payload to send * @packet_type: the batman unicast packet type to use * @packet_subtype: the unicast 4addr packet subtype (only relevant for unicast * 4addr packets) * @dst_hint: can be used to override the destination contained in the skb * @vid: the vid to be used to search the translation table * * Look up the recipient node for the destination address in the ethernet * header via the translation table. Wrap the given skb into a batman-adv * unicast or unicast-4addr header depending on whether BATADV_UNICAST or * BATADV_UNICAST_4ADDR was supplied as packet_type. Then send this frame * to the according destination node. * * Return: NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise. */ int batadv_send_skb_via_tt_generic(struct batadv_priv *bat_priv, struct sk_buff *skb, int packet_type, int packet_subtype, u8 *dst_hint, unsigned short vid) { struct ethhdr *ethhdr = (struct ethhdr *)skb->data; struct batadv_orig_node *orig_node; u8 *src, *dst; int ret; src = ethhdr->h_source; dst = ethhdr->h_dest; /* if we got an hint! let's send the packet to this client (if any) */ if (dst_hint) { src = NULL; dst = dst_hint; } orig_node = batadv_transtable_search(bat_priv, src, dst, vid); ret = batadv_send_skb_unicast(bat_priv, skb, packet_type, packet_subtype, orig_node, vid); batadv_orig_node_put(orig_node); return ret; } /** * batadv_send_skb_via_gw() - send an skb via gateway lookup * @bat_priv: the bat priv with all the soft interface information * @skb: payload to send * @vid: the vid to be used to search the translation table * * Look up the currently selected gateway. Wrap the given skb into a batman-adv * unicast header and send this frame to this gateway node. * * Return: NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise. */ int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned short vid) { struct batadv_orig_node *orig_node; int ret; orig_node = batadv_gw_get_selected_orig(bat_priv); ret = batadv_send_skb_unicast(bat_priv, skb, BATADV_UNICAST_4ADDR, BATADV_P_DATA, orig_node, vid); batadv_orig_node_put(orig_node); return ret; } /** * batadv_forw_packet_free() - free a forwarding packet * @forw_packet: The packet to free * @dropped: whether the packet is freed because is dropped * * This frees a forwarding packet and releases any resources it might * have claimed. */ void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet, bool dropped) { if (dropped) kfree_skb(forw_packet->skb); else consume_skb(forw_packet->skb); batadv_hardif_put(forw_packet->if_incoming); batadv_hardif_put(forw_packet->if_outgoing); if (forw_packet->queue_left) atomic_inc(forw_packet->queue_left); kfree(forw_packet); } /** * batadv_forw_packet_alloc() - allocate a forwarding packet * @if_incoming: The (optional) if_incoming to be grabbed * @if_outgoing: The (optional) if_outgoing to be grabbed * @queue_left: The (optional) queue counter to decrease * @bat_priv: The bat_priv for the mesh of this forw_packet * @skb: The raw packet this forwarding packet shall contain * * Allocates a forwarding packet and tries to get a reference to the * (optional) if_incoming, if_outgoing and queue_left. If queue_left * is NULL then bat_priv is optional, too. * * Return: An allocated forwarding packet on success, NULL otherwise. */ struct batadv_forw_packet * batadv_forw_packet_alloc(struct batadv_hard_iface *if_incoming, struct batadv_hard_iface *if_outgoing, atomic_t *queue_left, struct batadv_priv *bat_priv, struct sk_buff *skb) { struct batadv_forw_packet *forw_packet; const char *qname; if (queue_left && !batadv_atomic_dec_not_zero(queue_left)) { qname = "unknown"; if (queue_left == &bat_priv->bcast_queue_left) qname = "bcast"; if (queue_left == &bat_priv->batman_queue_left) qname = "batman"; batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "%s queue is full\n", qname); return NULL; } forw_packet = kmalloc(sizeof(*forw_packet), GFP_ATOMIC); if (!forw_packet) goto err; if (if_incoming) kref_get(&if_incoming->refcount); if (if_outgoing) kref_get(&if_outgoing->refcount); INIT_HLIST_NODE(&forw_packet->list); INIT_HLIST_NODE(&forw_packet->cleanup_list); forw_packet->skb = skb; forw_packet->queue_left = queue_left; forw_packet->if_incoming = if_incoming; forw_packet->if_outgoing = if_outgoing; forw_packet->num_packets = 0; return forw_packet; err: if (queue_left) atomic_inc(queue_left); return NULL; } /** * batadv_forw_packet_was_stolen() - check whether someone stole this packet * @forw_packet: the forwarding packet to check * * This function checks whether the given forwarding packet was claimed by * someone else for free(). * * Return: True if someone stole it, false otherwise. */ static bool batadv_forw_packet_was_stolen(struct batadv_forw_packet *forw_packet) { return !hlist_unhashed(&forw_packet->cleanup_list); } /** * batadv_forw_packet_steal() - claim a forw_packet for free() * @forw_packet: the forwarding packet to steal * @lock: a key to the store to steal from (e.g. forw_{bat,bcast}_list_lock) * * This function tries to steal a specific forw_packet from global * visibility for the purpose of getting it for free(). That means * the caller is *not* allowed to requeue it afterwards. * * Return: True if stealing was successful. False if someone else stole it * before us. */ bool batadv_forw_packet_steal(struct batadv_forw_packet *forw_packet, spinlock_t *lock) { /* did purging routine steal it earlier? */ spin_lock_bh(lock); if (batadv_forw_packet_was_stolen(forw_packet)) { spin_unlock_bh(lock); return false; } hlist_del_init(&forw_packet->list); /* Just to spot misuse of this function */ hlist_add_fake(&forw_packet->cleanup_list); spin_unlock_bh(lock); return true; } /** * batadv_forw_packet_list_steal() - claim a list of forward packets for free() * @forw_list: the to be stolen forward packets * @cleanup_list: a backup pointer, to be able to dispose the packet later * @hard_iface: the interface to steal forward packets from * * This function claims responsibility to free any forw_packet queued on the * given hard_iface. If hard_iface is NULL forwarding packets on all hard * interfaces will be claimed. * * The packets are being moved from the forw_list to the cleanup_list. This * makes it possible for already running threads to notice the claim. */ static void batadv_forw_packet_list_steal(struct hlist_head *forw_list, struct hlist_head *cleanup_list, const struct batadv_hard_iface *hard_iface) { struct batadv_forw_packet *forw_packet; struct hlist_node *safe_tmp_node; hlist_for_each_entry_safe(forw_packet, safe_tmp_node, forw_list, list) { /* if purge_outstanding_packets() was called with an argument * we delete only packets belonging to the given interface */ if (hard_iface && forw_packet->if_incoming != hard_iface && forw_packet->if_outgoing != hard_iface) continue; hlist_del(&forw_packet->list); hlist_add_head(&forw_packet->cleanup_list, cleanup_list); } } /** * batadv_forw_packet_list_free() - free a list of forward packets * @head: a list of to be freed forw_packets * * This function cancels the scheduling of any packet in the provided list, * waits for any possibly running packet forwarding thread to finish and * finally, safely frees this forward packet. * * This function might sleep. */ static void batadv_forw_packet_list_free(struct hlist_head *head) { struct batadv_forw_packet *forw_packet; struct hlist_node *safe_tmp_node; hlist_for_each_entry_safe(forw_packet, safe_tmp_node, head, cleanup_list) { cancel_delayed_work_sync(&forw_packet->delayed_work); hlist_del(&forw_packet->cleanup_list); batadv_forw_packet_free(forw_packet, true); } } /** * batadv_forw_packet_queue() - try to queue a forwarding packet * @forw_packet: the forwarding packet to queue * @lock: a key to the store (e.g. forw_{bat,bcast}_list_lock) * @head: the shelve to queue it on (e.g. forw_{bat,bcast}_list) * @send_time: timestamp (jiffies) when the packet is to be sent * * This function tries to (re)queue a forwarding packet. Requeuing * is prevented if the according interface is shutting down * (e.g. if batadv_forw_packet_list_steal() was called for this * packet earlier). * * Calling batadv_forw_packet_queue() after a call to * batadv_forw_packet_steal() is forbidden! * * Caller needs to ensure that forw_packet->delayed_work was initialized. */ static void batadv_forw_packet_queue(struct batadv_forw_packet *forw_packet, spinlock_t *lock, struct hlist_head *head, unsigned long send_time) { spin_lock_bh(lock); /* did purging routine steal it from us? */ if (batadv_forw_packet_was_stolen(forw_packet)) { /* If you got it for free() without trouble, then * don't get back into the queue after stealing... */ WARN_ONCE(hlist_fake(&forw_packet->cleanup_list), "Requeuing after batadv_forw_packet_steal() not allowed!\n"); spin_unlock_bh(lock); return; } hlist_del_init(&forw_packet->list); hlist_add_head(&forw_packet->list, head); queue_delayed_work(batadv_event_workqueue, &forw_packet->delayed_work, send_time - jiffies); spin_unlock_bh(lock); } /** * batadv_forw_packet_bcast_queue() - try to queue a broadcast packet * @bat_priv: the bat priv with all the soft interface information * @forw_packet: the forwarding packet to queue * @send_time: timestamp (jiffies) when the packet is to be sent * * This function tries to (re)queue a broadcast packet. * * Caller needs to ensure that forw_packet->delayed_work was initialized. */ static void batadv_forw_packet_bcast_queue(struct batadv_priv *bat_priv, struct batadv_forw_packet *forw_packet, unsigned long send_time) { batadv_forw_packet_queue(forw_packet, &bat_priv->forw_bcast_list_lock, &bat_priv->forw_bcast_list, send_time); } /** * batadv_forw_packet_ogmv1_queue() - try to queue an OGMv1 packet * @bat_priv: the bat priv with all the soft interface information * @forw_packet: the forwarding packet to queue * @send_time: timestamp (jiffies) when the packet is to be sent * * This function tries to (re)queue an OGMv1 packet. * * Caller needs to ensure that forw_packet->delayed_work was initialized. */ void batadv_forw_packet_ogmv1_queue(struct batadv_priv *bat_priv, struct batadv_forw_packet *forw_packet, unsigned long send_time) { batadv_forw_packet_queue(forw_packet, &bat_priv->forw_bat_list_lock, &bat_priv->forw_bat_list, send_time); } /** * batadv_forw_bcast_packet_to_list() - queue broadcast packet for transmissions * @bat_priv: the bat priv with all the soft interface information * @skb: broadcast packet to add * @delay: number of jiffies to wait before sending * @own_packet: true if it is a self-generated broadcast packet * @if_in: the interface where the packet was received on * @if_out: the outgoing interface to queue on * * Adds a broadcast packet to the queue and sets up timers. Broadcast packets * are sent multiple times to increase probability for being received. * * This call clones the given skb, hence the caller needs to take into * account that the data segment of the original skb might not be * modifiable anymore. * * Return: NETDEV_TX_OK on success and NETDEV_TX_BUSY on errors. */ static int batadv_forw_bcast_packet_to_list(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned long delay, bool own_packet, struct batadv_hard_iface *if_in, struct batadv_hard_iface *if_out) { struct batadv_forw_packet *forw_packet; unsigned long send_time = jiffies; struct sk_buff *newskb; newskb = skb_clone(skb, GFP_ATOMIC); if (!newskb) goto err; forw_packet = batadv_forw_packet_alloc(if_in, if_out, &bat_priv->bcast_queue_left, bat_priv, newskb); if (!forw_packet) goto err_packet_free; forw_packet->own = own_packet; INIT_DELAYED_WORK(&forw_packet->delayed_work, batadv_send_outstanding_bcast_packet); send_time += delay ? delay : msecs_to_jiffies(5); batadv_forw_packet_bcast_queue(bat_priv, forw_packet, send_time); return NETDEV_TX_OK; err_packet_free: kfree_skb(newskb); err: return NETDEV_TX_BUSY; } /** * batadv_forw_bcast_packet_if() - forward and queue a broadcast packet * @bat_priv: the bat priv with all the soft interface information * @skb: broadcast packet to add * @delay: number of jiffies to wait before sending * @own_packet: true if it is a self-generated broadcast packet * @if_in: the interface where the packet was received on * @if_out: the outgoing interface to forward to * * Transmits a broadcast packet on the specified interface either immediately * or if a delay is given after that. Furthermore, queues additional * retransmissions if this interface is a wireless one. * * This call clones the given skb, hence the caller needs to take into * account that the data segment of the original skb might not be * modifiable anymore. * * Return: NETDEV_TX_OK on success and NETDEV_TX_BUSY on errors. */ static int batadv_forw_bcast_packet_if(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned long delay, bool own_packet, struct batadv_hard_iface *if_in, struct batadv_hard_iface *if_out) { unsigned int num_bcasts = if_out->num_bcasts; struct sk_buff *newskb; int ret = NETDEV_TX_OK; if (!delay) { newskb = skb_clone(skb, GFP_ATOMIC); if (!newskb) return NETDEV_TX_BUSY; batadv_send_broadcast_skb(newskb, if_out); num_bcasts--; } /* delayed broadcast or rebroadcasts? */ if (num_bcasts >= 1) { BATADV_SKB_CB(skb)->num_bcasts = num_bcasts; ret = batadv_forw_bcast_packet_to_list(bat_priv, skb, delay, own_packet, if_in, if_out); } return ret; } /** * batadv_send_no_broadcast() - check whether (re)broadcast is necessary * @bat_priv: the bat priv with all the soft interface information * @skb: broadcast packet to check * @own_packet: true if it is a self-generated broadcast packet * @if_out: the outgoing interface checked and considered for (re)broadcast * * Return: False if a packet needs to be (re)broadcasted on the given interface, * true otherwise. */ static bool batadv_send_no_broadcast(struct batadv_priv *bat_priv, struct sk_buff *skb, bool own_packet, struct batadv_hard_iface *if_out) { struct batadv_hardif_neigh_node *neigh_node = NULL; struct batadv_bcast_packet *bcast_packet; u8 *orig_neigh; u8 *neigh_addr; char *type; int ret; if (!own_packet) { neigh_addr = eth_hdr(skb)->h_source; neigh_node = batadv_hardif_neigh_get(if_out, neigh_addr); } bcast_packet = (struct batadv_bcast_packet *)skb->data; orig_neigh = neigh_node ? neigh_node->orig : NULL; ret = batadv_hardif_no_broadcast(if_out, bcast_packet->orig, orig_neigh); batadv_hardif_neigh_put(neigh_node); /* ok, may broadcast */ if (!ret) return false; /* no broadcast */ switch (ret) { case BATADV_HARDIF_BCAST_NORECIPIENT: type = "no neighbor"; break; case BATADV_HARDIF_BCAST_DUPFWD: type = "single neighbor is source"; break; case BATADV_HARDIF_BCAST_DUPORIG: type = "single neighbor is originator"; break; default: type = "unknown"; } batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "BCAST packet from orig %pM on %s suppressed: %s\n", bcast_packet->orig, if_out->net_dev->name, type); return true; } /** * __batadv_forw_bcast_packet() - forward and queue a broadcast packet * @bat_priv: the bat priv with all the soft interface information * @skb: broadcast packet to add * @delay: number of jiffies to wait before sending * @own_packet: true if it is a self-generated broadcast packet * * Transmits a broadcast packet either immediately or if a delay is given * after that. Furthermore, queues additional retransmissions on wireless * interfaces. * * This call clones the given skb, hence the caller needs to take into * account that the data segment of the given skb might not be * modifiable anymore. * * Return: NETDEV_TX_OK on success and NETDEV_TX_BUSY on errors. */ static int __batadv_forw_bcast_packet(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned long delay, bool own_packet) { struct batadv_hard_iface *hard_iface; struct batadv_hard_iface *primary_if; int ret = NETDEV_TX_OK; primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if) return NETDEV_TX_BUSY; rcu_read_lock(); list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { if (hard_iface->soft_iface != bat_priv->soft_iface) continue; if (!kref_get_unless_zero(&hard_iface->refcount)) continue; if (batadv_send_no_broadcast(bat_priv, skb, own_packet, hard_iface)) { batadv_hardif_put(hard_iface); continue; } ret = batadv_forw_bcast_packet_if(bat_priv, skb, delay, own_packet, primary_if, hard_iface); batadv_hardif_put(hard_iface); if (ret == NETDEV_TX_BUSY) break; } rcu_read_unlock(); batadv_hardif_put(primary_if); return ret; } /** * batadv_forw_bcast_packet() - forward and queue a broadcast packet * @bat_priv: the bat priv with all the soft interface information * @skb: broadcast packet to add * @delay: number of jiffies to wait before sending * @own_packet: true if it is a self-generated broadcast packet * * Transmits a broadcast packet either immediately or if a delay is given * after that. Furthermore, queues additional retransmissions on wireless * interfaces. * * Return: NETDEV_TX_OK on success and NETDEV_TX_BUSY on errors. */ int batadv_forw_bcast_packet(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned long delay, bool own_packet) { return __batadv_forw_bcast_packet(bat_priv, skb, delay, own_packet); } /** * batadv_send_bcast_packet() - send and queue a broadcast packet * @bat_priv: the bat priv with all the soft interface information * @skb: broadcast packet to add * @delay: number of jiffies to wait before sending * @own_packet: true if it is a self-generated broadcast packet * * Transmits a broadcast packet either immediately or if a delay is given * after that. Furthermore, queues additional retransmissions on wireless * interfaces. * * Consumes the provided skb. */ void batadv_send_bcast_packet(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned long delay, bool own_packet) { __batadv_forw_bcast_packet(bat_priv, skb, delay, own_packet); consume_skb(skb); } /** * batadv_forw_packet_bcasts_left() - check if a retransmission is necessary * @forw_packet: the forwarding packet to check * * Checks whether a given packet has any (re)transmissions left on the provided * interface. * * hard_iface may be NULL: In that case the number of transmissions this skb had * so far is compared with the maximum amount of retransmissions independent of * any interface instead. * * Return: True if (re)transmissions are left, false otherwise. */ static bool batadv_forw_packet_bcasts_left(struct batadv_forw_packet *forw_packet) { return BATADV_SKB_CB(forw_packet->skb)->num_bcasts; } /** * batadv_forw_packet_bcasts_dec() - decrement retransmission counter of a * packet * @forw_packet: the packet to decrease the counter for */ static void batadv_forw_packet_bcasts_dec(struct batadv_forw_packet *forw_packet) { BATADV_SKB_CB(forw_packet->skb)->num_bcasts--; } /** * batadv_forw_packet_is_rebroadcast() - check packet for previous transmissions * @forw_packet: the packet to check * * Return: True if this packet was transmitted before, false otherwise. */ bool batadv_forw_packet_is_rebroadcast(struct batadv_forw_packet *forw_packet) { unsigned char num_bcasts = BATADV_SKB_CB(forw_packet->skb)->num_bcasts; return num_bcasts != forw_packet->if_outgoing->num_bcasts; } /** * batadv_send_outstanding_bcast_packet() - transmit a queued broadcast packet * @work: work queue item * * Transmits a queued broadcast packet and if necessary reschedules it. */ static void batadv_send_outstanding_bcast_packet(struct work_struct *work) { unsigned long send_time = jiffies + msecs_to_jiffies(5); struct batadv_forw_packet *forw_packet; struct delayed_work *delayed_work; struct batadv_priv *bat_priv; struct sk_buff *skb1; bool dropped = false; delayed_work = to_delayed_work(work); forw_packet = container_of(delayed_work, struct batadv_forw_packet, delayed_work); bat_priv = netdev_priv(forw_packet->if_incoming->soft_iface); if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING) { dropped = true; goto out; } if (batadv_dat_drop_broadcast_packet(bat_priv, forw_packet)) { dropped = true; goto out; } /* send a copy of the saved skb */ skb1 = skb_clone(forw_packet->skb, GFP_ATOMIC); if (!skb1) goto out; batadv_send_broadcast_skb(skb1, forw_packet->if_outgoing); batadv_forw_packet_bcasts_dec(forw_packet); if (batadv_forw_packet_bcasts_left(forw_packet)) { batadv_forw_packet_bcast_queue(bat_priv, forw_packet, send_time); return; } out: /* do we get something for free()? */ if (batadv_forw_packet_steal(forw_packet, &bat_priv->forw_bcast_list_lock)) batadv_forw_packet_free(forw_packet, dropped); } /** * batadv_purge_outstanding_packets() - stop/purge scheduled bcast/OGMv1 packets * @bat_priv: the bat priv with all the soft interface information * @hard_iface: the hard interface to cancel and purge bcast/ogm packets on * * This method cancels and purges any broadcast and OGMv1 packet on the given * hard_iface. If hard_iface is NULL, broadcast and OGMv1 packets on all hard * interfaces will be canceled and purged. * * This function might sleep. */ void batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, const struct batadv_hard_iface *hard_iface) { struct hlist_head head = HLIST_HEAD_INIT; if (hard_iface) batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "%s(): %s\n", __func__, hard_iface->net_dev->name); else batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "%s()\n", __func__); /* claim bcast list for free() */ spin_lock_bh(&bat_priv->forw_bcast_list_lock); batadv_forw_packet_list_steal(&bat_priv->forw_bcast_list, &head, hard_iface); spin_unlock_bh(&bat_priv->forw_bcast_list_lock); /* claim batman packet list for free() */ spin_lock_bh(&bat_priv->forw_bat_list_lock); batadv_forw_packet_list_steal(&bat_priv->forw_bat_list, &head, hard_iface); spin_unlock_bh(&bat_priv->forw_bat_list_lock); /* then cancel or wait for packet workers to finish and free */ batadv_forw_packet_list_free(&head); } |
5 2 3 3 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 | /* SPDX-License-Identifier: GPL-2.0-only */ /* L2TP internal definitions. * * Copyright (c) 2008,2009 Katalix Systems Ltd */ #include <linux/refcount.h> #ifndef _L2TP_CORE_H_ #define _L2TP_CORE_H_ #include <net/dst.h> #include <net/sock.h> #ifdef CONFIG_XFRM #include <net/xfrm.h> #endif /* Random numbers used for internal consistency checks of tunnel and session structures */ #define L2TP_TUNNEL_MAGIC 0x42114DDA #define L2TP_SESSION_MAGIC 0x0C04EB7D /* Per tunnel session hash table size */ #define L2TP_HASH_BITS 4 #define L2TP_HASH_SIZE BIT(L2TP_HASH_BITS) /* System-wide session hash table size */ #define L2TP_HASH_BITS_2 8 #define L2TP_HASH_SIZE_2 BIT(L2TP_HASH_BITS_2) struct sk_buff; struct l2tp_stats { atomic_long_t tx_packets; atomic_long_t tx_bytes; atomic_long_t tx_errors; atomic_long_t rx_packets; atomic_long_t rx_bytes; atomic_long_t rx_seq_discards; atomic_long_t rx_oos_packets; atomic_long_t rx_errors; atomic_long_t rx_cookie_discards; atomic_long_t rx_invalid; }; struct l2tp_tunnel; /* L2TP session configuration */ struct l2tp_session_cfg { enum l2tp_pwtype pw_type; unsigned int recv_seq:1; /* expect receive packets with sequence numbers? */ unsigned int send_seq:1; /* send packets with sequence numbers? */ unsigned int lns_mode:1; /* behave as LNS? * LAC enables sequence numbers under LNS control. */ u16 l2specific_type; /* Layer 2 specific type */ u8 cookie[8]; /* optional cookie */ int cookie_len; /* 0, 4 or 8 bytes */ u8 peer_cookie[8]; /* peer's cookie */ int peer_cookie_len; /* 0, 4 or 8 bytes */ int reorder_timeout; /* configured reorder timeout (in jiffies) */ char *ifname; }; /* Represents a session (pseudowire) instance. * Tracks runtime state including cookies, dataplane packet sequencing, and IO statistics. * Is linked into a per-tunnel session hashlist; and in the case of an L2TPv3 session into * an additional per-net ("global") hashlist. */ #define L2TP_SESSION_NAME_MAX 32 struct l2tp_session { int magic; /* should be L2TP_SESSION_MAGIC */ long dead; struct l2tp_tunnel *tunnel; /* back pointer to tunnel context */ u32 session_id; u32 peer_session_id; u8 cookie[8]; int cookie_len; u8 peer_cookie[8]; int peer_cookie_len; u16 l2specific_type; u16 hdr_len; u32 nr; /* session NR state (receive) */ u32 ns; /* session NR state (send) */ struct sk_buff_head reorder_q; /* receive reorder queue */ u32 nr_max; /* max NR. Depends on tunnel */ u32 nr_window_size; /* NR window size */ u32 nr_oos; /* NR of last OOS packet */ int nr_oos_count; /* for OOS recovery */ int nr_oos_count_max; struct hlist_node hlist; /* hash list node */ refcount_t ref_count; char name[L2TP_SESSION_NAME_MAX]; /* for logging */ char ifname[IFNAMSIZ]; unsigned int recv_seq:1; /* expect receive packets with sequence numbers? */ unsigned int send_seq:1; /* send packets with sequence numbers? */ unsigned int lns_mode:1; /* behave as LNS? * LAC enables sequence numbers under LNS control. */ int reorder_timeout; /* configured reorder timeout (in jiffies) */ int reorder_skip; /* set if skip to next nr */ enum l2tp_pwtype pwtype; struct l2tp_stats stats; struct hlist_node global_hlist; /* global hash list node */ /* Session receive handler for data packets. * Each pseudowire implementation should implement this callback in order to * handle incoming packets. Packets are passed to the pseudowire handler after * reordering, if data sequence numbers are enabled for the session. */ void (*recv_skb)(struct l2tp_session *session, struct sk_buff *skb, int data_len); /* Session close handler. * Each pseudowire implementation may implement this callback in order to carry * out pseudowire-specific shutdown actions. * The callback is called by core after unhashing the session and purging its * reorder queue. */ void (*session_close)(struct l2tp_session *session); /* Session show handler. * Pseudowire-specific implementation of debugfs session rendering. * The callback is called by l2tp_debugfs.c after rendering core session * information. */ void (*show)(struct seq_file *m, void *priv); u8 priv[]; /* private data */ }; /* L2TP tunnel configuration */ struct l2tp_tunnel_cfg { enum l2tp_encap_type encap; /* Used only for kernel-created sockets */ struct in_addr local_ip; struct in_addr peer_ip; #if IS_ENABLED(CONFIG_IPV6) struct in6_addr *local_ip6; struct in6_addr *peer_ip6; #endif u16 local_udp_port; u16 peer_udp_port; unsigned int use_udp_checksums:1, udp6_zero_tx_checksums:1, udp6_zero_rx_checksums:1; }; /* Represents a tunnel instance. * Tracks runtime state including IO statistics. * Holds the tunnel socket (either passed from userspace or directly created by the kernel). * Maintains a hashlist of sessions belonging to the tunnel instance. * Is linked into a per-net list of tunnels. */ #define L2TP_TUNNEL_NAME_MAX 20 struct l2tp_tunnel { int magic; /* Should be L2TP_TUNNEL_MAGIC */ unsigned long dead; struct rcu_head rcu; spinlock_t hlist_lock; /* write-protection for session_hlist */ bool acpt_newsess; /* indicates whether this tunnel accepts * new sessions. Protected by hlist_lock. */ struct hlist_head session_hlist[L2TP_HASH_SIZE]; /* hashed list of sessions, hashed by id */ u32 tunnel_id; u32 peer_tunnel_id; int version; /* 2=>L2TPv2, 3=>L2TPv3 */ char name[L2TP_TUNNEL_NAME_MAX]; /* for logging */ enum l2tp_encap_type encap; struct l2tp_stats stats; struct list_head list; /* list node on per-namespace list of tunnels */ struct net *l2tp_net; /* the net we belong to */ refcount_t ref_count; void (*old_sk_destruct)(struct sock *sk); struct sock *sock; /* parent socket */ int fd; /* parent fd, if tunnel socket was created * by userspace */ struct work_struct del_work; }; /* Pseudowire ops callbacks for use with the l2tp genetlink interface */ struct l2tp_nl_cmd_ops { /* The pseudowire session create callback is responsible for creating a session * instance for a specific pseudowire type. * It must call l2tp_session_create and l2tp_session_register to register the * session instance, as well as carry out any pseudowire-specific initialisation. * It must return >= 0 on success, or an appropriate negative errno value on failure. */ int (*session_create)(struct net *net, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg); /* The pseudowire session delete callback is responsible for initiating the deletion * of a session instance. * It must call l2tp_session_delete, as well as carry out any pseudowire-specific * teardown actions. */ void (*session_delete)(struct l2tp_session *session); }; static inline void *l2tp_session_priv(struct l2tp_session *session) { return &session->priv[0]; } /* Tunnel and session refcounts */ void l2tp_tunnel_inc_refcount(struct l2tp_tunnel *tunnel); void l2tp_tunnel_dec_refcount(struct l2tp_tunnel *tunnel); void l2tp_session_inc_refcount(struct l2tp_session *session); void l2tp_session_dec_refcount(struct l2tp_session *session); /* Tunnel and session lookup. * These functions take a reference on the instances they return, so * the caller must ensure that the reference is dropped appropriately. */ struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id); struct l2tp_tunnel *l2tp_tunnel_get_nth(const struct net *net, int nth); struct l2tp_session *l2tp_tunnel_get_session(struct l2tp_tunnel *tunnel, u32 session_id); struct l2tp_session *l2tp_session_get(const struct net *net, u32 session_id); struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth); struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net, const char *ifname); /* Tunnel and session lifetime management. * Creation of a new instance is a two-step process: create, then register. * Destruction is triggered using the *_delete functions, and completes asynchronously. */ int l2tp_tunnel_create(int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp); int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, struct l2tp_tunnel_cfg *cfg); void l2tp_tunnel_delete(struct l2tp_tunnel *tunnel); struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg); int l2tp_session_register(struct l2tp_session *session, struct l2tp_tunnel *tunnel); void l2tp_session_delete(struct l2tp_session *session); /* Receive path helpers. If data sequencing is enabled for the session these * functions handle queuing and reordering prior to passing packets to the * pseudowire code to be passed to userspace. */ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, unsigned char *ptr, unsigned char *optr, u16 hdrflags, int length); int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb); /* Transmit path helpers for sending packets over the tunnel socket. */ void l2tp_session_set_header_len(struct l2tp_session *session, int version); int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb); /* Pseudowire management. * Pseudowires should register with l2tp core on module init, and unregister * on module exit. */ int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, const struct l2tp_nl_cmd_ops *ops); void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type); /* IOCTL helper for IP encap modules. */ int l2tp_ioctl(struct sock *sk, int cmd, int *karg); /* Extract the tunnel structure from a socket's sk_user_data pointer, * validating the tunnel magic feather. */ struct l2tp_tunnel *l2tp_sk_to_tunnel(struct sock *sk); static inline int l2tp_get_l2specific_len(struct l2tp_session *session) { switch (session->l2specific_type) { case L2TP_L2SPECTYPE_DEFAULT: return 4; case L2TP_L2SPECTYPE_NONE: default: return 0; } } static inline u32 l2tp_tunnel_dst_mtu(const struct l2tp_tunnel *tunnel) { struct dst_entry *dst; u32 mtu; dst = sk_dst_get(tunnel->sock); if (!dst) return 0; mtu = dst_mtu(dst); dst_release(dst); return mtu; } #ifdef CONFIG_XFRM static inline bool l2tp_tunnel_uses_xfrm(const struct l2tp_tunnel *tunnel) { struct sock *sk = tunnel->sock; return sk && (rcu_access_pointer(sk->sk_policy[0]) || rcu_access_pointer(sk->sk_policy[1])); } #else static inline bool l2tp_tunnel_uses_xfrm(const struct l2tp_tunnel *tunnel) { return false; } #endif static inline int l2tp_v3_ensure_opt_in_linear(struct l2tp_session *session, struct sk_buff *skb, unsigned char **ptr, unsigned char **optr) { int opt_len = session->peer_cookie_len + l2tp_get_l2specific_len(session); if (opt_len > 0) { int off = *ptr - *optr; if (!pskb_may_pull(skb, off + opt_len)) return -1; if (skb->data != *optr) { *optr = skb->data; *ptr = skb->data + off; } } return 0; } #define MODULE_ALIAS_L2TP_PWTYPE(type) \ MODULE_ALIAS("net-l2tp-type-" __stringify(type)) #endif /* _L2TP_CORE_H_ */ |
2391 85 1157 2309 2306 2310 2307 102 100 1 1605 28 1602 6 1620 24 1602 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 | // SPDX-License-Identifier: GPL-2.0 /* sysfs entries for device PM */ #include <linux/device.h> #include <linux/kobject.h> #include <linux/string.h> #include <linux/export.h> #include <linux/pm_qos.h> #include <linux/pm_runtime.h> #include <linux/pm_wakeup.h> #include <linux/atomic.h> #include <linux/jiffies.h> #include "power.h" /* * control - Report/change current runtime PM setting of the device * * Runtime power management of a device can be blocked with the help of * this attribute. All devices have one of the following two values for * the power/control file: * * + "auto\n" to allow the device to be power managed at run time; * + "on\n" to prevent the device from being power managed at run time; * * The default for all devices is "auto", which means that devices may be * subject to automatic power management, depending on their drivers. * Changing this attribute to "on" prevents the driver from power managing * the device at run time. Doing that while the device is suspended causes * it to be woken up. * * wakeup - Report/change current wakeup option for device * * Some devices support "wakeup" events, which are hardware signals * used to activate devices from suspended or low power states. Such * devices have one of three values for the sysfs power/wakeup file: * * + "enabled\n" to issue the events; * + "disabled\n" not to do so; or * + "\n" for temporary or permanent inability to issue wakeup. * * (For example, unconfigured USB devices can't issue wakeups.) * * Familiar examples of devices that can issue wakeup events include * keyboards and mice (both PS2 and USB styles), power buttons, modems, * "Wake-On-LAN" Ethernet links, GPIO lines, and more. Some events * will wake the entire system from a suspend state; others may just * wake up the device (if the system as a whole is already active). * Some wakeup events use normal IRQ lines; other use special out * of band signaling. * * It is the responsibility of device drivers to enable (or disable) * wakeup signaling as part of changing device power states, respecting * the policy choices provided through the driver model. * * Devices may not be able to generate wakeup events from all power * states. Also, the events may be ignored in some configurations; * for example, they might need help from other devices that aren't * active, or which may have wakeup disabled. Some drivers rely on * wakeup events internally (unless they are disabled), keeping * their hardware in low power modes whenever they're unused. This * saves runtime power, without requiring system-wide sleep states. * * async - Report/change current async suspend setting for the device * * Asynchronous suspend and resume of the device during system-wide power * state transitions can be enabled by writing "enabled" to this file. * Analogously, if "disabled" is written to this file, the device will be * suspended and resumed synchronously. * * All devices have one of the following two values for power/async: * * + "enabled\n" to permit the asynchronous suspend/resume of the device; * + "disabled\n" to forbid it; * * NOTE: It generally is unsafe to permit the asynchronous suspend/resume * of a device unless it is certain that all of the PM dependencies of the * device are known to the PM core. However, for some devices this * attribute is set to "enabled" by bus type code or device drivers and in * that cases it should be safe to leave the default value. * * autosuspend_delay_ms - Report/change a device's autosuspend_delay value * * Some drivers don't want to carry out a runtime suspend as soon as a * device becomes idle; they want it always to remain idle for some period * of time before suspending it. This period is the autosuspend_delay * value (expressed in milliseconds) and it can be controlled by the user. * If the value is negative then the device will never be runtime * suspended. * * NOTE: The autosuspend_delay_ms attribute and the autosuspend_delay * value are used only if the driver calls pm_runtime_use_autosuspend(). * * wakeup_count - Report the number of wakeup events related to the device */ const char power_group_name[] = "power"; EXPORT_SYMBOL_GPL(power_group_name); static const char ctrl_auto[] = "auto"; static const char ctrl_on[] = "on"; static ssize_t control_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%s\n", dev->power.runtime_auto ? ctrl_auto : ctrl_on); } static ssize_t control_store(struct device * dev, struct device_attribute *attr, const char * buf, size_t n) { device_lock(dev); if (sysfs_streq(buf, ctrl_auto)) pm_runtime_allow(dev); else if (sysfs_streq(buf, ctrl_on)) pm_runtime_forbid(dev); else n = -EINVAL; device_unlock(dev); return n; } static DEVICE_ATTR_RW(control); static ssize_t runtime_active_time_show(struct device *dev, struct device_attribute *attr, char *buf) { u64 tmp = pm_runtime_active_time(dev); do_div(tmp, NSEC_PER_MSEC); return sysfs_emit(buf, "%llu\n", tmp); } static DEVICE_ATTR_RO(runtime_active_time); static ssize_t runtime_suspended_time_show(struct device *dev, struct device_attribute *attr, char *buf) { u64 tmp = pm_runtime_suspended_time(dev); do_div(tmp, NSEC_PER_MSEC); return sysfs_emit(buf, "%llu\n", tmp); } static DEVICE_ATTR_RO(runtime_suspended_time); static ssize_t runtime_status_show(struct device *dev, struct device_attribute *attr, char *buf) { const char *output; if (dev->power.runtime_error) { output = "error"; } else if (dev->power.disable_depth) { output = "unsupported"; } else { switch (dev->power.runtime_status) { case RPM_SUSPENDED: output = "suspended"; break; case RPM_SUSPENDING: output = "suspending"; break; case RPM_RESUMING: output = "resuming"; break; case RPM_ACTIVE: output = "active"; break; default: return -EIO; } } return sysfs_emit(buf, "%s\n", output); } static DEVICE_ATTR_RO(runtime_status); static ssize_t autosuspend_delay_ms_show(struct device *dev, struct device_attribute *attr, char *buf) { if (!dev->power.use_autosuspend) return -EIO; return sysfs_emit(buf, "%d\n", dev->power.autosuspend_delay); } static ssize_t autosuspend_delay_ms_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t n) { long delay; if (!dev->power.use_autosuspend) return -EIO; if (kstrtol(buf, 10, &delay) != 0 || delay != (int) delay) return -EINVAL; device_lock(dev); pm_runtime_set_autosuspend_delay(dev, delay); device_unlock(dev); return n; } static DEVICE_ATTR_RW(autosuspend_delay_ms); static ssize_t pm_qos_resume_latency_us_show(struct device *dev, struct device_attribute *attr, char *buf) { s32 value = dev_pm_qos_requested_resume_latency(dev); if (value == 0) return sysfs_emit(buf, "n/a\n"); if (value == PM_QOS_RESUME_LATENCY_NO_CONSTRAINT) value = 0; return sysfs_emit(buf, "%d\n", value); } static ssize_t pm_qos_resume_latency_us_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t n) { s32 value; int ret; if (!kstrtos32(buf, 0, &value)) { /* * Prevent users from writing negative or "no constraint" values * directly. */ if (value < 0 || value == PM_QOS_RESUME_LATENCY_NO_CONSTRAINT) return -EINVAL; if (value == 0) value = PM_QOS_RESUME_LATENCY_NO_CONSTRAINT; } else if (sysfs_streq(buf, "n/a")) { value = 0; } else { return -EINVAL; } ret = dev_pm_qos_update_request(dev->power.qos->resume_latency_req, value); return ret < 0 ? ret : n; } static DEVICE_ATTR_RW(pm_qos_resume_latency_us); static ssize_t pm_qos_latency_tolerance_us_show(struct device *dev, struct device_attribute *attr, char *buf) { s32 value = dev_pm_qos_get_user_latency_tolerance(dev); if (value < 0) return sysfs_emit(buf, "%s\n", "auto"); if (value == PM_QOS_LATENCY_ANY) return sysfs_emit(buf, "%s\n", "any"); return sysfs_emit(buf, "%d\n", value); } static ssize_t pm_qos_latency_tolerance_us_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t n) { s32 value; int ret; if (kstrtos32(buf, 0, &value) == 0) { /* Users can't write negative values directly */ if (value < 0) return -EINVAL; } else { if (sysfs_streq(buf, "auto")) value = PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT; else if (sysfs_streq(buf, "any")) value = PM_QOS_LATENCY_ANY; else return -EINVAL; } ret = dev_pm_qos_update_user_latency_tolerance(dev, value); return ret < 0 ? ret : n; } static DEVICE_ATTR_RW(pm_qos_latency_tolerance_us); static ssize_t pm_qos_no_power_off_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%d\n", !!(dev_pm_qos_requested_flags(dev) & PM_QOS_FLAG_NO_POWER_OFF)); } static ssize_t pm_qos_no_power_off_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t n) { int ret; if (kstrtoint(buf, 0, &ret)) return -EINVAL; if (ret != 0 && ret != 1) return -EINVAL; ret = dev_pm_qos_update_flags(dev, PM_QOS_FLAG_NO_POWER_OFF, ret); return ret < 0 ? ret : n; } static DEVICE_ATTR_RW(pm_qos_no_power_off); #ifdef CONFIG_PM_SLEEP static const char _enabled[] = "enabled"; static const char _disabled[] = "disabled"; static ssize_t wakeup_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%s\n", device_can_wakeup(dev) ? (device_may_wakeup(dev) ? _enabled : _disabled) : ""); } static ssize_t wakeup_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t n) { if (!device_can_wakeup(dev)) return -EINVAL; if (sysfs_streq(buf, _enabled)) device_set_wakeup_enable(dev, 1); else if (sysfs_streq(buf, _disabled)) device_set_wakeup_enable(dev, 0); else return -EINVAL; return n; } static DEVICE_ATTR_RW(wakeup); static ssize_t wakeup_count_show(struct device *dev, struct device_attribute *attr, char *buf) { unsigned long count; bool enabled = false; spin_lock_irq(&dev->power.lock); if (dev->power.wakeup) { count = dev->power.wakeup->wakeup_count; enabled = true; } spin_unlock_irq(&dev->power.lock); if (!enabled) return sysfs_emit(buf, "\n"); return sysfs_emit(buf, "%lu\n", count); } static DEVICE_ATTR_RO(wakeup_count); static ssize_t wakeup_active_count_show(struct device *dev, struct device_attribute *attr, char *buf) { unsigned long count; bool enabled = false; spin_lock_irq(&dev->power.lock); if (dev->power.wakeup) { count = dev->power.wakeup->active_count; enabled = true; } spin_unlock_irq(&dev->power.lock); if (!enabled) return sysfs_emit(buf, "\n"); return sysfs_emit(buf, "%lu\n", count); } static DEVICE_ATTR_RO(wakeup_active_count); static ssize_t wakeup_abort_count_show(struct device *dev, struct device_attribute *attr, char *buf) { unsigned long count; bool enabled = false; spin_lock_irq(&dev->power.lock); if (dev->power.wakeup) { count = dev->power.wakeup->wakeup_count; enabled = true; } spin_unlock_irq(&dev->power.lock); if (!enabled) return sysfs_emit(buf, "\n"); return sysfs_emit(buf, "%lu\n", count); } static DEVICE_ATTR_RO(wakeup_abort_count); static ssize_t wakeup_expire_count_show(struct device *dev, struct device_attribute *attr, char *buf) { unsigned long count; bool enabled = false; spin_lock_irq(&dev->power.lock); if (dev->power.wakeup) { count = dev->power.wakeup->expire_count; enabled = true; } spin_unlock_irq(&dev->power.lock); if (!enabled) return sysfs_emit(buf, "\n"); return sysfs_emit(buf, "%lu\n", count); } static DEVICE_ATTR_RO(wakeup_expire_count); static ssize_t wakeup_active_show(struct device *dev, struct device_attribute *attr, char *buf) { unsigned int active; bool enabled = false; spin_lock_irq(&dev->power.lock); if (dev->power.wakeup) { active = dev->power.wakeup->active; enabled = true; } spin_unlock_irq(&dev->power.lock); if (!enabled) return sysfs_emit(buf, "\n"); return sysfs_emit(buf, "%u\n", active); } static DEVICE_ATTR_RO(wakeup_active); static ssize_t wakeup_total_time_ms_show(struct device *dev, struct device_attribute *attr, char *buf) { s64 msec; bool enabled = false; spin_lock_irq(&dev->power.lock); if (dev->power.wakeup) { msec = ktime_to_ms(dev->power.wakeup->total_time); enabled = true; } spin_unlock_irq(&dev->power.lock); if (!enabled) return sysfs_emit(buf, "\n"); return sysfs_emit(buf, "%lld\n", msec); } static DEVICE_ATTR_RO(wakeup_total_time_ms); static ssize_t wakeup_max_time_ms_show(struct device *dev, struct device_attribute *attr, char *buf) { s64 msec; bool enabled = false; spin_lock_irq(&dev->power.lock); if (dev->power.wakeup) { msec = ktime_to_ms(dev->power.wakeup->max_time); enabled = true; } spin_unlock_irq(&dev->power.lock); if (!enabled) return sysfs_emit(buf, "\n"); return sysfs_emit(buf, "%lld\n", msec); } static DEVICE_ATTR_RO(wakeup_max_time_ms); static ssize_t wakeup_last_time_ms_show(struct device *dev, struct device_attribute *attr, char *buf) { s64 msec; bool enabled = false; spin_lock_irq(&dev->power.lock); if (dev->power.wakeup) { msec = ktime_to_ms(dev->power.wakeup->last_time); enabled = true; } spin_unlock_irq(&dev->power.lock); if (!enabled) return sysfs_emit(buf, "\n"); return sysfs_emit(buf, "%lld\n", msec); } static inline int dpm_sysfs_wakeup_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid) { if (dev->power.wakeup && dev->power.wakeup->dev) return device_change_owner(dev->power.wakeup->dev, kuid, kgid); return 0; } static DEVICE_ATTR_RO(wakeup_last_time_ms); #ifdef CONFIG_PM_AUTOSLEEP static ssize_t wakeup_prevent_sleep_time_ms_show(struct device *dev, struct device_attribute *attr, char *buf) { s64 msec; bool enabled = false; spin_lock_irq(&dev->power.lock); if (dev->power.wakeup) { msec = ktime_to_ms(dev->power.wakeup->prevent_sleep_time); enabled = true; } spin_unlock_irq(&dev->power.lock); if (!enabled) return sysfs_emit(buf, "\n"); return sysfs_emit(buf, "%lld\n", msec); } static DEVICE_ATTR_RO(wakeup_prevent_sleep_time_ms); #endif /* CONFIG_PM_AUTOSLEEP */ #else /* CONFIG_PM_SLEEP */ static inline int dpm_sysfs_wakeup_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid) { return 0; } #endif #ifdef CONFIG_PM_ADVANCED_DEBUG static ssize_t runtime_usage_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%d\n", atomic_read(&dev->power.usage_count)); } static DEVICE_ATTR_RO(runtime_usage); static ssize_t runtime_active_kids_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%d\n", dev->power.ignore_children ? 0 : atomic_read(&dev->power.child_count)); } static DEVICE_ATTR_RO(runtime_active_kids); static ssize_t runtime_enabled_show(struct device *dev, struct device_attribute *attr, char *buf) { const char *output; if (dev->power.disable_depth && !dev->power.runtime_auto) output = "disabled & forbidden"; else if (dev->power.disable_depth) output = "disabled"; else if (!dev->power.runtime_auto) output = "forbidden"; else output = "enabled"; return sysfs_emit(buf, "%s\n", output); } static DEVICE_ATTR_RO(runtime_enabled); #ifdef CONFIG_PM_SLEEP static ssize_t async_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%s\n", device_async_suspend_enabled(dev) ? _enabled : _disabled); } static ssize_t async_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t n) { if (sysfs_streq(buf, _enabled)) device_enable_async_suspend(dev); else if (sysfs_streq(buf, _disabled)) device_disable_async_suspend(dev); else return -EINVAL; return n; } static DEVICE_ATTR_RW(async); #endif /* CONFIG_PM_SLEEP */ #endif /* CONFIG_PM_ADVANCED_DEBUG */ static struct attribute *power_attrs[] = { #ifdef CONFIG_PM_ADVANCED_DEBUG #ifdef CONFIG_PM_SLEEP &dev_attr_async.attr, #endif &dev_attr_runtime_status.attr, &dev_attr_runtime_usage.attr, &dev_attr_runtime_active_kids.attr, &dev_attr_runtime_enabled.attr, #endif /* CONFIG_PM_ADVANCED_DEBUG */ NULL, }; static const struct attribute_group pm_attr_group = { .name = power_group_name, .attrs = power_attrs, }; static struct attribute *wakeup_attrs[] = { #ifdef CONFIG_PM_SLEEP &dev_attr_wakeup.attr, &dev_attr_wakeup_count.attr, &dev_attr_wakeup_active_count.attr, &dev_attr_wakeup_abort_count.attr, &dev_attr_wakeup_expire_count.attr, &dev_attr_wakeup_active.attr, &dev_attr_wakeup_total_time_ms.attr, &dev_attr_wakeup_max_time_ms.attr, &dev_attr_wakeup_last_time_ms.attr, #ifdef CONFIG_PM_AUTOSLEEP &dev_attr_wakeup_prevent_sleep_time_ms.attr, #endif #endif NULL, }; static const struct attribute_group pm_wakeup_attr_group = { .name = power_group_name, .attrs = wakeup_attrs, }; static struct attribute *runtime_attrs[] = { #ifndef CONFIG_PM_ADVANCED_DEBUG &dev_attr_runtime_status.attr, #endif &dev_attr_control.attr, &dev_attr_runtime_suspended_time.attr, &dev_attr_runtime_active_time.attr, &dev_attr_autosuspend_delay_ms.attr, NULL, }; static const struct attribute_group pm_runtime_attr_group = { .name = power_group_name, .attrs = runtime_attrs, }; static struct attribute *pm_qos_resume_latency_attrs[] = { &dev_attr_pm_qos_resume_latency_us.attr, NULL, }; static const struct attribute_group pm_qos_resume_latency_attr_group = { .name = power_group_name, .attrs = pm_qos_resume_latency_attrs, }; static struct attribute *pm_qos_latency_tolerance_attrs[] = { &dev_attr_pm_qos_latency_tolerance_us.attr, NULL, }; static const struct attribute_group pm_qos_latency_tolerance_attr_group = { .name = power_group_name, .attrs = pm_qos_latency_tolerance_attrs, }; static struct attribute *pm_qos_flags_attrs[] = { &dev_attr_pm_qos_no_power_off.attr, NULL, }; static const struct attribute_group pm_qos_flags_attr_group = { .name = power_group_name, .attrs = pm_qos_flags_attrs, }; int dpm_sysfs_add(struct device *dev) { int rc; /* No need to create PM sysfs if explicitly disabled. */ if (device_pm_not_required(dev)) return 0; rc = sysfs_create_group(&dev->kobj, &pm_attr_group); if (rc) return rc; if (!pm_runtime_has_no_callbacks(dev)) { rc = sysfs_merge_group(&dev->kobj, &pm_runtime_attr_group); if (rc) goto err_out; } if (device_can_wakeup(dev)) { rc = sysfs_merge_group(&dev->kobj, &pm_wakeup_attr_group); if (rc) goto err_runtime; } if (dev->power.set_latency_tolerance) { rc = sysfs_merge_group(&dev->kobj, &pm_qos_latency_tolerance_attr_group); if (rc) goto err_wakeup; } rc = pm_wakeup_source_sysfs_add(dev); if (rc) goto err_latency; return 0; err_latency: sysfs_unmerge_group(&dev->kobj, &pm_qos_latency_tolerance_attr_group); err_wakeup: sysfs_unmerge_group(&dev->kobj, &pm_wakeup_attr_group); err_runtime: sysfs_unmerge_group(&dev->kobj, &pm_runtime_attr_group); err_out: sysfs_remove_group(&dev->kobj, &pm_attr_group); return rc; } int dpm_sysfs_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid) { int rc; if (device_pm_not_required(dev)) return 0; rc = sysfs_group_change_owner(&dev->kobj, &pm_attr_group, kuid, kgid); if (rc) return rc; if (!pm_runtime_has_no_callbacks(dev)) { rc = sysfs_group_change_owner( &dev->kobj, &pm_runtime_attr_group, kuid, kgid); if (rc) return rc; } if (device_can_wakeup(dev)) { rc = sysfs_group_change_owner(&dev->kobj, &pm_wakeup_attr_group, kuid, kgid); if (rc) return rc; rc = dpm_sysfs_wakeup_change_owner(dev, kuid, kgid); if (rc) return rc; } if (dev->power.set_latency_tolerance) { rc = sysfs_group_change_owner( &dev->kobj, &pm_qos_latency_tolerance_attr_group, kuid, kgid); if (rc) return rc; } return 0; } int wakeup_sysfs_add(struct device *dev) { int ret = sysfs_merge_group(&dev->kobj, &pm_wakeup_attr_group); if (!ret) kobject_uevent(&dev->kobj, KOBJ_CHANGE); return ret; } void wakeup_sysfs_remove(struct device *dev) { sysfs_unmerge_group(&dev->kobj, &pm_wakeup_attr_group); kobject_uevent(&dev->kobj, KOBJ_CHANGE); } int pm_qos_sysfs_add_resume_latency(struct device *dev) { return sysfs_merge_group(&dev->kobj, &pm_qos_resume_latency_attr_group); } void pm_qos_sysfs_remove_resume_latency(struct device *dev) { sysfs_unmerge_group(&dev->kobj, &pm_qos_resume_latency_attr_group); } int pm_qos_sysfs_add_flags(struct device *dev) { return sysfs_merge_group(&dev->kobj, &pm_qos_flags_attr_group); } void pm_qos_sysfs_remove_flags(struct device *dev) { sysfs_unmerge_group(&dev->kobj, &pm_qos_flags_attr_group); } int pm_qos_sysfs_add_latency_tolerance(struct device *dev) { return sysfs_merge_group(&dev->kobj, &pm_qos_latency_tolerance_attr_group); } void pm_qos_sysfs_remove_latency_tolerance(struct device *dev) { sysfs_unmerge_group(&dev->kobj, &pm_qos_latency_tolerance_attr_group); } void rpm_sysfs_remove(struct device *dev) { sysfs_unmerge_group(&dev->kobj, &pm_runtime_attr_group); } void dpm_sysfs_remove(struct device *dev) { if (device_pm_not_required(dev)) return; sysfs_unmerge_group(&dev->kobj, &pm_qos_latency_tolerance_attr_group); dev_pm_qos_constraints_destroy(dev); rpm_sysfs_remove(dev); sysfs_unmerge_group(&dev->kobj, &pm_wakeup_attr_group); sysfs_remove_group(&dev->kobj, &pm_attr_group); } |
1 2 2 2 1 1 2 988 3 987 2 2 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 | // SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> #include <linux/errno.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/scatterlist.h> #include <linux/mutex.h> #include <linux/timer.h> #include <linux/usb.h> #define SIMPLE_IO_TIMEOUT 10000 /* in milliseconds */ /*-------------------------------------------------------------------------*/ static int override_alt = -1; module_param_named(alt, override_alt, int, 0644); MODULE_PARM_DESC(alt, ">= 0 to override altsetting selection"); static void complicated_callback(struct urb *urb); /*-------------------------------------------------------------------------*/ /* FIXME make these public somewhere; usbdevfs.h? */ /* Parameter for usbtest driver. */ struct usbtest_param_32 { /* inputs */ __u32 test_num; /* 0..(TEST_CASES-1) */ __u32 iterations; __u32 length; __u32 vary; __u32 sglen; /* outputs */ __s32 duration_sec; __s32 duration_usec; }; /* * Compat parameter to the usbtest driver. * This supports older user space binaries compiled with 64 bit compiler. */ struct usbtest_param_64 { /* inputs */ __u32 test_num; /* 0..(TEST_CASES-1) */ __u32 iterations; __u32 length; __u32 vary; __u32 sglen; /* outputs */ __s64 duration_sec; __s64 duration_usec; }; /* IOCTL interface to the driver. */ #define USBTEST_REQUEST_32 _IOWR('U', 100, struct usbtest_param_32) /* COMPAT IOCTL interface to the driver. */ #define USBTEST_REQUEST_64 _IOWR('U', 100, struct usbtest_param_64) /*-------------------------------------------------------------------------*/ #define GENERIC /* let probe() bind using module params */ /* Some devices that can be used for testing will have "real" drivers. * Entries for those need to be enabled here by hand, after disabling * that "real" driver. */ //#define IBOT2 /* grab iBOT2 webcams */ //#define KEYSPAN_19Qi /* grab un-renumerated serial adapter */ /*-------------------------------------------------------------------------*/ struct usbtest_info { const char *name; u8 ep_in; /* bulk/intr source */ u8 ep_out; /* bulk/intr sink */ unsigned autoconf:1; unsigned ctrl_out:1; unsigned iso:1; /* try iso in/out */ unsigned intr:1; /* try interrupt in/out */ int alt; }; /* this is accessed only through usbfs ioctl calls. * one ioctl to issue a test ... one lock per device. * tests create other threads if they need them. * urbs and buffers are allocated dynamically, * and data generated deterministically. */ struct usbtest_dev { struct usb_interface *intf; struct usbtest_info *info; int in_pipe; int out_pipe; int in_iso_pipe; int out_iso_pipe; int in_int_pipe; int out_int_pipe; struct usb_endpoint_descriptor *iso_in, *iso_out; struct usb_endpoint_descriptor *int_in, *int_out; struct mutex lock; #define TBUF_SIZE 256 u8 *buf; }; static struct usb_device *testdev_to_usbdev(struct usbtest_dev *test) { return interface_to_usbdev(test->intf); } /* set up all urbs so they can be used with either bulk or interrupt */ #define INTERRUPT_RATE 1 /* msec/transfer */ #define ERROR(tdev, fmt, args...) \ dev_err(&(tdev)->intf->dev , fmt , ## args) #define WARNING(tdev, fmt, args...) \ dev_warn(&(tdev)->intf->dev , fmt , ## args) #define GUARD_BYTE 0xA5 #define MAX_SGLEN 128 /*-------------------------------------------------------------------------*/ static inline void endpoint_update(int edi, struct usb_host_endpoint **in, struct usb_host_endpoint **out, struct usb_host_endpoint *e) { if (edi) { if (!*in) *in = e; } else { if (!*out) *out = e; } } static int get_endpoints(struct usbtest_dev *dev, struct usb_interface *intf) { int tmp; struct usb_host_interface *alt; struct usb_host_endpoint *in, *out; struct usb_host_endpoint *iso_in, *iso_out; struct usb_host_endpoint *int_in, *int_out; struct usb_device *udev; for (tmp = 0; tmp < intf->num_altsetting; tmp++) { unsigned ep; in = out = NULL; iso_in = iso_out = NULL; int_in = int_out = NULL; alt = intf->altsetting + tmp; if (override_alt >= 0 && override_alt != alt->desc.bAlternateSetting) continue; /* take the first altsetting with in-bulk + out-bulk; * ignore other endpoints and altsettings. */ for (ep = 0; ep < alt->desc.bNumEndpoints; ep++) { struct usb_host_endpoint *e; int edi; e = alt->endpoint + ep; edi = usb_endpoint_dir_in(&e->desc); switch (usb_endpoint_type(&e->desc)) { case USB_ENDPOINT_XFER_BULK: endpoint_update(edi, &in, &out, e); continue; case USB_ENDPOINT_XFER_INT: if (dev->info->intr) endpoint_update(edi, &int_in, &int_out, e); continue; case USB_ENDPOINT_XFER_ISOC: if (dev->info->iso) endpoint_update(edi, &iso_in, &iso_out, e); fallthrough; default: continue; } } if ((in && out) || iso_in || iso_out || int_in || int_out) goto found; } return -EINVAL; found: udev = testdev_to_usbdev(dev); dev->info->alt = alt->desc.bAlternateSetting; if (alt->desc.bAlternateSetting != 0) { tmp = usb_set_interface(udev, alt->desc.bInterfaceNumber, alt->desc.bAlternateSetting); if (tmp < 0) return tmp; } if (in) dev->in_pipe = usb_rcvbulkpipe(udev, in->desc.bEndpointAddress & USB_ENDPOINT_NUMBER_MASK); if (out) dev->out_pipe = usb_sndbulkpipe(udev, out->desc.bEndpointAddress & USB_ENDPOINT_NUMBER_MASK); if (iso_in) { dev->iso_in = &iso_in->desc; dev->in_iso_pipe = usb_rcvisocpipe(udev, iso_in->desc.bEndpointAddress & USB_ENDPOINT_NUMBER_MASK); } if (iso_out) { dev->iso_out = &iso_out->desc; dev->out_iso_pipe = usb_sndisocpipe(udev, iso_out->desc.bEndpointAddress & USB_ENDPOINT_NUMBER_MASK); } if (int_in) { dev->int_in = &int_in->desc; dev->in_int_pipe = usb_rcvintpipe(udev, int_in->desc.bEndpointAddress & USB_ENDPOINT_NUMBER_MASK); } if (int_out) { dev->int_out = &int_out->desc; dev->out_int_pipe = usb_sndintpipe(udev, int_out->desc.bEndpointAddress & USB_ENDPOINT_NUMBER_MASK); } return 0; } /*-------------------------------------------------------------------------*/ /* Support for testing basic non-queued I/O streams. * * These just package urbs as requests that can be easily canceled. * Each urb's data buffer is dynamically allocated; callers can fill * them with non-zero test data (or test for it) when appropriate. */ static void simple_callback(struct urb *urb) { complete(urb->context); } static struct urb *usbtest_alloc_urb( struct usb_device *udev, int pipe, unsigned long bytes, unsigned transfer_flags, unsigned offset, u8 bInterval, usb_complete_t complete_fn) { struct urb *urb; urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) return urb; if (bInterval) usb_fill_int_urb(urb, udev, pipe, NULL, bytes, complete_fn, NULL, bInterval); else usb_fill_bulk_urb(urb, udev, pipe, NULL, bytes, complete_fn, NULL); urb->interval = (udev->speed == USB_SPEED_HIGH) ? (INTERRUPT_RATE << 3) : INTERRUPT_RATE; urb->transfer_flags = transfer_flags; if (usb_pipein(pipe)) urb->transfer_flags |= URB_SHORT_NOT_OK; if ((bytes + offset) == 0) return urb; if (urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP) urb->transfer_buffer = usb_alloc_coherent(udev, bytes + offset, GFP_KERNEL, &urb->transfer_dma); else urb->transfer_buffer = kmalloc(bytes + offset, GFP_KERNEL); if (!urb->transfer_buffer) { usb_free_urb(urb); return NULL; } /* To test unaligned transfers add an offset and fill the unused memory with a guard value */ if (offset) { memset(urb->transfer_buffer, GUARD_BYTE, offset); urb->transfer_buffer += offset; if (urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP) urb->transfer_dma += offset; } /* For inbound transfers use guard byte so that test fails if data not correctly copied */ memset(urb->transfer_buffer, usb_pipein(urb->pipe) ? GUARD_BYTE : 0, bytes); return urb; } static struct urb *simple_alloc_urb( struct usb_device *udev, int pipe, unsigned long bytes, u8 bInterval) { return usbtest_alloc_urb(udev, pipe, bytes, URB_NO_TRANSFER_DMA_MAP, 0, bInterval, simple_callback); } static struct urb *complicated_alloc_urb( struct usb_device *udev, int pipe, unsigned long bytes, u8 bInterval) { return usbtest_alloc_urb(udev, pipe, bytes, URB_NO_TRANSFER_DMA_MAP, 0, bInterval, complicated_callback); } static unsigned pattern; static unsigned mod_pattern; module_param_named(pattern, mod_pattern, uint, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(mod_pattern, "i/o pattern (0 == zeroes)"); static unsigned get_maxpacket(struct usb_device *udev, int pipe) { struct usb_host_endpoint *ep; ep = usb_pipe_endpoint(udev, pipe); return le16_to_cpup(&ep->desc.wMaxPacketSize); } static int ss_isoc_get_packet_num(struct usb_device *udev, int pipe) { struct usb_host_endpoint *ep = usb_pipe_endpoint(udev, pipe); return USB_SS_MULT(ep->ss_ep_comp.bmAttributes) * (1 + ep->ss_ep_comp.bMaxBurst); } static void simple_fill_buf(struct urb *urb) { unsigned i; u8 *buf = urb->transfer_buffer; unsigned len = urb->transfer_buffer_length; unsigned maxpacket; switch (pattern) { default: fallthrough; case 0: memset(buf, 0, len); break; case 1: /* mod63 */ maxpacket = get_maxpacket(urb->dev, urb->pipe); for (i = 0; i < len; i++) *buf++ = (u8) ((i % maxpacket) % 63); break; } } static inline unsigned long buffer_offset(void *buf) { return (unsigned long)buf & (ARCH_KMALLOC_MINALIGN - 1); } static int check_guard_bytes(struct usbtest_dev *tdev, struct urb *urb) { u8 *buf = urb->transfer_buffer; u8 *guard = buf - buffer_offset(buf); unsigned i; for (i = 0; guard < buf; i++, guard++) { if (*guard != GUARD_BYTE) { ERROR(tdev, "guard byte[%d] %d (not %d)\n", i, *guard, GUARD_BYTE); return -EINVAL; } } return 0; } static int simple_check_buf(struct usbtest_dev *tdev, struct urb *urb) { unsigned i; u8 expected; u8 *buf = urb->transfer_buffer; unsigned len = urb->actual_length; unsigned maxpacket = get_maxpacket(urb->dev, urb->pipe); int ret = check_guard_bytes(tdev, urb); if (ret) return ret; for (i = 0; i < len; i++, buf++) { switch (pattern) { /* all-zeroes has no synchronization issues */ case 0: expected = 0; break; /* mod63 stays in sync with short-terminated transfers, * or otherwise when host and gadget agree on how large * each usb transfer request should be. resync is done * with set_interface or set_config. */ case 1: /* mod63 */ expected = (i % maxpacket) % 63; break; /* always fail unsupported patterns */ default: expected = !*buf; break; } if (*buf == expected) continue; ERROR(tdev, "buf[%d] = %d (not %d)\n", i, *buf, expected); return -EINVAL; } return 0; } static void simple_free_urb(struct urb *urb) { unsigned long offset = buffer_offset(urb->transfer_buffer); if (urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP) usb_free_coherent( urb->dev, urb->transfer_buffer_length + offset, urb->transfer_buffer - offset, urb->transfer_dma - offset); else kfree(urb->transfer_buffer - offset); usb_free_urb(urb); } static int simple_io( struct usbtest_dev *tdev, struct urb *urb, int iterations, int vary, int expected, const char *label ) { struct usb_device *udev = urb->dev; int max = urb->transfer_buffer_length; struct completion completion; int retval = 0; unsigned long expire; urb->context = &completion; while (retval == 0 && iterations-- > 0) { init_completion(&completion); if (usb_pipeout(urb->pipe)) { simple_fill_buf(urb); urb->transfer_flags |= URB_ZERO_PACKET; } retval = usb_submit_urb(urb, GFP_KERNEL); if (retval != 0) break; expire = msecs_to_jiffies(SIMPLE_IO_TIMEOUT); if (!wait_for_completion_timeout(&completion, expire)) { usb_kill_urb(urb); retval = (urb->status == -ENOENT ? -ETIMEDOUT : urb->status); } else { retval = urb->status; } urb->dev = udev; if (retval == 0 && usb_pipein(urb->pipe)) retval = simple_check_buf(tdev, urb); if (vary) { int len = urb->transfer_buffer_length; len += vary; len %= max; if (len == 0) len = (vary < max) ? vary : max; urb->transfer_buffer_length = len; } /* FIXME if endpoint halted, clear halt (and log) */ } urb->transfer_buffer_length = max; if (expected != retval) dev_err(&udev->dev, "%s failed, iterations left %d, status %d (not %d)\n", label, iterations, retval, expected); return retval; } /*-------------------------------------------------------------------------*/ /* We use scatterlist primitives to test queued I/O. * Yes, this also tests the scatterlist primitives. */ static void free_sglist(struct scatterlist *sg, int nents) { unsigned i; if (!sg) return; for (i = 0; i < nents; i++) { if (!sg_page(&sg[i])) continue; kfree(sg_virt(&sg[i])); } kfree(sg); } static struct scatterlist * alloc_sglist(int nents, int max, int vary, struct usbtest_dev *dev, int pipe) { struct scatterlist *sg; unsigned int n_size = 0; unsigned i; unsigned size = max; unsigned maxpacket = get_maxpacket(interface_to_usbdev(dev->intf), pipe); if (max == 0) return NULL; sg = kmalloc_array(nents, sizeof(*sg), GFP_KERNEL); if (!sg) return NULL; sg_init_table(sg, nents); for (i = 0; i < nents; i++) { char *buf; unsigned j; buf = kzalloc(size, GFP_KERNEL); if (!buf) { free_sglist(sg, i); return NULL; } /* kmalloc pages are always physically contiguous! */ sg_set_buf(&sg[i], buf, size); switch (pattern) { case 0: /* already zeroed */ break; case 1: for (j = 0; j < size; j++) *buf++ = (u8) (((j + n_size) % maxpacket) % 63); n_size += size; break; } if (vary) { size += vary; size %= max; if (size == 0) size = (vary < max) ? vary : max; } } return sg; } struct sg_timeout { struct timer_list timer; struct usb_sg_request *req; }; static void sg_timeout(struct timer_list *t) { struct sg_timeout *timeout = from_timer(timeout, t, timer); usb_sg_cancel(timeout->req); } static int perform_sglist( struct usbtest_dev *tdev, unsigned iterations, int pipe, struct usb_sg_request *req, struct scatterlist *sg, int nents ) { struct usb_device *udev = testdev_to_usbdev(tdev); int retval = 0; struct sg_timeout timeout = { .req = req, }; timer_setup_on_stack(&timeout.timer, sg_timeout, 0); while (retval == 0 && iterations-- > 0) { retval = usb_sg_init(req, udev, pipe, (udev->speed == USB_SPEED_HIGH) ? (INTERRUPT_RATE << 3) : INTERRUPT_RATE, sg, nents, 0, GFP_KERNEL); if (retval) break; mod_timer(&timeout.timer, jiffies + msecs_to_jiffies(SIMPLE_IO_TIMEOUT)); usb_sg_wait(req); if (!del_timer_sync(&timeout.timer)) retval = -ETIMEDOUT; else retval = req->status; destroy_timer_on_stack(&timeout.timer); /* FIXME check resulting data pattern */ /* FIXME if endpoint halted, clear halt (and log) */ } /* FIXME for unlink or fault handling tests, don't report * failure if retval is as we expected ... */ if (retval) ERROR(tdev, "perform_sglist failed, " "iterations left %d, status %d\n", iterations, retval); return retval; } /*-------------------------------------------------------------------------*/ /* unqueued control message testing * * there's a nice set of device functional requirements in chapter 9 of the * usb 2.0 spec, which we can apply to ANY device, even ones that don't use * special test firmware. * * we know the device is configured (or suspended) by the time it's visible * through usbfs. we can't change that, so we won't test enumeration (which * worked 'well enough' to get here, this time), power management (ditto), * or remote wakeup (which needs human interaction). */ static unsigned realworld = 1; module_param(realworld, uint, 0); MODULE_PARM_DESC(realworld, "clear to demand stricter spec compliance"); static int get_altsetting(struct usbtest_dev *dev) { struct usb_interface *iface = dev->intf; struct usb_device *udev = interface_to_usbdev(iface); int retval; retval = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), USB_REQ_GET_INTERFACE, USB_DIR_IN|USB_RECIP_INTERFACE, 0, iface->altsetting[0].desc.bInterfaceNumber, dev->buf, 1, USB_CTRL_GET_TIMEOUT); switch (retval) { case 1: return dev->buf[0]; case 0: retval = -ERANGE; fallthrough; default: return retval; } } static int set_altsetting(struct usbtest_dev *dev, int alternate) { struct usb_interface *iface = dev->intf; struct usb_device *udev; if (alternate < 0 || alternate >= 256) return -EINVAL; udev = interface_to_usbdev(iface); return usb_set_interface(udev, iface->altsetting[0].desc.bInterfaceNumber, alternate); } static int is_good_config(struct usbtest_dev *tdev, int len) { struct usb_config_descriptor *config; if (len < (int)sizeof(*config)) return 0; config = (struct usb_config_descriptor *) tdev->buf; switch (config->bDescriptorType) { case USB_DT_CONFIG: case USB_DT_OTHER_SPEED_CONFIG: if (config->bLength != 9) { ERROR(tdev, "bogus config descriptor length\n"); return 0; } /* this bit 'must be 1' but often isn't */ if (!realworld && !(config->bmAttributes & 0x80)) { ERROR(tdev, "high bit of config attributes not set\n"); return 0; } if (config->bmAttributes & 0x1f) { /* reserved == 0 */ ERROR(tdev, "reserved config bits set\n"); return 0; } break; default: return 0; } if (le16_to_cpu(config->wTotalLength) == len) /* read it all */ return 1; if (le16_to_cpu(config->wTotalLength) >= TBUF_SIZE) /* max partial read */ return 1; ERROR(tdev, "bogus config descriptor read size\n"); return 0; } static int is_good_ext(struct usbtest_dev *tdev, u8 *buf) { struct usb_ext_cap_descriptor *ext; u32 attr; ext = (struct usb_ext_cap_descriptor *) buf; if (ext->bLength != USB_DT_USB_EXT_CAP_SIZE) { ERROR(tdev, "bogus usb 2.0 extension descriptor length\n"); return 0; } attr = le32_to_cpu(ext->bmAttributes); /* bits[1:15] is used and others are reserved */ if (attr & ~0xfffe) { /* reserved == 0 */ ERROR(tdev, "reserved bits set\n"); return 0; } return 1; } static int is_good_ss_cap(struct usbtest_dev *tdev, u8 *buf) { struct usb_ss_cap_descriptor *ss; ss = (struct usb_ss_cap_descriptor *) buf; if (ss->bLength != USB_DT_USB_SS_CAP_SIZE) { ERROR(tdev, "bogus superspeed device capability descriptor length\n"); return 0; } /* * only bit[1] of bmAttributes is used for LTM and others are * reserved */ if (ss->bmAttributes & ~0x02) { /* reserved == 0 */ ERROR(tdev, "reserved bits set in bmAttributes\n"); return 0; } /* bits[0:3] of wSpeedSupported is used and others are reserved */ if (le16_to_cpu(ss->wSpeedSupported) & ~0x0f) { /* reserved == 0 */ ERROR(tdev, "reserved bits set in wSpeedSupported\n"); return 0; } return 1; } static int is_good_con_id(struct usbtest_dev *tdev, u8 *buf) { struct usb_ss_container_id_descriptor *con_id; con_id = (struct usb_ss_container_id_descriptor *) buf; if (con_id->bLength != USB_DT_USB_SS_CONTN_ID_SIZE) { ERROR(tdev, "bogus container id descriptor length\n"); return 0; } if (con_id->bReserved) { /* reserved == 0 */ ERROR(tdev, "reserved bits set\n"); return 0; } return 1; } /* sanity test for standard requests working with usb_control_mesg() and some * of the utility functions which use it. * * this doesn't test how endpoint halts behave or data toggles get set, since * we won't do I/O to bulk/interrupt endpoints here (which is how to change * halt or toggle). toggle testing is impractical without support from hcds. * * this avoids failing devices linux would normally work with, by not testing * config/altsetting operations for devices that only support their defaults. * such devices rarely support those needless operations. * * NOTE that since this is a sanity test, it's not examining boundary cases * to see if usbcore, hcd, and device all behave right. such testing would * involve varied read sizes and other operation sequences. */ static int ch9_postconfig(struct usbtest_dev *dev) { struct usb_interface *iface = dev->intf; struct usb_device *udev = interface_to_usbdev(iface); int i, alt, retval; /* [9.2.3] if there's more than one altsetting, we need to be able to * set and get each one. mostly trusts the descriptors from usbcore. */ for (i = 0; i < iface->num_altsetting; i++) { /* 9.2.3 constrains the range here */ alt = iface->altsetting[i].desc.bAlternateSetting; if (alt < 0 || alt >= iface->num_altsetting) { dev_err(&iface->dev, "invalid alt [%d].bAltSetting = %d\n", i, alt); } /* [real world] get/set unimplemented if there's only one */ if (realworld && iface->num_altsetting == 1) continue; /* [9.4.10] set_interface */ retval = set_altsetting(dev, alt); if (retval) { dev_err(&iface->dev, "can't set_interface = %d, %d\n", alt, retval); return retval; } /* [9.4.4] get_interface always works */ retval = get_altsetting(dev); if (retval != alt) { dev_err(&iface->dev, "get alt should be %d, was %d\n", alt, retval); return (retval < 0) ? retval : -EDOM; } } /* [real world] get_config unimplemented if there's only one */ if (!realworld || udev->descriptor.bNumConfigurations != 1) { int expected = udev->actconfig->desc.bConfigurationValue; /* [9.4.2] get_configuration always works * ... although some cheap devices (like one TI Hub I've got) * won't return config descriptors except before set_config. */ retval = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), USB_REQ_GET_CONFIGURATION, USB_DIR_IN | USB_RECIP_DEVICE, 0, 0, dev->buf, 1, USB_CTRL_GET_TIMEOUT); if (retval != 1 || dev->buf[0] != expected) { dev_err(&iface->dev, "get config --> %d %d (1 %d)\n", retval, dev->buf[0], expected); return (retval < 0) ? retval : -EDOM; } } /* there's always [9.4.3] a device descriptor [9.6.1] */ retval = usb_get_descriptor(udev, USB_DT_DEVICE, 0, dev->buf, sizeof(udev->descriptor)); if (retval != sizeof(udev->descriptor)) { dev_err(&iface->dev, "dev descriptor --> %d\n", retval); return (retval < 0) ? retval : -EDOM; } /* * there's always [9.4.3] a bos device descriptor [9.6.2] in USB * 3.0 spec */ if (le16_to_cpu(udev->descriptor.bcdUSB) >= 0x0210) { struct usb_bos_descriptor *bos = NULL; struct usb_dev_cap_header *header = NULL; unsigned total, num, length; u8 *buf; retval = usb_get_descriptor(udev, USB_DT_BOS, 0, dev->buf, sizeof(*udev->bos->desc)); if (retval != sizeof(*udev->bos->desc)) { dev_err(&iface->dev, "bos descriptor --> %d\n", retval); return (retval < 0) ? retval : -EDOM; } bos = (struct usb_bos_descriptor *)dev->buf; total = le16_to_cpu(bos->wTotalLength); num = bos->bNumDeviceCaps; if (total > TBUF_SIZE) total = TBUF_SIZE; /* * get generic device-level capability descriptors [9.6.2] * in USB 3.0 spec */ retval = usb_get_descriptor(udev, USB_DT_BOS, 0, dev->buf, total); if (retval != total) { dev_err(&iface->dev, "bos descriptor set --> %d\n", retval); return (retval < 0) ? retval : -EDOM; } length = sizeof(*udev->bos->desc); buf = dev->buf; for (i = 0; i < num; i++) { buf += length; if (buf + sizeof(struct usb_dev_cap_header) > dev->buf + total) break; header = (struct usb_dev_cap_header *)buf; length = header->bLength; if (header->bDescriptorType != USB_DT_DEVICE_CAPABILITY) { dev_warn(&udev->dev, "not device capability descriptor, skip\n"); continue; } switch (header->bDevCapabilityType) { case USB_CAP_TYPE_EXT: if (buf + USB_DT_USB_EXT_CAP_SIZE > dev->buf + total || !is_good_ext(dev, buf)) { dev_err(&iface->dev, "bogus usb 2.0 extension descriptor\n"); return -EDOM; } break; case USB_SS_CAP_TYPE: if (buf + USB_DT_USB_SS_CAP_SIZE > dev->buf + total || !is_good_ss_cap(dev, buf)) { dev_err(&iface->dev, "bogus superspeed device capability descriptor\n"); return -EDOM; } break; case CONTAINER_ID_TYPE: if (buf + USB_DT_USB_SS_CONTN_ID_SIZE > dev->buf + total || !is_good_con_id(dev, buf)) { dev_err(&iface->dev, "bogus container id descriptor\n"); return -EDOM; } break; default: break; } } } /* there's always [9.4.3] at least one config descriptor [9.6.3] */ for (i = 0; i < udev->descriptor.bNumConfigurations; i++) { retval = usb_get_descriptor(udev, USB_DT_CONFIG, i, dev->buf, TBUF_SIZE); if (!is_good_config(dev, retval)) { dev_err(&iface->dev, "config [%d] descriptor --> %d\n", i, retval); return (retval < 0) ? retval : -EDOM; } /* FIXME cross-checking udev->config[i] to make sure usbcore * parsed it right (etc) would be good testing paranoia */ } /* and sometimes [9.2.6.6] speed dependent descriptors */ if (le16_to_cpu(udev->descriptor.bcdUSB) == 0x0200) { struct usb_qualifier_descriptor *d = NULL; /* device qualifier [9.6.2] */ retval = usb_get_descriptor(udev, USB_DT_DEVICE_QUALIFIER, 0, dev->buf, sizeof(struct usb_qualifier_descriptor)); if (retval == -EPIPE) { if (udev->speed == USB_SPEED_HIGH) { dev_err(&iface->dev, "hs dev qualifier --> %d\n", retval); return retval; } /* usb2.0 but not high-speed capable; fine */ } else if (retval != sizeof(struct usb_qualifier_descriptor)) { dev_err(&iface->dev, "dev qualifier --> %d\n", retval); return (retval < 0) ? retval : -EDOM; } else d = (struct usb_qualifier_descriptor *) dev->buf; /* might not have [9.6.2] any other-speed configs [9.6.4] */ if (d) { unsigned max = d->bNumConfigurations; for (i = 0; i < max; i++) { retval = usb_get_descriptor(udev, USB_DT_OTHER_SPEED_CONFIG, i, dev->buf, TBUF_SIZE); if (!is_good_config(dev, retval)) { dev_err(&iface->dev, "other speed config --> %d\n", retval); return (retval < 0) ? retval : -EDOM; } } } } /* FIXME fetch strings from at least the device descriptor */ /* [9.4.5] get_status always works */ retval = usb_get_std_status(udev, USB_RECIP_DEVICE, 0, dev->buf); if (retval) { dev_err(&iface->dev, "get dev status --> %d\n", retval); return retval; } /* FIXME configuration.bmAttributes says if we could try to set/clear * the device's remote wakeup feature ... if we can, test that here */ retval = usb_get_std_status(udev, USB_RECIP_INTERFACE, iface->altsetting[0].desc.bInterfaceNumber, dev->buf); if (retval) { dev_err(&iface->dev, "get interface status --> %d\n", retval); return retval; } /* FIXME get status for each endpoint in the interface */ return 0; } /*-------------------------------------------------------------------------*/ /* use ch9 requests to test whether: * (a) queues work for control, keeping N subtests queued and * active (auto-resubmit) for M loops through the queue. * (b) protocol stalls (control-only) will autorecover. * it's not like bulk/intr; no halt clearing. * (c) short control reads are reported and handled. * (d) queues are always processed in-order */ struct ctrl_ctx { spinlock_t lock; struct usbtest_dev *dev; struct completion complete; unsigned count; unsigned pending; int status; struct urb **urb; struct usbtest_param_32 *param; int last; }; #define NUM_SUBCASES 16 /* how many test subcases here? */ struct subcase { struct usb_ctrlrequest setup; int number; int expected; }; static void ctrl_complete(struct urb *urb) { struct ctrl_ctx *ctx = urb->context; struct usb_ctrlrequest *reqp; struct subcase *subcase; int status = urb->status; unsigned long flags; reqp = (struct usb_ctrlrequest *)urb->setup_packet; subcase = container_of(reqp, struct subcase, setup); spin_lock_irqsave(&ctx->lock, flags); ctx->count--; ctx->pending--; /* queue must transfer and complete in fifo order, unless * usb_unlink_urb() is used to unlink something not at the * physical queue head (not tested). */ if (subcase->number > 0) { if ((subcase->number - ctx->last) != 1) { ERROR(ctx->dev, "subcase %d completed out of order, last %d\n", subcase->number, ctx->last); status = -EDOM; ctx->last = subcase->number; goto error; } } ctx->last = subcase->number; /* succeed or fault in only one way? */ if (status == subcase->expected) status = 0; /* async unlink for cleanup? */ else if (status != -ECONNRESET) { /* some faults are allowed, not required */ if (subcase->expected > 0 && ( ((status == -subcase->expected /* happened */ || status == 0)))) /* didn't */ status = 0; /* sometimes more than one fault is allowed */ else if (subcase->number == 12 && status == -EPIPE) status = 0; else ERROR(ctx->dev, "subtest %d error, status %d\n", subcase->number, status); } /* unexpected status codes mean errors; ideally, in hardware */ if (status) { error: if (ctx->status == 0) { int i; ctx->status = status; ERROR(ctx->dev, "control queue %02x.%02x, err %d, " "%d left, subcase %d, len %d/%d\n", reqp->bRequestType, reqp->bRequest, status, ctx->count, subcase->number, urb->actual_length, urb->transfer_buffer_length); /* FIXME this "unlink everything" exit route should * be a separate test case. */ /* unlink whatever's still pending */ for (i = 1; i < ctx->param->sglen; i++) { struct urb *u = ctx->urb[ (i + subcase->number) % ctx->param->sglen]; if (u == urb || !u->dev) continue; spin_unlock(&ctx->lock); status = usb_unlink_urb(u); spin_lock(&ctx->lock); switch (status) { case -EINPROGRESS: case -EBUSY: case -EIDRM: continue; default: ERROR(ctx->dev, "urb unlink --> %d\n", status); } } status = ctx->status; } } /* resubmit if we need to, else mark this as done */ if ((status == 0) && (ctx->pending < ctx->count)) { status = usb_submit_urb(urb, GFP_ATOMIC); if (status != 0) { ERROR(ctx->dev, "can't resubmit ctrl %02x.%02x, err %d\n", reqp->bRequestType, reqp->bRequest, status); urb->dev = NULL; } else ctx->pending++; } else urb->dev = NULL; /* signal completion when nothing's queued */ if (ctx->pending == 0) complete(&ctx->complete); spin_unlock_irqrestore(&ctx->lock, flags); } static int test_ctrl_queue(struct usbtest_dev *dev, struct usbtest_param_32 *param) { struct usb_device *udev = testdev_to_usbdev(dev); struct urb **urb; struct ctrl_ctx context; int i; if (param->sglen == 0 || param->iterations > UINT_MAX / param->sglen) return -EOPNOTSUPP; spin_lock_init(&context.lock); context.dev = dev; init_completion(&context.complete); context.count = param->sglen * param->iterations; context.pending = 0; context.status = -ENOMEM; context.param = param; context.last = -1; /* allocate and init the urbs we'll queue. * as with bulk/intr sglists, sglen is the queue depth; it also * controls which subtests run (more tests than sglen) or rerun. */ urb = kcalloc(param->sglen, sizeof(struct urb *), GFP_KERNEL); if (!urb) return -ENOMEM; for (i = 0; i < param->sglen; i++) { int pipe = usb_rcvctrlpipe(udev, 0); unsigned len; struct urb *u; struct usb_ctrlrequest req; struct subcase *reqp; /* sign of this variable means: * -: tested code must return this (negative) error code * +: tested code may return this (negative too) error code */ int expected = 0; /* requests here are mostly expected to succeed on any * device, but some are chosen to trigger protocol stalls * or short reads. */ memset(&req, 0, sizeof(req)); req.bRequest = USB_REQ_GET_DESCRIPTOR; req.bRequestType = USB_DIR_IN|USB_RECIP_DEVICE; switch (i % NUM_SUBCASES) { case 0: /* get device descriptor */ req.wValue = cpu_to_le16(USB_DT_DEVICE << 8); len = sizeof(struct usb_device_descriptor); break; case 1: /* get first config descriptor (only) */ req.wValue = cpu_to_le16((USB_DT_CONFIG << 8) | 0); len = sizeof(struct usb_config_descriptor); break; case 2: /* get altsetting (OFTEN STALLS) */ req.bRequest = USB_REQ_GET_INTERFACE; req.bRequestType = USB_DIR_IN|USB_RECIP_INTERFACE; /* index = 0 means first interface */ len = 1; expected = EPIPE; break; case 3: /* get interface status */ req.bRequest = USB_REQ_GET_STATUS; req.bRequestType = USB_DIR_IN|USB_RECIP_INTERFACE; /* interface 0 */ len = 2; break; case 4: /* get device status */ req.bRequest = USB_REQ_GET_STATUS; req.bRequestType = USB_DIR_IN|USB_RECIP_DEVICE; len = 2; break; case 5: /* get device qualifier (MAY STALL) */ req.wValue = cpu_to_le16 (USB_DT_DEVICE_QUALIFIER << 8); len = sizeof(struct usb_qualifier_descriptor); if (udev->speed != USB_SPEED_HIGH) expected = EPIPE; break; case 6: /* get first config descriptor, plus interface */ req.wValue = cpu_to_le16((USB_DT_CONFIG << 8) | 0); len = sizeof(struct usb_config_descriptor); len += sizeof(struct usb_interface_descriptor); break; case 7: /* get interface descriptor (ALWAYS STALLS) */ req.wValue = cpu_to_le16 (USB_DT_INTERFACE << 8); /* interface == 0 */ len = sizeof(struct usb_interface_descriptor); expected = -EPIPE; break; /* NOTE: two consecutive stalls in the queue here. * that tests fault recovery a bit more aggressively. */ case 8: /* clear endpoint halt (MAY STALL) */ req.bRequest = USB_REQ_CLEAR_FEATURE; req.bRequestType = USB_RECIP_ENDPOINT; /* wValue 0 == ep halt */ /* wIndex 0 == ep0 (shouldn't halt!) */ len = 0; pipe = usb_sndctrlpipe(udev, 0); expected = EPIPE; break; case 9: /* get endpoint status */ req.bRequest = USB_REQ_GET_STATUS; req.bRequestType = USB_DIR_IN|USB_RECIP_ENDPOINT; /* endpoint 0 */ len = 2; break; case 10: /* trigger short read (EREMOTEIO) */ req.wValue = cpu_to_le16((USB_DT_CONFIG << 8) | 0); len = 1024; expected = -EREMOTEIO; break; /* NOTE: two consecutive _different_ faults in the queue. */ case 11: /* get endpoint descriptor (ALWAYS STALLS) */ req.wValue = cpu_to_le16(USB_DT_ENDPOINT << 8); /* endpoint == 0 */ len = sizeof(struct usb_interface_descriptor); expected = EPIPE; break; /* NOTE: sometimes even a third fault in the queue! */ case 12: /* get string 0 descriptor (MAY STALL) */ req.wValue = cpu_to_le16(USB_DT_STRING << 8); /* string == 0, for language IDs */ len = sizeof(struct usb_interface_descriptor); /* may succeed when > 4 languages */ expected = EREMOTEIO; /* or EPIPE, if no strings */ break; case 13: /* short read, resembling case 10 */ req.wValue = cpu_to_le16((USB_DT_CONFIG << 8) | 0); /* last data packet "should" be DATA1, not DATA0 */ if (udev->speed == USB_SPEED_SUPER) len = 1024 - 512; else len = 1024 - udev->descriptor.bMaxPacketSize0; expected = -EREMOTEIO; break; case 14: /* short read; try to fill the last packet */ req.wValue = cpu_to_le16((USB_DT_DEVICE << 8) | 0); /* device descriptor size == 18 bytes */ len = udev->descriptor.bMaxPacketSize0; if (udev->speed == USB_SPEED_SUPER) len = 512; switch (len) { case 8: len = 24; break; case 16: len = 32; break; } expected = -EREMOTEIO; break; case 15: req.wValue = cpu_to_le16(USB_DT_BOS << 8); if (udev->bos) len = le16_to_cpu(udev->bos->desc->wTotalLength); else len = sizeof(struct usb_bos_descriptor); if (le16_to_cpu(udev->descriptor.bcdUSB) < 0x0201) expected = -EPIPE; break; default: ERROR(dev, "bogus number of ctrl queue testcases!\n"); context.status = -EINVAL; goto cleanup; } req.wLength = cpu_to_le16(len); urb[i] = u = simple_alloc_urb(udev, pipe, len, 0); if (!u) goto cleanup; reqp = kmalloc(sizeof(*reqp), GFP_KERNEL); if (!reqp) goto cleanup; reqp->setup = req; reqp->number = i % NUM_SUBCASES; reqp->expected = expected; u->setup_packet = (char *) &reqp->setup; u->context = &context; u->complete = ctrl_complete; } /* queue the urbs */ context.urb = urb; spin_lock_irq(&context.lock); for (i = 0; i < param->sglen; i++) { context.status = usb_submit_urb(urb[i], GFP_ATOMIC); if (context.status != 0) { ERROR(dev, "can't submit urb[%d], status %d\n", i, context.status); context.count = context.pending; break; } context.pending++; } spin_unlock_irq(&context.lock); /* FIXME set timer and time out; provide a disconnect hook */ /* wait for the last one to complete */ if (context.pending > 0) wait_for_completion(&context.complete); cleanup: for (i = 0; i < param->sglen; i++) { if (!urb[i]) continue; urb[i]->dev = udev; kfree(urb[i]->setup_packet); simple_free_urb(urb[i]); } kfree(urb); return context.status; } #undef NUM_SUBCASES /*-------------------------------------------------------------------------*/ static void unlink1_callback(struct urb *urb) { int status = urb->status; /* we "know" -EPIPE (stall) never happens */ if (!status) status = usb_submit_urb(urb, GFP_ATOMIC); if (status) { urb->status = status; complete(urb->context); } } static int unlink1(struct usbtest_dev *dev, int pipe, int size, int async) { struct urb *urb; struct completion completion; int retval = 0; init_completion(&completion); urb = simple_alloc_urb(testdev_to_usbdev(dev), pipe, size, 0); if (!urb) return -ENOMEM; urb->context = &completion; urb->complete = unlink1_callback; if (usb_pipeout(urb->pipe)) { simple_fill_buf(urb); urb->transfer_flags |= URB_ZERO_PACKET; } /* keep the endpoint busy. there are lots of hc/hcd-internal * states, and testing should get to all of them over time. * * FIXME want additional tests for when endpoint is STALLing * due to errors, or is just NAKing requests. */ retval = usb_submit_urb(urb, GFP_KERNEL); if (retval != 0) { dev_err(&dev->intf->dev, "submit fail %d\n", retval); return retval; } /* unlinking that should always work. variable delay tests more * hcd states and code paths, even with little other system load. */ msleep(jiffies % (2 * INTERRUPT_RATE)); if (async) { while (!completion_done(&completion)) { retval = usb_unlink_urb(urb); if (retval == 0 && usb_pipein(urb->pipe)) retval = simple_check_buf(dev, urb); switch (retval) { case -EBUSY: case -EIDRM: /* we can't unlink urbs while they're completing * or if they've completed, and we haven't * resubmitted. "normal" drivers would prevent * resubmission, but since we're testing unlink * paths, we can't. */ ERROR(dev, "unlink retry\n"); continue; case 0: case -EINPROGRESS: break; default: dev_err(&dev->intf->dev, "unlink fail %d\n", retval); return retval; } break; } } else usb_kill_urb(urb); wait_for_completion(&completion); retval = urb->status; simple_free_urb(urb); if (async) return (retval == -ECONNRESET) ? 0 : retval - 1000; else return (retval == -ENOENT || retval == -EPERM) ? 0 : retval - 2000; } static int unlink_simple(struct usbtest_dev *dev, int pipe, int len) { int retval = 0; /* test sync and async paths */ retval = unlink1(dev, pipe, len, 1); if (!retval) retval = unlink1(dev, pipe, len, 0); return retval; } /*-------------------------------------------------------------------------*/ struct queued_ctx { struct completion complete; atomic_t pending; unsigned num; int status; struct urb **urbs; }; static void unlink_queued_callback(struct urb *urb) { int status = urb->status; struct queued_ctx *ctx = urb->context; if (ctx->status) goto done; if (urb == ctx->urbs[ctx->num - 4] || urb == ctx->urbs[ctx->num - 2]) { if (status == -ECONNRESET) goto done; /* What error should we report if the URB completed normally? */ } if (status != 0) ctx->status = status; done: if (atomic_dec_and_test(&ctx->pending)) complete(&ctx->complete); } static int unlink_queued(struct usbtest_dev *dev, int pipe, unsigned num, unsigned size) { struct queued_ctx ctx; struct usb_device *udev = testdev_to_usbdev(dev); void *buf; dma_addr_t buf_dma; int i; int retval = -ENOMEM; init_completion(&ctx.complete); atomic_set(&ctx.pending, 1); /* One more than the actual value */ ctx.num = num; ctx.status = 0; buf = usb_alloc_coherent(udev, size, GFP_KERNEL, &buf_dma); if (!buf) return retval; memset(buf, 0, size); /* Allocate and init the urbs we'll queue */ ctx.urbs = kcalloc(num, sizeof(struct urb *), GFP_KERNEL); if (!ctx.urbs) goto free_buf; for (i = 0; i < num; i++) { ctx.urbs[i] = usb_alloc_urb(0, GFP_KERNEL); if (!ctx.urbs[i]) goto free_urbs; usb_fill_bulk_urb(ctx.urbs[i], udev, pipe, buf, size, unlink_queued_callback, &ctx); ctx.urbs[i]->transfer_dma = buf_dma; ctx.urbs[i]->transfer_flags = URB_NO_TRANSFER_DMA_MAP; if (usb_pipeout(ctx.urbs[i]->pipe)) { simple_fill_buf(ctx.urbs[i]); ctx.urbs[i]->transfer_flags |= URB_ZERO_PACKET; } } /* Submit all the URBs and then unlink URBs num - 4 and num - 2. */ for (i = 0; i < num; i++) { atomic_inc(&ctx.pending); retval = usb_submit_urb(ctx.urbs[i], GFP_KERNEL); if (retval != 0) { dev_err(&dev->intf->dev, "submit urbs[%d] fail %d\n", i, retval); atomic_dec(&ctx.pending); ctx.status = retval; break; } } if (i == num) { usb_unlink_urb(ctx.urbs[num - 4]); usb_unlink_urb(ctx.urbs[num - 2]); } else { while (--i >= 0) usb_unlink_urb(ctx.urbs[i]); } if (atomic_dec_and_test(&ctx.pending)) /* The extra count */ complete(&ctx.complete); wait_for_completion(&ctx.complete); retval = ctx.status; free_urbs: for (i = 0; i < num; i++) usb_free_urb(ctx.urbs[i]); kfree(ctx.urbs); free_buf: usb_free_coherent(udev, size, buf, buf_dma); return retval; } /*-------------------------------------------------------------------------*/ static int verify_not_halted(struct usbtest_dev *tdev, int ep, struct urb *urb) { int retval; u16 status; /* shouldn't look or act halted */ retval = usb_get_std_status(urb->dev, USB_RECIP_ENDPOINT, ep, &status); if (retval < 0) { ERROR(tdev, "ep %02x couldn't get no-halt status, %d\n", ep, retval); return retval; } if (status != 0) { ERROR(tdev, "ep %02x bogus status: %04x != 0\n", ep, status); return -EINVAL; } retval = simple_io(tdev, urb, 1, 0, 0, __func__); if (retval != 0) return -EINVAL; return 0; } static int verify_halted(struct usbtest_dev *tdev, int ep, struct urb *urb) { int retval; u16 status; /* should look and act halted */ retval = usb_get_std_status(urb->dev, USB_RECIP_ENDPOINT, ep, &status); if (retval < 0) { ERROR(tdev, "ep %02x couldn't get halt status, %d\n", ep, retval); return retval; } if (status != 1) { ERROR(tdev, "ep %02x bogus status: %04x != 1\n", ep, status); return -EINVAL; } retval = simple_io(tdev, urb, 1, 0, -EPIPE, __func__); if (retval != -EPIPE) return -EINVAL; retval = simple_io(tdev, urb, 1, 0, -EPIPE, "verify_still_halted"); if (retval != -EPIPE) return -EINVAL; return 0; } static int test_halt(struct usbtest_dev *tdev, int ep, struct urb *urb) { int retval; /* shouldn't look or act halted now */ retval = verify_not_halted(tdev, ep, urb); if (retval < 0) return retval; /* set halt (protocol test only), verify it worked */ retval = usb_control_msg(urb->dev, usb_sndctrlpipe(urb->dev, 0), USB_REQ_SET_FEATURE, USB_RECIP_ENDPOINT, USB_ENDPOINT_HALT, ep, NULL, 0, USB_CTRL_SET_TIMEOUT); if (retval < 0) { ERROR(tdev, "ep %02x couldn't set halt, %d\n", ep, retval); return retval; } retval = verify_halted(tdev, ep, urb); if (retval < 0) { int ret; /* clear halt anyways, else further tests will fail */ ret = usb_clear_halt(urb->dev, urb->pipe); if (ret) ERROR(tdev, "ep %02x couldn't clear halt, %d\n", ep, ret); return retval; } /* clear halt (tests API + protocol), verify it worked */ retval = usb_clear_halt(urb->dev, urb->pipe); if (retval < 0) { ERROR(tdev, "ep %02x couldn't clear halt, %d\n", ep, retval); return retval; } retval = verify_not_halted(tdev, ep, urb); if (retval < 0) return retval; /* NOTE: could also verify SET_INTERFACE clear halts ... */ return 0; } static int test_toggle_sync(struct usbtest_dev *tdev, int ep, struct urb *urb) { int retval; /* clear initial data toggle to DATA0 */ retval = usb_clear_halt(urb->dev, urb->pipe); if (retval < 0) { ERROR(tdev, "ep %02x couldn't clear halt, %d\n", ep, retval); return retval; } /* transfer 3 data packets, should be DATA0, DATA1, DATA0 */ retval = simple_io(tdev, urb, 1, 0, 0, __func__); if (retval != 0) return -EINVAL; /* clear halt resets device side data toggle, host should react to it */ retval = usb_clear_halt(urb->dev, urb->pipe); if (retval < 0) { ERROR(tdev, "ep %02x couldn't clear halt, %d\n", ep, retval); return retval; } /* host should use DATA0 again after clear halt */ retval = simple_io(tdev, urb, 1, 0, 0, __func__); return retval; } static int halt_simple(struct usbtest_dev *dev) { int ep; int retval = 0; struct urb *urb; struct usb_device *udev = testdev_to_usbdev(dev); if (udev->speed == USB_SPEED_SUPER) urb = simple_alloc_urb(udev, 0, 1024, 0); else urb = simple_alloc_urb(udev, 0, 512, 0); if (urb == NULL) return -ENOMEM; if (dev->in_pipe) { ep = usb_pipeendpoint(dev->in_pipe) | USB_DIR_IN; urb->pipe = dev->in_pipe; retval = test_halt(dev, ep, urb); if (retval < 0) goto done; } if (dev->out_pipe) { ep = usb_pipeendpoint(dev->out_pipe); urb->pipe = dev->out_pipe; retval = test_halt(dev, ep, urb); } done: simple_free_urb(urb); return retval; } static int toggle_sync_simple(struct usbtest_dev *dev) { int ep; int retval = 0; struct urb *urb; struct usb_device *udev = testdev_to_usbdev(dev); unsigned maxp = get_maxpacket(udev, dev->out_pipe); /* * Create a URB that causes a transfer of uneven amount of data packets * This way the clear toggle has an impact on the data toggle sequence. * Use 2 maxpacket length packets and one zero packet. */ urb = simple_alloc_urb(udev, 0, 2 * maxp, 0); if (urb == NULL) return -ENOMEM; urb->transfer_flags |= URB_ZERO_PACKET; ep = usb_pipeendpoint(dev->out_pipe); urb->pipe = dev->out_pipe; retval = test_toggle_sync(dev, ep, urb); simple_free_urb(urb); return retval; } /*-------------------------------------------------------------------------*/ /* Control OUT tests use the vendor control requests from Intel's * USB 2.0 compliance test device: write a buffer, read it back. * * Intel's spec only _requires_ that it work for one packet, which * is pretty weak. Some HCDs place limits here; most devices will * need to be able to handle more than one OUT data packet. We'll * try whatever we're told to try. */ static int ctrl_out(struct usbtest_dev *dev, unsigned count, unsigned length, unsigned vary, unsigned offset) { unsigned i, j, len; int retval; u8 *buf; char *what = "?"; struct usb_device *udev; if (length < 1 || length > 0xffff || vary >= length) return -EINVAL; buf = kmalloc(length + offset, GFP_KERNEL); if (!buf) return -ENOMEM; buf += offset; udev = testdev_to_usbdev(dev); len = length; retval = 0; /* NOTE: hardware might well act differently if we pushed it * with lots back-to-back queued requests. */ for (i = 0; i < count; i++) { /* write patterned data */ for (j = 0; j < len; j++) buf[j] = (u8)(i + j); retval = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), 0x5b, USB_DIR_OUT|USB_TYPE_VENDOR, 0, 0, buf, len, USB_CTRL_SET_TIMEOUT); if (retval != len) { what = "write"; if (retval >= 0) { ERROR(dev, "ctrl_out, wlen %d (expected %d)\n", retval, len); retval = -EBADMSG; } break; } /* read it back -- assuming nothing intervened!! */ retval = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), 0x5c, USB_DIR_IN|USB_TYPE_VENDOR, 0, 0, buf, len, USB_CTRL_GET_TIMEOUT); if (retval != len) { what = "read"; if (retval >= 0) { ERROR(dev, "ctrl_out, rlen %d (expected %d)\n", retval, len); retval = -EBADMSG; } break; } /* fail if we can't verify */ for (j = 0; j < len; j++) { if (buf[j] != (u8)(i + j)) { ERROR(dev, "ctrl_out, byte %d is %d not %d\n", j, buf[j], (u8)(i + j)); retval = -EBADMSG; break; } } if (retval < 0) { what = "verify"; break; } len += vary; /* [real world] the "zero bytes IN" case isn't really used. * hardware can easily trip up in this weird case, since its * status stage is IN, not OUT like other ep0in transfers. */ if (len > length) len = realworld ? 1 : 0; } if (retval < 0) ERROR(dev, "ctrl_out %s failed, code %d, count %d\n", what, retval, i); kfree(buf - offset); return retval; } /*-------------------------------------------------------------------------*/ /* ISO/BULK tests ... mimics common usage * - buffer length is split into N packets (mostly maxpacket sized) * - multi-buffers according to sglen */ struct transfer_context { unsigned count; unsigned pending; spinlock_t lock; struct completion done; int submit_error; unsigned long errors; unsigned long packet_count; struct usbtest_dev *dev; bool is_iso; }; static void complicated_callback(struct urb *urb) { struct transfer_context *ctx = urb->context; unsigned long flags; spin_lock_irqsave(&ctx->lock, flags); ctx->count--; ctx->packet_count += urb->number_of_packets; if (urb->error_count > 0) ctx->errors += urb->error_count; else if (urb->status != 0) ctx->errors += (ctx->is_iso ? urb->number_of_packets : 1); else if (urb->actual_length != urb->transfer_buffer_length) ctx->errors++; else if (check_guard_bytes(ctx->dev, urb) != 0) ctx->errors++; if (urb->status == 0 && ctx->count > (ctx->pending - 1) && !ctx->submit_error) { int status = usb_submit_urb(urb, GFP_ATOMIC); switch (status) { case 0: goto done; default: dev_err(&ctx->dev->intf->dev, "resubmit err %d\n", status); fallthrough; case -ENODEV: /* disconnected */ case -ESHUTDOWN: /* endpoint disabled */ ctx->submit_error = 1; break; } } ctx->pending--; if (ctx->pending == 0) { if (ctx->errors) dev_err(&ctx->dev->intf->dev, "during the test, %lu errors out of %lu\n", ctx->errors, ctx->packet_count); complete(&ctx->done); } done: spin_unlock_irqrestore(&ctx->lock, flags); } static struct urb *iso_alloc_urb( struct usb_device *udev, int pipe, struct usb_endpoint_descriptor *desc, long bytes, unsigned offset ) { struct urb *urb; unsigned i, maxp, packets; if (bytes < 0 || !desc) return NULL; maxp = usb_endpoint_maxp(desc); if (udev->speed >= USB_SPEED_SUPER) maxp *= ss_isoc_get_packet_num(udev, pipe); else maxp *= usb_endpoint_maxp_mult(desc); packets = DIV_ROUND_UP(bytes, maxp); urb = usb_alloc_urb(packets, GFP_KERNEL); if (!urb) return urb; urb->dev = udev; urb->pipe = pipe; urb->number_of_packets = packets; urb->transfer_buffer_length = bytes; urb->transfer_buffer = usb_alloc_coherent(udev, bytes + offset, GFP_KERNEL, &urb->transfer_dma); if (!urb->transfer_buffer) { usb_free_urb(urb); return NULL; } if (offset) { memset(urb->transfer_buffer, GUARD_BYTE, offset); urb->transfer_buffer += offset; urb->transfer_dma += offset; } /* For inbound transfers use guard byte so that test fails if data not correctly copied */ memset(urb->transfer_buffer, usb_pipein(urb->pipe) ? GUARD_BYTE : 0, bytes); for (i = 0; i < packets; i++) { /* here, only the last packet will be short */ urb->iso_frame_desc[i].length = min((unsigned) bytes, maxp); bytes -= urb->iso_frame_desc[i].length; urb->iso_frame_desc[i].offset = maxp * i; } urb->complete = complicated_callback; /* urb->context = SET BY CALLER */ urb->interval = 1 << (desc->bInterval - 1); urb->transfer_flags = URB_ISO_ASAP | URB_NO_TRANSFER_DMA_MAP; return urb; } static int test_queue(struct usbtest_dev *dev, struct usbtest_param_32 *param, int pipe, struct usb_endpoint_descriptor *desc, unsigned offset) { struct transfer_context context; struct usb_device *udev; unsigned i; unsigned long packets = 0; int status = 0; struct urb **urbs; if (!param->sglen || param->iterations > UINT_MAX / param->sglen) return -EINVAL; if (param->sglen > MAX_SGLEN) return -EINVAL; urbs = kcalloc(param->sglen, sizeof(*urbs), GFP_KERNEL); if (!urbs) return -ENOMEM; memset(&context, 0, sizeof(context)); context.count = param->iterations * param->sglen; context.dev = dev; context.is_iso = !!desc; init_completion(&context.done); spin_lock_init(&context.lock); udev = testdev_to_usbdev(dev); for (i = 0; i < param->sglen; i++) { if (context.is_iso) urbs[i] = iso_alloc_urb(udev, pipe, desc, param->length, offset); else urbs[i] = complicated_alloc_urb(udev, pipe, param->length, 0); if (!urbs[i]) { status = -ENOMEM; goto fail; } packets += urbs[i]->number_of_packets; urbs[i]->context = &context; } packets *= param->iterations; if (context.is_iso) { int transaction_num; if (udev->speed >= USB_SPEED_SUPER) transaction_num = ss_isoc_get_packet_num(udev, pipe); else transaction_num = usb_endpoint_maxp_mult(desc); dev_info(&dev->intf->dev, "iso period %d %sframes, wMaxPacket %d, transactions: %d\n", 1 << (desc->bInterval - 1), (udev->speed >= USB_SPEED_HIGH) ? "micro" : "", usb_endpoint_maxp(desc), transaction_num); dev_info(&dev->intf->dev, "total %lu msec (%lu packets)\n", (packets * (1 << (desc->bInterval - 1))) / ((udev->speed >= USB_SPEED_HIGH) ? 8 : 1), packets); } spin_lock_irq(&context.lock); for (i = 0; i < param->sglen; i++) { ++context.pending; status = usb_submit_urb(urbs[i], GFP_ATOMIC); if (status < 0) { ERROR(dev, "submit iso[%d], error %d\n", i, status); if (i == 0) { spin_unlock_irq(&context.lock); goto fail; } simple_free_urb(urbs[i]); urbs[i] = NULL; context.pending--; context.submit_error = 1; break; } } spin_unlock_irq(&context.lock); wait_for_completion(&context.done); for (i = 0; i < param->sglen; i++) { if (urbs[i]) simple_free_urb(urbs[i]); } /* * Isochronous transfers are expected to fail sometimes. As an * arbitrary limit, we will report an error if any submissions * fail or if the transfer failure rate is > 10%. */ if (status != 0) ; else if (context.submit_error) status = -EACCES; else if (context.errors > (context.is_iso ? context.packet_count / 10 : 0)) status = -EIO; kfree(urbs); return status; fail: for (i = 0; i < param->sglen; i++) { if (urbs[i]) simple_free_urb(urbs[i]); } kfree(urbs); return status; } static int test_unaligned_bulk( struct usbtest_dev *tdev, int pipe, unsigned length, int iterations, unsigned transfer_flags, const char *label) { int retval; struct urb *urb = usbtest_alloc_urb(testdev_to_usbdev(tdev), pipe, length, transfer_flags, 1, 0, simple_callback); if (!urb) return -ENOMEM; retval = simple_io(tdev, urb, iterations, 0, 0, label); simple_free_urb(urb); return retval; } /* Run tests. */ static int usbtest_do_ioctl(struct usb_interface *intf, struct usbtest_param_32 *param) { struct usbtest_dev *dev = usb_get_intfdata(intf); struct usb_device *udev = testdev_to_usbdev(dev); struct urb *urb; struct scatterlist *sg; struct usb_sg_request req; unsigned i; int retval = -EOPNOTSUPP; if (param->iterations <= 0) return -EINVAL; if (param->sglen > MAX_SGLEN) return -EINVAL; /* * Just a bunch of test cases that every HCD is expected to handle. * * Some may need specific firmware, though it'd be good to have * one firmware image to handle all the test cases. * * FIXME add more tests! cancel requests, verify the data, control * queueing, concurrent read+write threads, and so on. */ switch (param->test_num) { case 0: dev_info(&intf->dev, "TEST 0: NOP\n"); retval = 0; break; /* Simple non-queued bulk I/O tests */ case 1: if (dev->out_pipe == 0) break; dev_info(&intf->dev, "TEST 1: write %d bytes %u times\n", param->length, param->iterations); urb = simple_alloc_urb(udev, dev->out_pipe, param->length, 0); if (!urb) { retval = -ENOMEM; break; } /* FIRMWARE: bulk sink (maybe accepts short writes) */ retval = simple_io(dev, urb, param->iterations, 0, 0, "test1"); simple_free_urb(urb); break; case 2: if (dev->in_pipe == 0) break; dev_info(&intf->dev, "TEST 2: read %d bytes %u times\n", param->length, param->iterations); urb = simple_alloc_urb(udev, dev->in_pipe, param->length, 0); if (!urb) { retval = -ENOMEM; break; } /* FIRMWARE: bulk source (maybe generates short writes) */ retval = simple_io(dev, urb, param->iterations, 0, 0, "test2"); simple_free_urb(urb); break; case 3: if (dev->out_pipe == 0 || param->vary == 0) break; dev_info(&intf->dev, "TEST 3: write/%d 0..%d bytes %u times\n", param->vary, param->length, param->iterations); urb = simple_alloc_urb(udev, dev->out_pipe, param->length, 0); if (!urb) { retval = -ENOMEM; break; } /* FIRMWARE: bulk sink (maybe accepts short writes) */ retval = simple_io(dev, urb, param->iterations, param->vary, 0, "test3"); simple_free_urb(urb); break; case 4: if (dev->in_pipe == 0 || param->vary == 0) break; dev_info(&intf->dev, "TEST 4: read/%d 0..%d bytes %u times\n", param->vary, param->length, param->iterations); urb = simple_alloc_urb(udev, dev->in_pipe, param->length, 0); if (!urb) { retval = -ENOMEM; break; } /* FIRMWARE: bulk source (maybe generates short writes) */ retval = simple_io(dev, urb, param->iterations, param->vary, 0, "test4"); simple_free_urb(urb); break; /* Queued bulk I/O tests */ case 5: if (dev->out_pipe == 0 || param->sglen == 0) break; dev_info(&intf->dev, "TEST 5: write %d sglists %d entries of %d bytes\n", param->iterations, param->sglen, param->length); sg = alloc_sglist(param->sglen, param->length, 0, dev, dev->out_pipe); if (!sg) { retval = -ENOMEM; break; } /* FIRMWARE: bulk sink (maybe accepts short writes) */ retval = perform_sglist(dev, param->iterations, dev->out_pipe, &req, sg, param->sglen); free_sglist(sg, param->sglen); break; case 6: if (dev->in_pipe == 0 || param->sglen == 0) break; dev_info(&intf->dev, "TEST 6: read %d sglists %d entries of %d bytes\n", param->iterations, param->sglen, param->length); sg = alloc_sglist(param->sglen, param->length, 0, dev, dev->in_pipe); if (!sg) { retval = -ENOMEM; break; } /* FIRMWARE: bulk source (maybe generates short writes) */ retval = perform_sglist(dev, param->iterations, dev->in_pipe, &req, sg, param->sglen); free_sglist(sg, param->sglen); break; case 7: if (dev->out_pipe == 0 || param->sglen == 0 || param->vary == 0) break; dev_info(&intf->dev, "TEST 7: write/%d %d sglists %d entries 0..%d bytes\n", param->vary, param->iterations, param->sglen, param->length); sg = alloc_sglist(param->sglen, param->length, param->vary, dev, dev->out_pipe); if (!sg) { retval = -ENOMEM; break; } /* FIRMWARE: bulk sink (maybe accepts short writes) */ retval = perform_sglist(dev, param->iterations, dev->out_pipe, &req, sg, param->sglen); free_sglist(sg, param->sglen); break; case 8: if (dev->in_pipe == 0 || param->sglen == 0 || param->vary == 0) break; dev_info(&intf->dev, "TEST 8: read/%d %d sglists %d entries 0..%d bytes\n", param->vary, param->iterations, param->sglen, param->length); sg = alloc_sglist(param->sglen, param->length, param->vary, dev, dev->in_pipe); if (!sg) { retval = -ENOMEM; break; } /* FIRMWARE: bulk source (maybe generates short writes) */ retval = perform_sglist(dev, param->iterations, dev->in_pipe, &req, sg, param->sglen); free_sglist(sg, param->sglen); break; /* non-queued sanity tests for control (chapter 9 subset) */ case 9: retval = 0; dev_info(&intf->dev, "TEST 9: ch9 (subset) control tests, %d times\n", param->iterations); for (i = param->iterations; retval == 0 && i--; /* NOP */) retval = ch9_postconfig(dev); if (retval) dev_err(&intf->dev, "ch9 subset failed, " "iterations left %d\n", i); break; /* queued control messaging */ case 10: retval = 0; dev_info(&intf->dev, "TEST 10: queue %d control calls, %d times\n", param->sglen, param->iterations); retval = test_ctrl_queue(dev, param); break; /* simple non-queued unlinks (ring with one urb) */ case 11: if (dev->in_pipe == 0 || !param->length) break; retval = 0; dev_info(&intf->dev, "TEST 11: unlink %d reads of %d\n", param->iterations, param->length); for (i = param->iterations; retval == 0 && i--; /* NOP */) retval = unlink_simple(dev, dev->in_pipe, param->length); if (retval) dev_err(&intf->dev, "unlink reads failed %d, " "iterations left %d\n", retval, i); break; case 12: if (dev->out_pipe == 0 || !param->length) break; retval = 0; dev_info(&intf->dev, "TEST 12: unlink %d writes of %d\n", param->iterations, param->length); for (i = param->iterations; retval == 0 && i--; /* NOP */) retval = unlink_simple(dev, dev->out_pipe, param->length); if (retval) dev_err(&intf->dev, "unlink writes failed %d, " "iterations left %d\n", retval, i); break; /* ep halt tests */ case 13: if (dev->out_pipe == 0 && dev->in_pipe == 0) break; retval = 0; dev_info(&intf->dev, "TEST 13: set/clear %d halts\n", param->iterations); for (i = param->iterations; retval == 0 && i--; /* NOP */) retval = halt_simple(dev); if (retval) ERROR(dev, "halts failed, iterations left %d\n", i); break; /* control write tests */ case 14: if (!dev->info->ctrl_out) break; dev_info(&intf->dev, "TEST 14: %d ep0out, %d..%d vary %d\n", param->iterations, realworld ? 1 : 0, param->length, param->vary); retval = ctrl_out(dev, param->iterations, param->length, param->vary, 0); break; /* iso write tests */ case 15: if (dev->out_iso_pipe == 0 || param->sglen == 0) break; dev_info(&intf->dev, "TEST 15: write %d iso, %d entries of %d bytes\n", param->iterations, param->sglen, param->length); /* FIRMWARE: iso sink */ retval = test_queue(dev, param, dev->out_iso_pipe, dev->iso_out, 0); break; /* iso read tests */ case 16: if (dev->in_iso_pipe == 0 || param->sglen == 0) break; dev_info(&intf->dev, "TEST 16: read %d iso, %d entries of %d bytes\n", param->iterations, param->sglen, param->length); /* FIRMWARE: iso source */ retval = test_queue(dev, param, dev->in_iso_pipe, dev->iso_in, 0); break; /* FIXME scatterlist cancel (needs helper thread) */ /* Tests for bulk I/O using DMA mapping by core and odd address */ case 17: if (dev->out_pipe == 0) break; dev_info(&intf->dev, "TEST 17: write odd addr %d bytes %u times core map\n", param->length, param->iterations); retval = test_unaligned_bulk( dev, dev->out_pipe, param->length, param->iterations, 0, "test17"); break; case 18: if (dev->in_pipe == 0) break; dev_info(&intf->dev, "TEST 18: read odd addr %d bytes %u times core map\n", param->length, param->iterations); retval = test_unaligned_bulk( dev, dev->in_pipe, param->length, param->iterations, 0, "test18"); break; /* Tests for bulk I/O using premapped coherent buffer and odd address */ case 19: if (dev->out_pipe == 0) break; dev_info(&intf->dev, "TEST 19: write odd addr %d bytes %u times premapped\n", param->length, param->iterations); retval = test_unaligned_bulk( dev, dev->out_pipe, param->length, param->iterations, URB_NO_TRANSFER_DMA_MAP, "test19"); break; case 20: if (dev->in_pipe == 0) break; dev_info(&intf->dev, "TEST 20: read odd addr %d bytes %u times premapped\n", param->length, param->iterations); retval = test_unaligned_bulk( dev, dev->in_pipe, param->length, param->iterations, URB_NO_TRANSFER_DMA_MAP, "test20"); break; /* control write tests with unaligned buffer */ case 21: if (!dev->info->ctrl_out) break; dev_info(&intf->dev, "TEST 21: %d ep0out odd addr, %d..%d vary %d\n", param->iterations, realworld ? 1 : 0, param->length, param->vary); retval = ctrl_out(dev, param->iterations, param->length, param->vary, 1); break; /* unaligned iso tests */ case 22: if (dev->out_iso_pipe == 0 || param->sglen == 0) break; dev_info(&intf->dev, "TEST 22: write %d iso odd, %d entries of %d bytes\n", param->iterations, param->sglen, param->length); retval = test_queue(dev, param, dev->out_iso_pipe, dev->iso_out, 1); break; case 23: if (dev->in_iso_pipe == 0 || param->sglen == 0) break; dev_info(&intf->dev, "TEST 23: read %d iso odd, %d entries of %d bytes\n", param->iterations, param->sglen, param->length); retval = test_queue(dev, param, dev->in_iso_pipe, dev->iso_in, 1); break; /* unlink URBs from a bulk-OUT queue */ case 24: if (dev->out_pipe == 0 || !param->length || param->sglen < 4) break; retval = 0; dev_info(&intf->dev, "TEST 24: unlink from %d queues of " "%d %d-byte writes\n", param->iterations, param->sglen, param->length); for (i = param->iterations; retval == 0 && i > 0; --i) { retval = unlink_queued(dev, dev->out_pipe, param->sglen, param->length); if (retval) { dev_err(&intf->dev, "unlink queued writes failed %d, " "iterations left %d\n", retval, i); break; } } break; /* Simple non-queued interrupt I/O tests */ case 25: if (dev->out_int_pipe == 0) break; dev_info(&intf->dev, "TEST 25: write %d bytes %u times\n", param->length, param->iterations); urb = simple_alloc_urb(udev, dev->out_int_pipe, param->length, dev->int_out->bInterval); if (!urb) { retval = -ENOMEM; break; } /* FIRMWARE: interrupt sink (maybe accepts short writes) */ retval = simple_io(dev, urb, param->iterations, 0, 0, "test25"); simple_free_urb(urb); break; case 26: if (dev->in_int_pipe == 0) break; dev_info(&intf->dev, "TEST 26: read %d bytes %u times\n", param->length, param->iterations); urb = simple_alloc_urb(udev, dev->in_int_pipe, param->length, dev->int_in->bInterval); if (!urb) { retval = -ENOMEM; break; } /* FIRMWARE: interrupt source (maybe generates short writes) */ retval = simple_io(dev, urb, param->iterations, 0, 0, "test26"); simple_free_urb(urb); break; case 27: /* We do performance test, so ignore data compare */ if (dev->out_pipe == 0 || param->sglen == 0 || pattern != 0) break; dev_info(&intf->dev, "TEST 27: bulk write %dMbytes\n", (param->iterations * param->sglen * param->length) / (1024 * 1024)); retval = test_queue(dev, param, dev->out_pipe, NULL, 0); break; case 28: if (dev->in_pipe == 0 || param->sglen == 0 || pattern != 0) break; dev_info(&intf->dev, "TEST 28: bulk read %dMbytes\n", (param->iterations * param->sglen * param->length) / (1024 * 1024)); retval = test_queue(dev, param, dev->in_pipe, NULL, 0); break; /* Test data Toggle/seq_nr clear between bulk out transfers */ case 29: if (dev->out_pipe == 0) break; retval = 0; dev_info(&intf->dev, "TEST 29: Clear toggle between bulk writes %d times\n", param->iterations); for (i = param->iterations; retval == 0 && i > 0; --i) retval = toggle_sync_simple(dev); if (retval) ERROR(dev, "toggle sync failed, iterations left %d\n", i); break; } return retval; } /*-------------------------------------------------------------------------*/ /* We only have this one interface to user space, through usbfs. * User mode code can scan usbfs to find N different devices (maybe on * different busses) to use when testing, and allocate one thread per * test. So discovery is simplified, and we have no device naming issues. * * Don't use these only as stress/load tests. Use them along with * other USB bus activity: plugging, unplugging, mousing, mp3 playback, * video capture, and so on. Run different tests at different times, in * different sequences. Nothing here should interact with other devices, * except indirectly by consuming USB bandwidth and CPU resources for test * threads and request completion. But the only way to know that for sure * is to test when HC queues are in use by many devices. * * WARNING: Because usbfs grabs udev->dev.sem before calling this ioctl(), * it locks out usbcore in certain code paths. Notably, if you disconnect * the device-under-test, hub_wq will wait block forever waiting for the * ioctl to complete ... so that usb_disconnect() can abort the pending * urbs and then call usbtest_disconnect(). To abort a test, you're best * off just killing the userspace task and waiting for it to exit. */ static int usbtest_ioctl(struct usb_interface *intf, unsigned int code, void *buf) { struct usbtest_dev *dev = usb_get_intfdata(intf); struct usbtest_param_64 *param_64 = buf; struct usbtest_param_32 temp; struct usbtest_param_32 *param_32 = buf; struct timespec64 start; struct timespec64 end; struct timespec64 duration; int retval = -EOPNOTSUPP; /* FIXME USBDEVFS_CONNECTINFO doesn't say how fast the device is. */ pattern = mod_pattern; if (mutex_lock_interruptible(&dev->lock)) return -ERESTARTSYS; /* FIXME: What if a system sleep starts while a test is running? */ /* some devices, like ez-usb default devices, need a non-default * altsetting to have any active endpoints. some tests change * altsettings; force a default so most tests don't need to check. */ if (dev->info->alt >= 0) { if (intf->altsetting->desc.bInterfaceNumber) { retval = -ENODEV; goto free_mutex; } retval = set_altsetting(dev, dev->info->alt); if (retval) { dev_err(&intf->dev, "set altsetting to %d failed, %d\n", dev->info->alt, retval); goto free_mutex; } } switch (code) { case USBTEST_REQUEST_64: temp.test_num = param_64->test_num; temp.iterations = param_64->iterations; temp.length = param_64->length; temp.sglen = param_64->sglen; temp.vary = param_64->vary; param_32 = &temp; break; case USBTEST_REQUEST_32: break; default: retval = -EOPNOTSUPP; goto free_mutex; } ktime_get_ts64(&start); retval = usbtest_do_ioctl(intf, param_32); if (retval < 0) goto free_mutex; ktime_get_ts64(&end); duration = timespec64_sub(end, start); temp.duration_sec = duration.tv_sec; temp.duration_usec = duration.tv_nsec/NSEC_PER_USEC; switch (code) { case USBTEST_REQUEST_32: param_32->duration_sec = temp.duration_sec; param_32->duration_usec = temp.duration_usec; break; case USBTEST_REQUEST_64: param_64->duration_sec = temp.duration_sec; param_64->duration_usec = temp.duration_usec; break; } free_mutex: mutex_unlock(&dev->lock); return retval; } /*-------------------------------------------------------------------------*/ static unsigned force_interrupt; module_param(force_interrupt, uint, 0); MODULE_PARM_DESC(force_interrupt, "0 = test default; else interrupt"); #ifdef GENERIC static unsigned short vendor; module_param(vendor, ushort, 0); MODULE_PARM_DESC(vendor, "vendor code (from usb-if)"); static unsigned short product; module_param(product, ushort, 0); MODULE_PARM_DESC(product, "product code (from vendor)"); #endif static int usbtest_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *udev; struct usbtest_dev *dev; struct usbtest_info *info; char *rtest, *wtest; char *irtest, *iwtest; char *intrtest, *intwtest; udev = interface_to_usbdev(intf); #ifdef GENERIC /* specify devices by module parameters? */ if (id->match_flags == 0) { /* vendor match required, product match optional */ if (!vendor || le16_to_cpu(udev->descriptor.idVendor) != (u16)vendor) return -ENODEV; if (product && le16_to_cpu(udev->descriptor.idProduct) != (u16)product) return -ENODEV; dev_info(&intf->dev, "matched module params, " "vend=0x%04x prod=0x%04x\n", le16_to_cpu(udev->descriptor.idVendor), le16_to_cpu(udev->descriptor.idProduct)); } #endif dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return -ENOMEM; info = (struct usbtest_info *) id->driver_info; dev->info = info; mutex_init(&dev->lock); dev->intf = intf; /* cacheline-aligned scratch for i/o */ dev->buf = kmalloc(TBUF_SIZE, GFP_KERNEL); if (dev->buf == NULL) { kfree(dev); return -ENOMEM; } /* NOTE this doesn't yet test the handful of difference that are * visible with high speed interrupts: bigger maxpacket (1K) and * "high bandwidth" modes (up to 3 packets/uframe). */ rtest = wtest = ""; irtest = iwtest = ""; intrtest = intwtest = ""; if (force_interrupt || udev->speed == USB_SPEED_LOW) { if (info->ep_in) { dev->in_pipe = usb_rcvintpipe(udev, info->ep_in); rtest = " intr-in"; } if (info->ep_out) { dev->out_pipe = usb_sndintpipe(udev, info->ep_out); wtest = " intr-out"; } } else { if (override_alt >= 0 || info->autoconf) { int status; status = get_endpoints(dev, intf); if (status < 0) { WARNING(dev, "couldn't get endpoints, %d\n", status); kfree(dev->buf); kfree(dev); return status; } /* may find bulk or ISO pipes */ } else { if (info->ep_in) dev->in_pipe = usb_rcvbulkpipe(udev, info->ep_in); if (info->ep_out) dev->out_pipe = usb_sndbulkpipe(udev, info->ep_out); } if (dev->in_pipe) rtest = " bulk-in"; if (dev->out_pipe) wtest = " bulk-out"; if (dev->in_iso_pipe) irtest = " iso-in"; if (dev->out_iso_pipe) iwtest = " iso-out"; if (dev->in_int_pipe) intrtest = " int-in"; if (dev->out_int_pipe) intwtest = " int-out"; } usb_set_intfdata(intf, dev); dev_info(&intf->dev, "%s\n", info->name); dev_info(&intf->dev, "%s {control%s%s%s%s%s%s%s} tests%s\n", usb_speed_string(udev->speed), info->ctrl_out ? " in/out" : "", rtest, wtest, irtest, iwtest, intrtest, intwtest, info->alt >= 0 ? " (+alt)" : ""); return 0; } static int usbtest_suspend(struct usb_interface *intf, pm_message_t message) { return 0; } static int usbtest_resume(struct usb_interface *intf) { return 0; } static void usbtest_disconnect(struct usb_interface *intf) { struct usbtest_dev *dev = usb_get_intfdata(intf); usb_set_intfdata(intf, NULL); dev_dbg(&intf->dev, "disconnect\n"); kfree(dev->buf); kfree(dev); } /* Basic testing only needs a device that can source or sink bulk traffic. * Any device can test control transfers (default with GENERIC binding). * * Several entries work with the default EP0 implementation that's built * into EZ-USB chips. There's a default vendor ID which can be overridden * by (very) small config EEPROMS, but otherwise all these devices act * identically until firmware is loaded: only EP0 works. It turns out * to be easy to make other endpoints work, without modifying that EP0 * behavior. For now, we expect that kind of firmware. */ /* an21xx or fx versions of ez-usb */ static struct usbtest_info ez1_info = { .name = "EZ-USB device", .ep_in = 2, .ep_out = 2, .alt = 1, }; /* fx2 version of ez-usb */ static struct usbtest_info ez2_info = { .name = "FX2 device", .ep_in = 6, .ep_out = 2, .alt = 1, }; /* ezusb family device with dedicated usb test firmware, */ static struct usbtest_info fw_info = { .name = "usb test device", .ep_in = 2, .ep_out = 2, .alt = 1, .autoconf = 1, /* iso and ctrl_out need autoconf */ .ctrl_out = 1, .iso = 1, /* iso_ep's are #8 in/out */ }; /* peripheral running Linux and 'zero.c' test firmware, or * its user-mode cousin. different versions of this use * different hardware with the same vendor/product codes. * host side MUST rely on the endpoint descriptors. */ static struct usbtest_info gz_info = { .name = "Linux gadget zero", .autoconf = 1, .ctrl_out = 1, .iso = 1, .intr = 1, .alt = 0, }; static struct usbtest_info um_info = { .name = "Linux user mode test driver", .autoconf = 1, .alt = -1, }; static struct usbtest_info um2_info = { .name = "Linux user mode ISO test driver", .autoconf = 1, .iso = 1, .alt = -1, }; #ifdef IBOT2 /* this is a nice source of high speed bulk data; * uses an FX2, with firmware provided in the device */ static struct usbtest_info ibot2_info = { .name = "iBOT2 webcam", .ep_in = 2, .alt = -1, }; #endif #ifdef GENERIC /* we can use any device to test control traffic */ static struct usbtest_info generic_info = { .name = "Generic USB device", .alt = -1, }; #endif static const struct usb_device_id id_table[] = { /*-------------------------------------------------------------*/ /* EZ-USB devices which download firmware to replace (or in our * case augment) the default device implementation. */ /* generic EZ-USB FX controller */ { USB_DEVICE(0x0547, 0x2235), .driver_info = (unsigned long) &ez1_info, }, /* CY3671 development board with EZ-USB FX */ { USB_DEVICE(0x0547, 0x0080), .driver_info = (unsigned long) &ez1_info, }, /* generic EZ-USB FX2 controller (or development board) */ { USB_DEVICE(0x04b4, 0x8613), .driver_info = (unsigned long) &ez2_info, }, /* re-enumerated usb test device firmware */ { USB_DEVICE(0xfff0, 0xfff0), .driver_info = (unsigned long) &fw_info, }, /* "Gadget Zero" firmware runs under Linux */ { USB_DEVICE(0x0525, 0xa4a0), .driver_info = (unsigned long) &gz_info, }, /* so does a user-mode variant */ { USB_DEVICE(0x0525, 0xa4a4), .driver_info = (unsigned long) &um_info, }, /* ... and a user-mode variant that talks iso */ { USB_DEVICE(0x0525, 0xa4a3), .driver_info = (unsigned long) &um2_info, }, #ifdef KEYSPAN_19Qi /* Keyspan 19qi uses an21xx (original EZ-USB) */ /* this does not coexist with the real Keyspan 19qi driver! */ { USB_DEVICE(0x06cd, 0x010b), .driver_info = (unsigned long) &ez1_info, }, #endif /*-------------------------------------------------------------*/ #ifdef IBOT2 /* iBOT2 makes a nice source of high speed bulk-in data */ /* this does not coexist with a real iBOT2 driver! */ { USB_DEVICE(0x0b62, 0x0059), .driver_info = (unsigned long) &ibot2_info, }, #endif /*-------------------------------------------------------------*/ #ifdef GENERIC /* module params can specify devices to use for control tests */ { .driver_info = (unsigned long) &generic_info, }, #endif /*-------------------------------------------------------------*/ { } }; MODULE_DEVICE_TABLE(usb, id_table); static struct usb_driver usbtest_driver = { .name = "usbtest", .id_table = id_table, .probe = usbtest_probe, .unlocked_ioctl = usbtest_ioctl, .disconnect = usbtest_disconnect, .suspend = usbtest_suspend, .resume = usbtest_resume, }; /*-------------------------------------------------------------------------*/ static int __init usbtest_init(void) { #ifdef GENERIC if (vendor) pr_debug("params: vend=0x%04x prod=0x%04x\n", vendor, product); #endif return usb_register(&usbtest_driver); } module_init(usbtest_init); static void __exit usbtest_exit(void) { usb_deregister(&usbtest_driver); } module_exit(usbtest_exit); MODULE_DESCRIPTION("USB Core/HCD Testing Driver"); MODULE_LICENSE("GPL"); |
5 5 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 | // SPDX-License-Identifier: GPL-2.0-or-later /* * PTP virtual clock driver * * Copyright 2021 NXP */ #include <linux/slab.h> #include <linux/hashtable.h> #include "ptp_private.h" #define PTP_VCLOCK_CC_SHIFT 31 #define PTP_VCLOCK_CC_MULT (1 << PTP_VCLOCK_CC_SHIFT) #define PTP_VCLOCK_FADJ_SHIFT 9 #define PTP_VCLOCK_FADJ_DENOMINATOR 15625ULL #define PTP_VCLOCK_REFRESH_INTERVAL (HZ * 2) /* protects vclock_hash addition/deletion */ static DEFINE_SPINLOCK(vclock_hash_lock); static DEFINE_READ_MOSTLY_HASHTABLE(vclock_hash, 8); static void ptp_vclock_hash_add(struct ptp_vclock *vclock) { spin_lock(&vclock_hash_lock); hlist_add_head_rcu(&vclock->vclock_hash_node, &vclock_hash[vclock->clock->index % HASH_SIZE(vclock_hash)]); spin_unlock(&vclock_hash_lock); } static void ptp_vclock_hash_del(struct ptp_vclock *vclock) { spin_lock(&vclock_hash_lock); hlist_del_init_rcu(&vclock->vclock_hash_node); spin_unlock(&vclock_hash_lock); synchronize_rcu(); } static int ptp_vclock_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) { struct ptp_vclock *vclock = info_to_vclock(ptp); s64 adj; adj = (s64)scaled_ppm << PTP_VCLOCK_FADJ_SHIFT; adj = div_s64(adj, PTP_VCLOCK_FADJ_DENOMINATOR); if (mutex_lock_interruptible(&vclock->lock)) return -EINTR; timecounter_read(&vclock->tc); vclock->cc.mult = PTP_VCLOCK_CC_MULT + adj; mutex_unlock(&vclock->lock); return 0; } static int ptp_vclock_adjtime(struct ptp_clock_info *ptp, s64 delta) { struct ptp_vclock *vclock = info_to_vclock(ptp); if (mutex_lock_interruptible(&vclock->lock)) return -EINTR; timecounter_adjtime(&vclock->tc, delta); mutex_unlock(&vclock->lock); return 0; } static int ptp_vclock_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts) { struct ptp_vclock *vclock = info_to_vclock(ptp); u64 ns; if (mutex_lock_interruptible(&vclock->lock)) return -EINTR; ns = timecounter_read(&vclock->tc); mutex_unlock(&vclock->lock); *ts = ns_to_timespec64(ns); return 0; } static int ptp_vclock_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts, struct ptp_system_timestamp *sts) { struct ptp_vclock *vclock = info_to_vclock(ptp); struct ptp_clock *pptp = vclock->pclock; struct timespec64 pts; int err; u64 ns; err = pptp->info->getcyclesx64(pptp->info, &pts, sts); if (err) return err; if (mutex_lock_interruptible(&vclock->lock)) return -EINTR; ns = timecounter_cyc2time(&vclock->tc, timespec64_to_ns(&pts)); mutex_unlock(&vclock->lock); *ts = ns_to_timespec64(ns); return 0; } static int ptp_vclock_settime(struct ptp_clock_info *ptp, const struct timespec64 *ts) { struct ptp_vclock *vclock = info_to_vclock(ptp); u64 ns = timespec64_to_ns(ts); if (mutex_lock_interruptible(&vclock->lock)) return -EINTR; timecounter_init(&vclock->tc, &vclock->cc, ns); mutex_unlock(&vclock->lock); return 0; } static int ptp_vclock_getcrosststamp(struct ptp_clock_info *ptp, struct system_device_crosststamp *xtstamp) { struct ptp_vclock *vclock = info_to_vclock(ptp); struct ptp_clock *pptp = vclock->pclock; int err; u64 ns; err = pptp->info->getcrosscycles(pptp->info, xtstamp); if (err) return err; if (mutex_lock_interruptible(&vclock->lock)) return -EINTR; ns = timecounter_cyc2time(&vclock->tc, ktime_to_ns(xtstamp->device)); mutex_unlock(&vclock->lock); xtstamp->device = ns_to_ktime(ns); return 0; } static long ptp_vclock_refresh(struct ptp_clock_info *ptp) { struct ptp_vclock *vclock = info_to_vclock(ptp); struct timespec64 ts; ptp_vclock_gettime(&vclock->info, &ts); return PTP_VCLOCK_REFRESH_INTERVAL; } static const struct ptp_clock_info ptp_vclock_info = { .owner = THIS_MODULE, .name = "ptp virtual clock", .max_adj = 500000000, .adjfine = ptp_vclock_adjfine, .adjtime = ptp_vclock_adjtime, .settime64 = ptp_vclock_settime, .do_aux_work = ptp_vclock_refresh, }; static u64 ptp_vclock_read(const struct cyclecounter *cc) { struct ptp_vclock *vclock = cc_to_vclock(cc); struct ptp_clock *ptp = vclock->pclock; struct timespec64 ts = {}; ptp->info->getcycles64(ptp->info, &ts); return timespec64_to_ns(&ts); } static const struct cyclecounter ptp_vclock_cc = { .read = ptp_vclock_read, .mask = CYCLECOUNTER_MASK(32), .mult = PTP_VCLOCK_CC_MULT, .shift = PTP_VCLOCK_CC_SHIFT, }; struct ptp_vclock *ptp_vclock_register(struct ptp_clock *pclock) { struct ptp_vclock *vclock; vclock = kzalloc(sizeof(*vclock), GFP_KERNEL); if (!vclock) return NULL; vclock->pclock = pclock; vclock->info = ptp_vclock_info; if (pclock->info->getcyclesx64) vclock->info.gettimex64 = ptp_vclock_gettimex; else vclock->info.gettime64 = ptp_vclock_gettime; if (pclock->info->getcrosscycles) vclock->info.getcrosststamp = ptp_vclock_getcrosststamp; vclock->cc = ptp_vclock_cc; snprintf(vclock->info.name, PTP_CLOCK_NAME_LEN, "ptp%d_virt", pclock->index); INIT_HLIST_NODE(&vclock->vclock_hash_node); mutex_init(&vclock->lock); vclock->clock = ptp_clock_register(&vclock->info, &pclock->dev); if (IS_ERR_OR_NULL(vclock->clock)) { kfree(vclock); return NULL; } timecounter_init(&vclock->tc, &vclock->cc, 0); ptp_schedule_worker(vclock->clock, PTP_VCLOCK_REFRESH_INTERVAL); ptp_vclock_hash_add(vclock); return vclock; } void ptp_vclock_unregister(struct ptp_vclock *vclock) { ptp_vclock_hash_del(vclock); ptp_clock_unregister(vclock->clock); kfree(vclock); } #if IS_BUILTIN(CONFIG_PTP_1588_CLOCK) int ptp_get_vclocks_index(int pclock_index, int **vclock_index) { char name[PTP_CLOCK_NAME_LEN] = ""; struct ptp_clock *ptp; struct device *dev; int num = 0; if (pclock_index < 0) return num; snprintf(name, PTP_CLOCK_NAME_LEN, "ptp%d", pclock_index); dev = class_find_device_by_name(&ptp_class, name); if (!dev) return num; ptp = dev_get_drvdata(dev); if (mutex_lock_interruptible(&ptp->n_vclocks_mux)) { put_device(dev); return num; } *vclock_index = kzalloc(sizeof(int) * ptp->n_vclocks, GFP_KERNEL); if (!(*vclock_index)) goto out; memcpy(*vclock_index, ptp->vclock_index, sizeof(int) * ptp->n_vclocks); num = ptp->n_vclocks; out: mutex_unlock(&ptp->n_vclocks_mux); put_device(dev); return num; } EXPORT_SYMBOL(ptp_get_vclocks_index); ktime_t ptp_convert_timestamp(const ktime_t *hwtstamp, int vclock_index) { unsigned int hash = vclock_index % HASH_SIZE(vclock_hash); struct ptp_vclock *vclock; u64 ns; u64 vclock_ns = 0; ns = ktime_to_ns(*hwtstamp); rcu_read_lock(); hlist_for_each_entry_rcu(vclock, &vclock_hash[hash], vclock_hash_node) { if (vclock->clock->index != vclock_index) continue; if (mutex_lock_interruptible(&vclock->lock)) break; vclock_ns = timecounter_cyc2time(&vclock->tc, ns); mutex_unlock(&vclock->lock); break; } rcu_read_unlock(); return ns_to_ktime(vclock_ns); } EXPORT_SYMBOL(ptp_convert_timestamp); #endif |
4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 | // SPDX-License-Identifier: GPL-2.0-only /* IP tables module for matching the value of the IPv4/IPv6 DSCP field * * (C) 2002 by Harald Welte <laforge@netfilter.org> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/skbuff.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <net/dsfield.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_dscp.h> MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); MODULE_DESCRIPTION("Xtables: DSCP/TOS field match"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_dscp"); MODULE_ALIAS("ip6t_dscp"); MODULE_ALIAS("ipt_tos"); MODULE_ALIAS("ip6t_tos"); static bool dscp_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_dscp_info *info = par->matchinfo; u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT; return (dscp == info->dscp) ^ !!info->invert; } static bool dscp_mt6(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_dscp_info *info = par->matchinfo; u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT; return (dscp == info->dscp) ^ !!info->invert; } static int dscp_mt_check(const struct xt_mtchk_param *par) { const struct xt_dscp_info *info = par->matchinfo; if (info->dscp > XT_DSCP_MAX) return -EDOM; return 0; } static bool tos_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_tos_match_info *info = par->matchinfo; if (xt_family(par) == NFPROTO_IPV4) return ((ip_hdr(skb)->tos & info->tos_mask) == info->tos_value) ^ !!info->invert; else return ((ipv6_get_dsfield(ipv6_hdr(skb)) & info->tos_mask) == info->tos_value) ^ !!info->invert; } static struct xt_match dscp_mt_reg[] __read_mostly = { { .name = "dscp", .family = NFPROTO_IPV4, .checkentry = dscp_mt_check, .match = dscp_mt, .matchsize = sizeof(struct xt_dscp_info), .me = THIS_MODULE, }, { .name = "dscp", .family = NFPROTO_IPV6, .checkentry = dscp_mt_check, .match = dscp_mt6, .matchsize = sizeof(struct xt_dscp_info), .me = THIS_MODULE, }, { .name = "tos", .revision = 1, .family = NFPROTO_IPV4, .match = tos_mt, .matchsize = sizeof(struct xt_tos_match_info), .me = THIS_MODULE, }, { .name = "tos", .revision = 1, .family = NFPROTO_IPV6, .match = tos_mt, .matchsize = sizeof(struct xt_tos_match_info), .me = THIS_MODULE, }, }; static int __init dscp_mt_init(void) { return xt_register_matches(dscp_mt_reg, ARRAY_SIZE(dscp_mt_reg)); } static void __exit dscp_mt_exit(void) { xt_unregister_matches(dscp_mt_reg, ARRAY_SIZE(dscp_mt_reg)); } module_init(dscp_mt_init); module_exit(dscp_mt_exit); |
6 6 6 6 6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 | /* SPDX-License-Identifier: GPL-2.0 */ /* * fs/f2fs/node.h * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ */ /* start node id of a node block dedicated to the given node id */ #define START_NID(nid) (((nid) / NAT_ENTRY_PER_BLOCK) * NAT_ENTRY_PER_BLOCK) /* node block offset on the NAT area dedicated to the given start node id */ #define NAT_BLOCK_OFFSET(start_nid) ((start_nid) / NAT_ENTRY_PER_BLOCK) /* # of pages to perform synchronous readahead before building free nids */ #define FREE_NID_PAGES 8 #define MAX_FREE_NIDS (NAT_ENTRY_PER_BLOCK * FREE_NID_PAGES) /* size of free nid batch when shrinking */ #define SHRINK_NID_BATCH_SIZE 8 #define DEF_RA_NID_PAGES 0 /* # of nid pages to be readaheaded */ /* maximum readahead size for node during getting data blocks */ #define MAX_RA_NODE 128 /* control the memory footprint threshold (10MB per 1GB ram) */ #define DEF_RAM_THRESHOLD 1 /* control dirty nats ratio threshold (default: 10% over max nid count) */ #define DEF_DIRTY_NAT_RATIO_THRESHOLD 10 /* control total # of nats */ #define DEF_NAT_CACHE_THRESHOLD 100000 /* control total # of node writes used for roll-fowrad recovery */ #define DEF_RF_NODE_BLOCKS 0 /* vector size for gang look-up from nat cache that consists of radix tree */ #define NAT_VEC_SIZE 32 /* return value for read_node_page */ #define LOCKED_PAGE 1 /* check pinned file's alignment status of physical blocks */ #define FILE_NOT_ALIGNED 1 /* For flag in struct node_info */ enum { IS_CHECKPOINTED, /* is it checkpointed before? */ HAS_FSYNCED_INODE, /* is the inode fsynced before? */ HAS_LAST_FSYNC, /* has the latest node fsync mark? */ IS_DIRTY, /* this nat entry is dirty? */ IS_PREALLOC, /* nat entry is preallocated */ }; /* * For node information */ struct node_info { nid_t nid; /* node id */ nid_t ino; /* inode number of the node's owner */ block_t blk_addr; /* block address of the node */ unsigned char version; /* version of the node */ unsigned char flag; /* for node information bits */ }; struct nat_entry { struct list_head list; /* for clean or dirty nat list */ struct node_info ni; /* in-memory node information */ }; #define nat_get_nid(nat) ((nat)->ni.nid) #define nat_set_nid(nat, n) ((nat)->ni.nid = (n)) #define nat_get_blkaddr(nat) ((nat)->ni.blk_addr) #define nat_set_blkaddr(nat, b) ((nat)->ni.blk_addr = (b)) #define nat_get_ino(nat) ((nat)->ni.ino) #define nat_set_ino(nat, i) ((nat)->ni.ino = (i)) #define nat_get_version(nat) ((nat)->ni.version) #define nat_set_version(nat, v) ((nat)->ni.version = (v)) #define inc_node_version(version) (++(version)) static inline void copy_node_info(struct node_info *dst, struct node_info *src) { dst->nid = src->nid; dst->ino = src->ino; dst->blk_addr = src->blk_addr; dst->version = src->version; /* should not copy flag here */ } static inline void set_nat_flag(struct nat_entry *ne, unsigned int type, bool set) { if (set) ne->ni.flag |= BIT(type); else ne->ni.flag &= ~BIT(type); } static inline bool get_nat_flag(struct nat_entry *ne, unsigned int type) { return ne->ni.flag & BIT(type); } static inline void nat_reset_flag(struct nat_entry *ne) { /* these states can be set only after checkpoint was done */ set_nat_flag(ne, IS_CHECKPOINTED, true); set_nat_flag(ne, HAS_FSYNCED_INODE, false); set_nat_flag(ne, HAS_LAST_FSYNC, true); } static inline void node_info_from_raw_nat(struct node_info *ni, struct f2fs_nat_entry *raw_ne) { ni->ino = le32_to_cpu(raw_ne->ino); ni->blk_addr = le32_to_cpu(raw_ne->block_addr); ni->version = raw_ne->version; } static inline void raw_nat_from_node_info(struct f2fs_nat_entry *raw_ne, struct node_info *ni) { raw_ne->ino = cpu_to_le32(ni->ino); raw_ne->block_addr = cpu_to_le32(ni->blk_addr); raw_ne->version = ni->version; } static inline bool excess_dirty_nats(struct f2fs_sb_info *sbi) { return NM_I(sbi)->nat_cnt[DIRTY_NAT] >= NM_I(sbi)->max_nid * NM_I(sbi)->dirty_nats_ratio / 100; } static inline bool excess_cached_nats(struct f2fs_sb_info *sbi) { return NM_I(sbi)->nat_cnt[TOTAL_NAT] >= DEF_NAT_CACHE_THRESHOLD; } enum mem_type { FREE_NIDS, /* indicates the free nid list */ NAT_ENTRIES, /* indicates the cached nat entry */ DIRTY_DENTS, /* indicates dirty dentry pages */ INO_ENTRIES, /* indicates inode entries */ READ_EXTENT_CACHE, /* indicates read extent cache */ AGE_EXTENT_CACHE, /* indicates age extent cache */ DISCARD_CACHE, /* indicates memory of cached discard cmds */ COMPRESS_PAGE, /* indicates memory of cached compressed pages */ BASE_CHECK, /* check kernel status */ }; struct nat_entry_set { struct list_head set_list; /* link with other nat sets */ struct list_head entry_list; /* link with dirty nat entries */ nid_t set; /* set number*/ unsigned int entry_cnt; /* the # of nat entries in set */ }; struct free_nid { struct list_head list; /* for free node id list */ nid_t nid; /* node id */ int state; /* in use or not: FREE_NID or PREALLOC_NID */ }; static inline void next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct free_nid *fnid; spin_lock(&nm_i->nid_list_lock); if (nm_i->nid_cnt[FREE_NID] <= 0) { spin_unlock(&nm_i->nid_list_lock); return; } fnid = list_first_entry(&nm_i->free_nid_list, struct free_nid, list); *nid = fnid->nid; spin_unlock(&nm_i->nid_list_lock); } /* * inline functions */ static inline void get_nat_bitmap(struct f2fs_sb_info *sbi, void *addr) { struct f2fs_nm_info *nm_i = NM_I(sbi); #ifdef CONFIG_F2FS_CHECK_FS if (memcmp(nm_i->nat_bitmap, nm_i->nat_bitmap_mir, nm_i->bitmap_size)) f2fs_bug_on(sbi, 1); #endif memcpy(addr, nm_i->nat_bitmap, nm_i->bitmap_size); } static inline pgoff_t current_nat_addr(struct f2fs_sb_info *sbi, nid_t start) { struct f2fs_nm_info *nm_i = NM_I(sbi); pgoff_t block_off; pgoff_t block_addr; /* * block_off = segment_off * 512 + off_in_segment * OLD = (segment_off * 512) * 2 + off_in_segment * NEW = 2 * (segment_off * 512 + off_in_segment) - off_in_segment */ block_off = NAT_BLOCK_OFFSET(start); block_addr = (pgoff_t)(nm_i->nat_blkaddr + (block_off << 1) - (block_off & (BLKS_PER_SEG(sbi) - 1))); if (f2fs_test_bit(block_off, nm_i->nat_bitmap)) block_addr += BLKS_PER_SEG(sbi); return block_addr; } static inline pgoff_t next_nat_addr(struct f2fs_sb_info *sbi, pgoff_t block_addr) { struct f2fs_nm_info *nm_i = NM_I(sbi); block_addr -= nm_i->nat_blkaddr; block_addr ^= BIT(sbi->log_blocks_per_seg); return block_addr + nm_i->nat_blkaddr; } static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid) { unsigned int block_off = NAT_BLOCK_OFFSET(start_nid); f2fs_change_bit(block_off, nm_i->nat_bitmap); #ifdef CONFIG_F2FS_CHECK_FS f2fs_change_bit(block_off, nm_i->nat_bitmap_mir); #endif } static inline nid_t ino_of_node(struct page *node_page) { struct f2fs_node *rn = F2FS_NODE(node_page); return le32_to_cpu(rn->footer.ino); } static inline nid_t nid_of_node(struct page *node_page) { struct f2fs_node *rn = F2FS_NODE(node_page); return le32_to_cpu(rn->footer.nid); } static inline unsigned int ofs_of_node(struct page *node_page) { struct f2fs_node *rn = F2FS_NODE(node_page); unsigned flag = le32_to_cpu(rn->footer.flag); return flag >> OFFSET_BIT_SHIFT; } static inline __u64 cpver_of_node(struct page *node_page) { struct f2fs_node *rn = F2FS_NODE(node_page); return le64_to_cpu(rn->footer.cp_ver); } static inline block_t next_blkaddr_of_node(struct page *node_page) { struct f2fs_node *rn = F2FS_NODE(node_page); return le32_to_cpu(rn->footer.next_blkaddr); } static inline void fill_node_footer(struct page *page, nid_t nid, nid_t ino, unsigned int ofs, bool reset) { struct f2fs_node *rn = F2FS_NODE(page); unsigned int old_flag = 0; if (reset) memset(rn, 0, sizeof(*rn)); else old_flag = le32_to_cpu(rn->footer.flag); rn->footer.nid = cpu_to_le32(nid); rn->footer.ino = cpu_to_le32(ino); /* should remain old flag bits such as COLD_BIT_SHIFT */ rn->footer.flag = cpu_to_le32((ofs << OFFSET_BIT_SHIFT) | (old_flag & OFFSET_BIT_MASK)); } static inline void copy_node_footer(struct page *dst, struct page *src) { struct f2fs_node *src_rn = F2FS_NODE(src); struct f2fs_node *dst_rn = F2FS_NODE(dst); memcpy(&dst_rn->footer, &src_rn->footer, sizeof(struct node_footer)); } static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page)); struct f2fs_node *rn = F2FS_NODE(page); __u64 cp_ver = cur_cp_version(ckpt); if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) cp_ver |= (cur_cp_crc(ckpt) << 32); rn->footer.cp_ver = cpu_to_le64(cp_ver); rn->footer.next_blkaddr = cpu_to_le32(blkaddr); } static inline bool is_recoverable_dnode(struct page *page) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page)); __u64 cp_ver = cur_cp_version(ckpt); /* Don't care crc part, if fsck.f2fs sets it. */ if (__is_set_ckpt_flags(ckpt, CP_NOCRC_RECOVERY_FLAG)) return (cp_ver << 32) == (cpver_of_node(page) << 32); if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) cp_ver |= (cur_cp_crc(ckpt) << 32); return cp_ver == cpver_of_node(page); } /* * f2fs assigns the following node offsets described as (num). * N = NIDS_PER_BLOCK * * Inode block (0) * |- direct node (1) * |- direct node (2) * |- indirect node (3) * | `- direct node (4 => 4 + N - 1) * |- indirect node (4 + N) * | `- direct node (5 + N => 5 + 2N - 1) * `- double indirect node (5 + 2N) * `- indirect node (6 + 2N) * `- direct node * ...... * `- indirect node ((6 + 2N) + x(N + 1)) * `- direct node * ...... * `- indirect node ((6 + 2N) + (N - 1)(N + 1)) * `- direct node */ static inline bool IS_DNODE(struct page *node_page) { unsigned int ofs = ofs_of_node(node_page); if (f2fs_has_xattr_block(ofs)) return true; if (ofs == 3 || ofs == 4 + NIDS_PER_BLOCK || ofs == 5 + 2 * NIDS_PER_BLOCK) return false; if (ofs >= 6 + 2 * NIDS_PER_BLOCK) { ofs -= 6 + 2 * NIDS_PER_BLOCK; if (!((long int)ofs % (NIDS_PER_BLOCK + 1))) return false; } return true; } static inline int set_nid(struct page *p, int off, nid_t nid, bool i) { struct f2fs_node *rn = F2FS_NODE(p); f2fs_wait_on_page_writeback(p, NODE, true, true); if (i) rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid); else rn->in.nid[off] = cpu_to_le32(nid); return set_page_dirty(p); } static inline nid_t get_nid(struct page *p, int off, bool i) { struct f2fs_node *rn = F2FS_NODE(p); if (i) return le32_to_cpu(rn->i.i_nid[off - NODE_DIR1_BLOCK]); return le32_to_cpu(rn->in.nid[off]); } /* * Coldness identification: * - Mark cold files in f2fs_inode_info * - Mark cold node blocks in their node footer * - Mark cold data pages in page cache */ static inline int is_node(struct page *page, int type) { struct f2fs_node *rn = F2FS_NODE(page); return le32_to_cpu(rn->footer.flag) & BIT(type); } #define is_cold_node(page) is_node(page, COLD_BIT_SHIFT) #define is_fsync_dnode(page) is_node(page, FSYNC_BIT_SHIFT) #define is_dent_dnode(page) is_node(page, DENT_BIT_SHIFT) static inline void set_cold_node(struct page *page, bool is_dir) { struct f2fs_node *rn = F2FS_NODE(page); unsigned int flag = le32_to_cpu(rn->footer.flag); if (is_dir) flag &= ~BIT(COLD_BIT_SHIFT); else flag |= BIT(COLD_BIT_SHIFT); rn->footer.flag = cpu_to_le32(flag); } static inline void set_mark(struct page *page, int mark, int type) { struct f2fs_node *rn = F2FS_NODE(page); unsigned int flag = le32_to_cpu(rn->footer.flag); if (mark) flag |= BIT(type); else flag &= ~BIT(type); rn->footer.flag = cpu_to_le32(flag); #ifdef CONFIG_F2FS_CHECK_FS f2fs_inode_chksum_set(F2FS_P_SB(page), page); #endif } #define set_dentry_mark(page, mark) set_mark(page, mark, DENT_BIT_SHIFT) #define set_fsync_mark(page, mark) set_mark(page, mark, FSYNC_BIT_SHIFT) |
190 189 5 5 5 3083 3085 12 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 | // SPDX-License-Identifier: GPL-2.0 /* * Block rq-qos policy for assigning an I/O priority class to requests. * * Using an rq-qos policy for assigning I/O priority class has two advantages * over using the ioprio_set() system call: * * - This policy is cgroup based so it has all the advantages of cgroups. * - While ioprio_set() does not affect page cache writeback I/O, this rq-qos * controller affects page cache writeback I/O for filesystems that support * assiociating a cgroup with writeback I/O. See also * Documentation/admin-guide/cgroup-v2.rst. */ #include <linux/blk-mq.h> #include <linux/blk_types.h> #include <linux/kernel.h> #include <linux/module.h> #include "blk-cgroup.h" #include "blk-ioprio.h" #include "blk-rq-qos.h" /** * enum prio_policy - I/O priority class policy. * @POLICY_NO_CHANGE: (default) do not modify the I/O priority class. * @POLICY_PROMOTE_TO_RT: modify no-IOPRIO_CLASS_RT to IOPRIO_CLASS_RT. * @POLICY_RESTRICT_TO_BE: modify IOPRIO_CLASS_NONE and IOPRIO_CLASS_RT into * IOPRIO_CLASS_BE. * @POLICY_ALL_TO_IDLE: change the I/O priority class into IOPRIO_CLASS_IDLE. * @POLICY_NONE_TO_RT: an alias for POLICY_PROMOTE_TO_RT. * * See also <linux/ioprio.h>. */ enum prio_policy { POLICY_NO_CHANGE = 0, POLICY_PROMOTE_TO_RT = 1, POLICY_RESTRICT_TO_BE = 2, POLICY_ALL_TO_IDLE = 3, POLICY_NONE_TO_RT = 4, }; static const char *policy_name[] = { [POLICY_NO_CHANGE] = "no-change", [POLICY_PROMOTE_TO_RT] = "promote-to-rt", [POLICY_RESTRICT_TO_BE] = "restrict-to-be", [POLICY_ALL_TO_IDLE] = "idle", [POLICY_NONE_TO_RT] = "none-to-rt", }; static struct blkcg_policy ioprio_policy; /** * struct ioprio_blkg - Per (cgroup, request queue) data. * @pd: blkg_policy_data structure. */ struct ioprio_blkg { struct blkg_policy_data pd; }; /** * struct ioprio_blkcg - Per cgroup data. * @cpd: blkcg_policy_data structure. * @prio_policy: One of the IOPRIO_CLASS_* values. See also <linux/ioprio.h>. */ struct ioprio_blkcg { struct blkcg_policy_data cpd; enum prio_policy prio_policy; }; static inline struct ioprio_blkg *pd_to_ioprio(struct blkg_policy_data *pd) { return pd ? container_of(pd, struct ioprio_blkg, pd) : NULL; } static struct ioprio_blkcg *blkcg_to_ioprio_blkcg(struct blkcg *blkcg) { return container_of(blkcg_to_cpd(blkcg, &ioprio_policy), struct ioprio_blkcg, cpd); } static struct ioprio_blkcg * ioprio_blkcg_from_css(struct cgroup_subsys_state *css) { return blkcg_to_ioprio_blkcg(css_to_blkcg(css)); } static struct ioprio_blkcg *ioprio_blkcg_from_bio(struct bio *bio) { struct blkg_policy_data *pd = blkg_to_pd(bio->bi_blkg, &ioprio_policy); if (!pd) return NULL; return blkcg_to_ioprio_blkcg(pd->blkg->blkcg); } static int ioprio_show_prio_policy(struct seq_file *sf, void *v) { struct ioprio_blkcg *blkcg = ioprio_blkcg_from_css(seq_css(sf)); seq_printf(sf, "%s\n", policy_name[blkcg->prio_policy]); return 0; } static ssize_t ioprio_set_prio_policy(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { struct ioprio_blkcg *blkcg = ioprio_blkcg_from_css(of_css(of)); int ret; if (off != 0) return -EIO; /* kernfs_fop_write_iter() terminates 'buf' with '\0'. */ ret = sysfs_match_string(policy_name, buf); if (ret < 0) return ret; blkcg->prio_policy = ret; return nbytes; } static struct blkg_policy_data * ioprio_alloc_pd(struct gendisk *disk, struct blkcg *blkcg, gfp_t gfp) { struct ioprio_blkg *ioprio_blkg; ioprio_blkg = kzalloc(sizeof(*ioprio_blkg), gfp); if (!ioprio_blkg) return NULL; return &ioprio_blkg->pd; } static void ioprio_free_pd(struct blkg_policy_data *pd) { struct ioprio_blkg *ioprio_blkg = pd_to_ioprio(pd); kfree(ioprio_blkg); } static struct blkcg_policy_data *ioprio_alloc_cpd(gfp_t gfp) { struct ioprio_blkcg *blkcg; blkcg = kzalloc(sizeof(*blkcg), gfp); if (!blkcg) return NULL; blkcg->prio_policy = POLICY_NO_CHANGE; return &blkcg->cpd; } static void ioprio_free_cpd(struct blkcg_policy_data *cpd) { struct ioprio_blkcg *blkcg = container_of(cpd, typeof(*blkcg), cpd); kfree(blkcg); } #define IOPRIO_ATTRS \ { \ .name = "prio.class", \ .seq_show = ioprio_show_prio_policy, \ .write = ioprio_set_prio_policy, \ }, \ { } /* sentinel */ /* cgroup v2 attributes */ static struct cftype ioprio_files[] = { IOPRIO_ATTRS }; /* cgroup v1 attributes */ static struct cftype ioprio_legacy_files[] = { IOPRIO_ATTRS }; static struct blkcg_policy ioprio_policy = { .dfl_cftypes = ioprio_files, .legacy_cftypes = ioprio_legacy_files, .cpd_alloc_fn = ioprio_alloc_cpd, .cpd_free_fn = ioprio_free_cpd, .pd_alloc_fn = ioprio_alloc_pd, .pd_free_fn = ioprio_free_pd, }; void blkcg_set_ioprio(struct bio *bio) { struct ioprio_blkcg *blkcg = ioprio_blkcg_from_bio(bio); u16 prio; if (!blkcg || blkcg->prio_policy == POLICY_NO_CHANGE) return; if (blkcg->prio_policy == POLICY_PROMOTE_TO_RT || blkcg->prio_policy == POLICY_NONE_TO_RT) { /* * For RT threads, the default priority level is 4 because * task_nice is 0. By promoting non-RT io-priority to RT-class * and default level 4, those requests that are already * RT-class but need a higher io-priority can use ioprio_set() * to achieve this. */ if (IOPRIO_PRIO_CLASS(bio->bi_ioprio) != IOPRIO_CLASS_RT) bio->bi_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_RT, 4); return; } /* * Except for IOPRIO_CLASS_NONE, higher I/O priority numbers * correspond to a lower priority. Hence, the max_t() below selects * the lower priority of bi_ioprio and the cgroup I/O priority class. * If the bio I/O priority equals IOPRIO_CLASS_NONE, the cgroup I/O * priority is assigned to the bio. */ prio = max_t(u16, bio->bi_ioprio, IOPRIO_PRIO_VALUE(blkcg->prio_policy, 0)); if (prio > bio->bi_ioprio) bio->bi_ioprio = prio; } void blk_ioprio_exit(struct gendisk *disk) { blkcg_deactivate_policy(disk, &ioprio_policy); } int blk_ioprio_init(struct gendisk *disk) { return blkcg_activate_policy(disk, &ioprio_policy); } static int __init ioprio_init(void) { return blkcg_policy_register(&ioprio_policy); } static void __exit ioprio_exit(void) { blkcg_policy_unregister(&ioprio_policy); } module_init(ioprio_init); module_exit(ioprio_exit); |
2562 2558 21 16 16 1 17 16 16 16 16 5 5 16 16 21 1 1 5 5 1 1 1 1 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 | // SPDX-License-Identifier: GPL-2.0 /* * property.c - Unified device property interface. * * Copyright (C) 2014, Intel Corporation * Authors: Rafael J. Wysocki <rafael.j.wysocki@intel.com> * Mika Westerberg <mika.westerberg@linux.intel.com> */ #include <linux/device.h> #include <linux/err.h> #include <linux/export.h> #include <linux/kconfig.h> #include <linux/of.h> #include <linux/property.h> #include <linux/phy.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/types.h> struct fwnode_handle *__dev_fwnode(struct device *dev) { return IS_ENABLED(CONFIG_OF) && dev->of_node ? of_fwnode_handle(dev->of_node) : dev->fwnode; } EXPORT_SYMBOL_GPL(__dev_fwnode); const struct fwnode_handle *__dev_fwnode_const(const struct device *dev) { return IS_ENABLED(CONFIG_OF) && dev->of_node ? of_fwnode_handle(dev->of_node) : dev->fwnode; } EXPORT_SYMBOL_GPL(__dev_fwnode_const); /** * device_property_present - check if a property of a device is present * @dev: Device whose property is being checked * @propname: Name of the property * * Check if property @propname is present in the device firmware description. * * Return: true if property @propname is present. Otherwise, returns false. */ bool device_property_present(const struct device *dev, const char *propname) { return fwnode_property_present(dev_fwnode(dev), propname); } EXPORT_SYMBOL_GPL(device_property_present); /** * fwnode_property_present - check if a property of a firmware node is present * @fwnode: Firmware node whose property to check * @propname: Name of the property * * Return: true if property @propname is present. Otherwise, returns false. */ bool fwnode_property_present(const struct fwnode_handle *fwnode, const char *propname) { bool ret; if (IS_ERR_OR_NULL(fwnode)) return false; ret = fwnode_call_bool_op(fwnode, property_present, propname); if (ret) return ret; return fwnode_call_bool_op(fwnode->secondary, property_present, propname); } EXPORT_SYMBOL_GPL(fwnode_property_present); /** * device_property_read_u8_array - return a u8 array property of a device * @dev: Device to get the property of * @propname: Name of the property * @val: The values are stored here or %NULL to return the number of values * @nval: Size of the @val array * * Function reads an array of u8 properties with @propname from the device * firmware description and stores them to @val if found. * * It's recommended to call device_property_count_u8() instead of calling * this function with @val equals %NULL and @nval equals 0. * * Return: number of values if @val was %NULL, * %0 if the property was found (success), * %-EINVAL if given arguments are not valid, * %-ENODATA if the property does not have a value, * %-EPROTO if the property is not an array of numbers, * %-EOVERFLOW if the size of the property is not as expected. * %-ENXIO if no suitable firmware interface is present. */ int device_property_read_u8_array(const struct device *dev, const char *propname, u8 *val, size_t nval) { return fwnode_property_read_u8_array(dev_fwnode(dev), propname, val, nval); } EXPORT_SYMBOL_GPL(device_property_read_u8_array); /** * device_property_read_u16_array - return a u16 array property of a device * @dev: Device to get the property of * @propname: Name of the property * @val: The values are stored here or %NULL to return the number of values * @nval: Size of the @val array * * Function reads an array of u16 properties with @propname from the device * firmware description and stores them to @val if found. * * It's recommended to call device_property_count_u16() instead of calling * this function with @val equals %NULL and @nval equals 0. * * Return: number of values if @val was %NULL, * %0 if the property was found (success), * %-EINVAL if given arguments are not valid, * %-ENODATA if the property does not have a value, * %-EPROTO if the property is not an array of numbers, * %-EOVERFLOW if the size of the property is not as expected. * %-ENXIO if no suitable firmware interface is present. */ int device_property_read_u16_array(const struct device *dev, const char *propname, u16 *val, size_t nval) { return fwnode_property_read_u16_array(dev_fwnode(dev), propname, val, nval); } EXPORT_SYMBOL_GPL(device_property_read_u16_array); /** * device_property_read_u32_array - return a u32 array property of a device * @dev: Device to get the property of * @propname: Name of the property * @val: The values are stored here or %NULL to return the number of values * @nval: Size of the @val array * * Function reads an array of u32 properties with @propname from the device * firmware description and stores them to @val if found. * * It's recommended to call device_property_count_u32() instead of calling * this function with @val equals %NULL and @nval equals 0. * * Return: number of values if @val was %NULL, * %0 if the property was found (success), * %-EINVAL if given arguments are not valid, * %-ENODATA if the property does not have a value, * %-EPROTO if the property is not an array of numbers, * %-EOVERFLOW if the size of the property is not as expected. * %-ENXIO if no suitable firmware interface is present. */ int device_property_read_u32_array(const struct device *dev, const char *propname, u32 *val, size_t nval) { return fwnode_property_read_u32_array(dev_fwnode(dev), propname, val, nval); } EXPORT_SYMBOL_GPL(device_property_read_u32_array); /** * device_property_read_u64_array - return a u64 array property of a device * @dev: Device to get the property of * @propname: Name of the property * @val: The values are stored here or %NULL to return the number of values * @nval: Size of the @val array * * Function reads an array of u64 properties with @propname from the device * firmware description and stores them to @val if found. * * It's recommended to call device_property_count_u64() instead of calling * this function with @val equals %NULL and @nval equals 0. * * Return: number of values if @val was %NULL, * %0 if the property was found (success), * %-EINVAL if given arguments are not valid, * %-ENODATA if the property does not have a value, * %-EPROTO if the property is not an array of numbers, * %-EOVERFLOW if the size of the property is not as expected. * %-ENXIO if no suitable firmware interface is present. */ int device_property_read_u64_array(const struct device *dev, const char *propname, u64 *val, size_t nval) { return fwnode_property_read_u64_array(dev_fwnode(dev), propname, val, nval); } EXPORT_SYMBOL_GPL(device_property_read_u64_array); /** * device_property_read_string_array - return a string array property of device * @dev: Device to get the property of * @propname: Name of the property * @val: The values are stored here or %NULL to return the number of values * @nval: Size of the @val array * * Function reads an array of string properties with @propname from the device * firmware description and stores them to @val if found. * * It's recommended to call device_property_string_array_count() instead of calling * this function with @val equals %NULL and @nval equals 0. * * Return: number of values read on success if @val is non-NULL, * number of values available on success if @val is NULL, * %-EINVAL if given arguments are not valid, * %-ENODATA if the property does not have a value, * %-EPROTO or %-EILSEQ if the property is not an array of strings, * %-EOVERFLOW if the size of the property is not as expected. * %-ENXIO if no suitable firmware interface is present. */ int device_property_read_string_array(const struct device *dev, const char *propname, const char **val, size_t nval) { return fwnode_property_read_string_array(dev_fwnode(dev), propname, val, nval); } EXPORT_SYMBOL_GPL(device_property_read_string_array); /** * device_property_read_string - return a string property of a device * @dev: Device to get the property of * @propname: Name of the property * @val: The value is stored here * * Function reads property @propname from the device firmware description and * stores the value into @val if found. The value is checked to be a string. * * Return: %0 if the property was found (success), * %-EINVAL if given arguments are not valid, * %-ENODATA if the property does not have a value, * %-EPROTO or %-EILSEQ if the property type is not a string. * %-ENXIO if no suitable firmware interface is present. */ int device_property_read_string(const struct device *dev, const char *propname, const char **val) { return fwnode_property_read_string(dev_fwnode(dev), propname, val); } EXPORT_SYMBOL_GPL(device_property_read_string); /** * device_property_match_string - find a string in an array and return index * @dev: Device to get the property of * @propname: Name of the property holding the array * @string: String to look for * * Find a given string in a string array and if it is found return the * index back. * * Return: index, starting from %0, if the property was found (success), * %-EINVAL if given arguments are not valid, * %-ENODATA if the property does not have a value, * %-EPROTO if the property is not an array of strings, * %-ENXIO if no suitable firmware interface is present. */ int device_property_match_string(const struct device *dev, const char *propname, const char *string) { return fwnode_property_match_string(dev_fwnode(dev), propname, string); } EXPORT_SYMBOL_GPL(device_property_match_string); static int fwnode_property_read_int_array(const struct fwnode_handle *fwnode, const char *propname, unsigned int elem_size, void *val, size_t nval) { int ret; if (IS_ERR_OR_NULL(fwnode)) return -EINVAL; ret = fwnode_call_int_op(fwnode, property_read_int_array, propname, elem_size, val, nval); if (ret != -EINVAL) return ret; return fwnode_call_int_op(fwnode->secondary, property_read_int_array, propname, elem_size, val, nval); } /** * fwnode_property_read_u8_array - return a u8 array property of firmware node * @fwnode: Firmware node to get the property of * @propname: Name of the property * @val: The values are stored here or %NULL to return the number of values * @nval: Size of the @val array * * Read an array of u8 properties with @propname from @fwnode and stores them to * @val if found. * * It's recommended to call fwnode_property_count_u8() instead of calling * this function with @val equals %NULL and @nval equals 0. * * Return: number of values if @val was %NULL, * %0 if the property was found (success), * %-EINVAL if given arguments are not valid, * %-ENODATA if the property does not have a value, * %-EPROTO if the property is not an array of numbers, * %-EOVERFLOW if the size of the property is not as expected, * %-ENXIO if no suitable firmware interface is present. */ int fwnode_property_read_u8_array(const struct fwnode_handle *fwnode, const char *propname, u8 *val, size_t nval) { return fwnode_property_read_int_array(fwnode, propname, sizeof(u8), val, nval); } EXPORT_SYMBOL_GPL(fwnode_property_read_u8_array); /** * fwnode_property_read_u16_array - return a u16 array property of firmware node * @fwnode: Firmware node to get the property of * @propname: Name of the property * @val: The values are stored here or %NULL to return the number of values * @nval: Size of the @val array * * Read an array of u16 properties with @propname from @fwnode and store them to * @val if found. * * It's recommended to call fwnode_property_count_u16() instead of calling * this function with @val equals %NULL and @nval equals 0. * * Return: number of values if @val was %NULL, * %0 if the property was found (success), * %-EINVAL if given arguments are not valid, * %-ENODATA if the property does not have a value, * %-EPROTO if the property is not an array of numbers, * %-EOVERFLOW if the size of the property is not as expected, * %-ENXIO if no suitable firmware interface is present. */ int fwnode_property_read_u16_array(const struct fwnode_handle *fwnode, const char *propname, u16 *val, size_t nval) { return fwnode_property_read_int_array(fwnode, propname, sizeof(u16), val, nval); } EXPORT_SYMBOL_GPL(fwnode_property_read_u16_array); /** * fwnode_property_read_u32_array - return a u32 array property of firmware node * @fwnode: Firmware node to get the property of * @propname: Name of the property * @val: The values are stored here or %NULL to return the number of values * @nval: Size of the @val array * * Read an array of u32 properties with @propname from @fwnode store them to * @val if found. * * It's recommended to call fwnode_property_count_u32() instead of calling * this function with @val equals %NULL and @nval equals 0. * * Return: number of values if @val was %NULL, * %0 if the property was found (success), * %-EINVAL if given arguments are not valid, * %-ENODATA if the property does not have a value, * %-EPROTO if the property is not an array of numbers, * %-EOVERFLOW if the size of the property is not as expected, * %-ENXIO if no suitable firmware interface is present. */ int fwnode_property_read_u32_array(const struct fwnode_handle *fwnode, const char *propname, u32 *val, size_t nval) { return fwnode_property_read_int_array(fwnode, propname, sizeof(u32), val, nval); } EXPORT_SYMBOL_GPL(fwnode_property_read_u32_array); /** * fwnode_property_read_u64_array - return a u64 array property firmware node * @fwnode: Firmware node to get the property of * @propname: Name of the property * @val: The values are stored here or %NULL to return the number of values * @nval: Size of the @val array * * Read an array of u64 properties with @propname from @fwnode and store them to * @val if found. * * It's recommended to call fwnode_property_count_u64() instead of calling * this function with @val equals %NULL and @nval equals 0. * * Return: number of values if @val was %NULL, * %0 if the property was found (success), * %-EINVAL if given arguments are not valid, * %-ENODATA if the property does not have a value, * %-EPROTO if the property is not an array of numbers, * %-EOVERFLOW if the size of the property is not as expected, * %-ENXIO if no suitable firmware interface is present. */ int fwnode_property_read_u64_array(const struct fwnode_handle *fwnode, const char *propname, u64 *val, size_t nval) { return fwnode_property_read_int_array(fwnode, propname, sizeof(u64), val, nval); } EXPORT_SYMBOL_GPL(fwnode_property_read_u64_array); /** * fwnode_property_read_string_array - return string array property of a node * @fwnode: Firmware node to get the property of * @propname: Name of the property * @val: The values are stored here or %NULL to return the number of values * @nval: Size of the @val array * * Read an string list property @propname from the given firmware node and store * them to @val if found. * * It's recommended to call fwnode_property_string_array_count() instead of calling * this function with @val equals %NULL and @nval equals 0. * * Return: number of values read on success if @val is non-NULL, * number of values available on success if @val is NULL, * %-EINVAL if given arguments are not valid, * %-ENODATA if the property does not have a value, * %-EPROTO or %-EILSEQ if the property is not an array of strings, * %-EOVERFLOW if the size of the property is not as expected, * %-ENXIO if no suitable firmware interface is present. */ int fwnode_property_read_string_array(const struct fwnode_handle *fwnode, const char *propname, const char **val, size_t nval) { int ret; if (IS_ERR_OR_NULL(fwnode)) return -EINVAL; ret = fwnode_call_int_op(fwnode, property_read_string_array, propname, val, nval); if (ret != -EINVAL) return ret; return fwnode_call_int_op(fwnode->secondary, property_read_string_array, propname, val, nval); } EXPORT_SYMBOL_GPL(fwnode_property_read_string_array); /** * fwnode_property_read_string - return a string property of a firmware node * @fwnode: Firmware node to get the property of * @propname: Name of the property * @val: The value is stored here * * Read property @propname from the given firmware node and store the value into * @val if found. The value is checked to be a string. * * Return: %0 if the property was found (success), * %-EINVAL if given arguments are not valid, * %-ENODATA if the property does not have a value, * %-EPROTO or %-EILSEQ if the property is not a string, * %-ENXIO if no suitable firmware interface is present. */ int fwnode_property_read_string(const struct fwnode_handle *fwnode, const char *propname, const char **val) { int ret = fwnode_property_read_string_array(fwnode, propname, val, 1); return ret < 0 ? ret : 0; } EXPORT_SYMBOL_GPL(fwnode_property_read_string); /** * fwnode_property_match_string - find a string in an array and return index * @fwnode: Firmware node to get the property of * @propname: Name of the property holding the array * @string: String to look for * * Find a given string in a string array and if it is found return the * index back. * * Return: index, starting from %0, if the property was found (success), * %-EINVAL if given arguments are not valid, * %-ENODATA if the property does not have a value, * %-EPROTO if the property is not an array of strings, * %-ENXIO if no suitable firmware interface is present. */ int fwnode_property_match_string(const struct fwnode_handle *fwnode, const char *propname, const char *string) { const char **values; int nval, ret; nval = fwnode_property_string_array_count(fwnode, propname); if (nval < 0) return nval; if (nval == 0) return -ENODATA; values = kcalloc(nval, sizeof(*values), GFP_KERNEL); if (!values) return -ENOMEM; ret = fwnode_property_read_string_array(fwnode, propname, values, nval); if (ret < 0) goto out_free; ret = match_string(values, nval, string); if (ret < 0) ret = -ENODATA; out_free: kfree(values); return ret; } EXPORT_SYMBOL_GPL(fwnode_property_match_string); /** * fwnode_property_match_property_string - find a property string value in an array and return index * @fwnode: Firmware node to get the property of * @propname: Name of the property holding the string value * @array: String array to search in * @n: Size of the @array * * Find a property string value in a given @array and if it is found return * the index back. * * Return: index, starting from %0, if the string value was found in the @array (success), * %-ENOENT when the string value was not found in the @array, * %-EINVAL if given arguments are not valid, * %-ENODATA if the property does not have a value, * %-EPROTO or %-EILSEQ if the property is not a string, * %-ENXIO if no suitable firmware interface is present. */ int fwnode_property_match_property_string(const struct fwnode_handle *fwnode, const char *propname, const char * const *array, size_t n) { const char *string; int ret; ret = fwnode_property_read_string(fwnode, propname, &string); if (ret) return ret; ret = match_string(array, n, string); if (ret < 0) ret = -ENOENT; return ret; } EXPORT_SYMBOL_GPL(fwnode_property_match_property_string); /** * fwnode_property_get_reference_args() - Find a reference with arguments * @fwnode: Firmware node where to look for the reference * @prop: The name of the property * @nargs_prop: The name of the property telling the number of * arguments in the referred node. NULL if @nargs is known, * otherwise @nargs is ignored. Only relevant on OF. * @nargs: Number of arguments. Ignored if @nargs_prop is non-NULL. * @index: Index of the reference, from zero onwards. * @args: Result structure with reference and integer arguments. * May be NULL. * * Obtain a reference based on a named property in an fwnode, with * integer arguments. * * The caller is responsible for calling fwnode_handle_put() on the returned * @args->fwnode pointer. * * Return: %0 on success * %-ENOENT when the index is out of bounds, the index has an empty * reference or the property was not found * %-EINVAL on parse error */ int fwnode_property_get_reference_args(const struct fwnode_handle *fwnode, const char *prop, const char *nargs_prop, unsigned int nargs, unsigned int index, struct fwnode_reference_args *args) { int ret; if (IS_ERR_OR_NULL(fwnode)) return -ENOENT; ret = fwnode_call_int_op(fwnode, get_reference_args, prop, nargs_prop, nargs, index, args); if (ret == 0) return ret; if (IS_ERR_OR_NULL(fwnode->secondary)) return ret; return fwnode_call_int_op(fwnode->secondary, get_reference_args, prop, nargs_prop, nargs, index, args); } EXPORT_SYMBOL_GPL(fwnode_property_get_reference_args); /** * fwnode_find_reference - Find named reference to a fwnode_handle * @fwnode: Firmware node where to look for the reference * @name: The name of the reference * @index: Index of the reference * * @index can be used when the named reference holds a table of references. * * The caller is responsible for calling fwnode_handle_put() on the returned * fwnode pointer. * * Return: a pointer to the reference fwnode, when found. Otherwise, * returns an error pointer. */ struct fwnode_handle *fwnode_find_reference(const struct fwnode_handle *fwnode, const char *name, unsigned int index) { struct fwnode_reference_args args; int ret; ret = fwnode_property_get_reference_args(fwnode, name, NULL, 0, index, &args); return ret ? ERR_PTR(ret) : args.fwnode; } EXPORT_SYMBOL_GPL(fwnode_find_reference); /** * fwnode_get_name - Return the name of a node * @fwnode: The firmware node * * Return: a pointer to the node name, or %NULL. */ const char *fwnode_get_name(const struct fwnode_handle *fwnode) { return fwnode_call_ptr_op(fwnode, get_name); } EXPORT_SYMBOL_GPL(fwnode_get_name); /** * fwnode_get_name_prefix - Return the prefix of node for printing purposes * @fwnode: The firmware node * * Return: the prefix of a node, intended to be printed right before the node. * The prefix works also as a separator between the nodes. */ const char *fwnode_get_name_prefix(const struct fwnode_handle *fwnode) { return fwnode_call_ptr_op(fwnode, get_name_prefix); } /** * fwnode_name_eq - Return true if node name is equal * @fwnode: The firmware node * @name: The name to which to compare the node name * * Compare the name provided as an argument to the name of the node, stopping * the comparison at either NUL or '@' character, whichever comes first. This * function is generally used for comparing node names while ignoring the * possible unit address of the node. * * Return: true if the node name matches with the name provided in the @name * argument, false otherwise. */ bool fwnode_name_eq(const struct fwnode_handle *fwnode, const char *name) { const char *node_name; ptrdiff_t len; node_name = fwnode_get_name(fwnode); if (!node_name) return false; len = strchrnul(node_name, '@') - node_name; return str_has_prefix(node_name, name) == len; } EXPORT_SYMBOL_GPL(fwnode_name_eq); /** * fwnode_get_parent - Return parent firwmare node * @fwnode: Firmware whose parent is retrieved * * The caller is responsible for calling fwnode_handle_put() on the returned * fwnode pointer. * * Return: parent firmware node of the given node if possible or %NULL if no * parent was available. */ struct fwnode_handle *fwnode_get_parent(const struct fwnode_handle *fwnode) { return fwnode_call_ptr_op(fwnode, get_parent); } EXPORT_SYMBOL_GPL(fwnode_get_parent); /** * fwnode_get_next_parent - Iterate to the node's parent * @fwnode: Firmware whose parent is retrieved * * This is like fwnode_get_parent() except that it drops the refcount * on the passed node, making it suitable for iterating through a * node's parents. * * The caller is responsible for calling fwnode_handle_put() on the returned * fwnode pointer. Note that this function also puts a reference to @fwnode * unconditionally. * * Return: parent firmware node of the given node if possible or %NULL if no * parent was available. */ struct fwnode_handle *fwnode_get_next_parent(struct fwnode_handle *fwnode) { struct fwnode_handle *parent = fwnode_get_parent(fwnode); fwnode_handle_put(fwnode); return parent; } EXPORT_SYMBOL_GPL(fwnode_get_next_parent); /** * fwnode_count_parents - Return the number of parents a node has * @fwnode: The node the parents of which are to be counted * * Return: the number of parents a node has. */ unsigned int fwnode_count_parents(const struct fwnode_handle *fwnode) { struct fwnode_handle *parent; unsigned int count = 0; fwnode_for_each_parent_node(fwnode, parent) count++; return count; } EXPORT_SYMBOL_GPL(fwnode_count_parents); /** * fwnode_get_nth_parent - Return an nth parent of a node * @fwnode: The node the parent of which is requested * @depth: Distance of the parent from the node * * The caller is responsible for calling fwnode_handle_put() on the returned * fwnode pointer. * * Return: the nth parent of a node. If there is no parent at the requested * @depth, %NULL is returned. If @depth is 0, the functionality is equivalent to * fwnode_handle_get(). For @depth == 1, it is fwnode_get_parent() and so on. */ struct fwnode_handle *fwnode_get_nth_parent(struct fwnode_handle *fwnode, unsigned int depth) { struct fwnode_handle *parent; if (depth == 0) return fwnode_handle_get(fwnode); fwnode_for_each_parent_node(fwnode, parent) { if (--depth == 0) return parent; } return NULL; } EXPORT_SYMBOL_GPL(fwnode_get_nth_parent); /** * fwnode_get_next_child_node - Return the next child node handle for a node * @fwnode: Firmware node to find the next child node for. * @child: Handle to one of the node's child nodes or a %NULL handle. * * The caller is responsible for calling fwnode_handle_put() on the returned * fwnode pointer. Note that this function also puts a reference to @child * unconditionally. */ struct fwnode_handle * fwnode_get_next_child_node(const struct fwnode_handle *fwnode, struct fwnode_handle *child) { return fwnode_call_ptr_op(fwnode, get_next_child_node, child); } EXPORT_SYMBOL_GPL(fwnode_get_next_child_node); /** * fwnode_get_next_available_child_node - Return the next available child node handle for a node * @fwnode: Firmware node to find the next child node for. * @child: Handle to one of the node's child nodes or a %NULL handle. * * The caller is responsible for calling fwnode_handle_put() on the returned * fwnode pointer. Note that this function also puts a reference to @child * unconditionally. */ struct fwnode_handle * fwnode_get_next_available_child_node(const struct fwnode_handle *fwnode, struct fwnode_handle *child) { struct fwnode_handle *next_child = child; if (IS_ERR_OR_NULL(fwnode)) return NULL; do { next_child = fwnode_get_next_child_node(fwnode, next_child); if (!next_child) return NULL; } while (!fwnode_device_is_available(next_child)); return next_child; } EXPORT_SYMBOL_GPL(fwnode_get_next_available_child_node); /** * device_get_next_child_node - Return the next child node handle for a device * @dev: Device to find the next child node for. * @child: Handle to one of the device's child nodes or a %NULL handle. * * The caller is responsible for calling fwnode_handle_put() on the returned * fwnode pointer. Note that this function also puts a reference to @child * unconditionally. */ struct fwnode_handle *device_get_next_child_node(const struct device *dev, struct fwnode_handle *child) { const struct fwnode_handle *fwnode = dev_fwnode(dev); struct fwnode_handle *next; if (IS_ERR_OR_NULL(fwnode)) return NULL; /* Try to find a child in primary fwnode */ next = fwnode_get_next_child_node(fwnode, child); if (next) return next; /* When no more children in primary, continue with secondary */ return fwnode_get_next_child_node(fwnode->secondary, child); } EXPORT_SYMBOL_GPL(device_get_next_child_node); /** * fwnode_get_named_child_node - Return first matching named child node handle * @fwnode: Firmware node to find the named child node for. * @childname: String to match child node name against. * * The caller is responsible for calling fwnode_handle_put() on the returned * fwnode pointer. */ struct fwnode_handle * fwnode_get_named_child_node(const struct fwnode_handle *fwnode, const char *childname) { return fwnode_call_ptr_op(fwnode, get_named_child_node, childname); } EXPORT_SYMBOL_GPL(fwnode_get_named_child_node); /** * device_get_named_child_node - Return first matching named child node handle * @dev: Device to find the named child node for. * @childname: String to match child node name against. * * The caller is responsible for calling fwnode_handle_put() on the returned * fwnode pointer. */ struct fwnode_handle *device_get_named_child_node(const struct device *dev, const char *childname) { return fwnode_get_named_child_node(dev_fwnode(dev), childname); } EXPORT_SYMBOL_GPL(device_get_named_child_node); /** * fwnode_handle_get - Obtain a reference to a device node * @fwnode: Pointer to the device node to obtain the reference to. * * The caller is responsible for calling fwnode_handle_put() on the returned * fwnode pointer. * * Return: the fwnode handle. */ struct fwnode_handle *fwnode_handle_get(struct fwnode_handle *fwnode) { if (!fwnode_has_op(fwnode, get)) return fwnode; return fwnode_call_ptr_op(fwnode, get); } EXPORT_SYMBOL_GPL(fwnode_handle_get); /** * fwnode_handle_put - Drop reference to a device node * @fwnode: Pointer to the device node to drop the reference to. * * This has to be used when terminating device_for_each_child_node() iteration * with break or return to prevent stale device node references from being left * behind. */ void fwnode_handle_put(struct fwnode_handle *fwnode) { fwnode_call_void_op(fwnode, put); } EXPORT_SYMBOL_GPL(fwnode_handle_put); /** * fwnode_device_is_available - check if a device is available for use * @fwnode: Pointer to the fwnode of the device. * * Return: true if device is available for use. Otherwise, returns false. * * For fwnode node types that don't implement the .device_is_available() * operation, this function returns true. */ bool fwnode_device_is_available(const struct fwnode_handle *fwnode) { if (IS_ERR_OR_NULL(fwnode)) return false; if (!fwnode_has_op(fwnode, device_is_available)) return true; return fwnode_call_bool_op(fwnode, device_is_available); } EXPORT_SYMBOL_GPL(fwnode_device_is_available); /** * device_get_child_node_count - return the number of child nodes for device * @dev: Device to cound the child nodes for * * Return: the number of child nodes for a given device. */ unsigned int device_get_child_node_count(const struct device *dev) { struct fwnode_handle *child; unsigned int count = 0; device_for_each_child_node(dev, child) count++; return count; } EXPORT_SYMBOL_GPL(device_get_child_node_count); bool device_dma_supported(const struct device *dev) { return fwnode_call_bool_op(dev_fwnode(dev), device_dma_supported); } EXPORT_SYMBOL_GPL(device_dma_supported); enum dev_dma_attr device_get_dma_attr(const struct device *dev) { if (!fwnode_has_op(dev_fwnode(dev), device_get_dma_attr)) return DEV_DMA_NOT_SUPPORTED; return fwnode_call_int_op(dev_fwnode(dev), device_get_dma_attr); } EXPORT_SYMBOL_GPL(device_get_dma_attr); /** * fwnode_get_phy_mode - Get phy mode for given firmware node * @fwnode: Pointer to the given node * * The function gets phy interface string from property 'phy-mode' or * 'phy-connection-type', and return its index in phy_modes table, or errno in * error case. */ int fwnode_get_phy_mode(const struct fwnode_handle *fwnode) { const char *pm; int err, i; err = fwnode_property_read_string(fwnode, "phy-mode", &pm); if (err < 0) err = fwnode_property_read_string(fwnode, "phy-connection-type", &pm); if (err < 0) return err; for (i = 0; i < PHY_INTERFACE_MODE_MAX; i++) if (!strcasecmp(pm, phy_modes(i))) return i; return -ENODEV; } EXPORT_SYMBOL_GPL(fwnode_get_phy_mode); /** * device_get_phy_mode - Get phy mode for given device * @dev: Pointer to the given device * * The function gets phy interface string from property 'phy-mode' or * 'phy-connection-type', and return its index in phy_modes table, or errno in * error case. */ int device_get_phy_mode(struct device *dev) { return fwnode_get_phy_mode(dev_fwnode(dev)); } EXPORT_SYMBOL_GPL(device_get_phy_mode); /** * fwnode_iomap - Maps the memory mapped IO for a given fwnode * @fwnode: Pointer to the firmware node * @index: Index of the IO range * * Return: a pointer to the mapped memory. */ void __iomem *fwnode_iomap(struct fwnode_handle *fwnode, int index) { return fwnode_call_ptr_op(fwnode, iomap, index); } EXPORT_SYMBOL(fwnode_iomap); /** * fwnode_irq_get - Get IRQ directly from a fwnode * @fwnode: Pointer to the firmware node * @index: Zero-based index of the IRQ * * Return: Linux IRQ number on success. Negative errno on failure. */ int fwnode_irq_get(const struct fwnode_handle *fwnode, unsigned int index) { int ret; ret = fwnode_call_int_op(fwnode, irq_get, index); /* We treat mapping errors as invalid case */ if (ret == 0) return -EINVAL; return ret; } EXPORT_SYMBOL(fwnode_irq_get); /** * fwnode_irq_get_byname - Get IRQ from a fwnode using its name * @fwnode: Pointer to the firmware node * @name: IRQ name * * Description: * Find a match to the string @name in the 'interrupt-names' string array * in _DSD for ACPI, or of_node for Device Tree. Then get the Linux IRQ * number of the IRQ resource corresponding to the index of the matched * string. * * Return: Linux IRQ number on success, or negative errno otherwise. */ int fwnode_irq_get_byname(const struct fwnode_handle *fwnode, const char *name) { int index; if (!name) return -EINVAL; index = fwnode_property_match_string(fwnode, "interrupt-names", name); if (index < 0) return index; return fwnode_irq_get(fwnode, index); } EXPORT_SYMBOL(fwnode_irq_get_byname); /** * fwnode_graph_get_next_endpoint - Get next endpoint firmware node * @fwnode: Pointer to the parent firmware node * @prev: Previous endpoint node or %NULL to get the first * * The caller is responsible for calling fwnode_handle_put() on the returned * fwnode pointer. Note that this function also puts a reference to @prev * unconditionally. * * Return: an endpoint firmware node pointer or %NULL if no more endpoints * are available. */ struct fwnode_handle * fwnode_graph_get_next_endpoint(const struct fwnode_handle *fwnode, struct fwnode_handle *prev) { struct fwnode_handle *ep, *port_parent = NULL; const struct fwnode_handle *parent; /* * If this function is in a loop and the previous iteration returned * an endpoint from fwnode->secondary, then we need to use the secondary * as parent rather than @fwnode. */ if (prev) { port_parent = fwnode_graph_get_port_parent(prev); parent = port_parent; } else { parent = fwnode; } if (IS_ERR_OR_NULL(parent)) return NULL; ep = fwnode_call_ptr_op(parent, graph_get_next_endpoint, prev); if (ep) goto out_put_port_parent; ep = fwnode_graph_get_next_endpoint(parent->secondary, NULL); out_put_port_parent: fwnode_handle_put(port_parent); return ep; } EXPORT_SYMBOL_GPL(fwnode_graph_get_next_endpoint); /** * fwnode_graph_get_port_parent - Return the device fwnode of a port endpoint * @endpoint: Endpoint firmware node of the port * * The caller is responsible for calling fwnode_handle_put() on the returned * fwnode pointer. * * Return: the firmware node of the device the @endpoint belongs to. */ struct fwnode_handle * fwnode_graph_get_port_parent(const struct fwnode_handle *endpoint) { struct fwnode_handle *port, *parent; port = fwnode_get_parent(endpoint); parent = fwnode_call_ptr_op(port, graph_get_port_parent); fwnode_handle_put(port); return parent; } EXPORT_SYMBOL_GPL(fwnode_graph_get_port_parent); /** * fwnode_graph_get_remote_port_parent - Return fwnode of a remote device * @fwnode: Endpoint firmware node pointing to the remote endpoint * * Extracts firmware node of a remote device the @fwnode points to. * * The caller is responsible for calling fwnode_handle_put() on the returned * fwnode pointer. */ struct fwnode_handle * fwnode_graph_get_remote_port_parent(const struct fwnode_handle *fwnode) { struct fwnode_handle *endpoint, *parent; endpoint = fwnode_graph_get_remote_endpoint(fwnode); parent = fwnode_graph_get_port_parent(endpoint); fwnode_handle_put(endpoint); return parent; } EXPORT_SYMBOL_GPL(fwnode_graph_get_remote_port_parent); /** * fwnode_graph_get_remote_port - Return fwnode of a remote port * @fwnode: Endpoint firmware node pointing to the remote endpoint * * Extracts firmware node of a remote port the @fwnode points to. * * The caller is responsible for calling fwnode_handle_put() on the returned * fwnode pointer. */ struct fwnode_handle * fwnode_graph_get_remote_port(const struct fwnode_handle *fwnode) { return fwnode_get_next_parent(fwnode_graph_get_remote_endpoint(fwnode)); } EXPORT_SYMBOL_GPL(fwnode_graph_get_remote_port); /** * fwnode_graph_get_remote_endpoint - Return fwnode of a remote endpoint * @fwnode: Endpoint firmware node pointing to the remote endpoint * * Extracts firmware node of a remote endpoint the @fwnode points to. * * The caller is responsible for calling fwnode_handle_put() on the returned * fwnode pointer. */ struct fwnode_handle * fwnode_graph_get_remote_endpoint(const struct fwnode_handle *fwnode) { return fwnode_call_ptr_op(fwnode, graph_get_remote_endpoint); } EXPORT_SYMBOL_GPL(fwnode_graph_get_remote_endpoint); static bool fwnode_graph_remote_available(struct fwnode_handle *ep) { struct fwnode_handle *dev_node; bool available; dev_node = fwnode_graph_get_remote_port_parent(ep); available = fwnode_device_is_available(dev_node); fwnode_handle_put(dev_node); return available; } /** * fwnode_graph_get_endpoint_by_id - get endpoint by port and endpoint numbers * @fwnode: parent fwnode_handle containing the graph * @port: identifier of the port node * @endpoint: identifier of the endpoint node under the port node * @flags: fwnode lookup flags * * The caller is responsible for calling fwnode_handle_put() on the returned * fwnode pointer. * * Return: the fwnode handle of the local endpoint corresponding the port and * endpoint IDs or %NULL if not found. * * If FWNODE_GRAPH_ENDPOINT_NEXT is passed in @flags and the specified endpoint * has not been found, look for the closest endpoint ID greater than the * specified one and return the endpoint that corresponds to it, if present. * * Does not return endpoints that belong to disabled devices or endpoints that * are unconnected, unless FWNODE_GRAPH_DEVICE_DISABLED is passed in @flags. */ struct fwnode_handle * fwnode_graph_get_endpoint_by_id(const struct fwnode_handle *fwnode, u32 port, u32 endpoint, unsigned long flags) { struct fwnode_handle *ep, *best_ep = NULL; unsigned int best_ep_id = 0; bool endpoint_next = flags & FWNODE_GRAPH_ENDPOINT_NEXT; bool enabled_only = !(flags & FWNODE_GRAPH_DEVICE_DISABLED); fwnode_graph_for_each_endpoint(fwnode, ep) { struct fwnode_endpoint fwnode_ep = { 0 }; int ret; if (enabled_only && !fwnode_graph_remote_available(ep)) continue; ret = fwnode_graph_parse_endpoint(ep, &fwnode_ep); if (ret < 0) continue; if (fwnode_ep.port != port) continue; if (fwnode_ep.id == endpoint) return ep; if (!endpoint_next) continue; /* * If the endpoint that has just been found is not the first * matching one and the ID of the one found previously is closer * to the requested endpoint ID, skip it. */ if (fwnode_ep.id < endpoint || (best_ep && best_ep_id < fwnode_ep.id)) continue; fwnode_handle_put(best_ep); best_ep = fwnode_handle_get(ep); best_ep_id = fwnode_ep.id; } return best_ep; } EXPORT_SYMBOL_GPL(fwnode_graph_get_endpoint_by_id); /** * fwnode_graph_get_endpoint_count - Count endpoints on a device node * @fwnode: The node related to a device * @flags: fwnode lookup flags * Count endpoints in a device node. * * If FWNODE_GRAPH_DEVICE_DISABLED flag is specified, also unconnected endpoints * and endpoints connected to disabled devices are counted. */ unsigned int fwnode_graph_get_endpoint_count(const struct fwnode_handle *fwnode, unsigned long flags) { struct fwnode_handle *ep; unsigned int count = 0; fwnode_graph_for_each_endpoint(fwnode, ep) { if (flags & FWNODE_GRAPH_DEVICE_DISABLED || fwnode_graph_remote_available(ep)) count++; } return count; } EXPORT_SYMBOL_GPL(fwnode_graph_get_endpoint_count); /** * fwnode_graph_parse_endpoint - parse common endpoint node properties * @fwnode: pointer to endpoint fwnode_handle * @endpoint: pointer to the fwnode endpoint data structure * * Parse @fwnode representing a graph endpoint node and store the * information in @endpoint. The caller must hold a reference to * @fwnode. */ int fwnode_graph_parse_endpoint(const struct fwnode_handle *fwnode, struct fwnode_endpoint *endpoint) { memset(endpoint, 0, sizeof(*endpoint)); return fwnode_call_int_op(fwnode, graph_parse_endpoint, endpoint); } EXPORT_SYMBOL(fwnode_graph_parse_endpoint); const void *device_get_match_data(const struct device *dev) { return fwnode_call_ptr_op(dev_fwnode(dev), device_get_match_data, dev); } EXPORT_SYMBOL_GPL(device_get_match_data); static unsigned int fwnode_graph_devcon_matches(const struct fwnode_handle *fwnode, const char *con_id, void *data, devcon_match_fn_t match, void **matches, unsigned int matches_len) { struct fwnode_handle *node; struct fwnode_handle *ep; unsigned int count = 0; void *ret; fwnode_graph_for_each_endpoint(fwnode, ep) { if (matches && count >= matches_len) { fwnode_handle_put(ep); break; } node = fwnode_graph_get_remote_port_parent(ep); if (!fwnode_device_is_available(node)) { fwnode_handle_put(node); continue; } ret = match(node, con_id, data); fwnode_handle_put(node); if (ret) { if (matches) matches[count] = ret; count++; } } return count; } static unsigned int fwnode_devcon_matches(const struct fwnode_handle *fwnode, const char *con_id, void *data, devcon_match_fn_t match, void **matches, unsigned int matches_len) { struct fwnode_handle *node; unsigned int count = 0; unsigned int i; void *ret; for (i = 0; ; i++) { if (matches && count >= matches_len) break; node = fwnode_find_reference(fwnode, con_id, i); if (IS_ERR(node)) break; ret = match(node, NULL, data); fwnode_handle_put(node); if (ret) { if (matches) matches[count] = ret; count++; } } return count; } /** * fwnode_connection_find_match - Find connection from a device node * @fwnode: Device node with the connection * @con_id: Identifier for the connection * @data: Data for the match function * @match: Function to check and convert the connection description * * Find a connection with unique identifier @con_id between @fwnode and another * device node. @match will be used to convert the connection description to * data the caller is expecting to be returned. */ void *fwnode_connection_find_match(const struct fwnode_handle *fwnode, const char *con_id, void *data, devcon_match_fn_t match) { unsigned int count; void *ret; if (!fwnode || !match) return NULL; count = fwnode_graph_devcon_matches(fwnode, con_id, data, match, &ret, 1); if (count) return ret; count = fwnode_devcon_matches(fwnode, con_id, data, match, &ret, 1); return count ? ret : NULL; } EXPORT_SYMBOL_GPL(fwnode_connection_find_match); /** * fwnode_connection_find_matches - Find connections from a device node * @fwnode: Device node with the connection * @con_id: Identifier for the connection * @data: Data for the match function * @match: Function to check and convert the connection description * @matches: (Optional) array of pointers to fill with matches * @matches_len: Length of @matches * * Find up to @matches_len connections with unique identifier @con_id between * @fwnode and other device nodes. @match will be used to convert the * connection description to data the caller is expecting to be returned * through the @matches array. * * If @matches is %NULL @matches_len is ignored and the total number of resolved * matches is returned. * * Return: Number of matches resolved, or negative errno. */ int fwnode_connection_find_matches(const struct fwnode_handle *fwnode, const char *con_id, void *data, devcon_match_fn_t match, void **matches, unsigned int matches_len) { unsigned int count_graph; unsigned int count_ref; if (!fwnode || !match) return -EINVAL; count_graph = fwnode_graph_devcon_matches(fwnode, con_id, data, match, matches, matches_len); if (matches) { matches += count_graph; matches_len -= count_graph; } count_ref = fwnode_devcon_matches(fwnode, con_id, data, match, matches, matches_len); return count_graph + count_ref; } EXPORT_SYMBOL_GPL(fwnode_connection_find_matches); |
13 5 162 161 40 9 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_TLBFLUSH_H #define _ASM_X86_TLBFLUSH_H #include <linux/mm_types.h> #include <linux/mmu_notifier.h> #include <linux/sched.h> #include <asm/processor.h> #include <asm/cpufeature.h> #include <asm/special_insns.h> #include <asm/smp.h> #include <asm/invpcid.h> #include <asm/pti.h> #include <asm/processor-flags.h> #include <asm/pgtable.h> DECLARE_PER_CPU(u64, tlbstate_untag_mask); void __flush_tlb_all(void); #define TLB_FLUSH_ALL -1UL #define TLB_GENERATION_INVALID 0 void cr4_update_irqsoff(unsigned long set, unsigned long clear); unsigned long cr4_read_shadow(void); /* Set in this cpu's CR4. */ static inline void cr4_set_bits_irqsoff(unsigned long mask) { cr4_update_irqsoff(mask, 0); } /* Clear in this cpu's CR4. */ static inline void cr4_clear_bits_irqsoff(unsigned long mask) { cr4_update_irqsoff(0, mask); } /* Set in this cpu's CR4. */ static inline void cr4_set_bits(unsigned long mask) { unsigned long flags; local_irq_save(flags); cr4_set_bits_irqsoff(mask); local_irq_restore(flags); } /* Clear in this cpu's CR4. */ static inline void cr4_clear_bits(unsigned long mask) { unsigned long flags; local_irq_save(flags); cr4_clear_bits_irqsoff(mask); local_irq_restore(flags); } #ifndef MODULE /* * 6 because 6 should be plenty and struct tlb_state will fit in two cache * lines. */ #define TLB_NR_DYN_ASIDS 6 struct tlb_context { u64 ctx_id; u64 tlb_gen; }; struct tlb_state { /* * cpu_tlbstate.loaded_mm should match CR3 whenever interrupts * are on. This means that it may not match current->active_mm, * which will contain the previous user mm when we're in lazy TLB * mode even if we've already switched back to swapper_pg_dir. * * During switch_mm_irqs_off(), loaded_mm will be set to * LOADED_MM_SWITCHING during the brief interrupts-off window * when CR3 and loaded_mm would otherwise be inconsistent. This * is for nmi_uaccess_okay()'s benefit. */ struct mm_struct *loaded_mm; #define LOADED_MM_SWITCHING ((struct mm_struct *)1UL) /* Last user mm for optimizing IBPB */ union { struct mm_struct *last_user_mm; unsigned long last_user_mm_spec; }; u16 loaded_mm_asid; u16 next_asid; /* * If set we changed the page tables in such a way that we * needed an invalidation of all contexts (aka. PCIDs / ASIDs). * This tells us to go invalidate all the non-loaded ctxs[] * on the next context switch. * * The current ctx was kept up-to-date as it ran and does not * need to be invalidated. */ bool invalidate_other; #ifdef CONFIG_ADDRESS_MASKING /* * Active LAM mode. * * X86_CR3_LAM_U57/U48 shifted right by X86_CR3_LAM_U57_BIT or 0 if LAM * disabled. */ u8 lam; #endif /* * Mask that contains TLB_NR_DYN_ASIDS+1 bits to indicate * the corresponding user PCID needs a flush next time we * switch to it; see SWITCH_TO_USER_CR3. */ unsigned short user_pcid_flush_mask; /* * Access to this CR4 shadow and to H/W CR4 is protected by * disabling interrupts when modifying either one. */ unsigned long cr4; /* * This is a list of all contexts that might exist in the TLB. * There is one per ASID that we use, and the ASID (what the * CPU calls PCID) is the index into ctxts. * * For each context, ctx_id indicates which mm the TLB's user * entries came from. As an invariant, the TLB will never * contain entries that are out-of-date as when that mm reached * the tlb_gen in the list. * * To be clear, this means that it's legal for the TLB code to * flush the TLB without updating tlb_gen. This can happen * (for now, at least) due to paravirt remote flushes. * * NB: context 0 is a bit special, since it's also used by * various bits of init code. This is fine -- code that * isn't aware of PCID will end up harmlessly flushing * context 0. */ struct tlb_context ctxs[TLB_NR_DYN_ASIDS]; }; DECLARE_PER_CPU_ALIGNED(struct tlb_state, cpu_tlbstate); struct tlb_state_shared { /* * We can be in one of several states: * * - Actively using an mm. Our CPU's bit will be set in * mm_cpumask(loaded_mm) and is_lazy == false; * * - Not using a real mm. loaded_mm == &init_mm. Our CPU's bit * will not be set in mm_cpumask(&init_mm) and is_lazy == false. * * - Lazily using a real mm. loaded_mm != &init_mm, our bit * is set in mm_cpumask(loaded_mm), but is_lazy == true. * We're heuristically guessing that the CR3 load we * skipped more than makes up for the overhead added by * lazy mode. */ bool is_lazy; }; DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state_shared, cpu_tlbstate_shared); bool nmi_uaccess_okay(void); #define nmi_uaccess_okay nmi_uaccess_okay /* Initialize cr4 shadow for this CPU. */ static inline void cr4_init_shadow(void) { this_cpu_write(cpu_tlbstate.cr4, __read_cr4()); } extern unsigned long mmu_cr4_features; extern u32 *trampoline_cr4_features; extern void initialize_tlbstate_and_flush(void); /* * TLB flushing: * * - flush_tlb_all() flushes all processes TLBs * - flush_tlb_mm(mm) flushes the specified mm context TLB's * - flush_tlb_page(vma, vmaddr) flushes one page * - flush_tlb_range(vma, start, end) flushes a range of pages * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages * - flush_tlb_multi(cpumask, info) flushes TLBs on multiple cpus * * ..but the i386 has somewhat limited tlb flushing capabilities, * and page-granular flushes are available only on i486 and up. */ struct flush_tlb_info { /* * We support several kinds of flushes. * * - Fully flush a single mm. .mm will be set, .end will be * TLB_FLUSH_ALL, and .new_tlb_gen will be the tlb_gen to * which the IPI sender is trying to catch us up. * * - Partially flush a single mm. .mm will be set, .start and * .end will indicate the range, and .new_tlb_gen will be set * such that the changes between generation .new_tlb_gen-1 and * .new_tlb_gen are entirely contained in the indicated range. * * - Fully flush all mms whose tlb_gens have been updated. .mm * will be NULL, .end will be TLB_FLUSH_ALL, and .new_tlb_gen * will be zero. */ struct mm_struct *mm; unsigned long start; unsigned long end; u64 new_tlb_gen; unsigned int initiating_cpu; u8 stride_shift; u8 freed_tables; }; void flush_tlb_local(void); void flush_tlb_one_user(unsigned long addr); void flush_tlb_one_kernel(unsigned long addr); void flush_tlb_multi(const struct cpumask *cpumask, const struct flush_tlb_info *info); #ifdef CONFIG_PARAVIRT #include <asm/paravirt.h> #endif #define flush_tlb_mm(mm) \ flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL, true) #define flush_tlb_range(vma, start, end) \ flush_tlb_mm_range((vma)->vm_mm, start, end, \ ((vma)->vm_flags & VM_HUGETLB) \ ? huge_page_shift(hstate_vma(vma)) \ : PAGE_SHIFT, false) extern void flush_tlb_all(void); extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, unsigned long end, unsigned int stride_shift, bool freed_tables); extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a) { flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, PAGE_SHIFT, false); } static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm) { bool should_defer = false; /* If remote CPUs need to be flushed then defer batch the flush */ if (cpumask_any_but(mm_cpumask(mm), get_cpu()) < nr_cpu_ids) should_defer = true; put_cpu(); return should_defer; } static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) { /* * Bump the generation count. This also serves as a full barrier * that synchronizes with switch_mm(): callers are required to order * their read of mm_cpumask after their writes to the paging * structures. */ return atomic64_inc_return(&mm->context.tlb_gen); } static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch, struct mm_struct *mm, unsigned long uaddr) { inc_mm_tlb_gen(mm); cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm)); mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL); } static inline void arch_flush_tlb_batched_pending(struct mm_struct *mm) { flush_tlb_mm(mm); } extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch); static inline bool pte_flags_need_flush(unsigned long oldflags, unsigned long newflags, bool ignore_access) { /* * Flags that require a flush when cleared but not when they are set. * Only include flags that would not trigger spurious page-faults. * Non-present entries are not cached. Hardware would set the * dirty/access bit if needed without a fault. */ const pteval_t flush_on_clear = _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED; const pteval_t software_flags = _PAGE_SOFTW1 | _PAGE_SOFTW2 | _PAGE_SOFTW3 | _PAGE_SOFTW4 | _PAGE_SAVED_DIRTY; const pteval_t flush_on_change = _PAGE_RW | _PAGE_USER | _PAGE_PWT | _PAGE_PCD | _PAGE_PSE | _PAGE_GLOBAL | _PAGE_PAT | _PAGE_PAT_LARGE | _PAGE_PKEY_BIT0 | _PAGE_PKEY_BIT1 | _PAGE_PKEY_BIT2 | _PAGE_PKEY_BIT3 | _PAGE_NX; unsigned long diff = oldflags ^ newflags; BUILD_BUG_ON(flush_on_clear & software_flags); BUILD_BUG_ON(flush_on_clear & flush_on_change); BUILD_BUG_ON(flush_on_change & software_flags); /* Ignore software flags */ diff &= ~software_flags; if (ignore_access) diff &= ~_PAGE_ACCESSED; /* * Did any of the 'flush_on_clear' flags was clleared set from between * 'oldflags' and 'newflags'? */ if (diff & oldflags & flush_on_clear) return true; /* Flush on modified flags. */ if (diff & flush_on_change) return true; /* Ensure there are no flags that were left behind */ if (IS_ENABLED(CONFIG_DEBUG_VM) && (diff & ~(flush_on_clear | software_flags | flush_on_change))) { VM_WARN_ON_ONCE(1); return true; } return false; } /* * pte_needs_flush() checks whether permissions were demoted and require a * flush. It should only be used for userspace PTEs. */ static inline bool pte_needs_flush(pte_t oldpte, pte_t newpte) { /* !PRESENT -> * ; no need for flush */ if (!(pte_flags(oldpte) & _PAGE_PRESENT)) return false; /* PFN changed ; needs flush */ if (pte_pfn(oldpte) != pte_pfn(newpte)) return true; /* * check PTE flags; ignore access-bit; see comment in * ptep_clear_flush_young(). */ return pte_flags_need_flush(pte_flags(oldpte), pte_flags(newpte), true); } #define pte_needs_flush pte_needs_flush /* * huge_pmd_needs_flush() checks whether permissions were demoted and require a * flush. It should only be used for userspace huge PMDs. */ static inline bool huge_pmd_needs_flush(pmd_t oldpmd, pmd_t newpmd) { /* !PRESENT -> * ; no need for flush */ if (!(pmd_flags(oldpmd) & _PAGE_PRESENT)) return false; /* PFN changed ; needs flush */ if (pmd_pfn(oldpmd) != pmd_pfn(newpmd)) return true; /* * check PMD flags; do not ignore access-bit; see * pmdp_clear_flush_young(). */ return pte_flags_need_flush(pmd_flags(oldpmd), pmd_flags(newpmd), false); } #define huge_pmd_needs_flush huge_pmd_needs_flush #ifdef CONFIG_ADDRESS_MASKING static inline u64 tlbstate_lam_cr3_mask(void) { u64 lam = this_cpu_read(cpu_tlbstate.lam); return lam << X86_CR3_LAM_U57_BIT; } static inline void set_tlbstate_lam_mode(struct mm_struct *mm) { this_cpu_write(cpu_tlbstate.lam, mm->context.lam_cr3_mask >> X86_CR3_LAM_U57_BIT); this_cpu_write(tlbstate_untag_mask, mm->context.untag_mask); } #else static inline u64 tlbstate_lam_cr3_mask(void) { return 0; } static inline void set_tlbstate_lam_mode(struct mm_struct *mm) { } #endif #endif /* !MODULE */ static inline void __native_tlb_flush_global(unsigned long cr4) { native_write_cr4(cr4 ^ X86_CR4_PGE); native_write_cr4(cr4); } #endif /* _ASM_X86_TLBFLUSH_H */ |
7 7 7 2 7 2 7 2 7 2 2 9 9 9 5 5 5 5 6 6 6 5 5 5 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 | // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2000-2005 Silicon Graphics, Inc. * All Rights Reserved. */ #include "xfs.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_bit.h" #include "xfs_sb.h" #include "xfs_mount.h" #include "xfs_ialloc.h" #include "xfs_alloc.h" #include "xfs_error.h" #include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_log.h" #include "xfs_rmap_btree.h" #include "xfs_refcount_btree.h" #include "xfs_da_format.h" #include "xfs_health.h" #include "xfs_ag.h" #include "xfs_rtbitmap.h" /* * Physical superblock buffer manipulations. Shared with libxfs in userspace. */ /* * Check that all the V4 feature bits that the V5 filesystem format requires are * correctly set. */ static bool xfs_sb_validate_v5_features( struct xfs_sb *sbp) { /* We must not have any unknown V4 feature bits set */ if (sbp->sb_versionnum & ~XFS_SB_VERSION_OKBITS) return false; /* * The CRC bit is considered an invalid V4 flag, so we have to add it * manually to the OKBITS mask. */ if (sbp->sb_features2 & ~(XFS_SB_VERSION2_OKBITS | XFS_SB_VERSION2_CRCBIT)) return false; /* Now check all the required V4 feature flags are set. */ #define V5_VERS_FLAGS (XFS_SB_VERSION_NLINKBIT | \ XFS_SB_VERSION_ALIGNBIT | \ XFS_SB_VERSION_LOGV2BIT | \ XFS_SB_VERSION_EXTFLGBIT | \ XFS_SB_VERSION_DIRV2BIT | \ XFS_SB_VERSION_MOREBITSBIT) #define V5_FEAT_FLAGS (XFS_SB_VERSION2_LAZYSBCOUNTBIT | \ XFS_SB_VERSION2_ATTR2BIT | \ XFS_SB_VERSION2_PROJID32BIT | \ XFS_SB_VERSION2_CRCBIT) if ((sbp->sb_versionnum & V5_VERS_FLAGS) != V5_VERS_FLAGS) return false; if ((sbp->sb_features2 & V5_FEAT_FLAGS) != V5_FEAT_FLAGS) return false; return true; } /* * We current support XFS v5 formats with known features and v4 superblocks with * at least V2 directories. */ bool xfs_sb_good_version( struct xfs_sb *sbp) { /* * All v5 filesystems are supported, but we must check that all the * required v4 feature flags are enabled correctly as the code checks * those flags and not for v5 support. */ if (xfs_sb_is_v5(sbp)) return xfs_sb_validate_v5_features(sbp); /* versions prior to v4 are not supported */ if (XFS_SB_VERSION_NUM(sbp) != XFS_SB_VERSION_4) return false; /* We must not have any unknown v4 feature bits set */ if ((sbp->sb_versionnum & ~XFS_SB_VERSION_OKBITS) || ((sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) && (sbp->sb_features2 & ~XFS_SB_VERSION2_OKBITS))) return false; /* V4 filesystems need v2 directories and unwritten extents */ if (!(sbp->sb_versionnum & XFS_SB_VERSION_DIRV2BIT)) return false; if (!(sbp->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT)) return false; /* It's a supported v4 filesystem */ return true; } uint64_t xfs_sb_version_to_features( struct xfs_sb *sbp) { uint64_t features = 0; /* optional V4 features */ if (sbp->sb_rblocks > 0) features |= XFS_FEAT_REALTIME; if (sbp->sb_versionnum & XFS_SB_VERSION_NLINKBIT) features |= XFS_FEAT_NLINK; if (sbp->sb_versionnum & XFS_SB_VERSION_ATTRBIT) features |= XFS_FEAT_ATTR; if (sbp->sb_versionnum & XFS_SB_VERSION_QUOTABIT) features |= XFS_FEAT_QUOTA; if (sbp->sb_versionnum & XFS_SB_VERSION_ALIGNBIT) features |= XFS_FEAT_ALIGN; if (sbp->sb_versionnum & XFS_SB_VERSION_LOGV2BIT) features |= XFS_FEAT_LOGV2; if (sbp->sb_versionnum & XFS_SB_VERSION_DALIGNBIT) features |= XFS_FEAT_DALIGN; if (sbp->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT) features |= XFS_FEAT_EXTFLG; if (sbp->sb_versionnum & XFS_SB_VERSION_SECTORBIT) features |= XFS_FEAT_SECTOR; if (sbp->sb_versionnum & XFS_SB_VERSION_BORGBIT) features |= XFS_FEAT_ASCIICI; if (sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) { if (sbp->sb_features2 & XFS_SB_VERSION2_LAZYSBCOUNTBIT) features |= XFS_FEAT_LAZYSBCOUNT; if (sbp->sb_features2 & XFS_SB_VERSION2_ATTR2BIT) features |= XFS_FEAT_ATTR2; if (sbp->sb_features2 & XFS_SB_VERSION2_PROJID32BIT) features |= XFS_FEAT_PROJID32; if (sbp->sb_features2 & XFS_SB_VERSION2_FTYPE) features |= XFS_FEAT_FTYPE; } if (!xfs_sb_is_v5(sbp)) return features; /* Always on V5 features */ features |= XFS_FEAT_ALIGN | XFS_FEAT_LOGV2 | XFS_FEAT_EXTFLG | XFS_FEAT_LAZYSBCOUNT | XFS_FEAT_ATTR2 | XFS_FEAT_PROJID32 | XFS_FEAT_V3INODES | XFS_FEAT_CRC | XFS_FEAT_PQUOTINO; /* Optional V5 features */ if (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FINOBT) features |= XFS_FEAT_FINOBT; if (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_RMAPBT) features |= XFS_FEAT_RMAPBT; if (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_REFLINK) features |= XFS_FEAT_REFLINK; if (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_INOBTCNT) features |= XFS_FEAT_INOBTCNT; if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_FTYPE) features |= XFS_FEAT_FTYPE; if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_SPINODES) features |= XFS_FEAT_SPINODES; if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_META_UUID) features |= XFS_FEAT_META_UUID; if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_BIGTIME) features |= XFS_FEAT_BIGTIME; if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR) features |= XFS_FEAT_NEEDSREPAIR; if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_NREXT64) features |= XFS_FEAT_NREXT64; return features; } /* Check all the superblock fields we care about when reading one in. */ STATIC int xfs_validate_sb_read( struct xfs_mount *mp, struct xfs_sb *sbp) { if (!xfs_sb_is_v5(sbp)) return 0; /* * Version 5 superblock feature mask validation. Reject combinations * the kernel cannot support up front before checking anything else. */ if (xfs_sb_has_compat_feature(sbp, XFS_SB_FEAT_COMPAT_UNKNOWN)) { xfs_warn(mp, "Superblock has unknown compatible features (0x%x) enabled.", (sbp->sb_features_compat & XFS_SB_FEAT_COMPAT_UNKNOWN)); xfs_warn(mp, "Using a more recent kernel is recommended."); } if (xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) { xfs_alert(mp, "Superblock has unknown read-only compatible features (0x%x) enabled.", (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_UNKNOWN)); if (!xfs_is_readonly(mp)) { xfs_warn(mp, "Attempted to mount read-only compatible filesystem read-write."); xfs_warn(mp, "Filesystem can only be safely mounted read only."); return -EINVAL; } } if (xfs_sb_has_incompat_feature(sbp, XFS_SB_FEAT_INCOMPAT_UNKNOWN)) { xfs_warn(mp, "Superblock has unknown incompatible features (0x%x) enabled.", (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_UNKNOWN)); xfs_warn(mp, "Filesystem cannot be safely mounted by this kernel."); return -EINVAL; } return 0; } /* Check all the superblock fields we care about when writing one out. */ STATIC int xfs_validate_sb_write( struct xfs_mount *mp, struct xfs_buf *bp, struct xfs_sb *sbp) { /* * Carry out additional sb summary counter sanity checks when we write * the superblock. We skip this in the read validator because there * could be newer superblocks in the log and if the values are garbage * even after replay we'll recalculate them at the end of log mount. * * mkfs has traditionally written zeroed counters to inprogress and * secondary superblocks, so allow this usage to continue because * we never read counters from such superblocks. */ if (xfs_buf_daddr(bp) == XFS_SB_DADDR && !sbp->sb_inprogress && (sbp->sb_fdblocks > sbp->sb_dblocks || !xfs_verify_icount(mp, sbp->sb_icount) || sbp->sb_ifree > sbp->sb_icount)) { xfs_warn(mp, "SB summary counter sanity check failed"); return -EFSCORRUPTED; } if (!xfs_sb_is_v5(sbp)) return 0; /* * Version 5 superblock feature mask validation. Reject combinations * the kernel cannot support since we checked for unsupported bits in * the read verifier, which means that memory is corrupt. */ if (xfs_sb_has_compat_feature(sbp, XFS_SB_FEAT_COMPAT_UNKNOWN)) { xfs_warn(mp, "Corruption detected in superblock compatible features (0x%x)!", (sbp->sb_features_compat & XFS_SB_FEAT_COMPAT_UNKNOWN)); return -EFSCORRUPTED; } if (!xfs_is_readonly(mp) && xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) { xfs_alert(mp, "Corruption detected in superblock read-only compatible features (0x%x)!", (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_UNKNOWN)); return -EFSCORRUPTED; } if (xfs_sb_has_incompat_feature(sbp, XFS_SB_FEAT_INCOMPAT_UNKNOWN)) { xfs_warn(mp, "Corruption detected in superblock incompatible features (0x%x)!", (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_UNKNOWN)); return -EFSCORRUPTED; } if (xfs_sb_has_incompat_log_feature(sbp, XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN)) { xfs_warn(mp, "Corruption detected in superblock incompatible log features (0x%x)!", (sbp->sb_features_log_incompat & XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN)); return -EFSCORRUPTED; } /* * We can't read verify the sb LSN because the read verifier is called * before the log is allocated and processed. We know the log is set up * before write verifier calls, so check it here. */ if (!xfs_log_check_lsn(mp, sbp->sb_lsn)) return -EFSCORRUPTED; return 0; } /* Check the validity of the SB. */ STATIC int xfs_validate_sb_common( struct xfs_mount *mp, struct xfs_buf *bp, struct xfs_sb *sbp) { struct xfs_dsb *dsb = bp->b_addr; uint32_t agcount = 0; uint32_t rem; bool has_dalign; if (!xfs_verify_magic(bp, dsb->sb_magicnum)) { xfs_warn(mp, "Superblock has bad magic number 0x%x. Not an XFS filesystem?", be32_to_cpu(dsb->sb_magicnum)); return -EWRONGFS; } if (!xfs_sb_good_version(sbp)) { xfs_warn(mp, "Superblock has unknown features enabled or corrupted feature masks."); return -EWRONGFS; } /* * Validate feature flags and state */ if (xfs_sb_is_v5(sbp)) { if (sbp->sb_blocksize < XFS_MIN_CRC_BLOCKSIZE) { xfs_notice(mp, "Block size (%u bytes) too small for Version 5 superblock (minimum %d bytes)", sbp->sb_blocksize, XFS_MIN_CRC_BLOCKSIZE); return -EFSCORRUPTED; } /* V5 has a separate project quota inode */ if (sbp->sb_qflags & (XFS_OQUOTA_ENFD | XFS_OQUOTA_CHKD)) { xfs_notice(mp, "Version 5 of Super block has XFS_OQUOTA bits."); return -EFSCORRUPTED; } /* * Full inode chunks must be aligned to inode chunk size when * sparse inodes are enabled to support the sparse chunk * allocation algorithm and prevent overlapping inode records. */ if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_SPINODES) { uint32_t align; align = XFS_INODES_PER_CHUNK * sbp->sb_inodesize >> sbp->sb_blocklog; if (sbp->sb_inoalignmt != align) { xfs_warn(mp, "Inode block alignment (%u) must match chunk size (%u) for sparse inodes.", sbp->sb_inoalignmt, align); return -EINVAL; } } } else if (sbp->sb_qflags & (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD | XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD)) { xfs_notice(mp, "Superblock earlier than Version 5 has XFS_{P|G}QUOTA_{ENFD|CHKD} bits."); return -EFSCORRUPTED; } if (unlikely( sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) { xfs_warn(mp, "filesystem is marked as having an external log; " "specify logdev on the mount command line."); return -EINVAL; } if (unlikely( sbp->sb_logstart != 0 && mp->m_logdev_targp != mp->m_ddev_targp)) { xfs_warn(mp, "filesystem is marked as having an internal log; " "do not specify logdev on the mount command line."); return -EINVAL; } /* Compute agcount for this number of dblocks and agblocks */ if (sbp->sb_agblocks) { agcount = div_u64_rem(sbp->sb_dblocks, sbp->sb_agblocks, &rem); if (rem) agcount++; } /* * More sanity checking. Most of these were stolen directly from * xfs_repair. */ if (unlikely( sbp->sb_agcount <= 0 || sbp->sb_sectsize < XFS_MIN_SECTORSIZE || sbp->sb_sectsize > XFS_MAX_SECTORSIZE || sbp->sb_sectlog < XFS_MIN_SECTORSIZE_LOG || sbp->sb_sectlog > XFS_MAX_SECTORSIZE_LOG || sbp->sb_sectsize != (1 << sbp->sb_sectlog) || sbp->sb_blocksize < XFS_MIN_BLOCKSIZE || sbp->sb_blocksize > XFS_MAX_BLOCKSIZE || sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG || sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG || sbp->sb_blocksize != (1 << sbp->sb_blocklog) || sbp->sb_dirblklog + sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG || sbp->sb_inodesize < XFS_DINODE_MIN_SIZE || sbp->sb_inodesize > XFS_DINODE_MAX_SIZE || sbp->sb_inodelog < XFS_DINODE_MIN_LOG || sbp->sb_inodelog > XFS_DINODE_MAX_LOG || sbp->sb_inodesize != (1 << sbp->sb_inodelog) || sbp->sb_inopblock != howmany(sbp->sb_blocksize,sbp->sb_inodesize) || XFS_FSB_TO_B(mp, sbp->sb_agblocks) < XFS_MIN_AG_BYTES || XFS_FSB_TO_B(mp, sbp->sb_agblocks) > XFS_MAX_AG_BYTES || sbp->sb_agblklog != xfs_highbit32(sbp->sb_agblocks - 1) + 1 || agcount == 0 || agcount != sbp->sb_agcount || (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) || (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */) || sbp->sb_dblocks == 0 || sbp->sb_dblocks > XFS_MAX_DBLOCKS(sbp) || sbp->sb_dblocks < XFS_MIN_DBLOCKS(sbp) || sbp->sb_shared_vn != 0)) { xfs_notice(mp, "SB sanity check failed"); return -EFSCORRUPTED; } /* * Logs that are too large are not supported at all. Reject them * outright. Logs that are too small are tolerated on v4 filesystems, * but we can only check that when mounting the log. Hence we skip * those checks here. */ if (sbp->sb_logblocks > XFS_MAX_LOG_BLOCKS) { xfs_notice(mp, "Log size 0x%x blocks too large, maximum size is 0x%llx blocks", sbp->sb_logblocks, XFS_MAX_LOG_BLOCKS); return -EFSCORRUPTED; } if (XFS_FSB_TO_B(mp, sbp->sb_logblocks) > XFS_MAX_LOG_BYTES) { xfs_warn(mp, "log size 0x%llx bytes too large, maximum size is 0x%llx bytes", XFS_FSB_TO_B(mp, sbp->sb_logblocks), XFS_MAX_LOG_BYTES); return -EFSCORRUPTED; } /* * Do not allow filesystems with corrupted log sector or stripe units to * be mounted. We cannot safely size the iclogs or write to the log if * the log stripe unit is not valid. */ if (sbp->sb_versionnum & XFS_SB_VERSION_SECTORBIT) { if (sbp->sb_logsectsize != (1U << sbp->sb_logsectlog)) { xfs_notice(mp, "log sector size in bytes/log2 (0x%x/0x%x) must match", sbp->sb_logsectsize, 1U << sbp->sb_logsectlog); return -EFSCORRUPTED; } } else if (sbp->sb_logsectsize || sbp->sb_logsectlog) { xfs_notice(mp, "log sector size in bytes/log2 (0x%x/0x%x) are not zero", sbp->sb_logsectsize, sbp->sb_logsectlog); return -EFSCORRUPTED; } if (sbp->sb_logsunit > 1) { if (sbp->sb_logsunit % sbp->sb_blocksize) { xfs_notice(mp, "log stripe unit 0x%x bytes must be a multiple of block size", sbp->sb_logsunit); return -EFSCORRUPTED; } if (sbp->sb_logsunit > XLOG_MAX_RECORD_BSIZE) { xfs_notice(mp, "log stripe unit 0x%x bytes over maximum size (0x%x bytes)", sbp->sb_logsunit, XLOG_MAX_RECORD_BSIZE); return -EFSCORRUPTED; } } /* Validate the realtime geometry; stolen from xfs_repair */ if (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE || sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) { xfs_notice(mp, "realtime extent sanity check failed"); return -EFSCORRUPTED; } if (sbp->sb_rblocks == 0) { if (sbp->sb_rextents != 0 || sbp->sb_rbmblocks != 0 || sbp->sb_rextslog != 0 || sbp->sb_frextents != 0) { xfs_notice(mp, "realtime zeroed geometry check failed"); return -EFSCORRUPTED; } } else { uint64_t rexts; uint64_t rbmblocks; rexts = div_u64(sbp->sb_rblocks, sbp->sb_rextsize); rbmblocks = howmany_64(sbp->sb_rextents, NBBY * sbp->sb_blocksize); if (!xfs_validate_rtextents(rexts) || sbp->sb_rextents != rexts || sbp->sb_rextslog != xfs_compute_rextslog(rexts) || sbp->sb_rbmblocks != rbmblocks) { xfs_notice(mp, "realtime geometry sanity check failed"); return -EFSCORRUPTED; } } /* * Either (sb_unit and !hasdalign) or (!sb_unit and hasdalign) * would imply the image is corrupted. */ has_dalign = sbp->sb_versionnum & XFS_SB_VERSION_DALIGNBIT; if (!!sbp->sb_unit ^ has_dalign) { xfs_notice(mp, "SB stripe alignment sanity check failed"); return -EFSCORRUPTED; } if (!xfs_validate_stripe_geometry(mp, XFS_FSB_TO_B(mp, sbp->sb_unit), XFS_FSB_TO_B(mp, sbp->sb_width), 0, xfs_buf_daddr(bp) == XFS_SB_DADDR, false)) return -EFSCORRUPTED; /* * Currently only very few inode sizes are supported. */ switch (sbp->sb_inodesize) { case 256: case 512: case 1024: case 2048: break; default: xfs_warn(mp, "inode size of %d bytes not supported", sbp->sb_inodesize); return -ENOSYS; } return 0; } void xfs_sb_quota_from_disk(struct xfs_sb *sbp) { /* * older mkfs doesn't initialize quota inodes to NULLFSINO. This * leads to in-core values having two different values for a quota * inode to be invalid: 0 and NULLFSINO. Change it to a single value * NULLFSINO. * * Note that this change affect only the in-core values. These * values are not written back to disk unless any quota information * is written to the disk. Even in that case, sb_pquotino field is * not written to disk unless the superblock supports pquotino. */ if (sbp->sb_uquotino == 0) sbp->sb_uquotino = NULLFSINO; if (sbp->sb_gquotino == 0) sbp->sb_gquotino = NULLFSINO; if (sbp->sb_pquotino == 0) sbp->sb_pquotino = NULLFSINO; /* * We need to do these manipilations only if we are working * with an older version of on-disk superblock. */ if (xfs_sb_is_v5(sbp)) return; if (sbp->sb_qflags & XFS_OQUOTA_ENFD) sbp->sb_qflags |= (sbp->sb_qflags & XFS_PQUOTA_ACCT) ? XFS_PQUOTA_ENFD : XFS_GQUOTA_ENFD; if (sbp->sb_qflags & XFS_OQUOTA_CHKD) sbp->sb_qflags |= (sbp->sb_qflags & XFS_PQUOTA_ACCT) ? XFS_PQUOTA_CHKD : XFS_GQUOTA_CHKD; sbp->sb_qflags &= ~(XFS_OQUOTA_ENFD | XFS_OQUOTA_CHKD); if (sbp->sb_qflags & XFS_PQUOTA_ACCT && sbp->sb_gquotino != NULLFSINO) { /* * In older version of superblock, on-disk superblock only * has sb_gquotino, and in-core superblock has both sb_gquotino * and sb_pquotino. But, only one of them is supported at any * point of time. So, if PQUOTA is set in disk superblock, * copy over sb_gquotino to sb_pquotino. The NULLFSINO test * above is to make sure we don't do this twice and wipe them * both out! */ sbp->sb_pquotino = sbp->sb_gquotino; sbp->sb_gquotino = NULLFSINO; } } static void __xfs_sb_from_disk( struct xfs_sb *to, struct xfs_dsb *from, bool convert_xquota) { to->sb_magicnum = be32_to_cpu(from->sb_magicnum); to->sb_blocksize = be32_to_cpu(from->sb_blocksize); to->sb_dblocks = be64_to_cpu(from->sb_dblocks); to->sb_rblocks = be64_to_cpu(from->sb_rblocks); to->sb_rextents = be64_to_cpu(from->sb_rextents); memcpy(&to->sb_uuid, &from->sb_uuid, sizeof(to->sb_uuid)); to->sb_logstart = be64_to_cpu(from->sb_logstart); to->sb_rootino = be64_to_cpu(from->sb_rootino); to->sb_rbmino = be64_to_cpu(from->sb_rbmino); to->sb_rsumino = be64_to_cpu(from->sb_rsumino); to->sb_rextsize = be32_to_cpu(from->sb_rextsize); to->sb_agblocks = be32_to_cpu(from->sb_agblocks); to->sb_agcount = be32_to_cpu(from->sb_agcount); to->sb_rbmblocks = be32_to_cpu(from->sb_rbmblocks); to->sb_logblocks = be32_to_cpu(from->sb_logblocks); to->sb_versionnum = be16_to_cpu(from->sb_versionnum); to->sb_sectsize = be16_to_cpu(from->sb_sectsize); to->sb_inodesize = be16_to_cpu(from->sb_inodesize); to->sb_inopblock = be16_to_cpu(from->sb_inopblock); memcpy(&to->sb_fname, &from->sb_fname, sizeof(to->sb_fname)); to->sb_blocklog = from->sb_blocklog; to->sb_sectlog = from->sb_sectlog; to->sb_inodelog = from->sb_inodelog; to->sb_inopblog = from->sb_inopblog; to->sb_agblklog = from->sb_agblklog; to->sb_rextslog = from->sb_rextslog; to->sb_inprogress = from->sb_inprogress; to->sb_imax_pct = from->sb_imax_pct; to->sb_icount = be64_to_cpu(from->sb_icount); to->sb_ifree = be64_to_cpu(from->sb_ifree); to->sb_fdblocks = be64_to_cpu(from->sb_fdblocks); to->sb_frextents = be64_to_cpu(from->sb_frextents); to->sb_uquotino = be64_to_cpu(from->sb_uquotino); to->sb_gquotino = be64_to_cpu(from->sb_gquotino); to->sb_qflags = be16_to_cpu(from->sb_qflags); to->sb_flags = from->sb_flags; to->sb_shared_vn = from->sb_shared_vn; to->sb_inoalignmt = be32_to_cpu(from->sb_inoalignmt); to->sb_unit = be32_to_cpu(from->sb_unit); to->sb_width = be32_to_cpu(from->sb_width); to->sb_dirblklog = from->sb_dirblklog; to->sb_logsectlog = from->sb_logsectlog; to->sb_logsectsize = be16_to_cpu(from->sb_logsectsize); to->sb_logsunit = be32_to_cpu(from->sb_logsunit); to->sb_features2 = be32_to_cpu(from->sb_features2); to->sb_bad_features2 = be32_to_cpu(from->sb_bad_features2); to->sb_features_compat = be32_to_cpu(from->sb_features_compat); to->sb_features_ro_compat = be32_to_cpu(from->sb_features_ro_compat); to->sb_features_incompat = be32_to_cpu(from->sb_features_incompat); to->sb_features_log_incompat = be32_to_cpu(from->sb_features_log_incompat); /* crc is only used on disk, not in memory; just init to 0 here. */ to->sb_crc = 0; to->sb_spino_align = be32_to_cpu(from->sb_spino_align); to->sb_pquotino = be64_to_cpu(from->sb_pquotino); to->sb_lsn = be64_to_cpu(from->sb_lsn); /* * sb_meta_uuid is only on disk if it differs from sb_uuid and the * feature flag is set; if not set we keep it only in memory. */ if (xfs_sb_is_v5(to) && (to->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_META_UUID)) uuid_copy(&to->sb_meta_uuid, &from->sb_meta_uuid); else uuid_copy(&to->sb_meta_uuid, &from->sb_uuid); /* Convert on-disk flags to in-memory flags? */ if (convert_xquota) xfs_sb_quota_from_disk(to); } void xfs_sb_from_disk( struct xfs_sb *to, struct xfs_dsb *from) { __xfs_sb_from_disk(to, from, true); } static void xfs_sb_quota_to_disk( struct xfs_dsb *to, struct xfs_sb *from) { uint16_t qflags = from->sb_qflags; to->sb_uquotino = cpu_to_be64(from->sb_uquotino); /* * The in-memory superblock quota state matches the v5 on-disk format so * just write them out and return */ if (xfs_sb_is_v5(from)) { to->sb_qflags = cpu_to_be16(from->sb_qflags); to->sb_gquotino = cpu_to_be64(from->sb_gquotino); to->sb_pquotino = cpu_to_be64(from->sb_pquotino); return; } /* * For older superblocks (v4), the in-core version of sb_qflags do not * have XFS_OQUOTA_* flags, whereas the on-disk version does. So, * convert incore XFS_{PG}QUOTA_* flags to on-disk XFS_OQUOTA_* flags. */ qflags &= ~(XFS_PQUOTA_ENFD | XFS_PQUOTA_CHKD | XFS_GQUOTA_ENFD | XFS_GQUOTA_CHKD); if (from->sb_qflags & (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD)) qflags |= XFS_OQUOTA_ENFD; if (from->sb_qflags & (XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD)) qflags |= XFS_OQUOTA_CHKD; to->sb_qflags = cpu_to_be16(qflags); /* * GQUOTINO and PQUOTINO cannot be used together in versions * of superblock that do not have pquotino. from->sb_flags * tells us which quota is active and should be copied to * disk. If neither are active, we should NULL the inode. * * In all cases, the separate pquotino must remain 0 because it * is beyond the "end" of the valid non-pquotino superblock. */ if (from->sb_qflags & XFS_GQUOTA_ACCT) to->sb_gquotino = cpu_to_be64(from->sb_gquotino); else if (from->sb_qflags & XFS_PQUOTA_ACCT) to->sb_gquotino = cpu_to_be64(from->sb_pquotino); else { /* * We can't rely on just the fields being logged to tell us * that it is safe to write NULLFSINO - we should only do that * if quotas are not actually enabled. Hence only write * NULLFSINO if both in-core quota inodes are NULL. */ if (from->sb_gquotino == NULLFSINO && from->sb_pquotino == NULLFSINO) to->sb_gquotino = cpu_to_be64(NULLFSINO); } to->sb_pquotino = 0; } void xfs_sb_to_disk( struct xfs_dsb *to, struct xfs_sb *from) { xfs_sb_quota_to_disk(to, from); to->sb_magicnum = cpu_to_be32(from->sb_magicnum); to->sb_blocksize = cpu_to_be32(from->sb_blocksize); to->sb_dblocks = cpu_to_be64(from->sb_dblocks); to->sb_rblocks = cpu_to_be64(from->sb_rblocks); to->sb_rextents = cpu_to_be64(from->sb_rextents); memcpy(&to->sb_uuid, &from->sb_uuid, sizeof(to->sb_uuid)); to->sb_logstart = cpu_to_be64(from->sb_logstart); to->sb_rootino = cpu_to_be64(from->sb_rootino); to->sb_rbmino = cpu_to_be64(from->sb_rbmino); to->sb_rsumino = cpu_to_be64(from->sb_rsumino); to->sb_rextsize = cpu_to_be32(from->sb_rextsize); to->sb_agblocks = cpu_to_be32(from->sb_agblocks); to->sb_agcount = cpu_to_be32(from->sb_agcount); to->sb_rbmblocks = cpu_to_be32(from->sb_rbmblocks); to->sb_logblocks = cpu_to_be32(from->sb_logblocks); to->sb_versionnum = cpu_to_be16(from->sb_versionnum); to->sb_sectsize = cpu_to_be16(from->sb_sectsize); to->sb_inodesize = cpu_to_be16(from->sb_inodesize); to->sb_inopblock = cpu_to_be16(from->sb_inopblock); memcpy(&to->sb_fname, &from->sb_fname, sizeof(to->sb_fname)); to->sb_blocklog = from->sb_blocklog; to->sb_sectlog = from->sb_sectlog; to->sb_inodelog = from->sb_inodelog; to->sb_inopblog = from->sb_inopblog; to->sb_agblklog = from->sb_agblklog; to->sb_rextslog = from->sb_rextslog; to->sb_inprogress = from->sb_inprogress; to->sb_imax_pct = from->sb_imax_pct; to->sb_icount = cpu_to_be64(from->sb_icount); to->sb_ifree = cpu_to_be64(from->sb_ifree); to->sb_fdblocks = cpu_to_be64(from->sb_fdblocks); to->sb_frextents = cpu_to_be64(from->sb_frextents); to->sb_flags = from->sb_flags; to->sb_shared_vn = from->sb_shared_vn; to->sb_inoalignmt = cpu_to_be32(from->sb_inoalignmt); to->sb_unit = cpu_to_be32(from->sb_unit); to->sb_width = cpu_to_be32(from->sb_width); to->sb_dirblklog = from->sb_dirblklog; to->sb_logsectlog = from->sb_logsectlog; to->sb_logsectsize = cpu_to_be16(from->sb_logsectsize); to->sb_logsunit = cpu_to_be32(from->sb_logsunit); /* * We need to ensure that bad_features2 always matches features2. * Hence we enforce that here rather than having to remember to do it * everywhere else that updates features2. */ from->sb_bad_features2 = from->sb_features2; to->sb_features2 = cpu_to_be32(from->sb_features2); to->sb_bad_features2 = cpu_to_be32(from->sb_bad_features2); if (!xfs_sb_is_v5(from)) return; to->sb_features_compat = cpu_to_be32(from->sb_features_compat); to->sb_features_ro_compat = cpu_to_be32(from->sb_features_ro_compat); to->sb_features_incompat = cpu_to_be32(from->sb_features_incompat); to->sb_features_log_incompat = cpu_to_be32(from->sb_features_log_incompat); to->sb_spino_align = cpu_to_be32(from->sb_spino_align); to->sb_lsn = cpu_to_be64(from->sb_lsn); if (from->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_META_UUID) uuid_copy(&to->sb_meta_uuid, &from->sb_meta_uuid); } /* * If the superblock has the CRC feature bit set or the CRC field is non-null, * check that the CRC is valid. We check the CRC field is non-null because a * single bit error could clear the feature bit and unused parts of the * superblock are supposed to be zero. Hence a non-null crc field indicates that * we've potentially lost a feature bit and we should check it anyway. * * However, past bugs (i.e. in growfs) left non-zeroed regions beyond the * last field in V4 secondary superblocks. So for secondary superblocks, * we are more forgiving, and ignore CRC failures if the primary doesn't * indicate that the fs version is V5. */ static void xfs_sb_read_verify( struct xfs_buf *bp) { struct xfs_sb sb; struct xfs_mount *mp = bp->b_mount; struct xfs_dsb *dsb = bp->b_addr; int error; /* * open code the version check to avoid needing to convert the entire * superblock from disk order just to check the version number */ if (dsb->sb_magicnum == cpu_to_be32(XFS_SB_MAGIC) && (((be16_to_cpu(dsb->sb_versionnum) & XFS_SB_VERSION_NUMBITS) == XFS_SB_VERSION_5) || dsb->sb_crc != 0)) { if (!xfs_buf_verify_cksum(bp, XFS_SB_CRC_OFF)) { /* Only fail bad secondaries on a known V5 filesystem */ if (xfs_buf_daddr(bp) == XFS_SB_DADDR || xfs_has_crc(mp)) { error = -EFSBADCRC; goto out_error; } } } /* * Check all the superblock fields. Don't byteswap the xquota flags * because _verify_common checks the on-disk values. */ __xfs_sb_from_disk(&sb, dsb, false); error = xfs_validate_sb_common(mp, bp, &sb); if (error) goto out_error; error = xfs_validate_sb_read(mp, &sb); out_error: if (error == -EFSCORRUPTED || error == -EFSBADCRC) xfs_verifier_error(bp, error, __this_address); else if (error) xfs_buf_ioerror(bp, error); } /* * We may be probed for a filesystem match, so we may not want to emit * messages when the superblock buffer is not actually an XFS superblock. * If we find an XFS superblock, then run a normal, noisy mount because we are * really going to mount it and want to know about errors. */ static void xfs_sb_quiet_read_verify( struct xfs_buf *bp) { struct xfs_dsb *dsb = bp->b_addr; if (dsb->sb_magicnum == cpu_to_be32(XFS_SB_MAGIC)) { /* XFS filesystem, verify noisily! */ xfs_sb_read_verify(bp); return; } /* quietly fail */ xfs_buf_ioerror(bp, -EWRONGFS); } static void xfs_sb_write_verify( struct xfs_buf *bp) { struct xfs_sb sb; struct xfs_mount *mp = bp->b_mount; struct xfs_buf_log_item *bip = bp->b_log_item; struct xfs_dsb *dsb = bp->b_addr; int error; /* * Check all the superblock fields. Don't byteswap the xquota flags * because _verify_common checks the on-disk values. */ __xfs_sb_from_disk(&sb, dsb, false); error = xfs_validate_sb_common(mp, bp, &sb); if (error) goto out_error; error = xfs_validate_sb_write(mp, bp, &sb); if (error) goto out_error; if (!xfs_sb_is_v5(&sb)) return; if (bip) dsb->sb_lsn = cpu_to_be64(bip->bli_item.li_lsn); xfs_buf_update_cksum(bp, XFS_SB_CRC_OFF); return; out_error: xfs_verifier_error(bp, error, __this_address); } const struct xfs_buf_ops xfs_sb_buf_ops = { .name = "xfs_sb", .magic = { cpu_to_be32(XFS_SB_MAGIC), cpu_to_be32(XFS_SB_MAGIC) }, .verify_read = xfs_sb_read_verify, .verify_write = xfs_sb_write_verify, }; const struct xfs_buf_ops xfs_sb_quiet_buf_ops = { .name = "xfs_sb_quiet", .magic = { cpu_to_be32(XFS_SB_MAGIC), cpu_to_be32(XFS_SB_MAGIC) }, .verify_read = xfs_sb_quiet_read_verify, .verify_write = xfs_sb_write_verify, }; /* * xfs_mount_common * * Mount initialization code establishing various mount * fields from the superblock associated with the given * mount structure. * * Inode geometry are calculated in xfs_ialloc_setup_geometry. */ void xfs_sb_mount_common( struct xfs_mount *mp, struct xfs_sb *sbp) { mp->m_agfrotor = 0; atomic_set(&mp->m_agirotor, 0); mp->m_maxagi = mp->m_sb.sb_agcount; mp->m_blkbit_log = sbp->sb_blocklog + XFS_NBBYLOG; mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT; mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT; mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1; mp->m_blockmask = sbp->sb_blocksize - 1; mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG; mp->m_blockwmask = mp->m_blockwsize - 1; mp->m_rtxblklog = log2_if_power2(sbp->sb_rextsize); mp->m_rtxblkmask = mask64_if_power2(sbp->sb_rextsize); mp->m_alloc_mxr[0] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 1); mp->m_alloc_mxr[1] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 0); mp->m_alloc_mnr[0] = mp->m_alloc_mxr[0] / 2; mp->m_alloc_mnr[1] = mp->m_alloc_mxr[1] / 2; mp->m_bmap_dmxr[0] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 1); mp->m_bmap_dmxr[1] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 0); mp->m_bmap_dmnr[0] = mp->m_bmap_dmxr[0] / 2; mp->m_bmap_dmnr[1] = mp->m_bmap_dmxr[1] / 2; mp->m_rmap_mxr[0] = xfs_rmapbt_maxrecs(sbp->sb_blocksize, 1); mp->m_rmap_mxr[1] = xfs_rmapbt_maxrecs(sbp->sb_blocksize, 0); mp->m_rmap_mnr[0] = mp->m_rmap_mxr[0] / 2; mp->m_rmap_mnr[1] = mp->m_rmap_mxr[1] / 2; mp->m_refc_mxr[0] = xfs_refcountbt_maxrecs(sbp->sb_blocksize, true); mp->m_refc_mxr[1] = xfs_refcountbt_maxrecs(sbp->sb_blocksize, false); mp->m_refc_mnr[0] = mp->m_refc_mxr[0] / 2; mp->m_refc_mnr[1] = mp->m_refc_mxr[1] / 2; mp->m_bsize = XFS_FSB_TO_BB(mp, 1); mp->m_alloc_set_aside = xfs_alloc_set_aside(mp); mp->m_ag_max_usable = xfs_alloc_ag_max_usable(mp); } /* * xfs_log_sb() can be used to copy arbitrary changes to the in-core superblock * into the superblock buffer to be logged. It does not provide the higher * level of locking that is needed to protect the in-core superblock from * concurrent access. */ void xfs_log_sb( struct xfs_trans *tp) { struct xfs_mount *mp = tp->t_mountp; struct xfs_buf *bp = xfs_trans_getsb(tp); /* * Lazy sb counters don't update the in-core superblock so do that now. * If this is at unmount, the counters will be exactly correct, but at * any other time they will only be ballpark correct because of * reservations that have been taken out percpu counters. If we have an * unclean shutdown, this will be corrected by log recovery rebuilding * the counters from the AGF block counts. * * Do not update sb_frextents here because it is not part of the lazy * sb counters, despite having a percpu counter. It is always kept * consistent with the ondisk rtbitmap by xfs_trans_apply_sb_deltas() * and hence we don't need have to update it here. */ if (xfs_has_lazysbcount(mp)) { mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount); mp->m_sb.sb_ifree = min_t(uint64_t, percpu_counter_sum(&mp->m_ifree), mp->m_sb.sb_icount); mp->m_sb.sb_fdblocks = percpu_counter_sum(&mp->m_fdblocks); } xfs_sb_to_disk(bp->b_addr, &mp->m_sb); xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF); xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsb) - 1); } /* * xfs_sync_sb * * Sync the superblock to disk. * * Note that the caller is responsible for checking the frozen state of the * filesystem. This procedure uses the non-blocking transaction allocator and * thus will allow modifications to a frozen fs. This is required because this * code can be called during the process of freezing where use of the high-level * allocator would deadlock. */ int xfs_sync_sb( struct xfs_mount *mp, bool wait) { struct xfs_trans *tp; int error; error = xfs_trans_alloc(mp, &M_RES(mp)->tr_sb, 0, 0, XFS_TRANS_NO_WRITECOUNT, &tp); if (error) return error; xfs_log_sb(tp); if (wait) xfs_trans_set_sync(tp); return xfs_trans_commit(tp); } /* * Update all the secondary superblocks to match the new state of the primary. * Because we are completely overwriting all the existing fields in the * secondary superblock buffers, there is no need to read them in from disk. * Just get a new buffer, stamp it and write it. * * The sb buffers need to be cached here so that we serialise against other * operations that access the secondary superblocks, but we don't want to keep * them in memory once it is written so we mark it as a one-shot buffer. */ int xfs_update_secondary_sbs( struct xfs_mount *mp) { struct xfs_perag *pag; xfs_agnumber_t agno = 1; int saved_error = 0; int error = 0; LIST_HEAD (buffer_list); /* update secondary superblocks. */ for_each_perag_from(mp, agno, pag) { struct xfs_buf *bp; error = xfs_buf_get(mp->m_ddev_targp, XFS_AG_DADDR(mp, pag->pag_agno, XFS_SB_DADDR), XFS_FSS_TO_BB(mp, 1), &bp); /* * If we get an error reading or writing alternate superblocks, * continue. xfs_repair chooses the "best" superblock based * on most matches; if we break early, we'll leave more * superblocks un-updated than updated, and xfs_repair may * pick them over the properly-updated primary. */ if (error) { xfs_warn(mp, "error allocating secondary superblock for ag %d", pag->pag_agno); if (!saved_error) saved_error = error; continue; } bp->b_ops = &xfs_sb_buf_ops; xfs_buf_oneshot(bp); xfs_buf_zero(bp, 0, BBTOB(bp->b_length)); xfs_sb_to_disk(bp->b_addr, &mp->m_sb); xfs_buf_delwri_queue(bp, &buffer_list); xfs_buf_relse(bp); /* don't hold too many buffers at once */ if (agno % 16) continue; error = xfs_buf_delwri_submit(&buffer_list); if (error) { xfs_warn(mp, "write error %d updating a secondary superblock near ag %d", error, pag->pag_agno); if (!saved_error) saved_error = error; continue; } } error = xfs_buf_delwri_submit(&buffer_list); if (error) { xfs_warn(mp, "write error %d updating a secondary superblock near ag %d", error, agno); } return saved_error ? saved_error : error; } /* * Same behavior as xfs_sync_sb, except that it is always synchronous and it * also writes the superblock buffer to disk sector 0 immediately. */ int xfs_sync_sb_buf( struct xfs_mount *mp) { struct xfs_trans *tp; struct xfs_buf *bp; int error; error = xfs_trans_alloc(mp, &M_RES(mp)->tr_sb, 0, 0, 0, &tp); if (error) return error; bp = xfs_trans_getsb(tp); xfs_log_sb(tp); xfs_trans_bhold(tp, bp); xfs_trans_set_sync(tp); error = xfs_trans_commit(tp); if (error) goto out; /* * write out the sb buffer to get the changes to disk */ error = xfs_bwrite(bp); out: xfs_buf_relse(bp); return error; } void xfs_fs_geometry( struct xfs_mount *mp, struct xfs_fsop_geom *geo, int struct_version) { struct xfs_sb *sbp = &mp->m_sb; memset(geo, 0, sizeof(struct xfs_fsop_geom)); geo->blocksize = sbp->sb_blocksize; geo->rtextsize = sbp->sb_rextsize; geo->agblocks = sbp->sb_agblocks; geo->agcount = sbp->sb_agcount; geo->logblocks = sbp->sb_logblocks; geo->sectsize = sbp->sb_sectsize; geo->inodesize = sbp->sb_inodesize; geo->imaxpct = sbp->sb_imax_pct; geo->datablocks = sbp->sb_dblocks; geo->rtblocks = sbp->sb_rblocks; geo->rtextents = sbp->sb_rextents; geo->logstart = sbp->sb_logstart; BUILD_BUG_ON(sizeof(geo->uuid) != sizeof(sbp->sb_uuid)); memcpy(geo->uuid, &sbp->sb_uuid, sizeof(sbp->sb_uuid)); if (struct_version < 2) return; geo->sunit = sbp->sb_unit; geo->swidth = sbp->sb_width; if (struct_version < 3) return; geo->version = XFS_FSOP_GEOM_VERSION; geo->flags = XFS_FSOP_GEOM_FLAGS_NLINK | XFS_FSOP_GEOM_FLAGS_DIRV2 | XFS_FSOP_GEOM_FLAGS_EXTFLG; if (xfs_has_attr(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_ATTR; if (xfs_has_quota(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_QUOTA; if (xfs_has_align(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_IALIGN; if (xfs_has_dalign(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_DALIGN; if (xfs_has_asciici(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_DIRV2CI; if (xfs_has_lazysbcount(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_LAZYSB; if (xfs_has_attr2(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_ATTR2; if (xfs_has_projid32(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_PROJID32; if (xfs_has_crc(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_V5SB; if (xfs_has_ftype(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_FTYPE; if (xfs_has_finobt(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_FINOBT; if (xfs_has_sparseinodes(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_SPINODES; if (xfs_has_rmapbt(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_RMAPBT; if (xfs_has_reflink(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_REFLINK; if (xfs_has_bigtime(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_BIGTIME; if (xfs_has_inobtcounts(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_INOBTCNT; if (xfs_has_sector(mp)) { geo->flags |= XFS_FSOP_GEOM_FLAGS_SECTOR; geo->logsectsize = sbp->sb_logsectsize; } else { geo->logsectsize = BBSIZE; } if (xfs_has_large_extent_counts(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_NREXT64; geo->rtsectsize = sbp->sb_blocksize; geo->dirblocksize = xfs_dir2_dirblock_bytes(sbp); if (struct_version < 4) return; if (xfs_has_logv2(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_LOGV2; geo->logsunit = sbp->sb_logsunit; if (struct_version < 5) return; geo->version = XFS_FSOP_GEOM_VERSION_V5; } /* Read a secondary superblock. */ int xfs_sb_read_secondary( struct xfs_mount *mp, struct xfs_trans *tp, xfs_agnumber_t agno, struct xfs_buf **bpp) { struct xfs_buf *bp; int error; ASSERT(agno != 0 && agno != NULLAGNUMBER); error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, XFS_AG_DADDR(mp, agno, XFS_SB_BLOCK(mp)), XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_sb_buf_ops); if (xfs_metadata_is_sick(error)) xfs_agno_mark_sick(mp, agno, XFS_SICK_AG_SB); if (error) return error; xfs_buf_set_ref(bp, XFS_SSB_REF); *bpp = bp; return 0; } /* Get an uninitialised secondary superblock buffer. */ int xfs_sb_get_secondary( struct xfs_mount *mp, struct xfs_trans *tp, xfs_agnumber_t agno, struct xfs_buf **bpp) { struct xfs_buf *bp; int error; ASSERT(agno != 0 && agno != NULLAGNUMBER); error = xfs_trans_get_buf(tp, mp->m_ddev_targp, XFS_AG_DADDR(mp, agno, XFS_SB_BLOCK(mp)), XFS_FSS_TO_BB(mp, 1), 0, &bp); if (error) return error; bp->b_ops = &xfs_sb_buf_ops; xfs_buf_oneshot(bp); *bpp = bp; return 0; } /* * sunit, swidth, sectorsize(optional with 0) should be all in bytes, so users * won't be confused by values in error messages. This function returns false * if the stripe geometry is invalid and the caller is unable to repair the * stripe configuration later in the mount process. */ bool xfs_validate_stripe_geometry( struct xfs_mount *mp, __s64 sunit, __s64 swidth, int sectorsize, bool may_repair, bool silent) { if (swidth > INT_MAX) { if (!silent) xfs_notice(mp, "stripe width (%lld) is too large", swidth); goto check_override; } if (sunit > swidth) { if (!silent) xfs_notice(mp, "stripe unit (%lld) is larger than the stripe width (%lld)", sunit, swidth); goto check_override; } if (sectorsize && (int)sunit % sectorsize) { if (!silent) xfs_notice(mp, "stripe unit (%lld) must be a multiple of the sector size (%d)", sunit, sectorsize); goto check_override; } if (sunit && !swidth) { if (!silent) xfs_notice(mp, "invalid stripe unit (%lld) and stripe width of 0", sunit); goto check_override; } if (!sunit && swidth) { if (!silent) xfs_notice(mp, "invalid stripe width (%lld) and stripe unit of 0", swidth); goto check_override; } if (sunit && (int)swidth % (int)sunit) { if (!silent) xfs_notice(mp, "stripe width (%lld) must be a multiple of the stripe unit (%lld)", swidth, sunit); goto check_override; } return true; check_override: if (!may_repair) return false; /* * During mount, mp->m_dalign will not be set unless the sunit mount * option was set. If it was set, ignore the bad stripe alignment values * and allow the validation and overwrite later in the mount process to * attempt to overwrite the bad stripe alignment values with the values * supplied by mount options. */ if (!mp->m_dalign) return false; if (!silent) xfs_notice(mp, "Will try to correct with specified mount options sunit (%d) and swidth (%d)", BBTOB(mp->m_dalign), BBTOB(mp->m_swidth)); return true; } /* * Compute the maximum level number of the realtime summary file, as defined by * mkfs. The historic use of highbit32 on a 64-bit quantity prohibited correct * use of rt volumes with more than 2^32 extents. */ uint8_t xfs_compute_rextslog( xfs_rtbxlen_t rtextents) { if (!rtextents) return 0; return xfs_highbit64(rtextents); } |
6 1 1 3 1 2 8 1 6 1 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 | // SPDX-License-Identifier: GPL-2.0-only /* * Xtables module for matching the value of the IPv4/IPv6 and TCP ECN bits * * (C) 2002 by Harald Welte <laforge@gnumonks.org> * (C) 2011 Patrick McHardy <kaber@trash.net> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/in.h> #include <linux/ip.h> #include <net/ip.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/tcp.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_ecn.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h> MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); MODULE_DESCRIPTION("Xtables: Explicit Congestion Notification (ECN) flag match"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_ecn"); MODULE_ALIAS("ip6t_ecn"); static bool match_tcp(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_ecn_info *einfo = par->matchinfo; struct tcphdr _tcph; const struct tcphdr *th; /* In practice, TCP match does this, so can't fail. But let's * be good citizens. */ th = skb_header_pointer(skb, par->thoff, sizeof(_tcph), &_tcph); if (th == NULL) return false; if (einfo->operation & XT_ECN_OP_MATCH_ECE) { if (einfo->invert & XT_ECN_OP_MATCH_ECE) { if (th->ece == 1) return false; } else { if (th->ece == 0) return false; } } if (einfo->operation & XT_ECN_OP_MATCH_CWR) { if (einfo->invert & XT_ECN_OP_MATCH_CWR) { if (th->cwr == 1) return false; } else { if (th->cwr == 0) return false; } } return true; } static inline bool match_ip(const struct sk_buff *skb, const struct xt_ecn_info *einfo) { return ((ip_hdr(skb)->tos & XT_ECN_IP_MASK) == einfo->ip_ect) ^ !!(einfo->invert & XT_ECN_OP_MATCH_IP); } static bool ecn_mt4(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_ecn_info *info = par->matchinfo; if (info->operation & XT_ECN_OP_MATCH_IP && !match_ip(skb, info)) return false; if (info->operation & (XT_ECN_OP_MATCH_ECE | XT_ECN_OP_MATCH_CWR) && !match_tcp(skb, par)) return false; return true; } static int ecn_mt_check4(const struct xt_mtchk_param *par) { const struct xt_ecn_info *info = par->matchinfo; const struct ipt_ip *ip = par->entryinfo; if (info->operation & XT_ECN_OP_MATCH_MASK) return -EINVAL; if (info->invert & XT_ECN_OP_MATCH_MASK) return -EINVAL; if (info->operation & (XT_ECN_OP_MATCH_ECE | XT_ECN_OP_MATCH_CWR) && (ip->proto != IPPROTO_TCP || ip->invflags & IPT_INV_PROTO)) { pr_info_ratelimited("cannot match TCP bits for non-tcp packets\n"); return -EINVAL; } return 0; } static inline bool match_ipv6(const struct sk_buff *skb, const struct xt_ecn_info *einfo) { return (((ipv6_hdr(skb)->flow_lbl[0] >> 4) & XT_ECN_IP_MASK) == einfo->ip_ect) ^ !!(einfo->invert & XT_ECN_OP_MATCH_IP); } static bool ecn_mt6(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_ecn_info *info = par->matchinfo; if (info->operation & XT_ECN_OP_MATCH_IP && !match_ipv6(skb, info)) return false; if (info->operation & (XT_ECN_OP_MATCH_ECE | XT_ECN_OP_MATCH_CWR) && !match_tcp(skb, par)) return false; return true; } static int ecn_mt_check6(const struct xt_mtchk_param *par) { const struct xt_ecn_info *info = par->matchinfo; const struct ip6t_ip6 *ip = par->entryinfo; if (info->operation & XT_ECN_OP_MATCH_MASK) return -EINVAL; if (info->invert & XT_ECN_OP_MATCH_MASK) return -EINVAL; if (info->operation & (XT_ECN_OP_MATCH_ECE | XT_ECN_OP_MATCH_CWR) && (ip->proto != IPPROTO_TCP || ip->invflags & IP6T_INV_PROTO)) { pr_info_ratelimited("cannot match TCP bits for non-tcp packets\n"); return -EINVAL; } return 0; } static struct xt_match ecn_mt_reg[] __read_mostly = { { .name = "ecn", .family = NFPROTO_IPV4, .match = ecn_mt4, .matchsize = sizeof(struct xt_ecn_info), .checkentry = ecn_mt_check4, .me = THIS_MODULE, }, { .name = "ecn", .family = NFPROTO_IPV6, .match = ecn_mt6, .matchsize = sizeof(struct xt_ecn_info), .checkentry = ecn_mt_check6, .me = THIS_MODULE, }, }; static int __init ecn_mt_init(void) { return xt_register_matches(ecn_mt_reg, ARRAY_SIZE(ecn_mt_reg)); } static void __exit ecn_mt_exit(void) { xt_unregister_matches(ecn_mt_reg, ARRAY_SIZE(ecn_mt_reg)); } module_init(ecn_mt_init); module_exit(ecn_mt_exit); |
2 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 | // SPDX-License-Identifier: GPL-2.0-only /* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES */ #include <linux/iommufd.h> #include <linux/slab.h> #include <linux/iommu.h> #include <uapi/linux/iommufd.h> #include "../iommu-priv.h" #include "io_pagetable.h" #include "iommufd_private.h" static bool allow_unsafe_interrupts; module_param(allow_unsafe_interrupts, bool, S_IRUGO | S_IWUSR); MODULE_PARM_DESC( allow_unsafe_interrupts, "Allow IOMMUFD to bind to devices even if the platform cannot isolate " "the MSI interrupt window. Enabling this is a security weakness."); static void iommufd_group_release(struct kref *kref) { struct iommufd_group *igroup = container_of(kref, struct iommufd_group, ref); WARN_ON(igroup->hwpt || !list_empty(&igroup->device_list)); xa_cmpxchg(&igroup->ictx->groups, iommu_group_id(igroup->group), igroup, NULL, GFP_KERNEL); iommu_group_put(igroup->group); mutex_destroy(&igroup->lock); kfree(igroup); } static void iommufd_put_group(struct iommufd_group *group) { kref_put(&group->ref, iommufd_group_release); } static bool iommufd_group_try_get(struct iommufd_group *igroup, struct iommu_group *group) { if (!igroup) return false; /* * group ID's cannot be re-used until the group is put back which does * not happen if we could get an igroup pointer under the xa_lock. */ if (WARN_ON(igroup->group != group)) return false; return kref_get_unless_zero(&igroup->ref); } /* * iommufd needs to store some more data for each iommu_group, we keep a * parallel xarray indexed by iommu_group id to hold this instead of putting it * in the core structure. To keep things simple the iommufd_group memory is * unique within the iommufd_ctx. This makes it easy to check there are no * memory leaks. */ static struct iommufd_group *iommufd_get_group(struct iommufd_ctx *ictx, struct device *dev) { struct iommufd_group *new_igroup; struct iommufd_group *cur_igroup; struct iommufd_group *igroup; struct iommu_group *group; unsigned int id; group = iommu_group_get(dev); if (!group) return ERR_PTR(-ENODEV); id = iommu_group_id(group); xa_lock(&ictx->groups); igroup = xa_load(&ictx->groups, id); if (iommufd_group_try_get(igroup, group)) { xa_unlock(&ictx->groups); iommu_group_put(group); return igroup; } xa_unlock(&ictx->groups); new_igroup = kzalloc(sizeof(*new_igroup), GFP_KERNEL); if (!new_igroup) { iommu_group_put(group); return ERR_PTR(-ENOMEM); } kref_init(&new_igroup->ref); mutex_init(&new_igroup->lock); INIT_LIST_HEAD(&new_igroup->device_list); new_igroup->sw_msi_start = PHYS_ADDR_MAX; /* group reference moves into new_igroup */ new_igroup->group = group; /* * The ictx is not additionally refcounted here becase all objects using * an igroup must put it before their destroy completes. */ new_igroup->ictx = ictx; /* * We dropped the lock so igroup is invalid. NULL is a safe and likely * value to assume for the xa_cmpxchg algorithm. */ cur_igroup = NULL; xa_lock(&ictx->groups); while (true) { igroup = __xa_cmpxchg(&ictx->groups, id, cur_igroup, new_igroup, GFP_KERNEL); if (xa_is_err(igroup)) { xa_unlock(&ictx->groups); iommufd_put_group(new_igroup); return ERR_PTR(xa_err(igroup)); } /* new_group was successfully installed */ if (cur_igroup == igroup) { xa_unlock(&ictx->groups); return new_igroup; } /* Check again if the current group is any good */ if (iommufd_group_try_get(igroup, group)) { xa_unlock(&ictx->groups); iommufd_put_group(new_igroup); return igroup; } cur_igroup = igroup; } } void iommufd_device_destroy(struct iommufd_object *obj) { struct iommufd_device *idev = container_of(obj, struct iommufd_device, obj); iommu_device_release_dma_owner(idev->dev); iommufd_put_group(idev->igroup); if (!iommufd_selftest_is_mock_dev(idev->dev)) iommufd_ctx_put(idev->ictx); } /** * iommufd_device_bind - Bind a physical device to an iommu fd * @ictx: iommufd file descriptor * @dev: Pointer to a physical device struct * @id: Output ID number to return to userspace for this device * * A successful bind establishes an ownership over the device and returns * struct iommufd_device pointer, otherwise returns error pointer. * * A driver using this API must set driver_managed_dma and must not touch * the device until this routine succeeds and establishes ownership. * * Binding a PCI device places the entire RID under iommufd control. * * The caller must undo this with iommufd_device_unbind() */ struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx, struct device *dev, u32 *id) { struct iommufd_device *idev; struct iommufd_group *igroup; int rc; /* * iommufd always sets IOMMU_CACHE because we offer no way for userspace * to restore cache coherency. */ if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) return ERR_PTR(-EINVAL); igroup = iommufd_get_group(ictx, dev); if (IS_ERR(igroup)) return ERR_CAST(igroup); /* * For historical compat with VFIO the insecure interrupt path is * allowed if the module parameter is set. Secure/Isolated means that a * MemWr operation from the device (eg a simple DMA) cannot trigger an * interrupt outside this iommufd context. */ if (!iommufd_selftest_is_mock_dev(dev) && !iommu_group_has_isolated_msi(igroup->group)) { if (!allow_unsafe_interrupts) { rc = -EPERM; goto out_group_put; } dev_warn( dev, "MSI interrupts are not secure, they cannot be isolated by the platform. " "Check that platform features like interrupt remapping are enabled. " "Use the \"allow_unsafe_interrupts\" module parameter to override\n"); } rc = iommu_device_claim_dma_owner(dev, ictx); if (rc) goto out_group_put; idev = iommufd_object_alloc(ictx, idev, IOMMUFD_OBJ_DEVICE); if (IS_ERR(idev)) { rc = PTR_ERR(idev); goto out_release_owner; } idev->ictx = ictx; if (!iommufd_selftest_is_mock_dev(dev)) iommufd_ctx_get(ictx); idev->dev = dev; idev->enforce_cache_coherency = device_iommu_capable(dev, IOMMU_CAP_ENFORCE_CACHE_COHERENCY); /* The calling driver is a user until iommufd_device_unbind() */ refcount_inc(&idev->obj.users); /* igroup refcount moves into iommufd_device */ idev->igroup = igroup; /* * If the caller fails after this success it must call * iommufd_unbind_device() which is safe since we hold this refcount. * This also means the device is a leaf in the graph and no other object * can take a reference on it. */ iommufd_object_finalize(ictx, &idev->obj); *id = idev->obj.id; return idev; out_release_owner: iommu_device_release_dma_owner(dev); out_group_put: iommufd_put_group(igroup); return ERR_PTR(rc); } EXPORT_SYMBOL_NS_GPL(iommufd_device_bind, IOMMUFD); /** * iommufd_ctx_has_group - True if any device within the group is bound * to the ictx * @ictx: iommufd file descriptor * @group: Pointer to a physical iommu_group struct * * True if any device within the group has been bound to this ictx, ex. via * iommufd_device_bind(), therefore implying ictx ownership of the group. */ bool iommufd_ctx_has_group(struct iommufd_ctx *ictx, struct iommu_group *group) { struct iommufd_object *obj; unsigned long index; if (!ictx || !group) return false; xa_lock(&ictx->objects); xa_for_each(&ictx->objects, index, obj) { if (obj->type == IOMMUFD_OBJ_DEVICE && container_of(obj, struct iommufd_device, obj) ->igroup->group == group) { xa_unlock(&ictx->objects); return true; } } xa_unlock(&ictx->objects); return false; } EXPORT_SYMBOL_NS_GPL(iommufd_ctx_has_group, IOMMUFD); /** * iommufd_device_unbind - Undo iommufd_device_bind() * @idev: Device returned by iommufd_device_bind() * * Release the device from iommufd control. The DMA ownership will return back * to unowned with DMA controlled by the DMA API. This invalidates the * iommufd_device pointer, other APIs that consume it must not be called * concurrently. */ void iommufd_device_unbind(struct iommufd_device *idev) { iommufd_object_destroy_user(idev->ictx, &idev->obj); } EXPORT_SYMBOL_NS_GPL(iommufd_device_unbind, IOMMUFD); struct iommufd_ctx *iommufd_device_to_ictx(struct iommufd_device *idev) { return idev->ictx; } EXPORT_SYMBOL_NS_GPL(iommufd_device_to_ictx, IOMMUFD); u32 iommufd_device_to_id(struct iommufd_device *idev) { return idev->obj.id; } EXPORT_SYMBOL_NS_GPL(iommufd_device_to_id, IOMMUFD); static int iommufd_group_setup_msi(struct iommufd_group *igroup, struct iommufd_hwpt_paging *hwpt_paging) { phys_addr_t sw_msi_start = igroup->sw_msi_start; int rc; /* * If the IOMMU driver gives a IOMMU_RESV_SW_MSI then it is asking us to * call iommu_get_msi_cookie() on its behalf. This is necessary to setup * the MSI window so iommu_dma_prepare_msi() can install pages into our * domain after request_irq(). If it is not done interrupts will not * work on this domain. * * FIXME: This is conceptually broken for iommufd since we want to allow * userspace to change the domains, eg switch from an identity IOAS to a * DMA IOAS. There is currently no way to create a MSI window that * matches what the IRQ layer actually expects in a newly created * domain. */ if (sw_msi_start != PHYS_ADDR_MAX && !hwpt_paging->msi_cookie) { rc = iommu_get_msi_cookie(hwpt_paging->common.domain, sw_msi_start); if (rc) return rc; /* * iommu_get_msi_cookie() can only be called once per domain, * it returns -EBUSY on later calls. */ hwpt_paging->msi_cookie = true; } return 0; } static int iommufd_hwpt_paging_attach(struct iommufd_hwpt_paging *hwpt_paging, struct iommufd_device *idev) { int rc; lockdep_assert_held(&idev->igroup->lock); rc = iopt_table_enforce_dev_resv_regions(&hwpt_paging->ioas->iopt, idev->dev, &idev->igroup->sw_msi_start); if (rc) return rc; if (list_empty(&idev->igroup->device_list)) { rc = iommufd_group_setup_msi(idev->igroup, hwpt_paging); if (rc) { iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, idev->dev); return rc; } } return 0; } int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, struct iommufd_device *idev) { int rc; mutex_lock(&idev->igroup->lock); if (idev->igroup->hwpt != NULL && idev->igroup->hwpt != hwpt) { rc = -EINVAL; goto err_unlock; } if (hwpt_is_paging(hwpt)) { rc = iommufd_hwpt_paging_attach(to_hwpt_paging(hwpt), idev); if (rc) goto err_unlock; } /* * Only attach to the group once for the first device that is in the * group. All the other devices will follow this attachment. The user * should attach every device individually to the hwpt as the per-device * reserved regions are only updated during individual device * attachment. */ if (list_empty(&idev->igroup->device_list)) { rc = iommu_attach_group(hwpt->domain, idev->igroup->group); if (rc) goto err_unresv; idev->igroup->hwpt = hwpt; } refcount_inc(&hwpt->obj.users); list_add_tail(&idev->group_item, &idev->igroup->device_list); mutex_unlock(&idev->igroup->lock); return 0; err_unresv: if (hwpt_is_paging(hwpt)) iopt_remove_reserved_iova(&to_hwpt_paging(hwpt)->ioas->iopt, idev->dev); err_unlock: mutex_unlock(&idev->igroup->lock); return rc; } struct iommufd_hw_pagetable * iommufd_hw_pagetable_detach(struct iommufd_device *idev) { struct iommufd_hw_pagetable *hwpt = idev->igroup->hwpt; mutex_lock(&idev->igroup->lock); list_del(&idev->group_item); if (list_empty(&idev->igroup->device_list)) { iommu_detach_group(hwpt->domain, idev->igroup->group); idev->igroup->hwpt = NULL; } if (hwpt_is_paging(hwpt)) iopt_remove_reserved_iova(&to_hwpt_paging(hwpt)->ioas->iopt, idev->dev); mutex_unlock(&idev->igroup->lock); /* Caller must destroy hwpt */ return hwpt; } static struct iommufd_hw_pagetable * iommufd_device_do_attach(struct iommufd_device *idev, struct iommufd_hw_pagetable *hwpt) { int rc; rc = iommufd_hw_pagetable_attach(hwpt, idev); if (rc) return ERR_PTR(rc); return NULL; } static void iommufd_group_remove_reserved_iova(struct iommufd_group *igroup, struct iommufd_hwpt_paging *hwpt_paging) { struct iommufd_device *cur; lockdep_assert_held(&igroup->lock); list_for_each_entry(cur, &igroup->device_list, group_item) iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, cur->dev); } static int iommufd_group_do_replace_paging(struct iommufd_group *igroup, struct iommufd_hwpt_paging *hwpt_paging) { struct iommufd_hw_pagetable *old_hwpt = igroup->hwpt; struct iommufd_device *cur; int rc; lockdep_assert_held(&igroup->lock); if (!hwpt_is_paging(old_hwpt) || hwpt_paging->ioas != to_hwpt_paging(old_hwpt)->ioas) { list_for_each_entry(cur, &igroup->device_list, group_item) { rc = iopt_table_enforce_dev_resv_regions( &hwpt_paging->ioas->iopt, cur->dev, NULL); if (rc) goto err_unresv; } } rc = iommufd_group_setup_msi(igroup, hwpt_paging); if (rc) goto err_unresv; return 0; err_unresv: iommufd_group_remove_reserved_iova(igroup, hwpt_paging); return rc; } static struct iommufd_hw_pagetable * iommufd_device_do_replace(struct iommufd_device *idev, struct iommufd_hw_pagetable *hwpt) { struct iommufd_group *igroup = idev->igroup; struct iommufd_hw_pagetable *old_hwpt; unsigned int num_devices; int rc; mutex_lock(&idev->igroup->lock); if (igroup->hwpt == NULL) { rc = -EINVAL; goto err_unlock; } if (hwpt == igroup->hwpt) { mutex_unlock(&idev->igroup->lock); return NULL; } old_hwpt = igroup->hwpt; if (hwpt_is_paging(hwpt)) { rc = iommufd_group_do_replace_paging(igroup, to_hwpt_paging(hwpt)); if (rc) goto err_unlock; } rc = iommu_group_replace_domain(igroup->group, hwpt->domain); if (rc) goto err_unresv; if (hwpt_is_paging(old_hwpt) && (!hwpt_is_paging(hwpt) || to_hwpt_paging(hwpt)->ioas != to_hwpt_paging(old_hwpt)->ioas)) iommufd_group_remove_reserved_iova(igroup, to_hwpt_paging(old_hwpt)); igroup->hwpt = hwpt; num_devices = list_count_nodes(&igroup->device_list); /* * Move the refcounts held by the device_list to the new hwpt. Retain a * refcount for this thread as the caller will free it. */ refcount_add(num_devices, &hwpt->obj.users); if (num_devices > 1) WARN_ON(refcount_sub_and_test(num_devices - 1, &old_hwpt->obj.users)); mutex_unlock(&idev->igroup->lock); /* Caller must destroy old_hwpt */ return old_hwpt; err_unresv: if (hwpt_is_paging(hwpt)) iommufd_group_remove_reserved_iova(igroup, to_hwpt_paging(old_hwpt)); err_unlock: mutex_unlock(&idev->igroup->lock); return ERR_PTR(rc); } typedef struct iommufd_hw_pagetable *(*attach_fn)( struct iommufd_device *idev, struct iommufd_hw_pagetable *hwpt); /* * When automatically managing the domains we search for a compatible domain in * the iopt and if one is found use it, otherwise create a new domain. * Automatic domain selection will never pick a manually created domain. */ static struct iommufd_hw_pagetable * iommufd_device_auto_get_domain(struct iommufd_device *idev, struct iommufd_ioas *ioas, u32 *pt_id, attach_fn do_attach) { /* * iommufd_hw_pagetable_attach() is called by * iommufd_hw_pagetable_alloc() in immediate attachment mode, same as * iommufd_device_do_attach(). So if we are in this mode then we prefer * to use the immediate_attach path as it supports drivers that can't * directly allocate a domain. */ bool immediate_attach = do_attach == iommufd_device_do_attach; struct iommufd_hw_pagetable *destroy_hwpt; struct iommufd_hwpt_paging *hwpt_paging; struct iommufd_hw_pagetable *hwpt; /* * There is no differentiation when domains are allocated, so any domain * that is willing to attach to the device is interchangeable with any * other. */ mutex_lock(&ioas->mutex); list_for_each_entry(hwpt_paging, &ioas->hwpt_list, hwpt_item) { if (!hwpt_paging->auto_domain) continue; hwpt = &hwpt_paging->common; if (!iommufd_lock_obj(&hwpt->obj)) continue; destroy_hwpt = (*do_attach)(idev, hwpt); if (IS_ERR(destroy_hwpt)) { iommufd_put_object(idev->ictx, &hwpt->obj); /* * -EINVAL means the domain is incompatible with the * device. Other error codes should propagate to * userspace as failure. Success means the domain is * attached. */ if (PTR_ERR(destroy_hwpt) == -EINVAL) continue; goto out_unlock; } *pt_id = hwpt->obj.id; iommufd_put_object(idev->ictx, &hwpt->obj); goto out_unlock; } hwpt_paging = iommufd_hwpt_paging_alloc(idev->ictx, ioas, idev, 0, immediate_attach, NULL); if (IS_ERR(hwpt_paging)) { destroy_hwpt = ERR_CAST(hwpt_paging); goto out_unlock; } hwpt = &hwpt_paging->common; if (!immediate_attach) { destroy_hwpt = (*do_attach)(idev, hwpt); if (IS_ERR(destroy_hwpt)) goto out_abort; } else { destroy_hwpt = NULL; } hwpt_paging->auto_domain = true; *pt_id = hwpt->obj.id; iommufd_object_finalize(idev->ictx, &hwpt->obj); mutex_unlock(&ioas->mutex); return destroy_hwpt; out_abort: iommufd_object_abort_and_destroy(idev->ictx, &hwpt->obj); out_unlock: mutex_unlock(&ioas->mutex); return destroy_hwpt; } static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id, attach_fn do_attach) { struct iommufd_hw_pagetable *destroy_hwpt; struct iommufd_object *pt_obj; pt_obj = iommufd_get_object(idev->ictx, *pt_id, IOMMUFD_OBJ_ANY); if (IS_ERR(pt_obj)) return PTR_ERR(pt_obj); switch (pt_obj->type) { case IOMMUFD_OBJ_HWPT_NESTED: case IOMMUFD_OBJ_HWPT_PAGING: { struct iommufd_hw_pagetable *hwpt = container_of(pt_obj, struct iommufd_hw_pagetable, obj); destroy_hwpt = (*do_attach)(idev, hwpt); if (IS_ERR(destroy_hwpt)) goto out_put_pt_obj; break; } case IOMMUFD_OBJ_IOAS: { struct iommufd_ioas *ioas = container_of(pt_obj, struct iommufd_ioas, obj); destroy_hwpt = iommufd_device_auto_get_domain(idev, ioas, pt_id, do_attach); if (IS_ERR(destroy_hwpt)) goto out_put_pt_obj; break; } default: destroy_hwpt = ERR_PTR(-EINVAL); goto out_put_pt_obj; } iommufd_put_object(idev->ictx, pt_obj); /* This destruction has to be after we unlock everything */ if (destroy_hwpt) iommufd_hw_pagetable_put(idev->ictx, destroy_hwpt); return 0; out_put_pt_obj: iommufd_put_object(idev->ictx, pt_obj); return PTR_ERR(destroy_hwpt); } /** * iommufd_device_attach - Connect a device to an iommu_domain * @idev: device to attach * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HWPT_PAGING * Output the IOMMUFD_OBJ_HWPT_PAGING ID * * This connects the device to an iommu_domain, either automatically or manually * selected. Once this completes the device could do DMA. * * The caller should return the resulting pt_id back to userspace. * This function is undone by calling iommufd_device_detach(). */ int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id) { int rc; rc = iommufd_device_change_pt(idev, pt_id, &iommufd_device_do_attach); if (rc) return rc; /* * Pairs with iommufd_device_detach() - catches caller bugs attempting * to destroy a device with an attachment. */ refcount_inc(&idev->obj.users); return 0; } EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, IOMMUFD); /** * iommufd_device_replace - Change the device's iommu_domain * @idev: device to change * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HWPT_PAGING * Output the IOMMUFD_OBJ_HWPT_PAGING ID * * This is the same as:: * * iommufd_device_detach(); * iommufd_device_attach(); * * If it fails then no change is made to the attachment. The iommu driver may * implement this so there is no disruption in translation. This can only be * called if iommufd_device_attach() has already succeeded. */ int iommufd_device_replace(struct iommufd_device *idev, u32 *pt_id) { return iommufd_device_change_pt(idev, pt_id, &iommufd_device_do_replace); } EXPORT_SYMBOL_NS_GPL(iommufd_device_replace, IOMMUFD); /** * iommufd_device_detach - Disconnect a device to an iommu_domain * @idev: device to detach * * Undo iommufd_device_attach(). This disconnects the idev from the previously * attached pt_id. The device returns back to a blocked DMA translation. */ void iommufd_device_detach(struct iommufd_device *idev) { struct iommufd_hw_pagetable *hwpt; hwpt = iommufd_hw_pagetable_detach(idev); iommufd_hw_pagetable_put(idev->ictx, hwpt); refcount_dec(&idev->obj.users); } EXPORT_SYMBOL_NS_GPL(iommufd_device_detach, IOMMUFD); /* * On success, it will refcount_inc() at a valid new_ioas and refcount_dec() at * a valid cur_ioas (access->ioas). A caller passing in a valid new_ioas should * call iommufd_put_object() if it does an iommufd_get_object() for a new_ioas. */ static int iommufd_access_change_ioas(struct iommufd_access *access, struct iommufd_ioas *new_ioas) { u32 iopt_access_list_id = access->iopt_access_list_id; struct iommufd_ioas *cur_ioas = access->ioas; int rc; lockdep_assert_held(&access->ioas_lock); /* We are racing with a concurrent detach, bail */ if (cur_ioas != access->ioas_unpin) return -EBUSY; if (cur_ioas == new_ioas) return 0; /* * Set ioas to NULL to block any further iommufd_access_pin_pages(). * iommufd_access_unpin_pages() can continue using access->ioas_unpin. */ access->ioas = NULL; if (new_ioas) { rc = iopt_add_access(&new_ioas->iopt, access); if (rc) { access->ioas = cur_ioas; return rc; } refcount_inc(&new_ioas->obj.users); } if (cur_ioas) { if (access->ops->unmap) { mutex_unlock(&access->ioas_lock); access->ops->unmap(access->data, 0, ULONG_MAX); mutex_lock(&access->ioas_lock); } iopt_remove_access(&cur_ioas->iopt, access, iopt_access_list_id); refcount_dec(&cur_ioas->obj.users); } access->ioas = new_ioas; access->ioas_unpin = new_ioas; return 0; } static int iommufd_access_change_ioas_id(struct iommufd_access *access, u32 id) { struct iommufd_ioas *ioas = iommufd_get_ioas(access->ictx, id); int rc; if (IS_ERR(ioas)) return PTR_ERR(ioas); rc = iommufd_access_change_ioas(access, ioas); iommufd_put_object(access->ictx, &ioas->obj); return rc; } void iommufd_access_destroy_object(struct iommufd_object *obj) { struct iommufd_access *access = container_of(obj, struct iommufd_access, obj); mutex_lock(&access->ioas_lock); if (access->ioas) WARN_ON(iommufd_access_change_ioas(access, NULL)); mutex_unlock(&access->ioas_lock); iommufd_ctx_put(access->ictx); } /** * iommufd_access_create - Create an iommufd_access * @ictx: iommufd file descriptor * @ops: Driver's ops to associate with the access * @data: Opaque data to pass into ops functions * @id: Output ID number to return to userspace for this access * * An iommufd_access allows a driver to read/write to the IOAS without using * DMA. The underlying CPU memory can be accessed using the * iommufd_access_pin_pages() or iommufd_access_rw() functions. * * The provided ops are required to use iommufd_access_pin_pages(). */ struct iommufd_access * iommufd_access_create(struct iommufd_ctx *ictx, const struct iommufd_access_ops *ops, void *data, u32 *id) { struct iommufd_access *access; /* * There is no uAPI for the access object, but to keep things symmetric * use the object infrastructure anyhow. */ access = iommufd_object_alloc(ictx, access, IOMMUFD_OBJ_ACCESS); if (IS_ERR(access)) return access; access->data = data; access->ops = ops; if (ops->needs_pin_pages) access->iova_alignment = PAGE_SIZE; else access->iova_alignment = 1; /* The calling driver is a user until iommufd_access_destroy() */ refcount_inc(&access->obj.users); access->ictx = ictx; iommufd_ctx_get(ictx); iommufd_object_finalize(ictx, &access->obj); *id = access->obj.id; mutex_init(&access->ioas_lock); return access; } EXPORT_SYMBOL_NS_GPL(iommufd_access_create, IOMMUFD); /** * iommufd_access_destroy - Destroy an iommufd_access * @access: The access to destroy * * The caller must stop using the access before destroying it. */ void iommufd_access_destroy(struct iommufd_access *access) { iommufd_object_destroy_user(access->ictx, &access->obj); } EXPORT_SYMBOL_NS_GPL(iommufd_access_destroy, IOMMUFD); void iommufd_access_detach(struct iommufd_access *access) { mutex_lock(&access->ioas_lock); if (WARN_ON(!access->ioas)) { mutex_unlock(&access->ioas_lock); return; } WARN_ON(iommufd_access_change_ioas(access, NULL)); mutex_unlock(&access->ioas_lock); } EXPORT_SYMBOL_NS_GPL(iommufd_access_detach, IOMMUFD); int iommufd_access_attach(struct iommufd_access *access, u32 ioas_id) { int rc; mutex_lock(&access->ioas_lock); if (WARN_ON(access->ioas)) { mutex_unlock(&access->ioas_lock); return -EINVAL; } rc = iommufd_access_change_ioas_id(access, ioas_id); mutex_unlock(&access->ioas_lock); return rc; } EXPORT_SYMBOL_NS_GPL(iommufd_access_attach, IOMMUFD); int iommufd_access_replace(struct iommufd_access *access, u32 ioas_id) { int rc; mutex_lock(&access->ioas_lock); if (!access->ioas) { mutex_unlock(&access->ioas_lock); return -ENOENT; } rc = iommufd_access_change_ioas_id(access, ioas_id); mutex_unlock(&access->ioas_lock); return rc; } EXPORT_SYMBOL_NS_GPL(iommufd_access_replace, IOMMUFD); /** * iommufd_access_notify_unmap - Notify users of an iopt to stop using it * @iopt: iopt to work on * @iova: Starting iova in the iopt * @length: Number of bytes * * After this function returns there should be no users attached to the pages * linked to this iopt that intersect with iova,length. Anyone that has attached * a user through iopt_access_pages() needs to detach it through * iommufd_access_unpin_pages() before this function returns. * * iommufd_access_destroy() will wait for any outstanding unmap callback to * complete. Once iommufd_access_destroy() no unmap ops are running or will * run in the future. Due to this a driver must not create locking that prevents * unmap to complete while iommufd_access_destroy() is running. */ void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova, unsigned long length) { struct iommufd_ioas *ioas = container_of(iopt, struct iommufd_ioas, iopt); struct iommufd_access *access; unsigned long index; xa_lock(&ioas->iopt.access_list); xa_for_each(&ioas->iopt.access_list, index, access) { if (!iommufd_lock_obj(&access->obj)) continue; xa_unlock(&ioas->iopt.access_list); access->ops->unmap(access->data, iova, length); iommufd_put_object(access->ictx, &access->obj); xa_lock(&ioas->iopt.access_list); } xa_unlock(&ioas->iopt.access_list); } /** * iommufd_access_unpin_pages() - Undo iommufd_access_pin_pages * @access: IOAS access to act on * @iova: Starting IOVA * @length: Number of bytes to access * * Return the struct page's. The caller must stop accessing them before calling * this. The iova/length must exactly match the one provided to access_pages. */ void iommufd_access_unpin_pages(struct iommufd_access *access, unsigned long iova, unsigned long length) { struct iopt_area_contig_iter iter; struct io_pagetable *iopt; unsigned long last_iova; struct iopt_area *area; if (WARN_ON(!length) || WARN_ON(check_add_overflow(iova, length - 1, &last_iova))) return; mutex_lock(&access->ioas_lock); /* * The driver must be doing something wrong if it calls this before an * iommufd_access_attach() or after an iommufd_access_detach(). */ if (WARN_ON(!access->ioas_unpin)) { mutex_unlock(&access->ioas_lock); return; } iopt = &access->ioas_unpin->iopt; down_read(&iopt->iova_rwsem); iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) iopt_area_remove_access( area, iopt_area_iova_to_index(area, iter.cur_iova), iopt_area_iova_to_index( area, min(last_iova, iopt_area_last_iova(area)))); WARN_ON(!iopt_area_contig_done(&iter)); up_read(&iopt->iova_rwsem); mutex_unlock(&access->ioas_lock); } EXPORT_SYMBOL_NS_GPL(iommufd_access_unpin_pages, IOMMUFD); static bool iopt_area_contig_is_aligned(struct iopt_area_contig_iter *iter) { if (iopt_area_start_byte(iter->area, iter->cur_iova) % PAGE_SIZE) return false; if (!iopt_area_contig_done(iter) && (iopt_area_start_byte(iter->area, iopt_area_last_iova(iter->area)) % PAGE_SIZE) != (PAGE_SIZE - 1)) return false; return true; } static bool check_area_prot(struct iopt_area *area, unsigned int flags) { if (flags & IOMMUFD_ACCESS_RW_WRITE) return area->iommu_prot & IOMMU_WRITE; return area->iommu_prot & IOMMU_READ; } /** * iommufd_access_pin_pages() - Return a list of pages under the iova * @access: IOAS access to act on * @iova: Starting IOVA * @length: Number of bytes to access * @out_pages: Output page list * @flags: IOPMMUFD_ACCESS_RW_* flags * * Reads @length bytes starting at iova and returns the struct page * pointers. * These can be kmap'd by the caller for CPU access. * * The caller must perform iommufd_access_unpin_pages() when done to balance * this. * * This API always requires a page aligned iova. This happens naturally if the * ioas alignment is >= PAGE_SIZE and the iova is PAGE_SIZE aligned. However * smaller alignments have corner cases where this API can fail on otherwise * aligned iova. */ int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova, unsigned long length, struct page **out_pages, unsigned int flags) { struct iopt_area_contig_iter iter; struct io_pagetable *iopt; unsigned long last_iova; struct iopt_area *area; int rc; /* Driver's ops don't support pin_pages */ if (IS_ENABLED(CONFIG_IOMMUFD_TEST) && WARN_ON(access->iova_alignment != PAGE_SIZE || !access->ops->unmap)) return -EINVAL; if (!length) return -EINVAL; if (check_add_overflow(iova, length - 1, &last_iova)) return -EOVERFLOW; mutex_lock(&access->ioas_lock); if (!access->ioas) { mutex_unlock(&access->ioas_lock); return -ENOENT; } iopt = &access->ioas->iopt; down_read(&iopt->iova_rwsem); iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) { unsigned long last = min(last_iova, iopt_area_last_iova(area)); unsigned long last_index = iopt_area_iova_to_index(area, last); unsigned long index = iopt_area_iova_to_index(area, iter.cur_iova); if (area->prevent_access || !iopt_area_contig_is_aligned(&iter)) { rc = -EINVAL; goto err_remove; } if (!check_area_prot(area, flags)) { rc = -EPERM; goto err_remove; } rc = iopt_area_add_access(area, index, last_index, out_pages, flags); if (rc) goto err_remove; out_pages += last_index - index + 1; } if (!iopt_area_contig_done(&iter)) { rc = -ENOENT; goto err_remove; } up_read(&iopt->iova_rwsem); mutex_unlock(&access->ioas_lock); return 0; err_remove: if (iova < iter.cur_iova) { last_iova = iter.cur_iova - 1; iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) iopt_area_remove_access( area, iopt_area_iova_to_index(area, iter.cur_iova), iopt_area_iova_to_index( area, min(last_iova, iopt_area_last_iova(area)))); } up_read(&iopt->iova_rwsem); mutex_unlock(&access->ioas_lock); return rc; } EXPORT_SYMBOL_NS_GPL(iommufd_access_pin_pages, IOMMUFD); /** * iommufd_access_rw - Read or write data under the iova * @access: IOAS access to act on * @iova: Starting IOVA * @data: Kernel buffer to copy to/from * @length: Number of bytes to access * @flags: IOMMUFD_ACCESS_RW_* flags * * Copy kernel to/from data into the range given by IOVA/length. If flags * indicates IOMMUFD_ACCESS_RW_KTHREAD then a large copy can be optimized * by changing it into copy_to/from_user(). */ int iommufd_access_rw(struct iommufd_access *access, unsigned long iova, void *data, size_t length, unsigned int flags) { struct iopt_area_contig_iter iter; struct io_pagetable *iopt; struct iopt_area *area; unsigned long last_iova; int rc; if (!length) return -EINVAL; if (check_add_overflow(iova, length - 1, &last_iova)) return -EOVERFLOW; mutex_lock(&access->ioas_lock); if (!access->ioas) { mutex_unlock(&access->ioas_lock); return -ENOENT; } iopt = &access->ioas->iopt; down_read(&iopt->iova_rwsem); iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) { unsigned long last = min(last_iova, iopt_area_last_iova(area)); unsigned long bytes = (last - iter.cur_iova) + 1; if (area->prevent_access) { rc = -EINVAL; goto err_out; } if (!check_area_prot(area, flags)) { rc = -EPERM; goto err_out; } rc = iopt_pages_rw_access( area->pages, iopt_area_start_byte(area, iter.cur_iova), data, bytes, flags); if (rc) goto err_out; data += bytes; } if (!iopt_area_contig_done(&iter)) rc = -ENOENT; err_out: up_read(&iopt->iova_rwsem); mutex_unlock(&access->ioas_lock); return rc; } EXPORT_SYMBOL_NS_GPL(iommufd_access_rw, IOMMUFD); int iommufd_get_hw_info(struct iommufd_ucmd *ucmd) { struct iommu_hw_info *cmd = ucmd->cmd; void __user *user_ptr = u64_to_user_ptr(cmd->data_uptr); const struct iommu_ops *ops; struct iommufd_device *idev; unsigned int data_len; unsigned int copy_len; void *data; int rc; if (cmd->flags || cmd->__reserved) return -EOPNOTSUPP; idev = iommufd_get_device(ucmd, cmd->dev_id); if (IS_ERR(idev)) return PTR_ERR(idev); ops = dev_iommu_ops(idev->dev); if (ops->hw_info) { data = ops->hw_info(idev->dev, &data_len, &cmd->out_data_type); if (IS_ERR(data)) { rc = PTR_ERR(data); goto out_put; } /* * drivers that have hw_info callback should have a unique * iommu_hw_info_type. */ if (WARN_ON_ONCE(cmd->out_data_type == IOMMU_HW_INFO_TYPE_NONE)) { rc = -ENODEV; goto out_free; } } else { cmd->out_data_type = IOMMU_HW_INFO_TYPE_NONE; data_len = 0; data = NULL; } copy_len = min(cmd->data_len, data_len); if (copy_to_user(user_ptr, data, copy_len)) { rc = -EFAULT; goto out_free; } /* * Zero the trailing bytes if the user buffer is bigger than the * data size kernel actually has. */ if (copy_len < cmd->data_len) { if (clear_user(user_ptr + copy_len, cmd->data_len - copy_len)) { rc = -EFAULT; goto out_free; } } /* * We return the length the kernel supports so userspace may know what * the kernel capability is. It could be larger than the input buffer. */ cmd->data_len = data_len; cmd->out_capabilities = 0; if (device_iommu_capable(idev->dev, IOMMU_CAP_DIRTY_TRACKING)) cmd->out_capabilities |= IOMMU_HW_CAP_DIRTY_TRACKING; rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); out_free: kfree(data); out_put: iommufd_put_object(ucmd->ictx, &idev->obj); return rc; } |
83 29 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_FS_STRUCT_H #define _LINUX_FS_STRUCT_H #include <linux/path.h> #include <linux/spinlock.h> #include <linux/seqlock.h> struct fs_struct { int users; spinlock_t lock; seqcount_spinlock_t seq; int umask; int in_exec; struct path root, pwd; } __randomize_layout; extern struct kmem_cache *fs_cachep; extern void exit_fs(struct task_struct *); extern void set_fs_root(struct fs_struct *, const struct path *); extern void set_fs_pwd(struct fs_struct *, const struct path *); extern struct fs_struct *copy_fs_struct(struct fs_struct *); extern void free_fs_struct(struct fs_struct *); extern int unshare_fs_struct(void); static inline void get_fs_root(struct fs_struct *fs, struct path *root) { spin_lock(&fs->lock); *root = fs->root; path_get(root); spin_unlock(&fs->lock); } static inline void get_fs_pwd(struct fs_struct *fs, struct path *pwd) { spin_lock(&fs->lock); *pwd = fs->pwd; path_get(pwd); spin_unlock(&fs->lock); } extern bool current_chrooted(void); #endif /* _LINUX_FS_STRUCT_H */ |
58 17 2 15 28 28 15 1 8 7 7 8 8 3 1 2 16 75 13 1 66 16 3 1 13 14 30 40 3 2 1 1 45 45 14 33 48 1 34 15 2 45 1 10 1 9 14 1 11 1 1 11 1 1 9 23 23 63 63 1 1 1 1 14 2 10 2 10 1 10 1 2 8 17 2 15 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 | /* * linux/fs/hfs/inode.c * * Copyright (C) 1995-1997 Paul H. Hargrove * (C) 2003 Ardis Technologies <roman@ardistech.com> * This file may be distributed under the terms of the GNU General Public License. * * This file contains inode-related functions which do not depend on * which scheme is being used to represent forks. * * Based on the minix file system code, (C) 1991, 1992 by Linus Torvalds */ #include <linux/pagemap.h> #include <linux/mpage.h> #include <linux/sched.h> #include <linux/cred.h> #include <linux/uio.h> #include <linux/xattr.h> #include <linux/blkdev.h> #include "hfs_fs.h" #include "btree.h" static const struct file_operations hfs_file_operations; static const struct inode_operations hfs_file_inode_operations; /*================ Variable-like macros ================*/ #define HFS_VALID_MODE_BITS (S_IFREG | S_IFDIR | S_IRWXUGO) static int hfs_read_folio(struct file *file, struct folio *folio) { return block_read_full_folio(folio, hfs_get_block); } static void hfs_write_failed(struct address_space *mapping, loff_t to) { struct inode *inode = mapping->host; if (to > inode->i_size) { truncate_pagecache(inode, inode->i_size); hfs_file_truncate(inode); } } int hfs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, struct page **pagep, void **fsdata) { int ret; *pagep = NULL; ret = cont_write_begin(file, mapping, pos, len, pagep, fsdata, hfs_get_block, &HFS_I(mapping->host)->phys_size); if (unlikely(ret)) hfs_write_failed(mapping, pos + len); return ret; } static sector_t hfs_bmap(struct address_space *mapping, sector_t block) { return generic_block_bmap(mapping, block, hfs_get_block); } static bool hfs_release_folio(struct folio *folio, gfp_t mask) { struct inode *inode = folio->mapping->host; struct super_block *sb = inode->i_sb; struct hfs_btree *tree; struct hfs_bnode *node; u32 nidx; int i; bool res = true; switch (inode->i_ino) { case HFS_EXT_CNID: tree = HFS_SB(sb)->ext_tree; break; case HFS_CAT_CNID: tree = HFS_SB(sb)->cat_tree; break; default: BUG(); return false; } if (!tree) return false; if (tree->node_size >= PAGE_SIZE) { nidx = folio->index >> (tree->node_size_shift - PAGE_SHIFT); spin_lock(&tree->hash_lock); node = hfs_bnode_findhash(tree, nidx); if (!node) ; else if (atomic_read(&node->refcnt)) res = false; if (res && node) { hfs_bnode_unhash(node); hfs_bnode_free(node); } spin_unlock(&tree->hash_lock); } else { nidx = folio->index << (PAGE_SHIFT - tree->node_size_shift); i = 1 << (PAGE_SHIFT - tree->node_size_shift); spin_lock(&tree->hash_lock); do { node = hfs_bnode_findhash(tree, nidx++); if (!node) continue; if (atomic_read(&node->refcnt)) { res = false; break; } hfs_bnode_unhash(node); hfs_bnode_free(node); } while (--i && nidx < tree->node_count); spin_unlock(&tree->hash_lock); } return res ? try_to_free_buffers(folio) : false; } static ssize_t hfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; size_t count = iov_iter_count(iter); ssize_t ret; ret = blockdev_direct_IO(iocb, inode, iter, hfs_get_block); /* * In case of error extending write may have instantiated a few * blocks outside i_size. Trim these off again. */ if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) { loff_t isize = i_size_read(inode); loff_t end = iocb->ki_pos + count; if (end > isize) hfs_write_failed(mapping, end); } return ret; } static int hfs_writepages(struct address_space *mapping, struct writeback_control *wbc) { return mpage_writepages(mapping, wbc, hfs_get_block); } const struct address_space_operations hfs_btree_aops = { .dirty_folio = block_dirty_folio, .invalidate_folio = block_invalidate_folio, .read_folio = hfs_read_folio, .writepages = hfs_writepages, .write_begin = hfs_write_begin, .write_end = generic_write_end, .migrate_folio = buffer_migrate_folio, .bmap = hfs_bmap, .release_folio = hfs_release_folio, }; const struct address_space_operations hfs_aops = { .dirty_folio = block_dirty_folio, .invalidate_folio = block_invalidate_folio, .read_folio = hfs_read_folio, .write_begin = hfs_write_begin, .write_end = generic_write_end, .bmap = hfs_bmap, .direct_IO = hfs_direct_IO, .writepages = hfs_writepages, .migrate_folio = buffer_migrate_folio, }; /* * hfs_new_inode */ struct inode *hfs_new_inode(struct inode *dir, const struct qstr *name, umode_t mode) { struct super_block *sb = dir->i_sb; struct inode *inode = new_inode(sb); if (!inode) return NULL; mutex_init(&HFS_I(inode)->extents_lock); INIT_LIST_HEAD(&HFS_I(inode)->open_dir_list); spin_lock_init(&HFS_I(inode)->open_dir_lock); hfs_cat_build_key(sb, (btree_key *)&HFS_I(inode)->cat_key, dir->i_ino, name); inode->i_ino = HFS_SB(sb)->next_id++; inode->i_mode = mode; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); set_nlink(inode, 1); simple_inode_init_ts(inode); HFS_I(inode)->flags = 0; HFS_I(inode)->rsrc_inode = NULL; HFS_I(inode)->fs_blocks = 0; if (S_ISDIR(mode)) { inode->i_size = 2; HFS_SB(sb)->folder_count++; if (dir->i_ino == HFS_ROOT_CNID) HFS_SB(sb)->root_dirs++; inode->i_op = &hfs_dir_inode_operations; inode->i_fop = &hfs_dir_operations; inode->i_mode |= S_IRWXUGO; inode->i_mode &= ~HFS_SB(inode->i_sb)->s_dir_umask; } else if (S_ISREG(mode)) { HFS_I(inode)->clump_blocks = HFS_SB(sb)->clumpablks; HFS_SB(sb)->file_count++; if (dir->i_ino == HFS_ROOT_CNID) HFS_SB(sb)->root_files++; inode->i_op = &hfs_file_inode_operations; inode->i_fop = &hfs_file_operations; inode->i_mapping->a_ops = &hfs_aops; inode->i_mode |= S_IRUGO|S_IXUGO; if (mode & S_IWUSR) inode->i_mode |= S_IWUGO; inode->i_mode &= ~HFS_SB(inode->i_sb)->s_file_umask; HFS_I(inode)->phys_size = 0; HFS_I(inode)->alloc_blocks = 0; HFS_I(inode)->first_blocks = 0; HFS_I(inode)->cached_start = 0; HFS_I(inode)->cached_blocks = 0; memset(HFS_I(inode)->first_extents, 0, sizeof(hfs_extent_rec)); memset(HFS_I(inode)->cached_extents, 0, sizeof(hfs_extent_rec)); } insert_inode_hash(inode); mark_inode_dirty(inode); set_bit(HFS_FLG_MDB_DIRTY, &HFS_SB(sb)->flags); hfs_mark_mdb_dirty(sb); return inode; } void hfs_delete_inode(struct inode *inode) { struct super_block *sb = inode->i_sb; hfs_dbg(INODE, "delete_inode: %lu\n", inode->i_ino); if (S_ISDIR(inode->i_mode)) { HFS_SB(sb)->folder_count--; if (HFS_I(inode)->cat_key.ParID == cpu_to_be32(HFS_ROOT_CNID)) HFS_SB(sb)->root_dirs--; set_bit(HFS_FLG_MDB_DIRTY, &HFS_SB(sb)->flags); hfs_mark_mdb_dirty(sb); return; } HFS_SB(sb)->file_count--; if (HFS_I(inode)->cat_key.ParID == cpu_to_be32(HFS_ROOT_CNID)) HFS_SB(sb)->root_files--; if (S_ISREG(inode->i_mode)) { if (!inode->i_nlink) { inode->i_size = 0; hfs_file_truncate(inode); } } set_bit(HFS_FLG_MDB_DIRTY, &HFS_SB(sb)->flags); hfs_mark_mdb_dirty(sb); } void hfs_inode_read_fork(struct inode *inode, struct hfs_extent *ext, __be32 __log_size, __be32 phys_size, u32 clump_size) { struct super_block *sb = inode->i_sb; u32 log_size = be32_to_cpu(__log_size); u16 count; int i; memcpy(HFS_I(inode)->first_extents, ext, sizeof(hfs_extent_rec)); for (count = 0, i = 0; i < 3; i++) count += be16_to_cpu(ext[i].count); HFS_I(inode)->first_blocks = count; inode->i_size = HFS_I(inode)->phys_size = log_size; HFS_I(inode)->fs_blocks = (log_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; inode_set_bytes(inode, HFS_I(inode)->fs_blocks << sb->s_blocksize_bits); HFS_I(inode)->alloc_blocks = be32_to_cpu(phys_size) / HFS_SB(sb)->alloc_blksz; HFS_I(inode)->clump_blocks = clump_size / HFS_SB(sb)->alloc_blksz; if (!HFS_I(inode)->clump_blocks) HFS_I(inode)->clump_blocks = HFS_SB(sb)->clumpablks; } struct hfs_iget_data { struct hfs_cat_key *key; hfs_cat_rec *rec; }; static int hfs_test_inode(struct inode *inode, void *data) { struct hfs_iget_data *idata = data; hfs_cat_rec *rec; rec = idata->rec; switch (rec->type) { case HFS_CDR_DIR: return inode->i_ino == be32_to_cpu(rec->dir.DirID); case HFS_CDR_FIL: return inode->i_ino == be32_to_cpu(rec->file.FlNum); default: BUG(); return 1; } } /* * hfs_read_inode */ static int hfs_read_inode(struct inode *inode, void *data) { struct hfs_iget_data *idata = data; struct hfs_sb_info *hsb = HFS_SB(inode->i_sb); hfs_cat_rec *rec; HFS_I(inode)->flags = 0; HFS_I(inode)->rsrc_inode = NULL; mutex_init(&HFS_I(inode)->extents_lock); INIT_LIST_HEAD(&HFS_I(inode)->open_dir_list); spin_lock_init(&HFS_I(inode)->open_dir_lock); /* Initialize the inode */ inode->i_uid = hsb->s_uid; inode->i_gid = hsb->s_gid; set_nlink(inode, 1); if (idata->key) HFS_I(inode)->cat_key = *idata->key; else HFS_I(inode)->flags |= HFS_FLG_RSRC; HFS_I(inode)->tz_secondswest = sys_tz.tz_minuteswest * 60; rec = idata->rec; switch (rec->type) { case HFS_CDR_FIL: if (!HFS_IS_RSRC(inode)) { hfs_inode_read_fork(inode, rec->file.ExtRec, rec->file.LgLen, rec->file.PyLen, be16_to_cpu(rec->file.ClpSize)); } else { hfs_inode_read_fork(inode, rec->file.RExtRec, rec->file.RLgLen, rec->file.RPyLen, be16_to_cpu(rec->file.ClpSize)); } inode->i_ino = be32_to_cpu(rec->file.FlNum); inode->i_mode = S_IRUGO | S_IXUGO; if (!(rec->file.Flags & HFS_FIL_LOCK)) inode->i_mode |= S_IWUGO; inode->i_mode &= ~hsb->s_file_umask; inode->i_mode |= S_IFREG; inode_set_mtime_to_ts(inode, inode_set_atime_to_ts(inode, inode_set_ctime_to_ts(inode, hfs_m_to_utime(rec->file.MdDat)))); inode->i_op = &hfs_file_inode_operations; inode->i_fop = &hfs_file_operations; inode->i_mapping->a_ops = &hfs_aops; break; case HFS_CDR_DIR: inode->i_ino = be32_to_cpu(rec->dir.DirID); inode->i_size = be16_to_cpu(rec->dir.Val) + 2; HFS_I(inode)->fs_blocks = 0; inode->i_mode = S_IFDIR | (S_IRWXUGO & ~hsb->s_dir_umask); inode_set_mtime_to_ts(inode, inode_set_atime_to_ts(inode, inode_set_ctime_to_ts(inode, hfs_m_to_utime(rec->dir.MdDat)))); inode->i_op = &hfs_dir_inode_operations; inode->i_fop = &hfs_dir_operations; break; default: make_bad_inode(inode); } return 0; } /* * __hfs_iget() * * Given the MDB for a HFS filesystem, a 'key' and an 'entry' in * the catalog B-tree and the 'type' of the desired file return the * inode for that file/directory or NULL. Note that 'type' indicates * whether we want the actual file or directory, or the corresponding * metadata (AppleDouble header file or CAP metadata file). */ struct inode *hfs_iget(struct super_block *sb, struct hfs_cat_key *key, hfs_cat_rec *rec) { struct hfs_iget_data data = { key, rec }; struct inode *inode; u32 cnid; switch (rec->type) { case HFS_CDR_DIR: cnid = be32_to_cpu(rec->dir.DirID); break; case HFS_CDR_FIL: cnid = be32_to_cpu(rec->file.FlNum); break; default: return NULL; } inode = iget5_locked(sb, cnid, hfs_test_inode, hfs_read_inode, &data); if (inode && (inode->i_state & I_NEW)) unlock_new_inode(inode); return inode; } void hfs_inode_write_fork(struct inode *inode, struct hfs_extent *ext, __be32 *log_size, __be32 *phys_size) { memcpy(ext, HFS_I(inode)->first_extents, sizeof(hfs_extent_rec)); if (log_size) *log_size = cpu_to_be32(inode->i_size); if (phys_size) *phys_size = cpu_to_be32(HFS_I(inode)->alloc_blocks * HFS_SB(inode->i_sb)->alloc_blksz); } int hfs_write_inode(struct inode *inode, struct writeback_control *wbc) { struct inode *main_inode = inode; struct hfs_find_data fd; hfs_cat_rec rec; int res; hfs_dbg(INODE, "hfs_write_inode: %lu\n", inode->i_ino); res = hfs_ext_write_extent(inode); if (res) return res; if (inode->i_ino < HFS_FIRSTUSER_CNID) { switch (inode->i_ino) { case HFS_ROOT_CNID: break; case HFS_EXT_CNID: hfs_btree_write(HFS_SB(inode->i_sb)->ext_tree); return 0; case HFS_CAT_CNID: hfs_btree_write(HFS_SB(inode->i_sb)->cat_tree); return 0; default: BUG(); return -EIO; } } if (HFS_IS_RSRC(inode)) main_inode = HFS_I(inode)->rsrc_inode; if (!main_inode->i_nlink) return 0; if (hfs_find_init(HFS_SB(main_inode->i_sb)->cat_tree, &fd)) /* panic? */ return -EIO; res = -EIO; if (HFS_I(main_inode)->cat_key.CName.len > HFS_NAMELEN) goto out; fd.search_key->cat = HFS_I(main_inode)->cat_key; if (hfs_brec_find(&fd)) goto out; if (S_ISDIR(main_inode->i_mode)) { if (fd.entrylength < sizeof(struct hfs_cat_dir)) goto out; hfs_bnode_read(fd.bnode, &rec, fd.entryoffset, sizeof(struct hfs_cat_dir)); if (rec.type != HFS_CDR_DIR || be32_to_cpu(rec.dir.DirID) != inode->i_ino) { } rec.dir.MdDat = hfs_u_to_mtime(inode_get_mtime(inode)); rec.dir.Val = cpu_to_be16(inode->i_size - 2); hfs_bnode_write(fd.bnode, &rec, fd.entryoffset, sizeof(struct hfs_cat_dir)); } else if (HFS_IS_RSRC(inode)) { if (fd.entrylength < sizeof(struct hfs_cat_file)) goto out; hfs_bnode_read(fd.bnode, &rec, fd.entryoffset, sizeof(struct hfs_cat_file)); hfs_inode_write_fork(inode, rec.file.RExtRec, &rec.file.RLgLen, &rec.file.RPyLen); hfs_bnode_write(fd.bnode, &rec, fd.entryoffset, sizeof(struct hfs_cat_file)); } else { if (fd.entrylength < sizeof(struct hfs_cat_file)) goto out; hfs_bnode_read(fd.bnode, &rec, fd.entryoffset, sizeof(struct hfs_cat_file)); if (rec.type != HFS_CDR_FIL || be32_to_cpu(rec.file.FlNum) != inode->i_ino) { } if (inode->i_mode & S_IWUSR) rec.file.Flags &= ~HFS_FIL_LOCK; else rec.file.Flags |= HFS_FIL_LOCK; hfs_inode_write_fork(inode, rec.file.ExtRec, &rec.file.LgLen, &rec.file.PyLen); rec.file.MdDat = hfs_u_to_mtime(inode_get_mtime(inode)); hfs_bnode_write(fd.bnode, &rec, fd.entryoffset, sizeof(struct hfs_cat_file)); } res = 0; out: hfs_find_exit(&fd); return res; } static struct dentry *hfs_file_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct inode *inode = NULL; hfs_cat_rec rec; struct hfs_find_data fd; int res; if (HFS_IS_RSRC(dir) || strcmp(dentry->d_name.name, "rsrc")) goto out; inode = HFS_I(dir)->rsrc_inode; if (inode) goto out; inode = new_inode(dir->i_sb); if (!inode) return ERR_PTR(-ENOMEM); res = hfs_find_init(HFS_SB(dir->i_sb)->cat_tree, &fd); if (res) { iput(inode); return ERR_PTR(res); } fd.search_key->cat = HFS_I(dir)->cat_key; res = hfs_brec_read(&fd, &rec, sizeof(rec)); if (!res) { struct hfs_iget_data idata = { NULL, &rec }; hfs_read_inode(inode, &idata); } hfs_find_exit(&fd); if (res) { iput(inode); return ERR_PTR(res); } HFS_I(inode)->rsrc_inode = dir; HFS_I(dir)->rsrc_inode = inode; igrab(dir); inode_fake_hash(inode); mark_inode_dirty(inode); dont_mount(dentry); out: return d_splice_alias(inode, dentry); } void hfs_evict_inode(struct inode *inode) { truncate_inode_pages_final(&inode->i_data); clear_inode(inode); if (HFS_IS_RSRC(inode) && HFS_I(inode)->rsrc_inode) { HFS_I(HFS_I(inode)->rsrc_inode)->rsrc_inode = NULL; iput(HFS_I(inode)->rsrc_inode); } } static int hfs_file_open(struct inode *inode, struct file *file) { if (HFS_IS_RSRC(inode)) inode = HFS_I(inode)->rsrc_inode; atomic_inc(&HFS_I(inode)->opencnt); return 0; } static int hfs_file_release(struct inode *inode, struct file *file) { //struct super_block *sb = inode->i_sb; if (HFS_IS_RSRC(inode)) inode = HFS_I(inode)->rsrc_inode; if (atomic_dec_and_test(&HFS_I(inode)->opencnt)) { inode_lock(inode); hfs_file_truncate(inode); //if (inode->i_flags & S_DEAD) { // hfs_delete_cat(inode->i_ino, HFSPLUS_SB(sb).hidden_dir, NULL); // hfs_delete_inode(inode); //} inode_unlock(inode); } return 0; } /* * hfs_notify_change() * * Based very closely on fs/msdos/inode.c by Werner Almesberger * * This is the notify_change() field in the super_operations structure * for HFS file systems. The purpose is to take that changes made to * an inode and apply then in a filesystem-dependent manner. In this * case the process has a few of tasks to do: * 1) prevent changes to the i_uid and i_gid fields. * 2) map file permissions to the closest allowable permissions * 3) Since multiple Linux files can share the same on-disk inode under * HFS (for instance the data and resource forks of a file) a change * to permissions must be applied to all other in-core inodes which * correspond to the same HFS file. */ int hfs_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); struct hfs_sb_info *hsb = HFS_SB(inode->i_sb); int error; error = setattr_prepare(&nop_mnt_idmap, dentry, attr); /* basic permission checks */ if (error) return error; /* no uig/gid changes and limit which mode bits can be set */ if (((attr->ia_valid & ATTR_UID) && (!uid_eq(attr->ia_uid, hsb->s_uid))) || ((attr->ia_valid & ATTR_GID) && (!gid_eq(attr->ia_gid, hsb->s_gid))) || ((attr->ia_valid & ATTR_MODE) && ((S_ISDIR(inode->i_mode) && (attr->ia_mode != inode->i_mode)) || (attr->ia_mode & ~HFS_VALID_MODE_BITS)))) { return hsb->s_quiet ? 0 : error; } if (attr->ia_valid & ATTR_MODE) { /* Only the 'w' bits can ever change and only all together. */ if (attr->ia_mode & S_IWUSR) attr->ia_mode = inode->i_mode | S_IWUGO; else attr->ia_mode = inode->i_mode & ~S_IWUGO; attr->ia_mode &= S_ISDIR(inode->i_mode) ? ~hsb->s_dir_umask: ~hsb->s_file_umask; } if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size != i_size_read(inode)) { inode_dio_wait(inode); error = inode_newsize_ok(inode, attr->ia_size); if (error) return error; truncate_setsize(inode, attr->ia_size); hfs_file_truncate(inode); simple_inode_init_ts(inode); } setattr_copy(&nop_mnt_idmap, inode, attr); mark_inode_dirty(inode); return 0; } static int hfs_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync) { struct inode *inode = filp->f_mapping->host; struct super_block * sb; int ret, err; ret = file_write_and_wait_range(filp, start, end); if (ret) return ret; inode_lock(inode); /* sync the inode to buffers */ ret = write_inode_now(inode, 0); /* sync the superblock to buffers */ sb = inode->i_sb; flush_delayed_work(&HFS_SB(sb)->mdb_work); /* .. finally sync the buffers to disk */ err = sync_blockdev(sb->s_bdev); if (!ret) ret = err; inode_unlock(inode); return ret; } static const struct file_operations hfs_file_operations = { .llseek = generic_file_llseek, .read_iter = generic_file_read_iter, .write_iter = generic_file_write_iter, .mmap = generic_file_mmap, .splice_read = filemap_splice_read, .fsync = hfs_file_fsync, .open = hfs_file_open, .release = hfs_file_release, }; static const struct inode_operations hfs_file_inode_operations = { .lookup = hfs_file_lookup, .setattr = hfs_inode_setattr, .listxattr = generic_listxattr, }; |
4 4 4 13 13 5 1 3 1 1 1 5 2 2 1 1 8 3 3 3 2 1 3 3 3 3 3 2 2 2 2 2 2 2 2 5 5 5 2 2 2 2 2 2 4 2 2 9 5 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 | // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2007 Oracle. All rights reserved. */ #include <linux/bio.h> #include <linux/slab.h> #include <linux/pagemap.h> #include <linux/highmem.h> #include <linux/sched/mm.h> #include <crypto/hash.h> #include "messages.h" #include "ctree.h" #include "disk-io.h" #include "transaction.h" #include "bio.h" #include "compression.h" #include "fs.h" #include "accessors.h" #include "file-item.h" #define __MAX_CSUM_ITEMS(r, size) ((unsigned long)(((BTRFS_LEAF_DATA_SIZE(r) - \ sizeof(struct btrfs_item) * 2) / \ size) - 1)) #define MAX_CSUM_ITEMS(r, size) (min_t(u32, __MAX_CSUM_ITEMS(r, size), \ PAGE_SIZE)) /* * Set inode's size according to filesystem options. * * @inode: inode we want to update the disk_i_size for * @new_i_size: i_size we want to set to, 0 if we use i_size * * With NO_HOLES set this simply sets the disk_is_size to whatever i_size_read() * returns as it is perfectly fine with a file that has holes without hole file * extent items. * * However without NO_HOLES we need to only return the area that is contiguous * from the 0 offset of the file. Otherwise we could end up adjust i_size up * to an extent that has a gap in between. * * Finally new_i_size should only be set in the case of truncate where we're not * ready to use i_size_read() as the limiter yet. */ void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_size) { struct btrfs_fs_info *fs_info = inode->root->fs_info; u64 start, end, i_size; int ret; spin_lock(&inode->lock); i_size = new_i_size ?: i_size_read(&inode->vfs_inode); if (btrfs_fs_incompat(fs_info, NO_HOLES)) { inode->disk_i_size = i_size; goto out_unlock; } ret = find_contiguous_extent_bit(inode->file_extent_tree, 0, &start, &end, EXTENT_DIRTY); if (!ret && start == 0) i_size = min(i_size, end + 1); else i_size = 0; inode->disk_i_size = i_size; out_unlock: spin_unlock(&inode->lock); } /* * Mark range within a file as having a new extent inserted. * * @inode: inode being modified * @start: start file offset of the file extent we've inserted * @len: logical length of the file extent item * * Call when we are inserting a new file extent where there was none before. * Does not need to call this in the case where we're replacing an existing file * extent, however if not sure it's fine to call this multiple times. * * The start and len must match the file extent item, so thus must be sectorsize * aligned. */ int btrfs_inode_set_file_extent_range(struct btrfs_inode *inode, u64 start, u64 len) { if (len == 0) return 0; ASSERT(IS_ALIGNED(start + len, inode->root->fs_info->sectorsize)); if (btrfs_fs_incompat(inode->root->fs_info, NO_HOLES)) return 0; return set_extent_bit(inode->file_extent_tree, start, start + len - 1, EXTENT_DIRTY, NULL); } /* * Mark an inode range as not having a backing extent. * * @inode: inode being modified * @start: start file offset of the file extent we've inserted * @len: logical length of the file extent item * * Called when we drop a file extent, for example when we truncate. Doesn't * need to be called for cases where we're replacing a file extent, like when * we've COWed a file extent. * * The start and len must match the file extent item, so thus must be sectorsize * aligned. */ int btrfs_inode_clear_file_extent_range(struct btrfs_inode *inode, u64 start, u64 len) { if (len == 0) return 0; ASSERT(IS_ALIGNED(start + len, inode->root->fs_info->sectorsize) || len == (u64)-1); if (btrfs_fs_incompat(inode->root->fs_info, NO_HOLES)) return 0; return clear_extent_bit(inode->file_extent_tree, start, start + len - 1, EXTENT_DIRTY, NULL); } static size_t bytes_to_csum_size(const struct btrfs_fs_info *fs_info, u32 bytes) { ASSERT(IS_ALIGNED(bytes, fs_info->sectorsize)); return (bytes >> fs_info->sectorsize_bits) * fs_info->csum_size; } static size_t csum_size_to_bytes(const struct btrfs_fs_info *fs_info, u32 csum_size) { ASSERT(IS_ALIGNED(csum_size, fs_info->csum_size)); return (csum_size / fs_info->csum_size) << fs_info->sectorsize_bits; } static inline u32 max_ordered_sum_bytes(const struct btrfs_fs_info *fs_info) { u32 max_csum_size = round_down(PAGE_SIZE - sizeof(struct btrfs_ordered_sum), fs_info->csum_size); return csum_size_to_bytes(fs_info, max_csum_size); } /* * Calculate the total size needed to allocate for an ordered sum structure * spanning @bytes in the file. */ static int btrfs_ordered_sum_size(struct btrfs_fs_info *fs_info, unsigned long bytes) { return sizeof(struct btrfs_ordered_sum) + bytes_to_csum_size(fs_info, bytes); } int btrfs_insert_hole_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 pos, u64 num_bytes) { int ret = 0; struct btrfs_file_extent_item *item; struct btrfs_key file_key; struct btrfs_path *path; struct extent_buffer *leaf; path = btrfs_alloc_path(); if (!path) return -ENOMEM; file_key.objectid = objectid; file_key.offset = pos; file_key.type = BTRFS_EXTENT_DATA_KEY; ret = btrfs_insert_empty_item(trans, root, path, &file_key, sizeof(*item)); if (ret < 0) goto out; leaf = path->nodes[0]; item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); btrfs_set_file_extent_disk_bytenr(leaf, item, 0); btrfs_set_file_extent_disk_num_bytes(leaf, item, 0); btrfs_set_file_extent_offset(leaf, item, 0); btrfs_set_file_extent_num_bytes(leaf, item, num_bytes); btrfs_set_file_extent_ram_bytes(leaf, item, num_bytes); btrfs_set_file_extent_generation(leaf, item, trans->transid); btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG); btrfs_set_file_extent_compression(leaf, item, 0); btrfs_set_file_extent_encryption(leaf, item, 0); btrfs_set_file_extent_other_encoding(leaf, item, 0); btrfs_mark_buffer_dirty(trans, leaf); out: btrfs_free_path(path); return ret; } static struct btrfs_csum_item * btrfs_lookup_csum(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 bytenr, int cow) { struct btrfs_fs_info *fs_info = root->fs_info; int ret; struct btrfs_key file_key; struct btrfs_key found_key; struct btrfs_csum_item *item; struct extent_buffer *leaf; u64 csum_offset = 0; const u32 csum_size = fs_info->csum_size; int csums_in_item; file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; file_key.offset = bytenr; file_key.type = BTRFS_EXTENT_CSUM_KEY; ret = btrfs_search_slot(trans, root, &file_key, path, 0, cow); if (ret < 0) goto fail; leaf = path->nodes[0]; if (ret > 0) { ret = 1; if (path->slots[0] == 0) goto fail; path->slots[0]--; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); if (found_key.type != BTRFS_EXTENT_CSUM_KEY) goto fail; csum_offset = (bytenr - found_key.offset) >> fs_info->sectorsize_bits; csums_in_item = btrfs_item_size(leaf, path->slots[0]); csums_in_item /= csum_size; if (csum_offset == csums_in_item) { ret = -EFBIG; goto fail; } else if (csum_offset > csums_in_item) { goto fail; } } item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); item = (struct btrfs_csum_item *)((unsigned char *)item + csum_offset * csum_size); return item; fail: if (ret > 0) ret = -ENOENT; return ERR_PTR(ret); } int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 objectid, u64 offset, int mod) { struct btrfs_key file_key; int ins_len = mod < 0 ? -1 : 0; int cow = mod != 0; file_key.objectid = objectid; file_key.offset = offset; file_key.type = BTRFS_EXTENT_DATA_KEY; return btrfs_search_slot(trans, root, &file_key, path, ins_len, cow); } /* * Find checksums for logical bytenr range [disk_bytenr, disk_bytenr + len) and * store the result to @dst. * * Return >0 for the number of sectors we found. * Return 0 for the range [disk_bytenr, disk_bytenr + sectorsize) has no csum * for it. Caller may want to try next sector until one range is hit. * Return <0 for fatal error. */ static int search_csum_tree(struct btrfs_fs_info *fs_info, struct btrfs_path *path, u64 disk_bytenr, u64 len, u8 *dst) { struct btrfs_root *csum_root; struct btrfs_csum_item *item = NULL; struct btrfs_key key; const u32 sectorsize = fs_info->sectorsize; const u32 csum_size = fs_info->csum_size; u32 itemsize; int ret; u64 csum_start; u64 csum_len; ASSERT(IS_ALIGNED(disk_bytenr, sectorsize) && IS_ALIGNED(len, sectorsize)); /* Check if the current csum item covers disk_bytenr */ if (path->nodes[0]) { item = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_csum_item); btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); itemsize = btrfs_item_size(path->nodes[0], path->slots[0]); csum_start = key.offset; csum_len = (itemsize / csum_size) * sectorsize; if (in_range(disk_bytenr, csum_start, csum_len)) goto found; } /* Current item doesn't contain the desired range, search again */ btrfs_release_path(path); csum_root = btrfs_csum_root(fs_info, disk_bytenr); item = btrfs_lookup_csum(NULL, csum_root, path, disk_bytenr, 0); if (IS_ERR(item)) { ret = PTR_ERR(item); goto out; } btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); itemsize = btrfs_item_size(path->nodes[0], path->slots[0]); csum_start = key.offset; csum_len = (itemsize / csum_size) * sectorsize; ASSERT(in_range(disk_bytenr, csum_start, csum_len)); found: ret = (min(csum_start + csum_len, disk_bytenr + len) - disk_bytenr) >> fs_info->sectorsize_bits; read_extent_buffer(path->nodes[0], dst, (unsigned long)item, ret * csum_size); out: if (ret == -ENOENT || ret == -EFBIG) ret = 0; return ret; } /* * Lookup the checksum for the read bio in csum tree. * * Return: BLK_STS_RESOURCE if allocating memory fails, BLK_STS_OK otherwise. */ blk_status_t btrfs_lookup_bio_sums(struct btrfs_bio *bbio) { struct btrfs_inode *inode = bbio->inode; struct btrfs_fs_info *fs_info = inode->root->fs_info; struct bio *bio = &bbio->bio; struct btrfs_path *path; const u32 sectorsize = fs_info->sectorsize; const u32 csum_size = fs_info->csum_size; u32 orig_len = bio->bi_iter.bi_size; u64 orig_disk_bytenr = bio->bi_iter.bi_sector << SECTOR_SHIFT; const unsigned int nblocks = orig_len >> fs_info->sectorsize_bits; blk_status_t ret = BLK_STS_OK; u32 bio_offset = 0; if ((inode->flags & BTRFS_INODE_NODATASUM) || test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state)) return BLK_STS_OK; /* * This function is only called for read bio. * * This means two things: * - All our csums should only be in csum tree * No ordered extents csums, as ordered extents are only for write * path. * - No need to bother any other info from bvec * Since we're looking up csums, the only important info is the * disk_bytenr and the length, which can be extracted from bi_iter * directly. */ ASSERT(bio_op(bio) == REQ_OP_READ); path = btrfs_alloc_path(); if (!path) return BLK_STS_RESOURCE; if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) { bbio->csum = kmalloc_array(nblocks, csum_size, GFP_NOFS); if (!bbio->csum) { btrfs_free_path(path); return BLK_STS_RESOURCE; } } else { bbio->csum = bbio->csum_inline; } /* * If requested number of sectors is larger than one leaf can contain, * kick the readahead for csum tree. */ if (nblocks > fs_info->csums_per_leaf) path->reada = READA_FORWARD; /* * the free space stuff is only read when it hasn't been * updated in the current transaction. So, we can safely * read from the commit root and sidestep a nasty deadlock * between reading the free space cache and updating the csum tree. */ if (btrfs_is_free_space_inode(inode)) { path->search_commit_root = 1; path->skip_locking = 1; } while (bio_offset < orig_len) { int count; u64 cur_disk_bytenr = orig_disk_bytenr + bio_offset; u8 *csum_dst = bbio->csum + (bio_offset >> fs_info->sectorsize_bits) * csum_size; count = search_csum_tree(fs_info, path, cur_disk_bytenr, orig_len - bio_offset, csum_dst); if (count < 0) { ret = errno_to_blk_status(count); if (bbio->csum != bbio->csum_inline) kfree(bbio->csum); bbio->csum = NULL; break; } /* * We didn't find a csum for this range. We need to make sure * we complain loudly about this, because we are not NODATASUM. * * However for the DATA_RELOC inode we could potentially be * relocating data extents for a NODATASUM inode, so the inode * itself won't be marked with NODATASUM, but the extent we're * copying is in fact NODATASUM. If we don't find a csum we * assume this is the case. */ if (count == 0) { memset(csum_dst, 0, csum_size); count = 1; if (btrfs_root_id(inode->root) == BTRFS_DATA_RELOC_TREE_OBJECTID) { u64 file_offset = bbio->file_offset + bio_offset; set_extent_bit(&inode->io_tree, file_offset, file_offset + sectorsize - 1, EXTENT_NODATASUM, NULL); } else { btrfs_warn_rl(fs_info, "csum hole found for disk bytenr range [%llu, %llu)", cur_disk_bytenr, cur_disk_bytenr + sectorsize); } } bio_offset += count * sectorsize; } btrfs_free_path(path); return ret; } /* * Search for checksums for a given logical range. * * @root: The root where to look for checksums. * @start: Logical address of target checksum range. * @end: End offset (inclusive) of the target checksum range. * @list: List for adding each checksum that was found. * Can be NULL in case the caller only wants to check if * there any checksums for the range. * @nowait: Indicate if the search must be non-blocking or not. * * Return < 0 on error, 0 if no checksums were found, or 1 if checksums were * found. */ int btrfs_lookup_csums_list(struct btrfs_root *root, u64 start, u64 end, struct list_head *list, bool nowait) { struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_key key; struct btrfs_path *path; struct extent_buffer *leaf; struct btrfs_ordered_sum *sums; struct btrfs_csum_item *item; int ret; bool found_csums = false; ASSERT(IS_ALIGNED(start, fs_info->sectorsize) && IS_ALIGNED(end + 1, fs_info->sectorsize)); path = btrfs_alloc_path(); if (!path) return -ENOMEM; path->nowait = nowait; key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; key.offset = start; key.type = BTRFS_EXTENT_CSUM_KEY; ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto out; if (ret > 0 && path->slots[0] > 0) { leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1); /* * There are two cases we can hit here for the previous csum * item: * * |<- search range ->| * |<- csum item ->| * * Or * |<- search range ->| * |<- csum item ->| * * Check if the previous csum item covers the leading part of * the search range. If so we have to start from previous csum * item. */ if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID && key.type == BTRFS_EXTENT_CSUM_KEY) { if (bytes_to_csum_size(fs_info, start - key.offset) < btrfs_item_size(leaf, path->slots[0] - 1)) path->slots[0]--; } } while (start <= end) { u64 csum_end; leaf = path->nodes[0]; if (path->slots[0] >= btrfs_header_nritems(leaf)) { ret = btrfs_next_leaf(root, path); if (ret < 0) goto out; if (ret > 0) break; leaf = path->nodes[0]; } btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID || key.type != BTRFS_EXTENT_CSUM_KEY || key.offset > end) break; if (key.offset > start) start = key.offset; csum_end = key.offset + csum_size_to_bytes(fs_info, btrfs_item_size(leaf, path->slots[0])); if (csum_end <= start) { path->slots[0]++; continue; } found_csums = true; if (!list) goto out; csum_end = min(csum_end, end + 1); item = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_csum_item); while (start < csum_end) { unsigned long offset; size_t size; size = min_t(size_t, csum_end - start, max_ordered_sum_bytes(fs_info)); sums = kzalloc(btrfs_ordered_sum_size(fs_info, size), GFP_NOFS); if (!sums) { ret = -ENOMEM; goto out; } sums->logical = start; sums->len = size; offset = bytes_to_csum_size(fs_info, start - key.offset); read_extent_buffer(path->nodes[0], sums->sums, ((unsigned long)item) + offset, bytes_to_csum_size(fs_info, size)); start += size; list_add_tail(&sums->list, list); } path->slots[0]++; } out: btrfs_free_path(path); if (ret < 0) { if (list) { struct btrfs_ordered_sum *tmp_sums; list_for_each_entry_safe(sums, tmp_sums, list, list) kfree(sums); } return ret; } return found_csums ? 1 : 0; } /* * Do the same work as btrfs_lookup_csums_list(), the difference is in how * we return the result. * * This version will set the corresponding bits in @csum_bitmap to represent * that there is a csum found. * Each bit represents a sector. Thus caller should ensure @csum_buf passed * in is large enough to contain all csums. */ int btrfs_lookup_csums_bitmap(struct btrfs_root *root, struct btrfs_path *path, u64 start, u64 end, u8 *csum_buf, unsigned long *csum_bitmap) { struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_key key; struct extent_buffer *leaf; struct btrfs_csum_item *item; const u64 orig_start = start; bool free_path = false; int ret; ASSERT(IS_ALIGNED(start, fs_info->sectorsize) && IS_ALIGNED(end + 1, fs_info->sectorsize)); if (!path) { path = btrfs_alloc_path(); if (!path) return -ENOMEM; free_path = true; } /* Check if we can reuse the previous path. */ if (path->nodes[0]) { btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID && key.type == BTRFS_EXTENT_CSUM_KEY && key.offset <= start) goto search_forward; btrfs_release_path(path); } key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; key.type = BTRFS_EXTENT_CSUM_KEY; key.offset = start; ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto fail; if (ret > 0 && path->slots[0] > 0) { leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1); /* * There are two cases we can hit here for the previous csum * item: * * |<- search range ->| * |<- csum item ->| * * Or * |<- search range ->| * |<- csum item ->| * * Check if the previous csum item covers the leading part of * the search range. If so we have to start from previous csum * item. */ if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID && key.type == BTRFS_EXTENT_CSUM_KEY) { if (bytes_to_csum_size(fs_info, start - key.offset) < btrfs_item_size(leaf, path->slots[0] - 1)) path->slots[0]--; } } search_forward: while (start <= end) { u64 csum_end; leaf = path->nodes[0]; if (path->slots[0] >= btrfs_header_nritems(leaf)) { ret = btrfs_next_leaf(root, path); if (ret < 0) goto fail; if (ret > 0) break; leaf = path->nodes[0]; } btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID || key.type != BTRFS_EXTENT_CSUM_KEY || key.offset > end) break; if (key.offset > start) start = key.offset; csum_end = key.offset + csum_size_to_bytes(fs_info, btrfs_item_size(leaf, path->slots[0])); if (csum_end <= start) { path->slots[0]++; continue; } csum_end = min(csum_end, end + 1); item = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_csum_item); while (start < csum_end) { unsigned long offset; size_t size; u8 *csum_dest = csum_buf + bytes_to_csum_size(fs_info, start - orig_start); size = min_t(size_t, csum_end - start, end + 1 - start); offset = bytes_to_csum_size(fs_info, start - key.offset); read_extent_buffer(path->nodes[0], csum_dest, ((unsigned long)item) + offset, bytes_to_csum_size(fs_info, size)); bitmap_set(csum_bitmap, (start - orig_start) >> fs_info->sectorsize_bits, size >> fs_info->sectorsize_bits); start += size; } path->slots[0]++; } ret = 0; fail: if (free_path) btrfs_free_path(path); return ret; } /* * Calculate checksums of the data contained inside a bio. */ blk_status_t btrfs_csum_one_bio(struct btrfs_bio *bbio) { struct btrfs_ordered_extent *ordered = bbio->ordered; struct btrfs_inode *inode = bbio->inode; struct btrfs_fs_info *fs_info = inode->root->fs_info; SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); struct bio *bio = &bbio->bio; struct btrfs_ordered_sum *sums; char *data; struct bvec_iter iter; struct bio_vec bvec; int index; unsigned int blockcount; int i; unsigned nofs_flag; nofs_flag = memalloc_nofs_save(); sums = kvzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size), GFP_KERNEL); memalloc_nofs_restore(nofs_flag); if (!sums) return BLK_STS_RESOURCE; sums->len = bio->bi_iter.bi_size; INIT_LIST_HEAD(&sums->list); sums->logical = bio->bi_iter.bi_sector << SECTOR_SHIFT; index = 0; shash->tfm = fs_info->csum_shash; bio_for_each_segment(bvec, bio, iter) { blockcount = BTRFS_BYTES_TO_BLKS(fs_info, bvec.bv_len + fs_info->sectorsize - 1); for (i = 0; i < blockcount; i++) { data = bvec_kmap_local(&bvec); crypto_shash_digest(shash, data + (i * fs_info->sectorsize), fs_info->sectorsize, sums->sums + index); kunmap_local(data); index += fs_info->csum_size; } } bbio->sums = sums; btrfs_add_ordered_sum(ordered, sums); return 0; } /* * Nodatasum I/O on zoned file systems still requires an btrfs_ordered_sum to * record the updated logical address on Zone Append completion. * Allocate just the structure with an empty sums array here for that case. */ blk_status_t btrfs_alloc_dummy_sum(struct btrfs_bio *bbio) { bbio->sums = kmalloc(sizeof(*bbio->sums), GFP_NOFS); if (!bbio->sums) return BLK_STS_RESOURCE; bbio->sums->len = bbio->bio.bi_iter.bi_size; bbio->sums->logical = bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT; btrfs_add_ordered_sum(bbio->ordered, bbio->sums); return 0; } /* * Remove one checksum overlapping a range. * * This expects the key to describe the csum pointed to by the path, and it * expects the csum to overlap the range [bytenr, len] * * The csum should not be entirely contained in the range and the range should * not be entirely contained in the csum. * * This calls btrfs_truncate_item with the correct args based on the overlap, * and fixes up the key as required. */ static noinline void truncate_one_csum(struct btrfs_trans_handle *trans, struct btrfs_path *path, struct btrfs_key *key, u64 bytenr, u64 len) { struct btrfs_fs_info *fs_info = trans->fs_info; struct extent_buffer *leaf; const u32 csum_size = fs_info->csum_size; u64 csum_end; u64 end_byte = bytenr + len; u32 blocksize_bits = fs_info->sectorsize_bits; leaf = path->nodes[0]; csum_end = btrfs_item_size(leaf, path->slots[0]) / csum_size; csum_end <<= blocksize_bits; csum_end += key->offset; if (key->offset < bytenr && csum_end <= end_byte) { /* * [ bytenr - len ] * [ ] * [csum ] * A simple truncate off the end of the item */ u32 new_size = (bytenr - key->offset) >> blocksize_bits; new_size *= csum_size; btrfs_truncate_item(trans, path, new_size, 1); } else if (key->offset >= bytenr && csum_end > end_byte && end_byte > key->offset) { /* * [ bytenr - len ] * [ ] * [csum ] * we need to truncate from the beginning of the csum */ u32 new_size = (csum_end - end_byte) >> blocksize_bits; new_size *= csum_size; btrfs_truncate_item(trans, path, new_size, 0); key->offset = end_byte; btrfs_set_item_key_safe(trans, path, key); } else { BUG(); } } /* * Delete the csum items from the csum tree for a given range of bytes. */ int btrfs_del_csums(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 len) { struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_path *path; struct btrfs_key key; u64 end_byte = bytenr + len; u64 csum_end; struct extent_buffer *leaf; int ret = 0; const u32 csum_size = fs_info->csum_size; u32 blocksize_bits = fs_info->sectorsize_bits; ASSERT(btrfs_root_id(root) == BTRFS_CSUM_TREE_OBJECTID || btrfs_root_id(root) == BTRFS_TREE_LOG_OBJECTID); path = btrfs_alloc_path(); if (!path) return -ENOMEM; while (1) { key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; key.offset = end_byte - 1; key.type = BTRFS_EXTENT_CSUM_KEY; ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret > 0) { ret = 0; if (path->slots[0] == 0) break; path->slots[0]--; } else if (ret < 0) { break; } leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID || key.type != BTRFS_EXTENT_CSUM_KEY) { break; } if (key.offset >= end_byte) break; csum_end = btrfs_item_size(leaf, path->slots[0]) / csum_size; csum_end <<= blocksize_bits; csum_end += key.offset; /* this csum ends before we start, we're done */ if (csum_end <= bytenr) break; /* delete the entire item, it is inside our range */ if (key.offset >= bytenr && csum_end <= end_byte) { int del_nr = 1; /* * Check how many csum items preceding this one in this * leaf correspond to our range and then delete them all * at once. */ if (key.offset > bytenr && path->slots[0] > 0) { int slot = path->slots[0] - 1; while (slot >= 0) { struct btrfs_key pk; btrfs_item_key_to_cpu(leaf, &pk, slot); if (pk.offset < bytenr || pk.type != BTRFS_EXTENT_CSUM_KEY || pk.objectid != BTRFS_EXTENT_CSUM_OBJECTID) break; path->slots[0] = slot; del_nr++; key.offset = pk.offset; slot--; } } ret = btrfs_del_items(trans, root, path, path->slots[0], del_nr); if (ret) break; if (key.offset == bytenr) break; } else if (key.offset < bytenr && csum_end > end_byte) { unsigned long offset; unsigned long shift_len; unsigned long item_offset; /* * [ bytenr - len ] * [csum ] * * Our bytes are in the middle of the csum, * we need to split this item and insert a new one. * * But we can't drop the path because the * csum could change, get removed, extended etc. * * The trick here is the max size of a csum item leaves * enough room in the tree block for a single * item header. So, we split the item in place, * adding a new header pointing to the existing * bytes. Then we loop around again and we have * a nicely formed csum item that we can neatly * truncate. */ offset = (bytenr - key.offset) >> blocksize_bits; offset *= csum_size; shift_len = (len >> blocksize_bits) * csum_size; item_offset = btrfs_item_ptr_offset(leaf, path->slots[0]); memzero_extent_buffer(leaf, item_offset + offset, shift_len); key.offset = bytenr; /* * btrfs_split_item returns -EAGAIN when the * item changed size or key */ ret = btrfs_split_item(trans, root, path, &key, offset); if (ret && ret != -EAGAIN) { btrfs_abort_transaction(trans, ret); break; } ret = 0; key.offset = end_byte - 1; } else { truncate_one_csum(trans, path, &key, bytenr, len); if (key.offset < bytenr) break; } btrfs_release_path(path); } btrfs_free_path(path); return ret; } static int find_next_csum_offset(struct btrfs_root *root, struct btrfs_path *path, u64 *next_offset) { const u32 nritems = btrfs_header_nritems(path->nodes[0]); struct btrfs_key found_key; int slot = path->slots[0] + 1; int ret; if (nritems == 0 || slot >= nritems) { ret = btrfs_next_leaf(root, path); if (ret < 0) { return ret; } else if (ret > 0) { *next_offset = (u64)-1; return 0; } slot = path->slots[0]; } btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot); if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID || found_key.type != BTRFS_EXTENT_CSUM_KEY) *next_offset = (u64)-1; else *next_offset = found_key.offset; return 0; } int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_ordered_sum *sums) { struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_key file_key; struct btrfs_key found_key; struct btrfs_path *path; struct btrfs_csum_item *item; struct btrfs_csum_item *item_end; struct extent_buffer *leaf = NULL; u64 next_offset; u64 total_bytes = 0; u64 csum_offset; u64 bytenr; u32 ins_size; int index = 0; int found_next; int ret; const u32 csum_size = fs_info->csum_size; path = btrfs_alloc_path(); if (!path) return -ENOMEM; again: next_offset = (u64)-1; found_next = 0; bytenr = sums->logical + total_bytes; file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; file_key.offset = bytenr; file_key.type = BTRFS_EXTENT_CSUM_KEY; item = btrfs_lookup_csum(trans, root, path, bytenr, 1); if (!IS_ERR(item)) { ret = 0; leaf = path->nodes[0]; item_end = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); item_end = (struct btrfs_csum_item *)((char *)item_end + btrfs_item_size(leaf, path->slots[0])); goto found; } ret = PTR_ERR(item); if (ret != -EFBIG && ret != -ENOENT) goto out; if (ret == -EFBIG) { u32 item_size; /* we found one, but it isn't big enough yet */ leaf = path->nodes[0]; item_size = btrfs_item_size(leaf, path->slots[0]); if ((item_size / csum_size) >= MAX_CSUM_ITEMS(fs_info, csum_size)) { /* already at max size, make a new one */ goto insert; } } else { /* We didn't find a csum item, insert one. */ ret = find_next_csum_offset(root, path, &next_offset); if (ret < 0) goto out; found_next = 1; goto insert; } /* * At this point, we know the tree has a checksum item that ends at an * offset matching the start of the checksum range we want to insert. * We try to extend that item as much as possible and then add as many * checksums to it as they fit. * * First check if the leaf has enough free space for at least one * checksum. If it has go directly to the item extension code, otherwise * release the path and do a search for insertion before the extension. */ if (btrfs_leaf_free_space(leaf) >= csum_size) { btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); csum_offset = (bytenr - found_key.offset) >> fs_info->sectorsize_bits; goto extend_csum; } btrfs_release_path(path); path->search_for_extension = 1; ret = btrfs_search_slot(trans, root, &file_key, path, csum_size, 1); path->search_for_extension = 0; if (ret < 0) goto out; if (ret > 0) { if (path->slots[0] == 0) goto insert; path->slots[0]--; } leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); csum_offset = (bytenr - found_key.offset) >> fs_info->sectorsize_bits; if (found_key.type != BTRFS_EXTENT_CSUM_KEY || found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID || csum_offset >= MAX_CSUM_ITEMS(fs_info, csum_size)) { goto insert; } extend_csum: if (csum_offset == btrfs_item_size(leaf, path->slots[0]) / csum_size) { int extend_nr; u64 tmp; u32 diff; tmp = sums->len - total_bytes; tmp >>= fs_info->sectorsize_bits; WARN_ON(tmp < 1); extend_nr = max_t(int, 1, tmp); /* * A log tree can already have checksum items with a subset of * the checksums we are trying to log. This can happen after * doing a sequence of partial writes into prealloc extents and * fsyncs in between, with a full fsync logging a larger subrange * of an extent for which a previous fast fsync logged a smaller * subrange. And this happens in particular due to merging file * extent items when we complete an ordered extent for a range * covered by a prealloc extent - this is done at * btrfs_mark_extent_written(). * * So if we try to extend the previous checksum item, which has * a range that ends at the start of the range we want to insert, * make sure we don't extend beyond the start offset of the next * checksum item. If we are at the last item in the leaf, then * forget the optimization of extending and add a new checksum * item - it is not worth the complexity of releasing the path, * getting the first key for the next leaf, repeat the btree * search, etc, because log trees are temporary anyway and it * would only save a few bytes of leaf space. */ if (btrfs_root_id(root) == BTRFS_TREE_LOG_OBJECTID) { if (path->slots[0] + 1 >= btrfs_header_nritems(path->nodes[0])) { ret = find_next_csum_offset(root, path, &next_offset); if (ret < 0) goto out; found_next = 1; goto insert; } ret = find_next_csum_offset(root, path, &next_offset); if (ret < 0) goto out; tmp = (next_offset - bytenr) >> fs_info->sectorsize_bits; if (tmp <= INT_MAX) extend_nr = min_t(int, extend_nr, tmp); } diff = (csum_offset + extend_nr) * csum_size; diff = min(diff, MAX_CSUM_ITEMS(fs_info, csum_size) * csum_size); diff = diff - btrfs_item_size(leaf, path->slots[0]); diff = min_t(u32, btrfs_leaf_free_space(leaf), diff); diff /= csum_size; diff *= csum_size; btrfs_extend_item(trans, path, diff); ret = 0; goto csum; } insert: btrfs_release_path(path); csum_offset = 0; if (found_next) { u64 tmp; tmp = sums->len - total_bytes; tmp >>= fs_info->sectorsize_bits; tmp = min(tmp, (next_offset - file_key.offset) >> fs_info->sectorsize_bits); tmp = max_t(u64, 1, tmp); tmp = min_t(u64, tmp, MAX_CSUM_ITEMS(fs_info, csum_size)); ins_size = csum_size * tmp; } else { ins_size = csum_size; } ret = btrfs_insert_empty_item(trans, root, path, &file_key, ins_size); if (ret < 0) goto out; leaf = path->nodes[0]; csum: item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); item_end = (struct btrfs_csum_item *)((unsigned char *)item + btrfs_item_size(leaf, path->slots[0])); item = (struct btrfs_csum_item *)((unsigned char *)item + csum_offset * csum_size); found: ins_size = (u32)(sums->len - total_bytes) >> fs_info->sectorsize_bits; ins_size *= csum_size; ins_size = min_t(u32, (unsigned long)item_end - (unsigned long)item, ins_size); write_extent_buffer(leaf, sums->sums + index, (unsigned long)item, ins_size); index += ins_size; ins_size /= csum_size; total_bytes += ins_size * fs_info->sectorsize; btrfs_mark_buffer_dirty(trans, path->nodes[0]); if (total_bytes < sums->len) { btrfs_release_path(path); cond_resched(); goto again; } out: btrfs_free_path(path); return ret; } void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode, const struct btrfs_path *path, struct btrfs_file_extent_item *fi, struct extent_map *em) { struct btrfs_fs_info *fs_info = inode->root->fs_info; struct btrfs_root *root = inode->root; struct extent_buffer *leaf = path->nodes[0]; const int slot = path->slots[0]; struct btrfs_key key; u64 extent_start; u64 bytenr; u8 type = btrfs_file_extent_type(leaf, fi); int compress_type = btrfs_file_extent_compression(leaf, fi); btrfs_item_key_to_cpu(leaf, &key, slot); extent_start = key.offset; em->ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi); em->generation = btrfs_file_extent_generation(leaf, fi); if (type == BTRFS_FILE_EXTENT_REG || type == BTRFS_FILE_EXTENT_PREALLOC) { em->start = extent_start; em->len = btrfs_file_extent_end(path) - extent_start; em->orig_start = extent_start - btrfs_file_extent_offset(leaf, fi); em->orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi); bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); if (bytenr == 0) { em->block_start = EXTENT_MAP_HOLE; return; } if (compress_type != BTRFS_COMPRESS_NONE) { extent_map_set_compression(em, compress_type); em->block_start = bytenr; em->block_len = em->orig_block_len; } else { bytenr += btrfs_file_extent_offset(leaf, fi); em->block_start = bytenr; em->block_len = em->len; if (type == BTRFS_FILE_EXTENT_PREALLOC) em->flags |= EXTENT_FLAG_PREALLOC; } } else if (type == BTRFS_FILE_EXTENT_INLINE) { /* Tree-checker has ensured this. */ ASSERT(extent_start == 0); em->block_start = EXTENT_MAP_INLINE; em->start = 0; em->len = fs_info->sectorsize; /* * Initialize orig_start and block_len with the same values * as in inode.c:btrfs_get_extent(). */ em->orig_start = EXTENT_MAP_HOLE; em->block_len = (u64)-1; extent_map_set_compression(em, compress_type); } else { btrfs_err(fs_info, "unknown file extent item type %d, inode %llu, offset %llu, " "root %llu", type, btrfs_ino(inode), extent_start, btrfs_root_id(root)); } } /* * Returns the end offset (non inclusive) of the file extent item the given path * points to. If it points to an inline extent, the returned offset is rounded * up to the sector size. */ u64 btrfs_file_extent_end(const struct btrfs_path *path) { const struct extent_buffer *leaf = path->nodes[0]; const int slot = path->slots[0]; struct btrfs_file_extent_item *fi; struct btrfs_key key; u64 end; btrfs_item_key_to_cpu(leaf, &key, slot); ASSERT(key.type == BTRFS_EXTENT_DATA_KEY); fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) end = leaf->fs_info->sectorsize; else end = key.offset + btrfs_file_extent_num_bytes(leaf, fi); return end; } |
1 4 1 6 6 3 1 2 2 2 6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 | // SPDX-License-Identifier: GPL-2.0-or-later /* * SQ905C subdriver * * Copyright (C) 2009 Theodore Kilgore */ /* * * This driver uses work done in * libgphoto2/camlibs/digigr8, Copyright (C) Theodore Kilgore. * * This driver has also used as a base the sq905c driver * and may contain code fragments from it. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #define MODULE_NAME "sq905c" #include <linux/workqueue.h> #include <linux/slab.h> #include "gspca.h" MODULE_AUTHOR("Theodore Kilgore <kilgota@auburn.edu>"); MODULE_DESCRIPTION("GSPCA/SQ905C USB Camera Driver"); MODULE_LICENSE("GPL"); /* Default timeouts, in ms */ #define SQ905C_CMD_TIMEOUT 500 #define SQ905C_DATA_TIMEOUT 1000 /* Maximum transfer size to use. */ #define SQ905C_MAX_TRANSFER 0x8000 #define FRAME_HEADER_LEN 0x50 /* Commands. These go in the "value" slot. */ #define SQ905C_CLEAR 0xa0 /* clear everything */ #define SQ905C_GET_ID 0x14f4 /* Read version number */ #define SQ905C_CAPTURE_LOW 0xa040 /* Starts capture at 160x120 */ #define SQ905C_CAPTURE_MED 0x1440 /* Starts capture at 320x240 */ #define SQ905C_CAPTURE_HI 0x2840 /* Starts capture at 320x240 */ /* For capture, this must go in the "index" slot. */ #define SQ905C_CAPTURE_INDEX 0x110f /* Structure to hold all of our device specific stuff */ struct sd { struct gspca_dev gspca_dev; /* !! must be the first item */ const struct v4l2_pix_format *cap_mode; /* Driver stuff */ struct work_struct work_struct; struct workqueue_struct *work_thread; }; /* * Most of these cameras will do 640x480 and 320x240. 160x120 works * in theory but gives very poor output. Therefore, not supported. * The 0x2770:0x9050 cameras have max resolution of 320x240. */ static struct v4l2_pix_format sq905c_mode[] = { { 320, 240, V4L2_PIX_FMT_SQ905C, V4L2_FIELD_NONE, .bytesperline = 320, .sizeimage = 320 * 240, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 0}, { 640, 480, V4L2_PIX_FMT_SQ905C, V4L2_FIELD_NONE, .bytesperline = 640, .sizeimage = 640 * 480, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 0} }; /* Send a command to the camera. */ static int sq905c_command(struct gspca_dev *gspca_dev, u16 command, u16 index) { int ret; ret = usb_control_msg(gspca_dev->dev, usb_sndctrlpipe(gspca_dev->dev, 0), USB_REQ_SYNCH_FRAME, /* request */ USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, command, index, NULL, 0, SQ905C_CMD_TIMEOUT); if (ret < 0) { pr_err("%s: usb_control_msg failed (%d)\n", __func__, ret); return ret; } return 0; } static int sq905c_read(struct gspca_dev *gspca_dev, u16 command, u16 index, int size) { int ret; ret = usb_control_msg(gspca_dev->dev, usb_rcvctrlpipe(gspca_dev->dev, 0), USB_REQ_SYNCH_FRAME, /* request */ USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, command, index, gspca_dev->usb_buf, size, SQ905C_CMD_TIMEOUT); if (ret < 0) { pr_err("%s: usb_control_msg failed (%d)\n", __func__, ret); return ret; } return 0; } /* * This function is called as a workqueue function and runs whenever the camera * is streaming data. Because it is a workqueue function it is allowed to sleep * so we can use synchronous USB calls. To avoid possible collisions with other * threads attempting to use gspca_dev->usb_buf we take the usb_lock when * performing USB operations using it. In practice we don't really need this * as the camera doesn't provide any controls. */ static void sq905c_dostream(struct work_struct *work) { struct sd *dev = container_of(work, struct sd, work_struct); struct gspca_dev *gspca_dev = &dev->gspca_dev; int bytes_left; /* bytes remaining in current frame. */ int data_len; /* size to use for the next read. */ int act_len; int packet_type; int ret; u8 *buffer; buffer = kmalloc(SQ905C_MAX_TRANSFER, GFP_KERNEL); if (!buffer) { pr_err("Couldn't allocate USB buffer\n"); goto quit_stream; } while (gspca_dev->present && gspca_dev->streaming) { #ifdef CONFIG_PM if (gspca_dev->frozen) break; #endif /* Request the header, which tells the size to download */ ret = usb_bulk_msg(gspca_dev->dev, usb_rcvbulkpipe(gspca_dev->dev, 0x81), buffer, FRAME_HEADER_LEN, &act_len, SQ905C_DATA_TIMEOUT); gspca_dbg(gspca_dev, D_STREAM, "Got %d bytes out of %d for header\n", act_len, FRAME_HEADER_LEN); if (ret < 0 || act_len < FRAME_HEADER_LEN) goto quit_stream; /* size is read from 4 bytes starting 0x40, little endian */ bytes_left = buffer[0x40]|(buffer[0x41]<<8)|(buffer[0x42]<<16) |(buffer[0x43]<<24); gspca_dbg(gspca_dev, D_STREAM, "bytes_left = 0x%x\n", bytes_left); /* We keep the header. It has other information, too. */ packet_type = FIRST_PACKET; gspca_frame_add(gspca_dev, packet_type, buffer, FRAME_HEADER_LEN); while (bytes_left > 0 && gspca_dev->present) { data_len = bytes_left > SQ905C_MAX_TRANSFER ? SQ905C_MAX_TRANSFER : bytes_left; ret = usb_bulk_msg(gspca_dev->dev, usb_rcvbulkpipe(gspca_dev->dev, 0x81), buffer, data_len, &act_len, SQ905C_DATA_TIMEOUT); if (ret < 0 || act_len < data_len) goto quit_stream; gspca_dbg(gspca_dev, D_STREAM, "Got %d bytes out of %d for frame\n", data_len, bytes_left); bytes_left -= data_len; if (bytes_left == 0) packet_type = LAST_PACKET; else packet_type = INTER_PACKET; gspca_frame_add(gspca_dev, packet_type, buffer, data_len); } } quit_stream: if (gspca_dev->present) { mutex_lock(&gspca_dev->usb_lock); sq905c_command(gspca_dev, SQ905C_CLEAR, 0); mutex_unlock(&gspca_dev->usb_lock); } kfree(buffer); } /* This function is called at probe time just before sd_init */ static int sd_config(struct gspca_dev *gspca_dev, const struct usb_device_id *id) { struct cam *cam = &gspca_dev->cam; struct sd *dev = (struct sd *) gspca_dev; int ret; gspca_dbg(gspca_dev, D_PROBE, "SQ9050 camera detected (vid/pid 0x%04X:0x%04X)\n", id->idVendor, id->idProduct); ret = sq905c_command(gspca_dev, SQ905C_GET_ID, 0); if (ret < 0) { gspca_err(gspca_dev, "Get version command failed\n"); return ret; } ret = sq905c_read(gspca_dev, 0xf5, 0, 20); if (ret < 0) { gspca_err(gspca_dev, "Reading version command failed\n"); return ret; } /* Note we leave out the usb id and the manufacturing date */ gspca_dbg(gspca_dev, D_PROBE, "SQ9050 ID string: %02x - %*ph\n", gspca_dev->usb_buf[3], 6, gspca_dev->usb_buf + 14); cam->cam_mode = sq905c_mode; cam->nmodes = 2; if (gspca_dev->usb_buf[15] == 0) cam->nmodes = 1; /* We don't use the buffer gspca allocates so make it small. */ cam->bulk_size = 32; cam->bulk = 1; INIT_WORK(&dev->work_struct, sq905c_dostream); return 0; } /* called on streamoff with alt==0 and on disconnect */ /* the usb_lock is held at entry - restore on exit */ static void sd_stop0(struct gspca_dev *gspca_dev) { struct sd *dev = (struct sd *) gspca_dev; /* wait for the work queue to terminate */ mutex_unlock(&gspca_dev->usb_lock); /* This waits for sq905c_dostream to finish */ destroy_workqueue(dev->work_thread); dev->work_thread = NULL; mutex_lock(&gspca_dev->usb_lock); } /* this function is called at probe and resume time */ static int sd_init(struct gspca_dev *gspca_dev) { /* connect to the camera and reset it. */ return sq905c_command(gspca_dev, SQ905C_CLEAR, 0); } /* Set up for getting frames. */ static int sd_start(struct gspca_dev *gspca_dev) { struct sd *dev = (struct sd *) gspca_dev; int ret; dev->cap_mode = gspca_dev->cam.cam_mode; /* "Open the shutter" and set size, to start capture */ switch (gspca_dev->pixfmt.width) { case 640: gspca_dbg(gspca_dev, D_STREAM, "Start streaming at high resolution\n"); dev->cap_mode++; ret = sq905c_command(gspca_dev, SQ905C_CAPTURE_HI, SQ905C_CAPTURE_INDEX); break; default: /* 320 */ gspca_dbg(gspca_dev, D_STREAM, "Start streaming at medium resolution\n"); ret = sq905c_command(gspca_dev, SQ905C_CAPTURE_MED, SQ905C_CAPTURE_INDEX); } if (ret < 0) { gspca_err(gspca_dev, "Start streaming command failed\n"); return ret; } /* Start the workqueue function to do the streaming */ dev->work_thread = create_singlethread_workqueue(MODULE_NAME); if (!dev->work_thread) return -ENOMEM; queue_work(dev->work_thread, &dev->work_struct); return 0; } /* Table of supported USB devices */ static const struct usb_device_id device_table[] = { {USB_DEVICE(0x2770, 0x905c)}, {USB_DEVICE(0x2770, 0x9050)}, {USB_DEVICE(0x2770, 0x9051)}, {USB_DEVICE(0x2770, 0x9052)}, {USB_DEVICE(0x2770, 0x913d)}, {} }; MODULE_DEVICE_TABLE(usb, device_table); /* sub-driver description */ static const struct sd_desc sd_desc = { .name = MODULE_NAME, .config = sd_config, .init = sd_init, .start = sd_start, .stop0 = sd_stop0, }; /* -- device connect -- */ static int sd_probe(struct usb_interface *intf, const struct usb_device_id *id) { return gspca_dev_probe(intf, id, &sd_desc, sizeof(struct sd), THIS_MODULE); } static struct usb_driver sd_driver = { .name = MODULE_NAME, .id_table = device_table, .probe = sd_probe, .disconnect = gspca_disconnect, #ifdef CONFIG_PM .suspend = gspca_suspend, .resume = gspca_resume, .reset_resume = gspca_resume, #endif }; module_usb_driver(sd_driver); |
2 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 | // SPDX-License-Identifier: GPL-2.0-only /* * ebt_nflog * * Author: * Peter Warasin <peter@endian.com> * * February, 2008 * * Based on: * xt_NFLOG.c, (C) 2006 by Patrick McHardy <kaber@trash.net> * ebt_ulog.c, (C) 2004 by Bart De Schuymer <bdschuym@pandora.be> * */ #include <linux/module.h> #include <linux/spinlock.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_nflog.h> #include <net/netfilter/nf_log.h> static unsigned int ebt_nflog_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_nflog_info *info = par->targinfo; struct net *net = xt_net(par); struct nf_loginfo li; li.type = NF_LOG_TYPE_ULOG; li.u.ulog.copy_len = info->len; li.u.ulog.group = info->group; li.u.ulog.qthreshold = info->threshold; li.u.ulog.flags = 0; nf_log_packet(net, PF_BRIDGE, xt_hooknum(par), skb, xt_in(par), xt_out(par), &li, "%s", info->prefix); return EBT_CONTINUE; } static int ebt_nflog_tg_check(const struct xt_tgchk_param *par) { struct ebt_nflog_info *info = par->targinfo; if (info->flags & ~EBT_NFLOG_MASK) return -EINVAL; info->prefix[EBT_NFLOG_PREFIX_SIZE - 1] = '\0'; return 0; } static struct xt_target ebt_nflog_tg_reg __read_mostly = { .name = "nflog", .revision = 0, .family = NFPROTO_BRIDGE, .target = ebt_nflog_tg, .checkentry = ebt_nflog_tg_check, .targetsize = sizeof(struct ebt_nflog_info), .me = THIS_MODULE, }; static int __init ebt_nflog_init(void) { return xt_register_target(&ebt_nflog_tg_reg); } static void __exit ebt_nflog_fini(void) { xt_unregister_target(&ebt_nflog_tg_reg); } module_init(ebt_nflog_init); module_exit(ebt_nflog_fini); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Peter Warasin <peter@endian.com>"); MODULE_DESCRIPTION("ebtables NFLOG netfilter logging module"); |
1 1 1 1 1 1 1 1 3 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 | // SPDX-License-Identifier: GPL-2.0-only /* * mac80211 TDLS handling code * * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2014, Intel Corporation * Copyright 2014 Intel Mobile Communications GmbH * Copyright 2015 - 2016 Intel Deutschland GmbH * Copyright (C) 2019, 2021-2024 Intel Corporation */ #include <linux/ieee80211.h> #include <linux/log2.h> #include <net/cfg80211.h> #include <linux/rtnetlink.h> #include "ieee80211_i.h" #include "driver-ops.h" #include "rate.h" #include "wme.h" /* give usermode some time for retries in setting up the TDLS session */ #define TDLS_PEER_SETUP_TIMEOUT (15 * HZ) void ieee80211_tdls_peer_del_work(struct wiphy *wiphy, struct wiphy_work *wk) { struct ieee80211_sub_if_data *sdata; struct ieee80211_local *local; sdata = container_of(wk, struct ieee80211_sub_if_data, u.mgd.tdls_peer_del_work.work); local = sdata->local; lockdep_assert_wiphy(local->hw.wiphy); if (!is_zero_ether_addr(sdata->u.mgd.tdls_peer)) { tdls_dbg(sdata, "TDLS del peer %pM\n", sdata->u.mgd.tdls_peer); sta_info_destroy_addr(sdata, sdata->u.mgd.tdls_peer); eth_zero_addr(sdata->u.mgd.tdls_peer); } } static void ieee80211_tdls_add_ext_capab(struct ieee80211_link_data *link, struct sk_buff *skb) { struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; bool chan_switch = local->hw.wiphy->features & NL80211_FEATURE_TDLS_CHANNEL_SWITCH; bool wider_band = ieee80211_hw_check(&local->hw, TDLS_WIDER_BW) && !ifmgd->tdls_wider_bw_prohibited; bool buffer_sta = ieee80211_hw_check(&local->hw, SUPPORTS_TDLS_BUFFER_STA); struct ieee80211_supported_band *sband = ieee80211_get_link_sband(link); bool vht = sband && sband->vht_cap.vht_supported; u8 *pos = skb_put(skb, 10); *pos++ = WLAN_EID_EXT_CAPABILITY; *pos++ = 8; /* len */ *pos++ = 0x0; *pos++ = 0x0; *pos++ = 0x0; *pos++ = (chan_switch ? WLAN_EXT_CAPA4_TDLS_CHAN_SWITCH : 0) | (buffer_sta ? WLAN_EXT_CAPA4_TDLS_BUFFER_STA : 0); *pos++ = WLAN_EXT_CAPA5_TDLS_ENABLED; *pos++ = 0; *pos++ = 0; *pos++ = (vht && wider_band) ? WLAN_EXT_CAPA8_TDLS_WIDE_BW_ENABLED : 0; } static u8 ieee80211_tdls_add_subband(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, u16 start, u16 end, u16 spacing) { u8 subband_cnt = 0, ch_cnt = 0; struct ieee80211_channel *ch; struct cfg80211_chan_def chandef; int i, subband_start; struct wiphy *wiphy = sdata->local->hw.wiphy; for (i = start; i <= end; i += spacing) { if (!ch_cnt) subband_start = i; ch = ieee80211_get_channel(sdata->local->hw.wiphy, i); if (ch) { /* we will be active on the channel */ cfg80211_chandef_create(&chandef, ch, NL80211_CHAN_NO_HT); if (cfg80211_reg_can_beacon_relax(wiphy, &chandef, sdata->wdev.iftype)) { ch_cnt++; /* * check if the next channel is also part of * this allowed range */ continue; } } /* * we've reached the end of a range, with allowed channels * found */ if (ch_cnt) { u8 *pos = skb_put(skb, 2); *pos++ = ieee80211_frequency_to_channel(subband_start); *pos++ = ch_cnt; subband_cnt++; ch_cnt = 0; } } /* all channels in the requested range are allowed - add them here */ if (ch_cnt) { u8 *pos = skb_put(skb, 2); *pos++ = ieee80211_frequency_to_channel(subband_start); *pos++ = ch_cnt; subband_cnt++; } return subband_cnt; } static void ieee80211_tdls_add_supp_channels(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { /* * Add possible channels for TDLS. These are channels that are allowed * to be active. */ u8 subband_cnt; u8 *pos = skb_put(skb, 2); *pos++ = WLAN_EID_SUPPORTED_CHANNELS; /* * 5GHz and 2GHz channels numbers can overlap. Ignore this for now, as * this doesn't happen in real world scenarios. */ /* 2GHz, with 5MHz spacing */ subband_cnt = ieee80211_tdls_add_subband(sdata, skb, 2412, 2472, 5); /* 5GHz, with 20MHz spacing */ subband_cnt += ieee80211_tdls_add_subband(sdata, skb, 5000, 5825, 20); /* length */ *pos = 2 * subband_cnt; } static void ieee80211_tdls_add_oper_classes(struct ieee80211_link_data *link, struct sk_buff *skb) { u8 *pos; u8 op_class; if (!ieee80211_chandef_to_operating_class(&link->conf->chanreq.oper, &op_class)) return; pos = skb_put(skb, 4); *pos++ = WLAN_EID_SUPPORTED_REGULATORY_CLASSES; *pos++ = 2; /* len */ *pos++ = op_class; *pos++ = op_class; /* give current operating class as alternate too */ } static void ieee80211_tdls_add_bss_coex_ie(struct sk_buff *skb) { u8 *pos = skb_put(skb, 3); *pos++ = WLAN_EID_BSS_COEX_2040; *pos++ = 1; /* len */ *pos++ = WLAN_BSS_COEX_INFORMATION_REQUEST; } static u16 ieee80211_get_tdls_sta_capab(struct ieee80211_link_data *link, u16 status_code) { struct ieee80211_supported_band *sband; /* The capability will be 0 when sending a failure code */ if (status_code != 0) return 0; sband = ieee80211_get_link_sband(link); if (sband && sband->band == NL80211_BAND_2GHZ) { return WLAN_CAPABILITY_SHORT_SLOT_TIME | WLAN_CAPABILITY_SHORT_PREAMBLE; } return 0; } static void ieee80211_tdls_add_link_ie(struct ieee80211_link_data *link, struct sk_buff *skb, const u8 *peer, bool initiator) { struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_tdls_lnkie *lnkid; const u8 *init_addr, *rsp_addr; if (initiator) { init_addr = sdata->vif.addr; rsp_addr = peer; } else { init_addr = peer; rsp_addr = sdata->vif.addr; } lnkid = skb_put(skb, sizeof(struct ieee80211_tdls_lnkie)); lnkid->ie_type = WLAN_EID_LINK_ID; lnkid->ie_len = sizeof(struct ieee80211_tdls_lnkie) - 2; memcpy(lnkid->bssid, link->u.mgd.bssid, ETH_ALEN); memcpy(lnkid->init_sta, init_addr, ETH_ALEN); memcpy(lnkid->resp_sta, rsp_addr, ETH_ALEN); } static void ieee80211_tdls_add_aid(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { u8 *pos = skb_put(skb, 4); *pos++ = WLAN_EID_AID; *pos++ = 2; /* len */ put_unaligned_le16(sdata->vif.cfg.aid, pos); } /* translate numbering in the WMM parameter IE to the mac80211 notation */ static enum ieee80211_ac_numbers ieee80211_ac_from_wmm(int ac) { switch (ac) { default: WARN_ON_ONCE(1); fallthrough; case 0: return IEEE80211_AC_BE; case 1: return IEEE80211_AC_BK; case 2: return IEEE80211_AC_VI; case 3: return IEEE80211_AC_VO; } } static u8 ieee80211_wmm_aci_aifsn(int aifsn, bool acm, int aci) { u8 ret; ret = aifsn & 0x0f; if (acm) ret |= 0x10; ret |= (aci << 5) & 0x60; return ret; } static u8 ieee80211_wmm_ecw(u16 cw_min, u16 cw_max) { return ((ilog2(cw_min + 1) << 0x0) & 0x0f) | ((ilog2(cw_max + 1) << 0x4) & 0xf0); } static void ieee80211_tdls_add_wmm_param_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { struct ieee80211_wmm_param_ie *wmm; struct ieee80211_tx_queue_params *txq; int i; wmm = skb_put_zero(skb, sizeof(*wmm)); wmm->element_id = WLAN_EID_VENDOR_SPECIFIC; wmm->len = sizeof(*wmm) - 2; wmm->oui[0] = 0x00; /* Microsoft OUI 00:50:F2 */ wmm->oui[1] = 0x50; wmm->oui[2] = 0xf2; wmm->oui_type = 2; /* WME */ wmm->oui_subtype = 1; /* WME param */ wmm->version = 1; /* WME ver */ wmm->qos_info = 0; /* U-APSD not in use */ /* * Use the EDCA parameters defined for the BSS, or default if the AP * doesn't support it, as mandated by 802.11-2012 section 10.22.4 */ for (i = 0; i < IEEE80211_NUM_ACS; i++) { txq = &sdata->deflink.tx_conf[ieee80211_ac_from_wmm(i)]; wmm->ac[i].aci_aifsn = ieee80211_wmm_aci_aifsn(txq->aifs, txq->acm, i); wmm->ac[i].cw = ieee80211_wmm_ecw(txq->cw_min, txq->cw_max); wmm->ac[i].txop_limit = cpu_to_le16(txq->txop); } } static void ieee80211_tdls_chandef_vht_upgrade(struct ieee80211_sub_if_data *sdata, struct sta_info *sta) { /* IEEE802.11ac-2013 Table E-4 */ static const u16 centers_80mhz[] = { 5210, 5290, 5530, 5610, 5690, 5775 }; struct cfg80211_chan_def uc = sta->tdls_chandef; enum nl80211_chan_width max_width = ieee80211_sta_cap_chan_bw(&sta->deflink); int i; /* only support upgrading non-narrow channels up to 80Mhz */ if (max_width == NL80211_CHAN_WIDTH_5 || max_width == NL80211_CHAN_WIDTH_10) return; if (max_width > NL80211_CHAN_WIDTH_80) max_width = NL80211_CHAN_WIDTH_80; if (uc.width >= max_width) return; /* * Channel usage constrains in the IEEE802.11ac-2013 specification only * allow expanding a 20MHz channel to 80MHz in a single way. In * addition, there are no 40MHz allowed channels that are not part of * the allowed 80MHz range in the 5GHz spectrum (the relevant one here). */ for (i = 0; i < ARRAY_SIZE(centers_80mhz); i++) if (abs(uc.chan->center_freq - centers_80mhz[i]) <= 30) { uc.center_freq1 = centers_80mhz[i]; uc.center_freq2 = 0; uc.width = NL80211_CHAN_WIDTH_80; break; } if (!uc.center_freq1) return; /* proceed to downgrade the chandef until usable or the same as AP BW */ while (uc.width > max_width || (uc.width > sta->tdls_chandef.width && !cfg80211_reg_can_beacon_relax(sdata->local->hw.wiphy, &uc, sdata->wdev.iftype))) ieee80211_chandef_downgrade(&uc, NULL); if (!cfg80211_chandef_identical(&uc, &sta->tdls_chandef)) { tdls_dbg(sdata, "TDLS ch width upgraded %d -> %d\n", sta->tdls_chandef.width, uc.width); /* * the station is not yet authorized when BW upgrade is done, * locking is not required */ sta->tdls_chandef = uc; } } static void ieee80211_tdls_add_setup_start_ies(struct ieee80211_link_data *link, struct sk_buff *skb, const u8 *peer, u8 action_code, bool initiator, const u8 *extra_ies, size_t extra_ies_len) { struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_supported_band *sband; struct ieee80211_local *local = sdata->local; struct ieee80211_sta_ht_cap ht_cap; struct ieee80211_sta_vht_cap vht_cap; const struct ieee80211_sta_he_cap *he_cap; const struct ieee80211_sta_eht_cap *eht_cap; struct sta_info *sta = NULL; size_t offset = 0, noffset; u8 *pos; sband = ieee80211_get_link_sband(link); if (WARN_ON_ONCE(!sband)) return; ieee80211_put_srates_elem(skb, sband, 0, 0, 0, WLAN_EID_SUPP_RATES); ieee80211_put_srates_elem(skb, sband, 0, 0, 0, WLAN_EID_EXT_SUPP_RATES); ieee80211_tdls_add_supp_channels(sdata, skb); /* add any custom IEs that go before Extended Capabilities */ if (extra_ies_len) { static const u8 before_ext_cap[] = { WLAN_EID_SUPP_RATES, WLAN_EID_COUNTRY, WLAN_EID_EXT_SUPP_RATES, WLAN_EID_SUPPORTED_CHANNELS, WLAN_EID_RSN, }; noffset = ieee80211_ie_split(extra_ies, extra_ies_len, before_ext_cap, ARRAY_SIZE(before_ext_cap), offset); skb_put_data(skb, extra_ies + offset, noffset - offset); offset = noffset; } ieee80211_tdls_add_ext_capab(link, skb); /* add the QoS element if we support it */ if (local->hw.queues >= IEEE80211_NUM_ACS && action_code != WLAN_PUB_ACTION_TDLS_DISCOVER_RES) ieee80211_add_wmm_info_ie(skb_put(skb, 9), 0); /* no U-APSD */ /* add any custom IEs that go before HT capabilities */ if (extra_ies_len) { static const u8 before_ht_cap[] = { WLAN_EID_SUPP_RATES, WLAN_EID_COUNTRY, WLAN_EID_EXT_SUPP_RATES, WLAN_EID_SUPPORTED_CHANNELS, WLAN_EID_RSN, WLAN_EID_EXT_CAPABILITY, WLAN_EID_QOS_CAPA, WLAN_EID_FAST_BSS_TRANSITION, WLAN_EID_TIMEOUT_INTERVAL, WLAN_EID_SUPPORTED_REGULATORY_CLASSES, }; noffset = ieee80211_ie_split(extra_ies, extra_ies_len, before_ht_cap, ARRAY_SIZE(before_ht_cap), offset); skb_put_data(skb, extra_ies + offset, noffset - offset); offset = noffset; } /* we should have the peer STA if we're already responding */ if (action_code == WLAN_TDLS_SETUP_RESPONSE) { sta = sta_info_get(sdata, peer); if (WARN_ON_ONCE(!sta)) return; sta->tdls_chandef = link->conf->chanreq.oper; } ieee80211_tdls_add_oper_classes(link, skb); /* * with TDLS we can switch channels, and HT-caps are not necessarily * the same on all bands. The specification limits the setup to a * single HT-cap, so use the current band for now. */ memcpy(&ht_cap, &sband->ht_cap, sizeof(ht_cap)); if ((action_code == WLAN_TDLS_SETUP_REQUEST || action_code == WLAN_PUB_ACTION_TDLS_DISCOVER_RES) && ht_cap.ht_supported) { ieee80211_apply_htcap_overrides(sdata, &ht_cap); /* disable SMPS in TDLS initiator */ ht_cap.cap |= WLAN_HT_CAP_SM_PS_DISABLED << IEEE80211_HT_CAP_SM_PS_SHIFT; pos = skb_put(skb, sizeof(struct ieee80211_ht_cap) + 2); ieee80211_ie_build_ht_cap(pos, &ht_cap, ht_cap.cap); } else if (action_code == WLAN_TDLS_SETUP_RESPONSE && ht_cap.ht_supported && sta->sta.deflink.ht_cap.ht_supported) { /* the peer caps are already intersected with our own */ memcpy(&ht_cap, &sta->sta.deflink.ht_cap, sizeof(ht_cap)); pos = skb_put(skb, sizeof(struct ieee80211_ht_cap) + 2); ieee80211_ie_build_ht_cap(pos, &ht_cap, ht_cap.cap); } if (ht_cap.ht_supported && (ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40)) ieee80211_tdls_add_bss_coex_ie(skb); ieee80211_tdls_add_link_ie(link, skb, peer, initiator); /* add any custom IEs that go before VHT capabilities */ if (extra_ies_len) { static const u8 before_vht_cap[] = { WLAN_EID_SUPP_RATES, WLAN_EID_COUNTRY, WLAN_EID_EXT_SUPP_RATES, WLAN_EID_SUPPORTED_CHANNELS, WLAN_EID_RSN, WLAN_EID_EXT_CAPABILITY, WLAN_EID_QOS_CAPA, WLAN_EID_FAST_BSS_TRANSITION, WLAN_EID_TIMEOUT_INTERVAL, WLAN_EID_SUPPORTED_REGULATORY_CLASSES, WLAN_EID_MULTI_BAND, }; noffset = ieee80211_ie_split(extra_ies, extra_ies_len, before_vht_cap, ARRAY_SIZE(before_vht_cap), offset); skb_put_data(skb, extra_ies + offset, noffset - offset); offset = noffset; } /* add AID if VHT, HE or EHT capabilities supported */ memcpy(&vht_cap, &sband->vht_cap, sizeof(vht_cap)); he_cap = ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif); eht_cap = ieee80211_get_eht_iftype_cap_vif(sband, &sdata->vif); if ((vht_cap.vht_supported || he_cap || eht_cap) && (action_code == WLAN_TDLS_SETUP_REQUEST || action_code == WLAN_TDLS_SETUP_RESPONSE)) ieee80211_tdls_add_aid(sdata, skb); /* build the VHT-cap similarly to the HT-cap */ if ((action_code == WLAN_TDLS_SETUP_REQUEST || action_code == WLAN_PUB_ACTION_TDLS_DISCOVER_RES) && vht_cap.vht_supported) { ieee80211_apply_vhtcap_overrides(sdata, &vht_cap); pos = skb_put(skb, sizeof(struct ieee80211_vht_cap) + 2); ieee80211_ie_build_vht_cap(pos, &vht_cap, vht_cap.cap); } else if (action_code == WLAN_TDLS_SETUP_RESPONSE && vht_cap.vht_supported && sta->sta.deflink.vht_cap.vht_supported) { /* the peer caps are already intersected with our own */ memcpy(&vht_cap, &sta->sta.deflink.vht_cap, sizeof(vht_cap)); pos = skb_put(skb, sizeof(struct ieee80211_vht_cap) + 2); ieee80211_ie_build_vht_cap(pos, &vht_cap, vht_cap.cap); /* * if both peers support WIDER_BW, we can expand the chandef to * a wider compatible one, up to 80MHz */ if (test_sta_flag(sta, WLAN_STA_TDLS_WIDER_BW)) ieee80211_tdls_chandef_vht_upgrade(sdata, sta); } /* add any custom IEs that go before HE capabilities */ if (extra_ies_len) { static const u8 before_he_cap[] = { WLAN_EID_EXTENSION, WLAN_EID_EXT_FILS_REQ_PARAMS, WLAN_EID_AP_CSN, }; noffset = ieee80211_ie_split(extra_ies, extra_ies_len, before_he_cap, ARRAY_SIZE(before_he_cap), offset); skb_put_data(skb, extra_ies + offset, noffset - offset); offset = noffset; } /* build the HE-cap from sband */ if (action_code == WLAN_TDLS_SETUP_REQUEST || action_code == WLAN_TDLS_SETUP_RESPONSE || action_code == WLAN_PUB_ACTION_TDLS_DISCOVER_RES) { ieee80211_put_he_cap(skb, sdata, sband, NULL); /* Build HE 6Ghz capa IE from sband */ if (sband->band == NL80211_BAND_6GHZ) ieee80211_put_he_6ghz_cap(skb, sdata, link->smps_mode); } /* add any custom IEs that go before EHT capabilities */ if (extra_ies_len) { static const u8 before_he_cap[] = { WLAN_EID_EXTENSION, WLAN_EID_EXT_FILS_REQ_PARAMS, WLAN_EID_AP_CSN, }; noffset = ieee80211_ie_split(extra_ies, extra_ies_len, before_he_cap, ARRAY_SIZE(before_he_cap), offset); skb_put_data(skb, extra_ies + offset, noffset - offset); offset = noffset; } /* build the EHT-cap from sband */ if (action_code == WLAN_TDLS_SETUP_REQUEST || action_code == WLAN_TDLS_SETUP_RESPONSE || action_code == WLAN_PUB_ACTION_TDLS_DISCOVER_RES) ieee80211_put_eht_cap(skb, sdata, sband, NULL); /* add any remaining IEs */ if (extra_ies_len) { noffset = extra_ies_len; skb_put_data(skb, extra_ies + offset, noffset - offset); } } static void ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_link_data *link, struct sk_buff *skb, const u8 *peer, bool initiator, const u8 *extra_ies, size_t extra_ies_len) { struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_local *local = sdata->local; size_t offset = 0, noffset; struct sta_info *sta, *ap_sta; struct ieee80211_supported_band *sband; u8 *pos; sband = ieee80211_get_link_sband(link); if (WARN_ON_ONCE(!sband)) return; sta = sta_info_get(sdata, peer); ap_sta = sta_info_get(sdata, sdata->vif.cfg.ap_addr); if (WARN_ON_ONCE(!sta || !ap_sta)) return; sta->tdls_chandef = link->conf->chanreq.oper; /* add any custom IEs that go before the QoS IE */ if (extra_ies_len) { static const u8 before_qos[] = { WLAN_EID_RSN, }; noffset = ieee80211_ie_split(extra_ies, extra_ies_len, before_qos, ARRAY_SIZE(before_qos), offset); skb_put_data(skb, extra_ies + offset, noffset - offset); offset = noffset; } /* add the QoS param IE if both the peer and we support it */ if (local->hw.queues >= IEEE80211_NUM_ACS && sta->sta.wme) ieee80211_tdls_add_wmm_param_ie(sdata, skb); /* add any custom IEs that go before HT operation */ if (extra_ies_len) { static const u8 before_ht_op[] = { WLAN_EID_RSN, WLAN_EID_QOS_CAPA, WLAN_EID_FAST_BSS_TRANSITION, WLAN_EID_TIMEOUT_INTERVAL, }; noffset = ieee80211_ie_split(extra_ies, extra_ies_len, before_ht_op, ARRAY_SIZE(before_ht_op), offset); skb_put_data(skb, extra_ies + offset, noffset - offset); offset = noffset; } /* * if HT support is only added in TDLS, we need an HT-operation IE. * add the IE as required by IEEE802.11-2012 9.23.3.2. */ if (!ap_sta->sta.deflink.ht_cap.ht_supported && sta->sta.deflink.ht_cap.ht_supported) { u16 prot = IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED | IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT | IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT; pos = skb_put(skb, 2 + sizeof(struct ieee80211_ht_operation)); ieee80211_ie_build_ht_oper(pos, &sta->sta.deflink.ht_cap, &link->conf->chanreq.oper, prot, true); } ieee80211_tdls_add_link_ie(link, skb, peer, initiator); /* only include VHT-operation if not on the 2.4GHz band */ if (sband->band != NL80211_BAND_2GHZ && sta->sta.deflink.vht_cap.vht_supported) { /* * if both peers support WIDER_BW, we can expand the chandef to * a wider compatible one, up to 80MHz */ if (test_sta_flag(sta, WLAN_STA_TDLS_WIDER_BW)) ieee80211_tdls_chandef_vht_upgrade(sdata, sta); pos = skb_put(skb, 2 + sizeof(struct ieee80211_vht_operation)); ieee80211_ie_build_vht_oper(pos, &sta->sta.deflink.vht_cap, &sta->tdls_chandef); } /* add any remaining IEs */ if (extra_ies_len) { noffset = extra_ies_len; skb_put_data(skb, extra_ies + offset, noffset - offset); } } static void ieee80211_tdls_add_chan_switch_req_ies(struct ieee80211_link_data *link, struct sk_buff *skb, const u8 *peer, bool initiator, const u8 *extra_ies, size_t extra_ies_len, u8 oper_class, struct cfg80211_chan_def *chandef) { struct ieee80211_tdls_data *tf; size_t offset = 0, noffset; if (WARN_ON_ONCE(!chandef)) return; tf = (void *)skb->data; tf->u.chan_switch_req.target_channel = ieee80211_frequency_to_channel(chandef->chan->center_freq); tf->u.chan_switch_req.oper_class = oper_class; if (extra_ies_len) { static const u8 before_lnkie[] = { WLAN_EID_SECONDARY_CHANNEL_OFFSET, }; noffset = ieee80211_ie_split(extra_ies, extra_ies_len, before_lnkie, ARRAY_SIZE(before_lnkie), offset); skb_put_data(skb, extra_ies + offset, noffset - offset); offset = noffset; } ieee80211_tdls_add_link_ie(link, skb, peer, initiator); /* add any remaining IEs */ if (extra_ies_len) { noffset = extra_ies_len; skb_put_data(skb, extra_ies + offset, noffset - offset); } } static void ieee80211_tdls_add_chan_switch_resp_ies(struct ieee80211_link_data *link, struct sk_buff *skb, const u8 *peer, u16 status_code, bool initiator, const u8 *extra_ies, size_t extra_ies_len) { if (status_code == 0) ieee80211_tdls_add_link_ie(link, skb, peer, initiator); if (extra_ies_len) skb_put_data(skb, extra_ies, extra_ies_len); } static void ieee80211_tdls_add_ies(struct ieee80211_link_data *link, struct sk_buff *skb, const u8 *peer, u8 action_code, u16 status_code, bool initiator, const u8 *extra_ies, size_t extra_ies_len, u8 oper_class, struct cfg80211_chan_def *chandef) { switch (action_code) { case WLAN_TDLS_SETUP_REQUEST: case WLAN_TDLS_SETUP_RESPONSE: case WLAN_PUB_ACTION_TDLS_DISCOVER_RES: if (status_code == 0) ieee80211_tdls_add_setup_start_ies(link, skb, peer, action_code, initiator, extra_ies, extra_ies_len); break; case WLAN_TDLS_SETUP_CONFIRM: if (status_code == 0) ieee80211_tdls_add_setup_cfm_ies(link, skb, peer, initiator, extra_ies, extra_ies_len); break; case WLAN_TDLS_TEARDOWN: case WLAN_TDLS_DISCOVERY_REQUEST: if (extra_ies_len) skb_put_data(skb, extra_ies, extra_ies_len); if (status_code == 0 || action_code == WLAN_TDLS_TEARDOWN) ieee80211_tdls_add_link_ie(link, skb, peer, initiator); break; case WLAN_TDLS_CHANNEL_SWITCH_REQUEST: ieee80211_tdls_add_chan_switch_req_ies(link, skb, peer, initiator, extra_ies, extra_ies_len, oper_class, chandef); break; case WLAN_TDLS_CHANNEL_SWITCH_RESPONSE: ieee80211_tdls_add_chan_switch_resp_ies(link, skb, peer, status_code, initiator, extra_ies, extra_ies_len); break; } } static int ieee80211_prep_tdls_encap_data(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_link_data *link, const u8 *peer, u8 action_code, u8 dialog_token, u16 status_code, struct sk_buff *skb) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_tdls_data *tf; tf = skb_put(skb, offsetof(struct ieee80211_tdls_data, u)); memcpy(tf->da, peer, ETH_ALEN); memcpy(tf->sa, sdata->vif.addr, ETH_ALEN); tf->ether_type = cpu_to_be16(ETH_P_TDLS); tf->payload_type = WLAN_TDLS_SNAP_RFTYPE; /* network header is after the ethernet header */ skb_set_network_header(skb, ETH_HLEN); switch (action_code) { case WLAN_TDLS_SETUP_REQUEST: tf->category = WLAN_CATEGORY_TDLS; tf->action_code = WLAN_TDLS_SETUP_REQUEST; skb_put(skb, sizeof(tf->u.setup_req)); tf->u.setup_req.dialog_token = dialog_token; tf->u.setup_req.capability = cpu_to_le16(ieee80211_get_tdls_sta_capab(link, status_code)); break; case WLAN_TDLS_SETUP_RESPONSE: tf->category = WLAN_CATEGORY_TDLS; tf->action_code = WLAN_TDLS_SETUP_RESPONSE; skb_put(skb, sizeof(tf->u.setup_resp)); tf->u.setup_resp.status_code = cpu_to_le16(status_code); tf->u.setup_resp.dialog_token = dialog_token; tf->u.setup_resp.capability = cpu_to_le16(ieee80211_get_tdls_sta_capab(link, status_code)); break; case WLAN_TDLS_SETUP_CONFIRM: tf->category = WLAN_CATEGORY_TDLS; tf->action_code = WLAN_TDLS_SETUP_CONFIRM; skb_put(skb, sizeof(tf->u.setup_cfm)); tf->u.setup_cfm.status_code = cpu_to_le16(status_code); tf->u.setup_cfm.dialog_token = dialog_token; break; case WLAN_TDLS_TEARDOWN: tf->category = WLAN_CATEGORY_TDLS; tf->action_code = WLAN_TDLS_TEARDOWN; skb_put(skb, sizeof(tf->u.teardown)); tf->u.teardown.reason_code = cpu_to_le16(status_code); break; case WLAN_TDLS_DISCOVERY_REQUEST: tf->category = WLAN_CATEGORY_TDLS; tf->action_code = WLAN_TDLS_DISCOVERY_REQUEST; skb_put(skb, sizeof(tf->u.discover_req)); tf->u.discover_req.dialog_token = dialog_token; break; case WLAN_TDLS_CHANNEL_SWITCH_REQUEST: tf->category = WLAN_CATEGORY_TDLS; tf->action_code = WLAN_TDLS_CHANNEL_SWITCH_REQUEST; skb_put(skb, sizeof(tf->u.chan_switch_req)); break; case WLAN_TDLS_CHANNEL_SWITCH_RESPONSE: tf->category = WLAN_CATEGORY_TDLS; tf->action_code = WLAN_TDLS_CHANNEL_SWITCH_RESPONSE; skb_put(skb, sizeof(tf->u.chan_switch_resp)); tf->u.chan_switch_resp.status_code = cpu_to_le16(status_code); break; default: return -EINVAL; } return 0; } static int ieee80211_prep_tdls_direct(struct wiphy *wiphy, struct net_device *dev, const u8 *peer, struct ieee80211_link_data *link, u8 action_code, u8 dialog_token, u16 status_code, struct sk_buff *skb) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_mgmt *mgmt; mgmt = skb_put_zero(skb, 24); memcpy(mgmt->da, peer, ETH_ALEN); memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); memcpy(mgmt->bssid, link->u.mgd.bssid, ETH_ALEN); mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION); switch (action_code) { case WLAN_PUB_ACTION_TDLS_DISCOVER_RES: skb_put(skb, 1 + sizeof(mgmt->u.action.u.tdls_discover_resp)); mgmt->u.action.category = WLAN_CATEGORY_PUBLIC; mgmt->u.action.u.tdls_discover_resp.action_code = WLAN_PUB_ACTION_TDLS_DISCOVER_RES; mgmt->u.action.u.tdls_discover_resp.dialog_token = dialog_token; mgmt->u.action.u.tdls_discover_resp.capability = cpu_to_le16(ieee80211_get_tdls_sta_capab(link, status_code)); break; default: return -EINVAL; } return 0; } static struct sk_buff * ieee80211_tdls_build_mgmt_packet_data(struct ieee80211_sub_if_data *sdata, const u8 *peer, int link_id, u8 action_code, u8 dialog_token, u16 status_code, bool initiator, const u8 *extra_ies, size_t extra_ies_len, u8 oper_class, struct cfg80211_chan_def *chandef) { struct ieee80211_local *local = sdata->local; struct sk_buff *skb; int ret; struct ieee80211_link_data *link; link_id = link_id >= 0 ? link_id : 0; rcu_read_lock(); link = rcu_dereference(sdata->link[link_id]); if (WARN_ON(!link)) goto unlock; skb = netdev_alloc_skb(sdata->dev, local->hw.extra_tx_headroom + max(sizeof(struct ieee80211_mgmt), sizeof(struct ieee80211_tdls_data)) + 50 + /* supported rates */ 10 + /* ext capab */ 26 + /* max(WMM-info, WMM-param) */ 2 + max(sizeof(struct ieee80211_ht_cap), sizeof(struct ieee80211_ht_operation)) + 2 + max(sizeof(struct ieee80211_vht_cap), sizeof(struct ieee80211_vht_operation)) + 2 + 1 + sizeof(struct ieee80211_he_cap_elem) + sizeof(struct ieee80211_he_mcs_nss_supp) + IEEE80211_HE_PPE_THRES_MAX_LEN + 2 + 1 + sizeof(struct ieee80211_he_6ghz_capa) + 2 + 1 + sizeof(struct ieee80211_eht_cap_elem) + sizeof(struct ieee80211_eht_mcs_nss_supp) + IEEE80211_EHT_PPE_THRES_MAX_LEN + 50 + /* supported channels */ 3 + /* 40/20 BSS coex */ 4 + /* AID */ 4 + /* oper classes */ extra_ies_len + sizeof(struct ieee80211_tdls_lnkie)); if (!skb) goto unlock; skb_reserve(skb, local->hw.extra_tx_headroom); switch (action_code) { case WLAN_TDLS_SETUP_REQUEST: case WLAN_TDLS_SETUP_RESPONSE: case WLAN_TDLS_SETUP_CONFIRM: case WLAN_TDLS_TEARDOWN: case WLAN_TDLS_DISCOVERY_REQUEST: case WLAN_TDLS_CHANNEL_SWITCH_REQUEST: case WLAN_TDLS_CHANNEL_SWITCH_RESPONSE: ret = ieee80211_prep_tdls_encap_data(local->hw.wiphy, sdata->dev, link, peer, action_code, dialog_token, status_code, skb); break; case WLAN_PUB_ACTION_TDLS_DISCOVER_RES: ret = ieee80211_prep_tdls_direct(local->hw.wiphy, sdata->dev, peer, link, action_code, dialog_token, status_code, skb); break; default: ret = -EOPNOTSUPP; break; } if (ret < 0) goto fail; ieee80211_tdls_add_ies(link, skb, peer, action_code, status_code, initiator, extra_ies, extra_ies_len, oper_class, chandef); rcu_read_unlock(); return skb; fail: dev_kfree_skb(skb); unlock: rcu_read_unlock(); return NULL; } static int ieee80211_tdls_prep_mgmt_packet(struct wiphy *wiphy, struct net_device *dev, const u8 *peer, int link_id, u8 action_code, u8 dialog_token, u16 status_code, u32 peer_capability, bool initiator, const u8 *extra_ies, size_t extra_ies_len, u8 oper_class, struct cfg80211_chan_def *chandef) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct sk_buff *skb = NULL; struct sta_info *sta; u32 flags = 0; int ret = 0; rcu_read_lock(); sta = sta_info_get(sdata, peer); /* infer the initiator if we can, to support old userspace */ switch (action_code) { case WLAN_TDLS_SETUP_REQUEST: if (sta) { set_sta_flag(sta, WLAN_STA_TDLS_INITIATOR); sta->sta.tdls_initiator = false; } fallthrough; case WLAN_TDLS_SETUP_CONFIRM: case WLAN_TDLS_DISCOVERY_REQUEST: initiator = true; break; case WLAN_TDLS_SETUP_RESPONSE: /* * In some testing scenarios, we send a request and response. * Make the last packet sent take effect for the initiator * value. */ if (sta) { clear_sta_flag(sta, WLAN_STA_TDLS_INITIATOR); sta->sta.tdls_initiator = true; } fallthrough; case WLAN_PUB_ACTION_TDLS_DISCOVER_RES: initiator = false; break; case WLAN_TDLS_TEARDOWN: case WLAN_TDLS_CHANNEL_SWITCH_REQUEST: case WLAN_TDLS_CHANNEL_SWITCH_RESPONSE: /* any value is ok */ break; default: ret = -EOPNOTSUPP; break; } if (sta && test_sta_flag(sta, WLAN_STA_TDLS_INITIATOR)) initiator = true; rcu_read_unlock(); if (ret < 0) goto fail; skb = ieee80211_tdls_build_mgmt_packet_data(sdata, peer, link_id, action_code, dialog_token, status_code, initiator, extra_ies, extra_ies_len, oper_class, chandef); if (!skb) { ret = -EINVAL; goto fail; } if (action_code == WLAN_PUB_ACTION_TDLS_DISCOVER_RES) { ieee80211_tx_skb_tid(sdata, skb, 7, link_id); return 0; } /* * According to 802.11z: Setup req/resp are sent in AC_BK, otherwise * we should default to AC_VI. */ switch (action_code) { case WLAN_TDLS_SETUP_REQUEST: case WLAN_TDLS_SETUP_RESPONSE: skb->priority = 256 + 2; break; default: skb->priority = 256 + 5; break; } /* * Set the WLAN_TDLS_TEARDOWN flag to indicate a teardown in progress. * Later, if no ACK is returned from peer, we will re-send the teardown * packet through the AP. */ if ((action_code == WLAN_TDLS_TEARDOWN) && ieee80211_hw_check(&sdata->local->hw, REPORTS_TX_ACK_STATUS)) { bool try_resend; /* Should we keep skb for possible resend */ /* If not sending directly to peer - no point in keeping skb */ rcu_read_lock(); sta = sta_info_get(sdata, peer); try_resend = sta && test_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH); rcu_read_unlock(); spin_lock_bh(&sdata->u.mgd.teardown_lock); if (try_resend && !sdata->u.mgd.teardown_skb) { /* Mark it as requiring TX status callback */ flags |= IEEE80211_TX_CTL_REQ_TX_STATUS | IEEE80211_TX_INTFL_MLME_CONN_TX; /* * skb is copied since mac80211 will later set * properties that might not be the same as the AP, * such as encryption, QoS, addresses, etc. * * No problem if skb_copy() fails, so no need to check. */ sdata->u.mgd.teardown_skb = skb_copy(skb, GFP_ATOMIC); sdata->u.mgd.orig_teardown_skb = skb; } spin_unlock_bh(&sdata->u.mgd.teardown_lock); } /* disable bottom halves when entering the Tx path */ local_bh_disable(); __ieee80211_subif_start_xmit(skb, dev, flags, IEEE80211_TX_CTRL_MLO_LINK_UNSPEC, NULL); local_bh_enable(); return ret; fail: dev_kfree_skb(skb); return ret; } static int ieee80211_tdls_mgmt_setup(struct wiphy *wiphy, struct net_device *dev, const u8 *peer, int link_id, u8 action_code, u8 dialog_token, u16 status_code, u32 peer_capability, bool initiator, const u8 *extra_ies, size_t extra_ies_len) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; enum ieee80211_smps_mode smps_mode = sdata->deflink.u.mgd.driver_smps_mode; int ret; /* don't support setup with forced SMPS mode that's not off */ if (smps_mode != IEEE80211_SMPS_AUTOMATIC && smps_mode != IEEE80211_SMPS_OFF) { tdls_dbg(sdata, "Aborting TDLS setup due to SMPS mode %d\n", smps_mode); return -EOPNOTSUPP; } lockdep_assert_wiphy(local->hw.wiphy); /* we don't support concurrent TDLS peer setups */ if (!is_zero_ether_addr(sdata->u.mgd.tdls_peer) && !ether_addr_equal(sdata->u.mgd.tdls_peer, peer)) { ret = -EBUSY; goto out_unlock; } /* * make sure we have a STA representing the peer so we drop or buffer * non-TDLS-setup frames to the peer. We can't send other packets * during setup through the AP path. * Allow error packets to be sent - sometimes we don't even add a STA * before failing the setup. */ if (status_code == 0) { rcu_read_lock(); if (!sta_info_get(sdata, peer)) { rcu_read_unlock(); ret = -ENOLINK; goto out_unlock; } rcu_read_unlock(); } ieee80211_flush_queues(local, sdata, false); memcpy(sdata->u.mgd.tdls_peer, peer, ETH_ALEN); /* we cannot take the mutex while preparing the setup packet */ ret = ieee80211_tdls_prep_mgmt_packet(wiphy, dev, peer, link_id, action_code, dialog_token, status_code, peer_capability, initiator, extra_ies, extra_ies_len, 0, NULL); if (ret < 0) { eth_zero_addr(sdata->u.mgd.tdls_peer); return ret; } wiphy_delayed_work_queue(sdata->local->hw.wiphy, &sdata->u.mgd.tdls_peer_del_work, TDLS_PEER_SETUP_TIMEOUT); return 0; out_unlock: return ret; } static int ieee80211_tdls_mgmt_teardown(struct wiphy *wiphy, struct net_device *dev, const u8 *peer, int link_id, u8 action_code, u8 dialog_token, u16 status_code, u32 peer_capability, bool initiator, const u8 *extra_ies, size_t extra_ies_len) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; struct sta_info *sta; int ret; /* * No packets can be transmitted to the peer via the AP during setup - * the STA is set as a TDLS peer, but is not authorized. * During teardown, we prevent direct transmissions by stopping the * queues and flushing all direct packets. */ ieee80211_stop_vif_queues(local, sdata, IEEE80211_QUEUE_STOP_REASON_TDLS_TEARDOWN); ieee80211_flush_queues(local, sdata, false); ret = ieee80211_tdls_prep_mgmt_packet(wiphy, dev, peer, link_id, action_code, dialog_token, status_code, peer_capability, initiator, extra_ies, extra_ies_len, 0, NULL); if (ret < 0) sdata_err(sdata, "Failed sending TDLS teardown packet %d\n", ret); /* * Remove the STA AUTH flag to force further traffic through the AP. If * the STA was unreachable, it was already removed. */ rcu_read_lock(); sta = sta_info_get(sdata, peer); if (sta) clear_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH); rcu_read_unlock(); ieee80211_wake_vif_queues(local, sdata, IEEE80211_QUEUE_STOP_REASON_TDLS_TEARDOWN); return 0; } int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev, const u8 *peer, int link_id, u8 action_code, u8 dialog_token, u16 status_code, u32 peer_capability, bool initiator, const u8 *extra_ies, size_t extra_ies_len) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); int ret; if (!(wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS)) return -EOPNOTSUPP; /* make sure we are in managed mode, and associated */ if (sdata->vif.type != NL80211_IFTYPE_STATION || !sdata->u.mgd.associated) return -EINVAL; switch (action_code) { case WLAN_TDLS_SETUP_REQUEST: case WLAN_TDLS_SETUP_RESPONSE: ret = ieee80211_tdls_mgmt_setup(wiphy, dev, peer, link_id, action_code, dialog_token, status_code, peer_capability, initiator, extra_ies, extra_ies_len); break; case WLAN_TDLS_TEARDOWN: ret = ieee80211_tdls_mgmt_teardown(wiphy, dev, peer, link_id, action_code, dialog_token, status_code, peer_capability, initiator, extra_ies, extra_ies_len); break; case WLAN_TDLS_DISCOVERY_REQUEST: /* * Protect the discovery so we can hear the TDLS discovery * response frame. It is transmitted directly and not buffered * by the AP. */ drv_mgd_protect_tdls_discover(sdata->local, sdata, link_id); fallthrough; case WLAN_TDLS_SETUP_CONFIRM: case WLAN_PUB_ACTION_TDLS_DISCOVER_RES: /* no special handling */ ret = ieee80211_tdls_prep_mgmt_packet(wiphy, dev, peer, link_id, action_code, dialog_token, status_code, peer_capability, initiator, extra_ies, extra_ies_len, 0, NULL); break; default: ret = -EOPNOTSUPP; break; } tdls_dbg(sdata, "TDLS mgmt action %d peer %pM link_id %d status %d\n", action_code, peer, link_id, ret); return ret; } static void iee80211_tdls_recalc_chanctx(struct ieee80211_sub_if_data *sdata, struct sta_info *sta) { struct ieee80211_local *local = sdata->local; struct ieee80211_chanctx_conf *conf; struct ieee80211_chanctx *ctx; enum nl80211_chan_width width; struct ieee80211_supported_band *sband; lockdep_assert_wiphy(local->hw.wiphy); conf = rcu_dereference_protected(sdata->vif.bss_conf.chanctx_conf, lockdep_is_held(&local->hw.wiphy->mtx)); if (conf) { width = conf->def.width; sband = local->hw.wiphy->bands[conf->def.chan->band]; ctx = container_of(conf, struct ieee80211_chanctx, conf); ieee80211_recalc_chanctx_chantype(local, ctx); /* if width changed and a peer is given, update its BW */ if (width != conf->def.width && sta && test_sta_flag(sta, WLAN_STA_TDLS_WIDER_BW)) { enum ieee80211_sta_rx_bandwidth bw; bw = ieee80211_chan_width_to_rx_bw(conf->def.width); bw = min(bw, ieee80211_sta_cap_rx_bw(&sta->deflink)); if (bw != sta->sta.deflink.bandwidth) { sta->sta.deflink.bandwidth = bw; rate_control_rate_update(local, sband, sta, 0, IEEE80211_RC_BW_CHANGED); /* * if a TDLS peer BW was updated, we need to * recalc the chandef width again, to get the * correct chanctx min_def */ ieee80211_recalc_chanctx_chantype(local, ctx); } } } } static int iee80211_tdls_have_ht_peers(struct ieee80211_sub_if_data *sdata) { struct sta_info *sta; bool result = false; rcu_read_lock(); list_for_each_entry_rcu(sta, &sdata->local->sta_list, list) { if (!sta->sta.tdls || sta->sdata != sdata || !sta->uploaded || !test_sta_flag(sta, WLAN_STA_AUTHORIZED) || !test_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH) || !sta->sta.deflink.ht_cap.ht_supported) continue; result = true; break; } rcu_read_unlock(); return result; } static void iee80211_tdls_recalc_ht_protection(struct ieee80211_sub_if_data *sdata, struct sta_info *sta) { bool tdls_ht; u16 protection = IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED | IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT | IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT; u16 opmode; /* Nothing to do if the BSS connection uses (at least) HT */ if (sdata->deflink.u.mgd.conn.mode >= IEEE80211_CONN_MODE_HT) return; tdls_ht = (sta && sta->sta.deflink.ht_cap.ht_supported) || iee80211_tdls_have_ht_peers(sdata); opmode = sdata->vif.bss_conf.ht_operation_mode; if (tdls_ht) opmode |= protection; else opmode &= ~protection; if (opmode == sdata->vif.bss_conf.ht_operation_mode) return; sdata->vif.bss_conf.ht_operation_mode = opmode; ieee80211_link_info_change_notify(sdata, &sdata->deflink, BSS_CHANGED_HT); } int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, const u8 *peer, enum nl80211_tdls_operation oper) { struct sta_info *sta; struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; int ret; lockdep_assert_wiphy(local->hw.wiphy); if (!(wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS)) return -EOPNOTSUPP; if (sdata->vif.type != NL80211_IFTYPE_STATION) return -EINVAL; switch (oper) { case NL80211_TDLS_ENABLE_LINK: case NL80211_TDLS_DISABLE_LINK: break; case NL80211_TDLS_TEARDOWN: case NL80211_TDLS_SETUP: case NL80211_TDLS_DISCOVERY_REQ: /* We don't support in-driver setup/teardown/discovery */ return -EOPNOTSUPP; } /* protect possible bss_conf changes and avoid concurrency in * ieee80211_bss_info_change_notify() */ tdls_dbg(sdata, "TDLS oper %d peer %pM\n", oper, peer); switch (oper) { case NL80211_TDLS_ENABLE_LINK: if (sdata->vif.bss_conf.csa_active) { tdls_dbg(sdata, "TDLS: disallow link during CSA\n"); return -EBUSY; } sta = sta_info_get(sdata, peer); if (!sta) return -ENOLINK; iee80211_tdls_recalc_chanctx(sdata, sta); iee80211_tdls_recalc_ht_protection(sdata, sta); set_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH); WARN_ON_ONCE(is_zero_ether_addr(sdata->u.mgd.tdls_peer) || !ether_addr_equal(sdata->u.mgd.tdls_peer, peer)); break; case NL80211_TDLS_DISABLE_LINK: /* * The teardown message in ieee80211_tdls_mgmt_teardown() was * created while the queues were stopped, so it might still be * pending. Before flushing the queues we need to be sure the * message is handled by the tasklet handling pending messages, * otherwise we might start destroying the station before * sending the teardown packet. * Note that this only forces the tasklet to flush pendings - * not to stop the tasklet from rescheduling itself. */ tasklet_kill(&local->tx_pending_tasklet); /* flush a potentially queued teardown packet */ ieee80211_flush_queues(local, sdata, false); ret = sta_info_destroy_addr(sdata, peer); iee80211_tdls_recalc_ht_protection(sdata, NULL); iee80211_tdls_recalc_chanctx(sdata, NULL); if (ret) return ret; break; default: return -EOPNOTSUPP; } if (ether_addr_equal(sdata->u.mgd.tdls_peer, peer)) { wiphy_delayed_work_cancel(sdata->local->hw.wiphy, &sdata->u.mgd.tdls_peer_del_work); eth_zero_addr(sdata->u.mgd.tdls_peer); } wiphy_work_queue(sdata->local->hw.wiphy, &sdata->deflink.u.mgd.request_smps_work); return 0; } void ieee80211_tdls_oper_request(struct ieee80211_vif *vif, const u8 *peer, enum nl80211_tdls_operation oper, u16 reason_code, gfp_t gfp) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); if (vif->type != NL80211_IFTYPE_STATION || !vif->cfg.assoc) { sdata_err(sdata, "Discarding TDLS oper %d - not STA or disconnected\n", oper); return; } cfg80211_tdls_oper_request(sdata->dev, peer, oper, reason_code, gfp); } EXPORT_SYMBOL(ieee80211_tdls_oper_request); static void iee80211_tdls_add_ch_switch_timing(u8 *buf, u16 switch_time, u16 switch_timeout) { struct ieee80211_ch_switch_timing *ch_sw; *buf++ = WLAN_EID_CHAN_SWITCH_TIMING; *buf++ = sizeof(struct ieee80211_ch_switch_timing); ch_sw = (void *)buf; ch_sw->switch_time = cpu_to_le16(switch_time); ch_sw->switch_timeout = cpu_to_le16(switch_timeout); } /* find switch timing IE in SKB ready for Tx */ static const u8 *ieee80211_tdls_find_sw_timing_ie(struct sk_buff *skb) { struct ieee80211_tdls_data *tf; const u8 *ie_start; /* * Get the offset for the new location of the switch timing IE. * The SKB network header will now point to the "payload_type" * element of the TDLS data frame struct. */ tf = container_of(skb->data + skb_network_offset(skb), struct ieee80211_tdls_data, payload_type); ie_start = tf->u.chan_switch_req.variable; return cfg80211_find_ie(WLAN_EID_CHAN_SWITCH_TIMING, ie_start, skb->len - (ie_start - skb->data)); } static struct sk_buff * ieee80211_tdls_ch_sw_tmpl_get(struct sta_info *sta, u8 oper_class, struct cfg80211_chan_def *chandef, u32 *ch_sw_tm_ie_offset) { struct ieee80211_sub_if_data *sdata = sta->sdata; u8 extra_ies[2 + sizeof(struct ieee80211_sec_chan_offs_ie) + 2 + sizeof(struct ieee80211_ch_switch_timing)]; int extra_ies_len = 2 + sizeof(struct ieee80211_ch_switch_timing); u8 *pos = extra_ies; struct sk_buff *skb; int link_id = sta->sta.valid_links ? ffs(sta->sta.valid_links) - 1 : 0; /* * if chandef points to a wide channel add a Secondary-Channel * Offset information element */ if (chandef->width == NL80211_CHAN_WIDTH_40) { struct ieee80211_sec_chan_offs_ie *sec_chan_ie; bool ht40plus; *pos++ = WLAN_EID_SECONDARY_CHANNEL_OFFSET; *pos++ = sizeof(*sec_chan_ie); sec_chan_ie = (void *)pos; ht40plus = cfg80211_get_chandef_type(chandef) == NL80211_CHAN_HT40PLUS; sec_chan_ie->sec_chan_offs = ht40plus ? IEEE80211_HT_PARAM_CHA_SEC_ABOVE : IEEE80211_HT_PARAM_CHA_SEC_BELOW; pos += sizeof(*sec_chan_ie); extra_ies_len += 2 + sizeof(struct ieee80211_sec_chan_offs_ie); } /* just set the values to 0, this is a template */ iee80211_tdls_add_ch_switch_timing(pos, 0, 0); skb = ieee80211_tdls_build_mgmt_packet_data(sdata, sta->sta.addr, link_id, WLAN_TDLS_CHANNEL_SWITCH_REQUEST, 0, 0, !sta->sta.tdls_initiator, extra_ies, extra_ies_len, oper_class, chandef); if (!skb) return NULL; skb = ieee80211_build_data_template(sdata, skb, 0); if (IS_ERR(skb)) { tdls_dbg(sdata, "Failed building TDLS channel switch frame\n"); return NULL; } if (ch_sw_tm_ie_offset) { const u8 *tm_ie = ieee80211_tdls_find_sw_timing_ie(skb); if (!tm_ie) { tdls_dbg(sdata, "No switch timing IE in TDLS switch\n"); dev_kfree_skb_any(skb); return NULL; } *ch_sw_tm_ie_offset = tm_ie - skb->data; } tdls_dbg(sdata, "TDLS channel switch request template for %pM ch %d width %d\n", sta->sta.addr, chandef->chan->center_freq, chandef->width); return skb; } int ieee80211_tdls_channel_switch(struct wiphy *wiphy, struct net_device *dev, const u8 *addr, u8 oper_class, struct cfg80211_chan_def *chandef) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; struct sta_info *sta; struct sk_buff *skb = NULL; u32 ch_sw_tm_ie; int ret; lockdep_assert_wiphy(local->hw.wiphy); if (chandef->chan->freq_offset) /* this may work, but is untested */ return -EOPNOTSUPP; sta = sta_info_get(sdata, addr); if (!sta) { tdls_dbg(sdata, "Invalid TDLS peer %pM for channel switch request\n", addr); ret = -ENOENT; goto out; } if (!test_sta_flag(sta, WLAN_STA_TDLS_CHAN_SWITCH)) { tdls_dbg(sdata, "TDLS channel switch unsupported by %pM\n", addr); ret = -EOPNOTSUPP; goto out; } skb = ieee80211_tdls_ch_sw_tmpl_get(sta, oper_class, chandef, &ch_sw_tm_ie); if (!skb) { ret = -ENOENT; goto out; } ret = drv_tdls_channel_switch(local, sdata, &sta->sta, oper_class, chandef, skb, ch_sw_tm_ie); if (!ret) set_sta_flag(sta, WLAN_STA_TDLS_OFF_CHANNEL); out: dev_kfree_skb_any(skb); return ret; } void ieee80211_tdls_cancel_channel_switch(struct wiphy *wiphy, struct net_device *dev, const u8 *addr) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; struct sta_info *sta; lockdep_assert_wiphy(local->hw.wiphy); sta = sta_info_get(sdata, addr); if (!sta) { tdls_dbg(sdata, "Invalid TDLS peer %pM for channel switch cancel\n", addr); return; } if (!test_sta_flag(sta, WLAN_STA_TDLS_OFF_CHANNEL)) { tdls_dbg(sdata, "TDLS channel switch not initiated by %pM\n", addr); return; } drv_tdls_cancel_channel_switch(local, sdata, &sta->sta); clear_sta_flag(sta, WLAN_STA_TDLS_OFF_CHANNEL); } static struct sk_buff * ieee80211_tdls_ch_sw_resp_tmpl_get(struct sta_info *sta, u32 *ch_sw_tm_ie_offset) { struct ieee80211_sub_if_data *sdata = sta->sdata; struct sk_buff *skb; u8 extra_ies[2 + sizeof(struct ieee80211_ch_switch_timing)]; int link_id = sta->sta.valid_links ? ffs(sta->sta.valid_links) - 1 : 0; /* initial timing are always zero in the template */ iee80211_tdls_add_ch_switch_timing(extra_ies, 0, 0); skb = ieee80211_tdls_build_mgmt_packet_data(sdata, sta->sta.addr, link_id, WLAN_TDLS_CHANNEL_SWITCH_RESPONSE, 0, 0, !sta->sta.tdls_initiator, extra_ies, sizeof(extra_ies), 0, NULL); if (!skb) return NULL; skb = ieee80211_build_data_template(sdata, skb, 0); if (IS_ERR(skb)) { tdls_dbg(sdata, "Failed building TDLS channel switch resp frame\n"); return NULL; } if (ch_sw_tm_ie_offset) { const u8 *tm_ie = ieee80211_tdls_find_sw_timing_ie(skb); if (!tm_ie) { tdls_dbg(sdata, "No switch timing IE in TDLS switch resp\n"); dev_kfree_skb_any(skb); return NULL; } *ch_sw_tm_ie_offset = tm_ie - skb->data; } tdls_dbg(sdata, "TDLS get channel switch response template for %pM\n", sta->sta.addr); return skb; } static int ieee80211_process_tdls_channel_switch_resp(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { struct ieee80211_local *local = sdata->local; struct ieee802_11_elems *elems = NULL; struct sta_info *sta; struct ieee80211_tdls_data *tf = (void *)skb->data; bool local_initiator; struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb); int baselen = offsetof(typeof(*tf), u.chan_switch_resp.variable); struct ieee80211_tdls_ch_sw_params params = {}; int ret; lockdep_assert_wiphy(local->hw.wiphy); params.action_code = WLAN_TDLS_CHANNEL_SWITCH_RESPONSE; params.timestamp = rx_status->device_timestamp; if (skb->len < baselen) { tdls_dbg(sdata, "TDLS channel switch resp too short: %d\n", skb->len); return -EINVAL; } sta = sta_info_get(sdata, tf->sa); if (!sta || !test_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH)) { tdls_dbg(sdata, "TDLS chan switch from non-peer sta %pM\n", tf->sa); ret = -EINVAL; goto out; } params.sta = &sta->sta; params.status = le16_to_cpu(tf->u.chan_switch_resp.status_code); if (params.status != 0) { ret = 0; goto call_drv; } elems = ieee802_11_parse_elems(tf->u.chan_switch_resp.variable, skb->len - baselen, false, NULL); if (!elems) { ret = -ENOMEM; goto out; } if (elems->parse_error) { tdls_dbg(sdata, "Invalid IEs in TDLS channel switch resp\n"); ret = -EINVAL; goto out; } if (!elems->ch_sw_timing || !elems->lnk_id) { tdls_dbg(sdata, "TDLS channel switch resp - missing IEs\n"); ret = -EINVAL; goto out; } /* validate the initiator is set correctly */ local_initiator = !memcmp(elems->lnk_id->init_sta, sdata->vif.addr, ETH_ALEN); if (local_initiator == sta->sta.tdls_initiator) { tdls_dbg(sdata, "TDLS chan switch invalid lnk-id initiator\n"); ret = -EINVAL; goto out; } params.switch_time = le16_to_cpu(elems->ch_sw_timing->switch_time); params.switch_timeout = le16_to_cpu(elems->ch_sw_timing->switch_timeout); params.tmpl_skb = ieee80211_tdls_ch_sw_resp_tmpl_get(sta, ¶ms.ch_sw_tm_ie); if (!params.tmpl_skb) { ret = -ENOENT; goto out; } ret = 0; call_drv: drv_tdls_recv_channel_switch(sdata->local, sdata, ¶ms); tdls_dbg(sdata, "TDLS channel switch response received from %pM status %d\n", tf->sa, params.status); out: dev_kfree_skb_any(params.tmpl_skb); kfree(elems); return ret; } static int ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { struct ieee80211_local *local = sdata->local; struct ieee802_11_elems *elems; struct cfg80211_chan_def chandef; struct ieee80211_channel *chan; enum nl80211_channel_type chan_type; int freq; u8 target_channel, oper_class; bool local_initiator; struct sta_info *sta; enum nl80211_band band; struct ieee80211_tdls_data *tf = (void *)skb->data; struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb); int baselen = offsetof(typeof(*tf), u.chan_switch_req.variable); struct ieee80211_tdls_ch_sw_params params = {}; int ret = 0; lockdep_assert_wiphy(local->hw.wiphy); params.action_code = WLAN_TDLS_CHANNEL_SWITCH_REQUEST; params.timestamp = rx_status->device_timestamp; if (skb->len < baselen) { tdls_dbg(sdata, "TDLS channel switch req too short: %d\n", skb->len); return -EINVAL; } target_channel = tf->u.chan_switch_req.target_channel; oper_class = tf->u.chan_switch_req.oper_class; /* * We can't easily infer the channel band. The operating class is * ambiguous - there are multiple tables (US/Europe/JP/Global). The * solution here is to treat channels with number >14 as 5GHz ones, * and specifically check for the (oper_class, channel) combinations * where this doesn't hold. These are thankfully unique according to * IEEE802.11-2012. * We consider only the 2GHz and 5GHz bands and 20MHz+ channels as * valid here. */ if ((oper_class == 112 || oper_class == 2 || oper_class == 3 || oper_class == 4 || oper_class == 5 || oper_class == 6) && target_channel < 14) band = NL80211_BAND_5GHZ; else band = target_channel < 14 ? NL80211_BAND_2GHZ : NL80211_BAND_5GHZ; freq = ieee80211_channel_to_frequency(target_channel, band); if (freq == 0) { tdls_dbg(sdata, "Invalid channel in TDLS chan switch: %d\n", target_channel); return -EINVAL; } chan = ieee80211_get_channel(sdata->local->hw.wiphy, freq); if (!chan) { tdls_dbg(sdata, "Unsupported channel for TDLS chan switch: %d\n", target_channel); return -EINVAL; } elems = ieee802_11_parse_elems(tf->u.chan_switch_req.variable, skb->len - baselen, false, NULL); if (!elems) return -ENOMEM; if (elems->parse_error) { tdls_dbg(sdata, "Invalid IEs in TDLS channel switch req\n"); ret = -EINVAL; goto free; } if (!elems->ch_sw_timing || !elems->lnk_id) { tdls_dbg(sdata, "TDLS channel switch req - missing IEs\n"); ret = -EINVAL; goto free; } if (!elems->sec_chan_offs) { chan_type = NL80211_CHAN_HT20; } else { switch (elems->sec_chan_offs->sec_chan_offs) { case IEEE80211_HT_PARAM_CHA_SEC_ABOVE: chan_type = NL80211_CHAN_HT40PLUS; break; case IEEE80211_HT_PARAM_CHA_SEC_BELOW: chan_type = NL80211_CHAN_HT40MINUS; break; default: chan_type = NL80211_CHAN_HT20; break; } } cfg80211_chandef_create(&chandef, chan, chan_type); /* we will be active on the TDLS link */ if (!cfg80211_reg_can_beacon_relax(sdata->local->hw.wiphy, &chandef, sdata->wdev.iftype)) { tdls_dbg(sdata, "TDLS chan switch to forbidden channel\n"); ret = -EINVAL; goto free; } sta = sta_info_get(sdata, tf->sa); if (!sta || !test_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH)) { tdls_dbg(sdata, "TDLS chan switch from non-peer sta %pM\n", tf->sa); ret = -EINVAL; goto out; } params.sta = &sta->sta; /* validate the initiator is set correctly */ local_initiator = !memcmp(elems->lnk_id->init_sta, sdata->vif.addr, ETH_ALEN); if (local_initiator == sta->sta.tdls_initiator) { tdls_dbg(sdata, "TDLS chan switch invalid lnk-id initiator\n"); ret = -EINVAL; goto out; } /* peer should have known better */ if (!sta->sta.deflink.ht_cap.ht_supported && elems->sec_chan_offs && elems->sec_chan_offs->sec_chan_offs) { tdls_dbg(sdata, "TDLS chan switch - wide chan unsupported\n"); ret = -EOPNOTSUPP; goto out; } params.chandef = &chandef; params.switch_time = le16_to_cpu(elems->ch_sw_timing->switch_time); params.switch_timeout = le16_to_cpu(elems->ch_sw_timing->switch_timeout); params.tmpl_skb = ieee80211_tdls_ch_sw_resp_tmpl_get(sta, ¶ms.ch_sw_tm_ie); if (!params.tmpl_skb) { ret = -ENOENT; goto out; } drv_tdls_recv_channel_switch(sdata->local, sdata, ¶ms); tdls_dbg(sdata, "TDLS ch switch request received from %pM ch %d width %d\n", tf->sa, params.chandef->chan->center_freq, params.chandef->width); out: dev_kfree_skb_any(params.tmpl_skb); free: kfree(elems); return ret; } void ieee80211_process_tdls_channel_switch(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { struct ieee80211_tdls_data *tf = (void *)skb->data; struct wiphy *wiphy = sdata->local->hw.wiphy; lockdep_assert_wiphy(wiphy); /* make sure the driver supports it */ if (!(wiphy->features & NL80211_FEATURE_TDLS_CHANNEL_SWITCH)) return; /* we want to access the entire packet */ if (skb_linearize(skb)) return; /* * The packet/size was already validated by mac80211 Rx path, only look * at the action type. */ switch (tf->action_code) { case WLAN_TDLS_CHANNEL_SWITCH_REQUEST: ieee80211_process_tdls_channel_switch_req(sdata, skb); break; case WLAN_TDLS_CHANNEL_SWITCH_RESPONSE: ieee80211_process_tdls_channel_switch_resp(sdata, skb); break; default: WARN_ON_ONCE(1); return; } } void ieee80211_teardown_tdls_peers(struct ieee80211_link_data *link) { struct ieee80211_sub_if_data *sdata = link->sdata; struct sta_info *sta; u16 reason = WLAN_REASON_TDLS_TEARDOWN_UNSPECIFIED; rcu_read_lock(); list_for_each_entry_rcu(sta, &sdata->local->sta_list, list) { if (!sta->sta.tdls || sta->sdata != sdata || !sta->uploaded || !test_sta_flag(sta, WLAN_STA_AUTHORIZED)) continue; if (sta->deflink.link_id != link->link_id) continue; ieee80211_tdls_oper_request(&sdata->vif, sta->sta.addr, NL80211_TDLS_TEARDOWN, reason, GFP_ATOMIC); } rcu_read_unlock(); } void ieee80211_tdls_handle_disconnect(struct ieee80211_sub_if_data *sdata, const u8 *peer, u16 reason) { struct ieee80211_sta *sta; rcu_read_lock(); sta = ieee80211_find_sta(&sdata->vif, peer); if (!sta || !sta->tdls) { rcu_read_unlock(); return; } rcu_read_unlock(); tdls_dbg(sdata, "disconnected from TDLS peer %pM (Reason: %u=%s)\n", peer, reason, ieee80211_get_reason_code_string(reason)); ieee80211_tdls_oper_request(&sdata->vif, peer, NL80211_TDLS_TEARDOWN, WLAN_REASON_TDLS_TEARDOWN_UNREACHABLE, GFP_ATOMIC); } |
1189 1192 29 31 1192 1195 1192 31 2388 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 | // SPDX-License-Identifier: GPL-2.0 /* * USB-ACPI glue code * * Copyright 2012 Red Hat <mjg@redhat.com> */ #include <linux/module.h> #include <linux/usb.h> #include <linux/device.h> #include <linux/errno.h> #include <linux/kernel.h> #include <linux/acpi.h> #include <linux/pci.h> #include <linux/usb/hcd.h> #include "hub.h" /** * usb_acpi_power_manageable - check whether usb port has * acpi power resource. * @hdev: USB device belonging to the usb hub * @index: port index based zero * * Return true if the port has acpi power resource and false if no. */ bool usb_acpi_power_manageable(struct usb_device *hdev, int index) { acpi_handle port_handle; int port1 = index + 1; port_handle = usb_get_hub_port_acpi_handle(hdev, port1); if (port_handle) return acpi_bus_power_manageable(port_handle); else return false; } EXPORT_SYMBOL_GPL(usb_acpi_power_manageable); #define UUID_USB_CONTROLLER_DSM "ce2ee385-00e6-48cb-9f05-2edb927c4899" #define USB_DSM_DISABLE_U1_U2_FOR_PORT 5 /** * usb_acpi_port_lpm_incapable - check if lpm should be disabled for a port. * @hdev: USB device belonging to the usb hub * @index: zero based port index * * Some USB3 ports may not support USB3 link power management U1/U2 states * due to different retimer setup. ACPI provides _DSM method which returns 0x01 * if U1 and U2 states should be disabled. Evaluate _DSM with: * Arg0: UUID = ce2ee385-00e6-48cb-9f05-2edb927c4899 * Arg1: Revision ID = 0 * Arg2: Function Index = 5 * Arg3: (empty) * * Return 1 if USB3 port is LPM incapable, negative on error, otherwise 0 */ int usb_acpi_port_lpm_incapable(struct usb_device *hdev, int index) { union acpi_object *obj; acpi_handle port_handle; int port1 = index + 1; guid_t guid; int ret; ret = guid_parse(UUID_USB_CONTROLLER_DSM, &guid); if (ret) return ret; port_handle = usb_get_hub_port_acpi_handle(hdev, port1); if (!port_handle) { dev_dbg(&hdev->dev, "port-%d no acpi handle\n", port1); return -ENODEV; } if (!acpi_check_dsm(port_handle, &guid, 0, BIT(USB_DSM_DISABLE_U1_U2_FOR_PORT))) { dev_dbg(&hdev->dev, "port-%d no _DSM function %d\n", port1, USB_DSM_DISABLE_U1_U2_FOR_PORT); return -ENODEV; } obj = acpi_evaluate_dsm_typed(port_handle, &guid, 0, USB_DSM_DISABLE_U1_U2_FOR_PORT, NULL, ACPI_TYPE_INTEGER); if (!obj) { dev_dbg(&hdev->dev, "evaluate port-%d _DSM failed\n", port1); return -EINVAL; } if (obj->integer.value == 0x01) ret = 1; ACPI_FREE(obj); return ret; } EXPORT_SYMBOL_GPL(usb_acpi_port_lpm_incapable); /** * usb_acpi_set_power_state - control usb port's power via acpi power * resource * @hdev: USB device belonging to the usb hub * @index: port index based zero * @enable: power state expected to be set * * Notice to use usb_acpi_power_manageable() to check whether the usb port * has acpi power resource before invoking this function. * * Returns 0 on success, else negative errno. */ int usb_acpi_set_power_state(struct usb_device *hdev, int index, bool enable) { struct usb_hub *hub = usb_hub_to_struct_hub(hdev); struct usb_port *port_dev; acpi_handle port_handle; unsigned char state; int port1 = index + 1; int error = -EINVAL; if (!hub) return -ENODEV; port_dev = hub->ports[port1 - 1]; port_handle = (acpi_handle) usb_get_hub_port_acpi_handle(hdev, port1); if (!port_handle) return error; if (enable) state = ACPI_STATE_D0; else state = ACPI_STATE_D3_COLD; error = acpi_bus_set_power(port_handle, state); if (!error) dev_dbg(&port_dev->dev, "acpi: power was set to %d\n", enable); else dev_dbg(&port_dev->dev, "acpi: power failed to be set\n"); return error; } EXPORT_SYMBOL_GPL(usb_acpi_set_power_state); /* * Private to usb-acpi, all the core needs to know is that * port_dev->location is non-zero when it has been set by the firmware. */ #define USB_ACPI_LOCATION_VALID (1 << 31) static void usb_acpi_get_connect_type(struct usb_port *port_dev, acpi_handle *handle) { enum usb_port_connect_type connect_type = USB_PORT_CONNECT_TYPE_UNKNOWN; struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; union acpi_object *upc = NULL; struct acpi_pld_info *pld = NULL; acpi_status status; /* * According to 9.14 in ACPI Spec 6.2. _PLD indicates whether usb port * is user visible and _UPC indicates whether it is connectable. If * the port was visible and connectable, it could be freely connected * and disconnected with USB devices. If no visible and connectable, * a usb device is directly hard-wired to the port. If no visible and * no connectable, the port would be not used. */ status = acpi_get_physical_device_location(handle, &pld); if (ACPI_SUCCESS(status) && pld) port_dev->location = USB_ACPI_LOCATION_VALID | pld->group_token << 8 | pld->group_position; status = acpi_evaluate_object(handle, "_UPC", NULL, &buffer); if (ACPI_FAILURE(status)) goto out; upc = buffer.pointer; if (!upc || (upc->type != ACPI_TYPE_PACKAGE) || upc->package.count != 4) goto out; /* UPC states port is connectable */ if (upc->package.elements[0].integer.value) if (!pld) ; /* keep connect_type as unknown */ else if (pld->user_visible) connect_type = USB_PORT_CONNECT_TYPE_HOT_PLUG; else connect_type = USB_PORT_CONNECT_TYPE_HARD_WIRED; else connect_type = USB_PORT_NOT_USED; out: port_dev->connect_type = connect_type; kfree(upc); ACPI_FREE(pld); } static struct acpi_device * usb_acpi_get_companion_for_port(struct usb_port *port_dev) { struct usb_device *udev; struct acpi_device *adev; acpi_handle *parent_handle; int port1; /* Get the struct usb_device point of port's hub */ udev = to_usb_device(port_dev->dev.parent->parent); /* * The root hub ports' parent is the root hub. The non-root-hub * ports' parent is the parent hub port which the hub is * connected to. */ if (!udev->parent) { adev = ACPI_COMPANION(&udev->dev); port1 = usb_hcd_find_raw_port_number(bus_to_hcd(udev->bus), port_dev->portnum); } else { parent_handle = usb_get_hub_port_acpi_handle(udev->parent, udev->portnum); if (!parent_handle) return NULL; adev = acpi_fetch_acpi_dev(parent_handle); port1 = port_dev->portnum; } return acpi_find_child_by_adr(adev, port1); } static struct acpi_device * usb_acpi_find_companion_for_port(struct usb_port *port_dev) { struct acpi_device *adev; adev = usb_acpi_get_companion_for_port(port_dev); if (!adev) return NULL; usb_acpi_get_connect_type(port_dev, adev->handle); return adev; } static struct acpi_device * usb_acpi_find_companion_for_device(struct usb_device *udev) { struct acpi_device *adev; struct usb_port *port_dev; struct usb_hub *hub; if (!udev->parent) { /* * root hub is only child (_ADR=0) under its parent, the HC. * sysdev pointer is the HC as seen from firmware. */ adev = ACPI_COMPANION(udev->bus->sysdev); return acpi_find_child_device(adev, 0, false); } hub = usb_hub_to_struct_hub(udev->parent); if (!hub) return NULL; /* * This is an embedded USB device connected to a port and such * devices share port's ACPI companion. */ port_dev = hub->ports[udev->portnum - 1]; return usb_acpi_get_companion_for_port(port_dev); } static struct acpi_device *usb_acpi_find_companion(struct device *dev) { /* * The USB hierarchy like following: * * Device (EHC1) * Device (HUBN) * Device (PR01) * Device (PR11) * Device (PR12) * Device (FN12) * Device (FN13) * Device (PR13) * ... * where HUBN is root hub, and PRNN are USB ports and devices * connected to them, and FNNN are individualk functions for * connected composite USB devices. PRNN and FNNN may contain * _CRS and other methods describing sideband resources for * the connected device. * * On the kernel side both root hub and embedded USB devices are * represented as instances of usb_device structure, and ports * are represented as usb_port structures, so the whole process * is split into 2 parts: finding companions for devices and * finding companions for ports. * * Note that we do not handle individual functions of composite * devices yet, for that we would need to assign companions to * devices corresponding to USB interfaces. */ if (is_usb_device(dev)) return usb_acpi_find_companion_for_device(to_usb_device(dev)); else if (is_usb_port(dev)) return usb_acpi_find_companion_for_port(to_usb_port(dev)); return NULL; } static bool usb_acpi_bus_match(struct device *dev) { return is_usb_device(dev) || is_usb_port(dev); } static struct acpi_bus_type usb_acpi_bus = { .name = "USB", .match = usb_acpi_bus_match, .find_companion = usb_acpi_find_companion, }; int usb_acpi_register(void) { return register_acpi_bus_type(&usb_acpi_bus); } void usb_acpi_unregister(void) { unregister_acpi_bus_type(&usb_acpi_bus); } |
1 1 1 1 2 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 | // SPDX-License-Identifier: GPL-2.0-only /* * Copyright 2013 Cisco Systems, Inc. and/or its affiliates. All rights reserved. */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/input.h> #include <linux/usb.h> #include <linux/hid.h> #include <linux/mutex.h> #include <linux/videodev2.h> #include <asm/unaligned.h> #include <media/v4l2-device.h> #include <media/v4l2-ioctl.h> #include <media/v4l2-ctrls.h> #include <media/v4l2-event.h> /* * 'Thanko's Raremono' is a Japanese si4734-based AM/FM/SW USB receiver: * * http://www.raremono.jp/product/484.html/ * * The USB protocol has been reversed engineered using wireshark, initially * by Dinesh Ram <dinesh.ram@cern.ch> and finished by Hans Verkuil * <hverkuil@xs4all.nl>. * * Sadly the firmware used in this product hides lots of goodies since the * si4734 has more features than are supported by the firmware. Oh well... */ /* driver and module definitions */ MODULE_AUTHOR("Hans Verkuil <hverkuil@xs4all.nl>"); MODULE_DESCRIPTION("Thanko's Raremono AM/FM/SW Receiver USB driver"); MODULE_LICENSE("GPL v2"); /* * The Device announces itself as Cygnal Integrated Products, Inc. * * The vendor and product IDs (and in fact all other lsusb information as * well) are identical to the si470x Silicon Labs USB FM Radio Reference * Design board, even though this card has a si4734 device. Clearly the * designer of this product never bothered to change the USB IDs. */ /* USB Device ID List */ static const struct usb_device_id usb_raremono_device_table[] = { {USB_DEVICE_AND_INTERFACE_INFO(0x10c4, 0x818a, USB_CLASS_HID, 0, 0) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, usb_raremono_device_table); #define BUFFER_LENGTH 64 /* Timeout is set to a high value, could probably be reduced. Need more tests */ #define USB_TIMEOUT 10000 /* Frequency limits in KHz */ #define FM_FREQ_RANGE_LOW 64000 #define FM_FREQ_RANGE_HIGH 108000 #define AM_FREQ_RANGE_LOW 520 #define AM_FREQ_RANGE_HIGH 1710 #define SW_FREQ_RANGE_LOW 2300 #define SW_FREQ_RANGE_HIGH 26100 enum { BAND_FM, BAND_AM, BAND_SW }; static const struct v4l2_frequency_band bands[] = { /* Band FM */ { .type = V4L2_TUNER_RADIO, .index = 0, .capability = V4L2_TUNER_CAP_LOW | V4L2_TUNER_CAP_STEREO | V4L2_TUNER_CAP_FREQ_BANDS, .rangelow = FM_FREQ_RANGE_LOW * 16, .rangehigh = FM_FREQ_RANGE_HIGH * 16, .modulation = V4L2_BAND_MODULATION_FM, }, /* Band AM */ { .type = V4L2_TUNER_RADIO, .index = 1, .capability = V4L2_TUNER_CAP_LOW | V4L2_TUNER_CAP_FREQ_BANDS, .rangelow = AM_FREQ_RANGE_LOW * 16, .rangehigh = AM_FREQ_RANGE_HIGH * 16, .modulation = V4L2_BAND_MODULATION_AM, }, /* Band SW */ { .type = V4L2_TUNER_RADIO, .index = 2, .capability = V4L2_TUNER_CAP_LOW | V4L2_TUNER_CAP_FREQ_BANDS, .rangelow = SW_FREQ_RANGE_LOW * 16, .rangehigh = SW_FREQ_RANGE_HIGH * 16, .modulation = V4L2_BAND_MODULATION_AM, }, }; struct raremono_device { struct usb_device *usbdev; struct usb_interface *intf; struct video_device vdev; struct v4l2_device v4l2_dev; struct mutex lock; u8 *buffer; u32 band; unsigned curfreq; }; static inline struct raremono_device *to_raremono_dev(struct v4l2_device *v4l2_dev) { return container_of(v4l2_dev, struct raremono_device, v4l2_dev); } /* Set frequency. */ static int raremono_cmd_main(struct raremono_device *radio, unsigned band, unsigned freq) { unsigned band_offset; int ret; switch (band) { case BAND_FM: band_offset = 1; freq /= 10; break; case BAND_AM: band_offset = 0; break; default: band_offset = 2; break; } radio->buffer[0] = 0x04 + band_offset; radio->buffer[1] = freq >> 8; radio->buffer[2] = freq & 0xff; ret = usb_control_msg(radio->usbdev, usb_sndctrlpipe(radio->usbdev, 0), HID_REQ_SET_REPORT, USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_OUT, 0x0300 + radio->buffer[0], 2, radio->buffer, 3, USB_TIMEOUT); if (ret < 0) { dev_warn(radio->v4l2_dev.dev, "%s failed (%d)\n", __func__, ret); return ret; } radio->curfreq = (band == BAND_FM) ? freq * 10 : freq; return 0; } /* Handle unplugging the device. * We call video_unregister_device in any case. * The last function called in this procedure is * usb_raremono_device_release. */ static void usb_raremono_disconnect(struct usb_interface *intf) { struct raremono_device *radio = to_raremono_dev(usb_get_intfdata(intf)); dev_info(&intf->dev, "Thanko's Raremono disconnected\n"); mutex_lock(&radio->lock); usb_set_intfdata(intf, NULL); video_unregister_device(&radio->vdev); v4l2_device_disconnect(&radio->v4l2_dev); mutex_unlock(&radio->lock); v4l2_device_put(&radio->v4l2_dev); } /* * Linux Video interface */ static int vidioc_querycap(struct file *file, void *priv, struct v4l2_capability *v) { struct raremono_device *radio = video_drvdata(file); strscpy(v->driver, "radio-raremono", sizeof(v->driver)); strscpy(v->card, "Thanko's Raremono", sizeof(v->card)); usb_make_path(radio->usbdev, v->bus_info, sizeof(v->bus_info)); return 0; } static int vidioc_enum_freq_bands(struct file *file, void *priv, struct v4l2_frequency_band *band) { if (band->tuner != 0) return -EINVAL; if (band->index >= ARRAY_SIZE(bands)) return -EINVAL; *band = bands[band->index]; return 0; } static int vidioc_g_tuner(struct file *file, void *priv, struct v4l2_tuner *v) { struct raremono_device *radio = video_drvdata(file); int ret; if (v->index > 0) return -EINVAL; strscpy(v->name, "AM/FM/SW", sizeof(v->name)); v->capability = V4L2_TUNER_CAP_LOW | V4L2_TUNER_CAP_STEREO | V4L2_TUNER_CAP_FREQ_BANDS; v->rangelow = AM_FREQ_RANGE_LOW * 16; v->rangehigh = FM_FREQ_RANGE_HIGH * 16; v->rxsubchans = V4L2_TUNER_SUB_STEREO | V4L2_TUNER_SUB_MONO; v->audmode = (radio->curfreq < FM_FREQ_RANGE_LOW) ? V4L2_TUNER_MODE_MONO : V4L2_TUNER_MODE_STEREO; memset(radio->buffer, 1, BUFFER_LENGTH); ret = usb_control_msg(radio->usbdev, usb_rcvctrlpipe(radio->usbdev, 0), 1, 0xa1, 0x030d, 2, radio->buffer, BUFFER_LENGTH, USB_TIMEOUT); if (ret < 0) { dev_warn(radio->v4l2_dev.dev, "%s failed (%d)\n", __func__, ret); return ret; } v->signal = ((radio->buffer[1] & 0xf) << 8 | radio->buffer[2]) << 4; return 0; } static int vidioc_s_tuner(struct file *file, void *priv, const struct v4l2_tuner *v) { return v->index ? -EINVAL : 0; } static int vidioc_s_frequency(struct file *file, void *priv, const struct v4l2_frequency *f) { struct raremono_device *radio = video_drvdata(file); u32 freq; unsigned band; if (f->tuner != 0 || f->type != V4L2_TUNER_RADIO) return -EINVAL; if (f->frequency >= (FM_FREQ_RANGE_LOW + SW_FREQ_RANGE_HIGH) * 8) band = BAND_FM; else if (f->frequency <= (AM_FREQ_RANGE_HIGH + SW_FREQ_RANGE_LOW) * 8) band = BAND_AM; else band = BAND_SW; freq = clamp_t(u32, f->frequency, bands[band].rangelow, bands[band].rangehigh); return raremono_cmd_main(radio, band, freq / 16); } static int vidioc_g_frequency(struct file *file, void *priv, struct v4l2_frequency *f) { struct raremono_device *radio = video_drvdata(file); if (f->tuner != 0) return -EINVAL; f->type = V4L2_TUNER_RADIO; f->frequency = radio->curfreq * 16; return 0; } static void raremono_device_release(struct v4l2_device *v4l2_dev) { struct raremono_device *radio = to_raremono_dev(v4l2_dev); kfree(radio->buffer); kfree(radio); } /* File system interface */ static const struct v4l2_file_operations usb_raremono_fops = { .owner = THIS_MODULE, .open = v4l2_fh_open, .release = v4l2_fh_release, .unlocked_ioctl = video_ioctl2, }; static const struct v4l2_ioctl_ops usb_raremono_ioctl_ops = { .vidioc_querycap = vidioc_querycap, .vidioc_g_tuner = vidioc_g_tuner, .vidioc_s_tuner = vidioc_s_tuner, .vidioc_g_frequency = vidioc_g_frequency, .vidioc_s_frequency = vidioc_s_frequency, .vidioc_enum_freq_bands = vidioc_enum_freq_bands, }; /* check if the device is present and register with v4l and usb if it is */ static int usb_raremono_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct raremono_device *radio; int retval = 0; radio = kzalloc(sizeof(*radio), GFP_KERNEL); if (!radio) return -ENOMEM; radio->buffer = kmalloc(BUFFER_LENGTH, GFP_KERNEL); if (!radio->buffer) { kfree(radio); return -ENOMEM; } radio->usbdev = interface_to_usbdev(intf); radio->intf = intf; /* * This device uses the same USB IDs as the si470x SiLabs reference * design. So do an additional check: attempt to read the device ID * from the si470x: the lower 12 bits are 0x0242 for the si470x. The * Raremono always returns 0x0800 (the meaning of that is unknown, but * at least it works). * * We use this check to determine which device we are dealing with. */ msleep(20); retval = usb_control_msg(radio->usbdev, usb_rcvctrlpipe(radio->usbdev, 0), HID_REQ_GET_REPORT, USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_IN, 1, 2, radio->buffer, 3, 500); if (retval != 3 || (get_unaligned_be16(&radio->buffer[1]) & 0xfff) == 0x0242) { dev_info(&intf->dev, "this is not Thanko's Raremono.\n"); retval = -ENODEV; goto free_mem; } dev_info(&intf->dev, "Thanko's Raremono connected: (%04X:%04X)\n", id->idVendor, id->idProduct); retval = v4l2_device_register(&intf->dev, &radio->v4l2_dev); if (retval < 0) { dev_err(&intf->dev, "couldn't register v4l2_device\n"); goto free_mem; } mutex_init(&radio->lock); strscpy(radio->vdev.name, radio->v4l2_dev.name, sizeof(radio->vdev.name)); radio->vdev.v4l2_dev = &radio->v4l2_dev; radio->vdev.fops = &usb_raremono_fops; radio->vdev.ioctl_ops = &usb_raremono_ioctl_ops; radio->vdev.lock = &radio->lock; radio->vdev.release = video_device_release_empty; radio->vdev.device_caps = V4L2_CAP_TUNER | V4L2_CAP_RADIO; radio->v4l2_dev.release = raremono_device_release; usb_set_intfdata(intf, &radio->v4l2_dev); video_set_drvdata(&radio->vdev, radio); raremono_cmd_main(radio, BAND_FM, 95160); retval = video_register_device(&radio->vdev, VFL_TYPE_RADIO, -1); if (retval == 0) { dev_info(&intf->dev, "V4L2 device registered as %s\n", video_device_node_name(&radio->vdev)); return 0; } dev_err(&intf->dev, "could not register video device\n"); v4l2_device_unregister(&radio->v4l2_dev); free_mem: kfree(radio->buffer); kfree(radio); return retval; } /* USB subsystem interface */ static struct usb_driver usb_raremono_driver = { .name = "radio-raremono", .probe = usb_raremono_probe, .disconnect = usb_raremono_disconnect, .id_table = usb_raremono_device_table, }; module_usb_driver(usb_raremono_driver); |
5 5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 | // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2017 Mellanox Technologies Ltd. All rights reserved. */ #include "rxe.h" #include "rxe_hw_counters.h" static const struct rdma_stat_desc rxe_counter_descs[] = { [RXE_CNT_SENT_PKTS].name = "sent_pkts", [RXE_CNT_RCVD_PKTS].name = "rcvd_pkts", [RXE_CNT_DUP_REQ].name = "duplicate_request", [RXE_CNT_OUT_OF_SEQ_REQ].name = "out_of_seq_request", [RXE_CNT_RCV_RNR].name = "rcvd_rnr_err", [RXE_CNT_SND_RNR].name = "send_rnr_err", [RXE_CNT_RCV_SEQ_ERR].name = "rcvd_seq_err", [RXE_CNT_COMPLETER_SCHED].name = "ack_deferred", [RXE_CNT_RETRY_EXCEEDED].name = "retry_exceeded_err", [RXE_CNT_RNR_RETRY_EXCEEDED].name = "retry_rnr_exceeded_err", [RXE_CNT_COMP_RETRY].name = "completer_retry_err", [RXE_CNT_SEND_ERR].name = "send_err", [RXE_CNT_LINK_DOWNED].name = "link_downed", [RXE_CNT_RDMA_SEND].name = "rdma_sends", [RXE_CNT_RDMA_RECV].name = "rdma_recvs", }; int rxe_ib_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, u32 port, int index) { struct rxe_dev *dev = to_rdev(ibdev); unsigned int cnt; if (!port || !stats) return -EINVAL; for (cnt = 0; cnt < ARRAY_SIZE(rxe_counter_descs); cnt++) stats->value[cnt] = atomic64_read(&dev->stats_counters[cnt]); return ARRAY_SIZE(rxe_counter_descs); } struct rdma_hw_stats *rxe_ib_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num) { BUILD_BUG_ON(ARRAY_SIZE(rxe_counter_descs) != RXE_NUM_OF_COUNTERS); return rdma_alloc_hw_stats_struct(rxe_counter_descs, ARRAY_SIZE(rxe_counter_descs), RDMA_HW_STATS_DEFAULT_LIFESPAN); } |
22 22 12 13 42 37 17 3 5 1 1 4 2 30 29 8 30 32 2 2 2 2 1 3 1 16 1 1 1 2 2 2 2 2 3 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 | // SPDX-License-Identifier: GPL-2.0-or-later /* * OSS compatible sequencer driver * * Timer control routines * * Copyright (C) 1998,99 Takashi Iwai <tiwai@suse.de> */ #include "seq_oss_timer.h" #include "seq_oss_event.h" #include <sound/seq_oss_legacy.h> #include <linux/slab.h> /* */ #define MIN_OSS_TEMPO 8 #define MAX_OSS_TEMPO 360 #define MIN_OSS_TIMEBASE 1 #define MAX_OSS_TIMEBASE 1000 /* */ static void calc_alsa_tempo(struct seq_oss_timer *timer); static int send_timer_event(struct seq_oss_devinfo *dp, int type, int value); /* * create and register a new timer. * if queue is not started yet, start it. */ struct seq_oss_timer * snd_seq_oss_timer_new(struct seq_oss_devinfo *dp) { struct seq_oss_timer *rec; rec = kzalloc(sizeof(*rec), GFP_KERNEL); if (rec == NULL) return NULL; rec->dp = dp; rec->cur_tick = 0; rec->realtime = 0; rec->running = 0; rec->oss_tempo = 60; rec->oss_timebase = 100; calc_alsa_tempo(rec); return rec; } /* * delete timer. * if no more timer exists, stop the queue. */ void snd_seq_oss_timer_delete(struct seq_oss_timer *rec) { if (rec) { snd_seq_oss_timer_stop(rec); kfree(rec); } } /* * process one timing event * return 1 : event proceseed -- skip this event * 0 : not a timer event -- enqueue this event */ int snd_seq_oss_process_timer_event(struct seq_oss_timer *rec, union evrec *ev) { abstime_t parm = ev->t.time; if (ev->t.code == EV_TIMING) { switch (ev->t.cmd) { case TMR_WAIT_REL: parm += rec->cur_tick; rec->realtime = 0; fallthrough; case TMR_WAIT_ABS: if (parm == 0) { rec->realtime = 1; } else if (parm >= rec->cur_tick) { rec->realtime = 0; rec->cur_tick = parm; } return 1; /* skip this event */ case TMR_START: snd_seq_oss_timer_start(rec); return 1; } } else if (ev->s.code == SEQ_WAIT) { /* time = from 1 to 3 bytes */ parm = (ev->echo >> 8) & 0xffffff; if (parm > rec->cur_tick) { /* set next event time */ rec->cur_tick = parm; rec->realtime = 0; } return 1; } return 0; } /* * convert tempo units */ static void calc_alsa_tempo(struct seq_oss_timer *timer) { timer->tempo = (60 * 1000000) / timer->oss_tempo; timer->ppq = timer->oss_timebase; } /* * dispatch a timer event */ static int send_timer_event(struct seq_oss_devinfo *dp, int type, int value) { struct snd_seq_event ev; memset(&ev, 0, sizeof(ev)); ev.type = type; ev.source.client = dp->cseq; ev.source.port = 0; ev.dest.client = SNDRV_SEQ_CLIENT_SYSTEM; ev.dest.port = SNDRV_SEQ_PORT_SYSTEM_TIMER; ev.queue = dp->queue; ev.data.queue.queue = dp->queue; ev.data.queue.param.value = value; return snd_seq_kernel_client_dispatch(dp->cseq, &ev, 1, 0); } /* * set queue tempo and start queue */ int snd_seq_oss_timer_start(struct seq_oss_timer *timer) { struct seq_oss_devinfo *dp = timer->dp; struct snd_seq_queue_tempo tmprec; if (timer->running) snd_seq_oss_timer_stop(timer); memset(&tmprec, 0, sizeof(tmprec)); tmprec.queue = dp->queue; tmprec.ppq = timer->ppq; tmprec.tempo = timer->tempo; snd_seq_set_queue_tempo(dp->cseq, &tmprec); send_timer_event(dp, SNDRV_SEQ_EVENT_START, 0); timer->running = 1; timer->cur_tick = 0; return 0; } /* * stop queue */ int snd_seq_oss_timer_stop(struct seq_oss_timer *timer) { if (! timer->running) return 0; send_timer_event(timer->dp, SNDRV_SEQ_EVENT_STOP, 0); timer->running = 0; return 0; } /* * continue queue */ int snd_seq_oss_timer_continue(struct seq_oss_timer *timer) { if (timer->running) return 0; send_timer_event(timer->dp, SNDRV_SEQ_EVENT_CONTINUE, 0); timer->running = 1; return 0; } /* * change queue tempo */ int snd_seq_oss_timer_tempo(struct seq_oss_timer *timer, int value) { if (value < MIN_OSS_TEMPO) value = MIN_OSS_TEMPO; else if (value > MAX_OSS_TEMPO) value = MAX_OSS_TEMPO; timer->oss_tempo = value; calc_alsa_tempo(timer); if (timer->running) send_timer_event(timer->dp, SNDRV_SEQ_EVENT_TEMPO, timer->tempo); return 0; } /* * ioctls */ int snd_seq_oss_timer_ioctl(struct seq_oss_timer *timer, unsigned int cmd, int __user *arg) { int value; if (cmd == SNDCTL_SEQ_CTRLRATE) { /* if *arg == 0, just return the current rate */ if (get_user(value, arg)) return -EFAULT; if (value) return -EINVAL; value = ((timer->oss_tempo * timer->oss_timebase) + 30) / 60; return put_user(value, arg) ? -EFAULT : 0; } if (timer->dp->seq_mode == SNDRV_SEQ_OSS_MODE_SYNTH) return 0; switch (cmd) { case SNDCTL_TMR_START: return snd_seq_oss_timer_start(timer); case SNDCTL_TMR_STOP: return snd_seq_oss_timer_stop(timer); case SNDCTL_TMR_CONTINUE: return snd_seq_oss_timer_continue(timer); case SNDCTL_TMR_TEMPO: if (get_user(value, arg)) return -EFAULT; return snd_seq_oss_timer_tempo(timer, value); case SNDCTL_TMR_TIMEBASE: if (get_user(value, arg)) return -EFAULT; if (value < MIN_OSS_TIMEBASE) value = MIN_OSS_TIMEBASE; else if (value > MAX_OSS_TIMEBASE) value = MAX_OSS_TIMEBASE; timer->oss_timebase = value; calc_alsa_tempo(timer); return 0; case SNDCTL_TMR_METRONOME: case SNDCTL_TMR_SELECT: case SNDCTL_TMR_SOURCE: /* not supported */ return 0; } return 0; } |
9 1 3 7 9 9 9 3 4 5 9 6 3 9 12 12 10 5 6 4 6 8 4 7 9 5 4 9 4 5 9 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 | /* * xxHash - Extremely Fast Hash algorithm * Copyright (C) 2012-2016, Yann Collet. * * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * This program is free software; you can redistribute it and/or modify it under * the terms of the GNU General Public License version 2 as published by the * Free Software Foundation. This program is dual-licensed; you may select * either version 2 of the GNU General Public License ("GPL") or BSD license * ("BSD"). * * You can contact the author at: * - xxHash homepage: https://cyan4973.github.io/xxHash/ * - xxHash source repository: https://github.com/Cyan4973/xxHash */ #include <asm/unaligned.h> #include <linux/errno.h> #include <linux/compiler.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/string.h> #include <linux/xxhash.h> /*-************************************* * Macros **************************************/ #define xxh_rotl32(x, r) ((x << r) | (x >> (32 - r))) #define xxh_rotl64(x, r) ((x << r) | (x >> (64 - r))) #ifdef __LITTLE_ENDIAN # define XXH_CPU_LITTLE_ENDIAN 1 #else # define XXH_CPU_LITTLE_ENDIAN 0 #endif /*-************************************* * Constants **************************************/ static const uint32_t PRIME32_1 = 2654435761U; static const uint32_t PRIME32_2 = 2246822519U; static const uint32_t PRIME32_3 = 3266489917U; static const uint32_t PRIME32_4 = 668265263U; static const uint32_t PRIME32_5 = 374761393U; static const uint64_t PRIME64_1 = 11400714785074694791ULL; static const uint64_t PRIME64_2 = 14029467366897019727ULL; static const uint64_t PRIME64_3 = 1609587929392839161ULL; static const uint64_t PRIME64_4 = 9650029242287828579ULL; static const uint64_t PRIME64_5 = 2870177450012600261ULL; /*-************************** * Utils ***************************/ void xxh32_copy_state(struct xxh32_state *dst, const struct xxh32_state *src) { memcpy(dst, src, sizeof(*dst)); } EXPORT_SYMBOL(xxh32_copy_state); void xxh64_copy_state(struct xxh64_state *dst, const struct xxh64_state *src) { memcpy(dst, src, sizeof(*dst)); } EXPORT_SYMBOL(xxh64_copy_state); /*-*************************** * Simple Hash Functions ****************************/ static uint32_t xxh32_round(uint32_t seed, const uint32_t input) { seed += input * PRIME32_2; seed = xxh_rotl32(seed, 13); seed *= PRIME32_1; return seed; } uint32_t xxh32(const void *input, const size_t len, const uint32_t seed) { const uint8_t *p = (const uint8_t *)input; const uint8_t *b_end = p + len; uint32_t h32; if (len >= 16) { const uint8_t *const limit = b_end - 16; uint32_t v1 = seed + PRIME32_1 + PRIME32_2; uint32_t v2 = seed + PRIME32_2; uint32_t v3 = seed + 0; uint32_t v4 = seed - PRIME32_1; do { v1 = xxh32_round(v1, get_unaligned_le32(p)); p += 4; v2 = xxh32_round(v2, get_unaligned_le32(p)); p += 4; v3 = xxh32_round(v3, get_unaligned_le32(p)); p += 4; v4 = xxh32_round(v4, get_unaligned_le32(p)); p += 4; } while (p <= limit); h32 = xxh_rotl32(v1, 1) + xxh_rotl32(v2, 7) + xxh_rotl32(v3, 12) + xxh_rotl32(v4, 18); } else { h32 = seed + PRIME32_5; } h32 += (uint32_t)len; while (p + 4 <= b_end) { h32 += get_unaligned_le32(p) * PRIME32_3; h32 = xxh_rotl32(h32, 17) * PRIME32_4; p += 4; } while (p < b_end) { h32 += (*p) * PRIME32_5; h32 = xxh_rotl32(h32, 11) * PRIME32_1; p++; } h32 ^= h32 >> 15; h32 *= PRIME32_2; h32 ^= h32 >> 13; h32 *= PRIME32_3; h32 ^= h32 >> 16; return h32; } EXPORT_SYMBOL(xxh32); static uint64_t xxh64_round(uint64_t acc, const uint64_t input) { acc += input * PRIME64_2; acc = xxh_rotl64(acc, 31); acc *= PRIME64_1; return acc; } static uint64_t xxh64_merge_round(uint64_t acc, uint64_t val) { val = xxh64_round(0, val); acc ^= val; acc = acc * PRIME64_1 + PRIME64_4; return acc; } uint64_t xxh64(const void *input, const size_t len, const uint64_t seed) { const uint8_t *p = (const uint8_t *)input; const uint8_t *const b_end = p + len; uint64_t h64; if (len >= 32) { const uint8_t *const limit = b_end - 32; uint64_t v1 = seed + PRIME64_1 + PRIME64_2; uint64_t v2 = seed + PRIME64_2; uint64_t v3 = seed + 0; uint64_t v4 = seed - PRIME64_1; do { v1 = xxh64_round(v1, get_unaligned_le64(p)); p += 8; v2 = xxh64_round(v2, get_unaligned_le64(p)); p += 8; v3 = xxh64_round(v3, get_unaligned_le64(p)); p += 8; v4 = xxh64_round(v4, get_unaligned_le64(p)); p += 8; } while (p <= limit); h64 = xxh_rotl64(v1, 1) + xxh_rotl64(v2, 7) + xxh_rotl64(v3, 12) + xxh_rotl64(v4, 18); h64 = xxh64_merge_round(h64, v1); h64 = xxh64_merge_round(h64, v2); h64 = xxh64_merge_round(h64, v3); h64 = xxh64_merge_round(h64, v4); } else { h64 = seed + PRIME64_5; } h64 += (uint64_t)len; while (p + 8 <= b_end) { const uint64_t k1 = xxh64_round(0, get_unaligned_le64(p)); h64 ^= k1; h64 = xxh_rotl64(h64, 27) * PRIME64_1 + PRIME64_4; p += 8; } if (p + 4 <= b_end) { h64 ^= (uint64_t)(get_unaligned_le32(p)) * PRIME64_1; h64 = xxh_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; p += 4; } while (p < b_end) { h64 ^= (*p) * PRIME64_5; h64 = xxh_rotl64(h64, 11) * PRIME64_1; p++; } h64 ^= h64 >> 33; h64 *= PRIME64_2; h64 ^= h64 >> 29; h64 *= PRIME64_3; h64 ^= h64 >> 32; return h64; } EXPORT_SYMBOL(xxh64); /*-************************************************** * Advanced Hash Functions ***************************************************/ void xxh32_reset(struct xxh32_state *statePtr, const uint32_t seed) { /* use a local state for memcpy() to avoid strict-aliasing warnings */ struct xxh32_state state; memset(&state, 0, sizeof(state)); state.v1 = seed + PRIME32_1 + PRIME32_2; state.v2 = seed + PRIME32_2; state.v3 = seed + 0; state.v4 = seed - PRIME32_1; memcpy(statePtr, &state, sizeof(state)); } EXPORT_SYMBOL(xxh32_reset); void xxh64_reset(struct xxh64_state *statePtr, const uint64_t seed) { /* use a local state for memcpy() to avoid strict-aliasing warnings */ struct xxh64_state state; memset(&state, 0, sizeof(state)); state.v1 = seed + PRIME64_1 + PRIME64_2; state.v2 = seed + PRIME64_2; state.v3 = seed + 0; state.v4 = seed - PRIME64_1; memcpy(statePtr, &state, sizeof(state)); } EXPORT_SYMBOL(xxh64_reset); int xxh32_update(struct xxh32_state *state, const void *input, const size_t len) { const uint8_t *p = (const uint8_t *)input; const uint8_t *const b_end = p + len; if (input == NULL) return -EINVAL; state->total_len_32 += (uint32_t)len; state->large_len |= (len >= 16) | (state->total_len_32 >= 16); if (state->memsize + len < 16) { /* fill in tmp buffer */ memcpy((uint8_t *)(state->mem32) + state->memsize, input, len); state->memsize += (uint32_t)len; return 0; } if (state->memsize) { /* some data left from previous update */ const uint32_t *p32 = state->mem32; memcpy((uint8_t *)(state->mem32) + state->memsize, input, 16 - state->memsize); state->v1 = xxh32_round(state->v1, get_unaligned_le32(p32)); p32++; state->v2 = xxh32_round(state->v2, get_unaligned_le32(p32)); p32++; state->v3 = xxh32_round(state->v3, get_unaligned_le32(p32)); p32++; state->v4 = xxh32_round(state->v4, get_unaligned_le32(p32)); p32++; p += 16-state->memsize; state->memsize = 0; } if (p <= b_end - 16) { const uint8_t *const limit = b_end - 16; uint32_t v1 = state->v1; uint32_t v2 = state->v2; uint32_t v3 = state->v3; uint32_t v4 = state->v4; do { v1 = xxh32_round(v1, get_unaligned_le32(p)); p += 4; v2 = xxh32_round(v2, get_unaligned_le32(p)); p += 4; v3 = xxh32_round(v3, get_unaligned_le32(p)); p += 4; v4 = xxh32_round(v4, get_unaligned_le32(p)); p += 4; } while (p <= limit); state->v1 = v1; state->v2 = v2; state->v3 = v3; state->v4 = v4; } if (p < b_end) { memcpy(state->mem32, p, (size_t)(b_end-p)); state->memsize = (uint32_t)(b_end-p); } return 0; } EXPORT_SYMBOL(xxh32_update); uint32_t xxh32_digest(const struct xxh32_state *state) { const uint8_t *p = (const uint8_t *)state->mem32; const uint8_t *const b_end = (const uint8_t *)(state->mem32) + state->memsize; uint32_t h32; if (state->large_len) { h32 = xxh_rotl32(state->v1, 1) + xxh_rotl32(state->v2, 7) + xxh_rotl32(state->v3, 12) + xxh_rotl32(state->v4, 18); } else { h32 = state->v3 /* == seed */ + PRIME32_5; } h32 += state->total_len_32; while (p + 4 <= b_end) { h32 += get_unaligned_le32(p) * PRIME32_3; h32 = xxh_rotl32(h32, 17) * PRIME32_4; p += 4; } while (p < b_end) { h32 += (*p) * PRIME32_5; h32 = xxh_rotl32(h32, 11) * PRIME32_1; p++; } h32 ^= h32 >> 15; h32 *= PRIME32_2; h32 ^= h32 >> 13; h32 *= PRIME32_3; h32 ^= h32 >> 16; return h32; } EXPORT_SYMBOL(xxh32_digest); int xxh64_update(struct xxh64_state *state, const void *input, const size_t len) { const uint8_t *p = (const uint8_t *)input; const uint8_t *const b_end = p + len; if (input == NULL) return -EINVAL; state->total_len += len; if (state->memsize + len < 32) { /* fill in tmp buffer */ memcpy(((uint8_t *)state->mem64) + state->memsize, input, len); state->memsize += (uint32_t)len; return 0; } if (state->memsize) { /* tmp buffer is full */ uint64_t *p64 = state->mem64; memcpy(((uint8_t *)p64) + state->memsize, input, 32 - state->memsize); state->v1 = xxh64_round(state->v1, get_unaligned_le64(p64)); p64++; state->v2 = xxh64_round(state->v2, get_unaligned_le64(p64)); p64++; state->v3 = xxh64_round(state->v3, get_unaligned_le64(p64)); p64++; state->v4 = xxh64_round(state->v4, get_unaligned_le64(p64)); p += 32 - state->memsize; state->memsize = 0; } if (p + 32 <= b_end) { const uint8_t *const limit = b_end - 32; uint64_t v1 = state->v1; uint64_t v2 = state->v2; uint64_t v3 = state->v3; uint64_t v4 = state->v4; do { v1 = xxh64_round(v1, get_unaligned_le64(p)); p += 8; v2 = xxh64_round(v2, get_unaligned_le64(p)); p += 8; v3 = xxh64_round(v3, get_unaligned_le64(p)); p += 8; v4 = xxh64_round(v4, get_unaligned_le64(p)); p += 8; } while (p <= limit); state->v1 = v1; state->v2 = v2; state->v3 = v3; state->v4 = v4; } if (p < b_end) { memcpy(state->mem64, p, (size_t)(b_end-p)); state->memsize = (uint32_t)(b_end - p); } return 0; } EXPORT_SYMBOL(xxh64_update); uint64_t xxh64_digest(const struct xxh64_state *state) { const uint8_t *p = (const uint8_t *)state->mem64; const uint8_t *const b_end = (const uint8_t *)state->mem64 + state->memsize; uint64_t h64; if (state->total_len >= 32) { const uint64_t v1 = state->v1; const uint64_t v2 = state->v2; const uint64_t v3 = state->v3; const uint64_t v4 = state->v4; h64 = xxh_rotl64(v1, 1) + xxh_rotl64(v2, 7) + xxh_rotl64(v3, 12) + xxh_rotl64(v4, 18); h64 = xxh64_merge_round(h64, v1); h64 = xxh64_merge_round(h64, v2); h64 = xxh64_merge_round(h64, v3); h64 = xxh64_merge_round(h64, v4); } else { h64 = state->v3 + PRIME64_5; } h64 += (uint64_t)state->total_len; while (p + 8 <= b_end) { const uint64_t k1 = xxh64_round(0, get_unaligned_le64(p)); h64 ^= k1; h64 = xxh_rotl64(h64, 27) * PRIME64_1 + PRIME64_4; p += 8; } if (p + 4 <= b_end) { h64 ^= (uint64_t)(get_unaligned_le32(p)) * PRIME64_1; h64 = xxh_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; p += 4; } while (p < b_end) { h64 ^= (*p) * PRIME64_5; h64 = xxh_rotl64(h64, 11) * PRIME64_1; p++; } h64 ^= h64 >> 33; h64 *= PRIME64_2; h64 ^= h64 >> 29; h64 *= PRIME64_3; h64 ^= h64 >> 32; return h64; } EXPORT_SYMBOL(xxh64_digest); MODULE_LICENSE("Dual BSD/GPL"); MODULE_DESCRIPTION("xxHash"); |
4 4 4 4 4 1 1 1 3 1 23 9 13 12 5 2 8 1 23 9 18 9 10 4 4 4 1 2 3 4 44 7 7 6 1 40 6 41 5 1 4 4 4 25 11 20 10 25 10 6 3 6 6 6 3 6 5 1 1 1 1 1 1 2 2 2 2 6 6 16 7 10 6 3 6 6 10 11 11 3 3 2 4 1 1 1 2 2 1 1 1 1 6 6 3 3 4 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 | /* BlueZ - Bluetooth protocol stack for Linux Copyright (c) 2000-2001, 2010, Code Aurora Forum. All rights reserved. Copyright 2023-2024 NXP Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License version 2 as published by the Free Software Foundation; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS SOFTWARE IS DISCLAIMED. */ /* Bluetooth HCI connection handling. */ #include <linux/export.h> #include <linux/debugfs.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> #include <net/bluetooth/l2cap.h> #include <net/bluetooth/iso.h> #include <net/bluetooth/mgmt.h> #include "hci_request.h" #include "smp.h" #include "eir.h" struct sco_param { u16 pkt_type; u16 max_latency; u8 retrans_effort; }; struct conn_handle_t { struct hci_conn *conn; __u16 handle; }; static const struct sco_param esco_param_cvsd[] = { { EDR_ESCO_MASK & ~ESCO_2EV3, 0x000a, 0x01 }, /* S3 */ { EDR_ESCO_MASK & ~ESCO_2EV3, 0x0007, 0x01 }, /* S2 */ { EDR_ESCO_MASK | ESCO_EV3, 0x0007, 0x01 }, /* S1 */ { EDR_ESCO_MASK | ESCO_HV3, 0xffff, 0x01 }, /* D1 */ { EDR_ESCO_MASK | ESCO_HV1, 0xffff, 0x01 }, /* D0 */ }; static const struct sco_param sco_param_cvsd[] = { { EDR_ESCO_MASK | ESCO_HV3, 0xffff, 0xff }, /* D1 */ { EDR_ESCO_MASK | ESCO_HV1, 0xffff, 0xff }, /* D0 */ }; static const struct sco_param esco_param_msbc[] = { { EDR_ESCO_MASK & ~ESCO_2EV3, 0x000d, 0x02 }, /* T2 */ { EDR_ESCO_MASK | ESCO_EV3, 0x0008, 0x02 }, /* T1 */ }; /* This function requires the caller holds hdev->lock */ void hci_connect_le_scan_cleanup(struct hci_conn *conn, u8 status) { struct hci_conn_params *params; struct hci_dev *hdev = conn->hdev; struct smp_irk *irk; bdaddr_t *bdaddr; u8 bdaddr_type; bdaddr = &conn->dst; bdaddr_type = conn->dst_type; /* Check if we need to convert to identity address */ irk = hci_get_irk(hdev, bdaddr, bdaddr_type); if (irk) { bdaddr = &irk->bdaddr; bdaddr_type = irk->addr_type; } params = hci_pend_le_action_lookup(&hdev->pend_le_conns, bdaddr, bdaddr_type); if (!params) return; if (params->conn) { hci_conn_drop(params->conn); hci_conn_put(params->conn); params->conn = NULL; } if (!params->explicit_connect) return; /* If the status indicates successful cancellation of * the attempt (i.e. Unknown Connection Id) there's no point of * notifying failure since we'll go back to keep trying to * connect. The only exception is explicit connect requests * where a timeout + cancel does indicate an actual failure. */ if (status && status != HCI_ERROR_UNKNOWN_CONN_ID) mgmt_connect_failed(hdev, &conn->dst, conn->type, conn->dst_type, status); /* The connection attempt was doing scan for new RPA, and is * in scan phase. If params are not associated with any other * autoconnect action, remove them completely. If they are, just unmark * them as waiting for connection, by clearing explicit_connect field. */ params->explicit_connect = false; hci_pend_le_list_del_init(params); switch (params->auto_connect) { case HCI_AUTO_CONN_EXPLICIT: hci_conn_params_del(hdev, bdaddr, bdaddr_type); /* return instead of break to avoid duplicate scan update */ return; case HCI_AUTO_CONN_DIRECT: case HCI_AUTO_CONN_ALWAYS: hci_pend_le_list_add(params, &hdev->pend_le_conns); break; case HCI_AUTO_CONN_REPORT: hci_pend_le_list_add(params, &hdev->pend_le_reports); break; default: break; } hci_update_passive_scan(hdev); } static void hci_conn_cleanup(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; if (test_bit(HCI_CONN_PARAM_REMOVAL_PEND, &conn->flags)) hci_conn_params_del(conn->hdev, &conn->dst, conn->dst_type); if (test_and_clear_bit(HCI_CONN_FLUSH_KEY, &conn->flags)) hci_remove_link_key(hdev, &conn->dst); hci_chan_list_flush(conn); hci_conn_hash_del(hdev, conn); if (HCI_CONN_HANDLE_UNSET(conn->handle)) ida_free(&hdev->unset_handle_ida, conn->handle); if (conn->cleanup) conn->cleanup(conn); if (conn->type == SCO_LINK || conn->type == ESCO_LINK) { switch (conn->setting & SCO_AIRMODE_MASK) { case SCO_AIRMODE_CVSD: case SCO_AIRMODE_TRANSP: if (hdev->notify) hdev->notify(hdev, HCI_NOTIFY_DISABLE_SCO); break; } } else { if (hdev->notify) hdev->notify(hdev, HCI_NOTIFY_CONN_DEL); } debugfs_remove_recursive(conn->debugfs); hci_conn_del_sysfs(conn); hci_dev_put(hdev); } int hci_disconnect(struct hci_conn *conn, __u8 reason) { BT_DBG("hcon %p", conn); /* When we are central of an established connection and it enters * the disconnect timeout, then go ahead and try to read the * current clock offset. Processing of the result is done * within the event handling and hci_clock_offset_evt function. */ if (conn->type == ACL_LINK && conn->role == HCI_ROLE_MASTER && (conn->state == BT_CONNECTED || conn->state == BT_CONFIG)) { struct hci_dev *hdev = conn->hdev; struct hci_cp_read_clock_offset clkoff_cp; clkoff_cp.handle = cpu_to_le16(conn->handle); hci_send_cmd(hdev, HCI_OP_READ_CLOCK_OFFSET, sizeof(clkoff_cp), &clkoff_cp); } return hci_abort_conn(conn, reason); } static void hci_add_sco(struct hci_conn *conn, __u16 handle) { struct hci_dev *hdev = conn->hdev; struct hci_cp_add_sco cp; BT_DBG("hcon %p", conn); conn->state = BT_CONNECT; conn->out = true; conn->attempt++; cp.handle = cpu_to_le16(handle); cp.pkt_type = cpu_to_le16(conn->pkt_type); hci_send_cmd(hdev, HCI_OP_ADD_SCO, sizeof(cp), &cp); } static bool find_next_esco_param(struct hci_conn *conn, const struct sco_param *esco_param, int size) { if (!conn->parent) return false; for (; conn->attempt <= size; conn->attempt++) { if (lmp_esco_2m_capable(conn->parent) || (esco_param[conn->attempt - 1].pkt_type & ESCO_2EV3)) break; BT_DBG("hcon %p skipped attempt %d, eSCO 2M not supported", conn, conn->attempt); } return conn->attempt <= size; } static int configure_datapath_sync(struct hci_dev *hdev, struct bt_codec *codec) { int err; __u8 vnd_len, *vnd_data = NULL; struct hci_op_configure_data_path *cmd = NULL; /* Do not take below 2 checks as error since the 1st means user do not * want to use HFP offload mode and the 2nd means the vendor controller * do not need to send below HCI command for offload mode. */ if (!codec->data_path || !hdev->get_codec_config_data) return 0; err = hdev->get_codec_config_data(hdev, ESCO_LINK, codec, &vnd_len, &vnd_data); if (err < 0) goto error; cmd = kzalloc(sizeof(*cmd) + vnd_len, GFP_KERNEL); if (!cmd) { err = -ENOMEM; goto error; } err = hdev->get_data_path_id(hdev, &cmd->data_path_id); if (err < 0) goto error; cmd->vnd_len = vnd_len; memcpy(cmd->vnd_data, vnd_data, vnd_len); cmd->direction = 0x00; __hci_cmd_sync_status(hdev, HCI_CONFIGURE_DATA_PATH, sizeof(*cmd) + vnd_len, cmd, HCI_CMD_TIMEOUT); cmd->direction = 0x01; err = __hci_cmd_sync_status(hdev, HCI_CONFIGURE_DATA_PATH, sizeof(*cmd) + vnd_len, cmd, HCI_CMD_TIMEOUT); error: kfree(cmd); kfree(vnd_data); return err; } static int hci_enhanced_setup_sync(struct hci_dev *hdev, void *data) { struct conn_handle_t *conn_handle = data; struct hci_conn *conn = conn_handle->conn; __u16 handle = conn_handle->handle; struct hci_cp_enhanced_setup_sync_conn cp; const struct sco_param *param; kfree(conn_handle); bt_dev_dbg(hdev, "hcon %p", conn); configure_datapath_sync(hdev, &conn->codec); conn->state = BT_CONNECT; conn->out = true; conn->attempt++; memset(&cp, 0x00, sizeof(cp)); cp.handle = cpu_to_le16(handle); cp.tx_bandwidth = cpu_to_le32(0x00001f40); cp.rx_bandwidth = cpu_to_le32(0x00001f40); switch (conn->codec.id) { case BT_CODEC_MSBC: if (!find_next_esco_param(conn, esco_param_msbc, ARRAY_SIZE(esco_param_msbc))) return -EINVAL; param = &esco_param_msbc[conn->attempt - 1]; cp.tx_coding_format.id = 0x05; cp.rx_coding_format.id = 0x05; cp.tx_codec_frame_size = __cpu_to_le16(60); cp.rx_codec_frame_size = __cpu_to_le16(60); cp.in_bandwidth = __cpu_to_le32(32000); cp.out_bandwidth = __cpu_to_le32(32000); cp.in_coding_format.id = 0x04; cp.out_coding_format.id = 0x04; cp.in_coded_data_size = __cpu_to_le16(16); cp.out_coded_data_size = __cpu_to_le16(16); cp.in_pcm_data_format = 2; cp.out_pcm_data_format = 2; cp.in_pcm_sample_payload_msb_pos = 0; cp.out_pcm_sample_payload_msb_pos = 0; cp.in_data_path = conn->codec.data_path; cp.out_data_path = conn->codec.data_path; cp.in_transport_unit_size = 1; cp.out_transport_unit_size = 1; break; case BT_CODEC_TRANSPARENT: if (!find_next_esco_param(conn, esco_param_msbc, ARRAY_SIZE(esco_param_msbc))) return false; param = &esco_param_msbc[conn->attempt - 1]; cp.tx_coding_format.id = 0x03; cp.rx_coding_format.id = 0x03; cp.tx_codec_frame_size = __cpu_to_le16(60); cp.rx_codec_frame_size = __cpu_to_le16(60); cp.in_bandwidth = __cpu_to_le32(0x1f40); cp.out_bandwidth = __cpu_to_le32(0x1f40); cp.in_coding_format.id = 0x03; cp.out_coding_format.id = 0x03; cp.in_coded_data_size = __cpu_to_le16(16); cp.out_coded_data_size = __cpu_to_le16(16); cp.in_pcm_data_format = 2; cp.out_pcm_data_format = 2; cp.in_pcm_sample_payload_msb_pos = 0; cp.out_pcm_sample_payload_msb_pos = 0; cp.in_data_path = conn->codec.data_path; cp.out_data_path = conn->codec.data_path; cp.in_transport_unit_size = 1; cp.out_transport_unit_size = 1; break; case BT_CODEC_CVSD: if (conn->parent && lmp_esco_capable(conn->parent)) { if (!find_next_esco_param(conn, esco_param_cvsd, ARRAY_SIZE(esco_param_cvsd))) return -EINVAL; param = &esco_param_cvsd[conn->attempt - 1]; } else { if (conn->attempt > ARRAY_SIZE(sco_param_cvsd)) return -EINVAL; param = &sco_param_cvsd[conn->attempt - 1]; } cp.tx_coding_format.id = 2; cp.rx_coding_format.id = 2; cp.tx_codec_frame_size = __cpu_to_le16(60); cp.rx_codec_frame_size = __cpu_to_le16(60); cp.in_bandwidth = __cpu_to_le32(16000); cp.out_bandwidth = __cpu_to_le32(16000); cp.in_coding_format.id = 4; cp.out_coding_format.id = 4; cp.in_coded_data_size = __cpu_to_le16(16); cp.out_coded_data_size = __cpu_to_le16(16); cp.in_pcm_data_format = 2; cp.out_pcm_data_format = 2; cp.in_pcm_sample_payload_msb_pos = 0; cp.out_pcm_sample_payload_msb_pos = 0; cp.in_data_path = conn->codec.data_path; cp.out_data_path = conn->codec.data_path; cp.in_transport_unit_size = 16; cp.out_transport_unit_size = 16; break; default: return -EINVAL; } cp.retrans_effort = param->retrans_effort; cp.pkt_type = __cpu_to_le16(param->pkt_type); cp.max_latency = __cpu_to_le16(param->max_latency); if (hci_send_cmd(hdev, HCI_OP_ENHANCED_SETUP_SYNC_CONN, sizeof(cp), &cp) < 0) return -EIO; return 0; } static bool hci_setup_sync_conn(struct hci_conn *conn, __u16 handle) { struct hci_dev *hdev = conn->hdev; struct hci_cp_setup_sync_conn cp; const struct sco_param *param; bt_dev_dbg(hdev, "hcon %p", conn); conn->state = BT_CONNECT; conn->out = true; conn->attempt++; cp.handle = cpu_to_le16(handle); cp.tx_bandwidth = cpu_to_le32(0x00001f40); cp.rx_bandwidth = cpu_to_le32(0x00001f40); cp.voice_setting = cpu_to_le16(conn->setting); switch (conn->setting & SCO_AIRMODE_MASK) { case SCO_AIRMODE_TRANSP: if (!find_next_esco_param(conn, esco_param_msbc, ARRAY_SIZE(esco_param_msbc))) return false; param = &esco_param_msbc[conn->attempt - 1]; break; case SCO_AIRMODE_CVSD: if (conn->parent && lmp_esco_capable(conn->parent)) { if (!find_next_esco_param(conn, esco_param_cvsd, ARRAY_SIZE(esco_param_cvsd))) return false; param = &esco_param_cvsd[conn->attempt - 1]; } else { if (conn->attempt > ARRAY_SIZE(sco_param_cvsd)) return false; param = &sco_param_cvsd[conn->attempt - 1]; } break; default: return false; } cp.retrans_effort = param->retrans_effort; cp.pkt_type = __cpu_to_le16(param->pkt_type); cp.max_latency = __cpu_to_le16(param->max_latency); if (hci_send_cmd(hdev, HCI_OP_SETUP_SYNC_CONN, sizeof(cp), &cp) < 0) return false; return true; } bool hci_setup_sync(struct hci_conn *conn, __u16 handle) { int result; struct conn_handle_t *conn_handle; if (enhanced_sync_conn_capable(conn->hdev)) { conn_handle = kzalloc(sizeof(*conn_handle), GFP_KERNEL); if (!conn_handle) return false; conn_handle->conn = conn; conn_handle->handle = handle; result = hci_cmd_sync_queue(conn->hdev, hci_enhanced_setup_sync, conn_handle, NULL); if (result < 0) kfree(conn_handle); return result == 0; } return hci_setup_sync_conn(conn, handle); } u8 hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency, u16 to_multiplier) { struct hci_dev *hdev = conn->hdev; struct hci_conn_params *params; struct hci_cp_le_conn_update cp; hci_dev_lock(hdev); params = hci_conn_params_lookup(hdev, &conn->dst, conn->dst_type); if (params) { params->conn_min_interval = min; params->conn_max_interval = max; params->conn_latency = latency; params->supervision_timeout = to_multiplier; } hci_dev_unlock(hdev); memset(&cp, 0, sizeof(cp)); cp.handle = cpu_to_le16(conn->handle); cp.conn_interval_min = cpu_to_le16(min); cp.conn_interval_max = cpu_to_le16(max); cp.conn_latency = cpu_to_le16(latency); cp.supervision_timeout = cpu_to_le16(to_multiplier); cp.min_ce_len = cpu_to_le16(0x0000); cp.max_ce_len = cpu_to_le16(0x0000); hci_send_cmd(hdev, HCI_OP_LE_CONN_UPDATE, sizeof(cp), &cp); if (params) return 0x01; return 0x00; } void hci_le_start_enc(struct hci_conn *conn, __le16 ediv, __le64 rand, __u8 ltk[16], __u8 key_size) { struct hci_dev *hdev = conn->hdev; struct hci_cp_le_start_enc cp; BT_DBG("hcon %p", conn); memset(&cp, 0, sizeof(cp)); cp.handle = cpu_to_le16(conn->handle); cp.rand = rand; cp.ediv = ediv; memcpy(cp.ltk, ltk, key_size); hci_send_cmd(hdev, HCI_OP_LE_START_ENC, sizeof(cp), &cp); } /* Device _must_ be locked */ void hci_sco_setup(struct hci_conn *conn, __u8 status) { struct hci_link *link; link = list_first_entry_or_null(&conn->link_list, struct hci_link, list); if (!link || !link->conn) return; BT_DBG("hcon %p", conn); if (!status) { if (lmp_esco_capable(conn->hdev)) hci_setup_sync(link->conn, conn->handle); else hci_add_sco(link->conn, conn->handle); } else { hci_connect_cfm(link->conn, status); hci_conn_del(link->conn); } } static void hci_conn_timeout(struct work_struct *work) { struct hci_conn *conn = container_of(work, struct hci_conn, disc_work.work); int refcnt = atomic_read(&conn->refcnt); BT_DBG("hcon %p state %s", conn, state_to_string(conn->state)); WARN_ON(refcnt < 0); /* FIXME: It was observed that in pairing failed scenario, refcnt * drops below 0. Probably this is because l2cap_conn_del calls * l2cap_chan_del for each channel, and inside l2cap_chan_del conn is * dropped. After that loop hci_chan_del is called which also drops * conn. For now make sure that ACL is alive if refcnt is higher then 0, * otherwise drop it. */ if (refcnt > 0) return; hci_abort_conn(conn, hci_proto_disconn_ind(conn)); } /* Enter sniff mode */ static void hci_conn_idle(struct work_struct *work) { struct hci_conn *conn = container_of(work, struct hci_conn, idle_work.work); struct hci_dev *hdev = conn->hdev; BT_DBG("hcon %p mode %d", conn, conn->mode); if (!lmp_sniff_capable(hdev) || !lmp_sniff_capable(conn)) return; if (conn->mode != HCI_CM_ACTIVE || !(conn->link_policy & HCI_LP_SNIFF)) return; if (lmp_sniffsubr_capable(hdev) && lmp_sniffsubr_capable(conn)) { struct hci_cp_sniff_subrate cp; cp.handle = cpu_to_le16(conn->handle); cp.max_latency = cpu_to_le16(0); cp.min_remote_timeout = cpu_to_le16(0); cp.min_local_timeout = cpu_to_le16(0); hci_send_cmd(hdev, HCI_OP_SNIFF_SUBRATE, sizeof(cp), &cp); } if (!test_and_set_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->flags)) { struct hci_cp_sniff_mode cp; cp.handle = cpu_to_le16(conn->handle); cp.max_interval = cpu_to_le16(hdev->sniff_max_interval); cp.min_interval = cpu_to_le16(hdev->sniff_min_interval); cp.attempt = cpu_to_le16(4); cp.timeout = cpu_to_le16(1); hci_send_cmd(hdev, HCI_OP_SNIFF_MODE, sizeof(cp), &cp); } } static void hci_conn_auto_accept(struct work_struct *work) { struct hci_conn *conn = container_of(work, struct hci_conn, auto_accept_work.work); hci_send_cmd(conn->hdev, HCI_OP_USER_CONFIRM_REPLY, sizeof(conn->dst), &conn->dst); } static void le_disable_advertising(struct hci_dev *hdev) { if (ext_adv_capable(hdev)) { struct hci_cp_le_set_ext_adv_enable cp; cp.enable = 0x00; cp.num_of_sets = 0x00; hci_send_cmd(hdev, HCI_OP_LE_SET_EXT_ADV_ENABLE, sizeof(cp), &cp); } else { u8 enable = 0x00; hci_send_cmd(hdev, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable); } } static void le_conn_timeout(struct work_struct *work) { struct hci_conn *conn = container_of(work, struct hci_conn, le_conn_timeout.work); struct hci_dev *hdev = conn->hdev; BT_DBG(""); /* We could end up here due to having done directed advertising, * so clean up the state if necessary. This should however only * happen with broken hardware or if low duty cycle was used * (which doesn't have a timeout of its own). */ if (conn->role == HCI_ROLE_SLAVE) { /* Disable LE Advertising */ le_disable_advertising(hdev); hci_dev_lock(hdev); hci_conn_failed(conn, HCI_ERROR_ADVERTISING_TIMEOUT); hci_dev_unlock(hdev); return; } hci_abort_conn(conn, HCI_ERROR_REMOTE_USER_TERM); } struct iso_list_data { union { u8 cig; u8 big; }; union { u8 cis; u8 bis; u16 sync_handle; }; int count; bool big_term; bool pa_sync_term; bool big_sync_term; }; static void bis_list(struct hci_conn *conn, void *data) { struct iso_list_data *d = data; /* Skip if not broadcast/ANY address */ if (bacmp(&conn->dst, BDADDR_ANY)) return; if (d->big != conn->iso_qos.bcast.big || d->bis == BT_ISO_QOS_BIS_UNSET || d->bis != conn->iso_qos.bcast.bis) return; d->count++; } static int terminate_big_sync(struct hci_dev *hdev, void *data) { struct iso_list_data *d = data; bt_dev_dbg(hdev, "big 0x%2.2x bis 0x%2.2x", d->big, d->bis); hci_disable_per_advertising_sync(hdev, d->bis); hci_remove_ext_adv_instance_sync(hdev, d->bis, NULL); /* Only terminate BIG if it has been created */ if (!d->big_term) return 0; return hci_le_terminate_big_sync(hdev, d->big, HCI_ERROR_LOCAL_HOST_TERM); } static void terminate_big_destroy(struct hci_dev *hdev, void *data, int err) { kfree(data); } static int hci_le_terminate_big(struct hci_dev *hdev, struct hci_conn *conn) { struct iso_list_data *d; int ret; bt_dev_dbg(hdev, "big 0x%2.2x bis 0x%2.2x", conn->iso_qos.bcast.big, conn->iso_qos.bcast.bis); d = kzalloc(sizeof(*d), GFP_KERNEL); if (!d) return -ENOMEM; d->big = conn->iso_qos.bcast.big; d->bis = conn->iso_qos.bcast.bis; d->big_term = test_and_clear_bit(HCI_CONN_BIG_CREATED, &conn->flags); ret = hci_cmd_sync_queue(hdev, terminate_big_sync, d, terminate_big_destroy); if (ret) kfree(d); return ret; } static int big_terminate_sync(struct hci_dev *hdev, void *data) { struct iso_list_data *d = data; bt_dev_dbg(hdev, "big 0x%2.2x sync_handle 0x%4.4x", d->big, d->sync_handle); if (d->big_sync_term) hci_le_big_terminate_sync(hdev, d->big); if (d->pa_sync_term) return hci_le_pa_terminate_sync(hdev, d->sync_handle); return 0; } static void find_bis(struct hci_conn *conn, void *data) { struct iso_list_data *d = data; /* Ignore if BIG doesn't match */ if (d->big != conn->iso_qos.bcast.big) return; d->count++; } static int hci_le_big_terminate(struct hci_dev *hdev, u8 big, struct hci_conn *conn) { struct iso_list_data *d; int ret; bt_dev_dbg(hdev, "big 0x%2.2x sync_handle 0x%4.4x", big, conn->sync_handle); d = kzalloc(sizeof(*d), GFP_KERNEL); if (!d) return -ENOMEM; memset(d, 0, sizeof(*d)); d->big = big; d->sync_handle = conn->sync_handle; if (test_and_clear_bit(HCI_CONN_PA_SYNC, &conn->flags)) { hci_conn_hash_list_flag(hdev, find_bis, ISO_LINK, HCI_CONN_PA_SYNC, d); if (!d->count) d->pa_sync_term = true; d->count = 0; } if (test_and_clear_bit(HCI_CONN_BIG_SYNC, &conn->flags)) { hci_conn_hash_list_flag(hdev, find_bis, ISO_LINK, HCI_CONN_BIG_SYNC, d); if (!d->count) d->big_sync_term = true; } ret = hci_cmd_sync_queue(hdev, big_terminate_sync, d, terminate_big_destroy); if (ret) kfree(d); return ret; } /* Cleanup BIS connection * * Detects if there any BIS left connected in a BIG * broadcaster: Remove advertising instance and terminate BIG. * broadcaster receiver: Teminate BIG sync and terminate PA sync. */ static void bis_cleanup(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; struct hci_conn *bis; bt_dev_dbg(hdev, "conn %p", conn); if (conn->role == HCI_ROLE_MASTER) { if (!test_and_clear_bit(HCI_CONN_PER_ADV, &conn->flags)) return; /* Check if ISO connection is a BIS and terminate advertising * set and BIG if there are no other connections using it. */ bis = hci_conn_hash_lookup_big(hdev, conn->iso_qos.bcast.big); if (bis) return; hci_le_terminate_big(hdev, conn); } else { hci_le_big_terminate(hdev, conn->iso_qos.bcast.big, conn); } } static int remove_cig_sync(struct hci_dev *hdev, void *data) { u8 handle = PTR_UINT(data); return hci_le_remove_cig_sync(hdev, handle); } static int hci_le_remove_cig(struct hci_dev *hdev, u8 handle) { bt_dev_dbg(hdev, "handle 0x%2.2x", handle); return hci_cmd_sync_queue(hdev, remove_cig_sync, UINT_PTR(handle), NULL); } static void find_cis(struct hci_conn *conn, void *data) { struct iso_list_data *d = data; /* Ignore broadcast or if CIG don't match */ if (!bacmp(&conn->dst, BDADDR_ANY) || d->cig != conn->iso_qos.ucast.cig) return; d->count++; } /* Cleanup CIS connection: * * Detects if there any CIS left connected in a CIG and remove it. */ static void cis_cleanup(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; struct iso_list_data d; if (conn->iso_qos.ucast.cig == BT_ISO_QOS_CIG_UNSET) return; memset(&d, 0, sizeof(d)); d.cig = conn->iso_qos.ucast.cig; /* Check if ISO connection is a CIS and remove CIG if there are * no other connections using it. */ hci_conn_hash_list_state(hdev, find_cis, ISO_LINK, BT_BOUND, &d); hci_conn_hash_list_state(hdev, find_cis, ISO_LINK, BT_CONNECT, &d); hci_conn_hash_list_state(hdev, find_cis, ISO_LINK, BT_CONNECTED, &d); if (d.count) return; hci_le_remove_cig(hdev, conn->iso_qos.ucast.cig); } static int hci_conn_hash_alloc_unset(struct hci_dev *hdev) { return ida_alloc_range(&hdev->unset_handle_ida, HCI_CONN_HANDLE_MAX + 1, U16_MAX, GFP_ATOMIC); } struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst, u8 role, u16 handle) { struct hci_conn *conn; switch (type) { case ACL_LINK: if (!hdev->acl_mtu) return ERR_PTR(-ECONNREFUSED); break; case ISO_LINK: if (hdev->iso_mtu) /* Dedicated ISO Buffer exists */ break; fallthrough; case LE_LINK: if (hdev->le_mtu && hdev->le_mtu < HCI_MIN_LE_MTU) return ERR_PTR(-ECONNREFUSED); if (!hdev->le_mtu && hdev->acl_mtu < HCI_MIN_LE_MTU) return ERR_PTR(-ECONNREFUSED); break; case SCO_LINK: case ESCO_LINK: if (!hdev->sco_pkts) /* Controller does not support SCO or eSCO over HCI */ return ERR_PTR(-ECONNREFUSED); break; default: return ERR_PTR(-ECONNREFUSED); } bt_dev_dbg(hdev, "dst %pMR handle 0x%4.4x", dst, handle); conn = kzalloc(sizeof(*conn), GFP_KERNEL); if (!conn) return ERR_PTR(-ENOMEM); bacpy(&conn->dst, dst); bacpy(&conn->src, &hdev->bdaddr); conn->handle = handle; conn->hdev = hdev; conn->type = type; conn->role = role; conn->mode = HCI_CM_ACTIVE; conn->state = BT_OPEN; conn->auth_type = HCI_AT_GENERAL_BONDING; conn->io_capability = hdev->io_capability; conn->remote_auth = 0xff; conn->key_type = 0xff; conn->rssi = HCI_RSSI_INVALID; conn->tx_power = HCI_TX_POWER_INVALID; conn->max_tx_power = HCI_TX_POWER_INVALID; conn->sync_handle = HCI_SYNC_HANDLE_INVALID; set_bit(HCI_CONN_POWER_SAVE, &conn->flags); conn->disc_timeout = HCI_DISCONN_TIMEOUT; /* Set Default Authenticated payload timeout to 30s */ conn->auth_payload_timeout = DEFAULT_AUTH_PAYLOAD_TIMEOUT; if (conn->role == HCI_ROLE_MASTER) conn->out = true; switch (type) { case ACL_LINK: conn->pkt_type = hdev->pkt_type & ACL_PTYPE_MASK; conn->mtu = hdev->acl_mtu; break; case LE_LINK: /* conn->src should reflect the local identity address */ hci_copy_identity_address(hdev, &conn->src, &conn->src_type); conn->mtu = hdev->le_mtu ? hdev->le_mtu : hdev->acl_mtu; break; case ISO_LINK: /* conn->src should reflect the local identity address */ hci_copy_identity_address(hdev, &conn->src, &conn->src_type); /* set proper cleanup function */ if (!bacmp(dst, BDADDR_ANY)) conn->cleanup = bis_cleanup; else if (conn->role == HCI_ROLE_MASTER) conn->cleanup = cis_cleanup; conn->mtu = hdev->iso_mtu ? hdev->iso_mtu : hdev->le_mtu ? hdev->le_mtu : hdev->acl_mtu; break; case SCO_LINK: if (lmp_esco_capable(hdev)) conn->pkt_type = (hdev->esco_type & SCO_ESCO_MASK) | (hdev->esco_type & EDR_ESCO_MASK); else conn->pkt_type = hdev->pkt_type & SCO_PTYPE_MASK; conn->mtu = hdev->sco_mtu; break; case ESCO_LINK: conn->pkt_type = hdev->esco_type & ~EDR_ESCO_MASK; conn->mtu = hdev->sco_mtu; break; } skb_queue_head_init(&conn->data_q); INIT_LIST_HEAD(&conn->chan_list); INIT_LIST_HEAD(&conn->link_list); INIT_DELAYED_WORK(&conn->disc_work, hci_conn_timeout); INIT_DELAYED_WORK(&conn->auto_accept_work, hci_conn_auto_accept); INIT_DELAYED_WORK(&conn->idle_work, hci_conn_idle); INIT_DELAYED_WORK(&conn->le_conn_timeout, le_conn_timeout); atomic_set(&conn->refcnt, 0); hci_dev_hold(hdev); hci_conn_hash_add(hdev, conn); /* The SCO and eSCO connections will only be notified when their * setup has been completed. This is different to ACL links which * can be notified right away. */ if (conn->type != SCO_LINK && conn->type != ESCO_LINK) { if (hdev->notify) hdev->notify(hdev, HCI_NOTIFY_CONN_ADD); } hci_conn_init_sysfs(conn); return conn; } struct hci_conn *hci_conn_add_unset(struct hci_dev *hdev, int type, bdaddr_t *dst, u8 role) { int handle; bt_dev_dbg(hdev, "dst %pMR", dst); handle = hci_conn_hash_alloc_unset(hdev); if (unlikely(handle < 0)) return ERR_PTR(-ECONNREFUSED); return hci_conn_add(hdev, type, dst, role, handle); } static void hci_conn_cleanup_child(struct hci_conn *conn, u8 reason) { if (!reason) reason = HCI_ERROR_REMOTE_USER_TERM; /* Due to race, SCO/ISO conn might be not established yet at this point, * and nothing else will clean it up. In other cases it is done via HCI * events. */ switch (conn->type) { case SCO_LINK: case ESCO_LINK: if (HCI_CONN_HANDLE_UNSET(conn->handle)) hci_conn_failed(conn, reason); break; case ISO_LINK: if ((conn->state != BT_CONNECTED && !test_bit(HCI_CONN_CREATE_CIS, &conn->flags)) || test_bit(HCI_CONN_BIG_CREATED, &conn->flags)) hci_conn_failed(conn, reason); break; } } static void hci_conn_unlink(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; bt_dev_dbg(hdev, "hcon %p", conn); if (!conn->parent) { struct hci_link *link, *t; list_for_each_entry_safe(link, t, &conn->link_list, list) { struct hci_conn *child = link->conn; hci_conn_unlink(child); /* If hdev is down it means * hci_dev_close_sync/hci_conn_hash_flush is in progress * and links don't need to be cleanup as all connections * would be cleanup. */ if (!test_bit(HCI_UP, &hdev->flags)) continue; hci_conn_cleanup_child(child, conn->abort_reason); } return; } if (!conn->link) return; list_del_rcu(&conn->link->list); synchronize_rcu(); hci_conn_drop(conn->parent); hci_conn_put(conn->parent); conn->parent = NULL; kfree(conn->link); conn->link = NULL; } void hci_conn_del(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; BT_DBG("%s hcon %p handle %d", hdev->name, conn, conn->handle); hci_conn_unlink(conn); cancel_delayed_work_sync(&conn->disc_work); cancel_delayed_work_sync(&conn->auto_accept_work); cancel_delayed_work_sync(&conn->idle_work); if (conn->type == ACL_LINK) { /* Unacked frames */ hdev->acl_cnt += conn->sent; } else if (conn->type == LE_LINK) { cancel_delayed_work(&conn->le_conn_timeout); if (hdev->le_pkts) hdev->le_cnt += conn->sent; else hdev->acl_cnt += conn->sent; } else { /* Unacked ISO frames */ if (conn->type == ISO_LINK) { if (hdev->iso_pkts) hdev->iso_cnt += conn->sent; else if (hdev->le_pkts) hdev->le_cnt += conn->sent; else hdev->acl_cnt += conn->sent; } } skb_queue_purge(&conn->data_q); /* Remove the connection from the list and cleanup its remaining * state. This is a separate function since for some cases like * BT_CONNECT_SCAN we *only* want the cleanup part without the * rest of hci_conn_del. */ hci_conn_cleanup(conn); /* Dequeue callbacks using connection pointer as data */ hci_cmd_sync_dequeue(hdev, NULL, conn, NULL); } struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src, uint8_t src_type) { int use_src = bacmp(src, BDADDR_ANY); struct hci_dev *hdev = NULL, *d; BT_DBG("%pMR -> %pMR", src, dst); read_lock(&hci_dev_list_lock); list_for_each_entry(d, &hci_dev_list, list) { if (!test_bit(HCI_UP, &d->flags) || hci_dev_test_flag(d, HCI_USER_CHANNEL)) continue; /* Simple routing: * No source address - find interface with bdaddr != dst * Source address - find interface with bdaddr == src */ if (use_src) { bdaddr_t id_addr; u8 id_addr_type; if (src_type == BDADDR_BREDR) { if (!lmp_bredr_capable(d)) continue; bacpy(&id_addr, &d->bdaddr); id_addr_type = BDADDR_BREDR; } else { if (!lmp_le_capable(d)) continue; hci_copy_identity_address(d, &id_addr, &id_addr_type); /* Convert from HCI to three-value type */ if (id_addr_type == ADDR_LE_DEV_PUBLIC) id_addr_type = BDADDR_LE_PUBLIC; else id_addr_type = BDADDR_LE_RANDOM; } if (!bacmp(&id_addr, src) && id_addr_type == src_type) { hdev = d; break; } } else { if (bacmp(&d->bdaddr, dst)) { hdev = d; break; } } } if (hdev) hdev = hci_dev_hold(hdev); read_unlock(&hci_dev_list_lock); return hdev; } EXPORT_SYMBOL(hci_get_route); /* This function requires the caller holds hdev->lock */ static void hci_le_conn_failed(struct hci_conn *conn, u8 status) { struct hci_dev *hdev = conn->hdev; hci_connect_le_scan_cleanup(conn, status); /* Enable advertising in case this was a failed connection * attempt as a peripheral. */ hci_enable_advertising(hdev); } /* This function requires the caller holds hdev->lock */ void hci_conn_failed(struct hci_conn *conn, u8 status) { struct hci_dev *hdev = conn->hdev; bt_dev_dbg(hdev, "status 0x%2.2x", status); switch (conn->type) { case LE_LINK: hci_le_conn_failed(conn, status); break; case ACL_LINK: mgmt_connect_failed(hdev, &conn->dst, conn->type, conn->dst_type, status); break; } /* In case of BIG/PA sync failed, clear conn flags so that * the conns will be correctly cleaned up by ISO layer */ test_and_clear_bit(HCI_CONN_BIG_SYNC_FAILED, &conn->flags); test_and_clear_bit(HCI_CONN_PA_SYNC_FAILED, &conn->flags); conn->state = BT_CLOSED; hci_connect_cfm(conn, status); hci_conn_del(conn); } /* This function requires the caller holds hdev->lock */ u8 hci_conn_set_handle(struct hci_conn *conn, u16 handle) { struct hci_dev *hdev = conn->hdev; bt_dev_dbg(hdev, "hcon %p handle 0x%4.4x", conn, handle); if (conn->handle == handle) return 0; if (handle > HCI_CONN_HANDLE_MAX) { bt_dev_err(hdev, "Invalid handle: 0x%4.4x > 0x%4.4x", handle, HCI_CONN_HANDLE_MAX); return HCI_ERROR_INVALID_PARAMETERS; } /* If abort_reason has been sent it means the connection is being * aborted and the handle shall not be changed. */ if (conn->abort_reason) return conn->abort_reason; if (HCI_CONN_HANDLE_UNSET(conn->handle)) ida_free(&hdev->unset_handle_ida, conn->handle); conn->handle = handle; return 0; } struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, u8 dst_type, bool dst_resolved, u8 sec_level, u16 conn_timeout, u8 role, u8 phy, u8 sec_phy) { struct hci_conn *conn; struct smp_irk *irk; int err; /* Let's make sure that le is enabled.*/ if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) { if (lmp_le_capable(hdev)) return ERR_PTR(-ECONNREFUSED); return ERR_PTR(-EOPNOTSUPP); } /* Since the controller supports only one LE connection attempt at a * time, we return -EBUSY if there is any connection attempt running. */ if (hci_lookup_le_connect(hdev)) return ERR_PTR(-EBUSY); /* If there's already a connection object but it's not in * scanning state it means it must already be established, in * which case we can't do anything else except report a failure * to connect. */ conn = hci_conn_hash_lookup_le(hdev, dst, dst_type); if (conn && !test_bit(HCI_CONN_SCANNING, &conn->flags)) { return ERR_PTR(-EBUSY); } /* Check if the destination address has been resolved by the controller * since if it did then the identity address shall be used. */ if (!dst_resolved) { /* When given an identity address with existing identity * resolving key, the connection needs to be established * to a resolvable random address. * * Storing the resolvable random address is required here * to handle connection failures. The address will later * be resolved back into the original identity address * from the connect request. */ irk = hci_find_irk_by_addr(hdev, dst, dst_type); if (irk && bacmp(&irk->rpa, BDADDR_ANY)) { dst = &irk->rpa; dst_type = ADDR_LE_DEV_RANDOM; } } if (conn) { bacpy(&conn->dst, dst); } else { conn = hci_conn_add_unset(hdev, LE_LINK, dst, role); if (IS_ERR(conn)) return conn; hci_conn_hold(conn); conn->pending_sec_level = sec_level; } conn->dst_type = dst_type; conn->sec_level = BT_SECURITY_LOW; conn->conn_timeout = conn_timeout; conn->le_adv_phy = phy; conn->le_adv_sec_phy = sec_phy; err = hci_connect_le_sync(hdev, conn); if (err) { hci_conn_del(conn); return ERR_PTR(err); } return conn; } static bool is_connected(struct hci_dev *hdev, bdaddr_t *addr, u8 type) { struct hci_conn *conn; conn = hci_conn_hash_lookup_le(hdev, addr, type); if (!conn) return false; if (conn->state != BT_CONNECTED) return false; return true; } /* This function requires the caller holds hdev->lock */ static int hci_explicit_conn_params_set(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type) { struct hci_conn_params *params; if (is_connected(hdev, addr, addr_type)) return -EISCONN; params = hci_conn_params_lookup(hdev, addr, addr_type); if (!params) { params = hci_conn_params_add(hdev, addr, addr_type); if (!params) return -ENOMEM; /* If we created new params, mark them to be deleted in * hci_connect_le_scan_cleanup. It's different case than * existing disabled params, those will stay after cleanup. */ params->auto_connect = HCI_AUTO_CONN_EXPLICIT; } /* We're trying to connect, so make sure params are at pend_le_conns */ if (params->auto_connect == HCI_AUTO_CONN_DISABLED || params->auto_connect == HCI_AUTO_CONN_REPORT || params->auto_connect == HCI_AUTO_CONN_EXPLICIT) { hci_pend_le_list_del_init(params); hci_pend_le_list_add(params, &hdev->pend_le_conns); } params->explicit_connect = true; BT_DBG("addr %pMR (type %u) auto_connect %u", addr, addr_type, params->auto_connect); return 0; } static int qos_set_big(struct hci_dev *hdev, struct bt_iso_qos *qos) { struct hci_conn *conn; u8 big; /* Allocate a BIG if not set */ if (qos->bcast.big == BT_ISO_QOS_BIG_UNSET) { for (big = 0x00; big < 0xef; big++) { conn = hci_conn_hash_lookup_big(hdev, big); if (!conn) break; } if (big == 0xef) return -EADDRNOTAVAIL; /* Update BIG */ qos->bcast.big = big; } return 0; } static int qos_set_bis(struct hci_dev *hdev, struct bt_iso_qos *qos) { struct hci_conn *conn; u8 bis; /* Allocate BIS if not set */ if (qos->bcast.bis == BT_ISO_QOS_BIS_UNSET) { if (qos->bcast.big != BT_ISO_QOS_BIG_UNSET) { conn = hci_conn_hash_lookup_big(hdev, qos->bcast.big); if (conn) { /* If the BIG handle is already matched to an advertising * handle, do not allocate a new one. */ qos->bcast.bis = conn->iso_qos.bcast.bis; return 0; } } /* Find an unused adv set to advertise BIS, skip instance 0x00 * since it is reserved as general purpose set. */ for (bis = 0x01; bis < hdev->le_num_of_adv_sets; bis++) { conn = hci_conn_hash_lookup_bis(hdev, BDADDR_ANY, bis); if (!conn) break; } if (bis == hdev->le_num_of_adv_sets) return -EADDRNOTAVAIL; /* Update BIS */ qos->bcast.bis = bis; } return 0; } /* This function requires the caller holds hdev->lock */ static struct hci_conn *hci_add_bis(struct hci_dev *hdev, bdaddr_t *dst, struct bt_iso_qos *qos, __u8 base_len, __u8 *base) { struct hci_conn *conn; int err; /* Let's make sure that le is enabled.*/ if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) { if (lmp_le_capable(hdev)) return ERR_PTR(-ECONNREFUSED); return ERR_PTR(-EOPNOTSUPP); } err = qos_set_big(hdev, qos); if (err) return ERR_PTR(err); err = qos_set_bis(hdev, qos); if (err) return ERR_PTR(err); /* Check if the LE Create BIG command has already been sent */ conn = hci_conn_hash_lookup_per_adv_bis(hdev, dst, qos->bcast.big, qos->bcast.big); if (conn) return ERR_PTR(-EADDRINUSE); /* Check BIS settings against other bound BISes, since all * BISes in a BIG must have the same value for all parameters */ conn = hci_conn_hash_lookup_big(hdev, qos->bcast.big); if (conn && (memcmp(qos, &conn->iso_qos, sizeof(*qos)) || base_len != conn->le_per_adv_data_len || memcmp(conn->le_per_adv_data, base, base_len))) return ERR_PTR(-EADDRINUSE); conn = hci_conn_add_unset(hdev, ISO_LINK, dst, HCI_ROLE_MASTER); if (IS_ERR(conn)) return conn; conn->state = BT_CONNECT; hci_conn_hold(conn); return conn; } /* This function requires the caller holds hdev->lock */ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst, u8 dst_type, u8 sec_level, u16 conn_timeout, enum conn_reasons conn_reason) { struct hci_conn *conn; /* Let's make sure that le is enabled.*/ if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) { if (lmp_le_capable(hdev)) return ERR_PTR(-ECONNREFUSED); return ERR_PTR(-EOPNOTSUPP); } /* Some devices send ATT messages as soon as the physical link is * established. To be able to handle these ATT messages, the user- * space first establishes the connection and then starts the pairing * process. * * So if a hci_conn object already exists for the following connection * attempt, we simply update pending_sec_level and auth_type fields * and return the object found. */ conn = hci_conn_hash_lookup_le(hdev, dst, dst_type); if (conn) { if (conn->pending_sec_level < sec_level) conn->pending_sec_level = sec_level; goto done; } BT_DBG("requesting refresh of dst_addr"); conn = hci_conn_add_unset(hdev, LE_LINK, dst, HCI_ROLE_MASTER); if (IS_ERR(conn)) return conn; if (hci_explicit_conn_params_set(hdev, dst, dst_type) < 0) { hci_conn_del(conn); return ERR_PTR(-EBUSY); } conn->state = BT_CONNECT; set_bit(HCI_CONN_SCANNING, &conn->flags); conn->dst_type = dst_type; conn->sec_level = BT_SECURITY_LOW; conn->pending_sec_level = sec_level; conn->conn_timeout = conn_timeout; conn->conn_reason = conn_reason; hci_update_passive_scan(hdev); done: hci_conn_hold(conn); return conn; } struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst, u8 sec_level, u8 auth_type, enum conn_reasons conn_reason, u16 timeout) { struct hci_conn *acl; if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) { if (lmp_bredr_capable(hdev)) return ERR_PTR(-ECONNREFUSED); return ERR_PTR(-EOPNOTSUPP); } /* Reject outgoing connection to device with same BD ADDR against * CVE-2020-26555 */ if (!bacmp(&hdev->bdaddr, dst)) { bt_dev_dbg(hdev, "Reject connection with same BD_ADDR %pMR\n", dst); return ERR_PTR(-ECONNREFUSED); } acl = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst); if (!acl) { acl = hci_conn_add_unset(hdev, ACL_LINK, dst, HCI_ROLE_MASTER); if (IS_ERR(acl)) return acl; } hci_conn_hold(acl); acl->conn_reason = conn_reason; if (acl->state == BT_OPEN || acl->state == BT_CLOSED) { int err; acl->sec_level = BT_SECURITY_LOW; acl->pending_sec_level = sec_level; acl->auth_type = auth_type; acl->conn_timeout = timeout; err = hci_connect_acl_sync(hdev, acl); if (err) { hci_conn_del(acl); return ERR_PTR(err); } } return acl; } static struct hci_link *hci_conn_link(struct hci_conn *parent, struct hci_conn *conn) { struct hci_dev *hdev = parent->hdev; struct hci_link *link; bt_dev_dbg(hdev, "parent %p hcon %p", parent, conn); if (conn->link) return conn->link; if (conn->parent) return NULL; link = kzalloc(sizeof(*link), GFP_KERNEL); if (!link) return NULL; link->conn = hci_conn_hold(conn); conn->link = link; conn->parent = hci_conn_get(parent); /* Use list_add_tail_rcu append to the list */ list_add_tail_rcu(&link->list, &parent->link_list); return link; } struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst, __u16 setting, struct bt_codec *codec, u16 timeout) { struct hci_conn *acl; struct hci_conn *sco; struct hci_link *link; acl = hci_connect_acl(hdev, dst, BT_SECURITY_LOW, HCI_AT_NO_BONDING, CONN_REASON_SCO_CONNECT, timeout); if (IS_ERR(acl)) return acl; sco = hci_conn_hash_lookup_ba(hdev, type, dst); if (!sco) { sco = hci_conn_add_unset(hdev, type, dst, HCI_ROLE_MASTER); if (IS_ERR(sco)) { hci_conn_drop(acl); return sco; } } link = hci_conn_link(acl, sco); if (!link) { hci_conn_drop(acl); hci_conn_drop(sco); return ERR_PTR(-ENOLINK); } sco->setting = setting; sco->codec = *codec; if (acl->state == BT_CONNECTED && (sco->state == BT_OPEN || sco->state == BT_CLOSED)) { set_bit(HCI_CONN_POWER_SAVE, &acl->flags); hci_conn_enter_active_mode(acl, BT_POWER_FORCE_ACTIVE_ON); if (test_bit(HCI_CONN_MODE_CHANGE_PEND, &acl->flags)) { /* defer SCO setup until mode change completed */ set_bit(HCI_CONN_SCO_SETUP_PEND, &acl->flags); return sco; } hci_sco_setup(acl, 0x00); } return sco; } static int hci_le_create_big(struct hci_conn *conn, struct bt_iso_qos *qos) { struct hci_dev *hdev = conn->hdev; struct hci_cp_le_create_big cp; struct iso_list_data data; memset(&cp, 0, sizeof(cp)); data.big = qos->bcast.big; data.bis = qos->bcast.bis; data.count = 0; /* Create a BIS for each bound connection */ hci_conn_hash_list_state(hdev, bis_list, ISO_LINK, BT_BOUND, &data); cp.handle = qos->bcast.big; cp.adv_handle = qos->bcast.bis; cp.num_bis = data.count; hci_cpu_to_le24(qos->bcast.out.interval, cp.bis.sdu_interval); cp.bis.sdu = cpu_to_le16(qos->bcast.out.sdu); cp.bis.latency = cpu_to_le16(qos->bcast.out.latency); cp.bis.rtn = qos->bcast.out.rtn; cp.bis.phy = qos->bcast.out.phy; cp.bis.packing = qos->bcast.packing; cp.bis.framing = qos->bcast.framing; cp.bis.encryption = qos->bcast.encryption; memcpy(cp.bis.bcode, qos->bcast.bcode, sizeof(cp.bis.bcode)); return hci_send_cmd(hdev, HCI_OP_LE_CREATE_BIG, sizeof(cp), &cp); } static int set_cig_params_sync(struct hci_dev *hdev, void *data) { DEFINE_FLEX(struct hci_cp_le_set_cig_params, pdu, cis, num_cis, 0x1f); u8 cig_id = PTR_UINT(data); struct hci_conn *conn; struct bt_iso_qos *qos; u8 aux_num_cis = 0; u8 cis_id; conn = hci_conn_hash_lookup_cig(hdev, cig_id); if (!conn) return 0; qos = &conn->iso_qos; pdu->cig_id = cig_id; hci_cpu_to_le24(qos->ucast.out.interval, pdu->c_interval); hci_cpu_to_le24(qos->ucast.in.interval, pdu->p_interval); pdu->sca = qos->ucast.sca; pdu->packing = qos->ucast.packing; pdu->framing = qos->ucast.framing; pdu->c_latency = cpu_to_le16(qos->ucast.out.latency); pdu->p_latency = cpu_to_le16(qos->ucast.in.latency); /* Reprogram all CIS(s) with the same CIG, valid range are: * num_cis: 0x00 to 0x1F * cis_id: 0x00 to 0xEF */ for (cis_id = 0x00; cis_id < 0xf0 && aux_num_cis < pdu->num_cis; cis_id++) { struct hci_cis_params *cis; conn = hci_conn_hash_lookup_cis(hdev, NULL, 0, cig_id, cis_id); if (!conn) continue; qos = &conn->iso_qos; cis = &pdu->cis[aux_num_cis++]; cis->cis_id = cis_id; cis->c_sdu = cpu_to_le16(conn->iso_qos.ucast.out.sdu); cis->p_sdu = cpu_to_le16(conn->iso_qos.ucast.in.sdu); cis->c_phy = qos->ucast.out.phy ? qos->ucast.out.phy : qos->ucast.in.phy; cis->p_phy = qos->ucast.in.phy ? qos->ucast.in.phy : qos->ucast.out.phy; cis->c_rtn = qos->ucast.out.rtn; cis->p_rtn = qos->ucast.in.rtn; } pdu->num_cis = aux_num_cis; if (!pdu->num_cis) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_CIG_PARAMS, struct_size(pdu, cis, pdu->num_cis), pdu, HCI_CMD_TIMEOUT); } static bool hci_le_set_cig_params(struct hci_conn *conn, struct bt_iso_qos *qos) { struct hci_dev *hdev = conn->hdev; struct iso_list_data data; memset(&data, 0, sizeof(data)); /* Allocate first still reconfigurable CIG if not set */ if (qos->ucast.cig == BT_ISO_QOS_CIG_UNSET) { for (data.cig = 0x00; data.cig < 0xf0; data.cig++) { data.count = 0; hci_conn_hash_list_state(hdev, find_cis, ISO_LINK, BT_CONNECT, &data); if (data.count) continue; hci_conn_hash_list_state(hdev, find_cis, ISO_LINK, BT_CONNECTED, &data); if (!data.count) break; } if (data.cig == 0xf0) return false; /* Update CIG */ qos->ucast.cig = data.cig; } if (qos->ucast.cis != BT_ISO_QOS_CIS_UNSET) { if (hci_conn_hash_lookup_cis(hdev, NULL, 0, qos->ucast.cig, qos->ucast.cis)) return false; goto done; } /* Allocate first available CIS if not set */ for (data.cig = qos->ucast.cig, data.cis = 0x00; data.cis < 0xf0; data.cis++) { if (!hci_conn_hash_lookup_cis(hdev, NULL, 0, data.cig, data.cis)) { /* Update CIS */ qos->ucast.cis = data.cis; break; } } if (qos->ucast.cis == BT_ISO_QOS_CIS_UNSET) return false; done: if (hci_cmd_sync_queue(hdev, set_cig_params_sync, UINT_PTR(qos->ucast.cig), NULL) < 0) return false; return true; } struct hci_conn *hci_bind_cis(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type, struct bt_iso_qos *qos) { struct hci_conn *cis; cis = hci_conn_hash_lookup_cis(hdev, dst, dst_type, qos->ucast.cig, qos->ucast.cis); if (!cis) { cis = hci_conn_add_unset(hdev, ISO_LINK, dst, HCI_ROLE_MASTER); if (IS_ERR(cis)) return cis; cis->cleanup = cis_cleanup; cis->dst_type = dst_type; cis->iso_qos.ucast.cig = BT_ISO_QOS_CIG_UNSET; cis->iso_qos.ucast.cis = BT_ISO_QOS_CIS_UNSET; } if (cis->state == BT_CONNECTED) return cis; /* Check if CIS has been set and the settings matches */ if (cis->state == BT_BOUND && !memcmp(&cis->iso_qos, qos, sizeof(*qos))) return cis; /* Update LINK PHYs according to QoS preference */ cis->le_tx_phy = qos->ucast.out.phy; cis->le_rx_phy = qos->ucast.in.phy; /* If output interval is not set use the input interval as it cannot be * 0x000000. */ if (!qos->ucast.out.interval) qos->ucast.out.interval = qos->ucast.in.interval; /* If input interval is not set use the output interval as it cannot be * 0x000000. */ if (!qos->ucast.in.interval) qos->ucast.in.interval = qos->ucast.out.interval; /* If output latency is not set use the input latency as it cannot be * 0x0000. */ if (!qos->ucast.out.latency) qos->ucast.out.latency = qos->ucast.in.latency; /* If input latency is not set use the output latency as it cannot be * 0x0000. */ if (!qos->ucast.in.latency) qos->ucast.in.latency = qos->ucast.out.latency; if (!hci_le_set_cig_params(cis, qos)) { hci_conn_drop(cis); return ERR_PTR(-EINVAL); } hci_conn_hold(cis); cis->iso_qos = *qos; cis->state = BT_BOUND; return cis; } bool hci_iso_setup_path(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; struct hci_cp_le_setup_iso_path cmd; memset(&cmd, 0, sizeof(cmd)); if (conn->iso_qos.ucast.out.sdu) { cmd.handle = cpu_to_le16(conn->handle); cmd.direction = 0x00; /* Input (Host to Controller) */ cmd.path = 0x00; /* HCI path if enabled */ cmd.codec = 0x03; /* Transparent Data */ if (hci_send_cmd(hdev, HCI_OP_LE_SETUP_ISO_PATH, sizeof(cmd), &cmd) < 0) return false; } if (conn->iso_qos.ucast.in.sdu) { cmd.handle = cpu_to_le16(conn->handle); cmd.direction = 0x01; /* Output (Controller to Host) */ cmd.path = 0x00; /* HCI path if enabled */ cmd.codec = 0x03; /* Transparent Data */ if (hci_send_cmd(hdev, HCI_OP_LE_SETUP_ISO_PATH, sizeof(cmd), &cmd) < 0) return false; } return true; } int hci_conn_check_create_cis(struct hci_conn *conn) { if (conn->type != ISO_LINK || !bacmp(&conn->dst, BDADDR_ANY)) return -EINVAL; if (!conn->parent || conn->parent->state != BT_CONNECTED || conn->state != BT_CONNECT || HCI_CONN_HANDLE_UNSET(conn->handle)) return 1; return 0; } static int hci_create_cis_sync(struct hci_dev *hdev, void *data) { return hci_le_create_cis_sync(hdev); } int hci_le_create_cis_pending(struct hci_dev *hdev) { struct hci_conn *conn; bool pending = false; rcu_read_lock(); list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) { if (test_bit(HCI_CONN_CREATE_CIS, &conn->flags)) { rcu_read_unlock(); return -EBUSY; } if (!hci_conn_check_create_cis(conn)) pending = true; } rcu_read_unlock(); if (!pending) return 0; /* Queue Create CIS */ return hci_cmd_sync_queue(hdev, hci_create_cis_sync, NULL, NULL); } static void hci_iso_qos_setup(struct hci_dev *hdev, struct hci_conn *conn, struct bt_iso_io_qos *qos, __u8 phy) { /* Only set MTU if PHY is enabled */ if (!qos->sdu && qos->phy) qos->sdu = conn->mtu; /* Use the same PHY as ACL if set to any */ if (qos->phy == BT_ISO_PHY_ANY) qos->phy = phy; /* Use LE ACL connection interval if not set */ if (!qos->interval) /* ACL interval unit in 1.25 ms to us */ qos->interval = conn->le_conn_interval * 1250; /* Use LE ACL connection latency if not set */ if (!qos->latency) qos->latency = conn->le_conn_latency; } static int create_big_sync(struct hci_dev *hdev, void *data) { struct hci_conn *conn = data; struct bt_iso_qos *qos = &conn->iso_qos; u16 interval, sync_interval = 0; u32 flags = 0; int err; if (qos->bcast.out.phy == 0x02) flags |= MGMT_ADV_FLAG_SEC_2M; /* Align intervals */ interval = (qos->bcast.out.interval / 1250) * qos->bcast.sync_factor; if (qos->bcast.bis) sync_interval = interval * 4; err = hci_start_per_adv_sync(hdev, qos->bcast.bis, conn->le_per_adv_data_len, conn->le_per_adv_data, flags, interval, interval, sync_interval); if (err) return err; return hci_le_create_big(conn, &conn->iso_qos); } static void create_pa_complete(struct hci_dev *hdev, void *data, int err) { struct hci_cp_le_pa_create_sync *cp = data; bt_dev_dbg(hdev, ""); if (err) bt_dev_err(hdev, "Unable to create PA: %d", err); kfree(cp); } static int create_pa_sync(struct hci_dev *hdev, void *data) { struct hci_cp_le_pa_create_sync *cp = data; int err; err = __hci_cmd_sync_status(hdev, HCI_OP_LE_PA_CREATE_SYNC, sizeof(*cp), cp, HCI_CMD_TIMEOUT); if (err) { hci_dev_clear_flag(hdev, HCI_PA_SYNC); return err; } return hci_update_passive_scan_sync(hdev); } struct hci_conn *hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type, __u8 sid, struct bt_iso_qos *qos) { struct hci_cp_le_pa_create_sync *cp; struct hci_conn *conn; int err; if (hci_dev_test_and_set_flag(hdev, HCI_PA_SYNC)) return ERR_PTR(-EBUSY); conn = hci_conn_add_unset(hdev, ISO_LINK, dst, HCI_ROLE_SLAVE); if (IS_ERR(conn)) return conn; conn->iso_qos = *qos; conn->state = BT_LISTEN; hci_conn_hold(conn); cp = kzalloc(sizeof(*cp), GFP_KERNEL); if (!cp) { hci_dev_clear_flag(hdev, HCI_PA_SYNC); hci_conn_drop(conn); return ERR_PTR(-ENOMEM); } cp->options = qos->bcast.options; cp->sid = sid; cp->addr_type = dst_type; bacpy(&cp->addr, dst); cp->skip = cpu_to_le16(qos->bcast.skip); cp->sync_timeout = cpu_to_le16(qos->bcast.sync_timeout); cp->sync_cte_type = qos->bcast.sync_cte_type; /* Queue start pa_create_sync and scan */ err = hci_cmd_sync_queue(hdev, create_pa_sync, cp, create_pa_complete); if (err < 0) { hci_conn_drop(conn); kfree(cp); return ERR_PTR(err); } return conn; } int hci_le_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon, struct bt_iso_qos *qos, __u16 sync_handle, __u8 num_bis, __u8 bis[]) { DEFINE_FLEX(struct hci_cp_le_big_create_sync, pdu, bis, num_bis, 0x11); int err; if (num_bis < 0x01 || num_bis > pdu->num_bis) return -EINVAL; err = qos_set_big(hdev, qos); if (err) return err; if (hcon) hcon->iso_qos.bcast.big = qos->bcast.big; pdu->handle = qos->bcast.big; pdu->sync_handle = cpu_to_le16(sync_handle); pdu->encryption = qos->bcast.encryption; memcpy(pdu->bcode, qos->bcast.bcode, sizeof(pdu->bcode)); pdu->mse = qos->bcast.mse; pdu->timeout = cpu_to_le16(qos->bcast.timeout); pdu->num_bis = num_bis; memcpy(pdu->bis, bis, num_bis); return hci_send_cmd(hdev, HCI_OP_LE_BIG_CREATE_SYNC, struct_size(pdu, bis, num_bis), pdu); } static void create_big_complete(struct hci_dev *hdev, void *data, int err) { struct hci_conn *conn = data; bt_dev_dbg(hdev, "conn %p", conn); if (err) { bt_dev_err(hdev, "Unable to create BIG: %d", err); hci_connect_cfm(conn, err); hci_conn_del(conn); } } struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst, struct bt_iso_qos *qos, __u8 base_len, __u8 *base) { struct hci_conn *conn; struct hci_conn *parent; __u8 eir[HCI_MAX_PER_AD_LENGTH]; struct hci_link *link; /* Look for any BIS that is open for rebinding */ conn = hci_conn_hash_lookup_big_state(hdev, qos->bcast.big, BT_OPEN); if (conn) { memcpy(qos, &conn->iso_qos, sizeof(*qos)); conn->state = BT_CONNECTED; return conn; } if (base_len && base) base_len = eir_append_service_data(eir, 0, 0x1851, base, base_len); /* We need hci_conn object using the BDADDR_ANY as dst */ conn = hci_add_bis(hdev, dst, qos, base_len, eir); if (IS_ERR(conn)) return conn; /* Update LINK PHYs according to QoS preference */ conn->le_tx_phy = qos->bcast.out.phy; conn->le_tx_phy = qos->bcast.out.phy; /* Add Basic Announcement into Peridic Adv Data if BASE is set */ if (base_len && base) { memcpy(conn->le_per_adv_data, eir, sizeof(eir)); conn->le_per_adv_data_len = base_len; } hci_iso_qos_setup(hdev, conn, &qos->bcast.out, conn->le_tx_phy ? conn->le_tx_phy : hdev->le_tx_def_phys); conn->iso_qos = *qos; conn->state = BT_BOUND; /* Link BISes together */ parent = hci_conn_hash_lookup_big(hdev, conn->iso_qos.bcast.big); if (parent && parent != conn) { link = hci_conn_link(parent, conn); if (!link) { hci_conn_drop(conn); return ERR_PTR(-ENOLINK); } /* Link takes the refcount */ hci_conn_drop(conn); } return conn; } static void bis_mark_per_adv(struct hci_conn *conn, void *data) { struct iso_list_data *d = data; /* Skip if not broadcast/ANY address */ if (bacmp(&conn->dst, BDADDR_ANY)) return; if (d->big != conn->iso_qos.bcast.big || d->bis == BT_ISO_QOS_BIS_UNSET || d->bis != conn->iso_qos.bcast.bis) return; set_bit(HCI_CONN_PER_ADV, &conn->flags); } struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type, struct bt_iso_qos *qos, __u8 base_len, __u8 *base) { struct hci_conn *conn; int err; struct iso_list_data data; conn = hci_bind_bis(hdev, dst, qos, base_len, base); if (IS_ERR(conn)) return conn; if (conn->state == BT_CONNECTED) return conn; data.big = qos->bcast.big; data.bis = qos->bcast.bis; /* Set HCI_CONN_PER_ADV for all bound connections, to mark that * the start periodic advertising and create BIG commands have * been queued */ hci_conn_hash_list_state(hdev, bis_mark_per_adv, ISO_LINK, BT_BOUND, &data); /* Queue start periodic advertising and create BIG */ err = hci_cmd_sync_queue(hdev, create_big_sync, conn, create_big_complete); if (err < 0) { hci_conn_drop(conn); return ERR_PTR(err); } return conn; } struct hci_conn *hci_connect_cis(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type, struct bt_iso_qos *qos) { struct hci_conn *le; struct hci_conn *cis; struct hci_link *link; if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) le = hci_connect_le(hdev, dst, dst_type, false, BT_SECURITY_LOW, HCI_LE_CONN_TIMEOUT, HCI_ROLE_SLAVE, 0, 0); else le = hci_connect_le_scan(hdev, dst, dst_type, BT_SECURITY_LOW, HCI_LE_CONN_TIMEOUT, CONN_REASON_ISO_CONNECT); if (IS_ERR(le)) return le; hci_iso_qos_setup(hdev, le, &qos->ucast.out, le->le_tx_phy ? le->le_tx_phy : hdev->le_tx_def_phys); hci_iso_qos_setup(hdev, le, &qos->ucast.in, le->le_rx_phy ? le->le_rx_phy : hdev->le_rx_def_phys); cis = hci_bind_cis(hdev, dst, dst_type, qos); if (IS_ERR(cis)) { hci_conn_drop(le); return cis; } link = hci_conn_link(le, cis); if (!link) { hci_conn_drop(le); hci_conn_drop(cis); return ERR_PTR(-ENOLINK); } /* Link takes the refcount */ hci_conn_drop(cis); cis->state = BT_CONNECT; hci_le_create_cis_pending(hdev); return cis; } /* Check link security requirement */ int hci_conn_check_link_mode(struct hci_conn *conn) { BT_DBG("hcon %p", conn); /* In Secure Connections Only mode, it is required that Secure * Connections is used and the link is encrypted with AES-CCM * using a P-256 authenticated combination key. */ if (hci_dev_test_flag(conn->hdev, HCI_SC_ONLY)) { if (!hci_conn_sc_enabled(conn) || !test_bit(HCI_CONN_AES_CCM, &conn->flags) || conn->key_type != HCI_LK_AUTH_COMBINATION_P256) return 0; } /* AES encryption is required for Level 4: * * BLUETOOTH CORE SPECIFICATION Version 5.2 | Vol 3, Part C * page 1319: * * 128-bit equivalent strength for link and encryption keys * required using FIPS approved algorithms (E0 not allowed, * SAFER+ not allowed, and P-192 not allowed; encryption key * not shortened) */ if (conn->sec_level == BT_SECURITY_FIPS && !test_bit(HCI_CONN_AES_CCM, &conn->flags)) { bt_dev_err(conn->hdev, "Invalid security: Missing AES-CCM usage"); return 0; } if (hci_conn_ssp_enabled(conn) && !test_bit(HCI_CONN_ENCRYPT, &conn->flags)) return 0; return 1; } /* Authenticate remote device */ static int hci_conn_auth(struct hci_conn *conn, __u8 sec_level, __u8 auth_type) { BT_DBG("hcon %p", conn); if (conn->pending_sec_level > sec_level) sec_level = conn->pending_sec_level; if (sec_level > conn->sec_level) conn->pending_sec_level = sec_level; else if (test_bit(HCI_CONN_AUTH, &conn->flags)) return 1; /* Make sure we preserve an existing MITM requirement*/ auth_type |= (conn->auth_type & 0x01); conn->auth_type = auth_type; if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->flags)) { struct hci_cp_auth_requested cp; cp.handle = cpu_to_le16(conn->handle); hci_send_cmd(conn->hdev, HCI_OP_AUTH_REQUESTED, sizeof(cp), &cp); /* Set the ENCRYPT_PEND to trigger encryption after * authentication. */ if (!test_bit(HCI_CONN_ENCRYPT, &conn->flags)) set_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags); } return 0; } /* Encrypt the link */ static void hci_conn_encrypt(struct hci_conn *conn) { BT_DBG("hcon %p", conn); if (!test_and_set_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags)) { struct hci_cp_set_conn_encrypt cp; cp.handle = cpu_to_le16(conn->handle); cp.encrypt = 0x01; hci_send_cmd(conn->hdev, HCI_OP_SET_CONN_ENCRYPT, sizeof(cp), &cp); } } /* Enable security */ int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type, bool initiator) { BT_DBG("hcon %p", conn); if (conn->type == LE_LINK) return smp_conn_security(conn, sec_level); /* For sdp we don't need the link key. */ if (sec_level == BT_SECURITY_SDP) return 1; /* For non 2.1 devices and low security level we don't need the link key. */ if (sec_level == BT_SECURITY_LOW && !hci_conn_ssp_enabled(conn)) return 1; /* For other security levels we need the link key. */ if (!test_bit(HCI_CONN_AUTH, &conn->flags)) goto auth; switch (conn->key_type) { case HCI_LK_AUTH_COMBINATION_P256: /* An authenticated FIPS approved combination key has * sufficient security for security level 4 or lower. */ if (sec_level <= BT_SECURITY_FIPS) goto encrypt; break; case HCI_LK_AUTH_COMBINATION_P192: /* An authenticated combination key has sufficient security for * security level 3 or lower. */ if (sec_level <= BT_SECURITY_HIGH) goto encrypt; break; case HCI_LK_UNAUTH_COMBINATION_P192: case HCI_LK_UNAUTH_COMBINATION_P256: /* An unauthenticated combination key has sufficient security * for security level 2 or lower. */ if (sec_level <= BT_SECURITY_MEDIUM) goto encrypt; break; case HCI_LK_COMBINATION: /* A combination key has always sufficient security for the * security levels 2 or lower. High security level requires the * combination key is generated using maximum PIN code length * (16). For pre 2.1 units. */ if (sec_level <= BT_SECURITY_MEDIUM || conn->pin_length == 16) goto encrypt; break; default: break; } auth: if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags)) return 0; if (initiator) set_bit(HCI_CONN_AUTH_INITIATOR, &conn->flags); if (!hci_conn_auth(conn, sec_level, auth_type)) return 0; encrypt: if (test_bit(HCI_CONN_ENCRYPT, &conn->flags)) { /* Ensure that the encryption key size has been read, * otherwise stall the upper layer responses. */ if (!conn->enc_key_size) return 0; /* Nothing else needed, all requirements are met */ return 1; } hci_conn_encrypt(conn); return 0; } EXPORT_SYMBOL(hci_conn_security); /* Check secure link requirement */ int hci_conn_check_secure(struct hci_conn *conn, __u8 sec_level) { BT_DBG("hcon %p", conn); /* Accept if non-secure or higher security level is required */ if (sec_level != BT_SECURITY_HIGH && sec_level != BT_SECURITY_FIPS) return 1; /* Accept if secure or higher security level is already present */ if (conn->sec_level == BT_SECURITY_HIGH || conn->sec_level == BT_SECURITY_FIPS) return 1; /* Reject not secure link */ return 0; } EXPORT_SYMBOL(hci_conn_check_secure); /* Switch role */ int hci_conn_switch_role(struct hci_conn *conn, __u8 role) { BT_DBG("hcon %p", conn); if (role == conn->role) return 1; if (!test_and_set_bit(HCI_CONN_RSWITCH_PEND, &conn->flags)) { struct hci_cp_switch_role cp; bacpy(&cp.bdaddr, &conn->dst); cp.role = role; hci_send_cmd(conn->hdev, HCI_OP_SWITCH_ROLE, sizeof(cp), &cp); } return 0; } EXPORT_SYMBOL(hci_conn_switch_role); /* Enter active mode */ void hci_conn_enter_active_mode(struct hci_conn *conn, __u8 force_active) { struct hci_dev *hdev = conn->hdev; BT_DBG("hcon %p mode %d", conn, conn->mode); if (conn->mode != HCI_CM_SNIFF) goto timer; if (!test_bit(HCI_CONN_POWER_SAVE, &conn->flags) && !force_active) goto timer; if (!test_and_set_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->flags)) { struct hci_cp_exit_sniff_mode cp; cp.handle = cpu_to_le16(conn->handle); hci_send_cmd(hdev, HCI_OP_EXIT_SNIFF_MODE, sizeof(cp), &cp); } timer: if (hdev->idle_timeout > 0) queue_delayed_work(hdev->workqueue, &conn->idle_work, msecs_to_jiffies(hdev->idle_timeout)); } /* Drop all connection on the device */ void hci_conn_hash_flush(struct hci_dev *hdev) { struct list_head *head = &hdev->conn_hash.list; struct hci_conn *conn; BT_DBG("hdev %s", hdev->name); /* We should not traverse the list here, because hci_conn_del * can remove extra links, which may cause the list traversal * to hit items that have already been released. */ while ((conn = list_first_entry_or_null(head, struct hci_conn, list)) != NULL) { conn->state = BT_CLOSED; hci_disconn_cfm(conn, HCI_ERROR_LOCAL_HOST_TERM); hci_conn_del(conn); } } static u32 get_link_mode(struct hci_conn *conn) { u32 link_mode = 0; if (conn->role == HCI_ROLE_MASTER) link_mode |= HCI_LM_MASTER; if (test_bit(HCI_CONN_ENCRYPT, &conn->flags)) link_mode |= HCI_LM_ENCRYPT; if (test_bit(HCI_CONN_AUTH, &conn->flags)) link_mode |= HCI_LM_AUTH; if (test_bit(HCI_CONN_SECURE, &conn->flags)) link_mode |= HCI_LM_SECURE; if (test_bit(HCI_CONN_FIPS, &conn->flags)) link_mode |= HCI_LM_FIPS; return link_mode; } int hci_get_conn_list(void __user *arg) { struct hci_conn *c; struct hci_conn_list_req req, *cl; struct hci_conn_info *ci; struct hci_dev *hdev; int n = 0, size, err; if (copy_from_user(&req, arg, sizeof(req))) return -EFAULT; if (!req.conn_num || req.conn_num > (PAGE_SIZE * 2) / sizeof(*ci)) return -EINVAL; size = sizeof(req) + req.conn_num * sizeof(*ci); cl = kmalloc(size, GFP_KERNEL); if (!cl) return -ENOMEM; hdev = hci_dev_get(req.dev_id); if (!hdev) { kfree(cl); return -ENODEV; } ci = cl->conn_info; hci_dev_lock(hdev); list_for_each_entry(c, &hdev->conn_hash.list, list) { bacpy(&(ci + n)->bdaddr, &c->dst); (ci + n)->handle = c->handle; (ci + n)->type = c->type; (ci + n)->out = c->out; (ci + n)->state = c->state; (ci + n)->link_mode = get_link_mode(c); if (++n >= req.conn_num) break; } hci_dev_unlock(hdev); cl->dev_id = hdev->id; cl->conn_num = n; size = sizeof(req) + n * sizeof(*ci); hci_dev_put(hdev); err = copy_to_user(arg, cl, size); kfree(cl); return err ? -EFAULT : 0; } int hci_get_conn_info(struct hci_dev *hdev, void __user *arg) { struct hci_conn_info_req req; struct hci_conn_info ci; struct hci_conn *conn; char __user *ptr = arg + sizeof(req); if (copy_from_user(&req, arg, sizeof(req))) return -EFAULT; hci_dev_lock(hdev); conn = hci_conn_hash_lookup_ba(hdev, req.type, &req.bdaddr); if (conn) { bacpy(&ci.bdaddr, &conn->dst); ci.handle = conn->handle; ci.type = conn->type; ci.out = conn->out; ci.state = conn->state; ci.link_mode = get_link_mode(conn); } hci_dev_unlock(hdev); if (!conn) return -ENOENT; return copy_to_user(ptr, &ci, sizeof(ci)) ? -EFAULT : 0; } int hci_get_auth_info(struct hci_dev *hdev, void __user *arg) { struct hci_auth_info_req req; struct hci_conn *conn; if (copy_from_user(&req, arg, sizeof(req))) return -EFAULT; hci_dev_lock(hdev); conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &req.bdaddr); if (conn) req.type = conn->auth_type; hci_dev_unlock(hdev); if (!conn) return -ENOENT; return copy_to_user(arg, &req, sizeof(req)) ? -EFAULT : 0; } struct hci_chan *hci_chan_create(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; struct hci_chan *chan; BT_DBG("%s hcon %p", hdev->name, conn); if (test_bit(HCI_CONN_DROP, &conn->flags)) { BT_DBG("Refusing to create new hci_chan"); return NULL; } chan = kzalloc(sizeof(*chan), GFP_KERNEL); if (!chan) return NULL; chan->conn = hci_conn_get(conn); skb_queue_head_init(&chan->data_q); chan->state = BT_CONNECTED; list_add_rcu(&chan->list, &conn->chan_list); return chan; } void hci_chan_del(struct hci_chan *chan) { struct hci_conn *conn = chan->conn; struct hci_dev *hdev = conn->hdev; BT_DBG("%s hcon %p chan %p", hdev->name, conn, chan); list_del_rcu(&chan->list); synchronize_rcu(); /* Prevent new hci_chan's to be created for this hci_conn */ set_bit(HCI_CONN_DROP, &conn->flags); hci_conn_put(conn); skb_queue_purge(&chan->data_q); kfree(chan); } void hci_chan_list_flush(struct hci_conn *conn) { struct hci_chan *chan, *n; BT_DBG("hcon %p", conn); list_for_each_entry_safe(chan, n, &conn->chan_list, list) hci_chan_del(chan); } static struct hci_chan *__hci_chan_lookup_handle(struct hci_conn *hcon, __u16 handle) { struct hci_chan *hchan; list_for_each_entry(hchan, &hcon->chan_list, list) { if (hchan->handle == handle) return hchan; } return NULL; } struct hci_chan *hci_chan_lookup_handle(struct hci_dev *hdev, __u16 handle) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *hcon; struct hci_chan *hchan = NULL; rcu_read_lock(); list_for_each_entry_rcu(hcon, &h->list, list) { hchan = __hci_chan_lookup_handle(hcon, handle); if (hchan) break; } rcu_read_unlock(); return hchan; } u32 hci_conn_get_phy(struct hci_conn *conn) { u32 phys = 0; /* BLUETOOTH CORE SPECIFICATION Version 5.2 | Vol 2, Part B page 471: * Table 6.2: Packets defined for synchronous, asynchronous, and * CPB logical transport types. */ switch (conn->type) { case SCO_LINK: /* SCO logical transport (1 Mb/s): * HV1, HV2, HV3 and DV. */ phys |= BT_PHY_BR_1M_1SLOT; break; case ACL_LINK: /* ACL logical transport (1 Mb/s) ptt=0: * DH1, DM3, DH3, DM5 and DH5. */ phys |= BT_PHY_BR_1M_1SLOT; if (conn->pkt_type & (HCI_DM3 | HCI_DH3)) phys |= BT_PHY_BR_1M_3SLOT; if (conn->pkt_type & (HCI_DM5 | HCI_DH5)) phys |= BT_PHY_BR_1M_5SLOT; /* ACL logical transport (2 Mb/s) ptt=1: * 2-DH1, 2-DH3 and 2-DH5. */ if (!(conn->pkt_type & HCI_2DH1)) phys |= BT_PHY_EDR_2M_1SLOT; if (!(conn->pkt_type & HCI_2DH3)) phys |= BT_PHY_EDR_2M_3SLOT; if (!(conn->pkt_type & HCI_2DH5)) phys |= BT_PHY_EDR_2M_5SLOT; /* ACL logical transport (3 Mb/s) ptt=1: * 3-DH1, 3-DH3 and 3-DH5. */ if (!(conn->pkt_type & HCI_3DH1)) phys |= BT_PHY_EDR_3M_1SLOT; if (!(conn->pkt_type & HCI_3DH3)) phys |= BT_PHY_EDR_3M_3SLOT; if (!(conn->pkt_type & HCI_3DH5)) phys |= BT_PHY_EDR_3M_5SLOT; break; case ESCO_LINK: /* eSCO logical transport (1 Mb/s): EV3, EV4 and EV5 */ phys |= BT_PHY_BR_1M_1SLOT; if (!(conn->pkt_type & (ESCO_EV4 | ESCO_EV5))) phys |= BT_PHY_BR_1M_3SLOT; /* eSCO logical transport (2 Mb/s): 2-EV3, 2-EV5 */ if (!(conn->pkt_type & ESCO_2EV3)) phys |= BT_PHY_EDR_2M_1SLOT; if (!(conn->pkt_type & ESCO_2EV5)) phys |= BT_PHY_EDR_2M_3SLOT; /* eSCO logical transport (3 Mb/s): 3-EV3, 3-EV5 */ if (!(conn->pkt_type & ESCO_3EV3)) phys |= BT_PHY_EDR_3M_1SLOT; if (!(conn->pkt_type & ESCO_3EV5)) phys |= BT_PHY_EDR_3M_3SLOT; break; case LE_LINK: if (conn->le_tx_phy & HCI_LE_SET_PHY_1M) phys |= BT_PHY_LE_1M_TX; if (conn->le_rx_phy & HCI_LE_SET_PHY_1M) phys |= BT_PHY_LE_1M_RX; if (conn->le_tx_phy & HCI_LE_SET_PHY_2M) phys |= BT_PHY_LE_2M_TX; if (conn->le_rx_phy & HCI_LE_SET_PHY_2M) phys |= BT_PHY_LE_2M_RX; if (conn->le_tx_phy & HCI_LE_SET_PHY_CODED) phys |= BT_PHY_LE_CODED_TX; if (conn->le_rx_phy & HCI_LE_SET_PHY_CODED) phys |= BT_PHY_LE_CODED_RX; break; } return phys; } static int abort_conn_sync(struct hci_dev *hdev, void *data) { struct hci_conn *conn = data; if (!hci_conn_valid(hdev, conn)) return -ECANCELED; return hci_abort_conn_sync(hdev, conn, conn->abort_reason); } int hci_abort_conn(struct hci_conn *conn, u8 reason) { struct hci_dev *hdev = conn->hdev; /* If abort_reason has already been set it means the connection is * already being aborted so don't attempt to overwrite it. */ if (conn->abort_reason) return 0; bt_dev_dbg(hdev, "handle 0x%2.2x reason 0x%2.2x", conn->handle, reason); conn->abort_reason = reason; /* If the connection is pending check the command opcode since that * might be blocking on hci_cmd_sync_work while waiting its respective * event so we need to hci_cmd_sync_cancel to cancel it. * * hci_connect_le serializes the connection attempts so only one * connection can be in BT_CONNECT at time. */ if (conn->state == BT_CONNECT && hdev->req_status == HCI_REQ_PEND) { switch (hci_skb_event(hdev->sent_cmd)) { case HCI_EV_CONN_COMPLETE: case HCI_EV_LE_CONN_COMPLETE: case HCI_EV_LE_ENHANCED_CONN_COMPLETE: case HCI_EVT_LE_CIS_ESTABLISHED: hci_cmd_sync_cancel(hdev, ECANCELED); break; } /* Cancel connect attempt if still queued/pending */ } else if (!hci_cancel_connect_sync(hdev, conn)) { return 0; } return hci_cmd_sync_queue_once(hdev, abort_conn_sync, conn, NULL); } |
1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 | /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM udp #if !defined(_TRACE_UDP_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_UDP_H #include <linux/udp.h> #include <linux/tracepoint.h> #include <trace/events/net_probe_common.h> TRACE_EVENT(udp_fail_queue_rcv_skb, TP_PROTO(int rc, struct sock *sk, struct sk_buff *skb), TP_ARGS(rc, sk, skb), TP_STRUCT__entry( __field(int, rc) __field(__u16, sport) __field(__u16, dport) __field(__u16, family) __array(__u8, saddr, sizeof(struct sockaddr_in6)) __array(__u8, daddr, sizeof(struct sockaddr_in6)) ), TP_fast_assign( const struct udphdr *uh = (const struct udphdr *)udp_hdr(skb); __entry->rc = rc; /* for filtering use */ __entry->sport = ntohs(uh->source); __entry->dport = ntohs(uh->dest); __entry->family = sk->sk_family; memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); TP_STORE_ADDR_PORTS_SKB(skb, uh, __entry->saddr, __entry->daddr); ), TP_printk("rc=%d family=%s src=%pISpc dest=%pISpc", __entry->rc, show_family_name(__entry->family), __entry->saddr, __entry->daddr) ); #endif /* _TRACE_UDP_H */ /* This part must be outside protection */ #include <trace/define_trace.h> |
22 23 22 6 9 8 1 1 2 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 | // SPDX-License-Identifier: GPL-2.0-only /* * AppArmor security module * * This file contains basic common functions used in AppArmor * * Copyright (C) 1998-2008 Novell/SUSE * Copyright 2009-2010 Canonical Ltd. */ #include <linux/ctype.h> #include <linux/mm.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/vmalloc.h> #include "include/audit.h" #include "include/apparmor.h" #include "include/lib.h" #include "include/perms.h" #include "include/policy.h" struct aa_perms nullperms; struct aa_perms allperms = { .allow = ALL_PERMS_MASK, .quiet = ALL_PERMS_MASK, .hide = ALL_PERMS_MASK }; /** * aa_free_str_table - free entries str table * @t: the string table to free (MAYBE NULL) */ void aa_free_str_table(struct aa_str_table *t) { int i; if (t) { if (!t->table) return; for (i = 0; i < t->size; i++) kfree_sensitive(t->table[i]); kfree_sensitive(t->table); t->table = NULL; t->size = 0; } } /** * aa_split_fqname - split a fqname into a profile and namespace name * @fqname: a full qualified name in namespace profile format (NOT NULL) * @ns_name: pointer to portion of the string containing the ns name (NOT NULL) * * Returns: profile name or NULL if one is not specified * * Split a namespace name from a profile name (see policy.c for naming * description). If a portion of the name is missing it returns NULL for * that portion. * * NOTE: may modify the @fqname string. The pointers returned point * into the @fqname string. */ char *aa_split_fqname(char *fqname, char **ns_name) { char *name = strim(fqname); *ns_name = NULL; if (name[0] == ':') { char *split = strchr(&name[1], ':'); *ns_name = skip_spaces(&name[1]); if (split) { /* overwrite ':' with \0 */ *split++ = 0; if (strncmp(split, "//", 2) == 0) split += 2; name = skip_spaces(split); } else /* a ns name without a following profile is allowed */ name = NULL; } if (name && *name == 0) name = NULL; return name; } /** * skipn_spaces - Removes leading whitespace from @str. * @str: The string to be stripped. * @n: length of str to parse, will stop at \0 if encountered before n * * Returns a pointer to the first non-whitespace character in @str. * if all whitespace will return NULL */ const char *skipn_spaces(const char *str, size_t n) { for (; n && isspace(*str); --n) ++str; if (n) return (char *)str; return NULL; } const char *aa_splitn_fqname(const char *fqname, size_t n, const char **ns_name, size_t *ns_len) { const char *end = fqname + n; const char *name = skipn_spaces(fqname, n); *ns_name = NULL; *ns_len = 0; if (!name) return NULL; if (name[0] == ':') { char *split = strnchr(&name[1], end - &name[1], ':'); *ns_name = skipn_spaces(&name[1], end - &name[1]); if (!*ns_name) return NULL; if (split) { *ns_len = split - *ns_name; if (*ns_len == 0) *ns_name = NULL; split++; if (end - split > 1 && strncmp(split, "//", 2) == 0) split += 2; name = skipn_spaces(split, end - split); } else { /* a ns name without a following profile is allowed */ name = NULL; *ns_len = end - *ns_name; } } if (name && *name == 0) name = NULL; return name; } /** * aa_info_message - log a none profile related status message * @str: message to log */ void aa_info_message(const char *str) { if (audit_enabled) { DEFINE_AUDIT_DATA(ad, LSM_AUDIT_DATA_NONE, AA_CLASS_NONE, NULL); ad.info = str; aa_audit_msg(AUDIT_APPARMOR_STATUS, &ad, NULL); } printk(KERN_INFO "AppArmor: %s\n", str); } __counted char *aa_str_alloc(int size, gfp_t gfp) { struct counted_str *str; str = kmalloc(struct_size(str, name, size), gfp); if (!str) return NULL; kref_init(&str->count); return str->name; } void aa_str_kref(struct kref *kref) { kfree(container_of(kref, struct counted_str, count)); } const char aa_file_perm_chrs[] = "xwracd km l "; const char *aa_file_perm_names[] = { "exec", "write", "read", "append", "create", "delete", "open", "rename", "setattr", "getattr", "setcred", "getcred", "chmod", "chown", "chgrp", "lock", "mmap", "mprot", "link", "snapshot", "unknown", "unknown", "unknown", "unknown", "unknown", "unknown", "unknown", "unknown", "stack", "change_onexec", "change_profile", "change_hat", }; /** * aa_perm_mask_to_str - convert a perm mask to its short string * @str: character buffer to store string in (at least 10 characters) * @str_size: size of the @str buffer * @chrs: NUL-terminated character buffer of permission characters * @mask: permission mask to convert */ void aa_perm_mask_to_str(char *str, size_t str_size, const char *chrs, u32 mask) { unsigned int i, perm = 1; size_t num_chrs = strlen(chrs); for (i = 0; i < num_chrs; perm <<= 1, i++) { if (mask & perm) { /* Ensure that one byte is left for NUL-termination */ if (WARN_ON_ONCE(str_size <= 1)) break; *str++ = chrs[i]; str_size--; } } *str = '\0'; } void aa_audit_perm_names(struct audit_buffer *ab, const char * const *names, u32 mask) { const char *fmt = "%s"; unsigned int i, perm = 1; bool prev = false; for (i = 0; i < 32; perm <<= 1, i++) { if (mask & perm) { audit_log_format(ab, fmt, names[i]); if (!prev) { prev = true; fmt = " %s"; } } } } void aa_audit_perm_mask(struct audit_buffer *ab, u32 mask, const char *chrs, u32 chrsmask, const char * const *names, u32 namesmask) { char str[33]; audit_log_format(ab, "\""); if ((mask & chrsmask) && chrs) { aa_perm_mask_to_str(str, sizeof(str), chrs, mask & chrsmask); mask &= ~chrsmask; audit_log_format(ab, "%s", str); if (mask & namesmask) audit_log_format(ab, " "); } if ((mask & namesmask) && names) aa_audit_perm_names(ab, names, mask & namesmask); audit_log_format(ab, "\""); } /** * aa_audit_perms_cb - generic callback fn for auditing perms * @ab: audit buffer (NOT NULL) * @va: audit struct to audit values of (NOT NULL) */ static void aa_audit_perms_cb(struct audit_buffer *ab, void *va) { struct common_audit_data *sa = va; struct apparmor_audit_data *ad = aad(sa); if (ad->request) { audit_log_format(ab, " requested_mask="); aa_audit_perm_mask(ab, ad->request, aa_file_perm_chrs, PERMS_CHRS_MASK, aa_file_perm_names, PERMS_NAMES_MASK); } if (ad->denied) { audit_log_format(ab, "denied_mask="); aa_audit_perm_mask(ab, ad->denied, aa_file_perm_chrs, PERMS_CHRS_MASK, aa_file_perm_names, PERMS_NAMES_MASK); } audit_log_format(ab, " peer="); aa_label_xaudit(ab, labels_ns(ad->subj_label), ad->peer, FLAGS_NONE, GFP_ATOMIC); } /** * aa_apply_modes_to_perms - apply namespace and profile flags to perms * @profile: that perms where computed from * @perms: perms to apply mode modifiers to * * TODO: split into profile and ns based flags for when accumulating perms */ void aa_apply_modes_to_perms(struct aa_profile *profile, struct aa_perms *perms) { switch (AUDIT_MODE(profile)) { case AUDIT_ALL: perms->audit = ALL_PERMS_MASK; fallthrough; case AUDIT_NOQUIET: perms->quiet = 0; break; case AUDIT_QUIET: perms->audit = 0; fallthrough; case AUDIT_QUIET_DENIED: perms->quiet = ALL_PERMS_MASK; break; } if (KILL_MODE(profile)) perms->kill = ALL_PERMS_MASK; else if (COMPLAIN_MODE(profile)) perms->complain = ALL_PERMS_MASK; else if (USER_MODE(profile)) perms->prompt = ALL_PERMS_MASK; } void aa_profile_match_label(struct aa_profile *profile, struct aa_ruleset *rules, struct aa_label *label, int type, u32 request, struct aa_perms *perms) { /* TODO: doesn't yet handle extended types */ aa_state_t state; state = aa_dfa_next(rules->policy->dfa, rules->policy->start[AA_CLASS_LABEL], type); aa_label_match(profile, rules, label, state, false, request, perms); } /* currently unused */ int aa_profile_label_perm(struct aa_profile *profile, struct aa_profile *target, u32 request, int type, u32 *deny, struct apparmor_audit_data *ad) { struct aa_ruleset *rules = list_first_entry(&profile->rules, typeof(*rules), list); struct aa_perms perms; ad->peer = &target->label; ad->request = request; aa_profile_match_label(profile, rules, &target->label, type, request, &perms); aa_apply_modes_to_perms(profile, &perms); *deny |= request & perms.deny; return aa_check_perms(profile, &perms, request, ad, aa_audit_perms_cb); } /** * aa_check_perms - do audit mode selection based on perms set * @profile: profile being checked * @perms: perms computed for the request * @request: requested perms * @ad: initialized audit structure (MAY BE NULL if not auditing) * @cb: callback fn for type specific fields (MAY BE NULL) * * Returns: 0 if permission else error code * * Note: profile audit modes need to be set before calling by setting the * perm masks appropriately. * * If not auditing then complain mode is not enabled and the * error code will indicate whether there was an explicit deny * with a positive value. */ int aa_check_perms(struct aa_profile *profile, struct aa_perms *perms, u32 request, struct apparmor_audit_data *ad, void (*cb)(struct audit_buffer *, void *)) { int type, error; u32 denied = request & (~perms->allow | perms->deny); if (likely(!denied)) { /* mask off perms that are not being force audited */ request &= perms->audit; if (!request || !ad) return 0; type = AUDIT_APPARMOR_AUDIT; error = 0; } else { error = -EACCES; if (denied & perms->kill) type = AUDIT_APPARMOR_KILL; else if (denied == (denied & perms->complain)) type = AUDIT_APPARMOR_ALLOWED; else type = AUDIT_APPARMOR_DENIED; if (denied == (denied & perms->hide)) error = -ENOENT; denied &= ~perms->quiet; if (!ad || !denied) return error; } if (ad) { ad->subj_label = &profile->label; ad->request = request; ad->denied = denied; ad->error = error; aa_audit_msg(type, ad, cb); } if (type == AUDIT_APPARMOR_ALLOWED) error = 0; return error; } /** * aa_policy_init - initialize a policy structure * @policy: policy to initialize (NOT NULL) * @prefix: prefix name if any is required. (MAYBE NULL) * @name: name of the policy, init will make a copy of it (NOT NULL) * @gfp: allocation mode * * Note: this fn creates a copy of strings passed in * * Returns: true if policy init successful */ bool aa_policy_init(struct aa_policy *policy, const char *prefix, const char *name, gfp_t gfp) { char *hname; /* freed by policy_free */ if (prefix) { hname = aa_str_alloc(strlen(prefix) + strlen(name) + 3, gfp); if (hname) sprintf(hname, "%s//%s", prefix, name); } else { hname = aa_str_alloc(strlen(name) + 1, gfp); if (hname) strcpy(hname, name); } if (!hname) return false; policy->hname = hname; /* base.name is a substring of fqname */ policy->name = basename(policy->hname); INIT_LIST_HEAD(&policy->list); INIT_LIST_HEAD(&policy->profiles); return true; } /** * aa_policy_destroy - free the elements referenced by @policy * @policy: policy that is to have its elements freed (NOT NULL) */ void aa_policy_destroy(struct aa_policy *policy) { AA_BUG(on_list_rcu(&policy->profiles)); AA_BUG(on_list_rcu(&policy->list)); /* don't free name as its a subset of hname */ aa_put_str(policy->hname); } |
82 82 5 76 77 77 94 5 5 89 5 5 5 5 5 5 5 5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 | // SPDX-License-Identifier: GPL-2.0-or-later #include <linux/dma-resv.h> #include <linux/dma-fence-chain.h> #include <drm/drm_atomic_state_helper.h> #include <drm/drm_atomic_uapi.h> #include <drm/drm_framebuffer.h> #include <drm/drm_gem.h> #include <drm/drm_gem_atomic_helper.h> #include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_simple_kms_helper.h> #include "drm_internal.h" /** * DOC: overview * * The GEM atomic helpers library implements generic atomic-commit * functions for drivers that use GEM objects. Currently, it provides * synchronization helpers, and plane state and framebuffer BO mappings * for planes with shadow buffers. * * Before scanout, a plane's framebuffer needs to be synchronized with * possible writers that draw into the framebuffer. All drivers should * call drm_gem_plane_helper_prepare_fb() from their implementation of * struct &drm_plane_helper.prepare_fb . It sets the plane's fence from * the framebuffer so that the DRM core can synchronize access automatically. * drm_gem_plane_helper_prepare_fb() can also be used directly as * implementation of prepare_fb. * * .. code-block:: c * * #include <drm/drm_gem_atomic_helper.h> * * struct drm_plane_helper_funcs driver_plane_helper_funcs = { * ..., * . prepare_fb = drm_gem_plane_helper_prepare_fb, * }; * * A driver using a shadow buffer copies the content of the shadow buffers * into the HW's framebuffer memory during an atomic update. This requires * a mapping of the shadow buffer into kernel address space. The mappings * cannot be established by commit-tail functions, such as atomic_update, * as this would violate locking rules around dma_buf_vmap(). * * The helpers for shadow-buffered planes establish and release mappings, * and provide struct drm_shadow_plane_state, which stores the plane's mapping * for commit-tail functions. * * Shadow-buffered planes can easily be enabled by using the provided macros * %DRM_GEM_SHADOW_PLANE_FUNCS and %DRM_GEM_SHADOW_PLANE_HELPER_FUNCS. * These macros set up the plane and plane-helper callbacks to point to the * shadow-buffer helpers. * * .. code-block:: c * * #include <drm/drm_gem_atomic_helper.h> * * struct drm_plane_funcs driver_plane_funcs = { * ..., * DRM_GEM_SHADOW_PLANE_FUNCS, * }; * * struct drm_plane_helper_funcs driver_plane_helper_funcs = { * ..., * DRM_GEM_SHADOW_PLANE_HELPER_FUNCS, * }; * * In the driver's atomic-update function, shadow-buffer mappings are available * from the plane state. Use to_drm_shadow_plane_state() to upcast from * struct drm_plane_state. * * .. code-block:: c * * void driver_plane_atomic_update(struct drm_plane *plane, * struct drm_plane_state *old_plane_state) * { * struct drm_plane_state *plane_state = plane->state; * struct drm_shadow_plane_state *shadow_plane_state = * to_drm_shadow_plane_state(plane_state); * * // access shadow buffer via shadow_plane_state->map * } * * A mapping address for each of the framebuffer's buffer object is stored in * struct &drm_shadow_plane_state.map. The mappings are valid while the state * is being used. * * Drivers that use struct drm_simple_display_pipe can use * %DRM_GEM_SIMPLE_DISPLAY_PIPE_SHADOW_PLANE_FUNCS to initialize the rsp * callbacks. Access to shadow-buffer mappings is similar to regular * atomic_update. * * .. code-block:: c * * struct drm_simple_display_pipe_funcs driver_pipe_funcs = { * ..., * DRM_GEM_SIMPLE_DISPLAY_PIPE_SHADOW_PLANE_FUNCS, * }; * * void driver_pipe_enable(struct drm_simple_display_pipe *pipe, * struct drm_crtc_state *crtc_state, * struct drm_plane_state *plane_state) * { * struct drm_shadow_plane_state *shadow_plane_state = * to_drm_shadow_plane_state(plane_state); * * // access shadow buffer via shadow_plane_state->map * } */ /* * Plane Helpers */ /** * drm_gem_plane_helper_prepare_fb() - Prepare a GEM backed framebuffer * @plane: Plane * @state: Plane state the fence will be attached to * * This function extracts the exclusive fence from &drm_gem_object.resv and * attaches it to plane state for the atomic helper to wait on. This is * necessary to correctly implement implicit synchronization for any buffers * shared as a struct &dma_buf. This function can be used as the * &drm_plane_helper_funcs.prepare_fb callback. * * There is no need for &drm_plane_helper_funcs.cleanup_fb hook for simple * GEM based framebuffer drivers which have their buffers always pinned in * memory. * * This function is the default implementation for GEM drivers of * &drm_plane_helper_funcs.prepare_fb if no callback is provided. */ int drm_gem_plane_helper_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state) { struct dma_fence *fence = dma_fence_get(state->fence); enum dma_resv_usage usage; size_t i; int ret; if (!state->fb) return 0; /* * Only add the kernel fences here if there is already a fence set via * explicit fencing interfaces on the atomic ioctl. * * This way explicit fencing can be used to overrule implicit fencing, * which is important to make explicit fencing use-cases work: One * example is using one buffer for 2 screens with different refresh * rates. Implicit fencing will clamp rendering to the refresh rate of * the slower screen, whereas explicit fence allows 2 independent * render and display loops on a single buffer. If a driver allows * obeys both implicit and explicit fences for plane updates, then it * will break all the benefits of explicit fencing. */ usage = fence ? DMA_RESV_USAGE_KERNEL : DMA_RESV_USAGE_WRITE; for (i = 0; i < state->fb->format->num_planes; ++i) { struct drm_gem_object *obj = drm_gem_fb_get_obj(state->fb, i); struct dma_fence *new; if (!obj) { ret = -EINVAL; goto error; } ret = dma_resv_get_singleton(obj->resv, usage, &new); if (ret) goto error; if (new && fence) { struct dma_fence_chain *chain = dma_fence_chain_alloc(); if (!chain) { ret = -ENOMEM; goto error; } dma_fence_chain_init(chain, fence, new, 1); fence = &chain->base; } else if (new) { fence = new; } } dma_fence_put(state->fence); state->fence = fence; return 0; error: dma_fence_put(fence); return ret; } EXPORT_SYMBOL_GPL(drm_gem_plane_helper_prepare_fb); /* * Shadow-buffered Planes */ /** * __drm_gem_duplicate_shadow_plane_state - duplicates shadow-buffered plane state * @plane: the plane * @new_shadow_plane_state: the new shadow-buffered plane state * * This function duplicates shadow-buffered plane state. This is helpful for drivers * that subclass struct drm_shadow_plane_state. * * The function does not duplicate existing mappings of the shadow buffers. * Mappings are maintained during the atomic commit by the plane's prepare_fb * and cleanup_fb helpers. See drm_gem_prepare_shadow_fb() and drm_gem_cleanup_shadow_fb() * for corresponding helpers. */ void __drm_gem_duplicate_shadow_plane_state(struct drm_plane *plane, struct drm_shadow_plane_state *new_shadow_plane_state) { struct drm_plane_state *plane_state = plane->state; struct drm_shadow_plane_state *shadow_plane_state = to_drm_shadow_plane_state(plane_state); __drm_atomic_helper_plane_duplicate_state(plane, &new_shadow_plane_state->base); drm_format_conv_state_copy(&new_shadow_plane_state->fmtcnv_state, &shadow_plane_state->fmtcnv_state); } EXPORT_SYMBOL(__drm_gem_duplicate_shadow_plane_state); /** * drm_gem_duplicate_shadow_plane_state - duplicates shadow-buffered plane state * @plane: the plane * * This function implements struct &drm_plane_funcs.atomic_duplicate_state for * shadow-buffered planes. It assumes the existing state to be of type * struct drm_shadow_plane_state and it allocates the new state to be of this * type. * * The function does not duplicate existing mappings of the shadow buffers. * Mappings are maintained during the atomic commit by the plane's prepare_fb * and cleanup_fb helpers. See drm_gem_prepare_shadow_fb() and drm_gem_cleanup_shadow_fb() * for corresponding helpers. * * Returns: * A pointer to a new plane state on success, or NULL otherwise. */ struct drm_plane_state * drm_gem_duplicate_shadow_plane_state(struct drm_plane *plane) { struct drm_plane_state *plane_state = plane->state; struct drm_shadow_plane_state *new_shadow_plane_state; if (!plane_state) return NULL; new_shadow_plane_state = kzalloc(sizeof(*new_shadow_plane_state), GFP_KERNEL); if (!new_shadow_plane_state) return NULL; __drm_gem_duplicate_shadow_plane_state(plane, new_shadow_plane_state); return &new_shadow_plane_state->base; } EXPORT_SYMBOL(drm_gem_duplicate_shadow_plane_state); /** * __drm_gem_destroy_shadow_plane_state - cleans up shadow-buffered plane state * @shadow_plane_state: the shadow-buffered plane state * * This function cleans up shadow-buffered plane state. Helpful for drivers that * subclass struct drm_shadow_plane_state. */ void __drm_gem_destroy_shadow_plane_state(struct drm_shadow_plane_state *shadow_plane_state) { drm_format_conv_state_release(&shadow_plane_state->fmtcnv_state); __drm_atomic_helper_plane_destroy_state(&shadow_plane_state->base); } EXPORT_SYMBOL(__drm_gem_destroy_shadow_plane_state); /** * drm_gem_destroy_shadow_plane_state - deletes shadow-buffered plane state * @plane: the plane * @plane_state: the plane state of type struct drm_shadow_plane_state * * This function implements struct &drm_plane_funcs.atomic_destroy_state * for shadow-buffered planes. It expects that mappings of shadow buffers * have been released already. */ void drm_gem_destroy_shadow_plane_state(struct drm_plane *plane, struct drm_plane_state *plane_state) { struct drm_shadow_plane_state *shadow_plane_state = to_drm_shadow_plane_state(plane_state); __drm_gem_destroy_shadow_plane_state(shadow_plane_state); kfree(shadow_plane_state); } EXPORT_SYMBOL(drm_gem_destroy_shadow_plane_state); /** * __drm_gem_reset_shadow_plane - resets a shadow-buffered plane * @plane: the plane * @shadow_plane_state: the shadow-buffered plane state * * This function resets state for shadow-buffered planes. Helpful * for drivers that subclass struct drm_shadow_plane_state. */ void __drm_gem_reset_shadow_plane(struct drm_plane *plane, struct drm_shadow_plane_state *shadow_plane_state) { __drm_atomic_helper_plane_reset(plane, &shadow_plane_state->base); drm_format_conv_state_init(&shadow_plane_state->fmtcnv_state); } EXPORT_SYMBOL(__drm_gem_reset_shadow_plane); /** * drm_gem_reset_shadow_plane - resets a shadow-buffered plane * @plane: the plane * * This function implements struct &drm_plane_funcs.reset_plane for * shadow-buffered planes. It assumes the current plane state to be * of type struct drm_shadow_plane and it allocates the new state of * this type. */ void drm_gem_reset_shadow_plane(struct drm_plane *plane) { struct drm_shadow_plane_state *shadow_plane_state; if (plane->state) { drm_gem_destroy_shadow_plane_state(plane, plane->state); plane->state = NULL; /* must be set to NULL here */ } shadow_plane_state = kzalloc(sizeof(*shadow_plane_state), GFP_KERNEL); if (!shadow_plane_state) return; __drm_gem_reset_shadow_plane(plane, shadow_plane_state); } EXPORT_SYMBOL(drm_gem_reset_shadow_plane); /** * drm_gem_begin_shadow_fb_access - prepares shadow framebuffers for CPU access * @plane: the plane * @plane_state: the plane state of type struct drm_shadow_plane_state * * This function implements struct &drm_plane_helper_funcs.begin_fb_access. It * maps all buffer objects of the plane's framebuffer into kernel address * space and stores them in struct &drm_shadow_plane_state.map. The first data * bytes are available in struct &drm_shadow_plane_state.data. * * See drm_gem_end_shadow_fb_access() for cleanup. * * Returns: * 0 on success, or a negative errno code otherwise. */ int drm_gem_begin_shadow_fb_access(struct drm_plane *plane, struct drm_plane_state *plane_state) { struct drm_shadow_plane_state *shadow_plane_state = to_drm_shadow_plane_state(plane_state); struct drm_framebuffer *fb = plane_state->fb; if (!fb) return 0; return drm_gem_fb_vmap(fb, shadow_plane_state->map, shadow_plane_state->data); } EXPORT_SYMBOL(drm_gem_begin_shadow_fb_access); /** * drm_gem_end_shadow_fb_access - releases shadow framebuffers from CPU access * @plane: the plane * @plane_state: the plane state of type struct drm_shadow_plane_state * * This function implements struct &drm_plane_helper_funcs.end_fb_access. It * undoes all effects of drm_gem_begin_shadow_fb_access() in reverse order. * * See drm_gem_begin_shadow_fb_access() for more information. */ void drm_gem_end_shadow_fb_access(struct drm_plane *plane, struct drm_plane_state *plane_state) { struct drm_shadow_plane_state *shadow_plane_state = to_drm_shadow_plane_state(plane_state); struct drm_framebuffer *fb = plane_state->fb; if (!fb) return; drm_gem_fb_vunmap(fb, shadow_plane_state->map); } EXPORT_SYMBOL(drm_gem_end_shadow_fb_access); /** * drm_gem_simple_kms_begin_shadow_fb_access - prepares shadow framebuffers for CPU access * @pipe: the simple display pipe * @plane_state: the plane state of type struct drm_shadow_plane_state * * This function implements struct drm_simple_display_funcs.begin_fb_access. * * See drm_gem_begin_shadow_fb_access() for details and * drm_gem_simple_kms_cleanup_shadow_fb() for cleanup. * * Returns: * 0 on success, or a negative errno code otherwise. */ int drm_gem_simple_kms_begin_shadow_fb_access(struct drm_simple_display_pipe *pipe, struct drm_plane_state *plane_state) { return drm_gem_begin_shadow_fb_access(&pipe->plane, plane_state); } EXPORT_SYMBOL(drm_gem_simple_kms_begin_shadow_fb_access); /** * drm_gem_simple_kms_end_shadow_fb_access - releases shadow framebuffers from CPU access * @pipe: the simple display pipe * @plane_state: the plane state of type struct drm_shadow_plane_state * * This function implements struct drm_simple_display_funcs.end_fb_access. * It undoes all effects of drm_gem_simple_kms_begin_shadow_fb_access() in * reverse order. * * See drm_gem_simple_kms_begin_shadow_fb_access(). */ void drm_gem_simple_kms_end_shadow_fb_access(struct drm_simple_display_pipe *pipe, struct drm_plane_state *plane_state) { drm_gem_end_shadow_fb_access(&pipe->plane, plane_state); } EXPORT_SYMBOL(drm_gem_simple_kms_end_shadow_fb_access); /** * drm_gem_simple_kms_reset_shadow_plane - resets a shadow-buffered plane * @pipe: the simple display pipe * * This function implements struct drm_simple_display_funcs.reset_plane * for shadow-buffered planes. */ void drm_gem_simple_kms_reset_shadow_plane(struct drm_simple_display_pipe *pipe) { drm_gem_reset_shadow_plane(&pipe->plane); } EXPORT_SYMBOL(drm_gem_simple_kms_reset_shadow_plane); /** * drm_gem_simple_kms_duplicate_shadow_plane_state - duplicates shadow-buffered plane state * @pipe: the simple display pipe * * This function implements struct drm_simple_display_funcs.duplicate_plane_state * for shadow-buffered planes. It does not duplicate existing mappings of the shadow * buffers. Mappings are maintained during the atomic commit by the plane's prepare_fb * and cleanup_fb helpers. * * Returns: * A pointer to a new plane state on success, or NULL otherwise. */ struct drm_plane_state * drm_gem_simple_kms_duplicate_shadow_plane_state(struct drm_simple_display_pipe *pipe) { return drm_gem_duplicate_shadow_plane_state(&pipe->plane); } EXPORT_SYMBOL(drm_gem_simple_kms_duplicate_shadow_plane_state); /** * drm_gem_simple_kms_destroy_shadow_plane_state - resets shadow-buffered plane state * @pipe: the simple display pipe * @plane_state: the plane state of type struct drm_shadow_plane_state * * This function implements struct drm_simple_display_funcs.destroy_plane_state * for shadow-buffered planes. It expects that mappings of shadow buffers * have been released already. */ void drm_gem_simple_kms_destroy_shadow_plane_state(struct drm_simple_display_pipe *pipe, struct drm_plane_state *plane_state) { drm_gem_destroy_shadow_plane_state(&pipe->plane, plane_state); } EXPORT_SYMBOL(drm_gem_simple_kms_destroy_shadow_plane_state); |
8 1 1 2 4 2 2 2 1 1 1 3 2 2 2 5 5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Checksum updating actions * * Copyright (c) 2010 Gregoire Baron <baronchon@n7mm.org> */ #include <linux/types.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/spinlock.h> #include <linux/netlink.h> #include <net/netlink.h> #include <linux/rtnetlink.h> #include <linux/skbuff.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/icmp.h> #include <linux/icmpv6.h> #include <linux/igmp.h> #include <net/tcp.h> #include <net/udp.h> #include <net/ip6_checksum.h> #include <net/sctp/checksum.h> #include <net/act_api.h> #include <net/pkt_cls.h> #include <linux/tc_act/tc_csum.h> #include <net/tc_act/tc_csum.h> #include <net/tc_wrapper.h> static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = { [TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), }, }; static struct tc_action_ops act_csum_ops; static int tcf_csum_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, act_csum_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; struct tcf_csum_params *params_new; struct nlattr *tb[TCA_CSUM_MAX + 1]; struct tcf_chain *goto_ch = NULL; struct tc_csum *parm; struct tcf_csum *p; int ret = 0, err; u32 index; if (nla == NULL) return -EINVAL; err = nla_parse_nested_deprecated(tb, TCA_CSUM_MAX, nla, csum_policy, NULL); if (err < 0) return err; if (tb[TCA_CSUM_PARMS] == NULL) return -EINVAL; parm = nla_data(tb[TCA_CSUM_PARMS]); index = parm->index; err = tcf_idr_check_alloc(tn, &index, a, bind); if (!err) { ret = tcf_idr_create_from_flags(tn, index, est, a, &act_csum_ops, bind, flags); if (ret) { tcf_idr_cleanup(tn, index); return ret; } ret = ACT_P_CREATED; } else if (err > 0) { if (bind) /* dont override defaults */ return ACT_P_BOUND; if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } } else { return err; } err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); if (err < 0) goto release_idr; p = to_tcf_csum(*a); params_new = kzalloc(sizeof(*params_new), GFP_KERNEL); if (unlikely(!params_new)) { err = -ENOMEM; goto put_chain; } params_new->update_flags = parm->update_flags; spin_lock_bh(&p->tcf_lock); goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); params_new = rcu_replace_pointer(p->params, params_new, lockdep_is_held(&p->tcf_lock)); spin_unlock_bh(&p->tcf_lock); if (goto_ch) tcf_chain_put_by_act(goto_ch); if (params_new) kfree_rcu(params_new, rcu); return ret; put_chain: if (goto_ch) tcf_chain_put_by_act(goto_ch); release_idr: tcf_idr_release(*a, bind); return err; } /** * tcf_csum_skb_nextlayer - Get next layer pointer * @skb: sk_buff to use * @ihl: previous summed headers length * @ipl: complete packet length * @jhl: next header length * * Check the expected next layer availability in the specified sk_buff. * Return the next layer pointer if pass, NULL otherwise. */ static void *tcf_csum_skb_nextlayer(struct sk_buff *skb, unsigned int ihl, unsigned int ipl, unsigned int jhl) { int ntkoff = skb_network_offset(skb); int hl = ihl + jhl; if (!pskb_may_pull(skb, ipl + ntkoff) || (ipl < hl) || skb_try_make_writable(skb, hl + ntkoff)) return NULL; else return (void *)(skb_network_header(skb) + ihl); } static int tcf_csum_ipv4_icmp(struct sk_buff *skb, unsigned int ihl, unsigned int ipl) { struct icmphdr *icmph; icmph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*icmph)); if (icmph == NULL) return 0; icmph->checksum = 0; skb->csum = csum_partial(icmph, ipl - ihl, 0); icmph->checksum = csum_fold(skb->csum); skb->ip_summed = CHECKSUM_NONE; return 1; } static int tcf_csum_ipv4_igmp(struct sk_buff *skb, unsigned int ihl, unsigned int ipl) { struct igmphdr *igmph; igmph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*igmph)); if (igmph == NULL) return 0; igmph->csum = 0; skb->csum = csum_partial(igmph, ipl - ihl, 0); igmph->csum = csum_fold(skb->csum); skb->ip_summed = CHECKSUM_NONE; return 1; } static int tcf_csum_ipv6_icmp(struct sk_buff *skb, unsigned int ihl, unsigned int ipl) { struct icmp6hdr *icmp6h; const struct ipv6hdr *ip6h; icmp6h = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*icmp6h)); if (icmp6h == NULL) return 0; ip6h = ipv6_hdr(skb); icmp6h->icmp6_cksum = 0; skb->csum = csum_partial(icmp6h, ipl - ihl, 0); icmp6h->icmp6_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, ipl - ihl, IPPROTO_ICMPV6, skb->csum); skb->ip_summed = CHECKSUM_NONE; return 1; } static int tcf_csum_ipv4_tcp(struct sk_buff *skb, unsigned int ihl, unsigned int ipl) { struct tcphdr *tcph; const struct iphdr *iph; if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) return 1; tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph)); if (tcph == NULL) return 0; iph = ip_hdr(skb); tcph->check = 0; skb->csum = csum_partial(tcph, ipl - ihl, 0); tcph->check = tcp_v4_check(ipl - ihl, iph->saddr, iph->daddr, skb->csum); skb->ip_summed = CHECKSUM_NONE; return 1; } static int tcf_csum_ipv6_tcp(struct sk_buff *skb, unsigned int ihl, unsigned int ipl) { struct tcphdr *tcph; const struct ipv6hdr *ip6h; if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) return 1; tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph)); if (tcph == NULL) return 0; ip6h = ipv6_hdr(skb); tcph->check = 0; skb->csum = csum_partial(tcph, ipl - ihl, 0); tcph->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, ipl - ihl, IPPROTO_TCP, skb->csum); skb->ip_summed = CHECKSUM_NONE; return 1; } static int tcf_csum_ipv4_udp(struct sk_buff *skb, unsigned int ihl, unsigned int ipl, int udplite) { struct udphdr *udph; const struct iphdr *iph; u16 ul; if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP) return 1; /* * Support both UDP and UDPLITE checksum algorithms, Don't use * udph->len to get the real length without any protocol check, * UDPLITE uses udph->len for another thing, * Use iph->tot_len, or just ipl. */ udph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*udph)); if (udph == NULL) return 0; iph = ip_hdr(skb); ul = ntohs(udph->len); if (udplite || udph->check) { udph->check = 0; if (udplite) { if (ul == 0) skb->csum = csum_partial(udph, ipl - ihl, 0); else if ((ul >= sizeof(*udph)) && (ul <= ipl - ihl)) skb->csum = csum_partial(udph, ul, 0); else goto ignore_obscure_skb; } else { if (ul != ipl - ihl) goto ignore_obscure_skb; skb->csum = csum_partial(udph, ul, 0); } udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr, ul, iph->protocol, skb->csum); if (!udph->check) udph->check = CSUM_MANGLED_0; } skb->ip_summed = CHECKSUM_NONE; ignore_obscure_skb: return 1; } static int tcf_csum_ipv6_udp(struct sk_buff *skb, unsigned int ihl, unsigned int ipl, int udplite) { struct udphdr *udph; const struct ipv6hdr *ip6h; u16 ul; if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP) return 1; /* * Support both UDP and UDPLITE checksum algorithms, Don't use * udph->len to get the real length without any protocol check, * UDPLITE uses udph->len for another thing, * Use ip6h->payload_len + sizeof(*ip6h) ... , or just ipl. */ udph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*udph)); if (udph == NULL) return 0; ip6h = ipv6_hdr(skb); ul = ntohs(udph->len); udph->check = 0; if (udplite) { if (ul == 0) skb->csum = csum_partial(udph, ipl - ihl, 0); else if ((ul >= sizeof(*udph)) && (ul <= ipl - ihl)) skb->csum = csum_partial(udph, ul, 0); else goto ignore_obscure_skb; } else { if (ul != ipl - ihl) goto ignore_obscure_skb; skb->csum = csum_partial(udph, ul, 0); } udph->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, ul, udplite ? IPPROTO_UDPLITE : IPPROTO_UDP, skb->csum); if (!udph->check) udph->check = CSUM_MANGLED_0; skb->ip_summed = CHECKSUM_NONE; ignore_obscure_skb: return 1; } static int tcf_csum_sctp(struct sk_buff *skb, unsigned int ihl, unsigned int ipl) { struct sctphdr *sctph; if (skb_is_gso(skb) && skb_is_gso_sctp(skb)) return 1; sctph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*sctph)); if (!sctph) return 0; sctph->checksum = sctp_compute_cksum(skb, skb_network_offset(skb) + ihl); skb_reset_csum_not_inet(skb); return 1; } static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags) { const struct iphdr *iph; int ntkoff; ntkoff = skb_network_offset(skb); if (!pskb_may_pull(skb, sizeof(*iph) + ntkoff)) goto fail; iph = ip_hdr(skb); switch (iph->frag_off & htons(IP_OFFSET) ? 0 : iph->protocol) { case IPPROTO_ICMP: if (update_flags & TCA_CSUM_UPDATE_FLAG_ICMP) if (!tcf_csum_ipv4_icmp(skb, iph->ihl * 4, ntohs(iph->tot_len))) goto fail; break; case IPPROTO_IGMP: if (update_flags & TCA_CSUM_UPDATE_FLAG_IGMP) if (!tcf_csum_ipv4_igmp(skb, iph->ihl * 4, ntohs(iph->tot_len))) goto fail; break; case IPPROTO_TCP: if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP) if (!tcf_csum_ipv4_tcp(skb, iph->ihl * 4, ntohs(iph->tot_len))) goto fail; break; case IPPROTO_UDP: if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP) if (!tcf_csum_ipv4_udp(skb, iph->ihl * 4, ntohs(iph->tot_len), 0)) goto fail; break; case IPPROTO_UDPLITE: if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE) if (!tcf_csum_ipv4_udp(skb, iph->ihl * 4, ntohs(iph->tot_len), 1)) goto fail; break; case IPPROTO_SCTP: if ((update_flags & TCA_CSUM_UPDATE_FLAG_SCTP) && !tcf_csum_sctp(skb, iph->ihl * 4, ntohs(iph->tot_len))) goto fail; break; } if (update_flags & TCA_CSUM_UPDATE_FLAG_IPV4HDR) { if (skb_try_make_writable(skb, sizeof(*iph) + ntkoff)) goto fail; ip_send_check(ip_hdr(skb)); } return 1; fail: return 0; } static int tcf_csum_ipv6_hopopts(struct ipv6_opt_hdr *ip6xh, unsigned int ixhl, unsigned int *pl) { int off, len, optlen; unsigned char *xh = (void *)ip6xh; off = sizeof(*ip6xh); len = ixhl - off; while (len > 1) { switch (xh[off]) { case IPV6_TLV_PAD1: optlen = 1; break; case IPV6_TLV_JUMBO: optlen = xh[off + 1] + 2; if (optlen != 6 || len < 6 || (off & 3) != 2) /* wrong jumbo option length/alignment */ return 0; *pl = ntohl(*(__be32 *)(xh + off + 2)); goto done; default: optlen = xh[off + 1] + 2; if (optlen > len) /* ignore obscure options */ goto done; break; } off += optlen; len -= optlen; } done: return 1; } static int tcf_csum_ipv6(struct sk_buff *skb, u32 update_flags) { struct ipv6hdr *ip6h; struct ipv6_opt_hdr *ip6xh; unsigned int hl, ixhl; unsigned int pl; int ntkoff; u8 nexthdr; ntkoff = skb_network_offset(skb); hl = sizeof(*ip6h); if (!pskb_may_pull(skb, hl + ntkoff)) goto fail; ip6h = ipv6_hdr(skb); pl = ntohs(ip6h->payload_len); nexthdr = ip6h->nexthdr; do { switch (nexthdr) { case NEXTHDR_FRAGMENT: goto ignore_skb; case NEXTHDR_ROUTING: case NEXTHDR_HOP: case NEXTHDR_DEST: if (!pskb_may_pull(skb, hl + sizeof(*ip6xh) + ntkoff)) goto fail; ip6xh = (void *)(skb_network_header(skb) + hl); ixhl = ipv6_optlen(ip6xh); if (!pskb_may_pull(skb, hl + ixhl + ntkoff)) goto fail; ip6xh = (void *)(skb_network_header(skb) + hl); if ((nexthdr == NEXTHDR_HOP) && !(tcf_csum_ipv6_hopopts(ip6xh, ixhl, &pl))) goto fail; nexthdr = ip6xh->nexthdr; hl += ixhl; break; case IPPROTO_ICMPV6: if (update_flags & TCA_CSUM_UPDATE_FLAG_ICMP) if (!tcf_csum_ipv6_icmp(skb, hl, pl + sizeof(*ip6h))) goto fail; goto done; case IPPROTO_TCP: if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP) if (!tcf_csum_ipv6_tcp(skb, hl, pl + sizeof(*ip6h))) goto fail; goto done; case IPPROTO_UDP: if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP) if (!tcf_csum_ipv6_udp(skb, hl, pl + sizeof(*ip6h), 0)) goto fail; goto done; case IPPROTO_UDPLITE: if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE) if (!tcf_csum_ipv6_udp(skb, hl, pl + sizeof(*ip6h), 1)) goto fail; goto done; case IPPROTO_SCTP: if ((update_flags & TCA_CSUM_UPDATE_FLAG_SCTP) && !tcf_csum_sctp(skb, hl, pl + sizeof(*ip6h))) goto fail; goto done; default: goto ignore_skb; } } while (pskb_may_pull(skb, hl + 1 + ntkoff)); done: ignore_skb: return 1; fail: return 0; } TC_INDIRECT_SCOPE int tcf_csum_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_csum *p = to_tcf_csum(a); bool orig_vlan_tag_present = false; unsigned int vlan_hdr_count = 0; struct tcf_csum_params *params; u32 update_flags; __be16 protocol; int action; params = rcu_dereference_bh(p->params); tcf_lastuse_update(&p->tcf_tm); tcf_action_update_bstats(&p->common, skb); action = READ_ONCE(p->tcf_action); if (unlikely(action == TC_ACT_SHOT)) goto drop; update_flags = params->update_flags; protocol = skb_protocol(skb, false); again: switch (protocol) { case cpu_to_be16(ETH_P_IP): if (!tcf_csum_ipv4(skb, update_flags)) goto drop; break; case cpu_to_be16(ETH_P_IPV6): if (!tcf_csum_ipv6(skb, update_flags)) goto drop; break; case cpu_to_be16(ETH_P_8021AD): fallthrough; case cpu_to_be16(ETH_P_8021Q): if (skb_vlan_tag_present(skb) && !orig_vlan_tag_present) { protocol = skb->protocol; orig_vlan_tag_present = true; } else { struct vlan_hdr *vlan = (struct vlan_hdr *)skb->data; protocol = vlan->h_vlan_encapsulated_proto; skb_pull(skb, VLAN_HLEN); skb_reset_network_header(skb); vlan_hdr_count++; } goto again; } out: /* Restore the skb for the pulled VLAN tags */ while (vlan_hdr_count--) { skb_push(skb, VLAN_HLEN); skb_reset_network_header(skb); } return action; drop: tcf_action_inc_drop_qstats(&p->common); action = TC_ACT_SHOT; goto out; } static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { unsigned char *b = skb_tail_pointer(skb); struct tcf_csum *p = to_tcf_csum(a); struct tcf_csum_params *params; struct tc_csum opt = { .index = p->tcf_index, .refcnt = refcount_read(&p->tcf_refcnt) - ref, .bindcnt = atomic_read(&p->tcf_bindcnt) - bind, }; struct tcf_t t; spin_lock_bh(&p->tcf_lock); params = rcu_dereference_protected(p->params, lockdep_is_held(&p->tcf_lock)); opt.action = p->tcf_action; opt.update_flags = params->update_flags; if (nla_put(skb, TCA_CSUM_PARMS, sizeof(opt), &opt)) goto nla_put_failure; tcf_tm_dump(&t, &p->tcf_tm); if (nla_put_64bit(skb, TCA_CSUM_TM, sizeof(t), &t, TCA_CSUM_PAD)) goto nla_put_failure; spin_unlock_bh(&p->tcf_lock); return skb->len; nla_put_failure: spin_unlock_bh(&p->tcf_lock); nlmsg_trim(skb, b); return -1; } static void tcf_csum_cleanup(struct tc_action *a) { struct tcf_csum *p = to_tcf_csum(a); struct tcf_csum_params *params; params = rcu_dereference_protected(p->params, 1); if (params) kfree_rcu(params, rcu); } static size_t tcf_csum_get_fill_size(const struct tc_action *act) { return nla_total_size(sizeof(struct tc_csum)); } static int tcf_csum_offload_act_setup(struct tc_action *act, void *entry_data, u32 *index_inc, bool bind, struct netlink_ext_ack *extack) { if (bind) { struct flow_action_entry *entry = entry_data; entry->id = FLOW_ACTION_CSUM; entry->csum_flags = tcf_csum_update_flags(act); *index_inc = 1; } else { struct flow_offload_action *fl_action = entry_data; fl_action->id = FLOW_ACTION_CSUM; } return 0; } static struct tc_action_ops act_csum_ops = { .kind = "csum", .id = TCA_ID_CSUM, .owner = THIS_MODULE, .act = tcf_csum_act, .dump = tcf_csum_dump, .init = tcf_csum_init, .cleanup = tcf_csum_cleanup, .get_fill_size = tcf_csum_get_fill_size, .offload_act_setup = tcf_csum_offload_act_setup, .size = sizeof(struct tcf_csum), }; MODULE_ALIAS_NET_ACT("csum"); static __net_init int csum_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, act_csum_ops.net_id); return tc_action_net_init(net, tn, &act_csum_ops); } static void __net_exit csum_exit_net(struct list_head *net_list) { tc_action_net_exit(net_list, act_csum_ops.net_id); } static struct pernet_operations csum_net_ops = { .init = csum_init_net, .exit_batch = csum_exit_net, .id = &act_csum_ops.net_id, .size = sizeof(struct tc_action_net), }; MODULE_DESCRIPTION("Checksum updating actions"); MODULE_LICENSE("GPL"); static int __init csum_init_module(void) { return tcf_register_action(&act_csum_ops, &csum_net_ops); } static void __exit csum_cleanup_module(void) { tcf_unregister_action(&act_csum_ops, &csum_net_ops); } module_init(csum_init_module); module_exit(csum_cleanup_module); |
1 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 | // SPDX-License-Identifier: GPL-2.0 #ifndef __KVM_X86_MMU_TDP_ITER_H #define __KVM_X86_MMU_TDP_ITER_H #include <linux/kvm_host.h> #include "mmu.h" #include "spte.h" /* * TDP MMU SPTEs are RCU protected to allow paging structures (non-leaf SPTEs) * to be zapped while holding mmu_lock for read, and to allow TLB flushes to be * batched without having to collect the list of zapped SPs. Flows that can * remove SPs must service pending TLB flushes prior to dropping RCU protection. */ static inline u64 kvm_tdp_mmu_read_spte(tdp_ptep_t sptep) { return READ_ONCE(*rcu_dereference(sptep)); } static inline u64 kvm_tdp_mmu_write_spte_atomic(tdp_ptep_t sptep, u64 new_spte) { return xchg(rcu_dereference(sptep), new_spte); } static inline void __kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 new_spte) { WRITE_ONCE(*rcu_dereference(sptep), new_spte); } /* * SPTEs must be modified atomically if they are shadow-present, leaf * SPTEs, and have volatile bits, i.e. has bits that can be set outside * of mmu_lock. The Writable bit can be set by KVM's fast page fault * handler, and Accessed and Dirty bits can be set by the CPU. * * Note, non-leaf SPTEs do have Accessed bits and those bits are * technically volatile, but KVM doesn't consume the Accessed bit of * non-leaf SPTEs, i.e. KVM doesn't care if it clobbers the bit. This * logic needs to be reassessed if KVM were to use non-leaf Accessed * bits, e.g. to skip stepping down into child SPTEs when aging SPTEs. */ static inline bool kvm_tdp_mmu_spte_need_atomic_write(u64 old_spte, int level) { return is_shadow_present_pte(old_spte) && is_last_spte(old_spte, level) && spte_has_volatile_bits(old_spte); } static inline u64 kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 old_spte, u64 new_spte, int level) { if (kvm_tdp_mmu_spte_need_atomic_write(old_spte, level)) return kvm_tdp_mmu_write_spte_atomic(sptep, new_spte); __kvm_tdp_mmu_write_spte(sptep, new_spte); return old_spte; } static inline u64 tdp_mmu_clear_spte_bits(tdp_ptep_t sptep, u64 old_spte, u64 mask, int level) { atomic64_t *sptep_atomic; if (kvm_tdp_mmu_spte_need_atomic_write(old_spte, level)) { sptep_atomic = (atomic64_t *)rcu_dereference(sptep); return (u64)atomic64_fetch_and(~mask, sptep_atomic); } __kvm_tdp_mmu_write_spte(sptep, old_spte & ~mask); return old_spte; } /* * A TDP iterator performs a pre-order walk over a TDP paging structure. */ struct tdp_iter { /* * The iterator will traverse the paging structure towards the mapping * for this GFN. */ gfn_t next_last_level_gfn; /* * The next_last_level_gfn at the time when the thread last * yielded. Only yielding when the next_last_level_gfn != * yielded_gfn helps ensure forward progress. */ gfn_t yielded_gfn; /* Pointers to the page tables traversed to reach the current SPTE */ tdp_ptep_t pt_path[PT64_ROOT_MAX_LEVEL]; /* A pointer to the current SPTE */ tdp_ptep_t sptep; /* The lowest GFN mapped by the current SPTE */ gfn_t gfn; /* The level of the root page given to the iterator */ int root_level; /* The lowest level the iterator should traverse to */ int min_level; /* The iterator's current level within the paging structure */ int level; /* The address space ID, i.e. SMM vs. regular. */ int as_id; /* A snapshot of the value at sptep */ u64 old_spte; /* * Whether the iterator has a valid state. This will be false if the * iterator walks off the end of the paging structure. */ bool valid; /* * True if KVM dropped mmu_lock and yielded in the middle of a walk, in * which case tdp_iter_next() needs to restart the walk at the root * level instead of advancing to the next entry. */ bool yielded; }; /* * Iterates over every SPTE mapping the GFN range [start, end) in a * preorder traversal. */ #define for_each_tdp_pte_min_level(iter, root, min_level, start, end) \ for (tdp_iter_start(&iter, root, min_level, start); \ iter.valid && iter.gfn < end; \ tdp_iter_next(&iter)) #define for_each_tdp_pte(iter, root, start, end) \ for_each_tdp_pte_min_level(iter, root, PG_LEVEL_4K, start, end) tdp_ptep_t spte_to_child_pt(u64 pte, int level); void tdp_iter_start(struct tdp_iter *iter, struct kvm_mmu_page *root, int min_level, gfn_t next_last_level_gfn); void tdp_iter_next(struct tdp_iter *iter); void tdp_iter_restart(struct tdp_iter *iter); #endif /* __KVM_X86_MMU_TDP_ITER_H */ |
570 336 336 334 336 336 1 2 336 297 38 335 568 570 570 299 336 237 11 10 11 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 | // SPDX-License-Identifier: GPL-2.0 /* * Functions to sequence PREFLUSH and FUA writes. * * Copyright (C) 2011 Max Planck Institute for Gravitational Physics * Copyright (C) 2011 Tejun Heo <tj@kernel.org> * * REQ_{PREFLUSH|FUA} requests are decomposed to sequences consisted of three * optional steps - PREFLUSH, DATA and POSTFLUSH - according to the request * properties and hardware capability. * * If a request doesn't have data, only REQ_PREFLUSH makes sense, which * indicates a simple flush request. If there is data, REQ_PREFLUSH indicates * that the device cache should be flushed before the data is executed, and * REQ_FUA means that the data must be on non-volatile media on request * completion. * * If the device doesn't have writeback cache, PREFLUSH and FUA don't make any * difference. The requests are either completed immediately if there's no data * or executed as normal requests otherwise. * * If the device has writeback cache and supports FUA, REQ_PREFLUSH is * translated to PREFLUSH but REQ_FUA is passed down directly with DATA. * * If the device has writeback cache and doesn't support FUA, REQ_PREFLUSH * is translated to PREFLUSH and REQ_FUA to POSTFLUSH. * * The actual execution of flush is double buffered. Whenever a request * needs to execute PRE or POSTFLUSH, it queues at * fq->flush_queue[fq->flush_pending_idx]. Once certain criteria are met, a * REQ_OP_FLUSH is issued and the pending_idx is toggled. When the flush * completes, all the requests which were pending are proceeded to the next * step. This allows arbitrary merging of different types of PREFLUSH/FUA * requests. * * Currently, the following conditions are used to determine when to issue * flush. * * C1. At any given time, only one flush shall be in progress. This makes * double buffering sufficient. * * C2. Flush is deferred if any request is executing DATA of its sequence. * This avoids issuing separate POSTFLUSHes for requests which shared * PREFLUSH. * * C3. The second condition is ignored if there is a request which has * waited longer than FLUSH_PENDING_TIMEOUT. This is to avoid * starvation in the unlikely case where there are continuous stream of * FUA (without PREFLUSH) requests. * * For devices which support FUA, it isn't clear whether C2 (and thus C3) * is beneficial. * * Note that a sequenced PREFLUSH/FUA request with DATA is completed twice. * Once while executing DATA and again after the whole sequence is * complete. The first completion updates the contained bio but doesn't * finish it so that the bio submitter is notified only after the whole * sequence is complete. This is implemented by testing RQF_FLUSH_SEQ in * req_bio_endio(). * * The above peculiarity requires that each PREFLUSH/FUA request has only one * bio attached to it, which is guaranteed as they aren't allowed to be * merged in the usual way. */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/bio.h> #include <linux/blkdev.h> #include <linux/gfp.h> #include <linux/part_stat.h> #include "blk.h" #include "blk-mq.h" #include "blk-mq-sched.h" /* PREFLUSH/FUA sequences */ enum { REQ_FSEQ_PREFLUSH = (1 << 0), /* pre-flushing in progress */ REQ_FSEQ_DATA = (1 << 1), /* data write in progress */ REQ_FSEQ_POSTFLUSH = (1 << 2), /* post-flushing in progress */ REQ_FSEQ_DONE = (1 << 3), REQ_FSEQ_ACTIONS = REQ_FSEQ_PREFLUSH | REQ_FSEQ_DATA | REQ_FSEQ_POSTFLUSH, /* * If flush has been pending longer than the following timeout, * it's issued even if flush_data requests are still in flight. */ FLUSH_PENDING_TIMEOUT = 5 * HZ, }; static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq, blk_opf_t flags); static inline struct blk_flush_queue * blk_get_flush_queue(struct request_queue *q, struct blk_mq_ctx *ctx) { return blk_mq_map_queue(q, REQ_OP_FLUSH, ctx)->fq; } static unsigned int blk_flush_policy(unsigned long fflags, struct request *rq) { unsigned int policy = 0; if (blk_rq_sectors(rq)) policy |= REQ_FSEQ_DATA; if (fflags & (1UL << QUEUE_FLAG_WC)) { if (rq->cmd_flags & REQ_PREFLUSH) policy |= REQ_FSEQ_PREFLUSH; if (!(fflags & (1UL << QUEUE_FLAG_FUA)) && (rq->cmd_flags & REQ_FUA)) policy |= REQ_FSEQ_POSTFLUSH; } return policy; } static unsigned int blk_flush_cur_seq(struct request *rq) { return 1 << ffz(rq->flush.seq); } static void blk_flush_restore_request(struct request *rq) { /* * After flush data completion, @rq->bio is %NULL but we need to * complete the bio again. @rq->biotail is guaranteed to equal the * original @rq->bio. Restore it. */ rq->bio = rq->biotail; if (rq->bio) rq->__sector = rq->bio->bi_iter.bi_sector; /* make @rq a normal request */ rq->rq_flags &= ~RQF_FLUSH_SEQ; rq->end_io = rq->flush.saved_end_io; } static void blk_account_io_flush(struct request *rq) { struct block_device *part = rq->q->disk->part0; part_stat_lock(); part_stat_inc(part, ios[STAT_FLUSH]); part_stat_add(part, nsecs[STAT_FLUSH], blk_time_get_ns() - rq->start_time_ns); part_stat_unlock(); } /** * blk_flush_complete_seq - complete flush sequence * @rq: PREFLUSH/FUA request being sequenced * @fq: flush queue * @seq: sequences to complete (mask of %REQ_FSEQ_*, can be zero) * @error: whether an error occurred * * @rq just completed @seq part of its flush sequence, record the * completion and trigger the next step. * * CONTEXT: * spin_lock_irq(fq->mq_flush_lock) */ static void blk_flush_complete_seq(struct request *rq, struct blk_flush_queue *fq, unsigned int seq, blk_status_t error) { struct request_queue *q = rq->q; struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx]; blk_opf_t cmd_flags; BUG_ON(rq->flush.seq & seq); rq->flush.seq |= seq; cmd_flags = rq->cmd_flags; if (likely(!error)) seq = blk_flush_cur_seq(rq); else seq = REQ_FSEQ_DONE; switch (seq) { case REQ_FSEQ_PREFLUSH: case REQ_FSEQ_POSTFLUSH: /* queue for flush */ if (list_empty(pending)) fq->flush_pending_since = jiffies; list_move_tail(&rq->queuelist, pending); break; case REQ_FSEQ_DATA: fq->flush_data_in_flight++; spin_lock(&q->requeue_lock); list_move(&rq->queuelist, &q->requeue_list); spin_unlock(&q->requeue_lock); blk_mq_kick_requeue_list(q); break; case REQ_FSEQ_DONE: /* * @rq was previously adjusted by blk_insert_flush() for * flush sequencing and may already have gone through the * flush data request completion path. Restore @rq for * normal completion and end it. */ list_del_init(&rq->queuelist); blk_flush_restore_request(rq); blk_mq_end_request(rq, error); break; default: BUG(); } blk_kick_flush(q, fq, cmd_flags); } static enum rq_end_io_ret flush_end_io(struct request *flush_rq, blk_status_t error) { struct request_queue *q = flush_rq->q; struct list_head *running; struct request *rq, *n; unsigned long flags = 0; struct blk_flush_queue *fq = blk_get_flush_queue(q, flush_rq->mq_ctx); /* release the tag's ownership to the req cloned from */ spin_lock_irqsave(&fq->mq_flush_lock, flags); if (!req_ref_put_and_test(flush_rq)) { fq->rq_status = error; spin_unlock_irqrestore(&fq->mq_flush_lock, flags); return RQ_END_IO_NONE; } blk_account_io_flush(flush_rq); /* * Flush request has to be marked as IDLE when it is really ended * because its .end_io() is called from timeout code path too for * avoiding use-after-free. */ WRITE_ONCE(flush_rq->state, MQ_RQ_IDLE); if (fq->rq_status != BLK_STS_OK) { error = fq->rq_status; fq->rq_status = BLK_STS_OK; } if (!q->elevator) { flush_rq->tag = BLK_MQ_NO_TAG; } else { blk_mq_put_driver_tag(flush_rq); flush_rq->internal_tag = BLK_MQ_NO_TAG; } running = &fq->flush_queue[fq->flush_running_idx]; BUG_ON(fq->flush_pending_idx == fq->flush_running_idx); /* account completion of the flush request */ fq->flush_running_idx ^= 1; /* and push the waiting requests to the next stage */ list_for_each_entry_safe(rq, n, running, queuelist) { unsigned int seq = blk_flush_cur_seq(rq); BUG_ON(seq != REQ_FSEQ_PREFLUSH && seq != REQ_FSEQ_POSTFLUSH); blk_flush_complete_seq(rq, fq, seq, error); } spin_unlock_irqrestore(&fq->mq_flush_lock, flags); return RQ_END_IO_NONE; } bool is_flush_rq(struct request *rq) { return rq->end_io == flush_end_io; } /** * blk_kick_flush - consider issuing flush request * @q: request_queue being kicked * @fq: flush queue * @flags: cmd_flags of the original request * * Flush related states of @q have changed, consider issuing flush request. * Please read the comment at the top of this file for more info. * * CONTEXT: * spin_lock_irq(fq->mq_flush_lock) * */ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq, blk_opf_t flags) { struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx]; struct request *first_rq = list_first_entry(pending, struct request, queuelist); struct request *flush_rq = fq->flush_rq; /* C1 described at the top of this file */ if (fq->flush_pending_idx != fq->flush_running_idx || list_empty(pending)) return; /* C2 and C3 */ if (fq->flush_data_in_flight && time_before(jiffies, fq->flush_pending_since + FLUSH_PENDING_TIMEOUT)) return; /* * Issue flush and toggle pending_idx. This makes pending_idx * different from running_idx, which means flush is in flight. */ fq->flush_pending_idx ^= 1; blk_rq_init(q, flush_rq); /* * In case of none scheduler, borrow tag from the first request * since they can't be in flight at the same time. And acquire * the tag's ownership for flush req. * * In case of IO scheduler, flush rq need to borrow scheduler tag * just for cheating put/get driver tag. */ flush_rq->mq_ctx = first_rq->mq_ctx; flush_rq->mq_hctx = first_rq->mq_hctx; if (!q->elevator) flush_rq->tag = first_rq->tag; else flush_rq->internal_tag = first_rq->internal_tag; flush_rq->cmd_flags = REQ_OP_FLUSH | REQ_PREFLUSH; flush_rq->cmd_flags |= (flags & REQ_DRV) | (flags & REQ_FAILFAST_MASK); flush_rq->rq_flags |= RQF_FLUSH_SEQ; flush_rq->end_io = flush_end_io; /* * Order WRITE ->end_io and WRITE rq->ref, and its pair is the one * implied in refcount_inc_not_zero() called from * blk_mq_find_and_get_req(), which orders WRITE/READ flush_rq->ref * and READ flush_rq->end_io */ smp_wmb(); req_ref_set(flush_rq, 1); spin_lock(&q->requeue_lock); list_add_tail(&flush_rq->queuelist, &q->flush_list); spin_unlock(&q->requeue_lock); blk_mq_kick_requeue_list(q); } static enum rq_end_io_ret mq_flush_data_end_io(struct request *rq, blk_status_t error) { struct request_queue *q = rq->q; struct blk_mq_hw_ctx *hctx = rq->mq_hctx; struct blk_mq_ctx *ctx = rq->mq_ctx; unsigned long flags; struct blk_flush_queue *fq = blk_get_flush_queue(q, ctx); if (q->elevator) { WARN_ON(rq->tag < 0); blk_mq_put_driver_tag(rq); } /* * After populating an empty queue, kick it to avoid stall. Read * the comment in flush_end_io(). */ spin_lock_irqsave(&fq->mq_flush_lock, flags); fq->flush_data_in_flight--; /* * May have been corrupted by rq->rq_next reuse, we need to * re-initialize rq->queuelist before reusing it here. */ INIT_LIST_HEAD(&rq->queuelist); blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error); spin_unlock_irqrestore(&fq->mq_flush_lock, flags); blk_mq_sched_restart(hctx); return RQ_END_IO_NONE; } static void blk_rq_init_flush(struct request *rq) { rq->flush.seq = 0; rq->rq_flags |= RQF_FLUSH_SEQ; rq->flush.saved_end_io = rq->end_io; /* Usually NULL */ rq->end_io = mq_flush_data_end_io; } /* * Insert a PREFLUSH/FUA request into the flush state machine. * Returns true if the request has been consumed by the flush state machine, * or false if the caller should continue to process it. */ bool blk_insert_flush(struct request *rq) { struct request_queue *q = rq->q; unsigned long fflags = q->queue_flags; /* may change, cache */ unsigned int policy = blk_flush_policy(fflags, rq); struct blk_flush_queue *fq = blk_get_flush_queue(q, rq->mq_ctx); /* FLUSH/FUA request must never be merged */ WARN_ON_ONCE(rq->bio != rq->biotail); /* * @policy now records what operations need to be done. Adjust * REQ_PREFLUSH and FUA for the driver. */ rq->cmd_flags &= ~REQ_PREFLUSH; if (!(fflags & (1UL << QUEUE_FLAG_FUA))) rq->cmd_flags &= ~REQ_FUA; /* * REQ_PREFLUSH|REQ_FUA implies REQ_SYNC, so if we clear any * of those flags, we have to set REQ_SYNC to avoid skewing * the request accounting. */ rq->cmd_flags |= REQ_SYNC; switch (policy) { case 0: /* * An empty flush handed down from a stacking driver may * translate into nothing if the underlying device does not * advertise a write-back cache. In this case, simply * complete the request. */ blk_mq_end_request(rq, 0); return true; case REQ_FSEQ_DATA: /* * If there's data, but no flush is necessary, the request can * be processed directly without going through flush machinery. * Queue for normal execution. */ return false; case REQ_FSEQ_DATA | REQ_FSEQ_POSTFLUSH: /* * Initialize the flush fields and completion handler to trigger * the post flush, and then just pass the command on. */ blk_rq_init_flush(rq); rq->flush.seq |= REQ_FSEQ_PREFLUSH; spin_lock_irq(&fq->mq_flush_lock); fq->flush_data_in_flight++; spin_unlock_irq(&fq->mq_flush_lock); return false; default: /* * Mark the request as part of a flush sequence and submit it * for further processing to the flush state machine. */ blk_rq_init_flush(rq); spin_lock_irq(&fq->mq_flush_lock); blk_flush_complete_seq(rq, fq, REQ_FSEQ_ACTIONS & ~policy, 0); spin_unlock_irq(&fq->mq_flush_lock); return true; } } /** * blkdev_issue_flush - queue a flush * @bdev: blockdev to issue flush for * * Description: * Issue a flush for the block device in question. */ int blkdev_issue_flush(struct block_device *bdev) { struct bio bio; bio_init(&bio, bdev, NULL, 0, REQ_OP_WRITE | REQ_PREFLUSH); return submit_bio_wait(&bio); } EXPORT_SYMBOL(blkdev_issue_flush); struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size, gfp_t flags) { struct blk_flush_queue *fq; int rq_sz = sizeof(struct request); fq = kzalloc_node(sizeof(*fq), flags, node); if (!fq) goto fail; spin_lock_init(&fq->mq_flush_lock); rq_sz = round_up(rq_sz + cmd_size, cache_line_size()); fq->flush_rq = kzalloc_node(rq_sz, flags, node); if (!fq->flush_rq) goto fail_rq; INIT_LIST_HEAD(&fq->flush_queue[0]); INIT_LIST_HEAD(&fq->flush_queue[1]); return fq; fail_rq: kfree(fq); fail: return NULL; } void blk_free_flush_queue(struct blk_flush_queue *fq) { /* bio based request queue hasn't flush queue */ if (!fq) return; kfree(fq->flush_rq); kfree(fq); } /* * Allow driver to set its own lock class to fq->mq_flush_lock for * avoiding lockdep complaint. * * flush_end_io() may be called recursively from some driver, such as * nvme-loop, so lockdep may complain 'possible recursive locking' because * all 'struct blk_flush_queue' instance share same mq_flush_lock lock class * key. We need to assign different lock class for these driver's * fq->mq_flush_lock for avoiding the lockdep warning. * * Use dynamically allocated lock class key for each 'blk_flush_queue' * instance is over-kill, and more worse it introduces horrible boot delay * issue because synchronize_rcu() is implied in lockdep_unregister_key which * is called for each hctx release. SCSI probing may synchronously create and * destroy lots of MQ request_queues for non-existent devices, and some robot * test kernel always enable lockdep option. It is observed that more than half * an hour is taken during SCSI MQ probe with per-fq lock class. */ void blk_mq_hctx_set_fq_lock_class(struct blk_mq_hw_ctx *hctx, struct lock_class_key *key) { lockdep_set_class(&hctx->fq->mq_flush_lock, key); } EXPORT_SYMBOL_GPL(blk_mq_hctx_set_fq_lock_class); |
1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 | // SPDX-License-Identifier: GPL-2.0+ /* * Driver for Realtek RTS51xx USB card reader * * Copyright(c) 2009 Realtek Semiconductor Corp. All rights reserved. * * Author: * wwang (wei_wang@realsil.com.cn) * No. 450, Shenhu Road, Suzhou Industry Park, Suzhou, China */ #include <linux/module.h> #include <linux/blkdev.h> #include <linux/kthread.h> #include <linux/sched.h> #include <linux/kernel.h> #include <scsi/scsi.h> #include <scsi/scsi_cmnd.h> #include <scsi/scsi_device.h> #include <linux/cdrom.h> #include <linux/usb.h> #include <linux/slab.h> #include <linux/usb_usual.h> #include "usb.h" #include "transport.h" #include "protocol.h" #include "debug.h" #include "scsiglue.h" #define DRV_NAME "ums-realtek" MODULE_DESCRIPTION("Driver for Realtek USB Card Reader"); MODULE_AUTHOR("wwang <wei_wang@realsil.com.cn>"); MODULE_LICENSE("GPL"); MODULE_IMPORT_NS(USB_STORAGE); static int auto_delink_en = 1; module_param(auto_delink_en, int, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(auto_delink_en, "auto delink mode (0=firmware, 1=software [default])"); #ifdef CONFIG_REALTEK_AUTOPM static int ss_en = 1; module_param(ss_en, int, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(ss_en, "enable selective suspend"); static int ss_delay = 50; module_param(ss_delay, int, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(ss_delay, "seconds to delay before entering selective suspend"); enum RTS51X_STAT { RTS51X_STAT_INIT, RTS51X_STAT_IDLE, RTS51X_STAT_RUN, RTS51X_STAT_SS }; #define POLLING_INTERVAL 50 #define rts51x_set_stat(chip, stat) \ ((chip)->state = (enum RTS51X_STAT)(stat)) #define rts51x_get_stat(chip) ((chip)->state) #define SET_LUN_READY(chip, lun) ((chip)->lun_ready |= ((u8)1 << (lun))) #define CLR_LUN_READY(chip, lun) ((chip)->lun_ready &= ~((u8)1 << (lun))) #define TST_LUN_READY(chip, lun) ((chip)->lun_ready & ((u8)1 << (lun))) #endif struct rts51x_status { u16 vid; u16 pid; u8 cur_lun; u8 card_type; u8 total_lun; u16 fw_ver; u8 phy_exist; u8 multi_flag; u8 multi_card; u8 log_exist; union { u8 detailed_type1; u8 detailed_type2; } detailed_type; u8 function[2]; }; struct rts51x_chip { u16 vendor_id; u16 product_id; char max_lun; struct rts51x_status *status; int status_len; u32 flag; struct us_data *us; #ifdef CONFIG_REALTEK_AUTOPM struct timer_list rts51x_suspend_timer; unsigned long timer_expires; int pwr_state; u8 lun_ready; enum RTS51X_STAT state; int support_auto_delink; #endif /* used to back up the protocol chosen in probe1 phase */ proto_cmnd proto_handler_backup; }; /* flag definition */ #define FLIDX_AUTO_DELINK 0x01 #define SCSI_LUN(srb) ((srb)->device->lun) /* Bit Operation */ #define SET_BIT(data, idx) ((data) |= 1 << (idx)) #define CLR_BIT(data, idx) ((data) &= ~(1 << (idx))) #define CHK_BIT(data, idx) ((data) & (1 << (idx))) #define SET_AUTO_DELINK(chip) ((chip)->flag |= FLIDX_AUTO_DELINK) #define CLR_AUTO_DELINK(chip) ((chip)->flag &= ~FLIDX_AUTO_DELINK) #define CHK_AUTO_DELINK(chip) ((chip)->flag & FLIDX_AUTO_DELINK) #define RTS51X_GET_VID(chip) ((chip)->vendor_id) #define RTS51X_GET_PID(chip) ((chip)->product_id) #define VENDOR_ID(chip) ((chip)->status[0].vid) #define PRODUCT_ID(chip) ((chip)->status[0].pid) #define FW_VERSION(chip) ((chip)->status[0].fw_ver) #define STATUS_LEN(chip) ((chip)->status_len) #define STATUS_SUCCESS 0 #define STATUS_FAIL 1 /* Check card reader function */ #define SUPPORT_DETAILED_TYPE1(chip) \ CHK_BIT((chip)->status[0].function[0], 1) #define SUPPORT_OT(chip) \ CHK_BIT((chip)->status[0].function[0], 2) #define SUPPORT_OC(chip) \ CHK_BIT((chip)->status[0].function[0], 3) #define SUPPORT_AUTO_DELINK(chip) \ CHK_BIT((chip)->status[0].function[0], 4) #define SUPPORT_SDIO(chip) \ CHK_BIT((chip)->status[0].function[1], 0) #define SUPPORT_DETAILED_TYPE2(chip) \ CHK_BIT((chip)->status[0].function[1], 1) #define CHECK_PID(chip, pid) (RTS51X_GET_PID(chip) == (pid)) #define CHECK_FW_VER(chip, fw_ver) (FW_VERSION(chip) == (fw_ver)) #define CHECK_ID(chip, pid, fw_ver) \ (CHECK_PID((chip), (pid)) && CHECK_FW_VER((chip), (fw_ver))) static int init_realtek_cr(struct us_data *us); /* * The table of devices */ #define UNUSUAL_DEV(id_vendor, id_product, bcdDeviceMin, bcdDeviceMax, \ vendorName, productName, useProtocol, useTransport, \ initFunction, flags) \ {\ USB_DEVICE_VER(id_vendor, id_product, bcdDeviceMin, bcdDeviceMax), \ .driver_info = (flags) \ } static const struct usb_device_id realtek_cr_ids[] = { # include "unusual_realtek.h" {} /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, realtek_cr_ids); #undef UNUSUAL_DEV /* * The flags table */ #define UNUSUAL_DEV(idVendor, idProduct, bcdDeviceMin, bcdDeviceMax, \ vendor_name, product_name, use_protocol, use_transport, \ init_function, Flags) \ { \ .vendorName = vendor_name, \ .productName = product_name, \ .useProtocol = use_protocol, \ .useTransport = use_transport, \ .initFunction = init_function, \ } static struct us_unusual_dev realtek_cr_unusual_dev_list[] = { # include "unusual_realtek.h" {} /* Terminating entry */ }; #undef UNUSUAL_DEV static int rts51x_bulk_transport(struct us_data *us, u8 lun, u8 *cmd, int cmd_len, u8 *buf, int buf_len, enum dma_data_direction dir, int *act_len) { struct bulk_cb_wrap *bcb = (struct bulk_cb_wrap *)us->iobuf; struct bulk_cs_wrap *bcs = (struct bulk_cs_wrap *)us->iobuf; int result; unsigned int residue; unsigned int cswlen; unsigned int cbwlen = US_BULK_CB_WRAP_LEN; /* set up the command wrapper */ bcb->Signature = cpu_to_le32(US_BULK_CB_SIGN); bcb->DataTransferLength = cpu_to_le32(buf_len); bcb->Flags = (dir == DMA_FROM_DEVICE) ? US_BULK_FLAG_IN : 0; bcb->Tag = ++us->tag; bcb->Lun = lun; bcb->Length = cmd_len; /* copy the command payload */ memset(bcb->CDB, 0, sizeof(bcb->CDB)); memcpy(bcb->CDB, cmd, bcb->Length); /* send it to out endpoint */ result = usb_stor_bulk_transfer_buf(us, us->send_bulk_pipe, bcb, cbwlen, NULL); if (result != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; /* DATA STAGE */ /* send/receive data payload, if there is any */ if (buf && buf_len) { unsigned int pipe = (dir == DMA_FROM_DEVICE) ? us->recv_bulk_pipe : us->send_bulk_pipe; result = usb_stor_bulk_transfer_buf(us, pipe, buf, buf_len, NULL); if (result == USB_STOR_XFER_ERROR) return USB_STOR_TRANSPORT_ERROR; } /* get CSW for device status */ result = usb_stor_bulk_transfer_buf(us, us->recv_bulk_pipe, bcs, US_BULK_CS_WRAP_LEN, &cswlen); if (result != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; /* check bulk status */ if (bcs->Signature != cpu_to_le32(US_BULK_CS_SIGN)) { usb_stor_dbg(us, "Signature mismatch: got %08X, expecting %08X\n", le32_to_cpu(bcs->Signature), US_BULK_CS_SIGN); return USB_STOR_TRANSPORT_ERROR; } residue = bcs->Residue; if (bcs->Tag != us->tag) return USB_STOR_TRANSPORT_ERROR; /* * try to compute the actual residue, based on how much data * was really transferred and what the device tells us */ if (residue) residue = residue < buf_len ? residue : buf_len; if (act_len) *act_len = buf_len - residue; /* based on the status code, we report good or bad */ switch (bcs->Status) { case US_BULK_STAT_OK: /* command good -- note that data could be short */ return USB_STOR_TRANSPORT_GOOD; case US_BULK_STAT_FAIL: /* command failed */ return USB_STOR_TRANSPORT_FAILED; case US_BULK_STAT_PHASE: /* * phase error -- note that a transport reset will be * invoked by the invoke_transport() function */ return USB_STOR_TRANSPORT_ERROR; } /* we should never get here, but if we do, we're in trouble */ return USB_STOR_TRANSPORT_ERROR; } static int rts51x_bulk_transport_special(struct us_data *us, u8 lun, u8 *cmd, int cmd_len, u8 *buf, int buf_len, enum dma_data_direction dir, int *act_len) { struct bulk_cb_wrap *bcb = (struct bulk_cb_wrap *) us->iobuf; struct bulk_cs_wrap *bcs = (struct bulk_cs_wrap *) us->iobuf; int result; unsigned int cswlen; unsigned int cbwlen = US_BULK_CB_WRAP_LEN; /* set up the command wrapper */ bcb->Signature = cpu_to_le32(US_BULK_CB_SIGN); bcb->DataTransferLength = cpu_to_le32(buf_len); bcb->Flags = (dir == DMA_FROM_DEVICE) ? US_BULK_FLAG_IN : 0; bcb->Tag = ++us->tag; bcb->Lun = lun; bcb->Length = cmd_len; /* copy the command payload */ memset(bcb->CDB, 0, sizeof(bcb->CDB)); memcpy(bcb->CDB, cmd, bcb->Length); /* send it to out endpoint */ result = usb_stor_bulk_transfer_buf(us, us->send_bulk_pipe, bcb, cbwlen, NULL); if (result != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; /* DATA STAGE */ /* send/receive data payload, if there is any */ if (buf && buf_len) { unsigned int pipe = (dir == DMA_FROM_DEVICE) ? us->recv_bulk_pipe : us->send_bulk_pipe; result = usb_stor_bulk_transfer_buf(us, pipe, buf, buf_len, NULL); if (result == USB_STOR_XFER_ERROR) return USB_STOR_TRANSPORT_ERROR; } /* get CSW for device status */ result = usb_bulk_msg(us->pusb_dev, us->recv_bulk_pipe, bcs, US_BULK_CS_WRAP_LEN, &cswlen, 250); return result; } /* Determine what the maximum LUN supported is */ static int rts51x_get_max_lun(struct us_data *us) { int result; /* issue the command */ us->iobuf[0] = 0; result = usb_stor_control_msg(us, us->recv_ctrl_pipe, US_BULK_GET_MAX_LUN, USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE, 0, us->ifnum, us->iobuf, 1, 10 * HZ); usb_stor_dbg(us, "GetMaxLUN command result is %d, data is %d\n", result, us->iobuf[0]); /* if we have a successful request, return the result */ if (result > 0) return us->iobuf[0]; return 0; } static int rts51x_read_mem(struct us_data *us, u16 addr, u8 *data, u16 len) { int retval; u8 cmnd[12] = { 0 }; u8 *buf; buf = kmalloc(len, GFP_NOIO); if (buf == NULL) return -ENOMEM; usb_stor_dbg(us, "addr = 0x%x, len = %d\n", addr, len); cmnd[0] = 0xF0; cmnd[1] = 0x0D; cmnd[2] = (u8) (addr >> 8); cmnd[3] = (u8) addr; cmnd[4] = (u8) (len >> 8); cmnd[5] = (u8) len; retval = rts51x_bulk_transport(us, 0, cmnd, 12, buf, len, DMA_FROM_DEVICE, NULL); if (retval != USB_STOR_TRANSPORT_GOOD) { kfree(buf); return -EIO; } memcpy(data, buf, len); kfree(buf); return 0; } static int rts51x_write_mem(struct us_data *us, u16 addr, u8 *data, u16 len) { int retval; u8 cmnd[12] = { 0 }; u8 *buf; buf = kmemdup(data, len, GFP_NOIO); if (buf == NULL) return USB_STOR_TRANSPORT_ERROR; usb_stor_dbg(us, "addr = 0x%x, len = %d\n", addr, len); cmnd[0] = 0xF0; cmnd[1] = 0x0E; cmnd[2] = (u8) (addr >> 8); cmnd[3] = (u8) addr; cmnd[4] = (u8) (len >> 8); cmnd[5] = (u8) len; retval = rts51x_bulk_transport(us, 0, cmnd, 12, buf, len, DMA_TO_DEVICE, NULL); kfree(buf); if (retval != USB_STOR_TRANSPORT_GOOD) return -EIO; return 0; } static int rts51x_read_status(struct us_data *us, u8 lun, u8 *status, int len, int *actlen) { int retval; u8 cmnd[12] = { 0 }; u8 *buf; buf = kmalloc(len, GFP_NOIO); if (buf == NULL) return USB_STOR_TRANSPORT_ERROR; usb_stor_dbg(us, "lun = %d\n", lun); cmnd[0] = 0xF0; cmnd[1] = 0x09; retval = rts51x_bulk_transport(us, lun, cmnd, 12, buf, len, DMA_FROM_DEVICE, actlen); if (retval != USB_STOR_TRANSPORT_GOOD) { kfree(buf); return -EIO; } memcpy(status, buf, len); kfree(buf); return 0; } static int rts51x_check_status(struct us_data *us, u8 lun) { struct rts51x_chip *chip = (struct rts51x_chip *)(us->extra); int retval; u8 buf[16]; retval = rts51x_read_status(us, lun, buf, 16, &(chip->status_len)); if (retval != STATUS_SUCCESS) return -EIO; usb_stor_dbg(us, "chip->status_len = %d\n", chip->status_len); chip->status[lun].vid = ((u16) buf[0] << 8) | buf[1]; chip->status[lun].pid = ((u16) buf[2] << 8) | buf[3]; chip->status[lun].cur_lun = buf[4]; chip->status[lun].card_type = buf[5]; chip->status[lun].total_lun = buf[6]; chip->status[lun].fw_ver = ((u16) buf[7] << 8) | buf[8]; chip->status[lun].phy_exist = buf[9]; chip->status[lun].multi_flag = buf[10]; chip->status[lun].multi_card = buf[11]; chip->status[lun].log_exist = buf[12]; if (chip->status_len == 16) { chip->status[lun].detailed_type.detailed_type1 = buf[13]; chip->status[lun].function[0] = buf[14]; chip->status[lun].function[1] = buf[15]; } return 0; } static int enable_oscillator(struct us_data *us) { int retval; u8 value; retval = rts51x_read_mem(us, 0xFE77, &value, 1); if (retval < 0) return -EIO; value |= 0x04; retval = rts51x_write_mem(us, 0xFE77, &value, 1); if (retval < 0) return -EIO; retval = rts51x_read_mem(us, 0xFE77, &value, 1); if (retval < 0) return -EIO; if (!(value & 0x04)) return -EIO; return 0; } static int __do_config_autodelink(struct us_data *us, u8 *data, u16 len) { int retval; u8 cmnd[12] = {0}; u8 *buf; usb_stor_dbg(us, "addr = 0xfe47, len = %d\n", len); buf = kmemdup(data, len, GFP_NOIO); if (!buf) return USB_STOR_TRANSPORT_ERROR; cmnd[0] = 0xF0; cmnd[1] = 0x0E; cmnd[2] = 0xfe; cmnd[3] = 0x47; cmnd[4] = (u8)(len >> 8); cmnd[5] = (u8)len; retval = rts51x_bulk_transport_special(us, 0, cmnd, 12, buf, len, DMA_TO_DEVICE, NULL); kfree(buf); if (retval != USB_STOR_TRANSPORT_GOOD) { return -EIO; } return 0; } static int do_config_autodelink(struct us_data *us, int enable, int force) { int retval; u8 value; retval = rts51x_read_mem(us, 0xFE47, &value, 1); if (retval < 0) return -EIO; if (enable) { if (force) value |= 0x03; else value |= 0x01; } else { value &= ~0x03; } usb_stor_dbg(us, "set 0xfe47 to 0x%x\n", value); /* retval = rts51x_write_mem(us, 0xFE47, &value, 1); */ retval = __do_config_autodelink(us, &value, 1); if (retval < 0) return -EIO; return 0; } static int config_autodelink_after_power_on(struct us_data *us) { struct rts51x_chip *chip = (struct rts51x_chip *)(us->extra); int retval; u8 value; if (!CHK_AUTO_DELINK(chip)) return 0; retval = rts51x_read_mem(us, 0xFE47, &value, 1); if (retval < 0) return -EIO; if (auto_delink_en) { CLR_BIT(value, 0); CLR_BIT(value, 1); SET_BIT(value, 2); if (CHECK_ID(chip, 0x0138, 0x3882)) CLR_BIT(value, 2); SET_BIT(value, 7); /* retval = rts51x_write_mem(us, 0xFE47, &value, 1); */ retval = __do_config_autodelink(us, &value, 1); if (retval < 0) return -EIO; retval = enable_oscillator(us); if (retval == 0) (void)do_config_autodelink(us, 1, 0); } else { /* Autodelink controlled by firmware */ SET_BIT(value, 2); if (CHECK_ID(chip, 0x0138, 0x3882)) CLR_BIT(value, 2); if (CHECK_ID(chip, 0x0159, 0x5889) || CHECK_ID(chip, 0x0138, 0x3880)) { CLR_BIT(value, 0); CLR_BIT(value, 7); } /* retval = rts51x_write_mem(us, 0xFE47, &value, 1); */ retval = __do_config_autodelink(us, &value, 1); if (retval < 0) return -EIO; if (CHECK_ID(chip, 0x0159, 0x5888)) { value = 0xFF; retval = rts51x_write_mem(us, 0xFE79, &value, 1); if (retval < 0) return -EIO; value = 0x01; retval = rts51x_write_mem(us, 0x48, &value, 1); if (retval < 0) return -EIO; } } return 0; } #ifdef CONFIG_PM static int config_autodelink_before_power_down(struct us_data *us) { struct rts51x_chip *chip = (struct rts51x_chip *)(us->extra); int retval; u8 value; if (!CHK_AUTO_DELINK(chip)) return 0; if (auto_delink_en) { retval = rts51x_read_mem(us, 0xFE77, &value, 1); if (retval < 0) return -EIO; SET_BIT(value, 2); retval = rts51x_write_mem(us, 0xFE77, &value, 1); if (retval < 0) return -EIO; if (CHECK_ID(chip, 0x0159, 0x5888)) { value = 0x01; retval = rts51x_write_mem(us, 0x48, &value, 1); if (retval < 0) return -EIO; } retval = rts51x_read_mem(us, 0xFE47, &value, 1); if (retval < 0) return -EIO; SET_BIT(value, 0); if (CHECK_ID(chip, 0x0138, 0x3882)) SET_BIT(value, 2); retval = rts51x_write_mem(us, 0xFE77, &value, 1); if (retval < 0) return -EIO; } else { if (CHECK_ID(chip, 0x0159, 0x5889) || CHECK_ID(chip, 0x0138, 0x3880) || CHECK_ID(chip, 0x0138, 0x3882)) { retval = rts51x_read_mem(us, 0xFE47, &value, 1); if (retval < 0) return -EIO; if (CHECK_ID(chip, 0x0159, 0x5889) || CHECK_ID(chip, 0x0138, 0x3880)) { SET_BIT(value, 0); SET_BIT(value, 7); } if (CHECK_ID(chip, 0x0138, 0x3882)) SET_BIT(value, 2); /* retval = rts51x_write_mem(us, 0xFE47, &value, 1); */ retval = __do_config_autodelink(us, &value, 1); if (retval < 0) return -EIO; } if (CHECK_ID(chip, 0x0159, 0x5888)) { value = 0x01; retval = rts51x_write_mem(us, 0x48, &value, 1); if (retval < 0) return -EIO; } } return 0; } static void fw5895_init(struct us_data *us) { struct rts51x_chip *chip = (struct rts51x_chip *)(us->extra); int retval; u8 val; if ((PRODUCT_ID(chip) != 0x0158) || (FW_VERSION(chip) != 0x5895)) { usb_stor_dbg(us, "Not the specified device, return immediately!\n"); } else { retval = rts51x_read_mem(us, 0xFD6F, &val, 1); if (retval == STATUS_SUCCESS && (val & 0x1F) == 0) { val = 0x1F; retval = rts51x_write_mem(us, 0xFD70, &val, 1); if (retval != STATUS_SUCCESS) usb_stor_dbg(us, "Write memory fail\n"); } else { usb_stor_dbg(us, "Read memory fail, OR (val & 0x1F) != 0\n"); } } } #endif #ifdef CONFIG_REALTEK_AUTOPM static void fw5895_set_mmc_wp(struct us_data *us) { struct rts51x_chip *chip = (struct rts51x_chip *)(us->extra); int retval; u8 buf[13]; if ((PRODUCT_ID(chip) != 0x0158) || (FW_VERSION(chip) != 0x5895)) { usb_stor_dbg(us, "Not the specified device, return immediately!\n"); } else { retval = rts51x_read_mem(us, 0xFD6F, buf, 1); if (retval == STATUS_SUCCESS && (buf[0] & 0x24) == 0x24) { /* SD Exist and SD WP */ retval = rts51x_read_mem(us, 0xD04E, buf, 1); if (retval == STATUS_SUCCESS) { buf[0] |= 0x04; retval = rts51x_write_mem(us, 0xFD70, buf, 1); if (retval != STATUS_SUCCESS) usb_stor_dbg(us, "Write memory fail\n"); } else { usb_stor_dbg(us, "Read memory fail\n"); } } else { usb_stor_dbg(us, "Read memory fail, OR (buf[0]&0x24)!=0x24\n"); } } } static void rts51x_modi_suspend_timer(struct rts51x_chip *chip) { struct us_data *us = chip->us; usb_stor_dbg(us, "state:%d\n", rts51x_get_stat(chip)); chip->timer_expires = jiffies + msecs_to_jiffies(1000*ss_delay); mod_timer(&chip->rts51x_suspend_timer, chip->timer_expires); } static void rts51x_suspend_timer_fn(struct timer_list *t) { struct rts51x_chip *chip = from_timer(chip, t, rts51x_suspend_timer); struct us_data *us = chip->us; switch (rts51x_get_stat(chip)) { case RTS51X_STAT_INIT: case RTS51X_STAT_RUN: rts51x_modi_suspend_timer(chip); break; case RTS51X_STAT_IDLE: case RTS51X_STAT_SS: usb_stor_dbg(us, "RTS51X_STAT_SS, power.usage:%d\n", atomic_read(&us->pusb_intf->dev.power.usage_count)); if (atomic_read(&us->pusb_intf->dev.power.usage_count) > 0) { usb_stor_dbg(us, "Ready to enter SS state\n"); rts51x_set_stat(chip, RTS51X_STAT_SS); /* ignore mass storage interface's children */ pm_suspend_ignore_children(&us->pusb_intf->dev, true); usb_autopm_put_interface_async(us->pusb_intf); usb_stor_dbg(us, "RTS51X_STAT_SS 01, power.usage:%d\n", atomic_read(&us->pusb_intf->dev.power.usage_count)); } break; default: usb_stor_dbg(us, "Unknown state !!!\n"); break; } } static inline int working_scsi(struct scsi_cmnd *srb) { if ((srb->cmnd[0] == TEST_UNIT_READY) || (srb->cmnd[0] == ALLOW_MEDIUM_REMOVAL)) { return 0; } return 1; } static void rts51x_invoke_transport(struct scsi_cmnd *srb, struct us_data *us) { struct rts51x_chip *chip = (struct rts51x_chip *)(us->extra); static int card_first_show = 1; static u8 media_not_present[] = { 0x70, 0, 0x02, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0x3A, 0, 0, 0, 0, 0 }; static u8 invalid_cmd_field[] = { 0x70, 0, 0x05, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0x24, 0, 0, 0, 0, 0 }; int ret; if (working_scsi(srb)) { usb_stor_dbg(us, "working scsi, power.usage:%d\n", atomic_read(&us->pusb_intf->dev.power.usage_count)); if (atomic_read(&us->pusb_intf->dev.power.usage_count) <= 0) { ret = usb_autopm_get_interface(us->pusb_intf); usb_stor_dbg(us, "working scsi, ret=%d\n", ret); } if (rts51x_get_stat(chip) != RTS51X_STAT_RUN) rts51x_set_stat(chip, RTS51X_STAT_RUN); chip->proto_handler_backup(srb, us); } else { if (rts51x_get_stat(chip) == RTS51X_STAT_SS) { usb_stor_dbg(us, "NOT working scsi\n"); if ((srb->cmnd[0] == TEST_UNIT_READY) && (chip->pwr_state == US_SUSPEND)) { if (TST_LUN_READY(chip, srb->device->lun)) { srb->result = SAM_STAT_GOOD; } else { srb->result = SAM_STAT_CHECK_CONDITION; memcpy(srb->sense_buffer, media_not_present, US_SENSE_SIZE); } usb_stor_dbg(us, "TEST_UNIT_READY\n"); goto out; } if (srb->cmnd[0] == ALLOW_MEDIUM_REMOVAL) { int prevent = srb->cmnd[4] & 0x1; if (prevent) { srb->result = SAM_STAT_CHECK_CONDITION; memcpy(srb->sense_buffer, invalid_cmd_field, US_SENSE_SIZE); } else { srb->result = SAM_STAT_GOOD; } usb_stor_dbg(us, "ALLOW_MEDIUM_REMOVAL\n"); goto out; } } else { usb_stor_dbg(us, "NOT working scsi, not SS\n"); chip->proto_handler_backup(srb, us); /* Check whether card is plugged in */ if (srb->cmnd[0] == TEST_UNIT_READY) { if (srb->result == SAM_STAT_GOOD) { SET_LUN_READY(chip, srb->device->lun); if (card_first_show) { card_first_show = 0; fw5895_set_mmc_wp(us); } } else { CLR_LUN_READY(chip, srb->device->lun); card_first_show = 1; } } if (rts51x_get_stat(chip) != RTS51X_STAT_IDLE) rts51x_set_stat(chip, RTS51X_STAT_IDLE); } } out: usb_stor_dbg(us, "state:%d\n", rts51x_get_stat(chip)); if (rts51x_get_stat(chip) == RTS51X_STAT_RUN) rts51x_modi_suspend_timer(chip); } static int realtek_cr_autosuspend_setup(struct us_data *us) { struct rts51x_chip *chip; struct rts51x_status *status = NULL; u8 buf[16]; int retval; chip = (struct rts51x_chip *)us->extra; chip->support_auto_delink = 0; chip->pwr_state = US_RESUME; chip->lun_ready = 0; rts51x_set_stat(chip, RTS51X_STAT_INIT); retval = rts51x_read_status(us, 0, buf, 16, &(chip->status_len)); if (retval != STATUS_SUCCESS) { usb_stor_dbg(us, "Read status fail\n"); return -EIO; } status = chip->status; status->vid = ((u16) buf[0] << 8) | buf[1]; status->pid = ((u16) buf[2] << 8) | buf[3]; status->cur_lun = buf[4]; status->card_type = buf[5]; status->total_lun = buf[6]; status->fw_ver = ((u16) buf[7] << 8) | buf[8]; status->phy_exist = buf[9]; status->multi_flag = buf[10]; status->multi_card = buf[11]; status->log_exist = buf[12]; if (chip->status_len == 16) { status->detailed_type.detailed_type1 = buf[13]; status->function[0] = buf[14]; status->function[1] = buf[15]; } /* back up the proto_handler in us->extra */ chip = (struct rts51x_chip *)(us->extra); chip->proto_handler_backup = us->proto_handler; /* Set the autosuspend_delay to 0 */ pm_runtime_set_autosuspend_delay(&us->pusb_dev->dev, 0); /* override us->proto_handler setted in get_protocol() */ us->proto_handler = rts51x_invoke_transport; chip->timer_expires = 0; timer_setup(&chip->rts51x_suspend_timer, rts51x_suspend_timer_fn, 0); fw5895_init(us); /* enable autosuspend function of the usb device */ usb_enable_autosuspend(us->pusb_dev); return 0; } #endif static void realtek_cr_destructor(void *extra) { struct rts51x_chip *chip = extra; if (!chip) return; #ifdef CONFIG_REALTEK_AUTOPM if (ss_en) { del_timer(&chip->rts51x_suspend_timer); chip->timer_expires = 0; } #endif kfree(chip->status); } #ifdef CONFIG_PM static int realtek_cr_suspend(struct usb_interface *iface, pm_message_t message) { struct us_data *us = usb_get_intfdata(iface); /* wait until no command is running */ mutex_lock(&us->dev_mutex); config_autodelink_before_power_down(us); mutex_unlock(&us->dev_mutex); return 0; } static int realtek_cr_resume(struct usb_interface *iface) { struct us_data *us = usb_get_intfdata(iface); fw5895_init(us); config_autodelink_after_power_on(us); return 0; } #else #define realtek_cr_suspend NULL #define realtek_cr_resume NULL #endif static int init_realtek_cr(struct us_data *us) { struct rts51x_chip *chip; int size, i, retval; chip = kzalloc(sizeof(struct rts51x_chip), GFP_KERNEL); if (!chip) return -ENOMEM; us->extra = chip; us->extra_destructor = realtek_cr_destructor; us->max_lun = chip->max_lun = rts51x_get_max_lun(us); chip->us = us; usb_stor_dbg(us, "chip->max_lun = %d\n", chip->max_lun); size = (chip->max_lun + 1) * sizeof(struct rts51x_status); chip->status = kzalloc(size, GFP_KERNEL); if (!chip->status) goto INIT_FAIL; for (i = 0; i <= (int)(chip->max_lun); i++) { retval = rts51x_check_status(us, (u8) i); if (retval < 0) goto INIT_FAIL; } if (CHECK_PID(chip, 0x0138) || CHECK_PID(chip, 0x0158) || CHECK_PID(chip, 0x0159)) { if (CHECK_FW_VER(chip, 0x5888) || CHECK_FW_VER(chip, 0x5889) || CHECK_FW_VER(chip, 0x5901)) SET_AUTO_DELINK(chip); if (STATUS_LEN(chip) == 16) { if (SUPPORT_AUTO_DELINK(chip)) SET_AUTO_DELINK(chip); } } #ifdef CONFIG_REALTEK_AUTOPM if (ss_en) realtek_cr_autosuspend_setup(us); #endif usb_stor_dbg(us, "chip->flag = 0x%x\n", chip->flag); (void)config_autodelink_after_power_on(us); return 0; INIT_FAIL: if (us->extra) { kfree(chip->status); kfree(us->extra); us->extra = NULL; } return -EIO; } static struct scsi_host_template realtek_cr_host_template; static int realtek_cr_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct us_data *us; int result; dev_dbg(&intf->dev, "Probe Realtek Card Reader!\n"); result = usb_stor_probe1(&us, intf, id, (id - realtek_cr_ids) + realtek_cr_unusual_dev_list, &realtek_cr_host_template); if (result) return result; result = usb_stor_probe2(us); return result; } static struct usb_driver realtek_cr_driver = { .name = DRV_NAME, .probe = realtek_cr_probe, .disconnect = usb_stor_disconnect, /* .suspend = usb_stor_suspend, */ /* .resume = usb_stor_resume, */ .reset_resume = usb_stor_reset_resume, .suspend = realtek_cr_suspend, .resume = realtek_cr_resume, .pre_reset = usb_stor_pre_reset, .post_reset = usb_stor_post_reset, .id_table = realtek_cr_ids, .soft_unbind = 1, .supports_autosuspend = 1, .no_dynamic_id = 1, }; module_usb_stor_driver(realtek_cr_driver, realtek_cr_host_template, DRV_NAME); |
1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 | // SPDX-License-Identifier: GPL-2.0-or-later /* * HID driver for some sunplus "special" devices * * Copyright (c) 1999 Andreas Gal * Copyright (c) 2000-2005 Vojtech Pavlik <vojtech@suse.cz> * Copyright (c) 2005 Michael Haboustak <mike-@cinci.rr.com> for Concept2, Inc * Copyright (c) 2006-2007 Jiri Kosina * Copyright (c) 2008 Jiri Slaby */ /* */ #include <linux/device.h> #include <linux/hid.h> #include <linux/module.h> #include "hid-ids.h" static __u8 *sp_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { if (*rsize >= 112 && rdesc[104] == 0x26 && rdesc[105] == 0x80 && rdesc[106] == 0x03) { hid_info(hdev, "fixing up Sunplus Wireless Desktop report descriptor\n"); rdesc[105] = rdesc[110] = 0x03; rdesc[106] = rdesc[111] = 0x21; } return rdesc; } #define sp_map_key_clear(c) hid_map_usage_clear(hi, usage, bit, max, \ EV_KEY, (c)) static int sp_input_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { if ((usage->hid & HID_USAGE_PAGE) != HID_UP_CONSUMER) return 0; switch (usage->hid & HID_USAGE) { case 0x2003: sp_map_key_clear(KEY_ZOOMIN); break; case 0x2103: sp_map_key_clear(KEY_ZOOMOUT); break; default: return 0; } return 1; } static const struct hid_device_id sp_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_SUNPLUS, USB_DEVICE_ID_SUNPLUS_WDESKTOP) }, { } }; MODULE_DEVICE_TABLE(hid, sp_devices); static struct hid_driver sp_driver = { .name = "sunplus", .id_table = sp_devices, .report_fixup = sp_report_fixup, .input_mapping = sp_input_mapping, }; module_hid_driver(sp_driver); MODULE_LICENSE("GPL"); |
405 243 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_BH_H #define _LINUX_BH_H #include <linux/instruction_pointer.h> #include <linux/preempt.h> #if defined(CONFIG_PREEMPT_RT) || defined(CONFIG_TRACE_IRQFLAGS) extern void __local_bh_disable_ip(unsigned long ip, unsigned int cnt); #else static __always_inline void __local_bh_disable_ip(unsigned long ip, unsigned int cnt) { preempt_count_add(cnt); barrier(); } #endif static inline void local_bh_disable(void) { __local_bh_disable_ip(_THIS_IP_, SOFTIRQ_DISABLE_OFFSET); } extern void _local_bh_enable(void); extern void __local_bh_enable_ip(unsigned long ip, unsigned int cnt); static inline void local_bh_enable_ip(unsigned long ip) { __local_bh_enable_ip(ip, SOFTIRQ_DISABLE_OFFSET); } static inline void local_bh_enable(void) { __local_bh_enable_ip(_THIS_IP_, SOFTIRQ_DISABLE_OFFSET); } #ifdef CONFIG_PREEMPT_RT extern bool local_bh_blocked(void); #else static inline bool local_bh_blocked(void) { return false; } #endif #endif /* _LINUX_BH_H */ |
2 3 9 10 51 8 9 60 21 23 25 6 14 3 8 73 23 9 18 61 20 7 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 | // SPDX-License-Identifier: GPL-2.0-only #include <linux/bitmap.h> #include <linux/ctype.h> #include <linux/errno.h> #include <linux/err.h> #include <linux/export.h> #include <linux/hex.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/string.h> #include "kstrtox.h" /** * bitmap_parse_user - convert an ASCII hex string in a user buffer into a bitmap * * @ubuf: pointer to user buffer containing string. * @ulen: buffer size in bytes. If string is smaller than this * then it must be terminated with a \0. * @maskp: pointer to bitmap array that will contain result. * @nmaskbits: size of bitmap, in bits. */ int bitmap_parse_user(const char __user *ubuf, unsigned int ulen, unsigned long *maskp, int nmaskbits) { char *buf; int ret; buf = memdup_user_nul(ubuf, ulen); if (IS_ERR(buf)) return PTR_ERR(buf); ret = bitmap_parse(buf, UINT_MAX, maskp, nmaskbits); kfree(buf); return ret; } EXPORT_SYMBOL(bitmap_parse_user); /** * bitmap_print_to_pagebuf - convert bitmap to list or hex format ASCII string * @list: indicates whether the bitmap must be list * @buf: page aligned buffer into which string is placed * @maskp: pointer to bitmap to convert * @nmaskbits: size of bitmap, in bits * * Output format is a comma-separated list of decimal numbers and * ranges if list is specified or hex digits grouped into comma-separated * sets of 8 digits/set. Returns the number of characters written to buf. * * It is assumed that @buf is a pointer into a PAGE_SIZE, page-aligned * area and that sufficient storage remains at @buf to accommodate the * bitmap_print_to_pagebuf() output. Returns the number of characters * actually printed to @buf, excluding terminating '\0'. */ int bitmap_print_to_pagebuf(bool list, char *buf, const unsigned long *maskp, int nmaskbits) { ptrdiff_t len = PAGE_SIZE - offset_in_page(buf); return list ? scnprintf(buf, len, "%*pbl\n", nmaskbits, maskp) : scnprintf(buf, len, "%*pb\n", nmaskbits, maskp); } EXPORT_SYMBOL(bitmap_print_to_pagebuf); /** * bitmap_print_to_buf - convert bitmap to list or hex format ASCII string * @list: indicates whether the bitmap must be list * true: print in decimal list format * false: print in hexadecimal bitmask format * @buf: buffer into which string is placed * @maskp: pointer to bitmap to convert * @nmaskbits: size of bitmap, in bits * @off: in the string from which we are copying, We copy to @buf * @count: the maximum number of bytes to print */ static int bitmap_print_to_buf(bool list, char *buf, const unsigned long *maskp, int nmaskbits, loff_t off, size_t count) { const char *fmt = list ? "%*pbl\n" : "%*pb\n"; ssize_t size; void *data; data = kasprintf(GFP_KERNEL, fmt, nmaskbits, maskp); if (!data) return -ENOMEM; size = memory_read_from_buffer(buf, count, &off, data, strlen(data) + 1); kfree(data); return size; } /** * bitmap_print_bitmask_to_buf - convert bitmap to hex bitmask format ASCII string * @buf: buffer into which string is placed * @maskp: pointer to bitmap to convert * @nmaskbits: size of bitmap, in bits * @off: in the string from which we are copying, We copy to @buf * @count: the maximum number of bytes to print * * The bitmap_print_to_pagebuf() is used indirectly via its cpumap wrapper * cpumap_print_to_pagebuf() or directly by drivers to export hexadecimal * bitmask and decimal list to userspace by sysfs ABI. * Drivers might be using a normal attribute for this kind of ABIs. A * normal attribute typically has show entry as below:: * * static ssize_t example_attribute_show(struct device *dev, * struct device_attribute *attr, char *buf) * { * ... * return bitmap_print_to_pagebuf(true, buf, &mask, nr_trig_max); * } * * show entry of attribute has no offset and count parameters and this * means the file is limited to one page only. * bitmap_print_to_pagebuf() API works terribly well for this kind of * normal attribute with buf parameter and without offset, count:: * * bitmap_print_to_pagebuf(bool list, char *buf, const unsigned long *maskp, * int nmaskbits) * { * } * * The problem is once we have a large bitmap, we have a chance to get a * bitmask or list more than one page. Especially for list, it could be * as complex as 0,3,5,7,9,... We have no simple way to know it exact size. * It turns out bin_attribute is a way to break this limit. bin_attribute * has show entry as below:: * * static ssize_t * example_bin_attribute_show(struct file *filp, struct kobject *kobj, * struct bin_attribute *attr, char *buf, * loff_t offset, size_t count) * { * ... * } * * With the new offset and count parameters, this makes sysfs ABI be able * to support file size more than one page. For example, offset could be * >= 4096. * bitmap_print_bitmask_to_buf(), bitmap_print_list_to_buf() wit their * cpumap wrapper cpumap_print_bitmask_to_buf(), cpumap_print_list_to_buf() * make those drivers be able to support large bitmask and list after they * move to use bin_attribute. In result, we have to pass the corresponding * parameters such as off, count from bin_attribute show entry to this API. * * The role of cpumap_print_bitmask_to_buf() and cpumap_print_list_to_buf() * is similar with cpumap_print_to_pagebuf(), the difference is that * bitmap_print_to_pagebuf() mainly serves sysfs attribute with the assumption * the destination buffer is exactly one page and won't be more than one page. * cpumap_print_bitmask_to_buf() and cpumap_print_list_to_buf(), on the other * hand, mainly serves bin_attribute which doesn't work with exact one page, * and it can break the size limit of converted decimal list and hexadecimal * bitmask. * * WARNING! * * This function is not a replacement for sprintf() or bitmap_print_to_pagebuf(). * It is intended to workaround sysfs limitations discussed above and should be * used carefully in general case for the following reasons: * * - Time complexity is O(nbits^2/count), comparing to O(nbits) for snprintf(). * - Memory complexity is O(nbits), comparing to O(1) for snprintf(). * - @off and @count are NOT offset and number of bits to print. * - If printing part of bitmap as list, the resulting string is not a correct * list representation of bitmap. Particularly, some bits within or out of * related interval may be erroneously set or unset. The format of the string * may be broken, so bitmap_parselist-like parser may fail parsing it. * - If printing the whole bitmap as list by parts, user must ensure the order * of calls of the function such that the offset is incremented linearly. * - If printing the whole bitmap as list by parts, user must keep bitmap * unchanged between the very first and very last call. Otherwise concatenated * result may be incorrect, and format may be broken. * * Returns the number of characters actually printed to @buf */ int bitmap_print_bitmask_to_buf(char *buf, const unsigned long *maskp, int nmaskbits, loff_t off, size_t count) { return bitmap_print_to_buf(false, buf, maskp, nmaskbits, off, count); } EXPORT_SYMBOL(bitmap_print_bitmask_to_buf); /** * bitmap_print_list_to_buf - convert bitmap to decimal list format ASCII string * @buf: buffer into which string is placed * @maskp: pointer to bitmap to convert * @nmaskbits: size of bitmap, in bits * @off: in the string from which we are copying, We copy to @buf * @count: the maximum number of bytes to print * * Everything is same with the above bitmap_print_bitmask_to_buf() except * the print format. */ int bitmap_print_list_to_buf(char *buf, const unsigned long *maskp, int nmaskbits, loff_t off, size_t count) { return bitmap_print_to_buf(true, buf, maskp, nmaskbits, off, count); } EXPORT_SYMBOL(bitmap_print_list_to_buf); /* * Region 9-38:4/10 describes the following bitmap structure: * 0 9 12 18 38 N * .........****......****......****.................. * ^ ^ ^ ^ ^ * start off group_len end nbits */ struct region { unsigned int start; unsigned int off; unsigned int group_len; unsigned int end; unsigned int nbits; }; static void bitmap_set_region(const struct region *r, unsigned long *bitmap) { unsigned int start; for (start = r->start; start <= r->end; start += r->group_len) bitmap_set(bitmap, start, min(r->end - start + 1, r->off)); } static int bitmap_check_region(const struct region *r) { if (r->start > r->end || r->group_len == 0 || r->off > r->group_len) return -EINVAL; if (r->end >= r->nbits) return -ERANGE; return 0; } static const char *bitmap_getnum(const char *str, unsigned int *num, unsigned int lastbit) { unsigned long long n; unsigned int len; if (str[0] == 'N') { *num = lastbit; return str + 1; } len = _parse_integer(str, 10, &n); if (!len) return ERR_PTR(-EINVAL); if (len & KSTRTOX_OVERFLOW || n != (unsigned int)n) return ERR_PTR(-EOVERFLOW); *num = n; return str + len; } static inline bool end_of_str(char c) { return c == '\0' || c == '\n'; } static inline bool __end_of_region(char c) { return isspace(c) || c == ','; } static inline bool end_of_region(char c) { return __end_of_region(c) || end_of_str(c); } /* * The format allows commas and whitespaces at the beginning * of the region. */ static const char *bitmap_find_region(const char *str) { while (__end_of_region(*str)) str++; return end_of_str(*str) ? NULL : str; } static const char *bitmap_find_region_reverse(const char *start, const char *end) { while (start <= end && __end_of_region(*end)) end--; return end; } static const char *bitmap_parse_region(const char *str, struct region *r) { unsigned int lastbit = r->nbits - 1; if (!strncasecmp(str, "all", 3)) { r->start = 0; r->end = lastbit; str += 3; goto check_pattern; } str = bitmap_getnum(str, &r->start, lastbit); if (IS_ERR(str)) return str; if (end_of_region(*str)) goto no_end; if (*str != '-') return ERR_PTR(-EINVAL); str = bitmap_getnum(str + 1, &r->end, lastbit); if (IS_ERR(str)) return str; check_pattern: if (end_of_region(*str)) goto no_pattern; if (*str != ':') return ERR_PTR(-EINVAL); str = bitmap_getnum(str + 1, &r->off, lastbit); if (IS_ERR(str)) return str; if (*str != '/') return ERR_PTR(-EINVAL); return bitmap_getnum(str + 1, &r->group_len, lastbit); no_end: r->end = r->start; no_pattern: r->off = r->end + 1; r->group_len = r->end + 1; return end_of_str(*str) ? NULL : str; } /** * bitmap_parselist - convert list format ASCII string to bitmap * @buf: read user string from this buffer; must be terminated * with a \0 or \n. * @maskp: write resulting mask here * @nmaskbits: number of bits in mask to be written * * Input format is a comma-separated list of decimal numbers and * ranges. Consecutively set bits are shown as two hyphen-separated * decimal numbers, the smallest and largest bit numbers set in * the range. * Optionally each range can be postfixed to denote that only parts of it * should be set. The range will divided to groups of specific size. * From each group will be used only defined amount of bits. * Syntax: range:used_size/group_size * Example: 0-1023:2/256 ==> 0,1,256,257,512,513,768,769 * The value 'N' can be used as a dynamically substituted token for the * maximum allowed value; i.e (nmaskbits - 1). Keep in mind that it is * dynamic, so if system changes cause the bitmap width to change, such * as more cores in a CPU list, then any ranges using N will also change. * * Returns: 0 on success, -errno on invalid input strings. Error values: * * - ``-EINVAL``: wrong region format * - ``-EINVAL``: invalid character in string * - ``-ERANGE``: bit number specified too large for mask * - ``-EOVERFLOW``: integer overflow in the input parameters */ int bitmap_parselist(const char *buf, unsigned long *maskp, int nmaskbits) { struct region r; long ret; r.nbits = nmaskbits; bitmap_zero(maskp, r.nbits); while (buf) { buf = bitmap_find_region(buf); if (buf == NULL) return 0; buf = bitmap_parse_region(buf, &r); if (IS_ERR(buf)) return PTR_ERR(buf); ret = bitmap_check_region(&r); if (ret) return ret; bitmap_set_region(&r, maskp); } return 0; } EXPORT_SYMBOL(bitmap_parselist); /** * bitmap_parselist_user() - convert user buffer's list format ASCII * string to bitmap * * @ubuf: pointer to user buffer containing string. * @ulen: buffer size in bytes. If string is smaller than this * then it must be terminated with a \0. * @maskp: pointer to bitmap array that will contain result. * @nmaskbits: size of bitmap, in bits. * * Wrapper for bitmap_parselist(), providing it with user buffer. */ int bitmap_parselist_user(const char __user *ubuf, unsigned int ulen, unsigned long *maskp, int nmaskbits) { char *buf; int ret; buf = memdup_user_nul(ubuf, ulen); if (IS_ERR(buf)) return PTR_ERR(buf); ret = bitmap_parselist(buf, maskp, nmaskbits); kfree(buf); return ret; } EXPORT_SYMBOL(bitmap_parselist_user); static const char *bitmap_get_x32_reverse(const char *start, const char *end, u32 *num) { u32 ret = 0; int c, i; for (i = 0; i < 32; i += 4) { c = hex_to_bin(*end--); if (c < 0) return ERR_PTR(-EINVAL); ret |= c << i; if (start > end || __end_of_region(*end)) goto out; } if (hex_to_bin(*end--) >= 0) return ERR_PTR(-EOVERFLOW); out: *num = ret; return end; } /** * bitmap_parse - convert an ASCII hex string into a bitmap. * @start: pointer to buffer containing string. * @buflen: buffer size in bytes. If string is smaller than this * then it must be terminated with a \0 or \n. In that case, * UINT_MAX may be provided instead of string length. * @maskp: pointer to bitmap array that will contain result. * @nmaskbits: size of bitmap, in bits. * * Commas group hex digits into chunks. Each chunk defines exactly 32 * bits of the resultant bitmask. No chunk may specify a value larger * than 32 bits (%-EOVERFLOW), and if a chunk specifies a smaller value * then leading 0-bits are prepended. %-EINVAL is returned for illegal * characters. Grouping such as "1,,5", ",44", "," or "" is allowed. * Leading, embedded and trailing whitespace accepted. */ int bitmap_parse(const char *start, unsigned int buflen, unsigned long *maskp, int nmaskbits) { const char *end = strnchrnul(start, buflen, '\n') - 1; int chunks = BITS_TO_U32(nmaskbits); u32 *bitmap = (u32 *)maskp; int unset_bit; int chunk; for (chunk = 0; ; chunk++) { end = bitmap_find_region_reverse(start, end); if (start > end) break; if (!chunks--) return -EOVERFLOW; #if defined(CONFIG_64BIT) && defined(__BIG_ENDIAN) end = bitmap_get_x32_reverse(start, end, &bitmap[chunk ^ 1]); #else end = bitmap_get_x32_reverse(start, end, &bitmap[chunk]); #endif if (IS_ERR(end)) return PTR_ERR(end); } unset_bit = (BITS_TO_U32(nmaskbits) - chunks) * 32; if (unset_bit < nmaskbits) { bitmap_clear(maskp, unset_bit, nmaskbits - unset_bit); return 0; } if (find_next_bit(maskp, unset_bit, nmaskbits) != unset_bit) return -EOVERFLOW; return 0; } EXPORT_SYMBOL(bitmap_parse); |
211 211 212 212 212 211 211 210 212 401 71 1 71 5 67 54 54 218 1 28 108 102 31 211 212 201 10 81 204 4 2 205 6 210 2 175 31 6 2 336 4 80 257 315 22 252 61 5 4 233 183 177 6 178 98 76 167 1 167 165 7 153 2 3 102 59 90 70 167 208 3 18 188 10 184 126 139 311 28 2 282 8 1 18 12 3 290 3 86 223 234 30 66 70 75 186 13 85 68 67 1 203 12 12 12 9 3 11 21 10 10 29 2 14 13 27 1652 1652 1306 375 1197 1196 5 1 4 233 51 183 194 56 197 140 2 128 11 33 47 55 34 215 141 1 31 2 14 2 53 2 77 128 127 3 3 1 143 143 410 411 52 52 40 11 4 39 8 46 3 16 44 1 11 11 33 18 18 15 33 3 33 4 1 3 27 27 5 2 2 2 15 15 15 1 1 36 14 8 5 5 5 1 1 5 5 5 5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 | // SPDX-License-Identifier: GPL-2.0-or-later /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * PF_INET protocol family socket handler. * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Florian La Roche, <flla@stud.uni-sb.de> * Alan Cox, <A.Cox@swansea.ac.uk> * * Changes (see also sock.c) * * piggy, * Karl Knutson : Socket protocol table * A.N.Kuznetsov : Socket death error in accept(). * John Richardson : Fix non blocking error in connect() * so sockets that fail to connect * don't return -EINPROGRESS. * Alan Cox : Asynchronous I/O support * Alan Cox : Keep correct socket pointer on sock * structures * when accept() ed * Alan Cox : Semantics of SO_LINGER aren't state * moved to close when you look carefully. * With this fixed and the accept bug fixed * some RPC stuff seems happier. * Niibe Yutaka : 4.4BSD style write async I/O * Alan Cox, * Tony Gale : Fixed reuse semantics. * Alan Cox : bind() shouldn't abort existing but dead * sockets. Stops FTP netin:.. I hope. * Alan Cox : bind() works correctly for RAW sockets. * Note that FreeBSD at least was broken * in this respect so be careful with * compatibility tests... * Alan Cox : routing cache support * Alan Cox : memzero the socket structure for * compactness. * Matt Day : nonblock connect error handler * Alan Cox : Allow large numbers of pending sockets * (eg for big web sites), but only if * specifically application requested. * Alan Cox : New buffering throughout IP. Used * dumbly. * Alan Cox : New buffering now used smartly. * Alan Cox : BSD rather than common sense * interpretation of listen. * Germano Caronni : Assorted small races. * Alan Cox : sendmsg/recvmsg basic support. * Alan Cox : Only sendmsg/recvmsg now supported. * Alan Cox : Locked down bind (see security list). * Alan Cox : Loosened bind a little. * Mike McLagan : ADD/DEL DLCI Ioctls * Willy Konynenberg : Transparent proxying support. * David S. Miller : New socket lookup architecture. * Some other random speedups. * Cyrus Durgin : Cleaned up file for kmod hacks. * Andi Kleen : Fix inet_stream_connect TCP race. */ #define pr_fmt(fmt) "IPv4: " fmt #include <linux/err.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/in.h> #include <linux/kernel.h> #include <linux/kmod.h> #include <linux/sched.h> #include <linux/timer.h> #include <linux/string.h> #include <linux/sockios.h> #include <linux/net.h> #include <linux/capability.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/stat.h> #include <linux/init.h> #include <linux/poll.h> #include <linux/netfilter_ipv4.h> #include <linux/random.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/inet.h> #include <linux/igmp.h> #include <linux/inetdevice.h> #include <linux/netdevice.h> #include <net/checksum.h> #include <net/ip.h> #include <net/protocol.h> #include <net/arp.h> #include <net/route.h> #include <net/ip_fib.h> #include <net/inet_connection_sock.h> #include <net/gro.h> #include <net/gso.h> #include <net/tcp.h> #include <net/udp.h> #include <net/udplite.h> #include <net/ping.h> #include <linux/skbuff.h> #include <net/sock.h> #include <net/raw.h> #include <net/icmp.h> #include <net/inet_common.h> #include <net/ip_tunnels.h> #include <net/xfrm.h> #include <net/net_namespace.h> #include <net/secure_seq.h> #ifdef CONFIG_IP_MROUTE #include <linux/mroute.h> #endif #include <net/l3mdev.h> #include <net/compat.h> #include <net/rps.h> #include <trace/events/sock.h> /* The inetsw table contains everything that inet_create needs to * build a new socket. */ static struct list_head inetsw[SOCK_MAX]; static DEFINE_SPINLOCK(inetsw_lock); /* New destruction routine */ void inet_sock_destruct(struct sock *sk) { struct inet_sock *inet = inet_sk(sk); __skb_queue_purge(&sk->sk_receive_queue); __skb_queue_purge(&sk->sk_error_queue); sk_mem_reclaim_final(sk); if (sk->sk_type == SOCK_STREAM && sk->sk_state != TCP_CLOSE) { pr_err("Attempt to release TCP socket in state %d %p\n", sk->sk_state, sk); return; } if (!sock_flag(sk, SOCK_DEAD)) { pr_err("Attempt to release alive inet socket %p\n", sk); return; } WARN_ON_ONCE(atomic_read(&sk->sk_rmem_alloc)); WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc)); WARN_ON_ONCE(sk->sk_wmem_queued); WARN_ON_ONCE(sk_forward_alloc_get(sk)); kfree(rcu_dereference_protected(inet->inet_opt, 1)); dst_release(rcu_dereference_protected(sk->sk_dst_cache, 1)); dst_release(rcu_dereference_protected(sk->sk_rx_dst, 1)); } EXPORT_SYMBOL(inet_sock_destruct); /* * The routines beyond this point handle the behaviour of an AF_INET * socket object. Mostly it punts to the subprotocols of IP to do * the work. */ /* * Automatically bind an unbound socket. */ static int inet_autobind(struct sock *sk) { struct inet_sock *inet; /* We may need to bind the socket. */ lock_sock(sk); inet = inet_sk(sk); if (!inet->inet_num) { if (sk->sk_prot->get_port(sk, 0)) { release_sock(sk); return -EAGAIN; } inet->inet_sport = htons(inet->inet_num); } release_sock(sk); return 0; } int __inet_listen_sk(struct sock *sk, int backlog) { unsigned char old_state = sk->sk_state; int err, tcp_fastopen; if (!((1 << old_state) & (TCPF_CLOSE | TCPF_LISTEN))) return -EINVAL; WRITE_ONCE(sk->sk_max_ack_backlog, backlog); /* Really, if the socket is already in listen state * we can only allow the backlog to be adjusted. */ if (old_state != TCP_LISTEN) { /* Enable TFO w/o requiring TCP_FASTOPEN socket option. * Note that only TCP sockets (SOCK_STREAM) will reach here. * Also fastopen backlog may already been set via the option * because the socket was in TCP_LISTEN state previously but * was shutdown() rather than close(). */ tcp_fastopen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen); if ((tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) && (tcp_fastopen & TFO_SERVER_ENABLE) && !inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) { fastopen_queue_tune(sk, backlog); tcp_fastopen_init_key_once(sock_net(sk)); } err = inet_csk_listen_start(sk); if (err) return err; tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_LISTEN_CB, 0, NULL); } return 0; } /* * Move a socket into listening state. */ int inet_listen(struct socket *sock, int backlog) { struct sock *sk = sock->sk; int err = -EINVAL; lock_sock(sk); if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM) goto out; err = __inet_listen_sk(sk, backlog); out: release_sock(sk); return err; } EXPORT_SYMBOL(inet_listen); /* * Create an inet socket. */ static int inet_create(struct net *net, struct socket *sock, int protocol, int kern) { struct sock *sk; struct inet_protosw *answer; struct inet_sock *inet; struct proto *answer_prot; unsigned char answer_flags; int try_loading_module = 0; int err; if (protocol < 0 || protocol >= IPPROTO_MAX) return -EINVAL; sock->state = SS_UNCONNECTED; /* Look for the requested type/protocol pair. */ lookup_protocol: err = -ESOCKTNOSUPPORT; rcu_read_lock(); list_for_each_entry_rcu(answer, &inetsw[sock->type], list) { err = 0; /* Check the non-wild match. */ if (protocol == answer->protocol) { if (protocol != IPPROTO_IP) break; } else { /* Check for the two wild cases. */ if (IPPROTO_IP == protocol) { protocol = answer->protocol; break; } if (IPPROTO_IP == answer->protocol) break; } err = -EPROTONOSUPPORT; } if (unlikely(err)) { if (try_loading_module < 2) { rcu_read_unlock(); /* * Be more specific, e.g. net-pf-2-proto-132-type-1 * (net-pf-PF_INET-proto-IPPROTO_SCTP-type-SOCK_STREAM) */ if (++try_loading_module == 1) request_module("net-pf-%d-proto-%d-type-%d", PF_INET, protocol, sock->type); /* * Fall back to generic, e.g. net-pf-2-proto-132 * (net-pf-PF_INET-proto-IPPROTO_SCTP) */ else request_module("net-pf-%d-proto-%d", PF_INET, protocol); goto lookup_protocol; } else goto out_rcu_unlock; } err = -EPERM; if (sock->type == SOCK_RAW && !kern && !ns_capable(net->user_ns, CAP_NET_RAW)) goto out_rcu_unlock; sock->ops = answer->ops; answer_prot = answer->prot; answer_flags = answer->flags; rcu_read_unlock(); WARN_ON(!answer_prot->slab); err = -ENOMEM; sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot, kern); if (!sk) goto out; err = 0; if (INET_PROTOSW_REUSE & answer_flags) sk->sk_reuse = SK_CAN_REUSE; if (INET_PROTOSW_ICSK & answer_flags) inet_init_csk_locks(sk); inet = inet_sk(sk); inet_assign_bit(IS_ICSK, sk, INET_PROTOSW_ICSK & answer_flags); inet_clear_bit(NODEFRAG, sk); if (SOCK_RAW == sock->type) { inet->inet_num = protocol; if (IPPROTO_RAW == protocol) inet_set_bit(HDRINCL, sk); } if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc)) inet->pmtudisc = IP_PMTUDISC_DONT; else inet->pmtudisc = IP_PMTUDISC_WANT; atomic_set(&inet->inet_id, 0); sock_init_data(sock, sk); sk->sk_destruct = inet_sock_destruct; sk->sk_protocol = protocol; sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv; sk->sk_txrehash = READ_ONCE(net->core.sysctl_txrehash); inet->uc_ttl = -1; inet_set_bit(MC_LOOP, sk); inet->mc_ttl = 1; inet_set_bit(MC_ALL, sk); inet->mc_index = 0; inet->mc_list = NULL; inet->rcv_tos = 0; if (inet->inet_num) { /* It assumes that any protocol which allows * the user to assign a number at socket * creation time automatically * shares. */ inet->inet_sport = htons(inet->inet_num); /* Add to protocol hash chains. */ err = sk->sk_prot->hash(sk); if (err) { sk_common_release(sk); goto out; } } if (sk->sk_prot->init) { err = sk->sk_prot->init(sk); if (err) { sk_common_release(sk); goto out; } } if (!kern) { err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk); if (err) { sk_common_release(sk); goto out; } } out: return err; out_rcu_unlock: rcu_read_unlock(); goto out; } /* * The peer socket should always be NULL (or else). When we call this * function we are destroying the object and from then on nobody * should refer to it. */ int inet_release(struct socket *sock) { struct sock *sk = sock->sk; if (sk) { long timeout; if (!sk->sk_kern_sock) BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk); /* Applications forget to leave groups before exiting */ ip_mc_drop_socket(sk); /* If linger is set, we don't return until the close * is complete. Otherwise we return immediately. The * actually closing is done the same either way. * * If the close is due to the process exiting, we never * linger.. */ timeout = 0; if (sock_flag(sk, SOCK_LINGER) && !(current->flags & PF_EXITING)) timeout = sk->sk_lingertime; sk->sk_prot->close(sk, timeout); sock->sk = NULL; } return 0; } EXPORT_SYMBOL(inet_release); int inet_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len) { u32 flags = BIND_WITH_LOCK; int err; /* If the socket has its own bind function then use it. (RAW) */ if (sk->sk_prot->bind) { return sk->sk_prot->bind(sk, uaddr, addr_len); } if (addr_len < sizeof(struct sockaddr_in)) return -EINVAL; /* BPF prog is run before any checks are done so that if the prog * changes context in a wrong way it will be caught. */ err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, &addr_len, CGROUP_INET4_BIND, &flags); if (err) return err; return __inet_bind(sk, uaddr, addr_len, flags); } int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { return inet_bind_sk(sock->sk, uaddr, addr_len); } EXPORT_SYMBOL(inet_bind); int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, u32 flags) { struct sockaddr_in *addr = (struct sockaddr_in *)uaddr; struct inet_sock *inet = inet_sk(sk); struct net *net = sock_net(sk); unsigned short snum; int chk_addr_ret; u32 tb_id = RT_TABLE_LOCAL; int err; if (addr->sin_family != AF_INET) { /* Compatibility games : accept AF_UNSPEC (mapped to AF_INET) * only if s_addr is INADDR_ANY. */ err = -EAFNOSUPPORT; if (addr->sin_family != AF_UNSPEC || addr->sin_addr.s_addr != htonl(INADDR_ANY)) goto out; } tb_id = l3mdev_fib_table_by_index(net, sk->sk_bound_dev_if) ? : tb_id; chk_addr_ret = inet_addr_type_table(net, addr->sin_addr.s_addr, tb_id); /* Not specified by any standard per-se, however it breaks too * many applications when removed. It is unfortunate since * allowing applications to make a non-local bind solves * several problems with systems using dynamic addressing. * (ie. your servers still start up even if your ISDN link * is temporarily down) */ err = -EADDRNOTAVAIL; if (!inet_addr_valid_or_nonlocal(net, inet, addr->sin_addr.s_addr, chk_addr_ret)) goto out; snum = ntohs(addr->sin_port); err = -EACCES; if (!(flags & BIND_NO_CAP_NET_BIND_SERVICE) && snum && inet_port_requires_bind_service(net, snum) && !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) goto out; /* We keep a pair of addresses. rcv_saddr is the one * used by hash lookups, and saddr is used for transmit. * * In the BSD API these are the same except where it * would be illegal to use them (multicast/broadcast) in * which case the sending device address is used. */ if (flags & BIND_WITH_LOCK) lock_sock(sk); /* Check these errors (active socket, double bind). */ err = -EINVAL; if (sk->sk_state != TCP_CLOSE || inet->inet_num) goto out_release_sock; inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr; if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) inet->inet_saddr = 0; /* Use device */ /* Make sure we are allowed to bind here. */ if (snum || !(inet_test_bit(BIND_ADDRESS_NO_PORT, sk) || (flags & BIND_FORCE_ADDRESS_NO_PORT))) { err = sk->sk_prot->get_port(sk, snum); if (err) { inet->inet_saddr = inet->inet_rcv_saddr = 0; goto out_release_sock; } if (!(flags & BIND_FROM_BPF)) { err = BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk); if (err) { inet->inet_saddr = inet->inet_rcv_saddr = 0; if (sk->sk_prot->put_port) sk->sk_prot->put_port(sk); goto out_release_sock; } } } if (inet->inet_rcv_saddr) sk->sk_userlocks |= SOCK_BINDADDR_LOCK; if (snum) sk->sk_userlocks |= SOCK_BINDPORT_LOCK; inet->inet_sport = htons(inet->inet_num); inet->inet_daddr = 0; inet->inet_dport = 0; sk_dst_reset(sk); err = 0; out_release_sock: if (flags & BIND_WITH_LOCK) release_sock(sk); out: return err; } int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags) { struct sock *sk = sock->sk; const struct proto *prot; int err; if (addr_len < sizeof(uaddr->sa_family)) return -EINVAL; /* IPV6_ADDRFORM can change sk->sk_prot under us. */ prot = READ_ONCE(sk->sk_prot); if (uaddr->sa_family == AF_UNSPEC) return prot->disconnect(sk, flags); if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) { err = prot->pre_connect(sk, uaddr, addr_len); if (err) return err; } if (data_race(!inet_sk(sk)->inet_num) && inet_autobind(sk)) return -EAGAIN; return prot->connect(sk, uaddr, addr_len); } EXPORT_SYMBOL(inet_dgram_connect); static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias) { DEFINE_WAIT_FUNC(wait, woken_wake_function); add_wait_queue(sk_sleep(sk), &wait); sk->sk_write_pending += writebias; /* Basic assumption: if someone sets sk->sk_err, he _must_ * change state of the socket from TCP_SYN_*. * Connect() does not allow to get error notifications * without closing the socket. */ while ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { release_sock(sk); timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo); lock_sock(sk); if (signal_pending(current) || !timeo) break; } remove_wait_queue(sk_sleep(sk), &wait); sk->sk_write_pending -= writebias; return timeo; } /* * Connect to a remote host. There is regrettably still a little * TCP 'magic' in here. */ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags, int is_sendmsg) { struct sock *sk = sock->sk; int err; long timeo; /* * uaddr can be NULL and addr_len can be 0 if: * sk is a TCP fastopen active socket and * TCP_FASTOPEN_CONNECT sockopt is set and * we already have a valid cookie for this socket. * In this case, user can call write() after connect(). * write() will invoke tcp_sendmsg_fastopen() which calls * __inet_stream_connect(). */ if (uaddr) { if (addr_len < sizeof(uaddr->sa_family)) return -EINVAL; if (uaddr->sa_family == AF_UNSPEC) { sk->sk_disconnects++; err = sk->sk_prot->disconnect(sk, flags); sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED; goto out; } } switch (sock->state) { default: err = -EINVAL; goto out; case SS_CONNECTED: err = -EISCONN; goto out; case SS_CONNECTING: if (inet_test_bit(DEFER_CONNECT, sk)) err = is_sendmsg ? -EINPROGRESS : -EISCONN; else err = -EALREADY; /* Fall out of switch with err, set for this state */ break; case SS_UNCONNECTED: err = -EISCONN; if (sk->sk_state != TCP_CLOSE) goto out; if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) { err = sk->sk_prot->pre_connect(sk, uaddr, addr_len); if (err) goto out; } err = sk->sk_prot->connect(sk, uaddr, addr_len); if (err < 0) goto out; sock->state = SS_CONNECTING; if (!err && inet_test_bit(DEFER_CONNECT, sk)) goto out; /* Just entered SS_CONNECTING state; the only * difference is that return value in non-blocking * case is EINPROGRESS, rather than EALREADY. */ err = -EINPROGRESS; break; } timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { int writebias = (sk->sk_protocol == IPPROTO_TCP) && tcp_sk(sk)->fastopen_req && tcp_sk(sk)->fastopen_req->data ? 1 : 0; int dis = sk->sk_disconnects; /* Error code is set above */ if (!timeo || !inet_wait_for_connect(sk, timeo, writebias)) goto out; err = sock_intr_errno(timeo); if (signal_pending(current)) goto out; if (dis != sk->sk_disconnects) { err = -EPIPE; goto out; } } /* Connection was closed by RST, timeout, ICMP error * or another process disconnected us. */ if (sk->sk_state == TCP_CLOSE) goto sock_error; /* sk->sk_err may be not zero now, if RECVERR was ordered by user * and error was received after socket entered established state. * Hence, it is handled normally after connect() return successfully. */ sock->state = SS_CONNECTED; err = 0; out: return err; sock_error: err = sock_error(sk) ? : -ECONNABORTED; sock->state = SS_UNCONNECTED; sk->sk_disconnects++; if (sk->sk_prot->disconnect(sk, flags)) sock->state = SS_DISCONNECTING; goto out; } EXPORT_SYMBOL(__inet_stream_connect); int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags) { int err; lock_sock(sock->sk); err = __inet_stream_connect(sock, uaddr, addr_len, flags, 0); release_sock(sock->sk); return err; } EXPORT_SYMBOL(inet_stream_connect); void __inet_accept(struct socket *sock, struct socket *newsock, struct sock *newsk) { sock_rps_record_flow(newsk); WARN_ON(!((1 << newsk->sk_state) & (TCPF_ESTABLISHED | TCPF_SYN_RECV | TCPF_CLOSE_WAIT | TCPF_CLOSE))); if (test_bit(SOCK_SUPPORT_ZC, &sock->flags)) set_bit(SOCK_SUPPORT_ZC, &newsock->flags); sock_graft(newsk, newsock); newsock->state = SS_CONNECTED; } /* * Accept a pending connection. The TCP layer now gives BSD semantics. */ int inet_accept(struct socket *sock, struct socket *newsock, int flags, bool kern) { struct sock *sk1 = sock->sk, *sk2; int err = -EINVAL; /* IPV6_ADDRFORM can change sk->sk_prot under us. */ sk2 = READ_ONCE(sk1->sk_prot)->accept(sk1, flags, &err, kern); if (!sk2) return err; lock_sock(sk2); __inet_accept(sock, newsock, sk2); release_sock(sk2); return 0; } EXPORT_SYMBOL(inet_accept); /* * This does both peername and sockname. */ int inet_getname(struct socket *sock, struct sockaddr *uaddr, int peer) { struct sock *sk = sock->sk; struct inet_sock *inet = inet_sk(sk); DECLARE_SOCKADDR(struct sockaddr_in *, sin, uaddr); int sin_addr_len = sizeof(*sin); sin->sin_family = AF_INET; lock_sock(sk); if (peer) { if (!inet->inet_dport || (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) && peer == 1)) { release_sock(sk); return -ENOTCONN; } sin->sin_port = inet->inet_dport; sin->sin_addr.s_addr = inet->inet_daddr; BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len, CGROUP_INET4_GETPEERNAME); } else { __be32 addr = inet->inet_rcv_saddr; if (!addr) addr = inet->inet_saddr; sin->sin_port = inet->inet_sport; sin->sin_addr.s_addr = addr; BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len, CGROUP_INET4_GETSOCKNAME); } release_sock(sk); memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); return sin_addr_len; } EXPORT_SYMBOL(inet_getname); int inet_send_prepare(struct sock *sk) { sock_rps_record_flow(sk); /* We may need to bind the socket. */ if (data_race(!inet_sk(sk)->inet_num) && !sk->sk_prot->no_autobind && inet_autobind(sk)) return -EAGAIN; return 0; } EXPORT_SYMBOL_GPL(inet_send_prepare); int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { struct sock *sk = sock->sk; if (unlikely(inet_send_prepare(sk))) return -EAGAIN; return INDIRECT_CALL_2(sk->sk_prot->sendmsg, tcp_sendmsg, udp_sendmsg, sk, msg, size); } EXPORT_SYMBOL(inet_sendmsg); void inet_splice_eof(struct socket *sock) { const struct proto *prot; struct sock *sk = sock->sk; if (unlikely(inet_send_prepare(sk))) return; /* IPV6_ADDRFORM can change sk->sk_prot under us. */ prot = READ_ONCE(sk->sk_prot); if (prot->splice_eof) prot->splice_eof(sock); } EXPORT_SYMBOL_GPL(inet_splice_eof); INDIRECT_CALLABLE_DECLARE(int udp_recvmsg(struct sock *, struct msghdr *, size_t, int, int *)); int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct sock *sk = sock->sk; int addr_len = 0; int err; if (likely(!(flags & MSG_ERRQUEUE))) sock_rps_record_flow(sk); err = INDIRECT_CALL_2(sk->sk_prot->recvmsg, tcp_recvmsg, udp_recvmsg, sk, msg, size, flags, &addr_len); if (err >= 0) msg->msg_namelen = addr_len; return err; } EXPORT_SYMBOL(inet_recvmsg); int inet_shutdown(struct socket *sock, int how) { struct sock *sk = sock->sk; int err = 0; /* This should really check to make sure * the socket is a TCP socket. (WHY AC...) */ how++; /* maps 0->1 has the advantage of making bit 1 rcvs and 1->2 bit 2 snds. 2->3 */ if ((how & ~SHUTDOWN_MASK) || !how) /* MAXINT->0 */ return -EINVAL; lock_sock(sk); if (sock->state == SS_CONNECTING) { if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_CLOSE)) sock->state = SS_DISCONNECTING; else sock->state = SS_CONNECTED; } switch (sk->sk_state) { case TCP_CLOSE: err = -ENOTCONN; /* Hack to wake up other listeners, who can poll for EPOLLHUP, even on eg. unconnected UDP sockets -- RR */ fallthrough; default: WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | how); if (sk->sk_prot->shutdown) sk->sk_prot->shutdown(sk, how); break; /* Remaining two branches are temporary solution for missing * close() in multithreaded environment. It is _not_ a good idea, * but we have no choice until close() is repaired at VFS level. */ case TCP_LISTEN: if (!(how & RCV_SHUTDOWN)) break; fallthrough; case TCP_SYN_SENT: err = sk->sk_prot->disconnect(sk, O_NONBLOCK); sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED; break; } /* Wake up anyone sleeping in poll. */ sk->sk_state_change(sk); release_sock(sk); return err; } EXPORT_SYMBOL(inet_shutdown); /* * ioctl() calls you can issue on an INET socket. Most of these are * device configuration and stuff and very rarely used. Some ioctls * pass on to the socket itself. * * NOTE: I like the idea of a module for the config stuff. ie ifconfig * loads the devconfigure module does its configuring and unloads it. * There's a good 20K of config code hanging around the kernel. */ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock->sk; int err = 0; struct net *net = sock_net(sk); void __user *p = (void __user *)arg; struct ifreq ifr; struct rtentry rt; switch (cmd) { case SIOCADDRT: case SIOCDELRT: if (copy_from_user(&rt, p, sizeof(struct rtentry))) return -EFAULT; err = ip_rt_ioctl(net, cmd, &rt); break; case SIOCRTMSG: err = -EINVAL; break; case SIOCDARP: case SIOCGARP: case SIOCSARP: err = arp_ioctl(net, cmd, (void __user *)arg); break; case SIOCGIFADDR: case SIOCGIFBRDADDR: case SIOCGIFNETMASK: case SIOCGIFDSTADDR: case SIOCGIFPFLAGS: if (get_user_ifreq(&ifr, NULL, p)) return -EFAULT; err = devinet_ioctl(net, cmd, &ifr); if (!err && put_user_ifreq(&ifr, p)) err = -EFAULT; break; case SIOCSIFADDR: case SIOCSIFBRDADDR: case SIOCSIFNETMASK: case SIOCSIFDSTADDR: case SIOCSIFPFLAGS: case SIOCSIFFLAGS: if (get_user_ifreq(&ifr, NULL, p)) return -EFAULT; err = devinet_ioctl(net, cmd, &ifr); break; default: if (sk->sk_prot->ioctl) err = sk_ioctl(sk, cmd, (void __user *)arg); else err = -ENOIOCTLCMD; break; } return err; } EXPORT_SYMBOL(inet_ioctl); #ifdef CONFIG_COMPAT static int inet_compat_routing_ioctl(struct sock *sk, unsigned int cmd, struct compat_rtentry __user *ur) { compat_uptr_t rtdev; struct rtentry rt; if (copy_from_user(&rt.rt_dst, &ur->rt_dst, 3 * sizeof(struct sockaddr)) || get_user(rt.rt_flags, &ur->rt_flags) || get_user(rt.rt_metric, &ur->rt_metric) || get_user(rt.rt_mtu, &ur->rt_mtu) || get_user(rt.rt_window, &ur->rt_window) || get_user(rt.rt_irtt, &ur->rt_irtt) || get_user(rtdev, &ur->rt_dev)) return -EFAULT; rt.rt_dev = compat_ptr(rtdev); return ip_rt_ioctl(sock_net(sk), cmd, &rt); } static int inet_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { void __user *argp = compat_ptr(arg); struct sock *sk = sock->sk; switch (cmd) { case SIOCADDRT: case SIOCDELRT: return inet_compat_routing_ioctl(sk, cmd, argp); default: if (!sk->sk_prot->compat_ioctl) return -ENOIOCTLCMD; return sk->sk_prot->compat_ioctl(sk, cmd, arg); } } #endif /* CONFIG_COMPAT */ const struct proto_ops inet_stream_ops = { .family = PF_INET, .owner = THIS_MODULE, .release = inet_release, .bind = inet_bind, .connect = inet_stream_connect, .socketpair = sock_no_socketpair, .accept = inet_accept, .getname = inet_getname, .poll = tcp_poll, .ioctl = inet_ioctl, .gettstamp = sock_gettstamp, .listen = inet_listen, .shutdown = inet_shutdown, .setsockopt = sock_common_setsockopt, .getsockopt = sock_common_getsockopt, .sendmsg = inet_sendmsg, .recvmsg = inet_recvmsg, #ifdef CONFIG_MMU .mmap = tcp_mmap, #endif .splice_eof = inet_splice_eof, .splice_read = tcp_splice_read, .set_peek_off = sk_set_peek_off, .read_sock = tcp_read_sock, .read_skb = tcp_read_skb, .sendmsg_locked = tcp_sendmsg_locked, .peek_len = tcp_peek_len, #ifdef CONFIG_COMPAT .compat_ioctl = inet_compat_ioctl, #endif .set_rcvlowat = tcp_set_rcvlowat, }; EXPORT_SYMBOL(inet_stream_ops); const struct proto_ops inet_dgram_ops = { .family = PF_INET, .owner = THIS_MODULE, .release = inet_release, .bind = inet_bind, .connect = inet_dgram_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = inet_getname, .poll = udp_poll, .ioctl = inet_ioctl, .gettstamp = sock_gettstamp, .listen = sock_no_listen, .shutdown = inet_shutdown, .setsockopt = sock_common_setsockopt, .getsockopt = sock_common_getsockopt, .sendmsg = inet_sendmsg, .read_skb = udp_read_skb, .recvmsg = inet_recvmsg, .mmap = sock_no_mmap, .splice_eof = inet_splice_eof, .set_peek_off = udp_set_peek_off, #ifdef CONFIG_COMPAT .compat_ioctl = inet_compat_ioctl, #endif }; EXPORT_SYMBOL(inet_dgram_ops); /* * For SOCK_RAW sockets; should be the same as inet_dgram_ops but without * udp_poll */ static const struct proto_ops inet_sockraw_ops = { .family = PF_INET, .owner = THIS_MODULE, .release = inet_release, .bind = inet_bind, .connect = inet_dgram_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = inet_getname, .poll = datagram_poll, .ioctl = inet_ioctl, .gettstamp = sock_gettstamp, .listen = sock_no_listen, .shutdown = inet_shutdown, .setsockopt = sock_common_setsockopt, .getsockopt = sock_common_getsockopt, .sendmsg = inet_sendmsg, .recvmsg = inet_recvmsg, .mmap = sock_no_mmap, .splice_eof = inet_splice_eof, #ifdef CONFIG_COMPAT .compat_ioctl = inet_compat_ioctl, #endif }; static const struct net_proto_family inet_family_ops = { .family = PF_INET, .create = inet_create, .owner = THIS_MODULE, }; /* Upon startup we insert all the elements in inetsw_array[] into * the linked list inetsw. */ static struct inet_protosw inetsw_array[] = { { .type = SOCK_STREAM, .protocol = IPPROTO_TCP, .prot = &tcp_prot, .ops = &inet_stream_ops, .flags = INET_PROTOSW_PERMANENT | INET_PROTOSW_ICSK, }, { .type = SOCK_DGRAM, .protocol = IPPROTO_UDP, .prot = &udp_prot, .ops = &inet_dgram_ops, .flags = INET_PROTOSW_PERMANENT, }, { .type = SOCK_DGRAM, .protocol = IPPROTO_ICMP, .prot = &ping_prot, .ops = &inet_sockraw_ops, .flags = INET_PROTOSW_REUSE, }, { .type = SOCK_RAW, .protocol = IPPROTO_IP, /* wild card */ .prot = &raw_prot, .ops = &inet_sockraw_ops, .flags = INET_PROTOSW_REUSE, } }; #define INETSW_ARRAY_LEN ARRAY_SIZE(inetsw_array) void inet_register_protosw(struct inet_protosw *p) { struct list_head *lh; struct inet_protosw *answer; int protocol = p->protocol; struct list_head *last_perm; spin_lock_bh(&inetsw_lock); if (p->type >= SOCK_MAX) goto out_illegal; /* If we are trying to override a permanent protocol, bail. */ last_perm = &inetsw[p->type]; list_for_each(lh, &inetsw[p->type]) { answer = list_entry(lh, struct inet_protosw, list); /* Check only the non-wild match. */ if ((INET_PROTOSW_PERMANENT & answer->flags) == 0) break; if (protocol == answer->protocol) goto out_permanent; last_perm = lh; } /* Add the new entry after the last permanent entry if any, so that * the new entry does not override a permanent entry when matched with * a wild-card protocol. But it is allowed to override any existing * non-permanent entry. This means that when we remove this entry, the * system automatically returns to the old behavior. */ list_add_rcu(&p->list, last_perm); out: spin_unlock_bh(&inetsw_lock); return; out_permanent: pr_err("Attempt to override permanent protocol %d\n", protocol); goto out; out_illegal: pr_err("Ignoring attempt to register invalid socket type %d\n", p->type); goto out; } EXPORT_SYMBOL(inet_register_protosw); void inet_unregister_protosw(struct inet_protosw *p) { if (INET_PROTOSW_PERMANENT & p->flags) { pr_err("Attempt to unregister permanent protocol %d\n", p->protocol); } else { spin_lock_bh(&inetsw_lock); list_del_rcu(&p->list); spin_unlock_bh(&inetsw_lock); synchronize_net(); } } EXPORT_SYMBOL(inet_unregister_protosw); static int inet_sk_reselect_saddr(struct sock *sk) { struct inet_sock *inet = inet_sk(sk); __be32 old_saddr = inet->inet_saddr; __be32 daddr = inet->inet_daddr; struct flowi4 *fl4; struct rtable *rt; __be32 new_saddr; struct ip_options_rcu *inet_opt; int err; inet_opt = rcu_dereference_protected(inet->inet_opt, lockdep_sock_is_held(sk)); if (inet_opt && inet_opt->opt.srr) daddr = inet_opt->opt.faddr; /* Query new route. */ fl4 = &inet->cork.fl.u.ip4; rt = ip_route_connect(fl4, daddr, 0, sk->sk_bound_dev_if, sk->sk_protocol, inet->inet_sport, inet->inet_dport, sk); if (IS_ERR(rt)) return PTR_ERR(rt); new_saddr = fl4->saddr; if (new_saddr == old_saddr) { sk_setup_caps(sk, &rt->dst); return 0; } err = inet_bhash2_update_saddr(sk, &new_saddr, AF_INET); if (err) { ip_rt_put(rt); return err; } sk_setup_caps(sk, &rt->dst); if (READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_dynaddr) > 1) { pr_info("%s(): shifting inet->saddr from %pI4 to %pI4\n", __func__, &old_saddr, &new_saddr); } /* * XXX The only one ugly spot where we need to * XXX really change the sockets identity after * XXX it has entered the hashes. -DaveM * * Besides that, it does not check for connection * uniqueness. Wait for troubles. */ return __sk_prot_rehash(sk); } int inet_sk_rebuild_header(struct sock *sk) { struct rtable *rt = dst_rtable(__sk_dst_check(sk, 0)); struct inet_sock *inet = inet_sk(sk); __be32 daddr; struct ip_options_rcu *inet_opt; struct flowi4 *fl4; int err; /* Route is OK, nothing to do. */ if (rt) return 0; /* Reroute. */ rcu_read_lock(); inet_opt = rcu_dereference(inet->inet_opt); daddr = inet->inet_daddr; if (inet_opt && inet_opt->opt.srr) daddr = inet_opt->opt.faddr; rcu_read_unlock(); fl4 = &inet->cork.fl.u.ip4; rt = ip_route_output_ports(sock_net(sk), fl4, sk, daddr, inet->inet_saddr, inet->inet_dport, inet->inet_sport, sk->sk_protocol, ip_sock_rt_tos(sk), sk->sk_bound_dev_if); if (!IS_ERR(rt)) { err = 0; sk_setup_caps(sk, &rt->dst); } else { err = PTR_ERR(rt); /* Routing failed... */ sk->sk_route_caps = 0; /* * Other protocols have to map its equivalent state to TCP_SYN_SENT. * DCCP maps its DCCP_REQUESTING state to TCP_SYN_SENT. -acme */ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_dynaddr) || sk->sk_state != TCP_SYN_SENT || (sk->sk_userlocks & SOCK_BINDADDR_LOCK) || (err = inet_sk_reselect_saddr(sk)) != 0) WRITE_ONCE(sk->sk_err_soft, -err); } return err; } EXPORT_SYMBOL(inet_sk_rebuild_header); void inet_sk_set_state(struct sock *sk, int state) { trace_inet_sock_set_state(sk, sk->sk_state, state); sk->sk_state = state; } EXPORT_SYMBOL(inet_sk_set_state); void inet_sk_state_store(struct sock *sk, int newstate) { trace_inet_sock_set_state(sk, sk->sk_state, newstate); smp_store_release(&sk->sk_state, newstate); } struct sk_buff *inet_gso_segment(struct sk_buff *skb, netdev_features_t features) { bool udpfrag = false, fixedid = false, gso_partial, encap; struct sk_buff *segs = ERR_PTR(-EINVAL); const struct net_offload *ops; unsigned int offset = 0; struct iphdr *iph; int proto, tot_len; int nhoff; int ihl; int id; skb_reset_network_header(skb); nhoff = skb_network_header(skb) - skb_mac_header(skb); if (unlikely(!pskb_may_pull(skb, sizeof(*iph)))) goto out; iph = ip_hdr(skb); ihl = iph->ihl * 4; if (ihl < sizeof(*iph)) goto out; id = ntohs(iph->id); proto = iph->protocol; /* Warning: after this point, iph might be no longer valid */ if (unlikely(!pskb_may_pull(skb, ihl))) goto out; __skb_pull(skb, ihl); encap = SKB_GSO_CB(skb)->encap_level > 0; if (encap) features &= skb->dev->hw_enc_features; SKB_GSO_CB(skb)->encap_level += ihl; skb_reset_transport_header(skb); segs = ERR_PTR(-EPROTONOSUPPORT); if (!skb->encapsulation || encap) { udpfrag = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP); fixedid = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID); /* fixed ID is invalid if DF bit is not set */ if (fixedid && !(ip_hdr(skb)->frag_off & htons(IP_DF))) goto out; } ops = rcu_dereference(inet_offloads[proto]); if (likely(ops && ops->callbacks.gso_segment)) { segs = ops->callbacks.gso_segment(skb, features); if (!segs) skb->network_header = skb_mac_header(skb) + nhoff - skb->head; } if (IS_ERR_OR_NULL(segs)) goto out; gso_partial = !!(skb_shinfo(segs)->gso_type & SKB_GSO_PARTIAL); skb = segs; do { iph = (struct iphdr *)(skb_mac_header(skb) + nhoff); if (udpfrag) { iph->frag_off = htons(offset >> 3); if (skb->next) iph->frag_off |= htons(IP_MF); offset += skb->len - nhoff - ihl; tot_len = skb->len - nhoff; } else if (skb_is_gso(skb)) { if (!fixedid) { iph->id = htons(id); id += skb_shinfo(skb)->gso_segs; } if (gso_partial) tot_len = skb_shinfo(skb)->gso_size + SKB_GSO_CB(skb)->data_offset + skb->head - (unsigned char *)iph; else tot_len = skb->len - nhoff; } else { if (!fixedid) iph->id = htons(id++); tot_len = skb->len - nhoff; } iph->tot_len = htons(tot_len); ip_send_check(iph); if (encap) skb_reset_inner_headers(skb); skb->network_header = (u8 *)iph - skb->head; skb_reset_mac_len(skb); } while ((skb = skb->next)); out: return segs; } static struct sk_buff *ipip_gso_segment(struct sk_buff *skb, netdev_features_t features) { if (!(skb_shinfo(skb)->gso_type & SKB_GSO_IPXIP4)) return ERR_PTR(-EINVAL); return inet_gso_segment(skb, features); } struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb) { const struct net_offload *ops; struct sk_buff *pp = NULL; const struct iphdr *iph; struct sk_buff *p; unsigned int hlen; unsigned int off; int flush = 1; int proto; off = skb_gro_offset(skb); hlen = off + sizeof(*iph); iph = skb_gro_header(skb, hlen, off); if (unlikely(!iph)) goto out; proto = iph->protocol; ops = rcu_dereference(inet_offloads[proto]); if (!ops || !ops->callbacks.gro_receive) goto out; if (*(u8 *)iph != 0x45) goto out; if (ip_is_fragment(iph)) goto out; if (unlikely(ip_fast_csum((u8 *)iph, 5))) goto out; NAPI_GRO_CB(skb)->proto = proto; flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (ntohl(*(__be32 *)&iph->id) & ~IP_DF)); list_for_each_entry(p, head, list) { struct iphdr *iph2; if (!NAPI_GRO_CB(p)->same_flow) continue; iph2 = (struct iphdr *)(p->data + off); /* The above works because, with the exception of the top * (inner most) layer, we only aggregate pkts with the same * hdr length so all the hdrs we'll need to verify will start * at the same offset. */ if ((iph->protocol ^ iph2->protocol) | ((__force u32)iph->saddr ^ (__force u32)iph2->saddr) | ((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) { NAPI_GRO_CB(p)->same_flow = 0; continue; } } NAPI_GRO_CB(skb)->flush |= flush; NAPI_GRO_CB(skb)->inner_network_offset = off; /* Note : No need to call skb_gro_postpull_rcsum() here, * as we already checked checksum over ipv4 header was 0 */ skb_gro_pull(skb, sizeof(*iph)); skb_set_transport_header(skb, skb_gro_offset(skb)); pp = indirect_call_gro_receive(tcp4_gro_receive, udp4_gro_receive, ops->callbacks.gro_receive, head, skb); out: skb_gro_flush_final(skb, pp, flush); return pp; } static struct sk_buff *ipip_gro_receive(struct list_head *head, struct sk_buff *skb) { if (NAPI_GRO_CB(skb)->encap_mark) { NAPI_GRO_CB(skb)->flush = 1; return NULL; } NAPI_GRO_CB(skb)->encap_mark = 1; return inet_gro_receive(head, skb); } #define SECONDS_PER_DAY 86400 /* inet_current_timestamp - Return IP network timestamp * * Return milliseconds since midnight in network byte order. */ __be32 inet_current_timestamp(void) { u32 secs; u32 msecs; struct timespec64 ts; ktime_get_real_ts64(&ts); /* Get secs since midnight. */ (void)div_u64_rem(ts.tv_sec, SECONDS_PER_DAY, &secs); /* Convert to msecs. */ msecs = secs * MSEC_PER_SEC; /* Convert nsec to msec. */ msecs += (u32)ts.tv_nsec / NSEC_PER_MSEC; /* Convert to network byte order. */ return htonl(msecs); } EXPORT_SYMBOL(inet_current_timestamp); int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) { unsigned int family = READ_ONCE(sk->sk_family); if (family == AF_INET) return ip_recv_error(sk, msg, len, addr_len); #if IS_ENABLED(CONFIG_IPV6) if (family == AF_INET6) return pingv6_ops.ipv6_recv_error(sk, msg, len, addr_len); #endif return -EINVAL; } EXPORT_SYMBOL(inet_recv_error); int inet_gro_complete(struct sk_buff *skb, int nhoff) { struct iphdr *iph = (struct iphdr *)(skb->data + nhoff); const struct net_offload *ops; __be16 totlen = iph->tot_len; int proto = iph->protocol; int err = -ENOSYS; if (skb->encapsulation) { skb_set_inner_protocol(skb, cpu_to_be16(ETH_P_IP)); skb_set_inner_network_header(skb, nhoff); } iph_set_totlen(iph, skb->len - nhoff); csum_replace2(&iph->check, totlen, iph->tot_len); ops = rcu_dereference(inet_offloads[proto]); if (WARN_ON(!ops || !ops->callbacks.gro_complete)) goto out; /* Only need to add sizeof(*iph) to get to the next hdr below * because any hdr with option will have been flushed in * inet_gro_receive(). */ err = INDIRECT_CALL_2(ops->callbacks.gro_complete, tcp4_gro_complete, udp4_gro_complete, skb, nhoff + sizeof(*iph)); out: return err; } static int ipip_gro_complete(struct sk_buff *skb, int nhoff) { skb->encapsulation = 1; skb_shinfo(skb)->gso_type |= SKB_GSO_IPXIP4; return inet_gro_complete(skb, nhoff); } int inet_ctl_sock_create(struct sock **sk, unsigned short family, unsigned short type, unsigned char protocol, struct net *net) { struct socket *sock; int rc = sock_create_kern(net, family, type, protocol, &sock); if (rc == 0) { *sk = sock->sk; (*sk)->sk_allocation = GFP_ATOMIC; (*sk)->sk_use_task_frag = false; /* * Unhash it so that IP input processing does not even see it, * we do not wish this socket to see incoming packets. */ (*sk)->sk_prot->unhash(*sk); } return rc; } EXPORT_SYMBOL_GPL(inet_ctl_sock_create); unsigned long snmp_fold_field(void __percpu *mib, int offt) { unsigned long res = 0; int i; for_each_possible_cpu(i) res += snmp_get_cpu_field(mib, i, offt); return res; } EXPORT_SYMBOL_GPL(snmp_fold_field); #if BITS_PER_LONG==32 u64 snmp_get_cpu_field64(void __percpu *mib, int cpu, int offt, size_t syncp_offset) { void *bhptr; struct u64_stats_sync *syncp; u64 v; unsigned int start; bhptr = per_cpu_ptr(mib, cpu); syncp = (struct u64_stats_sync *)(bhptr + syncp_offset); do { start = u64_stats_fetch_begin(syncp); v = *(((u64 *)bhptr) + offt); } while (u64_stats_fetch_retry(syncp, start)); return v; } EXPORT_SYMBOL_GPL(snmp_get_cpu_field64); u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_offset) { u64 res = 0; int cpu; for_each_possible_cpu(cpu) { res += snmp_get_cpu_field64(mib, cpu, offt, syncp_offset); } return res; } EXPORT_SYMBOL_GPL(snmp_fold_field64); #endif #ifdef CONFIG_IP_MULTICAST static const struct net_protocol igmp_protocol = { .handler = igmp_rcv, }; #endif static const struct net_protocol icmp_protocol = { .handler = icmp_rcv, .err_handler = icmp_err, .no_policy = 1, }; static __net_init int ipv4_mib_init_net(struct net *net) { int i; net->mib.tcp_statistics = alloc_percpu(struct tcp_mib); if (!net->mib.tcp_statistics) goto err_tcp_mib; net->mib.ip_statistics = alloc_percpu(struct ipstats_mib); if (!net->mib.ip_statistics) goto err_ip_mib; for_each_possible_cpu(i) { struct ipstats_mib *af_inet_stats; af_inet_stats = per_cpu_ptr(net->mib.ip_statistics, i); u64_stats_init(&af_inet_stats->syncp); } net->mib.net_statistics = alloc_percpu(struct linux_mib); if (!net->mib.net_statistics) goto err_net_mib; net->mib.udp_statistics = alloc_percpu(struct udp_mib); if (!net->mib.udp_statistics) goto err_udp_mib; net->mib.udplite_statistics = alloc_percpu(struct udp_mib); if (!net->mib.udplite_statistics) goto err_udplite_mib; net->mib.icmp_statistics = alloc_percpu(struct icmp_mib); if (!net->mib.icmp_statistics) goto err_icmp_mib; net->mib.icmpmsg_statistics = kzalloc(sizeof(struct icmpmsg_mib), GFP_KERNEL); if (!net->mib.icmpmsg_statistics) goto err_icmpmsg_mib; tcp_mib_init(net); return 0; err_icmpmsg_mib: free_percpu(net->mib.icmp_statistics); err_icmp_mib: free_percpu(net->mib.udplite_statistics); err_udplite_mib: free_percpu(net->mib.udp_statistics); err_udp_mib: free_percpu(net->mib.net_statistics); err_net_mib: free_percpu(net->mib.ip_statistics); err_ip_mib: free_percpu(net->mib.tcp_statistics); err_tcp_mib: return -ENOMEM; } static __net_exit void ipv4_mib_exit_net(struct net *net) { kfree(net->mib.icmpmsg_statistics); free_percpu(net->mib.icmp_statistics); free_percpu(net->mib.udplite_statistics); free_percpu(net->mib.udp_statistics); free_percpu(net->mib.net_statistics); free_percpu(net->mib.ip_statistics); free_percpu(net->mib.tcp_statistics); #ifdef CONFIG_MPTCP /* allocated on demand, see mptcp_init_sock() */ free_percpu(net->mib.mptcp_statistics); #endif } static __net_initdata struct pernet_operations ipv4_mib_ops = { .init = ipv4_mib_init_net, .exit = ipv4_mib_exit_net, }; static int __init init_ipv4_mibs(void) { return register_pernet_subsys(&ipv4_mib_ops); } static __net_init int inet_init_net(struct net *net) { /* * Set defaults for local port range */ net->ipv4.ip_local_ports.range = 60999u << 16 | 32768u; seqlock_init(&net->ipv4.ping_group_range.lock); /* * Sane defaults - nobody may create ping sockets. * Boot scripts should set this to distro-specific group. */ net->ipv4.ping_group_range.range[0] = make_kgid(&init_user_ns, 1); net->ipv4.ping_group_range.range[1] = make_kgid(&init_user_ns, 0); /* Default values for sysctl-controlled parameters. * We set them here, in case sysctl is not compiled. */ net->ipv4.sysctl_ip_default_ttl = IPDEFTTL; net->ipv4.sysctl_ip_fwd_update_priority = 1; net->ipv4.sysctl_ip_dynaddr = 0; net->ipv4.sysctl_ip_early_demux = 1; net->ipv4.sysctl_udp_early_demux = 1; net->ipv4.sysctl_tcp_early_demux = 1; net->ipv4.sysctl_nexthop_compat_mode = 1; #ifdef CONFIG_SYSCTL net->ipv4.sysctl_ip_prot_sock = PROT_SOCK; #endif /* Some igmp sysctl, whose values are always used */ net->ipv4.sysctl_igmp_max_memberships = 20; net->ipv4.sysctl_igmp_max_msf = 10; /* IGMP reports for link-local multicast groups are enabled by default */ net->ipv4.sysctl_igmp_llm_reports = 1; net->ipv4.sysctl_igmp_qrv = 2; net->ipv4.sysctl_fib_notify_on_flag_change = 0; return 0; } static __net_initdata struct pernet_operations af_inet_ops = { .init = inet_init_net, }; static int __init init_inet_pernet_ops(void) { return register_pernet_subsys(&af_inet_ops); } static int ipv4_proc_init(void); /* * IP protocol layer initialiser */ static const struct net_offload ipip_offload = { .callbacks = { .gso_segment = ipip_gso_segment, .gro_receive = ipip_gro_receive, .gro_complete = ipip_gro_complete, }, }; static int __init ipip_offload_init(void) { return inet_add_offload(&ipip_offload, IPPROTO_IPIP); } static int __init ipv4_offload_init(void) { /* * Add offloads */ if (udpv4_offload_init() < 0) pr_crit("%s: Cannot add UDP protocol offload\n", __func__); if (tcpv4_offload_init() < 0) pr_crit("%s: Cannot add TCP protocol offload\n", __func__); if (ipip_offload_init() < 0) pr_crit("%s: Cannot add IPIP protocol offload\n", __func__); net_hotdata.ip_packet_offload = (struct packet_offload) { .type = cpu_to_be16(ETH_P_IP), .callbacks = { .gso_segment = inet_gso_segment, .gro_receive = inet_gro_receive, .gro_complete = inet_gro_complete, }, }; dev_add_offload(&net_hotdata.ip_packet_offload); return 0; } fs_initcall(ipv4_offload_init); static struct packet_type ip_packet_type __read_mostly = { .type = cpu_to_be16(ETH_P_IP), .func = ip_rcv, .list_func = ip_list_rcv, }; static int __init inet_init(void) { struct inet_protosw *q; struct list_head *r; int rc; sock_skb_cb_check_size(sizeof(struct inet_skb_parm)); raw_hashinfo_init(&raw_v4_hashinfo); rc = proto_register(&tcp_prot, 1); if (rc) goto out; rc = proto_register(&udp_prot, 1); if (rc) goto out_unregister_tcp_proto; rc = proto_register(&raw_prot, 1); if (rc) goto out_unregister_udp_proto; rc = proto_register(&ping_prot, 1); if (rc) goto out_unregister_raw_proto; /* * Tell SOCKET that we are alive... */ (void)sock_register(&inet_family_ops); #ifdef CONFIG_SYSCTL ip_static_sysctl_init(); #endif /* * Add all the base protocols. */ if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0) pr_crit("%s: Cannot add ICMP protocol\n", __func__); net_hotdata.udp_protocol = (struct net_protocol) { .handler = udp_rcv, .err_handler = udp_err, .no_policy = 1, }; if (inet_add_protocol(&net_hotdata.udp_protocol, IPPROTO_UDP) < 0) pr_crit("%s: Cannot add UDP protocol\n", __func__); net_hotdata.tcp_protocol = (struct net_protocol) { .handler = tcp_v4_rcv, .err_handler = tcp_v4_err, .no_policy = 1, .icmp_strict_tag_validation = 1, }; if (inet_add_protocol(&net_hotdata.tcp_protocol, IPPROTO_TCP) < 0) pr_crit("%s: Cannot add TCP protocol\n", __func__); #ifdef CONFIG_IP_MULTICAST if (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0) pr_crit("%s: Cannot add IGMP protocol\n", __func__); #endif /* Register the socket-side information for inet_create. */ for (r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r) INIT_LIST_HEAD(r); for (q = inetsw_array; q < &inetsw_array[INETSW_ARRAY_LEN]; ++q) inet_register_protosw(q); /* * Set the ARP module up */ arp_init(); /* * Set the IP module up */ ip_init(); /* Initialise per-cpu ipv4 mibs */ if (init_ipv4_mibs()) panic("%s: Cannot init ipv4 mibs\n", __func__); /* Setup TCP slab cache for open requests. */ tcp_init(); /* Setup UDP memory threshold */ udp_init(); /* Add UDP-Lite (RFC 3828) */ udplite4_register(); raw_init(); ping_init(); /* * Set the ICMP layer up */ if (icmp_init() < 0) panic("Failed to create the ICMP control socket.\n"); /* * Initialise the multicast router */ #if defined(CONFIG_IP_MROUTE) if (ip_mr_init()) pr_crit("%s: Cannot init ipv4 mroute\n", __func__); #endif if (init_inet_pernet_ops()) pr_crit("%s: Cannot init ipv4 inet pernet ops\n", __func__); ipv4_proc_init(); ipfrag_init(); dev_add_pack(&ip_packet_type); ip_tunnel_core_init(); rc = 0; out: return rc; out_unregister_raw_proto: proto_unregister(&raw_prot); out_unregister_udp_proto: proto_unregister(&udp_prot); out_unregister_tcp_proto: proto_unregister(&tcp_prot); goto out; } fs_initcall(inet_init); /* ------------------------------------------------------------------------ */ #ifdef CONFIG_PROC_FS static int __init ipv4_proc_init(void) { int rc = 0; if (raw_proc_init()) goto out_raw; if (tcp4_proc_init()) goto out_tcp; if (udp4_proc_init()) goto out_udp; if (ping_proc_init()) goto out_ping; if (ip_misc_proc_init()) goto out_misc; out: return rc; out_misc: ping_proc_exit(); out_ping: udp4_proc_exit(); out_udp: tcp4_proc_exit(); out_tcp: raw_proc_exit(); out_raw: rc = -ENOMEM; goto out; } #else /* CONFIG_PROC_FS */ static int __init ipv4_proc_init(void) { return 0; } #endif /* CONFIG_PROC_FS */ |
16 16 10 16 16 16 16 1 1 10 16 6 1 14 11 14 18 2 15 16 4 1 4 1 6 14 6 6 6 6 1 2 5 1 7 1 7 5 4 7 1 3 4 6 1 5 5 4 1 18 11 4 3 7 1 5 1 2 1 2 2 1 1 16 13 3 18 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 | /* * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README * * Trivial changes by Alan Cox to add the LFS fixes * * Trivial Changes: * Rights granted to Hans Reiser to redistribute under other terms providing * he accepts all liability including but not limited to patent, fitness * for purpose, and direct or indirect claims arising from failure to perform. * * NO WARRANTY */ #include <linux/module.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/time.h> #include <linux/uaccess.h> #include "reiserfs.h" #include "acl.h" #include "xattr.h" #include <linux/init.h> #include <linux/blkdev.h> #include <linux/backing-dev.h> #include <linux/buffer_head.h> #include <linux/exportfs.h> #include <linux/quotaops.h> #include <linux/vfs.h> #include <linux/mount.h> #include <linux/namei.h> #include <linux/crc32.h> #include <linux/seq_file.h> struct file_system_type reiserfs_fs_type; static const char reiserfs_3_5_magic_string[] = REISERFS_SUPER_MAGIC_STRING; static const char reiserfs_3_6_magic_string[] = REISER2FS_SUPER_MAGIC_STRING; static const char reiserfs_jr_magic_string[] = REISER2FS_JR_SUPER_MAGIC_STRING; int is_reiserfs_3_5(struct reiserfs_super_block *rs) { return !strncmp(rs->s_v1.s_magic, reiserfs_3_5_magic_string, strlen(reiserfs_3_5_magic_string)); } int is_reiserfs_3_6(struct reiserfs_super_block *rs) { return !strncmp(rs->s_v1.s_magic, reiserfs_3_6_magic_string, strlen(reiserfs_3_6_magic_string)); } int is_reiserfs_jr(struct reiserfs_super_block *rs) { return !strncmp(rs->s_v1.s_magic, reiserfs_jr_magic_string, strlen(reiserfs_jr_magic_string)); } static int is_any_reiserfs_magic_string(struct reiserfs_super_block *rs) { return (is_reiserfs_3_5(rs) || is_reiserfs_3_6(rs) || is_reiserfs_jr(rs)); } static int reiserfs_remount(struct super_block *s, int *flags, char *data); static int reiserfs_statfs(struct dentry *dentry, struct kstatfs *buf); static int reiserfs_sync_fs(struct super_block *s, int wait) { struct reiserfs_transaction_handle th; /* * Writeback quota in non-journalled quota case - journalled quota has * no dirty dquots */ dquot_writeback_dquots(s, -1); reiserfs_write_lock(s); if (!journal_begin(&th, s, 1)) if (!journal_end_sync(&th)) reiserfs_flush_old_commits(s); reiserfs_write_unlock(s); return 0; } static void flush_old_commits(struct work_struct *work) { struct reiserfs_sb_info *sbi; struct super_block *s; sbi = container_of(work, struct reiserfs_sb_info, old_work.work); s = sbi->s_journal->j_work_sb; /* * We need s_umount for protecting quota writeback. We have to use * trylock as reiserfs_cancel_old_flush() may be waiting for this work * to complete with s_umount held. */ if (!down_read_trylock(&s->s_umount)) { /* Requeue work if we are not cancelling it */ spin_lock(&sbi->old_work_lock); if (sbi->work_queued == 1) queue_delayed_work(system_long_wq, &sbi->old_work, HZ); spin_unlock(&sbi->old_work_lock); return; } spin_lock(&sbi->old_work_lock); /* Avoid clobbering the cancel state... */ if (sbi->work_queued == 1) sbi->work_queued = 0; spin_unlock(&sbi->old_work_lock); reiserfs_sync_fs(s, 1); up_read(&s->s_umount); } void reiserfs_schedule_old_flush(struct super_block *s) { struct reiserfs_sb_info *sbi = REISERFS_SB(s); unsigned long delay; /* * Avoid scheduling flush when sb is being shut down. It can race * with journal shutdown and free still queued delayed work. */ if (sb_rdonly(s) || !(s->s_flags & SB_ACTIVE)) return; spin_lock(&sbi->old_work_lock); if (!sbi->work_queued) { delay = msecs_to_jiffies(dirty_writeback_interval * 10); queue_delayed_work(system_long_wq, &sbi->old_work, delay); sbi->work_queued = 1; } spin_unlock(&sbi->old_work_lock); } void reiserfs_cancel_old_flush(struct super_block *s) { struct reiserfs_sb_info *sbi = REISERFS_SB(s); spin_lock(&sbi->old_work_lock); /* Make sure no new flushes will be queued */ sbi->work_queued = 2; spin_unlock(&sbi->old_work_lock); cancel_delayed_work_sync(&REISERFS_SB(s)->old_work); } static int reiserfs_freeze(struct super_block *s) { struct reiserfs_transaction_handle th; reiserfs_cancel_old_flush(s); reiserfs_write_lock(s); if (!sb_rdonly(s)) { int err = journal_begin(&th, s, 1); if (err) { reiserfs_block_writes(&th); } else { reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); journal_mark_dirty(&th, SB_BUFFER_WITH_SB(s)); reiserfs_block_writes(&th); journal_end_sync(&th); } } reiserfs_write_unlock(s); return 0; } static int reiserfs_unfreeze(struct super_block *s) { struct reiserfs_sb_info *sbi = REISERFS_SB(s); reiserfs_allow_writes(s); spin_lock(&sbi->old_work_lock); /* Allow old_work to run again */ sbi->work_queued = 0; spin_unlock(&sbi->old_work_lock); return 0; } extern const struct in_core_key MAX_IN_CORE_KEY; /* * this is used to delete "save link" when there are no items of a * file it points to. It can either happen if unlink is completed but * "save unlink" removal, or if file has both unlink and truncate * pending and as unlink completes first (because key of "save link" * protecting unlink is bigger that a key lf "save link" which * protects truncate), so there left no items to make truncate * completion on */ static int remove_save_link_only(struct super_block *s, struct reiserfs_key *key, int oid_free) { struct reiserfs_transaction_handle th; int err; /* we are going to do one balancing */ err = journal_begin(&th, s, JOURNAL_PER_BALANCE_CNT); if (err) return err; reiserfs_delete_solid_item(&th, NULL, key); if (oid_free) /* removals are protected by direct items */ reiserfs_release_objectid(&th, le32_to_cpu(key->k_objectid)); return journal_end(&th); } #ifdef CONFIG_QUOTA static int reiserfs_quota_on_mount(struct super_block *, int); #endif /* * Look for uncompleted unlinks and truncates and complete them * * Called with superblock write locked. If quotas are enabled, we have to * release/retake lest we call dquot_quota_on_mount(), proceed to * schedule_on_each_cpu() in invalidate_bdev() and deadlock waiting for the per * cpu worklets to complete flush_async_commits() that in turn wait for the * superblock write lock. */ static int finish_unfinished(struct super_block *s) { INITIALIZE_PATH(path); struct cpu_key max_cpu_key, obj_key; struct reiserfs_key save_link_key, last_inode_key; int retval = 0; struct item_head *ih; struct buffer_head *bh; int item_pos; char *item; int done; struct inode *inode; int truncate; #ifdef CONFIG_QUOTA int i; int ms_active_set; int quota_enabled[REISERFS_MAXQUOTAS]; #endif /* compose key to look for "save" links */ max_cpu_key.version = KEY_FORMAT_3_5; max_cpu_key.on_disk_key.k_dir_id = ~0U; max_cpu_key.on_disk_key.k_objectid = ~0U; set_cpu_key_k_offset(&max_cpu_key, ~0U); max_cpu_key.key_length = 3; memset(&last_inode_key, 0, sizeof(last_inode_key)); #ifdef CONFIG_QUOTA /* Needed for iput() to work correctly and not trash data */ if (s->s_flags & SB_ACTIVE) { ms_active_set = 0; } else { ms_active_set = 1; s->s_flags |= SB_ACTIVE; } /* Turn on quotas so that they are updated correctly */ for (i = 0; i < REISERFS_MAXQUOTAS; i++) { quota_enabled[i] = 1; if (REISERFS_SB(s)->s_qf_names[i]) { int ret; if (sb_has_quota_active(s, i)) { quota_enabled[i] = 0; continue; } reiserfs_write_unlock(s); ret = reiserfs_quota_on_mount(s, i); reiserfs_write_lock(s); if (ret < 0) reiserfs_warning(s, "reiserfs-2500", "cannot turn on journaled " "quota: error %d", ret); } } #endif done = 0; REISERFS_SB(s)->s_is_unlinked_ok = 1; while (!retval) { int depth; retval = search_item(s, &max_cpu_key, &path); if (retval != ITEM_NOT_FOUND) { reiserfs_error(s, "vs-2140", "search_by_key returned %d", retval); break; } bh = get_last_bh(&path); item_pos = get_item_pos(&path); if (item_pos != B_NR_ITEMS(bh)) { reiserfs_warning(s, "vs-2060", "wrong position found"); break; } item_pos--; ih = item_head(bh, item_pos); if (le32_to_cpu(ih->ih_key.k_dir_id) != MAX_KEY_OBJECTID) /* there are no "save" links anymore */ break; save_link_key = ih->ih_key; if (is_indirect_le_ih(ih)) truncate = 1; else truncate = 0; /* reiserfs_iget needs k_dirid and k_objectid only */ item = ih_item_body(bh, ih); obj_key.on_disk_key.k_dir_id = le32_to_cpu(*(__le32 *) item); obj_key.on_disk_key.k_objectid = le32_to_cpu(ih->ih_key.k_objectid); obj_key.on_disk_key.k_offset = 0; obj_key.on_disk_key.k_type = 0; pathrelse(&path); inode = reiserfs_iget(s, &obj_key); if (IS_ERR_OR_NULL(inode)) { /* * the unlink almost completed, it just did not * manage to remove "save" link and release objectid */ reiserfs_warning(s, "vs-2180", "iget failed for %K", &obj_key); retval = remove_save_link_only(s, &save_link_key, 1); continue; } if (!truncate && inode->i_nlink) { /* file is not unlinked */ reiserfs_warning(s, "vs-2185", "file %K is not unlinked", &obj_key); retval = remove_save_link_only(s, &save_link_key, 0); continue; } depth = reiserfs_write_unlock_nested(inode->i_sb); dquot_initialize(inode); reiserfs_write_lock_nested(inode->i_sb, depth); if (truncate && S_ISDIR(inode->i_mode)) { /* * We got a truncate request for a dir which * is impossible. The only imaginable way is to * execute unfinished truncate request then boot * into old kernel, remove the file and create dir * with the same key. */ reiserfs_warning(s, "green-2101", "impossible truncate on a " "directory %k. Please report", INODE_PKEY(inode)); retval = remove_save_link_only(s, &save_link_key, 0); truncate = 0; iput(inode); continue; } if (truncate) { REISERFS_I(inode)->i_flags |= i_link_saved_truncate_mask; /* * not completed truncate found. New size was * committed together with "save" link */ reiserfs_info(s, "Truncating %k to %lld ..", INODE_PKEY(inode), inode->i_size); /* don't update modification time */ reiserfs_truncate_file(inode, 0); retval = remove_save_link(inode, truncate); } else { REISERFS_I(inode)->i_flags |= i_link_saved_unlink_mask; /* not completed unlink (rmdir) found */ reiserfs_info(s, "Removing %k..", INODE_PKEY(inode)); if (memcmp(&last_inode_key, INODE_PKEY(inode), sizeof(last_inode_key))){ last_inode_key = *INODE_PKEY(inode); /* removal gets completed in iput */ retval = 0; } else { reiserfs_warning(s, "super-2189", "Dead loop " "in finish_unfinished " "detected, just remove " "save link\n"); retval = remove_save_link_only(s, &save_link_key, 0); } } iput(inode); printk("done\n"); done++; } REISERFS_SB(s)->s_is_unlinked_ok = 0; #ifdef CONFIG_QUOTA /* Turn quotas off */ reiserfs_write_unlock(s); for (i = 0; i < REISERFS_MAXQUOTAS; i++) { if (sb_dqopt(s)->files[i] && quota_enabled[i]) dquot_quota_off(s, i); } reiserfs_write_lock(s); if (ms_active_set) /* Restore the flag back */ s->s_flags &= ~SB_ACTIVE; #endif pathrelse(&path); if (done) reiserfs_info(s, "There were %d uncompleted unlinks/truncates. " "Completed\n", done); return retval; } /* * to protect file being unlinked from getting lost we "safe" link files * being unlinked. This link will be deleted in the same transaction with last * item of file. mounting the filesystem we scan all these links and remove * files which almost got lost */ void add_save_link(struct reiserfs_transaction_handle *th, struct inode *inode, int truncate) { INITIALIZE_PATH(path); int retval; struct cpu_key key; struct item_head ih; __le32 link; BUG_ON(!th->t_trans_id); /* file can only get one "save link" of each kind */ RFALSE(truncate && (REISERFS_I(inode)->i_flags & i_link_saved_truncate_mask), "saved link already exists for truncated inode %lx", (long)inode->i_ino); RFALSE(!truncate && (REISERFS_I(inode)->i_flags & i_link_saved_unlink_mask), "saved link already exists for unlinked inode %lx", (long)inode->i_ino); /* setup key of "save" link */ key.version = KEY_FORMAT_3_5; key.on_disk_key.k_dir_id = MAX_KEY_OBJECTID; key.on_disk_key.k_objectid = inode->i_ino; if (!truncate) { /* unlink, rmdir, rename */ set_cpu_key_k_offset(&key, 1 + inode->i_sb->s_blocksize); set_cpu_key_k_type(&key, TYPE_DIRECT); /* item head of "safe" link */ make_le_item_head(&ih, &key, key.version, 1 + inode->i_sb->s_blocksize, TYPE_DIRECT, 4 /*length */ , 0xffff /*free space */ ); } else { /* truncate */ if (S_ISDIR(inode->i_mode)) reiserfs_warning(inode->i_sb, "green-2102", "Adding a truncate savelink for " "a directory %k! Please report", INODE_PKEY(inode)); set_cpu_key_k_offset(&key, 1); set_cpu_key_k_type(&key, TYPE_INDIRECT); /* item head of "safe" link */ make_le_item_head(&ih, &key, key.version, 1, TYPE_INDIRECT, 4 /*length */ , 0 /*free space */ ); } key.key_length = 3; /* look for its place in the tree */ retval = search_item(inode->i_sb, &key, &path); if (retval != ITEM_NOT_FOUND) { if (retval != -ENOSPC) reiserfs_error(inode->i_sb, "vs-2100", "search_by_key (%K) returned %d", &key, retval); pathrelse(&path); return; } /* body of "save" link */ link = INODE_PKEY(inode)->k_dir_id; /* put "save" link into tree, don't charge quota to anyone */ retval = reiserfs_insert_item(th, &path, &key, &ih, NULL, (char *)&link); if (retval) { if (retval != -ENOSPC) reiserfs_error(inode->i_sb, "vs-2120", "insert_item returned %d", retval); } else { if (truncate) REISERFS_I(inode)->i_flags |= i_link_saved_truncate_mask; else REISERFS_I(inode)->i_flags |= i_link_saved_unlink_mask; } } /* this opens transaction unlike add_save_link */ int remove_save_link(struct inode *inode, int truncate) { struct reiserfs_transaction_handle th; struct reiserfs_key key; int err; /* we are going to do one balancing only */ err = journal_begin(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT); if (err) return err; /* setup key of "save" link */ key.k_dir_id = cpu_to_le32(MAX_KEY_OBJECTID); key.k_objectid = INODE_PKEY(inode)->k_objectid; if (!truncate) { /* unlink, rmdir, rename */ set_le_key_k_offset(KEY_FORMAT_3_5, &key, 1 + inode->i_sb->s_blocksize); set_le_key_k_type(KEY_FORMAT_3_5, &key, TYPE_DIRECT); } else { /* truncate */ set_le_key_k_offset(KEY_FORMAT_3_5, &key, 1); set_le_key_k_type(KEY_FORMAT_3_5, &key, TYPE_INDIRECT); } if ((truncate && (REISERFS_I(inode)->i_flags & i_link_saved_truncate_mask)) || (!truncate && (REISERFS_I(inode)->i_flags & i_link_saved_unlink_mask))) /* don't take quota bytes from anywhere */ reiserfs_delete_solid_item(&th, NULL, &key); if (!truncate) { reiserfs_release_objectid(&th, inode->i_ino); REISERFS_I(inode)->i_flags &= ~i_link_saved_unlink_mask; } else REISERFS_I(inode)->i_flags &= ~i_link_saved_truncate_mask; return journal_end(&th); } static void reiserfs_kill_sb(struct super_block *s) { if (REISERFS_SB(s)) { reiserfs_proc_info_done(s); /* * Force any pending inode evictions to occur now. Any * inodes to be removed that have extended attributes * associated with them need to clean them up before * we can release the extended attribute root dentries. * shrink_dcache_for_umount will BUG if we don't release * those before it's called so ->put_super is too late. */ shrink_dcache_sb(s); dput(REISERFS_SB(s)->xattr_root); REISERFS_SB(s)->xattr_root = NULL; dput(REISERFS_SB(s)->priv_root); REISERFS_SB(s)->priv_root = NULL; } kill_block_super(s); } #ifdef CONFIG_QUOTA static int reiserfs_quota_off(struct super_block *sb, int type); static void reiserfs_quota_off_umount(struct super_block *s) { int type; for (type = 0; type < REISERFS_MAXQUOTAS; type++) reiserfs_quota_off(s, type); } #else static inline void reiserfs_quota_off_umount(struct super_block *s) { } #endif static void reiserfs_put_super(struct super_block *s) { struct reiserfs_transaction_handle th; th.t_trans_id = 0; reiserfs_quota_off_umount(s); reiserfs_write_lock(s); /* * change file system state to current state if it was mounted * with read-write permissions */ if (!sb_rdonly(s)) { if (!journal_begin(&th, s, 10)) { reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); set_sb_umount_state(SB_DISK_SUPER_BLOCK(s), REISERFS_SB(s)->s_mount_state); journal_mark_dirty(&th, SB_BUFFER_WITH_SB(s)); } } /* * note, journal_release checks for readonly mount, and can * decide not to do a journal_end */ journal_release(&th, s); reiserfs_free_bitmap_cache(s); brelse(SB_BUFFER_WITH_SB(s)); print_statistics(s); if (REISERFS_SB(s)->reserved_blocks != 0) { reiserfs_warning(s, "green-2005", "reserved blocks left %d", REISERFS_SB(s)->reserved_blocks); } reiserfs_write_unlock(s); mutex_destroy(&REISERFS_SB(s)->lock); destroy_workqueue(REISERFS_SB(s)->commit_wq); kfree(REISERFS_SB(s)->s_jdev); kfree(s->s_fs_info); s->s_fs_info = NULL; } static struct kmem_cache *reiserfs_inode_cachep; static struct inode *reiserfs_alloc_inode(struct super_block *sb) { struct reiserfs_inode_info *ei; ei = alloc_inode_sb(sb, reiserfs_inode_cachep, GFP_KERNEL); if (!ei) return NULL; atomic_set(&ei->openers, 0); mutex_init(&ei->tailpack); #ifdef CONFIG_QUOTA memset(&ei->i_dquot, 0, sizeof(ei->i_dquot)); #endif return &ei->vfs_inode; } static void reiserfs_free_inode(struct inode *inode) { kmem_cache_free(reiserfs_inode_cachep, REISERFS_I(inode)); } static void init_once(void *foo) { struct reiserfs_inode_info *ei = (struct reiserfs_inode_info *)foo; INIT_LIST_HEAD(&ei->i_prealloc_list); inode_init_once(&ei->vfs_inode); } static int __init init_inodecache(void) { reiserfs_inode_cachep = kmem_cache_create("reiser_inode_cache", sizeof(struct reiserfs_inode_info), 0, (SLAB_RECLAIM_ACCOUNT| SLAB_ACCOUNT), init_once); if (reiserfs_inode_cachep == NULL) return -ENOMEM; return 0; } static void destroy_inodecache(void) { /* * Make sure all delayed rcu free inodes are flushed before we * destroy cache. */ rcu_barrier(); kmem_cache_destroy(reiserfs_inode_cachep); } /* we don't mark inodes dirty, we just log them */ static void reiserfs_dirty_inode(struct inode *inode, int flags) { struct reiserfs_transaction_handle th; int err = 0; if (sb_rdonly(inode->i_sb)) { reiserfs_warning(inode->i_sb, "clm-6006", "writing inode %lu on readonly FS", inode->i_ino); return; } reiserfs_write_lock(inode->i_sb); /* * this is really only used for atime updates, so they don't have * to be included in O_SYNC or fsync */ err = journal_begin(&th, inode->i_sb, 1); if (err) goto out; reiserfs_update_sd(&th, inode); journal_end(&th); out: reiserfs_write_unlock(inode->i_sb); } static int reiserfs_show_options(struct seq_file *seq, struct dentry *root) { struct super_block *s = root->d_sb; struct reiserfs_journal *journal = SB_JOURNAL(s); long opts = REISERFS_SB(s)->s_mount_opt; if (opts & (1 << REISERFS_LARGETAIL)) seq_puts(seq, ",tails=on"); else if (!(opts & (1 << REISERFS_SMALLTAIL))) seq_puts(seq, ",notail"); /* tails=small is default so we don't show it */ if (!(opts & (1 << REISERFS_BARRIER_FLUSH))) seq_puts(seq, ",barrier=none"); /* barrier=flush is default so we don't show it */ if (opts & (1 << REISERFS_ERROR_CONTINUE)) seq_puts(seq, ",errors=continue"); else if (opts & (1 << REISERFS_ERROR_PANIC)) seq_puts(seq, ",errors=panic"); /* errors=ro is default so we don't show it */ if (opts & (1 << REISERFS_DATA_LOG)) seq_puts(seq, ",data=journal"); else if (opts & (1 << REISERFS_DATA_WRITEBACK)) seq_puts(seq, ",data=writeback"); /* data=ordered is default so we don't show it */ if (opts & (1 << REISERFS_ATTRS)) seq_puts(seq, ",attrs"); if (opts & (1 << REISERFS_XATTRS_USER)) seq_puts(seq, ",user_xattr"); if (opts & (1 << REISERFS_EXPOSE_PRIVROOT)) seq_puts(seq, ",expose_privroot"); if (opts & (1 << REISERFS_POSIXACL)) seq_puts(seq, ",acl"); if (REISERFS_SB(s)->s_jdev) seq_show_option(seq, "jdev", REISERFS_SB(s)->s_jdev); if (journal->j_max_commit_age != journal->j_default_max_commit_age) seq_printf(seq, ",commit=%d", journal->j_max_commit_age); #ifdef CONFIG_QUOTA if (REISERFS_SB(s)->s_qf_names[USRQUOTA]) seq_show_option(seq, "usrjquota", REISERFS_SB(s)->s_qf_names[USRQUOTA]); else if (opts & (1 << REISERFS_USRQUOTA)) seq_puts(seq, ",usrquota"); if (REISERFS_SB(s)->s_qf_names[GRPQUOTA]) seq_show_option(seq, "grpjquota", REISERFS_SB(s)->s_qf_names[GRPQUOTA]); else if (opts & (1 << REISERFS_GRPQUOTA)) seq_puts(seq, ",grpquota"); if (REISERFS_SB(s)->s_jquota_fmt) { if (REISERFS_SB(s)->s_jquota_fmt == QFMT_VFS_OLD) seq_puts(seq, ",jqfmt=vfsold"); else if (REISERFS_SB(s)->s_jquota_fmt == QFMT_VFS_V0) seq_puts(seq, ",jqfmt=vfsv0"); } #endif /* Block allocator options */ if (opts & (1 << REISERFS_NO_BORDER)) seq_puts(seq, ",block-allocator=noborder"); if (opts & (1 << REISERFS_NO_UNHASHED_RELOCATION)) seq_puts(seq, ",block-allocator=no_unhashed_relocation"); if (opts & (1 << REISERFS_HASHED_RELOCATION)) seq_puts(seq, ",block-allocator=hashed_relocation"); if (opts & (1 << REISERFS_TEST4)) seq_puts(seq, ",block-allocator=test4"); show_alloc_options(seq, s); return 0; } #ifdef CONFIG_QUOTA static ssize_t reiserfs_quota_write(struct super_block *, int, const char *, size_t, loff_t); static ssize_t reiserfs_quota_read(struct super_block *, int, char *, size_t, loff_t); static struct dquot __rcu **reiserfs_get_dquots(struct inode *inode) { return REISERFS_I(inode)->i_dquot; } #endif static const struct super_operations reiserfs_sops = { .alloc_inode = reiserfs_alloc_inode, .free_inode = reiserfs_free_inode, .write_inode = reiserfs_write_inode, .dirty_inode = reiserfs_dirty_inode, .evict_inode = reiserfs_evict_inode, .put_super = reiserfs_put_super, .sync_fs = reiserfs_sync_fs, .freeze_fs = reiserfs_freeze, .unfreeze_fs = reiserfs_unfreeze, .statfs = reiserfs_statfs, .remount_fs = reiserfs_remount, .show_options = reiserfs_show_options, #ifdef CONFIG_QUOTA .quota_read = reiserfs_quota_read, .quota_write = reiserfs_quota_write, .get_dquots = reiserfs_get_dquots, #endif }; #ifdef CONFIG_QUOTA #define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group") static int reiserfs_write_dquot(struct dquot *); static int reiserfs_acquire_dquot(struct dquot *); static int reiserfs_release_dquot(struct dquot *); static int reiserfs_mark_dquot_dirty(struct dquot *); static int reiserfs_write_info(struct super_block *, int); static int reiserfs_quota_on(struct super_block *, int, int, const struct path *); static const struct dquot_operations reiserfs_quota_operations = { .write_dquot = reiserfs_write_dquot, .acquire_dquot = reiserfs_acquire_dquot, .release_dquot = reiserfs_release_dquot, .mark_dirty = reiserfs_mark_dquot_dirty, .write_info = reiserfs_write_info, .alloc_dquot = dquot_alloc, .destroy_dquot = dquot_destroy, .get_next_id = dquot_get_next_id, }; static const struct quotactl_ops reiserfs_qctl_operations = { .quota_on = reiserfs_quota_on, .quota_off = reiserfs_quota_off, .quota_sync = dquot_quota_sync, .get_state = dquot_get_state, .set_info = dquot_set_dqinfo, .get_dqblk = dquot_get_dqblk, .set_dqblk = dquot_set_dqblk, }; #endif static const struct export_operations reiserfs_export_ops = { .encode_fh = reiserfs_encode_fh, .fh_to_dentry = reiserfs_fh_to_dentry, .fh_to_parent = reiserfs_fh_to_parent, .get_parent = reiserfs_get_parent, }; /* * this struct is used in reiserfs_getopt () for containing the value for * those mount options that have values rather than being toggles. */ typedef struct { char *value; /* * bitmask which is to set on mount_options bitmask * when this value is found, 0 is no bits are to be changed. */ int setmask; /* * bitmask which is to clear on mount_options bitmask * when this value is found, 0 is no bits are to be changed. * This is applied BEFORE setmask */ int clrmask; } arg_desc_t; /* Set this bit in arg_required to allow empty arguments */ #define REISERFS_OPT_ALLOWEMPTY 31 /* * this struct is used in reiserfs_getopt() for describing the * set of reiserfs mount options */ typedef struct { char *option_name; /* 0 if argument is not required, not 0 otherwise */ int arg_required; /* list of values accepted by an option */ const arg_desc_t *values; /* * bitmask which is to set on mount_options bitmask * when this value is found, 0 is no bits are to be changed. */ int setmask; /* * bitmask which is to clear on mount_options bitmask * when this value is found, 0 is no bits are to be changed. * This is applied BEFORE setmask */ int clrmask; } opt_desc_t; /* possible values for -o data= */ static const arg_desc_t logging_mode[] = { {"ordered", 1 << REISERFS_DATA_ORDERED, (1 << REISERFS_DATA_LOG | 1 << REISERFS_DATA_WRITEBACK)}, {"journal", 1 << REISERFS_DATA_LOG, (1 << REISERFS |