1 1 1 2 1 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Stadia controller rumble support. * * Copyright 2023 Google LLC */ #include <linux/hid.h> #include <linux/input.h> #include <linux/slab.h> #include <linux/module.h> #include "hid-ids.h" #define STADIA_FF_REPORT_ID 5 struct stadiaff_device { struct hid_device *hid; struct hid_report *report; spinlock_t lock; bool removed; uint16_t strong_magnitude; uint16_t weak_magnitude; struct work_struct work; }; static void stadiaff_work(struct work_struct *work) { struct stadiaff_device *stadiaff = container_of(work, struct stadiaff_device, work); struct hid_field *rumble_field = stadiaff->report->field[0]; unsigned long flags; spin_lock_irqsave(&stadiaff->lock, flags); rumble_field->value[0] = stadiaff->strong_magnitude; rumble_field->value[1] = stadiaff->weak_magnitude; spin_unlock_irqrestore(&stadiaff->lock, flags); hid_hw_request(stadiaff->hid, stadiaff->report, HID_REQ_SET_REPORT); } static int stadiaff_play(struct input_dev *dev, void *data, struct ff_effect *effect) { struct hid_device *hid = input_get_drvdata(dev); struct stadiaff_device *stadiaff = hid_get_drvdata(hid); unsigned long flags; spin_lock_irqsave(&stadiaff->lock, flags); if (!stadiaff->removed) { stadiaff->strong_magnitude = effect->u.rumble.strong_magnitude; stadiaff->weak_magnitude = effect->u.rumble.weak_magnitude; schedule_work(&stadiaff->work); } spin_unlock_irqrestore(&stadiaff->lock, flags); return 0; } static int stadiaff_init(struct hid_device *hid) { struct stadiaff_device *stadiaff; struct hid_report *report; struct hid_input *hidinput; struct input_dev *dev; int error; if (list_empty(&hid->inputs)) { hid_err(hid, "no inputs found\n"); return -ENODEV; } hidinput = list_entry(hid->inputs.next, struct hid_input, list); dev = hidinput->input; report = hid_validate_values(hid, HID_OUTPUT_REPORT, STADIA_FF_REPORT_ID, 0, 2); if (!report) return -ENODEV; stadiaff = devm_kzalloc(&hid->dev, sizeof(struct stadiaff_device), GFP_KERNEL); if (!stadiaff) return -ENOMEM; hid_set_drvdata(hid, stadiaff); input_set_capability(dev, EV_FF, FF_RUMBLE); error = input_ff_create_memless(dev, NULL, stadiaff_play); if (error) return error; stadiaff->removed = false; stadiaff->hid = hid; stadiaff->report = report; INIT_WORK(&stadiaff->work, stadiaff_work); spin_lock_init(&stadiaff->lock); hid_info(hid, "Force Feedback for Google Stadia controller\n"); return 0; } static int stadia_probe(struct hid_device *hdev, const struct hid_device_id *id) { int ret; ret = hid_parse(hdev); if (ret) { hid_err(hdev, "parse failed\n"); return ret; } ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT & ~HID_CONNECT_FF); if (ret) { hid_err(hdev, "hw start failed\n"); return ret; } ret = stadiaff_init(hdev); if (ret) { hid_err(hdev, "force feedback init failed\n"); hid_hw_stop(hdev); return ret; } return 0; } static void stadia_remove(struct hid_device *hid) { struct stadiaff_device *stadiaff = hid_get_drvdata(hid); unsigned long flags; spin_lock_irqsave(&stadiaff->lock, flags); stadiaff->removed = true; spin_unlock_irqrestore(&stadiaff->lock, flags); cancel_work_sync(&stadiaff->work); hid_hw_stop(hid); } static const struct hid_device_id stadia_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_STADIA) }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_STADIA) }, { } }; MODULE_DEVICE_TABLE(hid, stadia_devices); static struct hid_driver stadia_driver = { .name = "stadia", .id_table = stadia_devices, .probe = stadia_probe, .remove = stadia_remove, }; module_hid_driver(stadia_driver); MODULE_DESCRIPTION("Google Stadia controller rumble support."); MODULE_LICENSE("GPL"); |
138 131 138 138 78 18 79 1 92 19 92 14 138 137 137 79 79 138 138 138 138 138 138 138 138 138 136 136 137 92 92 79 5 92 78 5 92 2 2 2 2 2 137 138 2 137 137 18 137 18 18 2 79 79 137 138 138 138 135 11 138 135 8 8 134 134 138 43 140 68 68 103 66 104 103 7 47 100 3 139 61 89 4 138 138 137 138 1 1 138 138 33 117 117 117 115 114 2 117 137 137 136 137 137 136 137 138 138 138 138 138 138 137 138 138 138 135 135 135 17 5 61 5 57 33 33 138 138 136 138 138 138 138 138 138 1 138 138 138 138 137 138 137 138 138 138 3 3 3 138 138 1 137 138 138 138 138 138 136 137 137 137 1 137 104 104 53 68 104 137 137 137 94 137 137 94 136 136 137 137 137 104 135 135 135 135 135 50 135 11 138 4 102 103 53 55 55 55 54 52 55 116 1 116 116 71 55 71 65 65 74 53 74 65 23 22 23 4 23 43 43 9 42 43 8 43 20 20 20 3 20 20 20 20 44 27 27 44 26 27 27 27 17 15 44 45 45 45 45 10 10 10 10 10 3 3 2 2 2 39 39 39 6 19 44 26 27 20 19 15 19 20 19 17 13 2 2 20 20 20 20 20 12 11 11 11 15 13 15 20 19 20 6 6 6 3 3 5 5 3 3 1 1 1 5 2 38 24 21 1 21 20 5 16 1 19 10 10 21 22 19 33 32 30 31 37 20 20 20 20 20 19 20 19 20 18 16 15 15 15 15 15 13 15 16 16 17 19 2 19 19 2 2 2 2 2 2 2 24 19 19 19 19 19 19 19 18 18 22 23 23 3 5 4 3 7 8 8 8 8 8 10 10 9 9 10 10 10 10 2 1 3 4 2 2 4 4 4 2 2 2 34 7 34 31 7 6 3 1 1 3 3 3 4 6 28 28 27 26 3 3 2 27 34 1 1 7 6 6 2 5 4 6 5 4 4 3 2 2 1 1 1 4 3 3 2 4 2 1 1 1 1 8 7 3 6 4 2 1 4 1 5 3 2 2 2 2 3 4 20 53 53 53 53 53 23 53 53 53 25 24 23 25 54 15 54 54 54 54 54 54 54 53 2 53 53 54 1 53 54 54 54 54 54 54 54 28 54 40 54 51 54 54 54 2 2 3 3 3 22 14 22 22 22 22 22 22 123 2 121 2 118 12 12 106 105 6 16 20 20 17 17 1 1 4 3 3 3 24 24 23 4 22 1 1 13 13 11 1 1 3 8 8 5 5 5 11 10 1 1 9 1 2 123 1 1 9 9 20 2 7 2 5 1 21 21 21 46 46 46 5 1 4 4 17 1 16 16 29 14 14 14 14 5 5 29 27 27 27 17 17 26 26 29 12 6 4 4 12 11 10 10 4 10 9 9 7 7 6 6 6 6 6 7 7 6 6 6 2 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Digital Audio (PCM) abstract layer / OSS compatible * Copyright (c) by Jaroslav Kysela <perex@perex.cz> */ #if 0 #define PLUGIN_DEBUG #endif #if 0 #define OSS_DEBUG #endif #include <linux/init.h> #include <linux/slab.h> #include <linux/sched/signal.h> #include <linux/time.h> #include <linux/vmalloc.h> #include <linux/module.h> #include <linux/math64.h> #include <linux/string.h> #include <linux/compat.h> #include <sound/core.h> #include <sound/minors.h> #include <sound/pcm.h> #include <sound/pcm_params.h> #include "pcm_plugin.h" #include <sound/info.h> #include <linux/soundcard.h> #include <sound/initval.h> #include <sound/mixer_oss.h> #define OSS_ALSAEMULVER _SIOR ('M', 249, int) static int dsp_map[SNDRV_CARDS]; static int adsp_map[SNDRV_CARDS] = {[0 ... (SNDRV_CARDS-1)] = 1}; static bool nonblock_open = 1; MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>, Abramo Bagnara <abramo@alsa-project.org>"); MODULE_DESCRIPTION("PCM OSS emulation for ALSA."); MODULE_LICENSE("GPL"); module_param_array(dsp_map, int, NULL, 0444); MODULE_PARM_DESC(dsp_map, "PCM device number assigned to 1st OSS device."); module_param_array(adsp_map, int, NULL, 0444); MODULE_PARM_DESC(adsp_map, "PCM device number assigned to 2nd OSS device."); module_param(nonblock_open, bool, 0644); MODULE_PARM_DESC(nonblock_open, "Don't block opening busy PCM devices."); MODULE_ALIAS_SNDRV_MINOR(SNDRV_MINOR_OSS_PCM); MODULE_ALIAS_SNDRV_MINOR(SNDRV_MINOR_OSS_PCM1); static int snd_pcm_oss_get_rate(struct snd_pcm_oss_file *pcm_oss_file); static int snd_pcm_oss_get_channels(struct snd_pcm_oss_file *pcm_oss_file); static int snd_pcm_oss_get_format(struct snd_pcm_oss_file *pcm_oss_file); /* * helper functions to process hw_params */ static int snd_interval_refine_min(struct snd_interval *i, unsigned int min, int openmin) { int changed = 0; if (i->min < min) { i->min = min; i->openmin = openmin; changed = 1; } else if (i->min == min && !i->openmin && openmin) { i->openmin = 1; changed = 1; } if (i->integer) { if (i->openmin) { i->min++; i->openmin = 0; } } if (snd_interval_checkempty(i)) { snd_interval_none(i); return -EINVAL; } return changed; } static int snd_interval_refine_max(struct snd_interval *i, unsigned int max, int openmax) { int changed = 0; if (i->max > max) { i->max = max; i->openmax = openmax; changed = 1; } else if (i->max == max && !i->openmax && openmax) { i->openmax = 1; changed = 1; } if (i->integer) { if (i->openmax) { i->max--; i->openmax = 0; } } if (snd_interval_checkempty(i)) { snd_interval_none(i); return -EINVAL; } return changed; } static int snd_interval_refine_set(struct snd_interval *i, unsigned int val) { struct snd_interval t; t.empty = 0; t.min = t.max = val; t.openmin = t.openmax = 0; t.integer = 1; return snd_interval_refine(i, &t); } /** * snd_pcm_hw_param_value_min * @params: the hw_params instance * @var: parameter to retrieve * @dir: pointer to the direction (-1,0,1) or NULL * * Return the minimum value for field PAR. */ static unsigned int snd_pcm_hw_param_value_min(const struct snd_pcm_hw_params *params, snd_pcm_hw_param_t var, int *dir) { if (hw_is_mask(var)) { if (dir) *dir = 0; return snd_mask_min(hw_param_mask_c(params, var)); } if (hw_is_interval(var)) { const struct snd_interval *i = hw_param_interval_c(params, var); if (dir) *dir = i->openmin; return snd_interval_min(i); } return -EINVAL; } /** * snd_pcm_hw_param_value_max * @params: the hw_params instance * @var: parameter to retrieve * @dir: pointer to the direction (-1,0,1) or NULL * * Return the maximum value for field PAR. */ static int snd_pcm_hw_param_value_max(const struct snd_pcm_hw_params *params, snd_pcm_hw_param_t var, int *dir) { if (hw_is_mask(var)) { if (dir) *dir = 0; return snd_mask_max(hw_param_mask_c(params, var)); } if (hw_is_interval(var)) { const struct snd_interval *i = hw_param_interval_c(params, var); if (dir) *dir = - (int) i->openmax; return snd_interval_max(i); } return -EINVAL; } static int _snd_pcm_hw_param_mask(struct snd_pcm_hw_params *params, snd_pcm_hw_param_t var, const struct snd_mask *val) { int changed; changed = snd_mask_refine(hw_param_mask(params, var), val); if (changed > 0) { params->cmask |= 1 << var; params->rmask |= 1 << var; } return changed; } static int snd_pcm_hw_param_mask(struct snd_pcm_substream *pcm, struct snd_pcm_hw_params *params, snd_pcm_hw_param_t var, const struct snd_mask *val) { int changed = _snd_pcm_hw_param_mask(params, var, val); if (changed < 0) return changed; if (params->rmask) { int err = snd_pcm_hw_refine(pcm, params); if (err < 0) return err; } return 0; } static int _snd_pcm_hw_param_min(struct snd_pcm_hw_params *params, snd_pcm_hw_param_t var, unsigned int val, int dir) { int changed; int open = 0; if (dir) { if (dir > 0) { open = 1; } else if (dir < 0) { if (val > 0) { open = 1; val--; } } } if (hw_is_mask(var)) changed = snd_mask_refine_min(hw_param_mask(params, var), val + !!open); else if (hw_is_interval(var)) changed = snd_interval_refine_min(hw_param_interval(params, var), val, open); else return -EINVAL; if (changed > 0) { params->cmask |= 1 << var; params->rmask |= 1 << var; } return changed; } /** * snd_pcm_hw_param_min * @pcm: PCM instance * @params: the hw_params instance * @var: parameter to retrieve * @val: minimal value * @dir: pointer to the direction (-1,0,1) or NULL * * Inside configuration space defined by PARAMS remove from PAR all * values < VAL. Reduce configuration space accordingly. * Return new minimum or -EINVAL if the configuration space is empty */ static int snd_pcm_hw_param_min(struct snd_pcm_substream *pcm, struct snd_pcm_hw_params *params, snd_pcm_hw_param_t var, unsigned int val, int *dir) { int changed = _snd_pcm_hw_param_min(params, var, val, dir ? *dir : 0); if (changed < 0) return changed; if (params->rmask) { int err = snd_pcm_hw_refine(pcm, params); if (err < 0) return err; } return snd_pcm_hw_param_value_min(params, var, dir); } static int _snd_pcm_hw_param_max(struct snd_pcm_hw_params *params, snd_pcm_hw_param_t var, unsigned int val, int dir) { int changed; int open = 0; if (dir) { if (dir < 0) { open = 1; } else if (dir > 0) { open = 1; val++; } } if (hw_is_mask(var)) { if (val == 0 && open) { snd_mask_none(hw_param_mask(params, var)); changed = -EINVAL; } else changed = snd_mask_refine_max(hw_param_mask(params, var), val - !!open); } else if (hw_is_interval(var)) changed = snd_interval_refine_max(hw_param_interval(params, var), val, open); else return -EINVAL; if (changed > 0) { params->cmask |= 1 << var; params->rmask |= 1 << var; } return changed; } /** * snd_pcm_hw_param_max * @pcm: PCM instance * @params: the hw_params instance * @var: parameter to retrieve * @val: maximal value * @dir: pointer to the direction (-1,0,1) or NULL * * Inside configuration space defined by PARAMS remove from PAR all * values >= VAL + 1. Reduce configuration space accordingly. * Return new maximum or -EINVAL if the configuration space is empty */ static int snd_pcm_hw_param_max(struct snd_pcm_substream *pcm, struct snd_pcm_hw_params *params, snd_pcm_hw_param_t var, unsigned int val, int *dir) { int changed = _snd_pcm_hw_param_max(params, var, val, dir ? *dir : 0); if (changed < 0) return changed; if (params->rmask) { int err = snd_pcm_hw_refine(pcm, params); if (err < 0) return err; } return snd_pcm_hw_param_value_max(params, var, dir); } static int boundary_sub(int a, int adir, int b, int bdir, int *c, int *cdir) { adir = adir < 0 ? -1 : (adir > 0 ? 1 : 0); bdir = bdir < 0 ? -1 : (bdir > 0 ? 1 : 0); *c = a - b; *cdir = adir - bdir; if (*cdir == -2) { (*c)--; } else if (*cdir == 2) { (*c)++; } return 0; } static int boundary_lt(unsigned int a, int adir, unsigned int b, int bdir) { if (adir < 0) { a--; adir = 1; } else if (adir > 0) adir = 1; if (bdir < 0) { b--; bdir = 1; } else if (bdir > 0) bdir = 1; return a < b || (a == b && adir < bdir); } /* Return 1 if min is nearer to best than max */ static int boundary_nearer(int min, int mindir, int best, int bestdir, int max, int maxdir) { int dmin, dmindir; int dmax, dmaxdir; boundary_sub(best, bestdir, min, mindir, &dmin, &dmindir); boundary_sub(max, maxdir, best, bestdir, &dmax, &dmaxdir); return boundary_lt(dmin, dmindir, dmax, dmaxdir); } /** * snd_pcm_hw_param_near * @pcm: PCM instance * @params: the hw_params instance * @var: parameter to retrieve * @best: value to set * @dir: pointer to the direction (-1,0,1) or NULL * * Inside configuration space defined by PARAMS set PAR to the available value * nearest to VAL. Reduce configuration space accordingly. * This function cannot be called for SNDRV_PCM_HW_PARAM_ACCESS, * SNDRV_PCM_HW_PARAM_FORMAT, SNDRV_PCM_HW_PARAM_SUBFORMAT. * Return the value found. */ static int snd_pcm_hw_param_near(struct snd_pcm_substream *pcm, struct snd_pcm_hw_params *params, snd_pcm_hw_param_t var, unsigned int best, int *dir) { struct snd_pcm_hw_params *save __free(kfree) = NULL; int v; unsigned int saved_min; int last = 0; int min, max; int mindir, maxdir; int valdir = dir ? *dir : 0; /* FIXME */ if (best > INT_MAX) best = INT_MAX; min = max = best; mindir = maxdir = valdir; if (maxdir > 0) maxdir = 0; else if (maxdir == 0) maxdir = -1; else { maxdir = 1; max--; } save = kmalloc(sizeof(*save), GFP_KERNEL); if (save == NULL) return -ENOMEM; *save = *params; saved_min = min; min = snd_pcm_hw_param_min(pcm, params, var, min, &mindir); if (min >= 0) { struct snd_pcm_hw_params *params1 __free(kfree) = NULL; if (max < 0) goto _end; if ((unsigned int)min == saved_min && mindir == valdir) goto _end; params1 = kmalloc(sizeof(*params1), GFP_KERNEL); if (params1 == NULL) return -ENOMEM; *params1 = *save; max = snd_pcm_hw_param_max(pcm, params1, var, max, &maxdir); if (max < 0) goto _end; if (boundary_nearer(max, maxdir, best, valdir, min, mindir)) { *params = *params1; last = 1; } } else { *params = *save; max = snd_pcm_hw_param_max(pcm, params, var, max, &maxdir); if (max < 0) return max; last = 1; } _end: if (last) v = snd_pcm_hw_param_last(pcm, params, var, dir); else v = snd_pcm_hw_param_first(pcm, params, var, dir); return v; } static int _snd_pcm_hw_param_set(struct snd_pcm_hw_params *params, snd_pcm_hw_param_t var, unsigned int val, int dir) { int changed; if (hw_is_mask(var)) { struct snd_mask *m = hw_param_mask(params, var); if (val == 0 && dir < 0) { changed = -EINVAL; snd_mask_none(m); } else { if (dir > 0) val++; else if (dir < 0) val--; changed = snd_mask_refine_set(hw_param_mask(params, var), val); } } else if (hw_is_interval(var)) { struct snd_interval *i = hw_param_interval(params, var); if (val == 0 && dir < 0) { changed = -EINVAL; snd_interval_none(i); } else if (dir == 0) changed = snd_interval_refine_set(i, val); else { struct snd_interval t; t.openmin = 1; t.openmax = 1; t.empty = 0; t.integer = 0; if (dir < 0) { t.min = val - 1; t.max = val; } else { t.min = val; t.max = val+1; } changed = snd_interval_refine(i, &t); } } else return -EINVAL; if (changed > 0) { params->cmask |= 1 << var; params->rmask |= 1 << var; } return changed; } /** * snd_pcm_hw_param_set * @pcm: PCM instance * @params: the hw_params instance * @var: parameter to retrieve * @val: value to set * @dir: pointer to the direction (-1,0,1) or NULL * * Inside configuration space defined by PARAMS remove from PAR all * values != VAL. Reduce configuration space accordingly. * Return VAL or -EINVAL if the configuration space is empty */ static int snd_pcm_hw_param_set(struct snd_pcm_substream *pcm, struct snd_pcm_hw_params *params, snd_pcm_hw_param_t var, unsigned int val, int dir) { int changed = _snd_pcm_hw_param_set(params, var, val, dir); if (changed < 0) return changed; if (params->rmask) { int err = snd_pcm_hw_refine(pcm, params); if (err < 0) return err; } return snd_pcm_hw_param_value(params, var, NULL); } static int _snd_pcm_hw_param_setinteger(struct snd_pcm_hw_params *params, snd_pcm_hw_param_t var) { int changed; changed = snd_interval_setinteger(hw_param_interval(params, var)); if (changed > 0) { params->cmask |= 1 << var; params->rmask |= 1 << var; } return changed; } /* * plugin */ #ifdef CONFIG_SND_PCM_OSS_PLUGINS static int snd_pcm_oss_plugin_clear(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; struct snd_pcm_plugin *plugin, *next; plugin = runtime->oss.plugin_first; while (plugin) { next = plugin->next; snd_pcm_plugin_free(plugin); plugin = next; } runtime->oss.plugin_first = runtime->oss.plugin_last = NULL; return 0; } static int snd_pcm_plugin_insert(struct snd_pcm_plugin *plugin) { struct snd_pcm_runtime *runtime = plugin->plug->runtime; plugin->next = runtime->oss.plugin_first; plugin->prev = NULL; if (runtime->oss.plugin_first) { runtime->oss.plugin_first->prev = plugin; runtime->oss.plugin_first = plugin; } else { runtime->oss.plugin_last = runtime->oss.plugin_first = plugin; } return 0; } int snd_pcm_plugin_append(struct snd_pcm_plugin *plugin) { struct snd_pcm_runtime *runtime = plugin->plug->runtime; plugin->next = NULL; plugin->prev = runtime->oss.plugin_last; if (runtime->oss.plugin_last) { runtime->oss.plugin_last->next = plugin; runtime->oss.plugin_last = plugin; } else { runtime->oss.plugin_last = runtime->oss.plugin_first = plugin; } return 0; } #endif /* CONFIG_SND_PCM_OSS_PLUGINS */ static long snd_pcm_oss_bytes(struct snd_pcm_substream *substream, long frames) { struct snd_pcm_runtime *runtime = substream->runtime; long buffer_size = snd_pcm_lib_buffer_bytes(substream); long bytes = frames_to_bytes(runtime, frames); if (buffer_size == runtime->oss.buffer_bytes) return bytes; #if BITS_PER_LONG >= 64 return runtime->oss.buffer_bytes * bytes / buffer_size; #else { u64 bsize = (u64)runtime->oss.buffer_bytes * (u64)bytes; return div_u64(bsize, buffer_size); } #endif } static long snd_pcm_alsa_frames(struct snd_pcm_substream *substream, long bytes) { struct snd_pcm_runtime *runtime = substream->runtime; long buffer_size = snd_pcm_lib_buffer_bytes(substream); if (buffer_size == runtime->oss.buffer_bytes) return bytes_to_frames(runtime, bytes); return bytes_to_frames(runtime, (buffer_size * bytes) / runtime->oss.buffer_bytes); } static inline snd_pcm_uframes_t get_hw_ptr_period(struct snd_pcm_runtime *runtime) { return runtime->hw_ptr_interrupt; } /* define extended formats in the recent OSS versions (if any) */ /* linear formats */ #define AFMT_S32_LE 0x00001000 #define AFMT_S32_BE 0x00002000 #define AFMT_S24_LE 0x00008000 #define AFMT_S24_BE 0x00010000 #define AFMT_S24_PACKED 0x00040000 /* other supported formats */ #define AFMT_FLOAT 0x00004000 #define AFMT_SPDIF_RAW 0x00020000 /* unsupported formats */ #define AFMT_AC3 0x00000400 #define AFMT_VORBIS 0x00000800 static snd_pcm_format_t snd_pcm_oss_format_from(int format) { switch (format) { case AFMT_MU_LAW: return SNDRV_PCM_FORMAT_MU_LAW; case AFMT_A_LAW: return SNDRV_PCM_FORMAT_A_LAW; case AFMT_IMA_ADPCM: return SNDRV_PCM_FORMAT_IMA_ADPCM; case AFMT_U8: return SNDRV_PCM_FORMAT_U8; case AFMT_S16_LE: return SNDRV_PCM_FORMAT_S16_LE; case AFMT_S16_BE: return SNDRV_PCM_FORMAT_S16_BE; case AFMT_S8: return SNDRV_PCM_FORMAT_S8; case AFMT_U16_LE: return SNDRV_PCM_FORMAT_U16_LE; case AFMT_U16_BE: return SNDRV_PCM_FORMAT_U16_BE; case AFMT_MPEG: return SNDRV_PCM_FORMAT_MPEG; case AFMT_S32_LE: return SNDRV_PCM_FORMAT_S32_LE; case AFMT_S32_BE: return SNDRV_PCM_FORMAT_S32_BE; case AFMT_S24_LE: return SNDRV_PCM_FORMAT_S24_LE; case AFMT_S24_BE: return SNDRV_PCM_FORMAT_S24_BE; case AFMT_S24_PACKED: return SNDRV_PCM_FORMAT_S24_3LE; case AFMT_FLOAT: return SNDRV_PCM_FORMAT_FLOAT; case AFMT_SPDIF_RAW: return SNDRV_PCM_FORMAT_IEC958_SUBFRAME; default: return SNDRV_PCM_FORMAT_U8; } } static int snd_pcm_oss_format_to(snd_pcm_format_t format) { switch (format) { case SNDRV_PCM_FORMAT_MU_LAW: return AFMT_MU_LAW; case SNDRV_PCM_FORMAT_A_LAW: return AFMT_A_LAW; case SNDRV_PCM_FORMAT_IMA_ADPCM: return AFMT_IMA_ADPCM; case SNDRV_PCM_FORMAT_U8: return AFMT_U8; case SNDRV_PCM_FORMAT_S16_LE: return AFMT_S16_LE; case SNDRV_PCM_FORMAT_S16_BE: return AFMT_S16_BE; case SNDRV_PCM_FORMAT_S8: return AFMT_S8; case SNDRV_PCM_FORMAT_U16_LE: return AFMT_U16_LE; case SNDRV_PCM_FORMAT_U16_BE: return AFMT_U16_BE; case SNDRV_PCM_FORMAT_MPEG: return AFMT_MPEG; case SNDRV_PCM_FORMAT_S32_LE: return AFMT_S32_LE; case SNDRV_PCM_FORMAT_S32_BE: return AFMT_S32_BE; case SNDRV_PCM_FORMAT_S24_LE: return AFMT_S24_LE; case SNDRV_PCM_FORMAT_S24_BE: return AFMT_S24_BE; case SNDRV_PCM_FORMAT_S24_3LE: return AFMT_S24_PACKED; case SNDRV_PCM_FORMAT_FLOAT: return AFMT_FLOAT; case SNDRV_PCM_FORMAT_IEC958_SUBFRAME: return AFMT_SPDIF_RAW; default: return -EINVAL; } } static int snd_pcm_oss_period_size(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *oss_params, struct snd_pcm_hw_params *slave_params) { ssize_t s; ssize_t oss_buffer_size; ssize_t oss_period_size, oss_periods; ssize_t min_period_size, max_period_size; struct snd_pcm_runtime *runtime = substream->runtime; size_t oss_frame_size; oss_frame_size = snd_pcm_format_physical_width(params_format(oss_params)) * params_channels(oss_params) / 8; oss_buffer_size = snd_pcm_hw_param_value_max(slave_params, SNDRV_PCM_HW_PARAM_BUFFER_SIZE, NULL); if (oss_buffer_size <= 0) return -EINVAL; oss_buffer_size = snd_pcm_plug_client_size(substream, oss_buffer_size * oss_frame_size); if (oss_buffer_size <= 0) return -EINVAL; oss_buffer_size = rounddown_pow_of_two(oss_buffer_size); if (atomic_read(&substream->mmap_count)) { if (oss_buffer_size > runtime->oss.mmap_bytes) oss_buffer_size = runtime->oss.mmap_bytes; } if (substream->oss.setup.period_size > 16) oss_period_size = substream->oss.setup.period_size; else if (runtime->oss.fragshift) { oss_period_size = 1 << runtime->oss.fragshift; if (oss_period_size > oss_buffer_size / 2) oss_period_size = oss_buffer_size / 2; } else { int sd; size_t bytes_per_sec = params_rate(oss_params) * snd_pcm_format_physical_width(params_format(oss_params)) * params_channels(oss_params) / 8; oss_period_size = oss_buffer_size; do { oss_period_size /= 2; } while (oss_period_size > bytes_per_sec); if (runtime->oss.subdivision == 0) { sd = 4; if (oss_period_size / sd > 4096) sd *= 2; if (oss_period_size / sd < 4096) sd = 1; } else sd = runtime->oss.subdivision; oss_period_size /= sd; if (oss_period_size < 16) oss_period_size = 16; } min_period_size = snd_pcm_plug_client_size(substream, snd_pcm_hw_param_value_min(slave_params, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, NULL)); if (min_period_size > 0) { min_period_size *= oss_frame_size; min_period_size = roundup_pow_of_two(min_period_size); if (oss_period_size < min_period_size) oss_period_size = min_period_size; } max_period_size = snd_pcm_plug_client_size(substream, snd_pcm_hw_param_value_max(slave_params, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, NULL)); if (max_period_size > 0) { max_period_size *= oss_frame_size; max_period_size = rounddown_pow_of_two(max_period_size); if (oss_period_size > max_period_size) oss_period_size = max_period_size; } oss_periods = oss_buffer_size / oss_period_size; if (substream->oss.setup.periods > 1) oss_periods = substream->oss.setup.periods; s = snd_pcm_hw_param_value_max(slave_params, SNDRV_PCM_HW_PARAM_PERIODS, NULL); if (s > 0 && runtime->oss.maxfrags && s > runtime->oss.maxfrags) s = runtime->oss.maxfrags; if (oss_periods > s) oss_periods = s; s = snd_pcm_hw_param_value_min(slave_params, SNDRV_PCM_HW_PARAM_PERIODS, NULL); if (s < 2) s = 2; if (oss_periods < s) oss_periods = s; while (oss_period_size * oss_periods > oss_buffer_size) oss_period_size /= 2; if (oss_period_size < 16) return -EINVAL; /* don't allocate too large period; 1MB period must be enough */ if (oss_period_size > 1024 * 1024) return -ENOMEM; runtime->oss.period_bytes = oss_period_size; runtime->oss.period_frames = 1; runtime->oss.periods = oss_periods; return 0; } static int choose_rate(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params, unsigned int best_rate) { const struct snd_interval *it; struct snd_pcm_hw_params *save __free(kfree) = NULL; unsigned int rate, prev; save = kmalloc(sizeof(*save), GFP_KERNEL); if (save == NULL) return -ENOMEM; *save = *params; it = hw_param_interval_c(save, SNDRV_PCM_HW_PARAM_RATE); /* try multiples of the best rate */ rate = best_rate; for (;;) { if (it->max < rate || (it->max == rate && it->openmax)) break; if (it->min < rate || (it->min == rate && !it->openmin)) { int ret; ret = snd_pcm_hw_param_set(substream, params, SNDRV_PCM_HW_PARAM_RATE, rate, 0); if (ret == (int)rate) return rate; *params = *save; } prev = rate; rate += best_rate; if (rate <= prev) break; } /* not found, use the nearest rate */ return snd_pcm_hw_param_near(substream, params, SNDRV_PCM_HW_PARAM_RATE, best_rate, NULL); } /* parameter locking: returns immediately if tried during streaming */ static int lock_params(struct snd_pcm_runtime *runtime) { if (mutex_lock_interruptible(&runtime->oss.params_lock)) return -ERESTARTSYS; if (atomic_read(&runtime->oss.rw_ref)) { mutex_unlock(&runtime->oss.params_lock); return -EBUSY; } return 0; } static void unlock_params(struct snd_pcm_runtime *runtime) { mutex_unlock(&runtime->oss.params_lock); } static void snd_pcm_oss_release_buffers(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; kvfree(runtime->oss.buffer); runtime->oss.buffer = NULL; #ifdef CONFIG_SND_PCM_OSS_PLUGINS snd_pcm_oss_plugin_clear(substream); #endif } /* call with params_lock held */ static int snd_pcm_oss_change_params_locked(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; struct snd_pcm_hw_params *params, *sparams; struct snd_pcm_sw_params *sw_params; ssize_t oss_buffer_size, oss_period_size; size_t oss_frame_size; int err; int direct; snd_pcm_format_t format, sformat; int n; const struct snd_mask *sformat_mask; struct snd_mask mask; if (!runtime->oss.params) return 0; sw_params = kzalloc(sizeof(*sw_params), GFP_KERNEL); params = kmalloc(sizeof(*params), GFP_KERNEL); sparams = kmalloc(sizeof(*sparams), GFP_KERNEL); if (!sw_params || !params || !sparams) { err = -ENOMEM; goto failure; } if (atomic_read(&substream->mmap_count)) direct = 1; else direct = substream->oss.setup.direct; _snd_pcm_hw_params_any(sparams); _snd_pcm_hw_param_setinteger(sparams, SNDRV_PCM_HW_PARAM_PERIODS); _snd_pcm_hw_param_min(sparams, SNDRV_PCM_HW_PARAM_PERIODS, 2, 0); snd_mask_none(&mask); if (atomic_read(&substream->mmap_count)) snd_mask_set(&mask, (__force int)SNDRV_PCM_ACCESS_MMAP_INTERLEAVED); else { snd_mask_set(&mask, (__force int)SNDRV_PCM_ACCESS_RW_INTERLEAVED); if (!direct) snd_mask_set(&mask, (__force int)SNDRV_PCM_ACCESS_RW_NONINTERLEAVED); } err = snd_pcm_hw_param_mask(substream, sparams, SNDRV_PCM_HW_PARAM_ACCESS, &mask); if (err < 0) { pcm_dbg(substream->pcm, "No usable accesses\n"); err = -EINVAL; goto failure; } err = choose_rate(substream, sparams, runtime->oss.rate); if (err < 0) goto failure; err = snd_pcm_hw_param_near(substream, sparams, SNDRV_PCM_HW_PARAM_CHANNELS, runtime->oss.channels, NULL); if (err < 0) goto failure; format = snd_pcm_oss_format_from(runtime->oss.format); sformat_mask = hw_param_mask_c(sparams, SNDRV_PCM_HW_PARAM_FORMAT); if (direct) sformat = format; else sformat = snd_pcm_plug_slave_format(format, sformat_mask); if ((__force int)sformat < 0 || !snd_mask_test_format(sformat_mask, sformat)) { pcm_for_each_format(sformat) { if (snd_mask_test_format(sformat_mask, sformat) && snd_pcm_oss_format_to(sformat) >= 0) goto format_found; } pcm_dbg(substream->pcm, "Cannot find a format!!!\n"); err = -EINVAL; goto failure; } format_found: err = _snd_pcm_hw_param_set(sparams, SNDRV_PCM_HW_PARAM_FORMAT, (__force int)sformat, 0); if (err < 0) goto failure; if (direct) { memcpy(params, sparams, sizeof(*params)); } else { _snd_pcm_hw_params_any(params); _snd_pcm_hw_param_set(params, SNDRV_PCM_HW_PARAM_ACCESS, (__force int)SNDRV_PCM_ACCESS_RW_INTERLEAVED, 0); _snd_pcm_hw_param_set(params, SNDRV_PCM_HW_PARAM_FORMAT, (__force int)snd_pcm_oss_format_from(runtime->oss.format), 0); _snd_pcm_hw_param_set(params, SNDRV_PCM_HW_PARAM_CHANNELS, runtime->oss.channels, 0); _snd_pcm_hw_param_set(params, SNDRV_PCM_HW_PARAM_RATE, runtime->oss.rate, 0); pdprintf("client: access = %i, format = %i, channels = %i, rate = %i\n", params_access(params), params_format(params), params_channels(params), params_rate(params)); } pdprintf("slave: access = %i, format = %i, channels = %i, rate = %i\n", params_access(sparams), params_format(sparams), params_channels(sparams), params_rate(sparams)); oss_frame_size = snd_pcm_format_physical_width(params_format(params)) * params_channels(params) / 8; err = snd_pcm_oss_period_size(substream, params, sparams); if (err < 0) goto failure; n = snd_pcm_plug_slave_size(substream, runtime->oss.period_bytes / oss_frame_size); err = snd_pcm_hw_param_near(substream, sparams, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, n, NULL); if (err < 0) goto failure; err = snd_pcm_hw_param_near(substream, sparams, SNDRV_PCM_HW_PARAM_PERIODS, runtime->oss.periods, NULL); if (err < 0) goto failure; snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_DROP, NULL); err = snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_HW_PARAMS, sparams); if (err < 0) { pcm_dbg(substream->pcm, "HW_PARAMS failed: %i\n", err); goto failure; } #ifdef CONFIG_SND_PCM_OSS_PLUGINS snd_pcm_oss_plugin_clear(substream); if (!direct) { /* add necessary plugins */ err = snd_pcm_plug_format_plugins(substream, params, sparams); if (err < 0) { pcm_dbg(substream->pcm, "snd_pcm_plug_format_plugins failed: %i\n", err); goto failure; } if (runtime->oss.plugin_first) { struct snd_pcm_plugin *plugin; err = snd_pcm_plugin_build_io(substream, sparams, &plugin); if (err < 0) { pcm_dbg(substream->pcm, "snd_pcm_plugin_build_io failed: %i\n", err); goto failure; } if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { err = snd_pcm_plugin_append(plugin); } else { err = snd_pcm_plugin_insert(plugin); } if (err < 0) goto failure; } } #endif if (runtime->oss.trigger) { sw_params->start_threshold = 1; } else { sw_params->start_threshold = runtime->boundary; } if (atomic_read(&substream->mmap_count) || substream->stream == SNDRV_PCM_STREAM_CAPTURE) sw_params->stop_threshold = runtime->boundary; else sw_params->stop_threshold = runtime->buffer_size; sw_params->tstamp_mode = SNDRV_PCM_TSTAMP_NONE; sw_params->period_step = 1; sw_params->avail_min = substream->stream == SNDRV_PCM_STREAM_PLAYBACK ? 1 : runtime->period_size; if (atomic_read(&substream->mmap_count) || substream->oss.setup.nosilence) { sw_params->silence_threshold = 0; sw_params->silence_size = 0; } else { snd_pcm_uframes_t frames; frames = runtime->period_size + 16; if (frames > runtime->buffer_size) frames = runtime->buffer_size; sw_params->silence_threshold = frames; sw_params->silence_size = frames; } err = snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_SW_PARAMS, sw_params); if (err < 0) { pcm_dbg(substream->pcm, "SW_PARAMS failed: %i\n", err); goto failure; } runtime->oss.periods = params_periods(sparams); oss_period_size = snd_pcm_plug_client_size(substream, params_period_size(sparams)); if (oss_period_size < 0) { err = -EINVAL; goto failure; } #ifdef CONFIG_SND_PCM_OSS_PLUGINS if (runtime->oss.plugin_first) { err = snd_pcm_plug_alloc(substream, oss_period_size); if (err < 0) goto failure; } #endif oss_period_size = array_size(oss_period_size, oss_frame_size); oss_buffer_size = array_size(oss_period_size, runtime->oss.periods); if (oss_buffer_size <= 0) { err = -EINVAL; goto failure; } runtime->oss.period_bytes = oss_period_size; runtime->oss.buffer_bytes = oss_buffer_size; pdprintf("oss: period bytes = %i, buffer bytes = %i\n", runtime->oss.period_bytes, runtime->oss.buffer_bytes); pdprintf("slave: period_size = %i, buffer_size = %i\n", params_period_size(sparams), params_buffer_size(sparams)); runtime->oss.format = snd_pcm_oss_format_to(params_format(params)); runtime->oss.channels = params_channels(params); runtime->oss.rate = params_rate(params); kvfree(runtime->oss.buffer); runtime->oss.buffer = kvzalloc(runtime->oss.period_bytes, GFP_KERNEL); if (!runtime->oss.buffer) { err = -ENOMEM; goto failure; } runtime->oss.params = 0; runtime->oss.prepare = 1; runtime->oss.buffer_used = 0; if (runtime->dma_area) snd_pcm_format_set_silence(runtime->format, runtime->dma_area, bytes_to_samples(runtime, runtime->dma_bytes)); runtime->oss.period_frames = snd_pcm_alsa_frames(substream, oss_period_size); err = 0; failure: if (err) snd_pcm_oss_release_buffers(substream); kfree(sw_params); kfree(params); kfree(sparams); return err; } /* this one takes the lock by itself */ static int snd_pcm_oss_change_params(struct snd_pcm_substream *substream, bool trylock) { struct snd_pcm_runtime *runtime = substream->runtime; int err; if (trylock) { if (!(mutex_trylock(&runtime->oss.params_lock))) return -EAGAIN; } else if (mutex_lock_interruptible(&runtime->oss.params_lock)) return -ERESTARTSYS; err = snd_pcm_oss_change_params_locked(substream); mutex_unlock(&runtime->oss.params_lock); return err; } static int snd_pcm_oss_get_active_substream(struct snd_pcm_oss_file *pcm_oss_file, struct snd_pcm_substream **r_substream) { int idx, err; struct snd_pcm_substream *asubstream = NULL, *substream; for (idx = 0; idx < 2; idx++) { substream = pcm_oss_file->streams[idx]; if (substream == NULL) continue; if (asubstream == NULL) asubstream = substream; if (substream->runtime->oss.params) { err = snd_pcm_oss_change_params(substream, false); if (err < 0) return err; } } if (!asubstream) return -EIO; if (r_substream) *r_substream = asubstream; return 0; } /* call with params_lock held */ /* NOTE: this always call PREPARE unconditionally no matter whether * runtime->oss.prepare is set or not */ static int snd_pcm_oss_prepare(struct snd_pcm_substream *substream) { int err; struct snd_pcm_runtime *runtime = substream->runtime; err = snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_PREPARE, NULL); if (err < 0) { pcm_dbg(substream->pcm, "snd_pcm_oss_prepare: SNDRV_PCM_IOCTL_PREPARE failed\n"); return err; } runtime->oss.prepare = 0; runtime->oss.prev_hw_ptr_period = 0; runtime->oss.period_ptr = 0; runtime->oss.buffer_used = 0; return 0; } static int snd_pcm_oss_make_ready(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime; int err; runtime = substream->runtime; if (runtime->oss.params) { err = snd_pcm_oss_change_params(substream, false); if (err < 0) return err; } if (runtime->oss.prepare) { if (mutex_lock_interruptible(&runtime->oss.params_lock)) return -ERESTARTSYS; err = snd_pcm_oss_prepare(substream); mutex_unlock(&runtime->oss.params_lock); if (err < 0) return err; } return 0; } /* call with params_lock held */ static int snd_pcm_oss_make_ready_locked(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime; int err; runtime = substream->runtime; if (runtime->oss.params) { err = snd_pcm_oss_change_params_locked(substream); if (err < 0) return err; } if (runtime->oss.prepare) { err = snd_pcm_oss_prepare(substream); if (err < 0) return err; } return 0; } static int snd_pcm_oss_capture_position_fixup(struct snd_pcm_substream *substream, snd_pcm_sframes_t *delay) { struct snd_pcm_runtime *runtime; snd_pcm_uframes_t frames; int err = 0; while (1) { err = snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_DELAY, delay); if (err < 0) break; runtime = substream->runtime; if (*delay <= (snd_pcm_sframes_t)runtime->buffer_size) break; /* in case of overrun, skip whole periods like OSS/Linux driver does */ /* until avail(delay) <= buffer_size */ frames = (*delay - runtime->buffer_size) + runtime->period_size - 1; frames /= runtime->period_size; frames *= runtime->period_size; err = snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_FORWARD, &frames); if (err < 0) break; } return err; } snd_pcm_sframes_t snd_pcm_oss_write3(struct snd_pcm_substream *substream, const char *ptr, snd_pcm_uframes_t frames, int in_kernel) { struct snd_pcm_runtime *runtime = substream->runtime; int ret; while (1) { if (runtime->state == SNDRV_PCM_STATE_XRUN || runtime->state == SNDRV_PCM_STATE_SUSPENDED) { #ifdef OSS_DEBUG pcm_dbg(substream->pcm, "pcm_oss: write: recovering from %s\n", runtime->state == SNDRV_PCM_STATE_XRUN ? "XRUN" : "SUSPEND"); #endif ret = snd_pcm_oss_prepare(substream); if (ret < 0) break; } mutex_unlock(&runtime->oss.params_lock); ret = __snd_pcm_lib_xfer(substream, (void *)ptr, true, frames, in_kernel); mutex_lock(&runtime->oss.params_lock); if (ret != -EPIPE && ret != -ESTRPIPE) break; /* test, if we can't store new data, because the stream */ /* has not been started */ if (runtime->state == SNDRV_PCM_STATE_PREPARED) return -EAGAIN; } return ret; } snd_pcm_sframes_t snd_pcm_oss_read3(struct snd_pcm_substream *substream, char *ptr, snd_pcm_uframes_t frames, int in_kernel) { struct snd_pcm_runtime *runtime = substream->runtime; snd_pcm_sframes_t delay; int ret; while (1) { if (runtime->state == SNDRV_PCM_STATE_XRUN || runtime->state == SNDRV_PCM_STATE_SUSPENDED) { #ifdef OSS_DEBUG pcm_dbg(substream->pcm, "pcm_oss: read: recovering from %s\n", runtime->state == SNDRV_PCM_STATE_XRUN ? "XRUN" : "SUSPEND"); #endif ret = snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_DRAIN, NULL); if (ret < 0) break; } else if (runtime->state == SNDRV_PCM_STATE_SETUP) { ret = snd_pcm_oss_prepare(substream); if (ret < 0) break; } ret = snd_pcm_oss_capture_position_fixup(substream, &delay); if (ret < 0) break; mutex_unlock(&runtime->oss.params_lock); ret = __snd_pcm_lib_xfer(substream, (void *)ptr, true, frames, in_kernel); mutex_lock(&runtime->oss.params_lock); if (ret == -EPIPE) { if (runtime->state == SNDRV_PCM_STATE_DRAINING) { ret = snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_DROP, NULL); if (ret < 0) break; } continue; } if (ret != -ESTRPIPE) break; } return ret; } #ifdef CONFIG_SND_PCM_OSS_PLUGINS snd_pcm_sframes_t snd_pcm_oss_writev3(struct snd_pcm_substream *substream, void **bufs, snd_pcm_uframes_t frames) { struct snd_pcm_runtime *runtime = substream->runtime; int ret; while (1) { if (runtime->state == SNDRV_PCM_STATE_XRUN || runtime->state == SNDRV_PCM_STATE_SUSPENDED) { #ifdef OSS_DEBUG pcm_dbg(substream->pcm, "pcm_oss: writev: recovering from %s\n", runtime->state == SNDRV_PCM_STATE_XRUN ? "XRUN" : "SUSPEND"); #endif ret = snd_pcm_oss_prepare(substream); if (ret < 0) break; } ret = snd_pcm_kernel_writev(substream, bufs, frames); if (ret != -EPIPE && ret != -ESTRPIPE) break; /* test, if we can't store new data, because the stream */ /* has not been started */ if (runtime->state == SNDRV_PCM_STATE_PREPARED) return -EAGAIN; } return ret; } snd_pcm_sframes_t snd_pcm_oss_readv3(struct snd_pcm_substream *substream, void **bufs, snd_pcm_uframes_t frames) { struct snd_pcm_runtime *runtime = substream->runtime; int ret; while (1) { if (runtime->state == SNDRV_PCM_STATE_XRUN || runtime->state == SNDRV_PCM_STATE_SUSPENDED) { #ifdef OSS_DEBUG pcm_dbg(substream->pcm, "pcm_oss: readv: recovering from %s\n", runtime->state == SNDRV_PCM_STATE_XRUN ? "XRUN" : "SUSPEND"); #endif ret = snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_DRAIN, NULL); if (ret < 0) break; } else if (runtime->state == SNDRV_PCM_STATE_SETUP) { ret = snd_pcm_oss_prepare(substream); if (ret < 0) break; } ret = snd_pcm_kernel_readv(substream, bufs, frames); if (ret != -EPIPE && ret != -ESTRPIPE) break; } return ret; } #endif /* CONFIG_SND_PCM_OSS_PLUGINS */ static ssize_t snd_pcm_oss_write2(struct snd_pcm_substream *substream, const char *buf, size_t bytes, int in_kernel) { struct snd_pcm_runtime *runtime = substream->runtime; snd_pcm_sframes_t frames, frames1; #ifdef CONFIG_SND_PCM_OSS_PLUGINS if (runtime->oss.plugin_first) { struct snd_pcm_plugin_channel *channels; size_t oss_frame_bytes = (runtime->oss.plugin_first->src_width * runtime->oss.plugin_first->src_format.channels) / 8; if (!in_kernel) { if (copy_from_user(runtime->oss.buffer, (const char __force __user *)buf, bytes)) return -EFAULT; buf = runtime->oss.buffer; } frames = bytes / oss_frame_bytes; frames1 = snd_pcm_plug_client_channels_buf(substream, (char *)buf, frames, &channels); if (frames1 < 0) return frames1; frames1 = snd_pcm_plug_write_transfer(substream, channels, frames1); if (frames1 <= 0) return frames1; bytes = frames1 * oss_frame_bytes; } else #endif { frames = bytes_to_frames(runtime, bytes); frames1 = snd_pcm_oss_write3(substream, buf, frames, in_kernel); if (frames1 <= 0) return frames1; bytes = frames_to_bytes(runtime, frames1); } return bytes; } static ssize_t snd_pcm_oss_write1(struct snd_pcm_substream *substream, const char __user *buf, size_t bytes) { size_t xfer = 0; ssize_t tmp = 0; struct snd_pcm_runtime *runtime = substream->runtime; if (atomic_read(&substream->mmap_count)) return -ENXIO; atomic_inc(&runtime->oss.rw_ref); while (bytes > 0) { if (mutex_lock_interruptible(&runtime->oss.params_lock)) { tmp = -ERESTARTSYS; break; } tmp = snd_pcm_oss_make_ready_locked(substream); if (tmp < 0) goto err; if (bytes < runtime->oss.period_bytes || runtime->oss.buffer_used > 0) { tmp = bytes; if (tmp + runtime->oss.buffer_used > runtime->oss.period_bytes) tmp = runtime->oss.period_bytes - runtime->oss.buffer_used; if (tmp > 0) { if (copy_from_user(runtime->oss.buffer + runtime->oss.buffer_used, buf, tmp)) { tmp = -EFAULT; goto err; } } runtime->oss.buffer_used += tmp; buf += tmp; bytes -= tmp; xfer += tmp; if (substream->oss.setup.partialfrag || runtime->oss.buffer_used == runtime->oss.period_bytes) { tmp = snd_pcm_oss_write2(substream, runtime->oss.buffer + runtime->oss.period_ptr, runtime->oss.buffer_used - runtime->oss.period_ptr, 1); if (tmp <= 0) goto err; runtime->oss.bytes += tmp; runtime->oss.period_ptr += tmp; runtime->oss.period_ptr %= runtime->oss.period_bytes; if (runtime->oss.period_ptr == 0 || runtime->oss.period_ptr == runtime->oss.buffer_used) runtime->oss.buffer_used = 0; else if ((substream->f_flags & O_NONBLOCK) != 0) { tmp = -EAGAIN; goto err; } } } else { tmp = snd_pcm_oss_write2(substream, (const char __force *)buf, runtime->oss.period_bytes, 0); if (tmp <= 0) goto err; runtime->oss.bytes += tmp; buf += tmp; bytes -= tmp; xfer += tmp; if ((substream->f_flags & O_NONBLOCK) != 0 && tmp != runtime->oss.period_bytes) tmp = -EAGAIN; } err: mutex_unlock(&runtime->oss.params_lock); if (tmp < 0) break; if (signal_pending(current)) { tmp = -ERESTARTSYS; break; } tmp = 0; } atomic_dec(&runtime->oss.rw_ref); return xfer > 0 ? (snd_pcm_sframes_t)xfer : tmp; } static ssize_t snd_pcm_oss_read2(struct snd_pcm_substream *substream, char *buf, size_t bytes, int in_kernel) { struct snd_pcm_runtime *runtime = substream->runtime; snd_pcm_sframes_t frames, frames1; #ifdef CONFIG_SND_PCM_OSS_PLUGINS char __user *final_dst = (char __force __user *)buf; if (runtime->oss.plugin_first) { struct snd_pcm_plugin_channel *channels; size_t oss_frame_bytes = (runtime->oss.plugin_last->dst_width * runtime->oss.plugin_last->dst_format.channels) / 8; if (!in_kernel) buf = runtime->oss.buffer; frames = bytes / oss_frame_bytes; frames1 = snd_pcm_plug_client_channels_buf(substream, buf, frames, &channels); if (frames1 < 0) return frames1; frames1 = snd_pcm_plug_read_transfer(substream, channels, frames1); if (frames1 <= 0) return frames1; bytes = frames1 * oss_frame_bytes; if (!in_kernel && copy_to_user(final_dst, buf, bytes)) return -EFAULT; } else #endif { frames = bytes_to_frames(runtime, bytes); frames1 = snd_pcm_oss_read3(substream, buf, frames, in_kernel); if (frames1 <= 0) return frames1; bytes = frames_to_bytes(runtime, frames1); } return bytes; } static ssize_t snd_pcm_oss_read1(struct snd_pcm_substream *substream, char __user *buf, size_t bytes) { size_t xfer = 0; ssize_t tmp = 0; struct snd_pcm_runtime *runtime = substream->runtime; if (atomic_read(&substream->mmap_count)) return -ENXIO; atomic_inc(&runtime->oss.rw_ref); while (bytes > 0) { if (mutex_lock_interruptible(&runtime->oss.params_lock)) { tmp = -ERESTARTSYS; break; } tmp = snd_pcm_oss_make_ready_locked(substream); if (tmp < 0) goto err; if (bytes < runtime->oss.period_bytes || runtime->oss.buffer_used > 0) { if (runtime->oss.buffer_used == 0) { tmp = snd_pcm_oss_read2(substream, runtime->oss.buffer, runtime->oss.period_bytes, 1); if (tmp <= 0) goto err; runtime->oss.bytes += tmp; runtime->oss.period_ptr = tmp; runtime->oss.buffer_used = tmp; } tmp = bytes; if ((size_t) tmp > runtime->oss.buffer_used) tmp = runtime->oss.buffer_used; if (copy_to_user(buf, runtime->oss.buffer + (runtime->oss.period_ptr - runtime->oss.buffer_used), tmp)) { tmp = -EFAULT; goto err; } buf += tmp; bytes -= tmp; xfer += tmp; runtime->oss.buffer_used -= tmp; } else { tmp = snd_pcm_oss_read2(substream, (char __force *)buf, runtime->oss.period_bytes, 0); if (tmp <= 0) goto err; runtime->oss.bytes += tmp; buf += tmp; bytes -= tmp; xfer += tmp; } err: mutex_unlock(&runtime->oss.params_lock); if (tmp < 0) break; if (signal_pending(current)) { tmp = -ERESTARTSYS; break; } tmp = 0; } atomic_dec(&runtime->oss.rw_ref); return xfer > 0 ? (snd_pcm_sframes_t)xfer : tmp; } static int snd_pcm_oss_reset(struct snd_pcm_oss_file *pcm_oss_file) { struct snd_pcm_substream *substream; struct snd_pcm_runtime *runtime; int i; for (i = 0; i < 2; i++) { substream = pcm_oss_file->streams[i]; if (!substream) continue; runtime = substream->runtime; snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_DROP, NULL); mutex_lock(&runtime->oss.params_lock); runtime->oss.prepare = 1; runtime->oss.buffer_used = 0; runtime->oss.prev_hw_ptr_period = 0; runtime->oss.period_ptr = 0; mutex_unlock(&runtime->oss.params_lock); } return 0; } static int snd_pcm_oss_post(struct snd_pcm_oss_file *pcm_oss_file) { struct snd_pcm_substream *substream; int err; substream = pcm_oss_file->streams[SNDRV_PCM_STREAM_PLAYBACK]; if (substream != NULL) { err = snd_pcm_oss_make_ready(substream); if (err < 0) return err; snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_START, NULL); } /* note: all errors from the start action are ignored */ /* OSS apps do not know, how to handle them */ return 0; } static int snd_pcm_oss_sync1(struct snd_pcm_substream *substream, size_t size) { struct snd_pcm_runtime *runtime; ssize_t result = 0; snd_pcm_state_t state; long res; wait_queue_entry_t wait; runtime = substream->runtime; init_waitqueue_entry(&wait, current); add_wait_queue(&runtime->sleep, &wait); #ifdef OSS_DEBUG pcm_dbg(substream->pcm, "sync1: size = %li\n", size); #endif while (1) { result = snd_pcm_oss_write2(substream, runtime->oss.buffer, size, 1); if (result > 0) { runtime->oss.buffer_used = 0; result = 0; break; } if (result != 0 && result != -EAGAIN) break; result = 0; set_current_state(TASK_INTERRUPTIBLE); scoped_guard(pcm_stream_lock_irq, substream) state = runtime->state; if (state != SNDRV_PCM_STATE_RUNNING) { set_current_state(TASK_RUNNING); break; } res = schedule_timeout(10 * HZ); if (signal_pending(current)) { result = -ERESTARTSYS; break; } if (res == 0) { pcm_err(substream->pcm, "OSS sync error - DMA timeout\n"); result = -EIO; break; } } remove_wait_queue(&runtime->sleep, &wait); return result; } static int snd_pcm_oss_sync(struct snd_pcm_oss_file *pcm_oss_file) { int err = 0; unsigned int saved_f_flags; struct snd_pcm_substream *substream; struct snd_pcm_runtime *runtime; snd_pcm_format_t format; unsigned long width; size_t size; substream = pcm_oss_file->streams[SNDRV_PCM_STREAM_PLAYBACK]; if (substream != NULL) { runtime = substream->runtime; if (atomic_read(&substream->mmap_count)) goto __direct; atomic_inc(&runtime->oss.rw_ref); if (mutex_lock_interruptible(&runtime->oss.params_lock)) { atomic_dec(&runtime->oss.rw_ref); return -ERESTARTSYS; } err = snd_pcm_oss_make_ready_locked(substream); if (err < 0) goto unlock; format = snd_pcm_oss_format_from(runtime->oss.format); width = snd_pcm_format_physical_width(format); if (runtime->oss.buffer_used > 0) { #ifdef OSS_DEBUG pcm_dbg(substream->pcm, "sync: buffer_used\n"); #endif size = (8 * (runtime->oss.period_bytes - runtime->oss.buffer_used) + 7) / width; snd_pcm_format_set_silence(format, runtime->oss.buffer + runtime->oss.buffer_used, size); err = snd_pcm_oss_sync1(substream, runtime->oss.period_bytes); if (err < 0) goto unlock; } else if (runtime->oss.period_ptr > 0) { #ifdef OSS_DEBUG pcm_dbg(substream->pcm, "sync: period_ptr\n"); #endif size = runtime->oss.period_bytes - runtime->oss.period_ptr; snd_pcm_format_set_silence(format, runtime->oss.buffer, size * 8 / width); err = snd_pcm_oss_sync1(substream, size); if (err < 0) goto unlock; } /* * The ALSA's period might be a bit large than OSS one. * Fill the remain portion of ALSA period with zeros. */ size = runtime->control->appl_ptr % runtime->period_size; if (size > 0) { size = runtime->period_size - size; if (runtime->access == SNDRV_PCM_ACCESS_RW_INTERLEAVED) snd_pcm_lib_write(substream, NULL, size); else if (runtime->access == SNDRV_PCM_ACCESS_RW_NONINTERLEAVED) snd_pcm_lib_writev(substream, NULL, size); } unlock: mutex_unlock(&runtime->oss.params_lock); atomic_dec(&runtime->oss.rw_ref); if (err < 0) return err; /* * finish sync: drain the buffer */ __direct: saved_f_flags = substream->f_flags; substream->f_flags &= ~O_NONBLOCK; err = snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_DRAIN, NULL); substream->f_flags = saved_f_flags; if (err < 0) return err; mutex_lock(&runtime->oss.params_lock); runtime->oss.prepare = 1; mutex_unlock(&runtime->oss.params_lock); } substream = pcm_oss_file->streams[SNDRV_PCM_STREAM_CAPTURE]; if (substream != NULL) { err = snd_pcm_oss_make_ready(substream); if (err < 0) return err; runtime = substream->runtime; err = snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_DROP, NULL); if (err < 0) return err; mutex_lock(&runtime->oss.params_lock); runtime->oss.buffer_used = 0; runtime->oss.prepare = 1; mutex_unlock(&runtime->oss.params_lock); } return 0; } static int snd_pcm_oss_set_rate(struct snd_pcm_oss_file *pcm_oss_file, int rate) { int idx; for (idx = 1; idx >= 0; --idx) { struct snd_pcm_substream *substream = pcm_oss_file->streams[idx]; struct snd_pcm_runtime *runtime; int err; if (substream == NULL) continue; runtime = substream->runtime; if (rate < 1000) rate = 1000; else if (rate > 192000) rate = 192000; err = lock_params(runtime); if (err < 0) return err; if (runtime->oss.rate != rate) { runtime->oss.params = 1; runtime->oss.rate = rate; } unlock_params(runtime); } return snd_pcm_oss_get_rate(pcm_oss_file); } static int snd_pcm_oss_get_rate(struct snd_pcm_oss_file *pcm_oss_file) { struct snd_pcm_substream *substream; int err; err = snd_pcm_oss_get_active_substream(pcm_oss_file, &substream); if (err < 0) return err; return substream->runtime->oss.rate; } static int snd_pcm_oss_set_channels(struct snd_pcm_oss_file *pcm_oss_file, unsigned int channels) { int idx; if (channels < 1) channels = 1; if (channels > 128) return -EINVAL; for (idx = 1; idx >= 0; --idx) { struct snd_pcm_substream *substream = pcm_oss_file->streams[idx]; struct snd_pcm_runtime *runtime; int err; if (substream == NULL) continue; runtime = substream->runtime; err = lock_params(runtime); if (err < 0) return err; if (runtime->oss.channels != channels) { runtime->oss.params = 1; runtime->oss.channels = channels; } unlock_params(runtime); } return snd_pcm_oss_get_channels(pcm_oss_file); } static int snd_pcm_oss_get_channels(struct snd_pcm_oss_file *pcm_oss_file) { struct snd_pcm_substream *substream; int err; err = snd_pcm_oss_get_active_substream(pcm_oss_file, &substream); if (err < 0) return err; return substream->runtime->oss.channels; } static int snd_pcm_oss_get_block_size(struct snd_pcm_oss_file *pcm_oss_file) { struct snd_pcm_substream *substream; int err; err = snd_pcm_oss_get_active_substream(pcm_oss_file, &substream); if (err < 0) return err; return substream->runtime->oss.period_bytes; } static int snd_pcm_oss_get_formats(struct snd_pcm_oss_file *pcm_oss_file) { struct snd_pcm_substream *substream; int err; int direct; struct snd_pcm_hw_params *params __free(kfree) = NULL; unsigned int formats = 0; const struct snd_mask *format_mask; int fmt; err = snd_pcm_oss_get_active_substream(pcm_oss_file, &substream); if (err < 0) return err; if (atomic_read(&substream->mmap_count)) direct = 1; else direct = substream->oss.setup.direct; if (!direct) return AFMT_MU_LAW | AFMT_U8 | AFMT_S16_LE | AFMT_S16_BE | AFMT_S8 | AFMT_U16_LE | AFMT_U16_BE | AFMT_S32_LE | AFMT_S32_BE | AFMT_S24_LE | AFMT_S24_BE | AFMT_S24_PACKED; params = kmalloc(sizeof(*params), GFP_KERNEL); if (!params) return -ENOMEM; _snd_pcm_hw_params_any(params); err = snd_pcm_hw_refine(substream, params); if (err < 0) return err; format_mask = hw_param_mask_c(params, SNDRV_PCM_HW_PARAM_FORMAT); for (fmt = 0; fmt < 32; ++fmt) { if (snd_mask_test(format_mask, fmt)) { int f = snd_pcm_oss_format_to((__force snd_pcm_format_t)fmt); if (f >= 0) formats |= f; } } return formats; } static int snd_pcm_oss_set_format(struct snd_pcm_oss_file *pcm_oss_file, int format) { int formats, idx; int err; if (format != AFMT_QUERY) { formats = snd_pcm_oss_get_formats(pcm_oss_file); if (formats < 0) return formats; if (!(formats & format)) format = AFMT_U8; for (idx = 1; idx >= 0; --idx) { struct snd_pcm_substream *substream = pcm_oss_file->streams[idx]; struct snd_pcm_runtime *runtime; if (substream == NULL) continue; runtime = substream->runtime; err = lock_params(runtime); if (err < 0) return err; if (runtime->oss.format != format) { runtime->oss.params = 1; runtime->oss.format = format; } unlock_params(runtime); } } return snd_pcm_oss_get_format(pcm_oss_file); } static int snd_pcm_oss_get_format(struct snd_pcm_oss_file *pcm_oss_file) { struct snd_pcm_substream *substream; int err; err = snd_pcm_oss_get_active_substream(pcm_oss_file, &substream); if (err < 0) return err; return substream->runtime->oss.format; } static int snd_pcm_oss_set_subdivide1(struct snd_pcm_substream *substream, int subdivide) { struct snd_pcm_runtime *runtime; runtime = substream->runtime; if (subdivide == 0) { subdivide = runtime->oss.subdivision; if (subdivide == 0) subdivide = 1; return subdivide; } if (runtime->oss.subdivision || runtime->oss.fragshift) return -EINVAL; if (subdivide != 1 && subdivide != 2 && subdivide != 4 && subdivide != 8 && subdivide != 16) return -EINVAL; runtime->oss.subdivision = subdivide; runtime->oss.params = 1; return subdivide; } static int snd_pcm_oss_set_subdivide(struct snd_pcm_oss_file *pcm_oss_file, int subdivide) { int err = -EINVAL, idx; for (idx = 1; idx >= 0; --idx) { struct snd_pcm_substream *substream = pcm_oss_file->streams[idx]; struct snd_pcm_runtime *runtime; if (substream == NULL) continue; runtime = substream->runtime; err = lock_params(runtime); if (err < 0) return err; err = snd_pcm_oss_set_subdivide1(substream, subdivide); unlock_params(runtime); if (err < 0) return err; } return err; } static int snd_pcm_oss_set_fragment1(struct snd_pcm_substream *substream, unsigned int val) { struct snd_pcm_runtime *runtime; int fragshift; runtime = substream->runtime; if (runtime->oss.subdivision || runtime->oss.fragshift) return -EINVAL; fragshift = val & 0xffff; if (fragshift >= 25) /* should be large enough */ return -EINVAL; runtime->oss.fragshift = fragshift; runtime->oss.maxfrags = (val >> 16) & 0xffff; if (runtime->oss.fragshift < 4) /* < 16 */ runtime->oss.fragshift = 4; if (runtime->oss.maxfrags < 2) runtime->oss.maxfrags = 2; runtime->oss.params = 1; return 0; } static int snd_pcm_oss_set_fragment(struct snd_pcm_oss_file *pcm_oss_file, unsigned int val) { int err = -EINVAL, idx; for (idx = 1; idx >= 0; --idx) { struct snd_pcm_substream *substream = pcm_oss_file->streams[idx]; struct snd_pcm_runtime *runtime; if (substream == NULL) continue; runtime = substream->runtime; err = lock_params(runtime); if (err < 0) return err; err = snd_pcm_oss_set_fragment1(substream, val); unlock_params(runtime); if (err < 0) return err; } return err; } static int snd_pcm_oss_nonblock(struct file * file) { spin_lock(&file->f_lock); file->f_flags |= O_NONBLOCK; spin_unlock(&file->f_lock); return 0; } static int snd_pcm_oss_get_caps1(struct snd_pcm_substream *substream, int res) { if (substream == NULL) { res &= ~DSP_CAP_DUPLEX; return res; } #ifdef DSP_CAP_MULTI if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) if (substream->pstr->substream_count > 1) res |= DSP_CAP_MULTI; #endif /* DSP_CAP_REALTIME is set all times: */ /* all ALSA drivers can return actual pointer in ring buffer */ #if defined(DSP_CAP_REALTIME) && 0 { struct snd_pcm_runtime *runtime = substream->runtime; if (runtime->info & (SNDRV_PCM_INFO_BLOCK_TRANSFER|SNDRV_PCM_INFO_BATCH)) res &= ~DSP_CAP_REALTIME; } #endif return res; } static int snd_pcm_oss_get_caps(struct snd_pcm_oss_file *pcm_oss_file) { int result, idx; result = DSP_CAP_TRIGGER | DSP_CAP_MMAP | DSP_CAP_DUPLEX | DSP_CAP_REALTIME; for (idx = 0; idx < 2; idx++) { struct snd_pcm_substream *substream = pcm_oss_file->streams[idx]; result = snd_pcm_oss_get_caps1(substream, result); } result |= 0x0001; /* revision - same as SB AWE 64 */ return result; } static void snd_pcm_oss_simulate_fill(struct snd_pcm_substream *substream, snd_pcm_uframes_t hw_ptr) { struct snd_pcm_runtime *runtime = substream->runtime; snd_pcm_uframes_t appl_ptr; appl_ptr = hw_ptr + runtime->buffer_size; appl_ptr %= runtime->boundary; runtime->control->appl_ptr = appl_ptr; } static int snd_pcm_oss_set_trigger(struct snd_pcm_oss_file *pcm_oss_file, int trigger) { struct snd_pcm_runtime *runtime; struct snd_pcm_substream *psubstream = NULL, *csubstream = NULL; int err, cmd; #ifdef OSS_DEBUG pr_debug("pcm_oss: trigger = 0x%x\n", trigger); #endif psubstream = pcm_oss_file->streams[SNDRV_PCM_STREAM_PLAYBACK]; csubstream = pcm_oss_file->streams[SNDRV_PCM_STREAM_CAPTURE]; if (psubstream) { err = snd_pcm_oss_make_ready(psubstream); if (err < 0) return err; } if (csubstream) { err = snd_pcm_oss_make_ready(csubstream); if (err < 0) return err; } if (psubstream) { runtime = psubstream->runtime; cmd = 0; if (mutex_lock_interruptible(&runtime->oss.params_lock)) return -ERESTARTSYS; if (trigger & PCM_ENABLE_OUTPUT) { if (runtime->oss.trigger) goto _skip1; if (atomic_read(&psubstream->mmap_count)) snd_pcm_oss_simulate_fill(psubstream, get_hw_ptr_period(runtime)); runtime->oss.trigger = 1; runtime->start_threshold = 1; cmd = SNDRV_PCM_IOCTL_START; } else { if (!runtime->oss.trigger) goto _skip1; runtime->oss.trigger = 0; runtime->start_threshold = runtime->boundary; cmd = SNDRV_PCM_IOCTL_DROP; runtime->oss.prepare = 1; } _skip1: mutex_unlock(&runtime->oss.params_lock); if (cmd) { err = snd_pcm_kernel_ioctl(psubstream, cmd, NULL); if (err < 0) return err; } } if (csubstream) { runtime = csubstream->runtime; cmd = 0; if (mutex_lock_interruptible(&runtime->oss.params_lock)) return -ERESTARTSYS; if (trigger & PCM_ENABLE_INPUT) { if (runtime->oss.trigger) goto _skip2; runtime->oss.trigger = 1; runtime->start_threshold = 1; cmd = SNDRV_PCM_IOCTL_START; } else { if (!runtime->oss.trigger) goto _skip2; runtime->oss.trigger = 0; runtime->start_threshold = runtime->boundary; cmd = SNDRV_PCM_IOCTL_DROP; runtime->oss.prepare = 1; } _skip2: mutex_unlock(&runtime->oss.params_lock); if (cmd) { err = snd_pcm_kernel_ioctl(csubstream, cmd, NULL); if (err < 0) return err; } } return 0; } static int snd_pcm_oss_get_trigger(struct snd_pcm_oss_file *pcm_oss_file) { struct snd_pcm_substream *psubstream = NULL, *csubstream = NULL; int result = 0; psubstream = pcm_oss_file->streams[SNDRV_PCM_STREAM_PLAYBACK]; csubstream = pcm_oss_file->streams[SNDRV_PCM_STREAM_CAPTURE]; if (psubstream && psubstream->runtime && psubstream->runtime->oss.trigger) result |= PCM_ENABLE_OUTPUT; if (csubstream && csubstream->runtime && csubstream->runtime->oss.trigger) result |= PCM_ENABLE_INPUT; return result; } static int snd_pcm_oss_get_odelay(struct snd_pcm_oss_file *pcm_oss_file) { struct snd_pcm_substream *substream; struct snd_pcm_runtime *runtime; snd_pcm_sframes_t delay; int err; substream = pcm_oss_file->streams[SNDRV_PCM_STREAM_PLAYBACK]; if (substream == NULL) return -EINVAL; err = snd_pcm_oss_make_ready(substream); if (err < 0) return err; runtime = substream->runtime; if (runtime->oss.params || runtime->oss.prepare) return 0; err = snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_DELAY, &delay); if (err == -EPIPE) delay = 0; /* hack for broken OSS applications */ else if (err < 0) return err; return snd_pcm_oss_bytes(substream, delay); } static int snd_pcm_oss_get_ptr(struct snd_pcm_oss_file *pcm_oss_file, int stream, struct count_info __user * _info) { struct snd_pcm_substream *substream; struct snd_pcm_runtime *runtime; snd_pcm_sframes_t delay; int fixup; struct count_info info; int err; if (_info == NULL) return -EFAULT; substream = pcm_oss_file->streams[stream]; if (substream == NULL) return -EINVAL; err = snd_pcm_oss_make_ready(substream); if (err < 0) return err; runtime = substream->runtime; if (runtime->oss.params || runtime->oss.prepare) { memset(&info, 0, sizeof(info)); if (copy_to_user(_info, &info, sizeof(info))) return -EFAULT; return 0; } if (stream == SNDRV_PCM_STREAM_PLAYBACK) { err = snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_DELAY, &delay); if (err == -EPIPE || err == -ESTRPIPE || (! err && delay < 0)) { err = 0; delay = 0; fixup = 0; } else { fixup = runtime->oss.buffer_used; } } else { err = snd_pcm_oss_capture_position_fixup(substream, &delay); fixup = -runtime->oss.buffer_used; } if (err < 0) return err; info.ptr = snd_pcm_oss_bytes(substream, runtime->status->hw_ptr % runtime->buffer_size); if (atomic_read(&substream->mmap_count)) { snd_pcm_sframes_t n; delay = get_hw_ptr_period(runtime); n = delay - runtime->oss.prev_hw_ptr_period; if (n < 0) n += runtime->boundary; info.blocks = n / runtime->period_size; runtime->oss.prev_hw_ptr_period = delay; if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) snd_pcm_oss_simulate_fill(substream, delay); info.bytes = snd_pcm_oss_bytes(substream, runtime->status->hw_ptr) & INT_MAX; } else { delay = snd_pcm_oss_bytes(substream, delay); if (stream == SNDRV_PCM_STREAM_PLAYBACK) { if (substream->oss.setup.buggyptr) info.blocks = (runtime->oss.buffer_bytes - delay - fixup) / runtime->oss.period_bytes; else info.blocks = (delay + fixup) / runtime->oss.period_bytes; info.bytes = (runtime->oss.bytes - delay) & INT_MAX; } else { delay += fixup; info.blocks = delay / runtime->oss.period_bytes; info.bytes = (runtime->oss.bytes + delay) & INT_MAX; } } if (copy_to_user(_info, &info, sizeof(info))) return -EFAULT; return 0; } static int snd_pcm_oss_get_space(struct snd_pcm_oss_file *pcm_oss_file, int stream, struct audio_buf_info __user *_info) { struct snd_pcm_substream *substream; struct snd_pcm_runtime *runtime; snd_pcm_sframes_t avail; int fixup; struct audio_buf_info info; int err; if (_info == NULL) return -EFAULT; substream = pcm_oss_file->streams[stream]; if (substream == NULL) return -EINVAL; runtime = substream->runtime; if (runtime->oss.params) { err = snd_pcm_oss_change_params(substream, false); if (err < 0) return err; } info.fragsize = runtime->oss.period_bytes; info.fragstotal = runtime->periods; if (runtime->oss.prepare) { if (stream == SNDRV_PCM_STREAM_PLAYBACK) { info.bytes = runtime->oss.period_bytes * runtime->oss.periods; info.fragments = runtime->oss.periods; } else { info.bytes = 0; info.fragments = 0; } } else { if (stream == SNDRV_PCM_STREAM_PLAYBACK) { err = snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_DELAY, &avail); if (err == -EPIPE || err == -ESTRPIPE || (! err && avail < 0)) { avail = runtime->buffer_size; err = 0; fixup = 0; } else { avail = runtime->buffer_size - avail; fixup = -runtime->oss.buffer_used; } } else { err = snd_pcm_oss_capture_position_fixup(substream, &avail); fixup = runtime->oss.buffer_used; } if (err < 0) return err; info.bytes = snd_pcm_oss_bytes(substream, avail) + fixup; info.fragments = info.bytes / runtime->oss.period_bytes; } #ifdef OSS_DEBUG pcm_dbg(substream->pcm, "pcm_oss: space: bytes = %i, fragments = %i, fragstotal = %i, fragsize = %i\n", info.bytes, info.fragments, info.fragstotal, info.fragsize); #endif if (copy_to_user(_info, &info, sizeof(info))) return -EFAULT; return 0; } static int snd_pcm_oss_get_mapbuf(struct snd_pcm_oss_file *pcm_oss_file, int stream, struct buffmem_desc __user * _info) { // it won't be probably implemented // pr_debug("TODO: snd_pcm_oss_get_mapbuf\n"); return -EINVAL; } static const char *strip_task_path(const char *path) { const char *ptr, *ptrl = NULL; for (ptr = path; *ptr; ptr++) { if (*ptr == '/') ptrl = ptr + 1; } return ptrl; } static void snd_pcm_oss_look_for_setup(struct snd_pcm *pcm, int stream, const char *task_name, struct snd_pcm_oss_setup *rsetup) { struct snd_pcm_oss_setup *setup; guard(mutex)(&pcm->streams[stream].oss.setup_mutex); do { for (setup = pcm->streams[stream].oss.setup_list; setup; setup = setup->next) { if (!strcmp(setup->task_name, task_name)) goto out; } } while ((task_name = strip_task_path(task_name)) != NULL); out: if (setup) *rsetup = *setup; } static void snd_pcm_oss_release_substream(struct snd_pcm_substream *substream) { snd_pcm_oss_release_buffers(substream); substream->oss.oss = 0; } static void snd_pcm_oss_init_substream(struct snd_pcm_substream *substream, struct snd_pcm_oss_setup *setup, int minor) { struct snd_pcm_runtime *runtime; substream->oss.oss = 1; substream->oss.setup = *setup; if (setup->nonblock) substream->f_flags |= O_NONBLOCK; else if (setup->block) substream->f_flags &= ~O_NONBLOCK; runtime = substream->runtime; runtime->oss.params = 1; runtime->oss.trigger = 1; runtime->oss.rate = 8000; mutex_init(&runtime->oss.params_lock); switch (SNDRV_MINOR_OSS_DEVICE(minor)) { case SNDRV_MINOR_OSS_PCM_8: runtime->oss.format = AFMT_U8; break; case SNDRV_MINOR_OSS_PCM_16: runtime->oss.format = AFMT_S16_LE; break; default: runtime->oss.format = AFMT_MU_LAW; } runtime->oss.channels = 1; runtime->oss.fragshift = 0; runtime->oss.maxfrags = 0; runtime->oss.subdivision = 0; substream->pcm_release = snd_pcm_oss_release_substream; atomic_set(&runtime->oss.rw_ref, 0); } static int snd_pcm_oss_release_file(struct snd_pcm_oss_file *pcm_oss_file) { int cidx; if (!pcm_oss_file) return 0; for (cidx = 0; cidx < 2; ++cidx) { struct snd_pcm_substream *substream = pcm_oss_file->streams[cidx]; if (substream) snd_pcm_release_substream(substream); } kfree(pcm_oss_file); return 0; } static int snd_pcm_oss_open_file(struct file *file, struct snd_pcm *pcm, struct snd_pcm_oss_file **rpcm_oss_file, int minor, struct snd_pcm_oss_setup *setup) { int idx, err; struct snd_pcm_oss_file *pcm_oss_file; struct snd_pcm_substream *substream; fmode_t f_mode = file->f_mode; if (rpcm_oss_file) *rpcm_oss_file = NULL; pcm_oss_file = kzalloc(sizeof(*pcm_oss_file), GFP_KERNEL); if (pcm_oss_file == NULL) return -ENOMEM; if ((f_mode & (FMODE_WRITE|FMODE_READ)) == (FMODE_WRITE|FMODE_READ) && (pcm->info_flags & SNDRV_PCM_INFO_HALF_DUPLEX)) f_mode = FMODE_WRITE; file->f_flags &= ~O_APPEND; for (idx = 0; idx < 2; idx++) { if (setup[idx].disable) continue; if (! pcm->streams[idx].substream_count) continue; /* no matching substream */ if (idx == SNDRV_PCM_STREAM_PLAYBACK) { if (! (f_mode & FMODE_WRITE)) continue; } else { if (! (f_mode & FMODE_READ)) continue; } err = snd_pcm_open_substream(pcm, idx, file, &substream); if (err < 0) { snd_pcm_oss_release_file(pcm_oss_file); return err; } pcm_oss_file->streams[idx] = substream; snd_pcm_oss_init_substream(substream, &setup[idx], minor); } if (!pcm_oss_file->streams[0] && !pcm_oss_file->streams[1]) { snd_pcm_oss_release_file(pcm_oss_file); return -EINVAL; } file->private_data = pcm_oss_file; if (rpcm_oss_file) *rpcm_oss_file = pcm_oss_file; return 0; } static int snd_task_name(struct task_struct *task, char *name, size_t size) { unsigned int idx; if (snd_BUG_ON(!task || !name || size < 2)) return -EINVAL; for (idx = 0; idx < sizeof(task->comm) && idx + 1 < size; idx++) name[idx] = task->comm[idx]; name[idx] = '\0'; return 0; } static int snd_pcm_oss_open(struct inode *inode, struct file *file) { int err; char task_name[32]; struct snd_pcm *pcm; struct snd_pcm_oss_file *pcm_oss_file; struct snd_pcm_oss_setup setup[2]; int nonblock; wait_queue_entry_t wait; err = nonseekable_open(inode, file); if (err < 0) return err; pcm = snd_lookup_oss_minor_data(iminor(inode), SNDRV_OSS_DEVICE_TYPE_PCM); if (pcm == NULL) { err = -ENODEV; goto __error1; } err = snd_card_file_add(pcm->card, file); if (err < 0) goto __error1; if (!try_module_get(pcm->card->module)) { err = -EFAULT; goto __error2; } if (snd_task_name(current, task_name, sizeof(task_name)) < 0) { err = -EFAULT; goto __error; } memset(setup, 0, sizeof(setup)); if (file->f_mode & FMODE_WRITE) snd_pcm_oss_look_for_setup(pcm, SNDRV_PCM_STREAM_PLAYBACK, task_name, &setup[0]); if (file->f_mode & FMODE_READ) snd_pcm_oss_look_for_setup(pcm, SNDRV_PCM_STREAM_CAPTURE, task_name, &setup[1]); nonblock = !!(file->f_flags & O_NONBLOCK); if (!nonblock) nonblock = nonblock_open; init_waitqueue_entry(&wait, current); add_wait_queue(&pcm->open_wait, &wait); mutex_lock(&pcm->open_mutex); while (1) { err = snd_pcm_oss_open_file(file, pcm, &pcm_oss_file, iminor(inode), setup); if (err >= 0) break; if (err == -EAGAIN) { if (nonblock) { err = -EBUSY; break; } } else break; set_current_state(TASK_INTERRUPTIBLE); mutex_unlock(&pcm->open_mutex); schedule(); mutex_lock(&pcm->open_mutex); if (pcm->card->shutdown) { err = -ENODEV; break; } if (signal_pending(current)) { err = -ERESTARTSYS; break; } } remove_wait_queue(&pcm->open_wait, &wait); mutex_unlock(&pcm->open_mutex); if (err < 0) goto __error; snd_card_unref(pcm->card); return err; __error: module_put(pcm->card->module); __error2: snd_card_file_remove(pcm->card, file); __error1: if (pcm) snd_card_unref(pcm->card); return err; } static int snd_pcm_oss_release(struct inode *inode, struct file *file) { struct snd_pcm *pcm; struct snd_pcm_substream *substream; struct snd_pcm_oss_file *pcm_oss_file; pcm_oss_file = file->private_data; substream = pcm_oss_file->streams[SNDRV_PCM_STREAM_PLAYBACK]; if (substream == NULL) substream = pcm_oss_file->streams[SNDRV_PCM_STREAM_CAPTURE]; if (snd_BUG_ON(!substream)) return -ENXIO; pcm = substream->pcm; if (!pcm->card->shutdown) snd_pcm_oss_sync(pcm_oss_file); mutex_lock(&pcm->open_mutex); snd_pcm_oss_release_file(pcm_oss_file); mutex_unlock(&pcm->open_mutex); wake_up(&pcm->open_wait); module_put(pcm->card->module); snd_card_file_remove(pcm->card, file); return 0; } static long snd_pcm_oss_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct snd_pcm_oss_file *pcm_oss_file; int __user *p = (int __user *)arg; int res; pcm_oss_file = file->private_data; if (cmd == OSS_GETVERSION) return put_user(SNDRV_OSS_VERSION, p); if (cmd == OSS_ALSAEMULVER) return put_user(1, p); #if IS_REACHABLE(CONFIG_SND_MIXER_OSS) if (((cmd >> 8) & 0xff) == 'M') { /* mixer ioctl - for OSS compatibility */ struct snd_pcm_substream *substream; int idx; for (idx = 0; idx < 2; ++idx) { substream = pcm_oss_file->streams[idx]; if (substream != NULL) break; } if (snd_BUG_ON(idx >= 2)) return -ENXIO; return snd_mixer_oss_ioctl_card(substream->pcm->card, cmd, arg); } #endif if (((cmd >> 8) & 0xff) != 'P') return -EINVAL; #ifdef OSS_DEBUG pr_debug("pcm_oss: ioctl = 0x%x\n", cmd); #endif switch (cmd) { case SNDCTL_DSP_RESET: return snd_pcm_oss_reset(pcm_oss_file); case SNDCTL_DSP_SYNC: return snd_pcm_oss_sync(pcm_oss_file); case SNDCTL_DSP_SPEED: if (get_user(res, p)) return -EFAULT; res = snd_pcm_oss_set_rate(pcm_oss_file, res); if (res < 0) return res; return put_user(res, p); case SOUND_PCM_READ_RATE: res = snd_pcm_oss_get_rate(pcm_oss_file); if (res < 0) return res; return put_user(res, p); case SNDCTL_DSP_STEREO: if (get_user(res, p)) return -EFAULT; res = res > 0 ? 2 : 1; res = snd_pcm_oss_set_channels(pcm_oss_file, res); if (res < 0) return res; return put_user(--res, p); case SNDCTL_DSP_GETBLKSIZE: res = snd_pcm_oss_get_block_size(pcm_oss_file); if (res < 0) return res; return put_user(res, p); case SNDCTL_DSP_SETFMT: if (get_user(res, p)) return -EFAULT; res = snd_pcm_oss_set_format(pcm_oss_file, res); if (res < 0) return res; return put_user(res, p); case SOUND_PCM_READ_BITS: res = snd_pcm_oss_get_format(pcm_oss_file); if (res < 0) return res; return put_user(res, p); case SNDCTL_DSP_CHANNELS: if (get_user(res, p)) return -EFAULT; res = snd_pcm_oss_set_channels(pcm_oss_file, res); if (res < 0) return res; return put_user(res, p); case SOUND_PCM_READ_CHANNELS: res = snd_pcm_oss_get_channels(pcm_oss_file); if (res < 0) return res; return put_user(res, p); case SOUND_PCM_WRITE_FILTER: case SOUND_PCM_READ_FILTER: return -EIO; case SNDCTL_DSP_POST: return snd_pcm_oss_post(pcm_oss_file); case SNDCTL_DSP_SUBDIVIDE: if (get_user(res, p)) return -EFAULT; res = snd_pcm_oss_set_subdivide(pcm_oss_file, res); if (res < 0) return res; return put_user(res, p); case SNDCTL_DSP_SETFRAGMENT: if (get_user(res, p)) return -EFAULT; return snd_pcm_oss_set_fragment(pcm_oss_file, res); case SNDCTL_DSP_GETFMTS: res = snd_pcm_oss_get_formats(pcm_oss_file); if (res < 0) return res; return put_user(res, p); case SNDCTL_DSP_GETOSPACE: case SNDCTL_DSP_GETISPACE: return snd_pcm_oss_get_space(pcm_oss_file, cmd == SNDCTL_DSP_GETISPACE ? SNDRV_PCM_STREAM_CAPTURE : SNDRV_PCM_STREAM_PLAYBACK, (struct audio_buf_info __user *) arg); case SNDCTL_DSP_NONBLOCK: return snd_pcm_oss_nonblock(file); case SNDCTL_DSP_GETCAPS: res = snd_pcm_oss_get_caps(pcm_oss_file); if (res < 0) return res; return put_user(res, p); case SNDCTL_DSP_GETTRIGGER: res = snd_pcm_oss_get_trigger(pcm_oss_file); if (res < 0) return res; return put_user(res, p); case SNDCTL_DSP_SETTRIGGER: if (get_user(res, p)) return -EFAULT; return snd_pcm_oss_set_trigger(pcm_oss_file, res); case SNDCTL_DSP_GETIPTR: case SNDCTL_DSP_GETOPTR: return snd_pcm_oss_get_ptr(pcm_oss_file, cmd == SNDCTL_DSP_GETIPTR ? SNDRV_PCM_STREAM_CAPTURE : SNDRV_PCM_STREAM_PLAYBACK, (struct count_info __user *) arg); case SNDCTL_DSP_MAPINBUF: case SNDCTL_DSP_MAPOUTBUF: return snd_pcm_oss_get_mapbuf(pcm_oss_file, cmd == SNDCTL_DSP_MAPINBUF ? SNDRV_PCM_STREAM_CAPTURE : SNDRV_PCM_STREAM_PLAYBACK, (struct buffmem_desc __user *) arg); case SNDCTL_DSP_SETSYNCRO: /* stop DMA now.. */ return 0; case SNDCTL_DSP_SETDUPLEX: if (snd_pcm_oss_get_caps(pcm_oss_file) & DSP_CAP_DUPLEX) return 0; return -EIO; case SNDCTL_DSP_GETODELAY: res = snd_pcm_oss_get_odelay(pcm_oss_file); if (res < 0) { /* it's for sure, some broken apps don't check for error codes */ put_user(0, p); return res; } return put_user(res, p); case SNDCTL_DSP_PROFILE: return 0; /* silently ignore */ default: pr_debug("pcm_oss: unknown command = 0x%x\n", cmd); } return -EINVAL; } #ifdef CONFIG_COMPAT /* all compatible */ static long snd_pcm_oss_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg) { /* * Everything is compatbile except SNDCTL_DSP_MAPINBUF/SNDCTL_DSP_MAPOUTBUF, * which are not implemented for the native case either */ return snd_pcm_oss_ioctl(file, cmd, (unsigned long)compat_ptr(arg)); } #else #define snd_pcm_oss_ioctl_compat NULL #endif static ssize_t snd_pcm_oss_read(struct file *file, char __user *buf, size_t count, loff_t *offset) { struct snd_pcm_oss_file *pcm_oss_file; struct snd_pcm_substream *substream; pcm_oss_file = file->private_data; substream = pcm_oss_file->streams[SNDRV_PCM_STREAM_CAPTURE]; if (substream == NULL) return -ENXIO; substream->f_flags = file->f_flags & O_NONBLOCK; #ifndef OSS_DEBUG return snd_pcm_oss_read1(substream, buf, count); #else { ssize_t res = snd_pcm_oss_read1(substream, buf, count); pcm_dbg(substream->pcm, "pcm_oss: read %li bytes (returned %li bytes)\n", (long)count, (long)res); return res; } #endif } static ssize_t snd_pcm_oss_write(struct file *file, const char __user *buf, size_t count, loff_t *offset) { struct snd_pcm_oss_file *pcm_oss_file; struct snd_pcm_substream *substream; long result; pcm_oss_file = file->private_data; substream = pcm_oss_file->streams[SNDRV_PCM_STREAM_PLAYBACK]; if (substream == NULL) return -ENXIO; substream->f_flags = file->f_flags & O_NONBLOCK; result = snd_pcm_oss_write1(substream, buf, count); #ifdef OSS_DEBUG pcm_dbg(substream->pcm, "pcm_oss: write %li bytes (wrote %li bytes)\n", (long)count, (long)result); #endif return result; } static int snd_pcm_oss_playback_ready(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; if (atomic_read(&substream->mmap_count)) return runtime->oss.prev_hw_ptr_period != get_hw_ptr_period(runtime); else return snd_pcm_playback_avail(runtime) >= runtime->oss.period_frames; } static int snd_pcm_oss_capture_ready(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; if (atomic_read(&substream->mmap_count)) return runtime->oss.prev_hw_ptr_period != get_hw_ptr_period(runtime); else return snd_pcm_capture_avail(runtime) >= runtime->oss.period_frames; } static __poll_t snd_pcm_oss_poll(struct file *file, poll_table * wait) { struct snd_pcm_oss_file *pcm_oss_file; __poll_t mask; struct snd_pcm_substream *psubstream = NULL, *csubstream = NULL; pcm_oss_file = file->private_data; psubstream = pcm_oss_file->streams[SNDRV_PCM_STREAM_PLAYBACK]; csubstream = pcm_oss_file->streams[SNDRV_PCM_STREAM_CAPTURE]; mask = 0; if (psubstream != NULL) { struct snd_pcm_runtime *runtime = psubstream->runtime; poll_wait(file, &runtime->sleep, wait); scoped_guard(pcm_stream_lock_irq, psubstream) { if (runtime->state != SNDRV_PCM_STATE_DRAINING && (runtime->state != SNDRV_PCM_STATE_RUNNING || snd_pcm_oss_playback_ready(psubstream))) mask |= EPOLLOUT | EPOLLWRNORM; } } if (csubstream != NULL) { struct snd_pcm_runtime *runtime = csubstream->runtime; snd_pcm_state_t ostate; poll_wait(file, &runtime->sleep, wait); scoped_guard(pcm_stream_lock_irq, csubstream) { ostate = runtime->state; if (ostate != SNDRV_PCM_STATE_RUNNING || snd_pcm_oss_capture_ready(csubstream)) mask |= EPOLLIN | EPOLLRDNORM; } if (ostate != SNDRV_PCM_STATE_RUNNING && runtime->oss.trigger) { struct snd_pcm_oss_file ofile; memset(&ofile, 0, sizeof(ofile)); ofile.streams[SNDRV_PCM_STREAM_CAPTURE] = pcm_oss_file->streams[SNDRV_PCM_STREAM_CAPTURE]; runtime->oss.trigger = 0; snd_pcm_oss_set_trigger(&ofile, PCM_ENABLE_INPUT); } } return mask; } static int snd_pcm_oss_mmap(struct file *file, struct vm_area_struct *area) { struct snd_pcm_oss_file *pcm_oss_file; struct snd_pcm_substream *substream = NULL; struct snd_pcm_runtime *runtime; int err; #ifdef OSS_DEBUG pr_debug("pcm_oss: mmap begin\n"); #endif pcm_oss_file = file->private_data; switch ((area->vm_flags & (VM_READ | VM_WRITE))) { case VM_READ | VM_WRITE: substream = pcm_oss_file->streams[SNDRV_PCM_STREAM_PLAYBACK]; if (substream) break; fallthrough; case VM_READ: substream = pcm_oss_file->streams[SNDRV_PCM_STREAM_CAPTURE]; break; case VM_WRITE: substream = pcm_oss_file->streams[SNDRV_PCM_STREAM_PLAYBACK]; break; default: return -EINVAL; } /* set VM_READ access as well to fix memset() routines that do reads before writes (to improve performance) */ vm_flags_set(area, VM_READ); if (substream == NULL) return -ENXIO; runtime = substream->runtime; if (!(runtime->info & SNDRV_PCM_INFO_MMAP_VALID)) return -EIO; if (runtime->info & SNDRV_PCM_INFO_INTERLEAVED) runtime->access = SNDRV_PCM_ACCESS_MMAP_INTERLEAVED; else return -EIO; if (runtime->oss.params) { /* use mutex_trylock() for params_lock for avoiding a deadlock * between mmap_lock and params_lock taken by * copy_from/to_user() in snd_pcm_oss_write/read() */ err = snd_pcm_oss_change_params(substream, true); if (err < 0) return err; } #ifdef CONFIG_SND_PCM_OSS_PLUGINS if (runtime->oss.plugin_first != NULL) return -EIO; #endif if (area->vm_pgoff != 0) return -EINVAL; err = snd_pcm_mmap_data(substream, file, area); if (err < 0) return err; runtime->oss.mmap_bytes = area->vm_end - area->vm_start; runtime->silence_threshold = 0; runtime->silence_size = 0; #ifdef OSS_DEBUG pr_debug("pcm_oss: mmap ok, bytes = 0x%x\n", runtime->oss.mmap_bytes); #endif /* In mmap mode we never stop */ runtime->stop_threshold = runtime->boundary; return 0; } #ifdef CONFIG_SND_VERBOSE_PROCFS /* * /proc interface */ static void snd_pcm_oss_proc_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { struct snd_pcm_str *pstr = entry->private_data; struct snd_pcm_oss_setup *setup = pstr->oss.setup_list; guard(mutex)(&pstr->oss.setup_mutex); while (setup) { snd_iprintf(buffer, "%s %u %u%s%s%s%s%s%s\n", setup->task_name, setup->periods, setup->period_size, setup->disable ? " disable" : "", setup->direct ? " direct" : "", setup->block ? " block" : "", setup->nonblock ? " non-block" : "", setup->partialfrag ? " partial-frag" : "", setup->nosilence ? " no-silence" : ""); setup = setup->next; } } static void snd_pcm_oss_proc_free_setup_list(struct snd_pcm_str * pstr) { struct snd_pcm_oss_setup *setup, *setupn; for (setup = pstr->oss.setup_list, pstr->oss.setup_list = NULL; setup; setup = setupn) { setupn = setup->next; kfree(setup->task_name); kfree(setup); } pstr->oss.setup_list = NULL; } static void snd_pcm_oss_proc_write(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { struct snd_pcm_str *pstr = entry->private_data; char line[128], str[32], task_name[32]; const char *ptr; int idx1; struct snd_pcm_oss_setup *setup, *setup1, template; while (!snd_info_get_line(buffer, line, sizeof(line))) { guard(mutex)(&pstr->oss.setup_mutex); memset(&template, 0, sizeof(template)); ptr = snd_info_get_str(task_name, line, sizeof(task_name)); if (!strcmp(task_name, "clear") || !strcmp(task_name, "erase")) { snd_pcm_oss_proc_free_setup_list(pstr); continue; } for (setup = pstr->oss.setup_list; setup; setup = setup->next) { if (!strcmp(setup->task_name, task_name)) { template = *setup; break; } } ptr = snd_info_get_str(str, ptr, sizeof(str)); template.periods = simple_strtoul(str, NULL, 10); ptr = snd_info_get_str(str, ptr, sizeof(str)); template.period_size = simple_strtoul(str, NULL, 10); for (idx1 = 31; idx1 >= 0; idx1--) if (template.period_size & (1 << idx1)) break; for (idx1--; idx1 >= 0; idx1--) template.period_size &= ~(1 << idx1); do { ptr = snd_info_get_str(str, ptr, sizeof(str)); if (!strcmp(str, "disable")) { template.disable = 1; } else if (!strcmp(str, "direct")) { template.direct = 1; } else if (!strcmp(str, "block")) { template.block = 1; } else if (!strcmp(str, "non-block")) { template.nonblock = 1; } else if (!strcmp(str, "partial-frag")) { template.partialfrag = 1; } else if (!strcmp(str, "no-silence")) { template.nosilence = 1; } else if (!strcmp(str, "buggy-ptr")) { template.buggyptr = 1; } } while (*str); if (setup == NULL) { setup = kmalloc(sizeof(*setup), GFP_KERNEL); if (! setup) { buffer->error = -ENOMEM; return; } if (pstr->oss.setup_list == NULL) pstr->oss.setup_list = setup; else { for (setup1 = pstr->oss.setup_list; setup1->next; setup1 = setup1->next); setup1->next = setup; } template.task_name = kstrdup(task_name, GFP_KERNEL); if (! template.task_name) { kfree(setup); buffer->error = -ENOMEM; return; } } *setup = template; } } static void snd_pcm_oss_proc_init(struct snd_pcm *pcm) { int stream; for (stream = 0; stream < 2; ++stream) { struct snd_info_entry *entry; struct snd_pcm_str *pstr = &pcm->streams[stream]; if (pstr->substream_count == 0) continue; entry = snd_info_create_card_entry(pcm->card, "oss", pstr->proc_root); if (entry) { entry->content = SNDRV_INFO_CONTENT_TEXT; entry->mode = S_IFREG | 0644; entry->c.text.read = snd_pcm_oss_proc_read; entry->c.text.write = snd_pcm_oss_proc_write; entry->private_data = pstr; if (snd_info_register(entry) < 0) { snd_info_free_entry(entry); entry = NULL; } } pstr->oss.proc_entry = entry; } } static void snd_pcm_oss_proc_done(struct snd_pcm *pcm) { int stream; for (stream = 0; stream < 2; ++stream) { struct snd_pcm_str *pstr = &pcm->streams[stream]; snd_info_free_entry(pstr->oss.proc_entry); pstr->oss.proc_entry = NULL; snd_pcm_oss_proc_free_setup_list(pstr); } } #else /* !CONFIG_SND_VERBOSE_PROCFS */ static inline void snd_pcm_oss_proc_init(struct snd_pcm *pcm) { } static inline void snd_pcm_oss_proc_done(struct snd_pcm *pcm) { } #endif /* CONFIG_SND_VERBOSE_PROCFS */ /* * ENTRY functions */ static const struct file_operations snd_pcm_oss_f_reg = { .owner = THIS_MODULE, .read = snd_pcm_oss_read, .write = snd_pcm_oss_write, .open = snd_pcm_oss_open, .release = snd_pcm_oss_release, .poll = snd_pcm_oss_poll, .unlocked_ioctl = snd_pcm_oss_ioctl, .compat_ioctl = snd_pcm_oss_ioctl_compat, .mmap = snd_pcm_oss_mmap, }; static void register_oss_dsp(struct snd_pcm *pcm, int index) { if (snd_register_oss_device(SNDRV_OSS_DEVICE_TYPE_PCM, pcm->card, index, &snd_pcm_oss_f_reg, pcm) < 0) { pcm_err(pcm, "unable to register OSS PCM device %i:%i\n", pcm->card->number, pcm->device); } } static int snd_pcm_oss_register_minor(struct snd_pcm *pcm) { pcm->oss.reg = 0; if (dsp_map[pcm->card->number] == (int)pcm->device) { char name[128]; int duplex; register_oss_dsp(pcm, 0); duplex = (pcm->streams[SNDRV_PCM_STREAM_PLAYBACK].substream_count > 0 && pcm->streams[SNDRV_PCM_STREAM_CAPTURE].substream_count && !(pcm->info_flags & SNDRV_PCM_INFO_HALF_DUPLEX)); sprintf(name, "%s%s", pcm->name, duplex ? " (DUPLEX)" : ""); #ifdef SNDRV_OSS_INFO_DEV_AUDIO snd_oss_info_register(SNDRV_OSS_INFO_DEV_AUDIO, pcm->card->number, name); #endif pcm->oss.reg++; pcm->oss.reg_mask |= 1; } if (adsp_map[pcm->card->number] == (int)pcm->device) { register_oss_dsp(pcm, 1); pcm->oss.reg++; pcm->oss.reg_mask |= 2; } if (pcm->oss.reg) snd_pcm_oss_proc_init(pcm); return 0; } static int snd_pcm_oss_disconnect_minor(struct snd_pcm *pcm) { if (pcm->oss.reg) { if (pcm->oss.reg_mask & 1) { pcm->oss.reg_mask &= ~1; snd_unregister_oss_device(SNDRV_OSS_DEVICE_TYPE_PCM, pcm->card, 0); } if (pcm->oss.reg_mask & 2) { pcm->oss.reg_mask &= ~2; snd_unregister_oss_device(SNDRV_OSS_DEVICE_TYPE_PCM, pcm->card, 1); } if (dsp_map[pcm->card->number] == (int)pcm->device) { #ifdef SNDRV_OSS_INFO_DEV_AUDIO snd_oss_info_unregister(SNDRV_OSS_INFO_DEV_AUDIO, pcm->card->number); #endif } pcm->oss.reg = 0; } return 0; } static int snd_pcm_oss_unregister_minor(struct snd_pcm *pcm) { snd_pcm_oss_disconnect_minor(pcm); snd_pcm_oss_proc_done(pcm); return 0; } static struct snd_pcm_notify snd_pcm_oss_notify = { .n_register = snd_pcm_oss_register_minor, .n_disconnect = snd_pcm_oss_disconnect_minor, .n_unregister = snd_pcm_oss_unregister_minor, }; static int __init alsa_pcm_oss_init(void) { int i; int err; /* check device map table */ for (i = 0; i < SNDRV_CARDS; i++) { if (dsp_map[i] < 0 || dsp_map[i] >= SNDRV_PCM_DEVICES) { pr_err("ALSA: pcm_oss: invalid dsp_map[%d] = %d\n", i, dsp_map[i]); dsp_map[i] = 0; } if (adsp_map[i] < 0 || adsp_map[i] >= SNDRV_PCM_DEVICES) { pr_err("ALSA: pcm_oss: invalid adsp_map[%d] = %d\n", i, adsp_map[i]); adsp_map[i] = 1; } } err = snd_pcm_notify(&snd_pcm_oss_notify, 0); if (err < 0) return err; return 0; } static void __exit alsa_pcm_oss_exit(void) { snd_pcm_notify(&snd_pcm_oss_notify, 1); } module_init(alsa_pcm_oss_init) module_exit(alsa_pcm_oss_exit) |
2 1 1 1 3 3 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 5 235 234 98 98 30241 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 | // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2012-2014 Andy Lutomirski <luto@amacapital.net> * * Based on the original implementation which is: * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE * Copyright 2003 Andi Kleen, SuSE Labs. * * Parts of the original code have been moved to arch/x86/vdso/vma.c * * This file implements vsyscall emulation. vsyscalls are a legacy ABI: * Userspace can request certain kernel services by calling fixed * addresses. This concept is problematic: * * - It interferes with ASLR. * - It's awkward to write code that lives in kernel addresses but is * callable by userspace at fixed addresses. * - The whole concept is impossible for 32-bit compat userspace. * - UML cannot easily virtualize a vsyscall. * * As of mid-2014, I believe that there is no new userspace code that * will use a vsyscall if the vDSO is present. I hope that there will * soon be no new userspace code that will ever use a vsyscall. * * The code in this file emulates vsyscalls when notified of a page * fault to a vsyscall address. */ #include <linux/kernel.h> #include <linux/timer.h> #include <linux/sched/signal.h> #include <linux/mm_types.h> #include <linux/syscalls.h> #include <linux/ratelimit.h> #include <asm/vsyscall.h> #include <asm/unistd.h> #include <asm/fixmap.h> #include <asm/traps.h> #include <asm/paravirt.h> #define CREATE_TRACE_POINTS #include "vsyscall_trace.h" static enum { EMULATE, XONLY, NONE } vsyscall_mode __ro_after_init = #ifdef CONFIG_LEGACY_VSYSCALL_NONE NONE; #elif defined(CONFIG_LEGACY_VSYSCALL_XONLY) XONLY; #else #error VSYSCALL config is broken #endif static int __init vsyscall_setup(char *str) { if (str) { if (!strcmp("emulate", str)) vsyscall_mode = EMULATE; else if (!strcmp("xonly", str)) vsyscall_mode = XONLY; else if (!strcmp("none", str)) vsyscall_mode = NONE; else return -EINVAL; return 0; } return -EINVAL; } early_param("vsyscall", vsyscall_setup); static void warn_bad_vsyscall(const char *level, struct pt_regs *regs, const char *message) { if (!show_unhandled_signals) return; printk_ratelimited("%s%s[%d] %s ip:%lx cs:%x sp:%lx ax:%lx si:%lx di:%lx\n", level, current->comm, task_pid_nr(current), message, regs->ip, regs->cs, regs->sp, regs->ax, regs->si, regs->di); } static int addr_to_vsyscall_nr(unsigned long addr) { int nr; if ((addr & ~0xC00UL) != VSYSCALL_ADDR) return -EINVAL; nr = (addr & 0xC00UL) >> 10; if (nr >= 3) return -EINVAL; return nr; } static bool write_ok_or_segv(unsigned long ptr, size_t size) { if (!access_ok((void __user *)ptr, size)) { struct thread_struct *thread = ¤t->thread; thread->error_code = X86_PF_USER | X86_PF_WRITE; thread->cr2 = ptr; thread->trap_nr = X86_TRAP_PF; force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *)ptr); return false; } else { return true; } } bool emulate_vsyscall(unsigned long error_code, struct pt_regs *regs, unsigned long address) { unsigned long caller; int vsyscall_nr, syscall_nr, tmp; long ret; unsigned long orig_dx; /* Write faults or kernel-privilege faults never get fixed up. */ if ((error_code & (X86_PF_WRITE | X86_PF_USER)) != X86_PF_USER) return false; if (!(error_code & X86_PF_INSTR)) { /* Failed vsyscall read */ if (vsyscall_mode == EMULATE) return false; /* * User code tried and failed to read the vsyscall page. */ warn_bad_vsyscall(KERN_INFO, regs, "vsyscall read attempt denied -- look up the vsyscall kernel parameter if you need a workaround"); return false; } /* * No point in checking CS -- the only way to get here is a user mode * trap to a high address, which means that we're in 64-bit user code. */ WARN_ON_ONCE(address != regs->ip); if (vsyscall_mode == NONE) { warn_bad_vsyscall(KERN_INFO, regs, "vsyscall attempted with vsyscall=none"); return false; } vsyscall_nr = addr_to_vsyscall_nr(address); trace_emulate_vsyscall(vsyscall_nr); if (vsyscall_nr < 0) { warn_bad_vsyscall(KERN_WARNING, regs, "misaligned vsyscall (exploit attempt or buggy program) -- look up the vsyscall kernel parameter if you need a workaround"); goto sigsegv; } if (get_user(caller, (unsigned long __user *)regs->sp) != 0) { warn_bad_vsyscall(KERN_WARNING, regs, "vsyscall with bad stack (exploit attempt?)"); goto sigsegv; } /* * Check for access_ok violations and find the syscall nr. * * NULL is a valid user pointer (in the access_ok sense) on 32-bit and * 64-bit, so we don't need to special-case it here. For all the * vsyscalls, NULL means "don't write anything" not "write it at * address 0". */ switch (vsyscall_nr) { case 0: if (!write_ok_or_segv(regs->di, sizeof(struct __kernel_old_timeval)) || !write_ok_or_segv(regs->si, sizeof(struct timezone))) { ret = -EFAULT; goto check_fault; } syscall_nr = __NR_gettimeofday; break; case 1: if (!write_ok_or_segv(regs->di, sizeof(__kernel_old_time_t))) { ret = -EFAULT; goto check_fault; } syscall_nr = __NR_time; break; case 2: if (!write_ok_or_segv(regs->di, sizeof(unsigned)) || !write_ok_or_segv(regs->si, sizeof(unsigned))) { ret = -EFAULT; goto check_fault; } syscall_nr = __NR_getcpu; break; } /* * Handle seccomp. regs->ip must be the original value. * See seccomp_send_sigsys and Documentation/userspace-api/seccomp_filter.rst. * * We could optimize the seccomp disabled case, but performance * here doesn't matter. */ regs->orig_ax = syscall_nr; regs->ax = -ENOSYS; tmp = secure_computing(); if ((!tmp && regs->orig_ax != syscall_nr) || regs->ip != address) { warn_bad_vsyscall(KERN_DEBUG, regs, "seccomp tried to change syscall nr or ip"); force_exit_sig(SIGSYS); return true; } regs->orig_ax = -1; if (tmp) goto do_ret; /* skip requested */ /* * With a real vsyscall, page faults cause SIGSEGV. */ ret = -EFAULT; switch (vsyscall_nr) { case 0: /* this decodes regs->di and regs->si on its own */ ret = __x64_sys_gettimeofday(regs); break; case 1: /* this decodes regs->di on its own */ ret = __x64_sys_time(regs); break; case 2: /* while we could clobber regs->dx, we didn't in the past... */ orig_dx = regs->dx; regs->dx = 0; /* this decodes regs->di, regs->si and regs->dx on its own */ ret = __x64_sys_getcpu(regs); regs->dx = orig_dx; break; } check_fault: if (ret == -EFAULT) { /* Bad news -- userspace fed a bad pointer to a vsyscall. */ warn_bad_vsyscall(KERN_INFO, regs, "vsyscall fault (exploit attempt?)"); goto sigsegv; } regs->ax = ret; do_ret: /* Emulate a ret instruction. */ regs->ip = caller; regs->sp += 8; return true; sigsegv: force_sig(SIGSEGV); return true; } /* * A pseudo VMA to allow ptrace access for the vsyscall page. This only * covers the 64bit vsyscall page now. 32bit has a real VMA now and does * not need special handling anymore: */ static const char *gate_vma_name(struct vm_area_struct *vma) { return "[vsyscall]"; } static const struct vm_operations_struct gate_vma_ops = { .name = gate_vma_name, }; static struct vm_area_struct gate_vma __ro_after_init = { .vm_start = VSYSCALL_ADDR, .vm_end = VSYSCALL_ADDR + PAGE_SIZE, .vm_page_prot = PAGE_READONLY_EXEC, .vm_flags = VM_READ | VM_EXEC, .vm_ops = &gate_vma_ops, }; struct vm_area_struct *get_gate_vma(struct mm_struct *mm) { #ifdef CONFIG_COMPAT if (!mm || !test_bit(MM_CONTEXT_HAS_VSYSCALL, &mm->context.flags)) return NULL; #endif if (vsyscall_mode == NONE) return NULL; return &gate_vma; } int in_gate_area(struct mm_struct *mm, unsigned long addr) { struct vm_area_struct *vma = get_gate_vma(mm); if (!vma) return 0; return (addr >= vma->vm_start) && (addr < vma->vm_end); } /* * Use this when you have no reliable mm, typically from interrupt * context. It is less reliable than using a task's mm and may give * false positives. */ int in_gate_area_no_mm(unsigned long addr) { return vsyscall_mode != NONE && (addr & PAGE_MASK) == VSYSCALL_ADDR; } /* * The VSYSCALL page is the only user-accessible page in the kernel address * range. Normally, the kernel page tables can have _PAGE_USER clear, but * the tables covering VSYSCALL_ADDR need _PAGE_USER set if vsyscalls * are enabled. * * Some day we may create a "minimal" vsyscall mode in which we emulate * vsyscalls but leave the page not present. If so, we skip calling * this. */ void __init set_vsyscall_pgtable_user_bits(pgd_t *root) { pgd_t *pgd; p4d_t *p4d; pud_t *pud; pmd_t *pmd; pgd = pgd_offset_pgd(root, VSYSCALL_ADDR); set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER)); p4d = p4d_offset(pgd, VSYSCALL_ADDR); #if CONFIG_PGTABLE_LEVELS >= 5 set_p4d(p4d, __p4d(p4d_val(*p4d) | _PAGE_USER)); #endif pud = pud_offset(p4d, VSYSCALL_ADDR); set_pud(pud, __pud(pud_val(*pud) | _PAGE_USER)); pmd = pmd_offset(pud, VSYSCALL_ADDR); set_pmd(pmd, __pmd(pmd_val(*pmd) | _PAGE_USER)); } void __init map_vsyscall(void) { extern char __vsyscall_page; unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page); /* * For full emulation, the page needs to exist for real. In * execute-only mode, there is no PTE at all backing the vsyscall * page. */ if (vsyscall_mode == EMULATE) { __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall, PAGE_KERNEL_VVAR); set_vsyscall_pgtable_user_bits(swapper_pg_dir); } if (vsyscall_mode == XONLY) vm_flags_init(&gate_vma, VM_EXEC); BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) != (unsigned long)VSYSCALL_ADDR); } |
36 36 36 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 | // SPDX-License-Identifier: GPL-2.0-or-later /* mpihelp-mul_1.c - MPI helper functions * Copyright (C) 1994, 1996, 1997, 1998, 2001 Free Software Foundation, Inc. * * This file is part of GnuPG. * * Note: This code is heavily based on the GNU MP Library. * Actually it's the same code with only minor changes in the * way the data is stored; this is to support the abstraction * of an optional secure memory allocation which may be used * to avoid revealing of sensitive data due to paging etc. * The GNU MP Library itself is published under the LGPL; * however I decided to publish this code under the plain GPL. */ #include "mpi-internal.h" #include "longlong.h" mpi_limb_t mpihelp_mul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, mpi_limb_t s2_limb) { mpi_limb_t cy_limb; mpi_size_t j; mpi_limb_t prod_high, prod_low; /* The loop counter and index J goes from -S1_SIZE to -1. This way * the loop becomes faster. */ j = -s1_size; /* Offset the base pointers to compensate for the negative indices. */ s1_ptr -= j; res_ptr -= j; cy_limb = 0; do { umul_ppmm(prod_high, prod_low, s1_ptr[j], s2_limb); prod_low += cy_limb; cy_limb = (prod_low < cy_limb ? 1 : 0) + prod_high; res_ptr[j] = prod_low; } while (++j); return cy_limb; } |
5 9 5 4 4 9 6 3 2 9 7 9 4 9 4 4 4 4 4 4 4 4 4 4 4 4 4 3 1 5 5 5 5 5 4 1 4 4 3 1 2 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 | // SPDX-License-Identifier: GPL-2.0-only #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <linux/netfilter_ipv6.h> #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_tables.h> #include <net/netfilter/nft_fib.h> #include <net/ip6_fib.h> #include <net/ip6_route.h> static int get_ifindex(const struct net_device *dev) { return dev ? dev->ifindex : 0; } static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv, const struct nft_pktinfo *pkt, const struct net_device *dev, struct ipv6hdr *iph) { int lookup_flags = 0; if (priv->flags & NFTA_FIB_F_DADDR) { fl6->daddr = iph->daddr; fl6->saddr = iph->saddr; } else { if (nft_hook(pkt) == NF_INET_FORWARD && priv->flags & NFTA_FIB_F_IIF) fl6->flowi6_iif = nft_out(pkt)->ifindex; fl6->daddr = iph->saddr; fl6->saddr = iph->daddr; } if (ipv6_addr_type(&fl6->daddr) & IPV6_ADDR_LINKLOCAL) { lookup_flags |= RT6_LOOKUP_F_IFACE; fl6->flowi6_oif = get_ifindex(dev ? dev : pkt->skb->dev); } else if (priv->flags & NFTA_FIB_F_IIF) { fl6->flowi6_l3mdev = l3mdev_master_ifindex_rcu(dev); } if (ipv6_addr_type(&fl6->saddr) & IPV6_ADDR_UNICAST) lookup_flags |= RT6_LOOKUP_F_HAS_SADDR; if (priv->flags & NFTA_FIB_F_MARK) fl6->flowi6_mark = pkt->skb->mark; fl6->flowlabel = (*(__be32 *)iph) & IPV6_FLOWINFO_MASK; return lookup_flags; } static u32 __nft_fib6_eval_type(const struct nft_fib *priv, const struct nft_pktinfo *pkt, struct ipv6hdr *iph) { const struct net_device *dev = NULL; int route_err, addrtype; struct rt6_info *rt; struct flowi6 fl6 = { .flowi6_iif = LOOPBACK_IFINDEX, .flowi6_proto = pkt->tprot, .flowi6_uid = sock_net_uid(nft_net(pkt), NULL), }; u32 ret = 0; if (priv->flags & NFTA_FIB_F_IIF) dev = nft_in(pkt); else if (priv->flags & NFTA_FIB_F_OIF) dev = nft_out(pkt); nft_fib6_flowi_init(&fl6, priv, pkt, dev, iph); if (dev && nf_ipv6_chk_addr(nft_net(pkt), &fl6.daddr, dev, true)) ret = RTN_LOCAL; route_err = nf_ip6_route(nft_net(pkt), (struct dst_entry **)&rt, flowi6_to_flowi(&fl6), false); if (route_err) goto err; if (rt->rt6i_flags & RTF_REJECT) { route_err = rt->dst.error; dst_release(&rt->dst); goto err; } if (ipv6_anycast_destination((struct dst_entry *)rt, &fl6.daddr)) ret = RTN_ANYCAST; else if (!dev && rt->rt6i_flags & RTF_LOCAL) ret = RTN_LOCAL; dst_release(&rt->dst); if (ret) return ret; addrtype = ipv6_addr_type(&fl6.daddr); if (addrtype & IPV6_ADDR_MULTICAST) return RTN_MULTICAST; if (addrtype & IPV6_ADDR_UNICAST) return RTN_UNICAST; return RTN_UNSPEC; err: switch (route_err) { case -EINVAL: return RTN_BLACKHOLE; case -EACCES: return RTN_PROHIBIT; case -EAGAIN: return RTN_THROW; default: break; } return RTN_UNREACHABLE; } void nft_fib6_eval_type(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_fib *priv = nft_expr_priv(expr); int noff = skb_network_offset(pkt->skb); u32 *dest = ®s->data[priv->dreg]; struct ipv6hdr *iph, _iph; iph = skb_header_pointer(pkt->skb, noff, sizeof(_iph), &_iph); if (!iph) { regs->verdict.code = NFT_BREAK; return; } *dest = __nft_fib6_eval_type(priv, pkt, iph); } EXPORT_SYMBOL_GPL(nft_fib6_eval_type); static bool nft_fib_v6_skip_icmpv6(const struct sk_buff *skb, u8 next, const struct ipv6hdr *iph) { if (likely(next != IPPROTO_ICMPV6)) return false; if (ipv6_addr_type(&iph->saddr) != IPV6_ADDR_ANY) return false; return ipv6_addr_type(&iph->daddr) & IPV6_ADDR_LINKLOCAL; } void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_fib *priv = nft_expr_priv(expr); int noff = skb_network_offset(pkt->skb); const struct net_device *oif = NULL; u32 *dest = ®s->data[priv->dreg]; struct ipv6hdr *iph, _iph; struct flowi6 fl6 = { .flowi6_iif = LOOPBACK_IFINDEX, .flowi6_proto = pkt->tprot, .flowi6_uid = sock_net_uid(nft_net(pkt), NULL), }; struct rt6_info *rt; int lookup_flags; if (priv->flags & NFTA_FIB_F_IIF) oif = nft_in(pkt); else if (priv->flags & NFTA_FIB_F_OIF) oif = nft_out(pkt); iph = skb_header_pointer(pkt->skb, noff, sizeof(_iph), &_iph); if (!iph) { regs->verdict.code = NFT_BREAK; return; } lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif, iph); if (nft_hook(pkt) == NF_INET_PRE_ROUTING || nft_hook(pkt) == NF_INET_INGRESS) { if (nft_fib_is_loopback(pkt->skb, nft_in(pkt)) || nft_fib_v6_skip_icmpv6(pkt->skb, pkt->tprot, iph)) { nft_fib_store_result(dest, priv, nft_in(pkt)); return; } } *dest = 0; rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, pkt->skb, lookup_flags); if (rt->dst.error) goto put_rt_err; /* Should not see RTF_LOCAL here */ if (rt->rt6i_flags & (RTF_REJECT | RTF_ANYCAST | RTF_LOCAL)) goto put_rt_err; if (oif && oif != rt->rt6i_idev->dev && l3mdev_master_ifindex_rcu(rt->rt6i_idev->dev) != oif->ifindex) goto put_rt_err; nft_fib_store_result(dest, priv, rt->rt6i_idev->dev); put_rt_err: ip6_rt_put(rt); } EXPORT_SYMBOL_GPL(nft_fib6_eval); static struct nft_expr_type nft_fib6_type; static const struct nft_expr_ops nft_fib6_type_ops = { .type = &nft_fib6_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)), .eval = nft_fib6_eval_type, .init = nft_fib_init, .dump = nft_fib_dump, .validate = nft_fib_validate, .reduce = nft_fib_reduce, }; static const struct nft_expr_ops nft_fib6_ops = { .type = &nft_fib6_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)), .eval = nft_fib6_eval, .init = nft_fib_init, .dump = nft_fib_dump, .validate = nft_fib_validate, .reduce = nft_fib_reduce, }; static const struct nft_expr_ops * nft_fib6_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { enum nft_fib_result result; if (!tb[NFTA_FIB_RESULT]) return ERR_PTR(-EINVAL); result = ntohl(nla_get_be32(tb[NFTA_FIB_RESULT])); switch (result) { case NFT_FIB_RESULT_OIF: return &nft_fib6_ops; case NFT_FIB_RESULT_OIFNAME: return &nft_fib6_ops; case NFT_FIB_RESULT_ADDRTYPE: return &nft_fib6_type_ops; default: return ERR_PTR(-EOPNOTSUPP); } } static struct nft_expr_type nft_fib6_type __read_mostly = { .name = "fib", .select_ops = nft_fib6_select_ops, .policy = nft_fib_policy, .maxattr = NFTA_FIB_MAX, .family = NFPROTO_IPV6, .owner = THIS_MODULE, }; static int __init nft_fib6_module_init(void) { return nft_register_expr(&nft_fib6_type); } static void __exit nft_fib6_module_exit(void) { nft_unregister_expr(&nft_fib6_type); } module_init(nft_fib6_module_init); module_exit(nft_fib6_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Florian Westphal <fw@strlen.de>"); MODULE_ALIAS_NFT_AF_EXPR(10, "fib"); MODULE_DESCRIPTION("nftables fib / ipv6 route lookup support"); |
1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 | // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C)2006 USAGI/WIDE Project * * Author: * Masahide NAKAMURA @USAGI <masahide.nakamura.cz@hitachi.com> * * Based on net/netfilter/xt_tcpudp.c */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/types.h> #include <linux/module.h> #include <net/ip.h> #include <linux/ipv6.h> #include <net/ipv6.h> #include <net/mip6.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv6/ip6t_mh.h> MODULE_DESCRIPTION("Xtables: IPv6 Mobility Header match"); MODULE_LICENSE("GPL"); /* Returns 1 if the type is matched by the range, 0 otherwise */ static inline bool type_match(u_int8_t min, u_int8_t max, u_int8_t type, bool invert) { return (type >= min && type <= max) ^ invert; } static bool mh_mt6(const struct sk_buff *skb, struct xt_action_param *par) { struct ip6_mh _mh; const struct ip6_mh *mh; const struct ip6t_mh *mhinfo = par->matchinfo; /* Must not be a fragment. */ if (par->fragoff != 0) return false; mh = skb_header_pointer(skb, par->thoff, sizeof(_mh), &_mh); if (mh == NULL) { /* We've been asked to examine this packet, and we can't. Hence, no choice but to drop. */ pr_debug("Dropping evil MH tinygram.\n"); par->hotdrop = true; return false; } if (mh->ip6mh_proto != IPPROTO_NONE) { pr_debug("Dropping invalid MH Payload Proto: %u\n", mh->ip6mh_proto); par->hotdrop = true; return false; } return type_match(mhinfo->types[0], mhinfo->types[1], mh->ip6mh_type, !!(mhinfo->invflags & IP6T_MH_INV_TYPE)); } static int mh_mt6_check(const struct xt_mtchk_param *par) { const struct ip6t_mh *mhinfo = par->matchinfo; /* Must specify no unknown invflags */ return (mhinfo->invflags & ~IP6T_MH_INV_MASK) ? -EINVAL : 0; } static struct xt_match mh_mt6_reg __read_mostly = { .name = "mh", .family = NFPROTO_IPV6, .checkentry = mh_mt6_check, .match = mh_mt6, .matchsize = sizeof(struct ip6t_mh), .proto = IPPROTO_MH, .me = THIS_MODULE, }; static int __init mh_mt6_init(void) { return xt_register_match(&mh_mt6_reg); } static void __exit mh_mt6_exit(void) { xt_unregister_match(&mh_mt6_reg); } module_init(mh_mt6_init); module_exit(mh_mt6_exit); |
2 68 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __IEEE802154_CORE_H #define __IEEE802154_CORE_H #include <net/cfg802154.h> struct cfg802154_registered_device { const struct cfg802154_ops *ops; struct list_head list; /* wpan_phy index, internal only */ int wpan_phy_idx; /* also protected by devlist_mtx */ int opencount; wait_queue_head_t dev_wait; /* protected by RTNL only */ int num_running_ifaces; /* associated wpan interfaces, protected by rtnl or RCU */ struct list_head wpan_dev_list; int devlist_generation, wpan_dev_id; /* must be last because of the way we do wpan_phy_priv(), * and it should at least be aligned to NETDEV_ALIGN */ struct wpan_phy wpan_phy __aligned(NETDEV_ALIGN); }; static inline struct cfg802154_registered_device * wpan_phy_to_rdev(struct wpan_phy *wpan_phy) { BUG_ON(!wpan_phy); return container_of(wpan_phy, struct cfg802154_registered_device, wpan_phy); } extern struct list_head cfg802154_rdev_list; extern int cfg802154_rdev_list_generation; int cfg802154_switch_netns(struct cfg802154_registered_device *rdev, struct net *net); /* free object */ void cfg802154_dev_free(struct cfg802154_registered_device *rdev); struct cfg802154_registered_device * cfg802154_rdev_by_wpan_phy_idx(int wpan_phy_idx); struct wpan_phy *wpan_phy_idx_to_wpan_phy(int wpan_phy_idx); #endif /* __IEEE802154_CORE_H */ |
215 2 4 161 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef BTRFS_BLOCK_GROUP_H #define BTRFS_BLOCK_GROUP_H #include <linux/atomic.h> #include <linux/mutex.h> #include <linux/list.h> #include <linux/spinlock.h> #include <linux/refcount.h> #include <linux/wait.h> #include <linux/sizes.h> #include <linux/rwsem.h> #include <linux/rbtree.h> #include <uapi/linux/btrfs_tree.h> #include "free-space-cache.h" struct btrfs_chunk_map; struct btrfs_fs_info; struct btrfs_inode; struct btrfs_trans_handle; enum btrfs_disk_cache_state { BTRFS_DC_WRITTEN, BTRFS_DC_ERROR, BTRFS_DC_CLEAR, BTRFS_DC_SETUP, }; enum btrfs_block_group_size_class { /* Unset */ BTRFS_BG_SZ_NONE, /* 0 < size <= 128K */ BTRFS_BG_SZ_SMALL, /* 128K < size <= 8M */ BTRFS_BG_SZ_MEDIUM, /* 8M < size < BG_LENGTH */ BTRFS_BG_SZ_LARGE, }; /* * This describes the state of the block_group for async discard. This is due * to the two pass nature of it where extent discarding is prioritized over * bitmap discarding. BTRFS_DISCARD_RESET_CURSOR is set when we are resetting * between lists to prevent contention for discard state variables * (eg. discard_cursor). */ enum btrfs_discard_state { BTRFS_DISCARD_EXTENTS, BTRFS_DISCARD_BITMAPS, BTRFS_DISCARD_RESET_CURSOR, }; /* * Control flags for do_chunk_alloc's force field CHUNK_ALLOC_NO_FORCE means to * only allocate a chunk if we really need one. * * CHUNK_ALLOC_LIMITED means to only try and allocate one if we have very few * chunks already allocated. This is used as part of the clustering code to * help make sure we have a good pool of storage to cluster in, without filling * the FS with empty chunks * * CHUNK_ALLOC_FORCE means it must try to allocate one * * CHUNK_ALLOC_FORCE_FOR_EXTENT like CHUNK_ALLOC_FORCE but called from * find_free_extent() that also activaes the zone */ enum btrfs_chunk_alloc_enum { CHUNK_ALLOC_NO_FORCE, CHUNK_ALLOC_LIMITED, CHUNK_ALLOC_FORCE, CHUNK_ALLOC_FORCE_FOR_EXTENT, }; /* Block group flags set at runtime */ enum btrfs_block_group_flags { BLOCK_GROUP_FLAG_IREF, BLOCK_GROUP_FLAG_REMOVED, BLOCK_GROUP_FLAG_TO_COPY, BLOCK_GROUP_FLAG_RELOCATING_REPAIR, BLOCK_GROUP_FLAG_CHUNK_ITEM_INSERTED, BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, /* Does the block group need to be added to the free space tree? */ BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, /* Indicate that the block group is placed on a sequential zone */ BLOCK_GROUP_FLAG_SEQUENTIAL_ZONE, /* * Indicate that block group is in the list of new block groups of a * transaction. */ BLOCK_GROUP_FLAG_NEW, }; enum btrfs_caching_type { BTRFS_CACHE_NO, BTRFS_CACHE_STARTED, BTRFS_CACHE_FINISHED, BTRFS_CACHE_ERROR, }; struct btrfs_caching_control { struct list_head list; struct mutex mutex; wait_queue_head_t wait; struct btrfs_work work; struct btrfs_block_group *block_group; /* Track progress of caching during allocation. */ atomic_t progress; refcount_t count; }; /* Once caching_thread() finds this much free space, it will wake up waiters. */ #define CACHING_CTL_WAKE_UP SZ_2M struct btrfs_block_group { struct btrfs_fs_info *fs_info; struct btrfs_inode *inode; spinlock_t lock; u64 start; u64 length; u64 pinned; u64 reserved; u64 used; u64 delalloc_bytes; u64 bytes_super; u64 flags; u64 cache_generation; u64 global_root_id; /* * The last committed used bytes of this block group, if the above @used * is still the same as @commit_used, we don't need to update block * group item of this block group. */ u64 commit_used; /* * If the free space extent count exceeds this number, convert the block * group to bitmaps. */ u32 bitmap_high_thresh; /* * If the free space extent count drops below this number, convert the * block group back to extents. */ u32 bitmap_low_thresh; /* * It is just used for the delayed data space allocation because * only the data space allocation and the relative metadata update * can be done cross the transaction. */ struct rw_semaphore data_rwsem; /* For raid56, this is a full stripe, without parity */ unsigned long full_stripe_len; unsigned long runtime_flags; unsigned int ro; int disk_cache_state; /* Cache tracking stuff */ int cached; struct btrfs_caching_control *caching_ctl; struct btrfs_space_info *space_info; /* Free space cache stuff */ struct btrfs_free_space_ctl *free_space_ctl; /* Block group cache stuff */ struct rb_node cache_node; /* For block groups in the same raid type */ struct list_head list; refcount_t refs; /* * List of struct btrfs_free_clusters for this block group. * Today it will only have one thing on it, but that may change */ struct list_head cluster_list; /* * Used for several lists: * * 1) struct btrfs_fs_info::unused_bgs * 2) struct btrfs_fs_info::reclaim_bgs * 3) struct btrfs_transaction::deleted_bgs * 4) struct btrfs_trans_handle::new_bgs */ struct list_head bg_list; /* For read-only block groups */ struct list_head ro_list; /* * When non-zero it means the block group's logical address and its * device extents can not be reused for future block group allocations * until the counter goes down to 0. This is to prevent them from being * reused while some task is still using the block group after it was * deleted - we want to make sure they can only be reused for new block * groups after that task is done with the deleted block group. */ atomic_t frozen; /* For discard operations */ struct list_head discard_list; int discard_index; u64 discard_eligible_time; u64 discard_cursor; enum btrfs_discard_state discard_state; /* For dirty block groups */ struct list_head dirty_list; struct list_head io_list; struct btrfs_io_ctl io_ctl; /* * Incremented when doing extent allocations and holding a read lock * on the space_info's groups_sem semaphore. * Decremented when an ordered extent that represents an IO against this * block group's range is created (after it's added to its inode's * root's list of ordered extents) or immediately after the allocation * if it's a metadata extent or fallocate extent (for these cases we * don't create ordered extents). */ atomic_t reservations; /* * Incremented while holding the spinlock *lock* by a task checking if * it can perform a nocow write (incremented if the value for the *ro* * field is 0). Decremented by such tasks once they create an ordered * extent or before that if some error happens before reaching that step. * This is to prevent races between block group relocation and nocow * writes through direct IO. */ atomic_t nocow_writers; /* Lock for free space tree operations. */ struct mutex free_space_lock; /* * Number of extents in this block group used for swap files. * All accesses protected by the spinlock 'lock'. */ int swap_extents; /* * Allocation offset for the block group to implement sequential * allocation. This is used only on a zoned filesystem. */ u64 alloc_offset; u64 zone_unusable; u64 zone_capacity; u64 meta_write_pointer; struct btrfs_chunk_map *physical_map; struct list_head active_bg_list; struct work_struct zone_finish_work; struct extent_buffer *last_eb; enum btrfs_block_group_size_class size_class; u64 reclaim_mark; }; static inline u64 btrfs_block_group_end(const struct btrfs_block_group *block_group) { return (block_group->start + block_group->length); } static inline bool btrfs_is_block_group_used(const struct btrfs_block_group *bg) { lockdep_assert_held(&bg->lock); return (bg->used > 0 || bg->reserved > 0 || bg->pinned > 0); } static inline bool btrfs_is_block_group_data_only(const struct btrfs_block_group *block_group) { /* * In mixed mode the fragmentation is expected to be high, lowering the * efficiency, so only proper data block groups are considered. */ return (block_group->flags & BTRFS_BLOCK_GROUP_DATA) && !(block_group->flags & BTRFS_BLOCK_GROUP_METADATA); } #ifdef CONFIG_BTRFS_DEBUG int btrfs_should_fragment_free_space(const struct btrfs_block_group *block_group); #endif struct btrfs_block_group *btrfs_lookup_first_block_group( struct btrfs_fs_info *info, u64 bytenr); struct btrfs_block_group *btrfs_lookup_block_group( struct btrfs_fs_info *info, u64 bytenr); struct btrfs_block_group *btrfs_next_block_group( struct btrfs_block_group *cache); void btrfs_get_block_group(struct btrfs_block_group *cache); void btrfs_put_block_group(struct btrfs_block_group *cache); void btrfs_dec_block_group_reservations(struct btrfs_fs_info *fs_info, const u64 start); void btrfs_wait_block_group_reservations(struct btrfs_block_group *bg); struct btrfs_block_group *btrfs_inc_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr); void btrfs_dec_nocow_writers(struct btrfs_block_group *bg); void btrfs_wait_nocow_writers(struct btrfs_block_group *bg); void btrfs_wait_block_group_cache_progress(struct btrfs_block_group *cache, u64 num_bytes); int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait); struct btrfs_caching_control *btrfs_get_caching_control( struct btrfs_block_group *cache); int btrfs_add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end, u64 *total_added_ret); struct btrfs_trans_handle *btrfs_start_trans_remove_block_group( struct btrfs_fs_info *fs_info, const u64 chunk_offset); int btrfs_remove_block_group(struct btrfs_trans_handle *trans, struct btrfs_chunk_map *map); void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info); void btrfs_mark_bg_unused(struct btrfs_block_group *bg); void btrfs_reclaim_bgs_work(struct work_struct *work); void btrfs_reclaim_bgs(struct btrfs_fs_info *fs_info); void btrfs_mark_bg_to_reclaim(struct btrfs_block_group *bg); int btrfs_read_block_groups(struct btrfs_fs_info *info); struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 type, u64 chunk_offset, u64 size); void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans); int btrfs_inc_block_group_ro(struct btrfs_block_group *cache, bool do_chunk_alloc); void btrfs_dec_block_group_ro(struct btrfs_block_group *cache); int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans); int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans); int btrfs_setup_space_cache(struct btrfs_trans_handle *trans); int btrfs_update_block_group(struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, bool alloc); int btrfs_add_reserved_bytes(struct btrfs_block_group *cache, u64 ram_bytes, u64 num_bytes, int delalloc, bool force_wrong_size_class); void btrfs_free_reserved_bytes(struct btrfs_block_group *cache, u64 num_bytes, int delalloc); int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags, enum btrfs_chunk_alloc_enum force); int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type); void check_system_chunk(struct btrfs_trans_handle *trans, const u64 type); void btrfs_reserve_chunk_metadata(struct btrfs_trans_handle *trans, bool is_item_insertion); u64 btrfs_get_alloc_profile(struct btrfs_fs_info *fs_info, u64 orig_flags); void btrfs_put_block_group_cache(struct btrfs_fs_info *info); int btrfs_free_block_groups(struct btrfs_fs_info *info); int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start, u64 physical, u64 **logical, int *naddrs, int *stripe_len); static inline u64 btrfs_data_alloc_profile(struct btrfs_fs_info *fs_info) { return btrfs_get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_DATA); } static inline u64 btrfs_metadata_alloc_profile(struct btrfs_fs_info *fs_info) { return btrfs_get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_METADATA); } static inline u64 btrfs_system_alloc_profile(struct btrfs_fs_info *fs_info) { return btrfs_get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_SYSTEM); } static inline int btrfs_block_group_done(const struct btrfs_block_group *cache) { smp_mb(); return cache->cached == BTRFS_CACHE_FINISHED || cache->cached == BTRFS_CACHE_ERROR; } void btrfs_freeze_block_group(struct btrfs_block_group *cache); void btrfs_unfreeze_block_group(struct btrfs_block_group *cache); bool btrfs_inc_block_group_swap_extents(struct btrfs_block_group *bg); void btrfs_dec_block_group_swap_extents(struct btrfs_block_group *bg, int amount); enum btrfs_block_group_size_class btrfs_calc_block_group_size_class(u64 size); int btrfs_use_block_group_size_class(struct btrfs_block_group *bg, enum btrfs_block_group_size_class size_class, bool force_wrong_size_class); bool btrfs_block_group_should_use_size_class(const struct btrfs_block_group *bg); #endif /* BTRFS_BLOCK_GROUP_H */ |
167 167 167 167 167 167 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 | // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2019 Christoph Hellwig. */ #include "xfs.h" static inline unsigned int bio_max_vecs(unsigned int count) { return bio_max_segs(howmany(count, PAGE_SIZE)); } int xfs_rw_bdev( struct block_device *bdev, sector_t sector, unsigned int count, char *data, enum req_op op) { unsigned int is_vmalloc = is_vmalloc_addr(data); unsigned int left = count; int error; struct bio *bio; if (is_vmalloc && op == REQ_OP_WRITE) flush_kernel_vmap_range(data, count); bio = bio_alloc(bdev, bio_max_vecs(left), op | REQ_META | REQ_SYNC, GFP_KERNEL); bio->bi_iter.bi_sector = sector; do { struct page *page = kmem_to_page(data); unsigned int off = offset_in_page(data); unsigned int len = min_t(unsigned, left, PAGE_SIZE - off); while (bio_add_page(bio, page, len, off) != len) { struct bio *prev = bio; bio = bio_alloc(prev->bi_bdev, bio_max_vecs(left), prev->bi_opf, GFP_KERNEL); bio->bi_iter.bi_sector = bio_end_sector(prev); bio_chain(prev, bio); submit_bio(prev); } data += len; left -= len; } while (left > 0); error = submit_bio_wait(bio); bio_put(bio); if (is_vmalloc && op == REQ_OP_READ) invalidate_kernel_vmap_range(data, count); return error; } |
233 6 6 6 6 6 6 6 16 16 13 13 11 2 14 1 14 3 7 3 3 3 2 3 2 14 12 14 15 4 16 14 9 13 12 12 13 9 13 9 1 1 9 8 8 2 1 13 10 13 13 8 10 9 9 9 9 9 1 16 14 16 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Generic address resolution entity * * Authors: * net_random Alan Cox * net_ratelimit Andi Kleen * in{4,6}_pton YOSHIFUJI Hideaki, Copyright (C)2006 USAGI/WIDE Project * * Created by Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> */ #include <linux/module.h> #include <linux/jiffies.h> #include <linux/kernel.h> #include <linux/ctype.h> #include <linux/inet.h> #include <linux/mm.h> #include <linux/net.h> #include <linux/string.h> #include <linux/types.h> #include <linux/percpu.h> #include <linux/init.h> #include <linux/ratelimit.h> #include <linux/socket.h> #include <net/sock.h> #include <net/net_ratelimit.h> #include <net/ipv6.h> #include <asm/byteorder.h> #include <linux/uaccess.h> DEFINE_RATELIMIT_STATE(net_ratelimit_state, 5 * HZ, 10); /* * All net warning printk()s should be guarded by this function. */ int net_ratelimit(void) { return __ratelimit(&net_ratelimit_state); } EXPORT_SYMBOL(net_ratelimit); /* * Convert an ASCII string to binary IP. * This is outside of net/ipv4/ because various code that uses IP addresses * is otherwise not dependent on the TCP/IP stack. */ __be32 in_aton(const char *str) { unsigned int l; unsigned int val; int i; l = 0; for (i = 0; i < 4; i++) { l <<= 8; if (*str != '\0') { val = 0; while (*str != '\0' && *str != '.' && *str != '\n') { val *= 10; val += *str - '0'; str++; } l |= val; if (*str != '\0') str++; } } return htonl(l); } EXPORT_SYMBOL(in_aton); #define IN6PTON_XDIGIT 0x00010000 #define IN6PTON_DIGIT 0x00020000 #define IN6PTON_COLON_MASK 0x00700000 #define IN6PTON_COLON_1 0x00100000 /* single : requested */ #define IN6PTON_COLON_2 0x00200000 /* second : requested */ #define IN6PTON_COLON_1_2 0x00400000 /* :: requested */ #define IN6PTON_DOT 0x00800000 /* . */ #define IN6PTON_DELIM 0x10000000 #define IN6PTON_NULL 0x20000000 /* first/tail */ #define IN6PTON_UNKNOWN 0x40000000 static inline int xdigit2bin(char c, int delim) { int val; if (c == delim || c == '\0') return IN6PTON_DELIM; if (c == ':') return IN6PTON_COLON_MASK; if (c == '.') return IN6PTON_DOT; val = hex_to_bin(c); if (val >= 0) return val | IN6PTON_XDIGIT | (val < 10 ? IN6PTON_DIGIT : 0); if (delim == -1) return IN6PTON_DELIM; return IN6PTON_UNKNOWN; } /** * in4_pton - convert an IPv4 address from literal to binary representation * @src: the start of the IPv4 address string * @srclen: the length of the string, -1 means strlen(src) * @dst: the binary (u8[4] array) representation of the IPv4 address * @delim: the delimiter of the IPv4 address in @src, -1 means no delimiter * @end: A pointer to the end of the parsed string will be placed here * * Return one on success, return zero when any error occurs * and @end will point to the end of the parsed string. * */ int in4_pton(const char *src, int srclen, u8 *dst, int delim, const char **end) { const char *s; u8 *d; u8 dbuf[4]; int ret = 0; int i; int w = 0; if (srclen < 0) srclen = strlen(src); s = src; d = dbuf; i = 0; while (1) { int c; c = xdigit2bin(srclen > 0 ? *s : '\0', delim); if (!(c & (IN6PTON_DIGIT | IN6PTON_DOT | IN6PTON_DELIM | IN6PTON_COLON_MASK))) { goto out; } if (c & (IN6PTON_DOT | IN6PTON_DELIM | IN6PTON_COLON_MASK)) { if (w == 0) goto out; *d++ = w & 0xff; w = 0; i++; if (c & (IN6PTON_DELIM | IN6PTON_COLON_MASK)) { if (i != 4) goto out; break; } goto cont; } w = (w * 10) + c; if ((w & 0xffff) > 255) { goto out; } cont: if (i >= 4) goto out; s++; srclen--; } ret = 1; memcpy(dst, dbuf, sizeof(dbuf)); out: if (end) *end = s; return ret; } EXPORT_SYMBOL(in4_pton); /** * in6_pton - convert an IPv6 address from literal to binary representation * @src: the start of the IPv6 address string * @srclen: the length of the string, -1 means strlen(src) * @dst: the binary (u8[16] array) representation of the IPv6 address * @delim: the delimiter of the IPv6 address in @src, -1 means no delimiter * @end: A pointer to the end of the parsed string will be placed here * * Return one on success, return zero when any error occurs * and @end will point to the end of the parsed string. * */ int in6_pton(const char *src, int srclen, u8 *dst, int delim, const char **end) { const char *s, *tok = NULL; u8 *d, *dc = NULL; u8 dbuf[16]; int ret = 0; int i; int state = IN6PTON_COLON_1_2 | IN6PTON_XDIGIT | IN6PTON_NULL; int w = 0; memset(dbuf, 0, sizeof(dbuf)); s = src; d = dbuf; if (srclen < 0) srclen = strlen(src); while (1) { int c; c = xdigit2bin(srclen > 0 ? *s : '\0', delim); if (!(c & state)) goto out; if (c & (IN6PTON_DELIM | IN6PTON_COLON_MASK)) { /* process one 16-bit word */ if (!(state & IN6PTON_NULL)) { *d++ = (w >> 8) & 0xff; *d++ = w & 0xff; } w = 0; if (c & IN6PTON_DELIM) { /* We've processed last word */ break; } /* * COLON_1 => XDIGIT * COLON_2 => XDIGIT|DELIM * COLON_1_2 => COLON_2 */ switch (state & IN6PTON_COLON_MASK) { case IN6PTON_COLON_2: dc = d; state = IN6PTON_XDIGIT | IN6PTON_DELIM; if (dc - dbuf >= sizeof(dbuf)) state |= IN6PTON_NULL; break; case IN6PTON_COLON_1|IN6PTON_COLON_1_2: state = IN6PTON_XDIGIT | IN6PTON_COLON_2; break; case IN6PTON_COLON_1: state = IN6PTON_XDIGIT; break; case IN6PTON_COLON_1_2: state = IN6PTON_COLON_2; break; default: state = 0; } tok = s + 1; goto cont; } if (c & IN6PTON_DOT) { ret = in4_pton(tok ? tok : s, srclen + (int)(s - tok), d, delim, &s); if (ret > 0) { d += 4; break; } goto out; } w = (w << 4) | (0xff & c); state = IN6PTON_COLON_1 | IN6PTON_DELIM; if (!(w & 0xf000)) { state |= IN6PTON_XDIGIT; } if (!dc && d + 2 < dbuf + sizeof(dbuf)) { state |= IN6PTON_COLON_1_2; state &= ~IN6PTON_DELIM; } if (d + 2 >= dbuf + sizeof(dbuf)) { state &= ~(IN6PTON_COLON_1|IN6PTON_COLON_1_2); } cont: if ((dc && d + 4 < dbuf + sizeof(dbuf)) || d + 4 == dbuf + sizeof(dbuf)) { state |= IN6PTON_DOT; } if (d >= dbuf + sizeof(dbuf)) { state &= ~(IN6PTON_XDIGIT|IN6PTON_COLON_MASK); } s++; srclen--; } i = 15; d--; if (dc) { while (d >= dc) dst[i--] = *d--; while (i >= dc - dbuf) dst[i--] = 0; while (i >= 0) dst[i--] = *d--; } else memcpy(dst, dbuf, sizeof(dbuf)); ret = 1; out: if (end) *end = s; return ret; } EXPORT_SYMBOL(in6_pton); static int inet4_pton(const char *src, u16 port_num, struct sockaddr_storage *addr) { struct sockaddr_in *addr4 = (struct sockaddr_in *)addr; size_t srclen = strlen(src); if (srclen > INET_ADDRSTRLEN) return -EINVAL; if (in4_pton(src, srclen, (u8 *)&addr4->sin_addr.s_addr, '\n', NULL) == 0) return -EINVAL; addr4->sin_family = AF_INET; addr4->sin_port = htons(port_num); return 0; } static int inet6_pton(struct net *net, const char *src, u16 port_num, struct sockaddr_storage *addr) { struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)addr; const char *scope_delim; size_t srclen = strlen(src); if (srclen > INET6_ADDRSTRLEN) return -EINVAL; if (in6_pton(src, srclen, (u8 *)&addr6->sin6_addr.s6_addr, '%', &scope_delim) == 0) return -EINVAL; if (ipv6_addr_type(&addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL && src + srclen != scope_delim && *scope_delim == '%') { struct net_device *dev; char scope_id[16]; size_t scope_len = min_t(size_t, sizeof(scope_id) - 1, src + srclen - scope_delim - 1); memcpy(scope_id, scope_delim + 1, scope_len); scope_id[scope_len] = '\0'; dev = dev_get_by_name(net, scope_id); if (dev) { addr6->sin6_scope_id = dev->ifindex; dev_put(dev); } else if (kstrtouint(scope_id, 0, &addr6->sin6_scope_id)) { return -EINVAL; } } addr6->sin6_family = AF_INET6; addr6->sin6_port = htons(port_num); return 0; } /** * inet_pton_with_scope - convert an IPv4/IPv6 and port to socket address * @net: net namespace (used for scope handling) * @af: address family, AF_INET, AF_INET6 or AF_UNSPEC for either * @src: the start of the address string * @port: the start of the port string (or NULL for none) * @addr: output socket address * * Return zero on success, return errno when any error occurs. */ int inet_pton_with_scope(struct net *net, __kernel_sa_family_t af, const char *src, const char *port, struct sockaddr_storage *addr) { u16 port_num; int ret = -EINVAL; if (port) { if (kstrtou16(port, 0, &port_num)) return -EINVAL; } else { port_num = 0; } switch (af) { case AF_INET: ret = inet4_pton(src, port_num, addr); break; case AF_INET6: ret = inet6_pton(net, src, port_num, addr); break; case AF_UNSPEC: ret = inet4_pton(src, port_num, addr); if (ret) ret = inet6_pton(net, src, port_num, addr); break; default: pr_err("unexpected address family %d\n", af); } return ret; } EXPORT_SYMBOL(inet_pton_with_scope); bool inet_addr_is_any(struct sockaddr *addr) { if (addr->sa_family == AF_INET6) { struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)addr; const struct sockaddr_in6 in6_any = { .sin6_addr = IN6ADDR_ANY_INIT }; if (!memcmp(in6->sin6_addr.s6_addr, in6_any.sin6_addr.s6_addr, 16)) return true; } else if (addr->sa_family == AF_INET) { struct sockaddr_in *in = (struct sockaddr_in *)addr; if (in->sin_addr.s_addr == htonl(INADDR_ANY)) return true; } else { pr_warn("unexpected address family %u\n", addr->sa_family); } return false; } EXPORT_SYMBOL(inet_addr_is_any); void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb, __be32 from, __be32 to, bool pseudohdr) { if (skb->ip_summed != CHECKSUM_PARTIAL) { csum_replace4(sum, from, to); if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr) skb->csum = ~csum_add(csum_sub(~(skb->csum), (__force __wsum)from), (__force __wsum)to); } else if (pseudohdr) *sum = ~csum_fold(csum_add(csum_sub(csum_unfold(*sum), (__force __wsum)from), (__force __wsum)to)); } EXPORT_SYMBOL(inet_proto_csum_replace4); /** * inet_proto_csum_replace16 - update layer 4 header checksum field * @sum: Layer 4 header checksum field * @skb: sk_buff for the packet * @from: old IPv6 address * @to: new IPv6 address * @pseudohdr: True if layer 4 header checksum includes pseudoheader * * Update layer 4 header as per the update in IPv6 src/dst address. * * There is no need to update skb->csum in this function, because update in two * fields a.) IPv6 src/dst address and b.) L4 header checksum cancels each other * for skb->csum calculation. Whereas inet_proto_csum_replace4 function needs to * update skb->csum, because update in 3 fields a.) IPv4 src/dst address, * b.) IPv4 Header checksum and c.) L4 header checksum results in same diff as * L4 Header checksum for skb->csum calculation. */ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb, const __be32 *from, const __be32 *to, bool pseudohdr) { __be32 diff[] = { ~from[0], ~from[1], ~from[2], ~from[3], to[0], to[1], to[2], to[3], }; if (skb->ip_summed != CHECKSUM_PARTIAL) { *sum = csum_fold(csum_partial(diff, sizeof(diff), ~csum_unfold(*sum))); } else if (pseudohdr) *sum = ~csum_fold(csum_partial(diff, sizeof(diff), csum_unfold(*sum))); } EXPORT_SYMBOL(inet_proto_csum_replace16); void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb, __wsum diff, bool pseudohdr) { if (skb->ip_summed != CHECKSUM_PARTIAL) { csum_replace_by_diff(sum, diff); if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr) skb->csum = ~csum_sub(diff, skb->csum); } else if (pseudohdr) { *sum = ~csum_fold(csum_add(diff, csum_unfold(*sum))); } } EXPORT_SYMBOL(inet_proto_csum_replace_by_diff); |
1 1 1 3 1 1 3 3 1 1 3 2 2 2 3 3 2 8 8 4 8 4 3 3 3 1 3 3 3 2 3 4 1 5 5 5 5 3 2 5 3 11 11 11 11 11 1 11 6 11 10 11 10 11 1 9 5 10 3 3 3 3 2 5 5 3 3 3 8 8 8 5 8 3 7 4 4 2 2 4 4 4 4 4 4 4 4 4 4 4 4 4 11 3 2 1 1 1 1 1 1 1 1 1 1 2 3 20 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 | // SPDX-License-Identifier: GPL-2.0 /* * IPVS An implementation of the IP virtual server support for the * LINUX operating system. IPVS is now implemented as a module * over the NetFilter framework. IPVS can be used to build a * high-performance and highly available server based on a * cluster of servers. * * Version 1, is capable of handling both version 0 and 1 messages. * Version 0 is the plain old format. * Note Version 0 receivers will just drop Ver 1 messages. * Version 1 is capable of handle IPv6, Persistence data, * time-outs, and firewall marks. * In ver.1 "ip_vs_sync_conn_options" will be sent in netw. order. * Ver. 0 can be turned on by sysctl -w net.ipv4.vs.sync_version=0 * * Definitions Message: is a complete datagram * Sync_conn: is a part of a Message * Param Data is an option to a Sync_conn. * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * * ip_vs_sync: sync connection info from master load balancer to backups * through multicast * * Changes: * Alexandre Cassen : Added master & backup support at a time. * Alexandre Cassen : Added SyncID support for incoming sync * messages filtering. * Justin Ossevoort : Fix endian problem on sync message size. * Hans Schillstrom : Added Version 1: i.e. IPv6, * Persistence support, fwmark and time-out. */ #define KMSG_COMPONENT "IPVS" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <linux/module.h> #include <linux/slab.h> #include <linux/inetdevice.h> #include <linux/net.h> #include <linux/completion.h> #include <linux/delay.h> #include <linux/skbuff.h> #include <linux/in.h> #include <linux/igmp.h> /* for ip_mc_join_group */ #include <linux/udp.h> #include <linux/err.h> #include <linux/kthread.h> #include <linux/wait.h> #include <linux/kernel.h> #include <linux/sched/signal.h> #include <linux/unaligned.h> /* Used for ntoh_seq and hton_seq */ #include <net/ip.h> #include <net/sock.h> #include <net/ip_vs.h> #define IP_VS_SYNC_GROUP 0xe0000051 /* multicast addr - 224.0.0.81 */ #define IP_VS_SYNC_PORT 8848 /* multicast port */ #define SYNC_PROTO_VER 1 /* Protocol version in header */ static struct lock_class_key __ipvs_sync_key; /* * IPVS sync connection entry * Version 0, i.e. original version. */ struct ip_vs_sync_conn_v0 { __u8 reserved; /* Protocol, addresses and port numbers */ __u8 protocol; /* Which protocol (TCP/UDP) */ __be16 cport; __be16 vport; __be16 dport; __be32 caddr; /* client address */ __be32 vaddr; /* virtual address */ __be32 daddr; /* destination address */ /* Flags and state transition */ __be16 flags; /* status flags */ __be16 state; /* state info */ /* The sequence options start here */ }; struct ip_vs_sync_conn_options { struct ip_vs_seq in_seq; /* incoming seq. struct */ struct ip_vs_seq out_seq; /* outgoing seq. struct */ }; /* Sync Connection format (sync_conn) 0 1 2 3 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Type | Protocol | Ver. | Size | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Flags | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | State | cport | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | vport | dport | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | fwmark | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | timeout (in sec.) | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ... | | IP-Addresses (v4 or v6) | | ... | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ Optional Parameters. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Param. Type | Param. Length | Param. data | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | | ... | | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | | Param Type | Param. Length | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Param data | | Last Param data should be padded for 32 bit alignment | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ */ /* * Type 0, IPv4 sync connection format */ struct ip_vs_sync_v4 { __u8 type; __u8 protocol; /* Which protocol (TCP/UDP) */ __be16 ver_size; /* Version msb 4 bits */ /* Flags and state transition */ __be32 flags; /* status flags */ __be16 state; /* state info */ /* Protocol, addresses and port numbers */ __be16 cport; __be16 vport; __be16 dport; __be32 fwmark; /* Firewall mark from skb */ __be32 timeout; /* cp timeout */ __be32 caddr; /* client address */ __be32 vaddr; /* virtual address */ __be32 daddr; /* destination address */ /* The sequence options start here */ /* PE data padded to 32bit alignment after seq. options */ }; /* * Type 2 messages IPv6 */ struct ip_vs_sync_v6 { __u8 type; __u8 protocol; /* Which protocol (TCP/UDP) */ __be16 ver_size; /* Version msb 4 bits */ /* Flags and state transition */ __be32 flags; /* status flags */ __be16 state; /* state info */ /* Protocol, addresses and port numbers */ __be16 cport; __be16 vport; __be16 dport; __be32 fwmark; /* Firewall mark from skb */ __be32 timeout; /* cp timeout */ struct in6_addr caddr; /* client address */ struct in6_addr vaddr; /* virtual address */ struct in6_addr daddr; /* destination address */ /* The sequence options start here */ /* PE data padded to 32bit alignment after seq. options */ }; union ip_vs_sync_conn { struct ip_vs_sync_v4 v4; struct ip_vs_sync_v6 v6; }; /* Bits in Type field in above */ #define STYPE_INET6 0 #define STYPE_F_INET6 (1 << STYPE_INET6) #define SVER_SHIFT 12 /* Shift to get version */ #define SVER_MASK 0x0fff /* Mask to strip version */ #define IPVS_OPT_SEQ_DATA 1 #define IPVS_OPT_PE_DATA 2 #define IPVS_OPT_PE_NAME 3 #define IPVS_OPT_PARAM 7 #define IPVS_OPT_F_SEQ_DATA (1 << (IPVS_OPT_SEQ_DATA-1)) #define IPVS_OPT_F_PE_DATA (1 << (IPVS_OPT_PE_DATA-1)) #define IPVS_OPT_F_PE_NAME (1 << (IPVS_OPT_PE_NAME-1)) #define IPVS_OPT_F_PARAM (1 << (IPVS_OPT_PARAM-1)) struct ip_vs_sync_thread_data { struct task_struct *task; struct netns_ipvs *ipvs; struct socket *sock; char *buf; int id; }; /* Version 0 definition of packet sizes */ #define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn_v0)) #define FULL_CONN_SIZE \ (sizeof(struct ip_vs_sync_conn_v0) + sizeof(struct ip_vs_sync_conn_options)) /* The master mulitcasts messages (Datagrams) to the backup load balancers in the following format. Version 1: Note, first byte should be Zero, so ver 0 receivers will drop the packet. 0 1 2 3 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | 0 | SyncID | Size | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Count Conns | Version | Reserved, set to Zero | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | | | IPVS Sync Connection (1) | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | . | ~ . ~ | . | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | | | IPVS Sync Connection (n) | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ Version 0 Header 0 1 2 3 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Count Conns | SyncID | Size | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | IPVS Sync Connection (1) | */ /* Version 0 header */ struct ip_vs_sync_mesg_v0 { __u8 nr_conns; __u8 syncid; __be16 size; /* ip_vs_sync_conn entries start here */ }; /* Version 1 header */ struct ip_vs_sync_mesg { __u8 reserved; /* must be zero */ __u8 syncid; __be16 size; __u8 nr_conns; __s8 version; /* SYNC_PROTO_VER */ __u16 spare; /* ip_vs_sync_conn entries start here */ }; union ipvs_sockaddr { struct sockaddr_in in; struct sockaddr_in6 in6; }; struct ip_vs_sync_buff { struct list_head list; unsigned long firstuse; /* pointers for the message data */ struct ip_vs_sync_mesg *mesg; unsigned char *head; unsigned char *end; }; /* * Copy of struct ip_vs_seq * From unaligned network order to aligned host order */ static void ntoh_seq(struct ip_vs_seq *no, struct ip_vs_seq *ho) { memset(ho, 0, sizeof(*ho)); ho->init_seq = get_unaligned_be32(&no->init_seq); ho->delta = get_unaligned_be32(&no->delta); ho->previous_delta = get_unaligned_be32(&no->previous_delta); } /* * Copy of struct ip_vs_seq * From Aligned host order to unaligned network order */ static void hton_seq(struct ip_vs_seq *ho, struct ip_vs_seq *no) { put_unaligned_be32(ho->init_seq, &no->init_seq); put_unaligned_be32(ho->delta, &no->delta); put_unaligned_be32(ho->previous_delta, &no->previous_delta); } static inline struct ip_vs_sync_buff * sb_dequeue(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms) { struct ip_vs_sync_buff *sb; spin_lock_bh(&ipvs->sync_lock); if (list_empty(&ms->sync_queue)) { sb = NULL; __set_current_state(TASK_INTERRUPTIBLE); } else { sb = list_entry(ms->sync_queue.next, struct ip_vs_sync_buff, list); list_del(&sb->list); ms->sync_queue_len--; if (!ms->sync_queue_len) ms->sync_queue_delay = 0; } spin_unlock_bh(&ipvs->sync_lock); return sb; } /* * Create a new sync buffer for Version 1 proto. */ static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create(struct netns_ipvs *ipvs, unsigned int len) { struct ip_vs_sync_buff *sb; if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC))) return NULL; len = max_t(unsigned int, len + sizeof(struct ip_vs_sync_mesg), ipvs->mcfg.sync_maxlen); sb->mesg = kmalloc(len, GFP_ATOMIC); if (!sb->mesg) { kfree(sb); return NULL; } sb->mesg->reserved = 0; /* old nr_conns i.e. must be zero now */ sb->mesg->version = SYNC_PROTO_VER; sb->mesg->syncid = ipvs->mcfg.syncid; sb->mesg->size = htons(sizeof(struct ip_vs_sync_mesg)); sb->mesg->nr_conns = 0; sb->mesg->spare = 0; sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg); sb->end = (unsigned char *)sb->mesg + len; sb->firstuse = jiffies; return sb; } static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb) { kfree(sb->mesg); kfree(sb); } static inline void sb_queue_tail(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms) { struct ip_vs_sync_buff *sb = ms->sync_buff; spin_lock(&ipvs->sync_lock); if (ipvs->sync_state & IP_VS_STATE_MASTER && ms->sync_queue_len < sysctl_sync_qlen_max(ipvs)) { if (!ms->sync_queue_len) schedule_delayed_work(&ms->master_wakeup_work, max(IPVS_SYNC_SEND_DELAY, 1)); ms->sync_queue_len++; list_add_tail(&sb->list, &ms->sync_queue); if ((++ms->sync_queue_delay) == IPVS_SYNC_WAKEUP_RATE) { int id = (int)(ms - ipvs->ms); wake_up_process(ipvs->master_tinfo[id].task); } } else ip_vs_sync_buff_release(sb); spin_unlock(&ipvs->sync_lock); } /* * Get the current sync buffer if it has been created for more * than the specified time or the specified time is zero. */ static inline struct ip_vs_sync_buff * get_curr_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms, unsigned long time) { struct ip_vs_sync_buff *sb; spin_lock_bh(&ipvs->sync_buff_lock); sb = ms->sync_buff; if (sb && time_after_eq(jiffies - sb->firstuse, time)) { ms->sync_buff = NULL; __set_current_state(TASK_RUNNING); } else sb = NULL; spin_unlock_bh(&ipvs->sync_buff_lock); return sb; } static inline int select_master_thread_id(struct netns_ipvs *ipvs, struct ip_vs_conn *cp) { return ((long) cp >> (1 + ilog2(sizeof(*cp)))) & ipvs->threads_mask; } /* * Create a new sync buffer for Version 0 proto. */ static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs, unsigned int len) { struct ip_vs_sync_buff *sb; struct ip_vs_sync_mesg_v0 *mesg; if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC))) return NULL; len = max_t(unsigned int, len + sizeof(struct ip_vs_sync_mesg_v0), ipvs->mcfg.sync_maxlen); sb->mesg = kmalloc(len, GFP_ATOMIC); if (!sb->mesg) { kfree(sb); return NULL; } mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg; mesg->nr_conns = 0; mesg->syncid = ipvs->mcfg.syncid; mesg->size = htons(sizeof(struct ip_vs_sync_mesg_v0)); sb->head = (unsigned char *)mesg + sizeof(struct ip_vs_sync_mesg_v0); sb->end = (unsigned char *)mesg + len; sb->firstuse = jiffies; return sb; } /* Check if connection is controlled by persistence */ static inline bool in_persistence(struct ip_vs_conn *cp) { for (cp = cp->control; cp; cp = cp->control) { if (cp->flags & IP_VS_CONN_F_TEMPLATE) return true; } return false; } /* Check if conn should be synced. * pkts: conn packets, use sysctl_sync_threshold to avoid packet check * - (1) sync_refresh_period: reduce sync rate. Additionally, retry * sync_retries times with period of sync_refresh_period/8 * - (2) if both sync_refresh_period and sync_period are 0 send sync only * for state changes or only once when pkts matches sync_threshold * - (3) templates: rate can be reduced only with sync_refresh_period or * with (2) */ static int ip_vs_sync_conn_needed(struct netns_ipvs *ipvs, struct ip_vs_conn *cp, int pkts) { unsigned long orig = READ_ONCE(cp->sync_endtime); unsigned long now = jiffies; unsigned long n = (now + cp->timeout) & ~3UL; unsigned int sync_refresh_period; int sync_period; int force; /* Check if we sync in current state */ if (unlikely(cp->flags & IP_VS_CONN_F_TEMPLATE)) force = 0; else if (unlikely(sysctl_sync_persist_mode(ipvs) && in_persistence(cp))) return 0; else if (likely(cp->protocol == IPPROTO_TCP)) { if (!((1 << cp->state) & ((1 << IP_VS_TCP_S_ESTABLISHED) | (1 << IP_VS_TCP_S_FIN_WAIT) | (1 << IP_VS_TCP_S_CLOSE) | (1 << IP_VS_TCP_S_CLOSE_WAIT) | (1 << IP_VS_TCP_S_TIME_WAIT)))) return 0; force = cp->state != cp->old_state; if (force && cp->state != IP_VS_TCP_S_ESTABLISHED) goto set; } else if (unlikely(cp->protocol == IPPROTO_SCTP)) { if (!((1 << cp->state) & ((1 << IP_VS_SCTP_S_ESTABLISHED) | (1 << IP_VS_SCTP_S_SHUTDOWN_SENT) | (1 << IP_VS_SCTP_S_SHUTDOWN_RECEIVED) | (1 << IP_VS_SCTP_S_SHUTDOWN_ACK_SENT) | (1 << IP_VS_SCTP_S_CLOSED)))) return 0; force = cp->state != cp->old_state; if (force && cp->state != IP_VS_SCTP_S_ESTABLISHED) goto set; } else { /* UDP or another protocol with single state */ force = 0; } sync_refresh_period = sysctl_sync_refresh_period(ipvs); if (sync_refresh_period > 0) { long diff = n - orig; long min_diff = max(cp->timeout >> 1, 10UL * HZ); /* Avoid sync if difference is below sync_refresh_period * and below the half timeout. */ if (abs(diff) < min_t(long, sync_refresh_period, min_diff)) { int retries = orig & 3; if (retries >= sysctl_sync_retries(ipvs)) return 0; if (time_before(now, orig - cp->timeout + (sync_refresh_period >> 3))) return 0; n |= retries + 1; } } sync_period = sysctl_sync_period(ipvs); if (sync_period > 0) { if (!(cp->flags & IP_VS_CONN_F_TEMPLATE) && pkts % sync_period != sysctl_sync_threshold(ipvs)) return 0; } else if (!sync_refresh_period && pkts != sysctl_sync_threshold(ipvs)) return 0; set: cp->old_state = cp->state; n = cmpxchg(&cp->sync_endtime, orig, n); return n == orig || force; } /* * Version 0 , could be switched in by sys_ctl. * Add an ip_vs_conn information into the current sync_buff. */ static void ip_vs_sync_conn_v0(struct netns_ipvs *ipvs, struct ip_vs_conn *cp, int pkts) { struct ip_vs_sync_mesg_v0 *m; struct ip_vs_sync_conn_v0 *s; struct ip_vs_sync_buff *buff; struct ipvs_master_sync_state *ms; int id; unsigned int len; if (unlikely(cp->af != AF_INET)) return; /* Do not sync ONE PACKET */ if (cp->flags & IP_VS_CONN_F_ONE_PACKET) return; if (!ip_vs_sync_conn_needed(ipvs, cp, pkts)) return; spin_lock_bh(&ipvs->sync_buff_lock); if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) { spin_unlock_bh(&ipvs->sync_buff_lock); return; } id = select_master_thread_id(ipvs, cp); ms = &ipvs->ms[id]; buff = ms->sync_buff; len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE : SIMPLE_CONN_SIZE; if (buff) { m = (struct ip_vs_sync_mesg_v0 *) buff->mesg; /* Send buffer if it is for v1 */ if (buff->head + len > buff->end || !m->nr_conns) { sb_queue_tail(ipvs, ms); ms->sync_buff = NULL; buff = NULL; } } if (!buff) { buff = ip_vs_sync_buff_create_v0(ipvs, len); if (!buff) { spin_unlock_bh(&ipvs->sync_buff_lock); pr_err("ip_vs_sync_buff_create failed.\n"); return; } ms->sync_buff = buff; } m = (struct ip_vs_sync_mesg_v0 *) buff->mesg; s = (struct ip_vs_sync_conn_v0 *) buff->head; /* copy members */ s->reserved = 0; s->protocol = cp->protocol; s->cport = cp->cport; s->vport = cp->vport; s->dport = cp->dport; s->caddr = cp->caddr.ip; s->vaddr = cp->vaddr.ip; s->daddr = cp->daddr.ip; s->flags = htons(cp->flags & ~IP_VS_CONN_F_HASHED); s->state = htons(cp->state); if (cp->flags & IP_VS_CONN_F_SEQ_MASK) { struct ip_vs_sync_conn_options *opt = (struct ip_vs_sync_conn_options *)&s[1]; memcpy(opt, &cp->sync_conn_opt, sizeof(*opt)); } m->nr_conns++; m->size = htons(ntohs(m->size) + len); buff->head += len; spin_unlock_bh(&ipvs->sync_buff_lock); /* synchronize its controller if it has */ cp = cp->control; if (cp) { if (cp->flags & IP_VS_CONN_F_TEMPLATE) pkts = atomic_inc_return(&cp->in_pkts); else pkts = sysctl_sync_threshold(ipvs); ip_vs_sync_conn(ipvs, cp, pkts); } } /* * Add an ip_vs_conn information into the current sync_buff. * Called by ip_vs_in. * Sending Version 1 messages */ void ip_vs_sync_conn(struct netns_ipvs *ipvs, struct ip_vs_conn *cp, int pkts) { struct ip_vs_sync_mesg *m; union ip_vs_sync_conn *s; struct ip_vs_sync_buff *buff; struct ipvs_master_sync_state *ms; int id; __u8 *p; unsigned int len, pe_name_len, pad; /* Handle old version of the protocol */ if (sysctl_sync_ver(ipvs) == 0) { ip_vs_sync_conn_v0(ipvs, cp, pkts); return; } /* Do not sync ONE PACKET */ if (cp->flags & IP_VS_CONN_F_ONE_PACKET) goto control; sloop: if (!ip_vs_sync_conn_needed(ipvs, cp, pkts)) goto control; /* Sanity checks */ pe_name_len = 0; if (cp->pe_data_len) { if (!cp->pe_data || !cp->dest) { IP_VS_ERR_RL("SYNC, connection pe_data invalid\n"); return; } pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN); } spin_lock_bh(&ipvs->sync_buff_lock); if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) { spin_unlock_bh(&ipvs->sync_buff_lock); return; } id = select_master_thread_id(ipvs, cp); ms = &ipvs->ms[id]; #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6) len = sizeof(struct ip_vs_sync_v6); else #endif len = sizeof(struct ip_vs_sync_v4); if (cp->flags & IP_VS_CONN_F_SEQ_MASK) len += sizeof(struct ip_vs_sync_conn_options) + 2; if (cp->pe_data_len) len += cp->pe_data_len + 2; /* + Param hdr field */ if (pe_name_len) len += pe_name_len + 2; /* check if there is a space for this one */ pad = 0; buff = ms->sync_buff; if (buff) { m = buff->mesg; pad = (4 - (size_t) buff->head) & 3; /* Send buffer if it is for v0 */ if (buff->head + len + pad > buff->end || m->reserved) { sb_queue_tail(ipvs, ms); ms->sync_buff = NULL; buff = NULL; pad = 0; } } if (!buff) { buff = ip_vs_sync_buff_create(ipvs, len); if (!buff) { spin_unlock_bh(&ipvs->sync_buff_lock); pr_err("ip_vs_sync_buff_create failed.\n"); return; } ms->sync_buff = buff; m = buff->mesg; } p = buff->head; buff->head += pad + len; m->size = htons(ntohs(m->size) + pad + len); /* Add ev. padding from prev. sync_conn */ while (pad--) *(p++) = 0; s = (union ip_vs_sync_conn *)p; /* Set message type & copy members */ s->v4.type = (cp->af == AF_INET6 ? STYPE_F_INET6 : 0); s->v4.ver_size = htons(len & SVER_MASK); /* Version 0 */ s->v4.flags = htonl(cp->flags & ~IP_VS_CONN_F_HASHED); s->v4.state = htons(cp->state); s->v4.protocol = cp->protocol; s->v4.cport = cp->cport; s->v4.vport = cp->vport; s->v4.dport = cp->dport; s->v4.fwmark = htonl(cp->fwmark); s->v4.timeout = htonl(cp->timeout / HZ); m->nr_conns++; #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6) { p += sizeof(struct ip_vs_sync_v6); s->v6.caddr = cp->caddr.in6; s->v6.vaddr = cp->vaddr.in6; s->v6.daddr = cp->daddr.in6; } else #endif { p += sizeof(struct ip_vs_sync_v4); /* options ptr */ s->v4.caddr = cp->caddr.ip; s->v4.vaddr = cp->vaddr.ip; s->v4.daddr = cp->daddr.ip; } if (cp->flags & IP_VS_CONN_F_SEQ_MASK) { *(p++) = IPVS_OPT_SEQ_DATA; *(p++) = sizeof(struct ip_vs_sync_conn_options); hton_seq((struct ip_vs_seq *)p, &cp->in_seq); p += sizeof(struct ip_vs_seq); hton_seq((struct ip_vs_seq *)p, &cp->out_seq); p += sizeof(struct ip_vs_seq); } /* Handle pe data */ if (cp->pe_data_len && cp->pe_data) { *(p++) = IPVS_OPT_PE_DATA; *(p++) = cp->pe_data_len; memcpy(p, cp->pe_data, cp->pe_data_len); p += cp->pe_data_len; if (pe_name_len) { /* Add PE_NAME */ *(p++) = IPVS_OPT_PE_NAME; *(p++) = pe_name_len; memcpy(p, cp->pe->name, pe_name_len); p += pe_name_len; } } spin_unlock_bh(&ipvs->sync_buff_lock); control: /* synchronize its controller if it has */ cp = cp->control; if (!cp) return; if (cp->flags & IP_VS_CONN_F_TEMPLATE) pkts = atomic_inc_return(&cp->in_pkts); else pkts = sysctl_sync_threshold(ipvs); goto sloop; } /* * fill_param used by version 1 */ static inline int ip_vs_conn_fill_param_sync(struct netns_ipvs *ipvs, int af, union ip_vs_sync_conn *sc, struct ip_vs_conn_param *p, __u8 *pe_data, unsigned int pe_data_len, __u8 *pe_name, unsigned int pe_name_len) { #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) ip_vs_conn_fill_param(ipvs, af, sc->v6.protocol, (const union nf_inet_addr *)&sc->v6.caddr, sc->v6.cport, (const union nf_inet_addr *)&sc->v6.vaddr, sc->v6.vport, p); else #endif ip_vs_conn_fill_param(ipvs, af, sc->v4.protocol, (const union nf_inet_addr *)&sc->v4.caddr, sc->v4.cport, (const union nf_inet_addr *)&sc->v4.vaddr, sc->v4.vport, p); /* Handle pe data */ if (pe_data_len) { if (pe_name_len) { char buff[IP_VS_PENAME_MAXLEN+1]; memcpy(buff, pe_name, pe_name_len); buff[pe_name_len]=0; p->pe = __ip_vs_pe_getbyname(buff); if (!p->pe) { IP_VS_DBG(3, "BACKUP, no %s engine found/loaded\n", buff); return 1; } } else { IP_VS_ERR_RL("BACKUP, Invalid PE parameters\n"); return 1; } p->pe_data = kmemdup(pe_data, pe_data_len, GFP_ATOMIC); if (!p->pe_data) { module_put(p->pe->module); return -ENOMEM; } p->pe_data_len = pe_data_len; } return 0; } /* * Connection Add / Update. * Common for version 0 and 1 reception of backup sync_conns. * Param: ... * timeout is in sec. */ static void ip_vs_proc_conn(struct netns_ipvs *ipvs, struct ip_vs_conn_param *param, unsigned int flags, unsigned int state, unsigned int protocol, unsigned int type, const union nf_inet_addr *daddr, __be16 dport, unsigned long timeout, __u32 fwmark, struct ip_vs_sync_conn_options *opt) { struct ip_vs_dest *dest; struct ip_vs_conn *cp; if (!(flags & IP_VS_CONN_F_TEMPLATE)) { cp = ip_vs_conn_in_get(param); if (cp && ((cp->dport != dport) || !ip_vs_addr_equal(cp->daf, &cp->daddr, daddr))) { if (!(flags & IP_VS_CONN_F_INACTIVE)) { ip_vs_conn_expire_now(cp); __ip_vs_conn_put(cp); cp = NULL; } else { /* This is the expiration message for the * connection that was already replaced, so we * just ignore it. */ __ip_vs_conn_put(cp); kfree(param->pe_data); return; } } } else { cp = ip_vs_ct_in_get(param); } if (cp) { /* Free pe_data */ kfree(param->pe_data); dest = cp->dest; spin_lock_bh(&cp->lock); if ((cp->flags ^ flags) & IP_VS_CONN_F_INACTIVE && !(flags & IP_VS_CONN_F_TEMPLATE) && dest) { if (flags & IP_VS_CONN_F_INACTIVE) { atomic_dec(&dest->activeconns); atomic_inc(&dest->inactconns); } else { atomic_inc(&dest->activeconns); atomic_dec(&dest->inactconns); } } flags &= IP_VS_CONN_F_BACKUP_UPD_MASK; flags |= cp->flags & ~IP_VS_CONN_F_BACKUP_UPD_MASK; cp->flags = flags; spin_unlock_bh(&cp->lock); if (!dest) ip_vs_try_bind_dest(cp); } else { /* * Find the appropriate destination for the connection. * If it is not found the connection will remain unbound * but still handled. */ rcu_read_lock(); /* This function is only invoked by the synchronization * code. We do not currently support heterogeneous pools * with synchronization, so we can make the assumption that * the svc_af is the same as the dest_af */ dest = ip_vs_find_dest(ipvs, type, type, daddr, dport, param->vaddr, param->vport, protocol, fwmark, flags); cp = ip_vs_conn_new(param, type, daddr, dport, flags, dest, fwmark); rcu_read_unlock(); if (!cp) { kfree(param->pe_data); IP_VS_DBG(2, "BACKUP, add new conn. failed\n"); return; } if (!(flags & IP_VS_CONN_F_TEMPLATE)) kfree(param->pe_data); } if (opt) { cp->in_seq = opt->in_seq; cp->out_seq = opt->out_seq; } atomic_set(&cp->in_pkts, sysctl_sync_threshold(ipvs)); cp->state = state; cp->old_state = cp->state; /* * For Ver 0 messages style * - Not possible to recover the right timeout for templates * - can not find the right fwmark * virtual service. If needed, we can do it for * non-fwmark persistent services. * Ver 1 messages style. * - No problem. */ if (timeout) { if (timeout > MAX_SCHEDULE_TIMEOUT / HZ) timeout = MAX_SCHEDULE_TIMEOUT / HZ; cp->timeout = timeout*HZ; } else { struct ip_vs_proto_data *pd; pd = ip_vs_proto_data_get(ipvs, protocol); if (!(flags & IP_VS_CONN_F_TEMPLATE) && pd && pd->timeout_table) cp->timeout = pd->timeout_table[state]; else cp->timeout = (3*60*HZ); } ip_vs_conn_put(cp); } /* * Process received multicast message for Version 0 */ static void ip_vs_process_message_v0(struct netns_ipvs *ipvs, const char *buffer, const size_t buflen) { struct ip_vs_sync_mesg_v0 *m = (struct ip_vs_sync_mesg_v0 *)buffer; struct ip_vs_sync_conn_v0 *s; struct ip_vs_sync_conn_options *opt; struct ip_vs_protocol *pp; struct ip_vs_conn_param param; char *p; int i; p = (char *)buffer + sizeof(struct ip_vs_sync_mesg_v0); for (i=0; i<m->nr_conns; i++) { unsigned int flags, state; if (p + SIMPLE_CONN_SIZE > buffer+buflen) { IP_VS_ERR_RL("BACKUP v0, bogus conn\n"); return; } s = (struct ip_vs_sync_conn_v0 *) p; flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC; flags &= ~IP_VS_CONN_F_HASHED; if (flags & IP_VS_CONN_F_SEQ_MASK) { opt = (struct ip_vs_sync_conn_options *)&s[1]; p += FULL_CONN_SIZE; if (p > buffer+buflen) { IP_VS_ERR_RL("BACKUP v0, Dropping buffer bogus conn options\n"); return; } } else { opt = NULL; p += SIMPLE_CONN_SIZE; } state = ntohs(s->state); if (!(flags & IP_VS_CONN_F_TEMPLATE)) { pp = ip_vs_proto_get(s->protocol); if (!pp) { IP_VS_DBG(2, "BACKUP v0, Unsupported protocol %u\n", s->protocol); continue; } if (state >= pp->num_states) { IP_VS_DBG(2, "BACKUP v0, Invalid %s state %u\n", pp->name, state); continue; } } else { if (state >= IP_VS_CTPL_S_LAST) IP_VS_DBG(7, "BACKUP v0, Invalid tpl state %u\n", state); } ip_vs_conn_fill_param(ipvs, AF_INET, s->protocol, (const union nf_inet_addr *)&s->caddr, s->cport, (const union nf_inet_addr *)&s->vaddr, s->vport, ¶m); /* Send timeout as Zero */ ip_vs_proc_conn(ipvs, ¶m, flags, state, s->protocol, AF_INET, (union nf_inet_addr *)&s->daddr, s->dport, 0, 0, opt); } } /* * Handle options */ static inline int ip_vs_proc_seqopt(__u8 *p, unsigned int plen, __u32 *opt_flags, struct ip_vs_sync_conn_options *opt) { struct ip_vs_sync_conn_options *topt; topt = (struct ip_vs_sync_conn_options *)p; if (plen != sizeof(struct ip_vs_sync_conn_options)) { IP_VS_DBG(2, "BACKUP, bogus conn options length\n"); return -EINVAL; } if (*opt_flags & IPVS_OPT_F_SEQ_DATA) { IP_VS_DBG(2, "BACKUP, conn options found twice\n"); return -EINVAL; } ntoh_seq(&topt->in_seq, &opt->in_seq); ntoh_seq(&topt->out_seq, &opt->out_seq); *opt_flags |= IPVS_OPT_F_SEQ_DATA; return 0; } static int ip_vs_proc_str(__u8 *p, unsigned int plen, unsigned int *data_len, __u8 **data, unsigned int maxlen, __u32 *opt_flags, __u32 flag) { if (plen > maxlen) { IP_VS_DBG(2, "BACKUP, bogus par.data len > %d\n", maxlen); return -EINVAL; } if (*opt_flags & flag) { IP_VS_DBG(2, "BACKUP, Par.data found twice 0x%x\n", flag); return -EINVAL; } *data_len = plen; *data = p; *opt_flags |= flag; return 0; } /* * Process a Version 1 sync. connection */ static inline int ip_vs_proc_sync_conn(struct netns_ipvs *ipvs, __u8 *p, __u8 *msg_end) { struct ip_vs_sync_conn_options opt; union ip_vs_sync_conn *s; struct ip_vs_protocol *pp; struct ip_vs_conn_param param; __u32 flags; unsigned int af, state, pe_data_len=0, pe_name_len=0; __u8 *pe_data=NULL, *pe_name=NULL; __u32 opt_flags=0; int retc=0; s = (union ip_vs_sync_conn *) p; if (s->v6.type & STYPE_F_INET6) { #ifdef CONFIG_IP_VS_IPV6 af = AF_INET6; p += sizeof(struct ip_vs_sync_v6); #else IP_VS_DBG(3,"BACKUP, IPv6 msg received, and IPVS is not compiled for IPv6\n"); retc = 10; goto out; #endif } else if (!s->v4.type) { af = AF_INET; p += sizeof(struct ip_vs_sync_v4); } else { return -10; } if (p > msg_end) return -20; /* Process optional params check Type & Len. */ while (p < msg_end) { int ptype; int plen; if (p+2 > msg_end) return -30; ptype = *(p++); plen = *(p++); if (!plen || ((p + plen) > msg_end)) return -40; /* Handle seq option p = param data */ switch (ptype & ~IPVS_OPT_F_PARAM) { case IPVS_OPT_SEQ_DATA: if (ip_vs_proc_seqopt(p, plen, &opt_flags, &opt)) return -50; break; case IPVS_OPT_PE_DATA: if (ip_vs_proc_str(p, plen, &pe_data_len, &pe_data, IP_VS_PEDATA_MAXLEN, &opt_flags, IPVS_OPT_F_PE_DATA)) return -60; break; case IPVS_OPT_PE_NAME: if (ip_vs_proc_str(p, plen,&pe_name_len, &pe_name, IP_VS_PENAME_MAXLEN, &opt_flags, IPVS_OPT_F_PE_NAME)) return -70; break; default: /* Param data mandatory ? */ if (!(ptype & IPVS_OPT_F_PARAM)) { IP_VS_DBG(3, "BACKUP, Unknown mandatory param %d found\n", ptype & ~IPVS_OPT_F_PARAM); retc = 20; goto out; } } p += plen; /* Next option */ } /* Get flags and Mask off unsupported */ flags = ntohl(s->v4.flags) & IP_VS_CONN_F_BACKUP_MASK; flags |= IP_VS_CONN_F_SYNC; state = ntohs(s->v4.state); if (!(flags & IP_VS_CONN_F_TEMPLATE)) { pp = ip_vs_proto_get(s->v4.protocol); if (!pp) { IP_VS_DBG(3,"BACKUP, Unsupported protocol %u\n", s->v4.protocol); retc = 30; goto out; } if (state >= pp->num_states) { IP_VS_DBG(3, "BACKUP, Invalid %s state %u\n", pp->name, state); retc = 40; goto out; } } else { if (state >= IP_VS_CTPL_S_LAST) IP_VS_DBG(7, "BACKUP, Invalid tpl state %u\n", state); } if (ip_vs_conn_fill_param_sync(ipvs, af, s, ¶m, pe_data, pe_data_len, pe_name, pe_name_len)) { retc = 50; goto out; } /* If only IPv4, just silent skip IPv6 */ if (af == AF_INET) ip_vs_proc_conn(ipvs, ¶m, flags, state, s->v4.protocol, af, (union nf_inet_addr *)&s->v4.daddr, s->v4.dport, ntohl(s->v4.timeout), ntohl(s->v4.fwmark), (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL) ); #ifdef CONFIG_IP_VS_IPV6 else ip_vs_proc_conn(ipvs, ¶m, flags, state, s->v6.protocol, af, (union nf_inet_addr *)&s->v6.daddr, s->v6.dport, ntohl(s->v6.timeout), ntohl(s->v6.fwmark), (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL) ); #endif ip_vs_pe_put(param.pe); return 0; /* Error exit */ out: IP_VS_DBG(2, "BACKUP, Single msg dropped err:%d\n", retc); return retc; } /* * Process received multicast message and create the corresponding * ip_vs_conn entries. * Handles Version 0 & 1 */ static void ip_vs_process_message(struct netns_ipvs *ipvs, __u8 *buffer, const size_t buflen) { struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer; __u8 *p, *msg_end; int i, nr_conns; if (buflen < sizeof(struct ip_vs_sync_mesg_v0)) { IP_VS_DBG(2, "BACKUP, message header too short\n"); return; } if (buflen != ntohs(m2->size)) { IP_VS_DBG(2, "BACKUP, bogus message size\n"); return; } /* SyncID sanity check */ if (ipvs->bcfg.syncid != 0 && m2->syncid != ipvs->bcfg.syncid) { IP_VS_DBG(7, "BACKUP, Ignoring syncid = %d\n", m2->syncid); return; } /* Handle version 1 message */ if ((m2->version == SYNC_PROTO_VER) && (m2->reserved == 0) && (m2->spare == 0)) { msg_end = buffer + sizeof(struct ip_vs_sync_mesg); nr_conns = m2->nr_conns; for (i=0; i<nr_conns; i++) { union ip_vs_sync_conn *s; unsigned int size; int retc; p = msg_end; if (p + sizeof(s->v4) > buffer+buflen) { IP_VS_ERR_RL("BACKUP, Dropping buffer, too small\n"); return; } s = (union ip_vs_sync_conn *)p; size = ntohs(s->v4.ver_size) & SVER_MASK; msg_end = p + size; /* Basic sanity checks */ if (msg_end > buffer+buflen) { IP_VS_ERR_RL("BACKUP, Dropping buffer, msg > buffer\n"); return; } if (ntohs(s->v4.ver_size) >> SVER_SHIFT) { IP_VS_ERR_RL("BACKUP, Dropping buffer, Unknown version %d\n", ntohs(s->v4.ver_size) >> SVER_SHIFT); return; } /* Process a single sync_conn */ retc = ip_vs_proc_sync_conn(ipvs, p, msg_end); if (retc < 0) { IP_VS_ERR_RL("BACKUP, Dropping buffer, Err: %d in decoding\n", retc); return; } /* Make sure we have 32 bit alignment */ msg_end = p + ((size + 3) & ~3); } } else { /* Old type of message */ ip_vs_process_message_v0(ipvs, buffer, buflen); return; } } /* * Setup sndbuf (mode=1) or rcvbuf (mode=0) */ static void set_sock_size(struct sock *sk, int mode, int val) { /* setsockopt(sock, SOL_SOCKET, SO_SNDBUF, &val, sizeof(val)); */ /* setsockopt(sock, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val)); */ lock_sock(sk); if (mode) { val = clamp_t(int, val, (SOCK_MIN_SNDBUF + 1) / 2, READ_ONCE(sysctl_wmem_max)); sk->sk_sndbuf = val * 2; sk->sk_userlocks |= SOCK_SNDBUF_LOCK; } else { val = clamp_t(int, val, (SOCK_MIN_RCVBUF + 1) / 2, READ_ONCE(sysctl_rmem_max)); sk->sk_rcvbuf = val * 2; sk->sk_userlocks |= SOCK_RCVBUF_LOCK; } release_sock(sk); } /* * Setup loopback of outgoing multicasts on a sending socket */ static void set_mcast_loop(struct sock *sk, u_char loop) { /* setsockopt(sock, SOL_IP, IP_MULTICAST_LOOP, &loop, sizeof(loop)); */ inet_assign_bit(MC_LOOP, sk, loop); #ifdef CONFIG_IP_VS_IPV6 if (READ_ONCE(sk->sk_family) == AF_INET6) { /* IPV6_MULTICAST_LOOP */ inet6_assign_bit(MC6_LOOP, sk, loop); } #endif } /* * Specify TTL for outgoing multicasts on a sending socket */ static void set_mcast_ttl(struct sock *sk, u_char ttl) { struct inet_sock *inet = inet_sk(sk); /* setsockopt(sock, SOL_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl)); */ lock_sock(sk); WRITE_ONCE(inet->mc_ttl, ttl); #ifdef CONFIG_IP_VS_IPV6 if (sk->sk_family == AF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); /* IPV6_MULTICAST_HOPS */ WRITE_ONCE(np->mcast_hops, ttl); } #endif release_sock(sk); } /* Control fragmentation of messages */ static void set_mcast_pmtudisc(struct sock *sk, int val) { struct inet_sock *inet = inet_sk(sk); /* setsockopt(sock, SOL_IP, IP_MTU_DISCOVER, &val, sizeof(val)); */ lock_sock(sk); WRITE_ONCE(inet->pmtudisc, val); #ifdef CONFIG_IP_VS_IPV6 if (sk->sk_family == AF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); /* IPV6_MTU_DISCOVER */ WRITE_ONCE(np->pmtudisc, val); } #endif release_sock(sk); } /* * Specifiy default interface for outgoing multicasts */ static int set_mcast_if(struct sock *sk, struct net_device *dev) { struct inet_sock *inet = inet_sk(sk); if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) return -EINVAL; lock_sock(sk); inet->mc_index = dev->ifindex; /* inet->mc_addr = 0; */ #ifdef CONFIG_IP_VS_IPV6 if (sk->sk_family == AF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); /* IPV6_MULTICAST_IF */ WRITE_ONCE(np->mcast_oif, dev->ifindex); } #endif release_sock(sk); return 0; } /* * Join a multicast group. * the group is specified by a class D multicast address 224.0.0.0/8 * in the in_addr structure passed in as a parameter. */ static int join_mcast_group(struct sock *sk, struct in_addr *addr, struct net_device *dev) { struct ip_mreqn mreq; int ret; memset(&mreq, 0, sizeof(mreq)); memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr)); if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) return -EINVAL; mreq.imr_ifindex = dev->ifindex; lock_sock(sk); ret = ip_mc_join_group(sk, &mreq); release_sock(sk); return ret; } #ifdef CONFIG_IP_VS_IPV6 static int join_mcast_group6(struct sock *sk, struct in6_addr *addr, struct net_device *dev) { int ret; if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) return -EINVAL; lock_sock(sk); ret = ipv6_sock_mc_join(sk, dev->ifindex, addr); release_sock(sk); return ret; } #endif static int bind_mcastif_addr(struct socket *sock, struct net_device *dev) { __be32 addr; struct sockaddr_in sin; addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); if (!addr) pr_err("You probably need to specify IP address on " "multicast interface.\n"); IP_VS_DBG(7, "binding socket with (%s) %pI4\n", dev->name, &addr); /* Now bind the socket with the address of multicast interface */ sin.sin_family = AF_INET; sin.sin_addr.s_addr = addr; sin.sin_port = 0; return kernel_bind(sock, (struct sockaddr *)&sin, sizeof(sin)); } static void get_mcast_sockaddr(union ipvs_sockaddr *sa, int *salen, struct ipvs_sync_daemon_cfg *c, int id) { if (AF_INET6 == c->mcast_af) { sa->in6 = (struct sockaddr_in6) { .sin6_family = AF_INET6, .sin6_port = htons(c->mcast_port + id), }; sa->in6.sin6_addr = c->mcast_group.in6; *salen = sizeof(sa->in6); } else { sa->in = (struct sockaddr_in) { .sin_family = AF_INET, .sin_port = htons(c->mcast_port + id), }; sa->in.sin_addr = c->mcast_group.in; *salen = sizeof(sa->in); } } /* * Set up sending multicast socket over UDP */ static int make_send_sock(struct netns_ipvs *ipvs, int id, struct net_device *dev, struct socket **sock_ret) { /* multicast addr */ union ipvs_sockaddr mcast_addr; struct socket *sock; int result, salen; /* First create a socket */ result = sock_create_kern(ipvs->net, ipvs->mcfg.mcast_af, SOCK_DGRAM, IPPROTO_UDP, &sock); if (result < 0) { pr_err("Error during creation of socket; terminating\n"); goto error; } *sock_ret = sock; result = set_mcast_if(sock->sk, dev); if (result < 0) { pr_err("Error setting outbound mcast interface\n"); goto error; } set_mcast_loop(sock->sk, 0); set_mcast_ttl(sock->sk, ipvs->mcfg.mcast_ttl); /* Allow fragmentation if MTU changes */ set_mcast_pmtudisc(sock->sk, IP_PMTUDISC_DONT); result = sysctl_sync_sock_size(ipvs); if (result > 0) set_sock_size(sock->sk, 1, result); if (AF_INET == ipvs->mcfg.mcast_af) result = bind_mcastif_addr(sock, dev); else result = 0; if (result < 0) { pr_err("Error binding address of the mcast interface\n"); goto error; } get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->mcfg, id); result = kernel_connect(sock, (struct sockaddr *)&mcast_addr, salen, 0); if (result < 0) { pr_err("Error connecting to the multicast addr\n"); goto error; } return 0; error: return result; } /* * Set up receiving multicast socket over UDP */ static int make_receive_sock(struct netns_ipvs *ipvs, int id, struct net_device *dev, struct socket **sock_ret) { /* multicast addr */ union ipvs_sockaddr mcast_addr; struct socket *sock; int result, salen; /* First create a socket */ result = sock_create_kern(ipvs->net, ipvs->bcfg.mcast_af, SOCK_DGRAM, IPPROTO_UDP, &sock); if (result < 0) { pr_err("Error during creation of socket; terminating\n"); goto error; } *sock_ret = sock; /* it is equivalent to the REUSEADDR option in user-space */ sock->sk->sk_reuse = SK_CAN_REUSE; result = sysctl_sync_sock_size(ipvs); if (result > 0) set_sock_size(sock->sk, 0, result); get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->bcfg, id); sock->sk->sk_bound_dev_if = dev->ifindex; result = kernel_bind(sock, (struct sockaddr *)&mcast_addr, salen); if (result < 0) { pr_err("Error binding to the multicast addr\n"); goto error; } /* join the multicast group */ #ifdef CONFIG_IP_VS_IPV6 if (ipvs->bcfg.mcast_af == AF_INET6) result = join_mcast_group6(sock->sk, &mcast_addr.in6.sin6_addr, dev); else #endif result = join_mcast_group(sock->sk, &mcast_addr.in.sin_addr, dev); if (result < 0) { pr_err("Error joining to the multicast group\n"); goto error; } return 0; error: return result; } static int ip_vs_send_async(struct socket *sock, const char *buffer, const size_t length) { struct msghdr msg = {.msg_flags = MSG_DONTWAIT|MSG_NOSIGNAL}; struct kvec iov; int len; iov.iov_base = (void *)buffer; iov.iov_len = length; len = kernel_sendmsg(sock, &msg, &iov, 1, (size_t)(length)); return len; } static int ip_vs_send_sync_msg(struct socket *sock, struct ip_vs_sync_mesg *msg) { int msize; int ret; msize = ntohs(msg->size); ret = ip_vs_send_async(sock, (char *)msg, msize); if (ret >= 0 || ret == -EAGAIN) return ret; pr_err("ip_vs_send_async error %d\n", ret); return 0; } static int ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen) { struct msghdr msg = {NULL,}; struct kvec iov = {buffer, buflen}; int len; /* Receive a packet */ iov_iter_kvec(&msg.msg_iter, ITER_DEST, &iov, 1, buflen); len = sock_recvmsg(sock, &msg, MSG_DONTWAIT); if (len < 0) return len; return len; } /* Wakeup the master thread for sending */ static void master_wakeup_work_handler(struct work_struct *work) { struct ipvs_master_sync_state *ms = container_of(work, struct ipvs_master_sync_state, master_wakeup_work.work); struct netns_ipvs *ipvs = ms->ipvs; spin_lock_bh(&ipvs->sync_lock); if (ms->sync_queue_len && ms->sync_queue_delay < IPVS_SYNC_WAKEUP_RATE) { int id = (int)(ms - ipvs->ms); ms->sync_queue_delay = IPVS_SYNC_WAKEUP_RATE; wake_up_process(ipvs->master_tinfo[id].task); } spin_unlock_bh(&ipvs->sync_lock); } /* Get next buffer to send */ static inline struct ip_vs_sync_buff * next_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms) { struct ip_vs_sync_buff *sb; sb = sb_dequeue(ipvs, ms); if (sb) return sb; /* Do not delay entries in buffer for more than 2 seconds */ return get_curr_sync_buff(ipvs, ms, IPVS_SYNC_FLUSH_TIME); } static int sync_thread_master(void *data) { struct ip_vs_sync_thread_data *tinfo = data; struct netns_ipvs *ipvs = tinfo->ipvs; struct ipvs_master_sync_state *ms = &ipvs->ms[tinfo->id]; struct sock *sk = tinfo->sock->sk; struct ip_vs_sync_buff *sb; pr_info("sync thread started: state = MASTER, mcast_ifn = %s, " "syncid = %d, id = %d\n", ipvs->mcfg.mcast_ifn, ipvs->mcfg.syncid, tinfo->id); for (;;) { sb = next_sync_buff(ipvs, ms); if (unlikely(kthread_should_stop())) break; if (!sb) { schedule_timeout(IPVS_SYNC_CHECK_PERIOD); continue; } while (ip_vs_send_sync_msg(tinfo->sock, sb->mesg) < 0) { /* (Ab)use interruptible sleep to avoid increasing * the load avg. */ __wait_event_interruptible(*sk_sleep(sk), sock_writeable(sk) || kthread_should_stop()); if (unlikely(kthread_should_stop())) goto done; } ip_vs_sync_buff_release(sb); } done: __set_current_state(TASK_RUNNING); if (sb) ip_vs_sync_buff_release(sb); /* clean up the sync_buff queue */ while ((sb = sb_dequeue(ipvs, ms))) ip_vs_sync_buff_release(sb); __set_current_state(TASK_RUNNING); /* clean up the current sync_buff */ sb = get_curr_sync_buff(ipvs, ms, 0); if (sb) ip_vs_sync_buff_release(sb); return 0; } static int sync_thread_backup(void *data) { struct ip_vs_sync_thread_data *tinfo = data; struct netns_ipvs *ipvs = tinfo->ipvs; struct sock *sk = tinfo->sock->sk; struct udp_sock *up = udp_sk(sk); int len; pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, " "syncid = %d, id = %d\n", ipvs->bcfg.mcast_ifn, ipvs->bcfg.syncid, tinfo->id); while (!kthread_should_stop()) { wait_event_interruptible(*sk_sleep(sk), !skb_queue_empty_lockless(&sk->sk_receive_queue) || !skb_queue_empty_lockless(&up->reader_queue) || kthread_should_stop()); /* do we have data now? */ while (!skb_queue_empty_lockless(&sk->sk_receive_queue) || !skb_queue_empty_lockless(&up->reader_queue)) { len = ip_vs_receive(tinfo->sock, tinfo->buf, ipvs->bcfg.sync_maxlen); if (len <= 0) { if (len != -EAGAIN) pr_err("receiving message error\n"); break; } ip_vs_process_message(ipvs, tinfo->buf, len); } } return 0; } int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c, int state) { struct ip_vs_sync_thread_data *ti = NULL, *tinfo; struct task_struct *task; struct net_device *dev; char *name; int (*threadfn)(void *data); int id = 0, count, hlen; int result = -ENOMEM; u16 mtu, min_mtu; IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %zd bytes\n", sizeof(struct ip_vs_sync_conn_v0)); /* increase the module use count */ if (!ip_vs_use_count_inc()) return -ENOPROTOOPT; /* Do not hold one mutex and then to block on another */ for (;;) { rtnl_lock(); if (mutex_trylock(&ipvs->sync_mutex)) break; rtnl_unlock(); mutex_lock(&ipvs->sync_mutex); if (rtnl_trylock()) break; mutex_unlock(&ipvs->sync_mutex); } if (!ipvs->sync_state) { count = clamp(sysctl_sync_ports(ipvs), 1, IPVS_SYNC_PORTS_MAX); ipvs->threads_mask = count - 1; } else count = ipvs->threads_mask + 1; if (c->mcast_af == AF_UNSPEC) { c->mcast_af = AF_INET; c->mcast_group.ip = cpu_to_be32(IP_VS_SYNC_GROUP); } if (!c->mcast_port) c->mcast_port = IP_VS_SYNC_PORT; if (!c->mcast_ttl) c->mcast_ttl = 1; dev = __dev_get_by_name(ipvs->net, c->mcast_ifn); if (!dev) { pr_err("Unknown mcast interface: %s\n", c->mcast_ifn); result = -ENODEV; goto out_early; } hlen = (AF_INET6 == c->mcast_af) ? sizeof(struct ipv6hdr) + sizeof(struct udphdr) : sizeof(struct iphdr) + sizeof(struct udphdr); mtu = (state == IP_VS_STATE_BACKUP) ? clamp(dev->mtu, 1500U, 65535U) : 1500U; min_mtu = (state == IP_VS_STATE_BACKUP) ? 1024 : 1; if (c->sync_maxlen) c->sync_maxlen = clamp_t(unsigned int, c->sync_maxlen, min_mtu, 65535 - hlen); else c->sync_maxlen = mtu - hlen; if (state == IP_VS_STATE_MASTER) { result = -EEXIST; if (ipvs->ms) goto out_early; ipvs->mcfg = *c; name = "ipvs-m:%d:%d"; threadfn = sync_thread_master; } else if (state == IP_VS_STATE_BACKUP) { result = -EEXIST; if (ipvs->backup_tinfo) goto out_early; ipvs->bcfg = *c; name = "ipvs-b:%d:%d"; threadfn = sync_thread_backup; } else { result = -EINVAL; goto out_early; } if (state == IP_VS_STATE_MASTER) { struct ipvs_master_sync_state *ms; result = -ENOMEM; ipvs->ms = kcalloc(count, sizeof(ipvs->ms[0]), GFP_KERNEL); if (!ipvs->ms) goto out; ms = ipvs->ms; for (id = 0; id < count; id++, ms++) { INIT_LIST_HEAD(&ms->sync_queue); ms->sync_queue_len = 0; ms->sync_queue_delay = 0; INIT_DELAYED_WORK(&ms->master_wakeup_work, master_wakeup_work_handler); ms->ipvs = ipvs; } } result = -ENOMEM; ti = kcalloc(count, sizeof(struct ip_vs_sync_thread_data), GFP_KERNEL); if (!ti) goto out; for (id = 0; id < count; id++) { tinfo = &ti[id]; tinfo->ipvs = ipvs; if (state == IP_VS_STATE_BACKUP) { result = -ENOMEM; tinfo->buf = kmalloc(ipvs->bcfg.sync_maxlen, GFP_KERNEL); if (!tinfo->buf) goto out; } tinfo->id = id; if (state == IP_VS_STATE_MASTER) result = make_send_sock(ipvs, id, dev, &tinfo->sock); else result = make_receive_sock(ipvs, id, dev, &tinfo->sock); if (result < 0) goto out; task = kthread_run(threadfn, tinfo, name, ipvs->gen, id); if (IS_ERR(task)) { result = PTR_ERR(task); goto out; } tinfo->task = task; } /* mark as active */ if (state == IP_VS_STATE_MASTER) ipvs->master_tinfo = ti; else ipvs->backup_tinfo = ti; spin_lock_bh(&ipvs->sync_buff_lock); ipvs->sync_state |= state; spin_unlock_bh(&ipvs->sync_buff_lock); mutex_unlock(&ipvs->sync_mutex); rtnl_unlock(); return 0; out: /* We do not need RTNL lock anymore, release it here so that * sock_release below can use rtnl_lock to leave the mcast group. */ rtnl_unlock(); id = min(id, count - 1); if (ti) { for (tinfo = ti + id; tinfo >= ti; tinfo--) { if (tinfo->task) kthread_stop(tinfo->task); } } if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) { kfree(ipvs->ms); ipvs->ms = NULL; } mutex_unlock(&ipvs->sync_mutex); /* No more mutexes, release socks */ if (ti) { for (tinfo = ti + id; tinfo >= ti; tinfo--) { if (tinfo->sock) sock_release(tinfo->sock); kfree(tinfo->buf); } kfree(ti); } /* decrease the module use count */ ip_vs_use_count_dec(); return result; out_early: mutex_unlock(&ipvs->sync_mutex); rtnl_unlock(); /* decrease the module use count */ ip_vs_use_count_dec(); return result; } int stop_sync_thread(struct netns_ipvs *ipvs, int state) { struct ip_vs_sync_thread_data *ti, *tinfo; int id; int retc = -EINVAL; IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); mutex_lock(&ipvs->sync_mutex); if (state == IP_VS_STATE_MASTER) { retc = -ESRCH; if (!ipvs->ms) goto err; ti = ipvs->master_tinfo; /* * The lock synchronizes with sb_queue_tail(), so that we don't * add sync buffers to the queue, when we are already in * progress of stopping the master sync daemon. */ spin_lock_bh(&ipvs->sync_buff_lock); spin_lock(&ipvs->sync_lock); ipvs->sync_state &= ~IP_VS_STATE_MASTER; spin_unlock(&ipvs->sync_lock); spin_unlock_bh(&ipvs->sync_buff_lock); retc = 0; for (id = ipvs->threads_mask; id >= 0; id--) { struct ipvs_master_sync_state *ms = &ipvs->ms[id]; int ret; tinfo = &ti[id]; pr_info("stopping master sync thread %d ...\n", task_pid_nr(tinfo->task)); cancel_delayed_work_sync(&ms->master_wakeup_work); ret = kthread_stop(tinfo->task); if (retc >= 0) retc = ret; } kfree(ipvs->ms); ipvs->ms = NULL; ipvs->master_tinfo = NULL; } else if (state == IP_VS_STATE_BACKUP) { retc = -ESRCH; if (!ipvs->backup_tinfo) goto err; ti = ipvs->backup_tinfo; ipvs->sync_state &= ~IP_VS_STATE_BACKUP; retc = 0; for (id = ipvs->threads_mask; id >= 0; id--) { int ret; tinfo = &ti[id]; pr_info("stopping backup sync thread %d ...\n", task_pid_nr(tinfo->task)); ret = kthread_stop(tinfo->task); if (retc >= 0) retc = ret; } ipvs->backup_tinfo = NULL; } else { goto err; } id = ipvs->threads_mask; mutex_unlock(&ipvs->sync_mutex); /* No more mutexes, release socks */ for (tinfo = ti + id; tinfo >= ti; tinfo--) { if (tinfo->sock) sock_release(tinfo->sock); kfree(tinfo->buf); } kfree(ti); /* decrease the module use count */ ip_vs_use_count_dec(); return retc; err: mutex_unlock(&ipvs->sync_mutex); return retc; } /* * Initialize data struct for each netns */ int __net_init ip_vs_sync_net_init(struct netns_ipvs *ipvs) { __mutex_init(&ipvs->sync_mutex, "ipvs->sync_mutex", &__ipvs_sync_key); spin_lock_init(&ipvs->sync_lock); spin_lock_init(&ipvs->sync_buff_lock); return 0; } void ip_vs_sync_net_cleanup(struct netns_ipvs *ipvs) { int retc; retc = stop_sync_thread(ipvs, IP_VS_STATE_MASTER); if (retc && retc != -ESRCH) pr_err("Failed to stop Master Daemon\n"); retc = stop_sync_thread(ipvs, IP_VS_STATE_BACKUP); if (retc && retc != -ESRCH) pr_err("Failed to stop Backup Daemon\n"); } |
4 1 7 1 1 1 1 1 1 1 1 6 6 6 1 1 30 30 30 27 27 3 1 1 30 24 1 5 5 5 5 5 5 5 5 5 5 5 1 4 5 2 2 5 5 5 52 53 53 53 5 3 3 3 49 1 1 2 1 47 47 46 49 49 49 3 49 4 4 4 4 3 3 3 3 1 1 1 1 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 6 6 6 6 2 2 2 2 1 1 2 6 6 6 6 5 1 6 1 6 6 6 6 6 6 6 6 1 6 6 2 2 2 6 1 6 5 6 5 6 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 1 5 5 5 5 5 5 5 5 5 5 5 5 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 2 1 1 9 2 9 9 8 8 2 9 28 28 1 1 28 1 3 46 1 1 1 14 2 6 14 1 1 1 13 3 12 3 1 11 4 3 10 3 10 3 14 3 3 3 3 3 1 2 2 3 3 3 3 37 37 37 37 37 1 37 2 2 2 2 2 2 2 2 4 3 3 2 4 2 2 2 2 2 8 8 8 8 8 7 8 7 7 4 7 7 7 7 4 2 4 4 2 2 2 2 2 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 2 2 2 2 4 4 4 27 27 27 25 25 25 25 32 31 32 31 2 30 2 2 30 2 1 1 1 1 1 37 37 37 37 37 37 4 36 30 37 45 1 1 45 1 44 1 1 1 1 61 61 61 60 60 61 61 61 61 61 61 61 60 59 61 61 60 61 37 37 37 37 37 35 37 1 1 1 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 | // SPDX-License-Identifier: GPL-2.0-only /* * mac80211 configuration hooks for cfg80211 * * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2015-2017 Intel Deutschland GmbH * Copyright (C) 2018-2024 Intel Corporation */ #include <linux/ieee80211.h> #include <linux/nl80211.h> #include <linux/rtnetlink.h> #include <linux/slab.h> #include <net/net_namespace.h> #include <linux/rcupdate.h> #include <linux/fips.h> #include <linux/if_ether.h> #include <net/cfg80211.h> #include "ieee80211_i.h" #include "driver-ops.h" #include "rate.h" #include "mesh.h" #include "wme.h" static struct ieee80211_link_data * ieee80211_link_or_deflink(struct ieee80211_sub_if_data *sdata, int link_id, bool require_valid) { struct ieee80211_link_data *link; if (link_id < 0) { /* * For keys, if sdata is not an MLD, we might not use * the return value at all (if it's not a pairwise key), * so in that case (require_valid==false) don't error. */ if (require_valid && ieee80211_vif_is_mld(&sdata->vif)) return ERR_PTR(-EINVAL); return &sdata->deflink; } link = sdata_dereference(sdata->link[link_id], sdata); if (!link) return ERR_PTR(-ENOLINK); return link; } static void ieee80211_set_mu_mimo_follow(struct ieee80211_sub_if_data *sdata, struct vif_params *params) { bool mu_mimo_groups = false; bool mu_mimo_follow = false; if (params->vht_mumimo_groups) { u64 membership; BUILD_BUG_ON(sizeof(membership) != WLAN_MEMBERSHIP_LEN); memcpy(sdata->vif.bss_conf.mu_group.membership, params->vht_mumimo_groups, WLAN_MEMBERSHIP_LEN); memcpy(sdata->vif.bss_conf.mu_group.position, params->vht_mumimo_groups + WLAN_MEMBERSHIP_LEN, WLAN_USER_POSITION_LEN); ieee80211_link_info_change_notify(sdata, &sdata->deflink, BSS_CHANGED_MU_GROUPS); /* don't care about endianness - just check for 0 */ memcpy(&membership, params->vht_mumimo_groups, WLAN_MEMBERSHIP_LEN); mu_mimo_groups = membership != 0; } if (params->vht_mumimo_follow_addr) { mu_mimo_follow = is_valid_ether_addr(params->vht_mumimo_follow_addr); ether_addr_copy(sdata->u.mntr.mu_follow_addr, params->vht_mumimo_follow_addr); } sdata->vif.bss_conf.mu_mimo_owner = mu_mimo_groups || mu_mimo_follow; } static int ieee80211_set_mon_options(struct ieee80211_sub_if_data *sdata, struct vif_params *params) { struct ieee80211_local *local = sdata->local; struct ieee80211_sub_if_data *monitor_sdata; /* check flags first */ if (params->flags && ieee80211_sdata_running(sdata)) { u32 mask = MONITOR_FLAG_COOK_FRAMES | MONITOR_FLAG_ACTIVE; /* * Prohibit MONITOR_FLAG_COOK_FRAMES and * MONITOR_FLAG_ACTIVE to be changed while the * interface is up. * Else we would need to add a lot of cruft * to update everything: * cooked_mntrs, monitor and all fif_* counters * reconfigure hardware */ if ((params->flags & mask) != (sdata->u.mntr.flags & mask)) return -EBUSY; } /* also validate MU-MIMO change */ monitor_sdata = wiphy_dereference(local->hw.wiphy, local->monitor_sdata); if (!monitor_sdata && (params->vht_mumimo_groups || params->vht_mumimo_follow_addr)) return -EOPNOTSUPP; /* apply all changes now - no failures allowed */ if (monitor_sdata && ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) ieee80211_set_mu_mimo_follow(monitor_sdata, params); if (params->flags) { if (ieee80211_sdata_running(sdata)) { ieee80211_adjust_monitor_flags(sdata, -1); sdata->u.mntr.flags = params->flags; ieee80211_adjust_monitor_flags(sdata, 1); ieee80211_configure_filter(local); } else { /* * Because the interface is down, ieee80211_do_stop * and ieee80211_do_open take care of "everything" * mentioned in the comment above. */ sdata->u.mntr.flags = params->flags; } } return 0; } static int ieee80211_set_ap_mbssid_options(struct ieee80211_sub_if_data *sdata, struct cfg80211_mbssid_config params, struct ieee80211_bss_conf *link_conf) { struct ieee80211_sub_if_data *tx_sdata; sdata->vif.mbssid_tx_vif = NULL; link_conf->bssid_index = 0; link_conf->nontransmitted = false; link_conf->ema_ap = false; link_conf->bssid_indicator = 0; if (sdata->vif.type != NL80211_IFTYPE_AP || !params.tx_wdev) return -EINVAL; tx_sdata = IEEE80211_WDEV_TO_SUB_IF(params.tx_wdev); if (!tx_sdata) return -EINVAL; if (tx_sdata == sdata) { sdata->vif.mbssid_tx_vif = &sdata->vif; } else { sdata->vif.mbssid_tx_vif = &tx_sdata->vif; link_conf->nontransmitted = true; link_conf->bssid_index = params.index; } if (params.ema) link_conf->ema_ap = true; return 0; } static struct wireless_dev *ieee80211_add_iface(struct wiphy *wiphy, const char *name, unsigned char name_assign_type, enum nl80211_iftype type, struct vif_params *params) { struct ieee80211_local *local = wiphy_priv(wiphy); struct wireless_dev *wdev; struct ieee80211_sub_if_data *sdata; int err; err = ieee80211_if_add(local, name, name_assign_type, &wdev, type, params); if (err) return ERR_PTR(err); sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); if (type == NL80211_IFTYPE_MONITOR) { err = ieee80211_set_mon_options(sdata, params); if (err) { ieee80211_if_remove(sdata); return NULL; } } return wdev; } static int ieee80211_del_iface(struct wiphy *wiphy, struct wireless_dev *wdev) { ieee80211_if_remove(IEEE80211_WDEV_TO_SUB_IF(wdev)); return 0; } static int ieee80211_change_iface(struct wiphy *wiphy, struct net_device *dev, enum nl80211_iftype type, struct vif_params *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; struct sta_info *sta; int ret; lockdep_assert_wiphy(local->hw.wiphy); ret = ieee80211_if_change_type(sdata, type); if (ret) return ret; if (type == NL80211_IFTYPE_AP_VLAN && params->use_4addr == 0) { RCU_INIT_POINTER(sdata->u.vlan.sta, NULL); ieee80211_check_fast_rx_iface(sdata); } else if (type == NL80211_IFTYPE_STATION && params->use_4addr >= 0) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; if (params->use_4addr == ifmgd->use_4addr) return 0; /* FIXME: no support for 4-addr MLO yet */ if (ieee80211_vif_is_mld(&sdata->vif)) return -EOPNOTSUPP; sdata->u.mgd.use_4addr = params->use_4addr; if (!ifmgd->associated) return 0; sta = sta_info_get(sdata, sdata->deflink.u.mgd.bssid); if (sta) drv_sta_set_4addr(local, sdata, &sta->sta, params->use_4addr); if (params->use_4addr) ieee80211_send_4addr_nullfunc(local, sdata); } if (sdata->vif.type == NL80211_IFTYPE_MONITOR) { ret = ieee80211_set_mon_options(sdata, params); if (ret) return ret; } return 0; } static int ieee80211_start_p2p_device(struct wiphy *wiphy, struct wireless_dev *wdev) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); int ret; lockdep_assert_wiphy(sdata->local->hw.wiphy); ret = ieee80211_check_combinations(sdata, NULL, 0, 0, -1); if (ret < 0) return ret; return ieee80211_do_open(wdev, true); } static void ieee80211_stop_p2p_device(struct wiphy *wiphy, struct wireless_dev *wdev) { ieee80211_sdata_stop(IEEE80211_WDEV_TO_SUB_IF(wdev)); } static int ieee80211_start_nan(struct wiphy *wiphy, struct wireless_dev *wdev, struct cfg80211_nan_conf *conf) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); int ret; lockdep_assert_wiphy(sdata->local->hw.wiphy); ret = ieee80211_check_combinations(sdata, NULL, 0, 0, -1); if (ret < 0) return ret; ret = ieee80211_do_open(wdev, true); if (ret) return ret; ret = drv_start_nan(sdata->local, sdata, conf); if (ret) ieee80211_sdata_stop(sdata); sdata->u.nan.conf = *conf; return ret; } static void ieee80211_stop_nan(struct wiphy *wiphy, struct wireless_dev *wdev) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); drv_stop_nan(sdata->local, sdata); ieee80211_sdata_stop(sdata); } static int ieee80211_nan_change_conf(struct wiphy *wiphy, struct wireless_dev *wdev, struct cfg80211_nan_conf *conf, u32 changes) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); struct cfg80211_nan_conf new_conf; int ret = 0; if (sdata->vif.type != NL80211_IFTYPE_NAN) return -EOPNOTSUPP; if (!ieee80211_sdata_running(sdata)) return -ENETDOWN; new_conf = sdata->u.nan.conf; if (changes & CFG80211_NAN_CONF_CHANGED_PREF) new_conf.master_pref = conf->master_pref; if (changes & CFG80211_NAN_CONF_CHANGED_BANDS) new_conf.bands = conf->bands; ret = drv_nan_change_conf(sdata->local, sdata, &new_conf, changes); if (!ret) sdata->u.nan.conf = new_conf; return ret; } static int ieee80211_add_nan_func(struct wiphy *wiphy, struct wireless_dev *wdev, struct cfg80211_nan_func *nan_func) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); int ret; if (sdata->vif.type != NL80211_IFTYPE_NAN) return -EOPNOTSUPP; if (!ieee80211_sdata_running(sdata)) return -ENETDOWN; spin_lock_bh(&sdata->u.nan.func_lock); ret = idr_alloc(&sdata->u.nan.function_inst_ids, nan_func, 1, sdata->local->hw.max_nan_de_entries + 1, GFP_ATOMIC); spin_unlock_bh(&sdata->u.nan.func_lock); if (ret < 0) return ret; nan_func->instance_id = ret; WARN_ON(nan_func->instance_id == 0); ret = drv_add_nan_func(sdata->local, sdata, nan_func); if (ret) { spin_lock_bh(&sdata->u.nan.func_lock); idr_remove(&sdata->u.nan.function_inst_ids, nan_func->instance_id); spin_unlock_bh(&sdata->u.nan.func_lock); } return ret; } static struct cfg80211_nan_func * ieee80211_find_nan_func_by_cookie(struct ieee80211_sub_if_data *sdata, u64 cookie) { struct cfg80211_nan_func *func; int id; lockdep_assert_held(&sdata->u.nan.func_lock); idr_for_each_entry(&sdata->u.nan.function_inst_ids, func, id) { if (func->cookie == cookie) return func; } return NULL; } static void ieee80211_del_nan_func(struct wiphy *wiphy, struct wireless_dev *wdev, u64 cookie) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); struct cfg80211_nan_func *func; u8 instance_id = 0; if (sdata->vif.type != NL80211_IFTYPE_NAN || !ieee80211_sdata_running(sdata)) return; spin_lock_bh(&sdata->u.nan.func_lock); func = ieee80211_find_nan_func_by_cookie(sdata, cookie); if (func) instance_id = func->instance_id; spin_unlock_bh(&sdata->u.nan.func_lock); if (instance_id) drv_del_nan_func(sdata->local, sdata, instance_id); } static int ieee80211_set_noack_map(struct wiphy *wiphy, struct net_device *dev, u16 noack_map) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); sdata->noack_map = noack_map; ieee80211_check_fast_xmit_iface(sdata); return 0; } static int ieee80211_set_tx(struct ieee80211_sub_if_data *sdata, const u8 *mac_addr, u8 key_idx) { struct ieee80211_local *local = sdata->local; struct ieee80211_key *key; struct sta_info *sta; int ret = -EINVAL; if (!wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_EXT_KEY_ID)) return -EINVAL; sta = sta_info_get_bss(sdata, mac_addr); if (!sta) return -EINVAL; if (sta->ptk_idx == key_idx) return 0; key = wiphy_dereference(local->hw.wiphy, sta->ptk[key_idx]); if (key && key->conf.flags & IEEE80211_KEY_FLAG_NO_AUTO_TX) ret = ieee80211_set_tx_key(key); return ret; } static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, int link_id, u8 key_idx, bool pairwise, const u8 *mac_addr, struct key_params *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_link_data *link = ieee80211_link_or_deflink(sdata, link_id, false); struct ieee80211_local *local = sdata->local; struct sta_info *sta = NULL; struct ieee80211_key *key; int err; lockdep_assert_wiphy(local->hw.wiphy); if (!ieee80211_sdata_running(sdata)) return -ENETDOWN; if (IS_ERR(link)) return PTR_ERR(link); if (pairwise && params->mode == NL80211_KEY_SET_TX) return ieee80211_set_tx(sdata, mac_addr, key_idx); /* reject WEP and TKIP keys if WEP failed to initialize */ switch (params->cipher) { case WLAN_CIPHER_SUITE_WEP40: case WLAN_CIPHER_SUITE_TKIP: case WLAN_CIPHER_SUITE_WEP104: if (link_id >= 0) return -EINVAL; if (WARN_ON_ONCE(fips_enabled)) return -EINVAL; break; default: break; } key = ieee80211_key_alloc(params->cipher, key_idx, params->key_len, params->key, params->seq_len, params->seq); if (IS_ERR(key)) return PTR_ERR(key); key->conf.link_id = link_id; if (pairwise) key->conf.flags |= IEEE80211_KEY_FLAG_PAIRWISE; if (params->mode == NL80211_KEY_NO_TX) key->conf.flags |= IEEE80211_KEY_FLAG_NO_AUTO_TX; if (mac_addr) { sta = sta_info_get_bss(sdata, mac_addr); /* * The ASSOC test makes sure the driver is ready to * receive the key. When wpa_supplicant has roamed * using FT, it attempts to set the key before * association has completed, this rejects that attempt * so it will set the key again after association. * * TODO: accept the key if we have a station entry and * add it to the device after the station. */ if (!sta || !test_sta_flag(sta, WLAN_STA_ASSOC)) { ieee80211_key_free_unused(key); return -ENOENT; } } switch (sdata->vif.type) { case NL80211_IFTYPE_STATION: if (sdata->u.mgd.mfp != IEEE80211_MFP_DISABLED) key->conf.flags |= IEEE80211_KEY_FLAG_RX_MGMT; break; case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: /* Keys without a station are used for TX only */ if (sta && test_sta_flag(sta, WLAN_STA_MFP)) key->conf.flags |= IEEE80211_KEY_FLAG_RX_MGMT; break; case NL80211_IFTYPE_ADHOC: /* no MFP (yet) */ break; case NL80211_IFTYPE_MESH_POINT: #ifdef CONFIG_MAC80211_MESH if (sdata->u.mesh.security != IEEE80211_MESH_SEC_NONE) key->conf.flags |= IEEE80211_KEY_FLAG_RX_MGMT; break; #endif case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_P2P_DEVICE: case NL80211_IFTYPE_NAN: case NL80211_IFTYPE_UNSPECIFIED: case NUM_NL80211_IFTYPES: case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_P2P_GO: case NL80211_IFTYPE_OCB: /* shouldn't happen */ WARN_ON_ONCE(1); break; } err = ieee80211_key_link(key, link, sta); /* KRACK protection, shouldn't happen but just silently accept key */ if (err == -EALREADY) err = 0; return err; } static struct ieee80211_key * ieee80211_lookup_key(struct ieee80211_sub_if_data *sdata, int link_id, u8 key_idx, bool pairwise, const u8 *mac_addr) { struct ieee80211_local *local __maybe_unused = sdata->local; struct ieee80211_link_data *link = &sdata->deflink; struct ieee80211_key *key; if (link_id >= 0) { link = sdata_dereference(sdata->link[link_id], sdata); if (!link) return NULL; } if (mac_addr) { struct sta_info *sta; struct link_sta_info *link_sta; sta = sta_info_get_bss(sdata, mac_addr); if (!sta) return NULL; if (link_id >= 0) { link_sta = rcu_dereference_check(sta->link[link_id], lockdep_is_held(&local->hw.wiphy->mtx)); if (!link_sta) return NULL; } else { link_sta = &sta->deflink; } if (pairwise && key_idx < NUM_DEFAULT_KEYS) return wiphy_dereference(local->hw.wiphy, sta->ptk[key_idx]); if (!pairwise && key_idx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS + NUM_DEFAULT_BEACON_KEYS) return wiphy_dereference(local->hw.wiphy, link_sta->gtk[key_idx]); return NULL; } if (pairwise && key_idx < NUM_DEFAULT_KEYS) return wiphy_dereference(local->hw.wiphy, sdata->keys[key_idx]); key = wiphy_dereference(local->hw.wiphy, link->gtk[key_idx]); if (key) return key; /* or maybe it was a WEP key */ if (key_idx < NUM_DEFAULT_KEYS) return wiphy_dereference(local->hw.wiphy, sdata->keys[key_idx]); return NULL; } static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev, int link_id, u8 key_idx, bool pairwise, const u8 *mac_addr) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; struct ieee80211_key *key; lockdep_assert_wiphy(local->hw.wiphy); key = ieee80211_lookup_key(sdata, link_id, key_idx, pairwise, mac_addr); if (!key) return -ENOENT; ieee80211_key_free(key, sdata->vif.type == NL80211_IFTYPE_STATION); return 0; } static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, int link_id, u8 key_idx, bool pairwise, const u8 *mac_addr, void *cookie, void (*callback)(void *cookie, struct key_params *params)) { struct ieee80211_sub_if_data *sdata; u8 seq[6] = {0}; struct key_params params; struct ieee80211_key *key; u64 pn64; u32 iv32; u16 iv16; int err = -ENOENT; struct ieee80211_key_seq kseq = {}; sdata = IEEE80211_DEV_TO_SUB_IF(dev); rcu_read_lock(); key = ieee80211_lookup_key(sdata, link_id, key_idx, pairwise, mac_addr); if (!key) goto out; memset(¶ms, 0, sizeof(params)); params.cipher = key->conf.cipher; switch (key->conf.cipher) { case WLAN_CIPHER_SUITE_TKIP: pn64 = atomic64_read(&key->conf.tx_pn); iv32 = TKIP_PN_TO_IV32(pn64); iv16 = TKIP_PN_TO_IV16(pn64); if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE && !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) { drv_get_key_seq(sdata->local, key, &kseq); iv32 = kseq.tkip.iv32; iv16 = kseq.tkip.iv16; } seq[0] = iv16 & 0xff; seq[1] = (iv16 >> 8) & 0xff; seq[2] = iv32 & 0xff; seq[3] = (iv32 >> 8) & 0xff; seq[4] = (iv32 >> 16) & 0xff; seq[5] = (iv32 >> 24) & 0xff; params.seq = seq; params.seq_len = 6; break; case WLAN_CIPHER_SUITE_CCMP: case WLAN_CIPHER_SUITE_CCMP_256: case WLAN_CIPHER_SUITE_AES_CMAC: case WLAN_CIPHER_SUITE_BIP_CMAC_256: BUILD_BUG_ON(offsetof(typeof(kseq), ccmp) != offsetof(typeof(kseq), aes_cmac)); fallthrough; case WLAN_CIPHER_SUITE_BIP_GMAC_128: case WLAN_CIPHER_SUITE_BIP_GMAC_256: BUILD_BUG_ON(offsetof(typeof(kseq), ccmp) != offsetof(typeof(kseq), aes_gmac)); fallthrough; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: BUILD_BUG_ON(offsetof(typeof(kseq), ccmp) != offsetof(typeof(kseq), gcmp)); if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE && !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) { drv_get_key_seq(sdata->local, key, &kseq); memcpy(seq, kseq.ccmp.pn, 6); } else { pn64 = atomic64_read(&key->conf.tx_pn); seq[0] = pn64; seq[1] = pn64 >> 8; seq[2] = pn64 >> 16; seq[3] = pn64 >> 24; seq[4] = pn64 >> 32; seq[5] = pn64 >> 40; } params.seq = seq; params.seq_len = 6; break; default: if (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) break; if (WARN_ON(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) break; drv_get_key_seq(sdata->local, key, &kseq); params.seq = kseq.hw.seq; params.seq_len = kseq.hw.seq_len; break; } callback(cookie, ¶ms); err = 0; out: rcu_read_unlock(); return err; } static int ieee80211_config_default_key(struct wiphy *wiphy, struct net_device *dev, int link_id, u8 key_idx, bool uni, bool multi) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_link_data *link = ieee80211_link_or_deflink(sdata, link_id, false); if (IS_ERR(link)) return PTR_ERR(link); ieee80211_set_default_key(link, key_idx, uni, multi); return 0; } static int ieee80211_config_default_mgmt_key(struct wiphy *wiphy, struct net_device *dev, int link_id, u8 key_idx) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_link_data *link = ieee80211_link_or_deflink(sdata, link_id, true); if (IS_ERR(link)) return PTR_ERR(link); ieee80211_set_default_mgmt_key(link, key_idx); return 0; } static int ieee80211_config_default_beacon_key(struct wiphy *wiphy, struct net_device *dev, int link_id, u8 key_idx) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_link_data *link = ieee80211_link_or_deflink(sdata, link_id, true); if (IS_ERR(link)) return PTR_ERR(link); ieee80211_set_default_beacon_key(link, key_idx); return 0; } void sta_set_rate_info_tx(struct sta_info *sta, const struct ieee80211_tx_rate *rate, struct rate_info *rinfo) { rinfo->flags = 0; if (rate->flags & IEEE80211_TX_RC_MCS) { rinfo->flags |= RATE_INFO_FLAGS_MCS; rinfo->mcs = rate->idx; } else if (rate->flags & IEEE80211_TX_RC_VHT_MCS) { rinfo->flags |= RATE_INFO_FLAGS_VHT_MCS; rinfo->mcs = ieee80211_rate_get_vht_mcs(rate); rinfo->nss = ieee80211_rate_get_vht_nss(rate); } else { struct ieee80211_supported_band *sband; sband = ieee80211_get_sband(sta->sdata); WARN_ON_ONCE(sband && !sband->bitrates); if (sband && sband->bitrates) rinfo->legacy = sband->bitrates[rate->idx].bitrate; } if (rate->flags & IEEE80211_TX_RC_40_MHZ_WIDTH) rinfo->bw = RATE_INFO_BW_40; else if (rate->flags & IEEE80211_TX_RC_80_MHZ_WIDTH) rinfo->bw = RATE_INFO_BW_80; else if (rate->flags & IEEE80211_TX_RC_160_MHZ_WIDTH) rinfo->bw = RATE_INFO_BW_160; else rinfo->bw = RATE_INFO_BW_20; if (rate->flags & IEEE80211_TX_RC_SHORT_GI) rinfo->flags |= RATE_INFO_FLAGS_SHORT_GI; } static int ieee80211_dump_station(struct wiphy *wiphy, struct net_device *dev, int idx, u8 *mac, struct station_info *sinfo) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; struct sta_info *sta; int ret = -ENOENT; lockdep_assert_wiphy(local->hw.wiphy); sta = sta_info_get_by_idx(sdata, idx); if (sta) { ret = 0; memcpy(mac, sta->sta.addr, ETH_ALEN); sta_set_sinfo(sta, sinfo, true); } return ret; } static int ieee80211_dump_survey(struct wiphy *wiphy, struct net_device *dev, int idx, struct survey_info *survey) { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); return drv_get_survey(local, idx, survey); } static int ieee80211_get_station(struct wiphy *wiphy, struct net_device *dev, const u8 *mac, struct station_info *sinfo) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; struct sta_info *sta; int ret = -ENOENT; lockdep_assert_wiphy(local->hw.wiphy); sta = sta_info_get_bss(sdata, mac); if (sta) { ret = 0; sta_set_sinfo(sta, sinfo, true); } return ret; } static int ieee80211_set_monitor_channel(struct wiphy *wiphy, struct cfg80211_chan_def *chandef) { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata; struct ieee80211_chan_req chanreq = { .oper = *chandef }; int ret; lockdep_assert_wiphy(local->hw.wiphy); if (cfg80211_chandef_identical(&local->monitor_chanreq.oper, &chanreq.oper)) return 0; sdata = wiphy_dereference(local->hw.wiphy, local->monitor_sdata); if (!sdata) goto done; if (cfg80211_chandef_identical(&sdata->vif.bss_conf.chanreq.oper, &chanreq.oper)) return 0; ieee80211_link_release_channel(&sdata->deflink); ret = ieee80211_link_use_channel(&sdata->deflink, &chanreq, IEEE80211_CHANCTX_EXCLUSIVE); if (ret) return ret; done: local->monitor_chanreq = chanreq; return 0; } static int ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata, const u8 *resp, size_t resp_len, const struct ieee80211_csa_settings *csa, const struct ieee80211_color_change_settings *cca, struct ieee80211_link_data *link) { struct probe_resp *new, *old; if (!resp || !resp_len) return 1; old = sdata_dereference(link->u.ap.probe_resp, sdata); new = kzalloc(sizeof(struct probe_resp) + resp_len, GFP_KERNEL); if (!new) return -ENOMEM; new->len = resp_len; memcpy(new->data, resp, resp_len); if (csa) memcpy(new->cntdwn_counter_offsets, csa->counter_offsets_presp, csa->n_counter_offsets_presp * sizeof(new->cntdwn_counter_offsets[0])); else if (cca) new->cntdwn_counter_offsets[0] = cca->counter_offset_presp; rcu_assign_pointer(link->u.ap.probe_resp, new); if (old) kfree_rcu(old, rcu_head); return 0; } static int ieee80211_set_fils_discovery(struct ieee80211_sub_if_data *sdata, struct cfg80211_fils_discovery *params, struct ieee80211_link_data *link, struct ieee80211_bss_conf *link_conf, u64 *changed) { struct fils_discovery_data *new, *old = NULL; struct ieee80211_fils_discovery *fd; if (!params->update) return 0; fd = &link_conf->fils_discovery; fd->min_interval = params->min_interval; fd->max_interval = params->max_interval; old = sdata_dereference(link->u.ap.fils_discovery, sdata); if (old) kfree_rcu(old, rcu_head); if (params->tmpl && params->tmpl_len) { new = kzalloc(sizeof(*new) + params->tmpl_len, GFP_KERNEL); if (!new) return -ENOMEM; new->len = params->tmpl_len; memcpy(new->data, params->tmpl, params->tmpl_len); rcu_assign_pointer(link->u.ap.fils_discovery, new); } else { RCU_INIT_POINTER(link->u.ap.fils_discovery, NULL); } *changed |= BSS_CHANGED_FILS_DISCOVERY; return 0; } static int ieee80211_set_unsol_bcast_probe_resp(struct ieee80211_sub_if_data *sdata, struct cfg80211_unsol_bcast_probe_resp *params, struct ieee80211_link_data *link, struct ieee80211_bss_conf *link_conf, u64 *changed) { struct unsol_bcast_probe_resp_data *new, *old = NULL; if (!params->update) return 0; link_conf->unsol_bcast_probe_resp_interval = params->interval; old = sdata_dereference(link->u.ap.unsol_bcast_probe_resp, sdata); if (old) kfree_rcu(old, rcu_head); if (params->tmpl && params->tmpl_len) { new = kzalloc(sizeof(*new) + params->tmpl_len, GFP_KERNEL); if (!new) return -ENOMEM; new->len = params->tmpl_len; memcpy(new->data, params->tmpl, params->tmpl_len); rcu_assign_pointer(link->u.ap.unsol_bcast_probe_resp, new); } else { RCU_INIT_POINTER(link->u.ap.unsol_bcast_probe_resp, NULL); } *changed |= BSS_CHANGED_UNSOL_BCAST_PROBE_RESP; return 0; } static int ieee80211_set_ftm_responder_params( struct ieee80211_sub_if_data *sdata, const u8 *lci, size_t lci_len, const u8 *civicloc, size_t civicloc_len, struct ieee80211_bss_conf *link_conf) { struct ieee80211_ftm_responder_params *new, *old; u8 *pos; int len; if (!lci_len && !civicloc_len) return 0; old = link_conf->ftmr_params; len = lci_len + civicloc_len; new = kzalloc(sizeof(*new) + len, GFP_KERNEL); if (!new) return -ENOMEM; pos = (u8 *)(new + 1); if (lci_len) { new->lci_len = lci_len; new->lci = pos; memcpy(pos, lci, lci_len); pos += lci_len; } if (civicloc_len) { new->civicloc_len = civicloc_len; new->civicloc = pos; memcpy(pos, civicloc, civicloc_len); pos += civicloc_len; } link_conf->ftmr_params = new; kfree(old); return 0; } static int ieee80211_copy_mbssid_beacon(u8 *pos, struct cfg80211_mbssid_elems *dst, struct cfg80211_mbssid_elems *src) { int i, offset = 0; for (i = 0; i < src->cnt; i++) { memcpy(pos + offset, src->elem[i].data, src->elem[i].len); dst->elem[i].len = src->elem[i].len; dst->elem[i].data = pos + offset; offset += dst->elem[i].len; } dst->cnt = src->cnt; return offset; } static int ieee80211_copy_rnr_beacon(u8 *pos, struct cfg80211_rnr_elems *dst, struct cfg80211_rnr_elems *src) { int i, offset = 0; for (i = 0; i < src->cnt; i++) { memcpy(pos + offset, src->elem[i].data, src->elem[i].len); dst->elem[i].len = src->elem[i].len; dst->elem[i].data = pos + offset; offset += dst->elem[i].len; } dst->cnt = src->cnt; return offset; } static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata, struct ieee80211_link_data *link, struct cfg80211_beacon_data *params, const struct ieee80211_csa_settings *csa, const struct ieee80211_color_change_settings *cca, u64 *changed) { struct cfg80211_mbssid_elems *mbssid = NULL; struct cfg80211_rnr_elems *rnr = NULL; struct beacon_data *new, *old; int new_head_len, new_tail_len; int size, err; u64 _changed = BSS_CHANGED_BEACON; struct ieee80211_bss_conf *link_conf = link->conf; old = sdata_dereference(link->u.ap.beacon, sdata); /* Need to have a beacon head if we don't have one yet */ if (!params->head && !old) return -EINVAL; /* new or old head? */ if (params->head) new_head_len = params->head_len; else new_head_len = old->head_len; /* new or old tail? */ if (params->tail || !old) /* params->tail_len will be zero for !params->tail */ new_tail_len = params->tail_len; else new_tail_len = old->tail_len; size = sizeof(*new) + new_head_len + new_tail_len; /* new or old multiple BSSID elements? */ if (params->mbssid_ies) { mbssid = params->mbssid_ies; size += struct_size(new->mbssid_ies, elem, mbssid->cnt); if (params->rnr_ies) { rnr = params->rnr_ies; size += struct_size(new->rnr_ies, elem, rnr->cnt); } size += ieee80211_get_mbssid_beacon_len(mbssid, rnr, mbssid->cnt); } else if (old && old->mbssid_ies) { mbssid = old->mbssid_ies; size += struct_size(new->mbssid_ies, elem, mbssid->cnt); if (old && old->rnr_ies) { rnr = old->rnr_ies; size += struct_size(new->rnr_ies, elem, rnr->cnt); } size += ieee80211_get_mbssid_beacon_len(mbssid, rnr, mbssid->cnt); } new = kzalloc(size, GFP_KERNEL); if (!new) return -ENOMEM; /* start filling the new info now */ /* * pointers go into the block we allocated, * memory is | beacon_data | head | tail | mbssid_ies | rnr_ies */ new->head = ((u8 *) new) + sizeof(*new); new->tail = new->head + new_head_len; new->head_len = new_head_len; new->tail_len = new_tail_len; /* copy in optional mbssid_ies */ if (mbssid) { u8 *pos = new->tail + new->tail_len; new->mbssid_ies = (void *)pos; pos += struct_size(new->mbssid_ies, elem, mbssid->cnt); pos += ieee80211_copy_mbssid_beacon(pos, new->mbssid_ies, mbssid); if (rnr) { new->rnr_ies = (void *)pos; pos += struct_size(new->rnr_ies, elem, rnr->cnt); ieee80211_copy_rnr_beacon(pos, new->rnr_ies, rnr); } /* update bssid_indicator */ link_conf->bssid_indicator = ilog2(__roundup_pow_of_two(mbssid->cnt + 1)); } if (csa) { new->cntdwn_current_counter = csa->count; memcpy(new->cntdwn_counter_offsets, csa->counter_offsets_beacon, csa->n_counter_offsets_beacon * sizeof(new->cntdwn_counter_offsets[0])); } else if (cca) { new->cntdwn_current_counter = cca->count; new->cntdwn_counter_offsets[0] = cca->counter_offset_beacon; } /* copy in head */ if (params->head) memcpy(new->head, params->head, new_head_len); else memcpy(new->head, old->head, new_head_len); /* copy in optional tail */ if (params->tail) memcpy(new->tail, params->tail, new_tail_len); else if (old) memcpy(new->tail, old->tail, new_tail_len); err = ieee80211_set_probe_resp(sdata, params->probe_resp, params->probe_resp_len, csa, cca, link); if (err < 0) { kfree(new); return err; } if (err == 0) _changed |= BSS_CHANGED_AP_PROBE_RESP; if (params->ftm_responder != -1) { link_conf->ftm_responder = params->ftm_responder; err = ieee80211_set_ftm_responder_params(sdata, params->lci, params->lci_len, params->civicloc, params->civicloc_len, link_conf); if (err < 0) { kfree(new); return err; } _changed |= BSS_CHANGED_FTM_RESPONDER; } rcu_assign_pointer(link->u.ap.beacon, new); sdata->u.ap.active = true; if (old) kfree_rcu(old, rcu_head); *changed |= _changed; return 0; } static u8 ieee80211_num_beaconing_links(struct ieee80211_sub_if_data *sdata) { struct ieee80211_link_data *link; u8 link_id, num = 0; if (sdata->vif.type != NL80211_IFTYPE_AP && sdata->vif.type != NL80211_IFTYPE_P2P_GO) return num; if (!sdata->vif.valid_links) return num; for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) { link = sdata_dereference(sdata->link[link_id], sdata); if (!link) continue; if (sdata_dereference(link->u.ap.beacon, sdata)) num++; } return num; } static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_ap_settings *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; struct beacon_data *old; struct ieee80211_sub_if_data *vlan; u64 changed = BSS_CHANGED_BEACON_INT | BSS_CHANGED_BEACON_ENABLED | BSS_CHANGED_BEACON | BSS_CHANGED_P2P_PS | BSS_CHANGED_TXPOWER | BSS_CHANGED_TWT; int i, err; int prev_beacon_int; unsigned int link_id = params->beacon.link_id; struct ieee80211_link_data *link; struct ieee80211_bss_conf *link_conf; struct ieee80211_chan_req chanreq = { .oper = params->chandef }; lockdep_assert_wiphy(local->hw.wiphy); link = sdata_dereference(sdata->link[link_id], sdata); if (!link) return -ENOLINK; link_conf = link->conf; old = sdata_dereference(link->u.ap.beacon, sdata); if (old) return -EALREADY; if (params->smps_mode != NL80211_SMPS_OFF) return -EOPNOTSUPP; link->smps_mode = IEEE80211_SMPS_OFF; link->needed_rx_chains = sdata->local->rx_chains; prev_beacon_int = link_conf->beacon_int; link_conf->beacon_int = params->beacon_interval; if (params->ht_cap) link_conf->ht_ldpc = params->ht_cap->cap_info & cpu_to_le16(IEEE80211_HT_CAP_LDPC_CODING); if (params->vht_cap) { link_conf->vht_ldpc = params->vht_cap->vht_cap_info & cpu_to_le32(IEEE80211_VHT_CAP_RXLDPC); link_conf->vht_su_beamformer = params->vht_cap->vht_cap_info & cpu_to_le32(IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE); link_conf->vht_su_beamformee = params->vht_cap->vht_cap_info & cpu_to_le32(IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE); link_conf->vht_mu_beamformer = params->vht_cap->vht_cap_info & cpu_to_le32(IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE); link_conf->vht_mu_beamformee = params->vht_cap->vht_cap_info & cpu_to_le32(IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE); } if (params->he_cap && params->he_oper) { link_conf->he_support = true; link_conf->htc_trig_based_pkt_ext = le32_get_bits(params->he_oper->he_oper_params, IEEE80211_HE_OPERATION_DFLT_PE_DURATION_MASK); link_conf->frame_time_rts_th = le32_get_bits(params->he_oper->he_oper_params, IEEE80211_HE_OPERATION_RTS_THRESHOLD_MASK); changed |= BSS_CHANGED_HE_OBSS_PD; if (params->beacon.he_bss_color.enabled) changed |= BSS_CHANGED_HE_BSS_COLOR; } if (params->he_cap) { link_conf->he_ldpc = params->he_cap->phy_cap_info[1] & IEEE80211_HE_PHY_CAP1_LDPC_CODING_IN_PAYLOAD; link_conf->he_su_beamformer = params->he_cap->phy_cap_info[3] & IEEE80211_HE_PHY_CAP3_SU_BEAMFORMER; link_conf->he_su_beamformee = params->he_cap->phy_cap_info[4] & IEEE80211_HE_PHY_CAP4_SU_BEAMFORMEE; link_conf->he_mu_beamformer = params->he_cap->phy_cap_info[4] & IEEE80211_HE_PHY_CAP4_MU_BEAMFORMER; link_conf->he_full_ul_mumimo = params->he_cap->phy_cap_info[2] & IEEE80211_HE_PHY_CAP2_UL_MU_FULL_MU_MIMO; } if (params->eht_cap) { if (!link_conf->he_support) return -EOPNOTSUPP; link_conf->eht_support = true; link_conf->eht_su_beamformer = params->eht_cap->fixed.phy_cap_info[0] & IEEE80211_EHT_PHY_CAP0_SU_BEAMFORMER; link_conf->eht_su_beamformee = params->eht_cap->fixed.phy_cap_info[0] & IEEE80211_EHT_PHY_CAP0_SU_BEAMFORMEE; link_conf->eht_mu_beamformer = params->eht_cap->fixed.phy_cap_info[7] & (IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_80MHZ | IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_160MHZ | IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_320MHZ); link_conf->eht_80mhz_full_bw_ul_mumimo = params->eht_cap->fixed.phy_cap_info[7] & (IEEE80211_EHT_PHY_CAP7_NON_OFDMA_UL_MU_MIMO_80MHZ | IEEE80211_EHT_PHY_CAP7_NON_OFDMA_UL_MU_MIMO_160MHZ | IEEE80211_EHT_PHY_CAP7_NON_OFDMA_UL_MU_MIMO_320MHZ); } else { link_conf->eht_su_beamformer = false; link_conf->eht_su_beamformee = false; link_conf->eht_mu_beamformer = false; } if (sdata->vif.type == NL80211_IFTYPE_AP && params->mbssid_config.tx_wdev) { err = ieee80211_set_ap_mbssid_options(sdata, params->mbssid_config, link_conf); if (err) return err; } err = ieee80211_link_use_channel(link, &chanreq, IEEE80211_CHANCTX_SHARED); if (!err) ieee80211_link_copy_chanctx_to_vlans(link, false); if (err) { link_conf->beacon_int = prev_beacon_int; return err; } /* * Apply control port protocol, this allows us to * not encrypt dynamic WEP control frames. */ sdata->control_port_protocol = params->crypto.control_port_ethertype; sdata->control_port_no_encrypt = params->crypto.control_port_no_encrypt; sdata->control_port_over_nl80211 = params->crypto.control_port_over_nl80211; sdata->control_port_no_preauth = params->crypto.control_port_no_preauth; list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) { vlan->control_port_protocol = params->crypto.control_port_ethertype; vlan->control_port_no_encrypt = params->crypto.control_port_no_encrypt; vlan->control_port_over_nl80211 = params->crypto.control_port_over_nl80211; vlan->control_port_no_preauth = params->crypto.control_port_no_preauth; } link_conf->dtim_period = params->dtim_period; link_conf->enable_beacon = true; link_conf->allow_p2p_go_ps = sdata->vif.p2p; link_conf->twt_responder = params->twt_responder; link_conf->he_obss_pd = params->he_obss_pd; link_conf->he_bss_color = params->beacon.he_bss_color; sdata->vif.cfg.s1g = params->chandef.chan->band == NL80211_BAND_S1GHZ; sdata->vif.cfg.ssid_len = params->ssid_len; if (params->ssid_len) memcpy(sdata->vif.cfg.ssid, params->ssid, params->ssid_len); link_conf->hidden_ssid = (params->hidden_ssid != NL80211_HIDDEN_SSID_NOT_IN_USE); memset(&link_conf->p2p_noa_attr, 0, sizeof(link_conf->p2p_noa_attr)); link_conf->p2p_noa_attr.oppps_ctwindow = params->p2p_ctwindow & IEEE80211_P2P_OPPPS_CTWINDOW_MASK; if (params->p2p_opp_ps) link_conf->p2p_noa_attr.oppps_ctwindow |= IEEE80211_P2P_OPPPS_ENABLE_BIT; sdata->beacon_rate_set = false; if (wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_BEACON_RATE_LEGACY)) { for (i = 0; i < NUM_NL80211_BANDS; i++) { sdata->beacon_rateidx_mask[i] = params->beacon_rate.control[i].legacy; if (sdata->beacon_rateidx_mask[i]) sdata->beacon_rate_set = true; } } if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) link_conf->beacon_tx_rate = params->beacon_rate; err = ieee80211_assign_beacon(sdata, link, ¶ms->beacon, NULL, NULL, &changed); if (err < 0) goto error; err = ieee80211_set_fils_discovery(sdata, ¶ms->fils_discovery, link, link_conf, &changed); if (err < 0) goto error; err = ieee80211_set_unsol_bcast_probe_resp(sdata, ¶ms->unsol_bcast_probe_resp, link, link_conf, &changed); if (err < 0) goto error; err = drv_start_ap(sdata->local, sdata, link_conf); if (err) { old = sdata_dereference(link->u.ap.beacon, sdata); if (old) kfree_rcu(old, rcu_head); RCU_INIT_POINTER(link->u.ap.beacon, NULL); if (ieee80211_num_beaconing_links(sdata) == 0) sdata->u.ap.active = false; goto error; } ieee80211_recalc_dtim(local, sdata); ieee80211_vif_cfg_change_notify(sdata, BSS_CHANGED_SSID); ieee80211_link_info_change_notify(sdata, link, changed); if (ieee80211_num_beaconing_links(sdata) <= 1) netif_carrier_on(dev); list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) netif_carrier_on(vlan->dev); return 0; error: ieee80211_link_release_channel(link); return err; } static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_ap_update *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_link_data *link; struct cfg80211_beacon_data *beacon = ¶ms->beacon; struct beacon_data *old; int err; struct ieee80211_bss_conf *link_conf; u64 changed = 0; lockdep_assert_wiphy(wiphy); link = sdata_dereference(sdata->link[beacon->link_id], sdata); if (!link) return -ENOLINK; link_conf = link->conf; /* don't allow changing the beacon while a countdown is in place - offset * of channel switch counter may change */ if (link_conf->csa_active || link_conf->color_change_active) return -EBUSY; old = sdata_dereference(link->u.ap.beacon, sdata); if (!old) return -ENOENT; err = ieee80211_assign_beacon(sdata, link, beacon, NULL, NULL, &changed); if (err < 0) return err; err = ieee80211_set_fils_discovery(sdata, ¶ms->fils_discovery, link, link_conf, &changed); if (err < 0) return err; err = ieee80211_set_unsol_bcast_probe_resp(sdata, ¶ms->unsol_bcast_probe_resp, link, link_conf, &changed); if (err < 0) return err; if (beacon->he_bss_color_valid && beacon->he_bss_color.enabled != link_conf->he_bss_color.enabled) { link_conf->he_bss_color.enabled = beacon->he_bss_color.enabled; changed |= BSS_CHANGED_HE_BSS_COLOR; } ieee80211_link_info_change_notify(sdata, link, changed); return 0; } static void ieee80211_free_next_beacon(struct ieee80211_link_data *link) { if (!link->u.ap.next_beacon) return; kfree(link->u.ap.next_beacon->mbssid_ies); kfree(link->u.ap.next_beacon->rnr_ies); kfree(link->u.ap.next_beacon); link->u.ap.next_beacon = NULL; } static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev, unsigned int link_id) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_sub_if_data *vlan; struct ieee80211_local *local = sdata->local; struct beacon_data *old_beacon; struct probe_resp *old_probe_resp; struct fils_discovery_data *old_fils_discovery; struct unsol_bcast_probe_resp_data *old_unsol_bcast_probe_resp; struct cfg80211_chan_def chandef; struct ieee80211_link_data *link = sdata_dereference(sdata->link[link_id], sdata); struct ieee80211_bss_conf *link_conf = link->conf; LIST_HEAD(keys); lockdep_assert_wiphy(local->hw.wiphy); old_beacon = sdata_dereference(link->u.ap.beacon, sdata); if (!old_beacon) return -ENOENT; old_probe_resp = sdata_dereference(link->u.ap.probe_resp, sdata); old_fils_discovery = sdata_dereference(link->u.ap.fils_discovery, sdata); old_unsol_bcast_probe_resp = sdata_dereference(link->u.ap.unsol_bcast_probe_resp, sdata); /* abort any running channel switch or color change */ link_conf->csa_active = false; link_conf->color_change_active = false; ieee80211_vif_unblock_queues_csa(sdata); ieee80211_free_next_beacon(link); /* turn off carrier for this interface and dependent VLANs */ list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) netif_carrier_off(vlan->dev); if (ieee80211_num_beaconing_links(sdata) <= 1) { netif_carrier_off(dev); sdata->u.ap.active = false; } /* remove beacon and probe response */ RCU_INIT_POINTER(link->u.ap.beacon, NULL); RCU_INIT_POINTER(link->u.ap.probe_resp, NULL); RCU_INIT_POINTER(link->u.ap.fils_discovery, NULL); RCU_INIT_POINTER(link->u.ap.unsol_bcast_probe_resp, NULL); kfree_rcu(old_beacon, rcu_head); if (old_probe_resp) kfree_rcu(old_probe_resp, rcu_head); if (old_fils_discovery) kfree_rcu(old_fils_discovery, rcu_head); if (old_unsol_bcast_probe_resp) kfree_rcu(old_unsol_bcast_probe_resp, rcu_head); kfree(link_conf->ftmr_params); link_conf->ftmr_params = NULL; sdata->vif.mbssid_tx_vif = NULL; link_conf->bssid_index = 0; link_conf->nontransmitted = false; link_conf->ema_ap = false; link_conf->bssid_indicator = 0; __sta_info_flush(sdata, true, link_id); ieee80211_remove_link_keys(link, &keys); if (!list_empty(&keys)) { synchronize_net(); ieee80211_free_key_list(local, &keys); } link_conf->enable_beacon = false; sdata->beacon_rate_set = false; sdata->vif.cfg.ssid_len = 0; clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state); ieee80211_link_info_change_notify(sdata, link, BSS_CHANGED_BEACON_ENABLED); if (sdata->wdev.links[link_id].cac_started) { chandef = link_conf->chanreq.oper; wiphy_delayed_work_cancel(wiphy, &link->dfs_cac_timer_work); cfg80211_cac_event(sdata->dev, &chandef, NL80211_RADAR_CAC_ABORTED, GFP_KERNEL, link_id); } drv_stop_ap(sdata->local, sdata, link_conf); /* free all potentially still buffered bcast frames */ local->total_ps_buffered -= skb_queue_len(&sdata->u.ap.ps.bc_buf); ieee80211_purge_tx_queue(&local->hw, &sdata->u.ap.ps.bc_buf); ieee80211_link_copy_chanctx_to_vlans(link, true); ieee80211_link_release_channel(link); return 0; } static int sta_apply_auth_flags(struct ieee80211_local *local, struct sta_info *sta, u32 mask, u32 set) { int ret; if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED) && set & BIT(NL80211_STA_FLAG_AUTHENTICATED) && !test_sta_flag(sta, WLAN_STA_AUTH)) { ret = sta_info_move_state(sta, IEEE80211_STA_AUTH); if (ret) return ret; } if (mask & BIT(NL80211_STA_FLAG_ASSOCIATED) && set & BIT(NL80211_STA_FLAG_ASSOCIATED) && !test_sta_flag(sta, WLAN_STA_ASSOC)) { /* * When peer becomes associated, init rate control as * well. Some drivers require rate control initialized * before drv_sta_state() is called. */ if (!test_sta_flag(sta, WLAN_STA_RATE_CONTROL)) rate_control_rate_init(sta); ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC); if (ret) return ret; } if (mask & BIT(NL80211_STA_FLAG_AUTHORIZED)) { if (set & BIT(NL80211_STA_FLAG_AUTHORIZED)) ret = sta_info_move_state(sta, IEEE80211_STA_AUTHORIZED); else if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC); else ret = 0; if (ret) return ret; } if (mask & BIT(NL80211_STA_FLAG_ASSOCIATED) && !(set & BIT(NL80211_STA_FLAG_ASSOCIATED)) && test_sta_flag(sta, WLAN_STA_ASSOC)) { ret = sta_info_move_state(sta, IEEE80211_STA_AUTH); if (ret) return ret; } if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED) && !(set & BIT(NL80211_STA_FLAG_AUTHENTICATED)) && test_sta_flag(sta, WLAN_STA_AUTH)) { ret = sta_info_move_state(sta, IEEE80211_STA_NONE); if (ret) return ret; } return 0; } static void sta_apply_mesh_params(struct ieee80211_local *local, struct sta_info *sta, struct station_parameters *params) { #ifdef CONFIG_MAC80211_MESH struct ieee80211_sub_if_data *sdata = sta->sdata; u64 changed = 0; if (params->sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE) { switch (params->plink_state) { case NL80211_PLINK_ESTAB: if (sta->mesh->plink_state != NL80211_PLINK_ESTAB) changed = mesh_plink_inc_estab_count(sdata); sta->mesh->plink_state = params->plink_state; sta->mesh->aid = params->peer_aid; ieee80211_mps_sta_status_update(sta); changed |= ieee80211_mps_set_sta_local_pm(sta, sdata->u.mesh.mshcfg.power_mode); ewma_mesh_tx_rate_avg_init(&sta->mesh->tx_rate_avg); /* init at low value */ ewma_mesh_tx_rate_avg_add(&sta->mesh->tx_rate_avg, 10); break; case NL80211_PLINK_LISTEN: case NL80211_PLINK_BLOCKED: case NL80211_PLINK_OPN_SNT: case NL80211_PLINK_OPN_RCVD: case NL80211_PLINK_CNF_RCVD: case NL80211_PLINK_HOLDING: if (sta->mesh->plink_state == NL80211_PLINK_ESTAB) changed = mesh_plink_dec_estab_count(sdata); sta->mesh->plink_state = params->plink_state; ieee80211_mps_sta_status_update(sta); changed |= ieee80211_mps_set_sta_local_pm(sta, NL80211_MESH_POWER_UNKNOWN); break; default: /* nothing */ break; } } switch (params->plink_action) { case NL80211_PLINK_ACTION_NO_ACTION: /* nothing */ break; case NL80211_PLINK_ACTION_OPEN: changed |= mesh_plink_open(sta); break; case NL80211_PLINK_ACTION_BLOCK: changed |= mesh_plink_block(sta); break; } if (params->local_pm) changed |= ieee80211_mps_set_sta_local_pm(sta, params->local_pm); ieee80211_mbss_info_change_notify(sdata, changed); #endif } enum sta_link_apply_mode { STA_LINK_MODE_NEW, STA_LINK_MODE_STA_MODIFY, STA_LINK_MODE_LINK_MODIFY, }; static int sta_link_apply_parameters(struct ieee80211_local *local, struct sta_info *sta, enum sta_link_apply_mode mode, struct link_station_parameters *params) { struct ieee80211_supported_band *sband; struct ieee80211_sub_if_data *sdata = sta->sdata; u32 link_id = params->link_id < 0 ? 0 : params->link_id; struct ieee80211_link_data *link = sdata_dereference(sdata->link[link_id], sdata); struct link_sta_info *link_sta = rcu_dereference_protected(sta->link[link_id], lockdep_is_held(&local->hw.wiphy->mtx)); bool changes = params->link_mac || params->txpwr_set || params->supported_rates_len || params->ht_capa || params->vht_capa || params->he_capa || params->eht_capa || params->opmode_notif_used; switch (mode) { case STA_LINK_MODE_NEW: if (!params->link_mac) return -EINVAL; break; case STA_LINK_MODE_LINK_MODIFY: break; case STA_LINK_MODE_STA_MODIFY: if (params->link_id >= 0) break; if (!changes) return 0; break; } if (!link || !link_sta) return -EINVAL; sband = ieee80211_get_link_sband(link); if (!sband) return -EINVAL; if (params->link_mac) { if (mode == STA_LINK_MODE_NEW) { memcpy(link_sta->addr, params->link_mac, ETH_ALEN); memcpy(link_sta->pub->addr, params->link_mac, ETH_ALEN); } else if (!ether_addr_equal(link_sta->addr, params->link_mac)) { return -EINVAL; } } if (params->txpwr_set) { int ret; link_sta->pub->txpwr.type = params->txpwr.type; if (params->txpwr.type == NL80211_TX_POWER_LIMITED) link_sta->pub->txpwr.power = params->txpwr.power; ret = drv_sta_set_txpwr(local, sdata, sta); if (ret) return ret; } if (params->supported_rates && params->supported_rates_len) { ieee80211_parse_bitrates(link->conf->chanreq.oper.width, sband, params->supported_rates, params->supported_rates_len, &link_sta->pub->supp_rates[sband->band]); } if (params->ht_capa) ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband, params->ht_capa, link_sta); /* VHT can override some HT caps such as the A-MSDU max length */ if (params->vht_capa) ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband, params->vht_capa, NULL, link_sta); if (params->he_capa) ieee80211_he_cap_ie_to_sta_he_cap(sdata, sband, (void *)params->he_capa, params->he_capa_len, (void *)params->he_6ghz_capa, link_sta); if (params->he_capa && params->eht_capa) ieee80211_eht_cap_ie_to_sta_eht_cap(sdata, sband, (u8 *)params->he_capa, params->he_capa_len, params->eht_capa, params->eht_capa_len, link_sta); if (params->opmode_notif_used) { /* returned value is only needed for rc update, but the * rc isn't initialized here yet, so ignore it */ __ieee80211_vht_handle_opmode(sdata, link_sta, params->opmode_notif, sband->band); } ieee80211_sta_init_nss(link_sta); return 0; } static int sta_apply_parameters(struct ieee80211_local *local, struct sta_info *sta, struct station_parameters *params) { struct ieee80211_sub_if_data *sdata = sta->sdata; u32 mask, set; int ret = 0; mask = params->sta_flags_mask; set = params->sta_flags_set; if (ieee80211_vif_is_mesh(&sdata->vif)) { /* * In mesh mode, ASSOCIATED isn't part of the nl80211 * API but must follow AUTHENTICATED for driver state. */ if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED)) mask |= BIT(NL80211_STA_FLAG_ASSOCIATED); if (set & BIT(NL80211_STA_FLAG_AUTHENTICATED)) set |= BIT(NL80211_STA_FLAG_ASSOCIATED); } else if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) { /* * TDLS -- everything follows authorized, but * only becoming authorized is possible, not * going back */ if (set & BIT(NL80211_STA_FLAG_AUTHORIZED)) { set |= BIT(NL80211_STA_FLAG_AUTHENTICATED) | BIT(NL80211_STA_FLAG_ASSOCIATED); mask |= BIT(NL80211_STA_FLAG_AUTHENTICATED) | BIT(NL80211_STA_FLAG_ASSOCIATED); } } if (mask & BIT(NL80211_STA_FLAG_WME) && local->hw.queues >= IEEE80211_NUM_ACS) sta->sta.wme = set & BIT(NL80211_STA_FLAG_WME); /* auth flags will be set later for TDLS, * and for unassociated stations that move to associated */ if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER) && !((mask & BIT(NL80211_STA_FLAG_ASSOCIATED)) && (set & BIT(NL80211_STA_FLAG_ASSOCIATED)))) { ret = sta_apply_auth_flags(local, sta, mask, set); if (ret) return ret; } if (mask & BIT(NL80211_STA_FLAG_SHORT_PREAMBLE)) { if (set & BIT(NL80211_STA_FLAG_SHORT_PREAMBLE)) set_sta_flag(sta, WLAN_STA_SHORT_PREAMBLE); else clear_sta_flag(sta, WLAN_STA_SHORT_PREAMBLE); } if (mask & BIT(NL80211_STA_FLAG_MFP)) { sta->sta.mfp = !!(set & BIT(NL80211_STA_FLAG_MFP)); if (set & BIT(NL80211_STA_FLAG_MFP)) set_sta_flag(sta, WLAN_STA_MFP); else clear_sta_flag(sta, WLAN_STA_MFP); } if (mask & BIT(NL80211_STA_FLAG_TDLS_PEER)) { if (set & BIT(NL80211_STA_FLAG_TDLS_PEER)) set_sta_flag(sta, WLAN_STA_TDLS_PEER); else clear_sta_flag(sta, WLAN_STA_TDLS_PEER); } if (mask & BIT(NL80211_STA_FLAG_SPP_AMSDU)) sta->sta.spp_amsdu = set & BIT(NL80211_STA_FLAG_SPP_AMSDU); /* mark TDLS channel switch support, if the AP allows it */ if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) && !sdata->deflink.u.mgd.tdls_chan_switch_prohibited && params->ext_capab_len >= 4 && params->ext_capab[3] & WLAN_EXT_CAPA4_TDLS_CHAN_SWITCH) set_sta_flag(sta, WLAN_STA_TDLS_CHAN_SWITCH); if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) && !sdata->u.mgd.tdls_wider_bw_prohibited && ieee80211_hw_check(&local->hw, TDLS_WIDER_BW) && params->ext_capab_len >= 8 && params->ext_capab[7] & WLAN_EXT_CAPA8_TDLS_WIDE_BW_ENABLED) set_sta_flag(sta, WLAN_STA_TDLS_WIDER_BW); if (params->sta_modify_mask & STATION_PARAM_APPLY_UAPSD) { sta->sta.uapsd_queues = params->uapsd_queues; sta->sta.max_sp = params->max_sp; } ieee80211_sta_set_max_amsdu_subframes(sta, params->ext_capab, params->ext_capab_len); /* * cfg80211 validates this (1-2007) and allows setting the AID * only when creating a new station entry */ if (params->aid) sta->sta.aid = params->aid; /* * Some of the following updates would be racy if called on an * existing station, via ieee80211_change_station(). However, * all such changes are rejected by cfg80211 except for updates * changing the supported rates on an existing but not yet used * TDLS peer. */ if (params->listen_interval >= 0) sta->listen_interval = params->listen_interval; ret = sta_link_apply_parameters(local, sta, STA_LINK_MODE_STA_MODIFY, ¶ms->link_sta_params); if (ret) return ret; if (params->support_p2p_ps >= 0) sta->sta.support_p2p_ps = params->support_p2p_ps; if (ieee80211_vif_is_mesh(&sdata->vif)) sta_apply_mesh_params(local, sta, params); if (params->airtime_weight) sta->airtime_weight = params->airtime_weight; /* set the STA state after all sta info from usermode has been set */ if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) || set & BIT(NL80211_STA_FLAG_ASSOCIATED)) { ret = sta_apply_auth_flags(local, sta, mask, set); if (ret) return ret; } /* Mark the STA as MLO if MLD MAC address is available */ if (params->link_sta_params.mld_mac) sta->sta.mlo = true; return 0; } static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, const u8 *mac, struct station_parameters *params) { struct ieee80211_local *local = wiphy_priv(wiphy); struct sta_info *sta; struct ieee80211_sub_if_data *sdata; int err; lockdep_assert_wiphy(local->hw.wiphy); if (params->vlan) { sdata = IEEE80211_DEV_TO_SUB_IF(params->vlan); if (sdata->vif.type != NL80211_IFTYPE_AP_VLAN && sdata->vif.type != NL80211_IFTYPE_AP) return -EINVAL; } else sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (ether_addr_equal(mac, sdata->vif.addr)) return -EINVAL; if (!is_valid_ether_addr(mac)) return -EINVAL; if (params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER) && sdata->vif.type == NL80211_IFTYPE_STATION && !sdata->u.mgd.associated) return -EINVAL; /* * If we have a link ID, it can be a non-MLO station on an AP MLD, * but we need to have a link_mac in that case as well, so use the * STA's MAC address in that case. */ if (params->link_sta_params.link_id >= 0) sta = sta_info_alloc_with_link(sdata, mac, params->link_sta_params.link_id, params->link_sta_params.link_mac ?: mac, GFP_KERNEL); else sta = sta_info_alloc(sdata, mac, GFP_KERNEL); if (!sta) return -ENOMEM; if (params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) sta->sta.tdls = true; /* Though the mutex is not needed here (since the station is not * visible yet), sta_apply_parameters (and inner functions) require * the mutex due to other paths. */ err = sta_apply_parameters(local, sta, params); if (err) { sta_info_free(local, sta); return err; } /* * for TDLS and for unassociated station, rate control should be * initialized only when rates are known and station is marked * authorized/associated */ if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER) && test_sta_flag(sta, WLAN_STA_ASSOC)) rate_control_rate_init(sta); return sta_info_insert(sta); } static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev, struct station_del_parameters *params) { struct ieee80211_sub_if_data *sdata; sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (params->mac) return sta_info_destroy_addr_bss(sdata, params->mac); sta_info_flush(sdata, params->link_id); return 0; } static int ieee80211_change_station(struct wiphy *wiphy, struct net_device *dev, const u8 *mac, struct station_parameters *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = wiphy_priv(wiphy); struct sta_info *sta; struct ieee80211_sub_if_data *vlansdata; enum cfg80211_station_type statype; int err; lockdep_assert_wiphy(local->hw.wiphy); sta = sta_info_get_bss(sdata, mac); if (!sta) return -ENOENT; switch (sdata->vif.type) { case NL80211_IFTYPE_MESH_POINT: if (sdata->u.mesh.user_mpm) statype = CFG80211_STA_MESH_PEER_USER; else statype = CFG80211_STA_MESH_PEER_KERNEL; break; case NL80211_IFTYPE_ADHOC: statype = CFG80211_STA_IBSS; break; case NL80211_IFTYPE_STATION: if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER)) { statype = CFG80211_STA_AP_STA; break; } if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) statype = CFG80211_STA_TDLS_PEER_ACTIVE; else statype = CFG80211_STA_TDLS_PEER_SETUP; break; case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: if (test_sta_flag(sta, WLAN_STA_ASSOC)) statype = CFG80211_STA_AP_CLIENT; else statype = CFG80211_STA_AP_CLIENT_UNASSOC; break; default: return -EOPNOTSUPP; } err = cfg80211_check_station_change(wiphy, params, statype); if (err) return err; if (params->vlan && params->vlan != sta->sdata->dev) { vlansdata = IEEE80211_DEV_TO_SUB_IF(params->vlan); if (params->vlan->ieee80211_ptr->use_4addr) { if (vlansdata->u.vlan.sta) return -EBUSY; rcu_assign_pointer(vlansdata->u.vlan.sta, sta); __ieee80211_check_fast_rx_iface(vlansdata); drv_sta_set_4addr(local, sta->sdata, &sta->sta, true); } if (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN && sta->sdata->u.vlan.sta) RCU_INIT_POINTER(sta->sdata->u.vlan.sta, NULL); if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) ieee80211_vif_dec_num_mcast(sta->sdata); sta->sdata = vlansdata; ieee80211_check_fast_rx(sta); ieee80211_check_fast_xmit(sta); if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) { ieee80211_vif_inc_num_mcast(sta->sdata); cfg80211_send_layer2_update(sta->sdata->dev, sta->sta.addr); } } err = sta_apply_parameters(local, sta, params); if (err) return err; if (sdata->vif.type == NL80211_IFTYPE_STATION && params->sta_flags_mask & BIT(NL80211_STA_FLAG_AUTHORIZED)) { ieee80211_recalc_ps(local); ieee80211_recalc_ps_vif(sdata); } return 0; } #ifdef CONFIG_MAC80211_MESH static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev, const u8 *dst, const u8 *next_hop) { struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; struct sta_info *sta; sdata = IEEE80211_DEV_TO_SUB_IF(dev); rcu_read_lock(); sta = sta_info_get(sdata, next_hop); if (!sta) { rcu_read_unlock(); return -ENOENT; } mpath = mesh_path_add(sdata, dst); if (IS_ERR(mpath)) { rcu_read_unlock(); return PTR_ERR(mpath); } mesh_path_fix_nexthop(mpath, sta); rcu_read_unlock(); return 0; } static int ieee80211_del_mpath(struct wiphy *wiphy, struct net_device *dev, const u8 *dst) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (dst) return mesh_path_del(sdata, dst); mesh_path_flush_by_iface(sdata); return 0; } static int ieee80211_change_mpath(struct wiphy *wiphy, struct net_device *dev, const u8 *dst, const u8 *next_hop) { struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; struct sta_info *sta; sdata = IEEE80211_DEV_TO_SUB_IF(dev); rcu_read_lock(); sta = sta_info_get(sdata, next_hop); if (!sta) { rcu_read_unlock(); return -ENOENT; } mpath = mesh_path_lookup(sdata, dst); if (!mpath) { rcu_read_unlock(); return -ENOENT; } mesh_path_fix_nexthop(mpath, sta); rcu_read_unlock(); return 0; } static void mpath_set_pinfo(struct mesh_path *mpath, u8 *next_hop, struct mpath_info *pinfo) { struct sta_info *next_hop_sta = rcu_dereference(mpath->next_hop); if (next_hop_sta) memcpy(next_hop, next_hop_sta->sta.addr, ETH_ALEN); else eth_zero_addr(next_hop); memset(pinfo, 0, sizeof(*pinfo)); pinfo->generation = mpath->sdata->u.mesh.mesh_paths_generation; pinfo->filled = MPATH_INFO_FRAME_QLEN | MPATH_INFO_SN | MPATH_INFO_METRIC | MPATH_INFO_EXPTIME | MPATH_INFO_DISCOVERY_TIMEOUT | MPATH_INFO_DISCOVERY_RETRIES | MPATH_INFO_FLAGS | MPATH_INFO_HOP_COUNT | MPATH_INFO_PATH_CHANGE; pinfo->frame_qlen = mpath->frame_queue.qlen; pinfo->sn = mpath->sn; pinfo->metric = mpath->metric; if (time_before(jiffies, mpath->exp_time)) pinfo->exptime = jiffies_to_msecs(mpath->exp_time - jiffies); pinfo->discovery_timeout = jiffies_to_msecs(mpath->discovery_timeout); pinfo->discovery_retries = mpath->discovery_retries; if (mpath->flags & MESH_PATH_ACTIVE) pinfo->flags |= NL80211_MPATH_FLAG_ACTIVE; if (mpath->flags & MESH_PATH_RESOLVING) pinfo->flags |= NL80211_MPATH_FLAG_RESOLVING; if (mpath->flags & MESH_PATH_SN_VALID) pinfo->flags |= NL80211_MPATH_FLAG_SN_VALID; if (mpath->flags & MESH_PATH_FIXED) pinfo->flags |= NL80211_MPATH_FLAG_FIXED; if (mpath->flags & MESH_PATH_RESOLVED) pinfo->flags |= NL80211_MPATH_FLAG_RESOLVED; pinfo->hop_count = mpath->hop_count; pinfo->path_change_count = mpath->path_change_count; } static int ieee80211_get_mpath(struct wiphy *wiphy, struct net_device *dev, u8 *dst, u8 *next_hop, struct mpath_info *pinfo) { struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; sdata = IEEE80211_DEV_TO_SUB_IF(dev); rcu_read_lock(); mpath = mesh_path_lookup(sdata, dst); if (!mpath) { rcu_read_unlock(); return -ENOENT; } memcpy(dst, mpath->dst, ETH_ALEN); mpath_set_pinfo(mpath, next_hop, pinfo); rcu_read_unlock(); return 0; } static int ieee80211_dump_mpath(struct wiphy *wiphy, struct net_device *dev, int idx, u8 *dst, u8 *next_hop, struct mpath_info *pinfo) { struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; sdata = IEEE80211_DEV_TO_SUB_IF(dev); rcu_read_lock(); mpath = mesh_path_lookup_by_idx(sdata, idx); if (!mpath) { rcu_read_unlock(); return -ENOENT; } memcpy(dst, mpath->dst, ETH_ALEN); mpath_set_pinfo(mpath, next_hop, pinfo); rcu_read_unlock(); return 0; } static void mpp_set_pinfo(struct mesh_path *mpath, u8 *mpp, struct mpath_info *pinfo) { memset(pinfo, 0, sizeof(*pinfo)); memcpy(mpp, mpath->mpp, ETH_ALEN); pinfo->generation = mpath->sdata->u.mesh.mpp_paths_generation; } static int ieee80211_get_mpp(struct wiphy *wiphy, struct net_device *dev, u8 *dst, u8 *mpp, struct mpath_info *pinfo) { struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; sdata = IEEE80211_DEV_TO_SUB_IF(dev); rcu_read_lock(); mpath = mpp_path_lookup(sdata, dst); if (!mpath) { rcu_read_unlock(); return -ENOENT; } memcpy(dst, mpath->dst, ETH_ALEN); mpp_set_pinfo(mpath, mpp, pinfo); rcu_read_unlock(); return 0; } static int ieee80211_dump_mpp(struct wiphy *wiphy, struct net_device *dev, int idx, u8 *dst, u8 *mpp, struct mpath_info *pinfo) { struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; sdata = IEEE80211_DEV_TO_SUB_IF(dev); rcu_read_lock(); mpath = mpp_path_lookup_by_idx(sdata, idx); if (!mpath) { rcu_read_unlock(); return -ENOENT; } memcpy(dst, mpath->dst, ETH_ALEN); mpp_set_pinfo(mpath, mpp, pinfo); rcu_read_unlock(); return 0; } static int ieee80211_get_mesh_config(struct wiphy *wiphy, struct net_device *dev, struct mesh_config *conf) { struct ieee80211_sub_if_data *sdata; sdata = IEEE80211_DEV_TO_SUB_IF(dev); memcpy(conf, &(sdata->u.mesh.mshcfg), sizeof(struct mesh_config)); return 0; } static inline bool _chg_mesh_attr(enum nl80211_meshconf_params parm, u32 mask) { return (mask >> (parm-1)) & 0x1; } static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh, const struct mesh_setup *setup) { u8 *new_ie; struct ieee80211_sub_if_data *sdata = container_of(ifmsh, struct ieee80211_sub_if_data, u.mesh); int i; /* allocate information elements */ new_ie = NULL; if (setup->ie_len) { new_ie = kmemdup(setup->ie, setup->ie_len, GFP_KERNEL); if (!new_ie) return -ENOMEM; } ifmsh->ie_len = setup->ie_len; ifmsh->ie = new_ie; /* now copy the rest of the setup parameters */ ifmsh->mesh_id_len = setup->mesh_id_len; memcpy(ifmsh->mesh_id, setup->mesh_id, ifmsh->mesh_id_len); ifmsh->mesh_sp_id = setup->sync_method; ifmsh->mesh_pp_id = setup->path_sel_proto; ifmsh->mesh_pm_id = setup->path_metric; ifmsh->user_mpm = setup->user_mpm; ifmsh->mesh_auth_id = setup->auth_id; ifmsh->security = IEEE80211_MESH_SEC_NONE; ifmsh->userspace_handles_dfs = setup->userspace_handles_dfs; if (setup->is_authenticated) ifmsh->security |= IEEE80211_MESH_SEC_AUTHED; if (setup->is_secure) ifmsh->security |= IEEE80211_MESH_SEC_SECURED; /* mcast rate setting in Mesh Node */ memcpy(sdata->vif.bss_conf.mcast_rate, setup->mcast_rate, sizeof(setup->mcast_rate)); sdata->vif.bss_conf.basic_rates = setup->basic_rates; sdata->vif.bss_conf.beacon_int = setup->beacon_interval; sdata->vif.bss_conf.dtim_period = setup->dtim_period; sdata->beacon_rate_set = false; if (wiphy_ext_feature_isset(sdata->local->hw.wiphy, NL80211_EXT_FEATURE_BEACON_RATE_LEGACY)) { for (i = 0; i < NUM_NL80211_BANDS; i++) { sdata->beacon_rateidx_mask[i] = setup->beacon_rate.control[i].legacy; if (sdata->beacon_rateidx_mask[i]) sdata->beacon_rate_set = true; } } return 0; } static int ieee80211_update_mesh_config(struct wiphy *wiphy, struct net_device *dev, u32 mask, const struct mesh_config *nconf) { struct mesh_config *conf; struct ieee80211_sub_if_data *sdata; struct ieee80211_if_mesh *ifmsh; sdata = IEEE80211_DEV_TO_SUB_IF(dev); ifmsh = &sdata->u.mesh; /* Set the config options which we are interested in setting */ conf = &(sdata->u.mesh.mshcfg); if (_chg_mesh_attr(NL80211_MESHCONF_RETRY_TIMEOUT, mask)) conf->dot11MeshRetryTimeout = nconf->dot11MeshRetryTimeout; if (_chg_mesh_attr(NL80211_MESHCONF_CONFIRM_TIMEOUT, mask)) conf->dot11MeshConfirmTimeout = nconf->dot11MeshConfirmTimeout; if (_chg_mesh_attr(NL80211_MESHCONF_HOLDING_TIMEOUT, mask)) conf->dot11MeshHoldingTimeout = nconf->dot11MeshHoldingTimeout; if (_chg_mesh_attr(NL80211_MESHCONF_MAX_PEER_LINKS, mask)) conf->dot11MeshMaxPeerLinks = nconf->dot11MeshMaxPeerLinks; if (_chg_mesh_attr(NL80211_MESHCONF_MAX_RETRIES, mask)) conf->dot11MeshMaxRetries = nconf->dot11MeshMaxRetries; if (_chg_mesh_attr(NL80211_MESHCONF_TTL, mask)) conf->dot11MeshTTL = nconf->dot11MeshTTL; if (_chg_mesh_attr(NL80211_MESHCONF_ELEMENT_TTL, mask)) conf->element_ttl = nconf->element_ttl; if (_chg_mesh_attr(NL80211_MESHCONF_AUTO_OPEN_PLINKS, mask)) { if (ifmsh->user_mpm) return -EBUSY; conf->auto_open_plinks = nconf->auto_open_plinks; } if (_chg_mesh_attr(NL80211_MESHCONF_SYNC_OFFSET_MAX_NEIGHBOR, mask)) conf->dot11MeshNbrOffsetMaxNeighbor = nconf->dot11MeshNbrOffsetMaxNeighbor; if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES, mask)) conf->dot11MeshHWMPmaxPREQretries = nconf->dot11MeshHWMPmaxPREQretries; if (_chg_mesh_attr(NL80211_MESHCONF_PATH_REFRESH_TIME, mask)) conf->path_refresh_time = nconf->path_refresh_time; if (_chg_mesh_attr(NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT, mask)) conf->min_discovery_timeout = nconf->min_discovery_timeout; if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT, mask)) conf->dot11MeshHWMPactivePathTimeout = nconf->dot11MeshHWMPactivePathTimeout; if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL, mask)) conf->dot11MeshHWMPpreqMinInterval = nconf->dot11MeshHWMPpreqMinInterval; if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL, mask)) conf->dot11MeshHWMPperrMinInterval = nconf->dot11MeshHWMPperrMinInterval; if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME, mask)) conf->dot11MeshHWMPnetDiameterTraversalTime = nconf->dot11MeshHWMPnetDiameterTraversalTime; if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_ROOTMODE, mask)) { conf->dot11MeshHWMPRootMode = nconf->dot11MeshHWMPRootMode; ieee80211_mesh_root_setup(ifmsh); } if (_chg_mesh_attr(NL80211_MESHCONF_GATE_ANNOUNCEMENTS, mask)) { /* our current gate announcement implementation rides on root * announcements, so require this ifmsh to also be a root node * */ if (nconf->dot11MeshGateAnnouncementProtocol && !(conf->dot11MeshHWMPRootMode > IEEE80211_ROOTMODE_ROOT)) { conf->dot11MeshHWMPRootMode = IEEE80211_PROACTIVE_RANN; ieee80211_mesh_root_setup(ifmsh); } conf->dot11MeshGateAnnouncementProtocol = nconf->dot11MeshGateAnnouncementProtocol; } if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_RANN_INTERVAL, mask)) conf->dot11MeshHWMPRannInterval = nconf->dot11MeshHWMPRannInterval; if (_chg_mesh_attr(NL80211_MESHCONF_FORWARDING, mask)) conf->dot11MeshForwarding = nconf->dot11MeshForwarding; if (_chg_mesh_attr(NL80211_MESHCONF_RSSI_THRESHOLD, mask)) { /* our RSSI threshold implementation is supported only for * devices that report signal in dBm. */ if (!ieee80211_hw_check(&sdata->local->hw, SIGNAL_DBM)) return -EOPNOTSUPP; conf->rssi_threshold = nconf->rssi_threshold; } if (_chg_mesh_attr(NL80211_MESHCONF_HT_OPMODE, mask)) { conf->ht_opmode = nconf->ht_opmode; sdata->vif.bss_conf.ht_operation_mode = nconf->ht_opmode; ieee80211_link_info_change_notify(sdata, &sdata->deflink, BSS_CHANGED_HT); } if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT, mask)) conf->dot11MeshHWMPactivePathToRootTimeout = nconf->dot11MeshHWMPactivePathToRootTimeout; if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_ROOT_INTERVAL, mask)) conf->dot11MeshHWMProotInterval = nconf->dot11MeshHWMProotInterval; if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL, mask)) conf->dot11MeshHWMPconfirmationInterval = nconf->dot11MeshHWMPconfirmationInterval; if (_chg_mesh_attr(NL80211_MESHCONF_POWER_MODE, mask)) { conf->power_mode = nconf->power_mode; ieee80211_mps_local_status_update(sdata); } if (_chg_mesh_attr(NL80211_MESHCONF_AWAKE_WINDOW, mask)) conf->dot11MeshAwakeWindowDuration = nconf->dot11MeshAwakeWindowDuration; if (_chg_mesh_attr(NL80211_MESHCONF_PLINK_TIMEOUT, mask)) conf->plink_timeout = nconf->plink_timeout; if (_chg_mesh_attr(NL80211_MESHCONF_CONNECTED_TO_GATE, mask)) conf->dot11MeshConnectedToMeshGate = nconf->dot11MeshConnectedToMeshGate; if (_chg_mesh_attr(NL80211_MESHCONF_NOLEARN, mask)) conf->dot11MeshNolearn = nconf->dot11MeshNolearn; if (_chg_mesh_attr(NL80211_MESHCONF_CONNECTED_TO_AS, mask)) conf->dot11MeshConnectedToAuthServer = nconf->dot11MeshConnectedToAuthServer; ieee80211_mbss_info_change_notify(sdata, BSS_CHANGED_BEACON); return 0; } static int ieee80211_join_mesh(struct wiphy *wiphy, struct net_device *dev, const struct mesh_config *conf, const struct mesh_setup *setup) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_chan_req chanreq = { .oper = setup->chandef }; struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; int err; lockdep_assert_wiphy(sdata->local->hw.wiphy); memcpy(&ifmsh->mshcfg, conf, sizeof(struct mesh_config)); err = copy_mesh_setup(ifmsh, setup); if (err) return err; sdata->control_port_over_nl80211 = setup->control_port_over_nl80211; /* can mesh use other SMPS modes? */ sdata->deflink.smps_mode = IEEE80211_SMPS_OFF; sdata->deflink.needed_rx_chains = sdata->local->rx_chains; err = ieee80211_link_use_channel(&sdata->deflink, &chanreq, IEEE80211_CHANCTX_SHARED); if (err) return err; return ieee80211_start_mesh(sdata); } static int ieee80211_leave_mesh(struct wiphy *wiphy, struct net_device *dev) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); lockdep_assert_wiphy(sdata->local->hw.wiphy); ieee80211_stop_mesh(sdata); ieee80211_link_release_channel(&sdata->deflink); kfree(sdata->u.mesh.ie); return 0; } #endif static int ieee80211_change_bss(struct wiphy *wiphy, struct net_device *dev, struct bss_parameters *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_link_data *link; struct ieee80211_supported_band *sband; u64 changed = 0; link = ieee80211_link_or_deflink(sdata, params->link_id, true); if (IS_ERR(link)) return PTR_ERR(link); if (!sdata_dereference(link->u.ap.beacon, sdata)) return -ENOENT; sband = ieee80211_get_link_sband(link); if (!sband) return -EINVAL; if (params->basic_rates) { if (!ieee80211_parse_bitrates(link->conf->chanreq.oper.width, wiphy->bands[sband->band], params->basic_rates, params->basic_rates_len, &link->conf->basic_rates)) return -EINVAL; changed |= BSS_CHANGED_BASIC_RATES; ieee80211_check_rate_mask(link); } if (params->use_cts_prot >= 0) { link->conf->use_cts_prot = params->use_cts_prot; changed |= BSS_CHANGED_ERP_CTS_PROT; } if (params->use_short_preamble >= 0) { link->conf->use_short_preamble = params->use_short_preamble; changed |= BSS_CHANGED_ERP_PREAMBLE; } if (!link->conf->use_short_slot && (sband->band == NL80211_BAND_5GHZ || sband->band == NL80211_BAND_6GHZ)) { link->conf->use_short_slot = true; changed |= BSS_CHANGED_ERP_SLOT; } if (params->use_short_slot_time >= 0) { link->conf->use_short_slot = params->use_short_slot_time; changed |= BSS_CHANGED_ERP_SLOT; } if (params->ap_isolate >= 0) { if (params->ap_isolate) sdata->flags |= IEEE80211_SDATA_DONT_BRIDGE_PACKETS; else sdata->flags &= ~IEEE80211_SDATA_DONT_BRIDGE_PACKETS; ieee80211_check_fast_rx_iface(sdata); } if (params->ht_opmode >= 0) { link->conf->ht_operation_mode = (u16)params->ht_opmode; changed |= BSS_CHANGED_HT; } if (params->p2p_ctwindow >= 0) { link->conf->p2p_noa_attr.oppps_ctwindow &= ~IEEE80211_P2P_OPPPS_CTWINDOW_MASK; link->conf->p2p_noa_attr.oppps_ctwindow |= params->p2p_ctwindow & IEEE80211_P2P_OPPPS_CTWINDOW_MASK; changed |= BSS_CHANGED_P2P_PS; } if (params->p2p_opp_ps > 0) { link->conf->p2p_noa_attr.oppps_ctwindow |= IEEE80211_P2P_OPPPS_ENABLE_BIT; changed |= BSS_CHANGED_P2P_PS; } else if (params->p2p_opp_ps == 0) { link->conf->p2p_noa_attr.oppps_ctwindow &= ~IEEE80211_P2P_OPPPS_ENABLE_BIT; changed |= BSS_CHANGED_P2P_PS; } ieee80211_link_info_change_notify(sdata, link, changed); return 0; } static int ieee80211_set_txq_params(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_txq_params *params) { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_link_data *link = ieee80211_link_or_deflink(sdata, params->link_id, true); struct ieee80211_tx_queue_params p; if (!local->ops->conf_tx) return -EOPNOTSUPP; if (local->hw.queues < IEEE80211_NUM_ACS) return -EOPNOTSUPP; if (IS_ERR(link)) return PTR_ERR(link); memset(&p, 0, sizeof(p)); p.aifs = params->aifs; p.cw_max = params->cwmax; p.cw_min = params->cwmin; p.txop = params->txop; /* * Setting tx queue params disables u-apsd because it's only * called in master mode. */ p.uapsd = false; ieee80211_regulatory_limit_wmm_params(sdata, &p, params->ac); link->tx_conf[params->ac] = p; if (drv_conf_tx(local, link, params->ac, &p)) { wiphy_debug(local->hw.wiphy, "failed to set TX queue parameters for AC %d\n", params->ac); return -EINVAL; } ieee80211_link_info_change_notify(sdata, link, BSS_CHANGED_QOS); return 0; } #ifdef CONFIG_PM static int ieee80211_suspend(struct wiphy *wiphy, struct cfg80211_wowlan *wowlan) { return __ieee80211_suspend(wiphy_priv(wiphy), wowlan); } static int ieee80211_resume(struct wiphy *wiphy) { return __ieee80211_resume(wiphy_priv(wiphy)); } #else #define ieee80211_suspend NULL #define ieee80211_resume NULL #endif static int ieee80211_scan(struct wiphy *wiphy, struct cfg80211_scan_request *req) { struct ieee80211_sub_if_data *sdata; sdata = IEEE80211_WDEV_TO_SUB_IF(req->wdev); switch (ieee80211_vif_type_p2p(&sdata->vif)) { case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_MESH_POINT: case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_P2P_DEVICE: break; case NL80211_IFTYPE_P2P_GO: if (sdata->local->ops->hw_scan) break; /* * FIXME: implement NoA while scanning in software, * for now fall through to allow scanning only when * beaconing hasn't been configured yet */ fallthrough; case NL80211_IFTYPE_AP: /* * If the scan has been forced (and the driver supports * forcing), don't care about being beaconing already. * This will create problems to the attached stations (e.g. all * the frames sent while scanning on other channel will be * lost) */ if (sdata->deflink.u.ap.beacon && (!(wiphy->features & NL80211_FEATURE_AP_SCAN) || !(req->flags & NL80211_SCAN_FLAG_AP))) return -EOPNOTSUPP; break; case NL80211_IFTYPE_NAN: default: return -EOPNOTSUPP; } return ieee80211_request_scan(sdata, req); } static void ieee80211_abort_scan(struct wiphy *wiphy, struct wireless_dev *wdev) { ieee80211_scan_cancel(wiphy_priv(wiphy)); } static int ieee80211_sched_scan_start(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_sched_scan_request *req) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (!sdata->local->ops->sched_scan_start) return -EOPNOTSUPP; return ieee80211_request_sched_scan_start(sdata, req); } static int ieee80211_sched_scan_stop(struct wiphy *wiphy, struct net_device *dev, u64 reqid) { struct ieee80211_local *local = wiphy_priv(wiphy); if (!local->ops->sched_scan_stop) return -EOPNOTSUPP; return ieee80211_request_sched_scan_stop(local); } static int ieee80211_auth(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_auth_request *req) { return ieee80211_mgd_auth(IEEE80211_DEV_TO_SUB_IF(dev), req); } static int ieee80211_assoc(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_assoc_request *req) { return ieee80211_mgd_assoc(IEEE80211_DEV_TO_SUB_IF(dev), req); } static int ieee80211_deauth(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_deauth_request *req) { return ieee80211_mgd_deauth(IEEE80211_DEV_TO_SUB_IF(dev), req); } static int ieee80211_disassoc(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_disassoc_request *req) { return ieee80211_mgd_disassoc(IEEE80211_DEV_TO_SUB_IF(dev), req); } static int ieee80211_join_ibss(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_ibss_params *params) { return ieee80211_ibss_join(IEEE80211_DEV_TO_SUB_IF(dev), params); } static int ieee80211_leave_ibss(struct wiphy *wiphy, struct net_device *dev) { return ieee80211_ibss_leave(IEEE80211_DEV_TO_SUB_IF(dev)); } static int ieee80211_join_ocb(struct wiphy *wiphy, struct net_device *dev, struct ocb_setup *setup) { return ieee80211_ocb_join(IEEE80211_DEV_TO_SUB_IF(dev), setup); } static int ieee80211_leave_ocb(struct wiphy *wiphy, struct net_device *dev) { return ieee80211_ocb_leave(IEEE80211_DEV_TO_SUB_IF(dev)); } static int ieee80211_set_mcast_rate(struct wiphy *wiphy, struct net_device *dev, int rate[NUM_NL80211_BANDS]) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); memcpy(sdata->vif.bss_conf.mcast_rate, rate, sizeof(int) * NUM_NL80211_BANDS); if (ieee80211_sdata_running(sdata)) ieee80211_link_info_change_notify(sdata, &sdata->deflink, BSS_CHANGED_MCAST_RATE); return 0; } static int ieee80211_set_wiphy_params(struct wiphy *wiphy, u32 changed) { struct ieee80211_local *local = wiphy_priv(wiphy); int err; if (changed & WIPHY_PARAM_FRAG_THRESHOLD) { ieee80211_check_fast_xmit_all(local); err = drv_set_frag_threshold(local, wiphy->frag_threshold); if (err) { ieee80211_check_fast_xmit_all(local); return err; } } if ((changed & WIPHY_PARAM_COVERAGE_CLASS) || (changed & WIPHY_PARAM_DYN_ACK)) { s16 coverage_class; coverage_class = changed & WIPHY_PARAM_COVERAGE_CLASS ? wiphy->coverage_class : -1; err = drv_set_coverage_class(local, coverage_class); if (err) return err; } if (changed & WIPHY_PARAM_RTS_THRESHOLD) { err = drv_set_rts_threshold(local, wiphy->rts_threshold); if (err) return err; } if (changed & WIPHY_PARAM_RETRY_SHORT) { if (wiphy->retry_short > IEEE80211_MAX_TX_RETRY) return -EINVAL; local->hw.conf.short_frame_max_tx_count = wiphy->retry_short; } if (changed & WIPHY_PARAM_RETRY_LONG) { if (wiphy->retry_long > IEEE80211_MAX_TX_RETRY) return -EINVAL; local->hw.conf.long_frame_max_tx_count = wiphy->retry_long; } if (changed & (WIPHY_PARAM_RETRY_SHORT | WIPHY_PARAM_RETRY_LONG)) ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_RETRY_LIMITS); if (changed & (WIPHY_PARAM_TXQ_LIMIT | WIPHY_PARAM_TXQ_MEMORY_LIMIT | WIPHY_PARAM_TXQ_QUANTUM)) ieee80211_txq_set_params(local); return 0; } static int ieee80211_set_tx_power(struct wiphy *wiphy, struct wireless_dev *wdev, enum nl80211_tx_power_setting type, int mbm) { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata; enum nl80211_tx_power_setting txp_type = type; bool update_txp_type = false; bool has_monitor = false; lockdep_assert_wiphy(local->hw.wiphy); if (wdev) { sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); if (sdata->vif.type == NL80211_IFTYPE_MONITOR) { if (!ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) return -EOPNOTSUPP; sdata = wiphy_dereference(local->hw.wiphy, local->monitor_sdata); if (!sdata) return -EOPNOTSUPP; } switch (type) { case NL80211_TX_POWER_AUTOMATIC: sdata->deflink.user_power_level = IEEE80211_UNSET_POWER_LEVEL; txp_type = NL80211_TX_POWER_LIMITED; break; case NL80211_TX_POWER_LIMITED: case NL80211_TX_POWER_FIXED: if (mbm < 0 || (mbm % 100)) return -EOPNOTSUPP; sdata->deflink.user_power_level = MBM_TO_DBM(mbm); break; } if (txp_type != sdata->vif.bss_conf.txpower_type) { update_txp_type = true; sdata->vif.bss_conf.txpower_type = txp_type; } ieee80211_recalc_txpower(sdata, update_txp_type); return 0; } switch (type) { case NL80211_TX_POWER_AUTOMATIC: local->user_power_level = IEEE80211_UNSET_POWER_LEVEL; txp_type = NL80211_TX_POWER_LIMITED; break; case NL80211_TX_POWER_LIMITED: case NL80211_TX_POWER_FIXED: if (mbm < 0 || (mbm % 100)) return -EOPNOTSUPP; local->user_power_level = MBM_TO_DBM(mbm); break; } list_for_each_entry(sdata, &local->interfaces, list) { if (sdata->vif.type == NL80211_IFTYPE_MONITOR) { has_monitor = true; continue; } sdata->deflink.user_power_level = local->user_power_level; if (txp_type != sdata->vif.bss_conf.txpower_type) update_txp_type = true; sdata->vif.bss_conf.txpower_type = txp_type; } list_for_each_entry(sdata, &local->interfaces, list) { if (sdata->vif.type == NL80211_IFTYPE_MONITOR) continue; ieee80211_recalc_txpower(sdata, update_txp_type); } if (has_monitor) { sdata = wiphy_dereference(local->hw.wiphy, local->monitor_sdata); if (sdata && ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) { sdata->deflink.user_power_level = local->user_power_level; if (txp_type != sdata->vif.bss_conf.txpower_type) update_txp_type = true; sdata->vif.bss_conf.txpower_type = txp_type; ieee80211_recalc_txpower(sdata, update_txp_type); } } return 0; } static int ieee80211_get_tx_power(struct wiphy *wiphy, struct wireless_dev *wdev, int *dbm) { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); if (local->ops->get_txpower) return drv_get_txpower(local, sdata, dbm); if (local->emulate_chanctx) *dbm = local->hw.conf.power_level; else *dbm = sdata->vif.bss_conf.txpower; /* INT_MIN indicates no power level was set yet */ if (*dbm == INT_MIN) return -EINVAL; return 0; } static void ieee80211_rfkill_poll(struct wiphy *wiphy) { struct ieee80211_local *local = wiphy_priv(wiphy); drv_rfkill_poll(local); } #ifdef CONFIG_NL80211_TESTMODE static int ieee80211_testmode_cmd(struct wiphy *wiphy, struct wireless_dev *wdev, void *data, int len) { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_vif *vif = NULL; if (!local->ops->testmode_cmd) return -EOPNOTSUPP; if (wdev) { struct ieee80211_sub_if_data *sdata; sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); if (sdata->flags & IEEE80211_SDATA_IN_DRIVER) vif = &sdata->vif; } return local->ops->testmode_cmd(&local->hw, vif, data, len); } static int ieee80211_testmode_dump(struct wiphy *wiphy, struct sk_buff *skb, struct netlink_callback *cb, void *data, int len) { struct ieee80211_local *local = wiphy_priv(wiphy); if (!local->ops->testmode_dump) return -EOPNOTSUPP; return local->ops->testmode_dump(&local->hw, skb, cb, data, len); } #endif int __ieee80211_request_smps_mgd(struct ieee80211_sub_if_data *sdata, struct ieee80211_link_data *link, enum ieee80211_smps_mode smps_mode) { const u8 *ap; enum ieee80211_smps_mode old_req; int err; struct sta_info *sta; bool tdls_peer_found = false; lockdep_assert_wiphy(sdata->local->hw.wiphy); if (WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_STATION)) return -EINVAL; if (!ieee80211_vif_link_active(&sdata->vif, link->link_id)) return 0; old_req = link->u.mgd.req_smps; link->u.mgd.req_smps = smps_mode; /* The driver indicated that EML is enabled for the interface, which * implies that SMPS flows towards the AP should be stopped. */ if (sdata->vif.driver_flags & IEEE80211_VIF_EML_ACTIVE) return 0; if (old_req == smps_mode && smps_mode != IEEE80211_SMPS_AUTOMATIC) return 0; /* * If not associated, or current association is not an HT * association, there's no need to do anything, just store * the new value until we associate. */ if (!sdata->u.mgd.associated || link->conf->chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT) return 0; ap = sdata->vif.cfg.ap_addr; rcu_read_lock(); list_for_each_entry_rcu(sta, &sdata->local->sta_list, list) { if (!sta->sta.tdls || sta->sdata != sdata || !sta->uploaded || !test_sta_flag(sta, WLAN_STA_AUTHORIZED)) continue; tdls_peer_found = true; break; } rcu_read_unlock(); if (smps_mode == IEEE80211_SMPS_AUTOMATIC) { if (tdls_peer_found || !sdata->u.mgd.powersave) smps_mode = IEEE80211_SMPS_OFF; else smps_mode = IEEE80211_SMPS_DYNAMIC; } /* send SM PS frame to AP */ err = ieee80211_send_smps_action(sdata, smps_mode, ap, ap, ieee80211_vif_is_mld(&sdata->vif) ? link->link_id : -1); if (err) link->u.mgd.req_smps = old_req; else if (smps_mode != IEEE80211_SMPS_OFF && tdls_peer_found) ieee80211_teardown_tdls_peers(link); return err; } static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, bool enabled, int timeout) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); unsigned int link_id; if (sdata->vif.type != NL80211_IFTYPE_STATION) return -EOPNOTSUPP; if (!ieee80211_hw_check(&local->hw, SUPPORTS_PS)) return -EOPNOTSUPP; if (enabled == sdata->u.mgd.powersave && timeout == local->dynamic_ps_forced_timeout) return 0; sdata->u.mgd.powersave = enabled; local->dynamic_ps_forced_timeout = timeout; /* no change, but if automatic follow powersave */ for (link_id = 0; link_id < ARRAY_SIZE(sdata->link); link_id++) { struct ieee80211_link_data *link; link = sdata_dereference(sdata->link[link_id], sdata); if (!link) continue; __ieee80211_request_smps_mgd(sdata, link, link->u.mgd.req_smps); } if (ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS)) ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); ieee80211_recalc_ps(local); ieee80211_recalc_ps_vif(sdata); ieee80211_check_fast_rx_iface(sdata); return 0; } static void ieee80211_set_cqm_rssi_link(struct ieee80211_sub_if_data *sdata, struct ieee80211_link_data *link, s32 rssi_thold, u32 rssi_hyst, s32 rssi_low, s32 rssi_high) { struct ieee80211_bss_conf *conf; if (!link || !link->conf) return; conf = link->conf; if (rssi_thold && rssi_hyst && rssi_thold == conf->cqm_rssi_thold && rssi_hyst == conf->cqm_rssi_hyst) return; conf->cqm_rssi_thold = rssi_thold; conf->cqm_rssi_hyst = rssi_hyst; conf->cqm_rssi_low = rssi_low; conf->cqm_rssi_high = rssi_high; link->u.mgd.last_cqm_event_signal = 0; if (!ieee80211_vif_link_active(&sdata->vif, link->link_id)) return; if (sdata->u.mgd.associated && (sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_CQM_RSSI)) ieee80211_link_info_change_notify(sdata, link, BSS_CHANGED_CQM); } static int ieee80211_set_cqm_rssi_config(struct wiphy *wiphy, struct net_device *dev, s32 rssi_thold, u32 rssi_hyst) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_vif *vif = &sdata->vif; int link_id; if (vif->driver_flags & IEEE80211_VIF_BEACON_FILTER && !(vif->driver_flags & IEEE80211_VIF_SUPPORTS_CQM_RSSI)) return -EOPNOTSUPP; /* For MLD, handle CQM change on all the active links */ for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) { struct ieee80211_link_data *link = sdata_dereference(sdata->link[link_id], sdata); ieee80211_set_cqm_rssi_link(sdata, link, rssi_thold, rssi_hyst, 0, 0); } return 0; } static int ieee80211_set_cqm_rssi_range_config(struct wiphy *wiphy, struct net_device *dev, s32 rssi_low, s32 rssi_high) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_vif *vif = &sdata->vif; int link_id; if (vif->driver_flags & IEEE80211_VIF_BEACON_FILTER) return -EOPNOTSUPP; /* For MLD, handle CQM change on all the active links */ for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) { struct ieee80211_link_data *link = sdata_dereference(sdata->link[link_id], sdata); ieee80211_set_cqm_rssi_link(sdata, link, 0, 0, rssi_low, rssi_high); } return 0; } static int ieee80211_set_bitrate_mask(struct wiphy *wiphy, struct net_device *dev, unsigned int link_id, const u8 *addr, const struct cfg80211_bitrate_mask *mask) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); int i, ret; if (!ieee80211_sdata_running(sdata)) return -ENETDOWN; /* * If active validate the setting and reject it if it doesn't leave * at least one basic rate usable, since we really have to be able * to send something, and if we're an AP we have to be able to do * so at a basic rate so that all clients can receive it. */ if (rcu_access_pointer(sdata->vif.bss_conf.chanctx_conf) && sdata->vif.bss_conf.chanreq.oper.chan) { u32 basic_rates = sdata->vif.bss_conf.basic_rates; enum nl80211_band band; band = sdata->vif.bss_conf.chanreq.oper.chan->band; if (!(mask->control[band].legacy & basic_rates)) return -EINVAL; } if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) { ret = drv_set_bitrate_mask(local, sdata, mask); if (ret) return ret; } for (i = 0; i < NUM_NL80211_BANDS; i++) { struct ieee80211_supported_band *sband = wiphy->bands[i]; int j; sdata->rc_rateidx_mask[i] = mask->control[i].legacy; memcpy(sdata->rc_rateidx_mcs_mask[i], mask->control[i].ht_mcs, sizeof(mask->control[i].ht_mcs)); memcpy(sdata->rc_rateidx_vht_mcs_mask[i], mask->control[i].vht_mcs, sizeof(mask->control[i].vht_mcs)); sdata->rc_has_mcs_mask[i] = false; sdata->rc_has_vht_mcs_mask[i] = false; if (!sband) continue; for (j = 0; j < IEEE80211_HT_MCS_MASK_LEN; j++) { if (sdata->rc_rateidx_mcs_mask[i][j] != 0xff) { sdata->rc_has_mcs_mask[i] = true; break; } } for (j = 0; j < NL80211_VHT_NSS_MAX; j++) { if (sdata->rc_rateidx_vht_mcs_mask[i][j] != 0xffff) { sdata->rc_has_vht_mcs_mask[i] = true; break; } } } return 0; } static int ieee80211_start_radar_detection(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_chan_def *chandef, u32 cac_time_ms, int link_id) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_chan_req chanreq = { .oper = *chandef }; struct ieee80211_local *local = sdata->local; struct ieee80211_link_data *link_data; int err; lockdep_assert_wiphy(local->hw.wiphy); if (!list_empty(&local->roc_list) || local->scanning) return -EBUSY; link_data = sdata_dereference(sdata->link[link_id], sdata); if (!link_data) return -ENOLINK; /* whatever, but channel contexts should not complain about that one */ link_data->smps_mode = IEEE80211_SMPS_OFF; link_data->needed_rx_chains = local->rx_chains; err = ieee80211_link_use_channel(link_data, &chanreq, IEEE80211_CHANCTX_SHARED); if (err) return err; wiphy_delayed_work_queue(wiphy, &link_data->dfs_cac_timer_work, msecs_to_jiffies(cac_time_ms)); return 0; } static void ieee80211_end_cac(struct wiphy *wiphy, struct net_device *dev, unsigned int link_id) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; struct ieee80211_link_data *link_data; lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry(sdata, &local->interfaces, list) { link_data = sdata_dereference(sdata->link[link_id], sdata); if (!link_data) continue; wiphy_delayed_work_cancel(wiphy, &link_data->dfs_cac_timer_work); if (sdata->wdev.links[link_id].cac_started) { ieee80211_link_release_channel(link_data); sdata->wdev.links[link_id].cac_started = false; } } } static struct cfg80211_beacon_data * cfg80211_beacon_dup(struct cfg80211_beacon_data *beacon) { struct cfg80211_beacon_data *new_beacon; u8 *pos; int len; len = beacon->head_len + beacon->tail_len + beacon->beacon_ies_len + beacon->proberesp_ies_len + beacon->assocresp_ies_len + beacon->probe_resp_len + beacon->lci_len + beacon->civicloc_len; if (beacon->mbssid_ies) len += ieee80211_get_mbssid_beacon_len(beacon->mbssid_ies, beacon->rnr_ies, beacon->mbssid_ies->cnt); new_beacon = kzalloc(sizeof(*new_beacon) + len, GFP_KERNEL); if (!new_beacon) return NULL; if (beacon->mbssid_ies && beacon->mbssid_ies->cnt) { new_beacon->mbssid_ies = kzalloc(struct_size(new_beacon->mbssid_ies, elem, beacon->mbssid_ies->cnt), GFP_KERNEL); if (!new_beacon->mbssid_ies) { kfree(new_beacon); return NULL; } if (beacon->rnr_ies && beacon->rnr_ies->cnt) { new_beacon->rnr_ies = kzalloc(struct_size(new_beacon->rnr_ies, elem, beacon->rnr_ies->cnt), GFP_KERNEL); if (!new_beacon->rnr_ies) { kfree(new_beacon->mbssid_ies); kfree(new_beacon); return NULL; } } } pos = (u8 *)(new_beacon + 1); if (beacon->head_len) { new_beacon->head_len = beacon->head_len; new_beacon->head = pos; memcpy(pos, beacon->head, beacon->head_len); pos += beacon->head_len; } if (beacon->tail_len) { new_beacon->tail_len = beacon->tail_len; new_beacon->tail = pos; memcpy(pos, beacon->tail, beacon->tail_len); pos += beacon->tail_len; } if (beacon->beacon_ies_len) { new_beacon->beacon_ies_len = beacon->beacon_ies_len; new_beacon->beacon_ies = pos; memcpy(pos, beacon->beacon_ies, beacon->beacon_ies_len); pos += beacon->beacon_ies_len; } if (beacon->proberesp_ies_len) { new_beacon->proberesp_ies_len = beacon->proberesp_ies_len; new_beacon->proberesp_ies = pos; memcpy(pos, beacon->proberesp_ies, beacon->proberesp_ies_len); pos += beacon->proberesp_ies_len; } if (beacon->assocresp_ies_len) { new_beacon->assocresp_ies_len = beacon->assocresp_ies_len; new_beacon->assocresp_ies = pos; memcpy(pos, beacon->assocresp_ies, beacon->assocresp_ies_len); pos += beacon->assocresp_ies_len; } if (beacon->probe_resp_len) { new_beacon->probe_resp_len = beacon->probe_resp_len; new_beacon->probe_resp = pos; memcpy(pos, beacon->probe_resp, beacon->probe_resp_len); pos += beacon->probe_resp_len; } if (beacon->mbssid_ies && beacon->mbssid_ies->cnt) { pos += ieee80211_copy_mbssid_beacon(pos, new_beacon->mbssid_ies, beacon->mbssid_ies); if (beacon->rnr_ies && beacon->rnr_ies->cnt) pos += ieee80211_copy_rnr_beacon(pos, new_beacon->rnr_ies, beacon->rnr_ies); } /* might copy -1, meaning no changes requested */ new_beacon->ftm_responder = beacon->ftm_responder; if (beacon->lci) { new_beacon->lci_len = beacon->lci_len; new_beacon->lci = pos; memcpy(pos, beacon->lci, beacon->lci_len); pos += beacon->lci_len; } if (beacon->civicloc) { new_beacon->civicloc_len = beacon->civicloc_len; new_beacon->civicloc = pos; memcpy(pos, beacon->civicloc, beacon->civicloc_len); pos += beacon->civicloc_len; } return new_beacon; } void ieee80211_csa_finish(struct ieee80211_vif *vif, unsigned int link_id) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_local *local = sdata->local; struct ieee80211_link_data *link_data; if (WARN_ON(link_id >= IEEE80211_MLD_MAX_NUM_LINKS)) return; rcu_read_lock(); link_data = rcu_dereference(sdata->link[link_id]); if (WARN_ON(!link_data)) { rcu_read_unlock(); return; } /* TODO: MBSSID with MLO changes */ if (vif->mbssid_tx_vif == vif) { /* Trigger ieee80211_csa_finish() on the non-transmitting * interfaces when channel switch is received on * transmitting interface */ struct ieee80211_sub_if_data *iter; list_for_each_entry_rcu(iter, &local->interfaces, list) { if (!ieee80211_sdata_running(iter)) continue; if (iter == sdata || iter->vif.mbssid_tx_vif != vif) continue; wiphy_work_queue(iter->local->hw.wiphy, &iter->deflink.csa.finalize_work); } } wiphy_work_queue(local->hw.wiphy, &link_data->csa.finalize_work); rcu_read_unlock(); } EXPORT_SYMBOL(ieee80211_csa_finish); void ieee80211_channel_switch_disconnect(struct ieee80211_vif *vif, bool block_tx) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_local *local = sdata->local; sdata->csa_blocked_queues = block_tx; sdata_info(sdata, "channel switch failed, disconnecting\n"); wiphy_work_queue(local->hw.wiphy, &ifmgd->csa_connection_drop_work); } EXPORT_SYMBOL(ieee80211_channel_switch_disconnect); static int ieee80211_set_after_csa_beacon(struct ieee80211_link_data *link_data, u64 *changed) { struct ieee80211_sub_if_data *sdata = link_data->sdata; int err; switch (sdata->vif.type) { case NL80211_IFTYPE_AP: if (!link_data->u.ap.next_beacon) return -EINVAL; err = ieee80211_assign_beacon(sdata, link_data, link_data->u.ap.next_beacon, NULL, NULL, changed); ieee80211_free_next_beacon(link_data); if (err < 0) return err; break; case NL80211_IFTYPE_ADHOC: err = ieee80211_ibss_finish_csa(sdata, changed); if (err < 0) return err; break; #ifdef CONFIG_MAC80211_MESH case NL80211_IFTYPE_MESH_POINT: err = ieee80211_mesh_finish_csa(sdata, changed); if (err < 0) return err; break; #endif default: WARN_ON(1); return -EINVAL; } return 0; } static int __ieee80211_csa_finalize(struct ieee80211_link_data *link_data) { struct ieee80211_sub_if_data *sdata = link_data->sdata; struct ieee80211_local *local = sdata->local; struct ieee80211_bss_conf *link_conf = link_data->conf; u64 changed = 0; int err; lockdep_assert_wiphy(local->hw.wiphy); /* * using reservation isn't immediate as it may be deferred until later * with multi-vif. once reservation is complete it will re-schedule the * work with no reserved_chanctx so verify chandef to check if it * completed successfully */ if (link_data->reserved_chanctx) { /* * with multi-vif csa driver may call ieee80211_csa_finish() * many times while waiting for other interfaces to use their * reservations */ if (link_data->reserved_ready) return 0; return ieee80211_link_use_reserved_context(link_data); } if (!cfg80211_chandef_identical(&link_conf->chanreq.oper, &link_data->csa.chanreq.oper)) return -EINVAL; link_conf->csa_active = false; err = ieee80211_set_after_csa_beacon(link_data, &changed); if (err) return err; ieee80211_link_info_change_notify(sdata, link_data, changed); ieee80211_vif_unblock_queues_csa(sdata); err = drv_post_channel_switch(link_data); if (err) return err; cfg80211_ch_switch_notify(sdata->dev, &link_data->csa.chanreq.oper, link_data->link_id); return 0; } static void ieee80211_csa_finalize(struct ieee80211_link_data *link_data) { struct ieee80211_sub_if_data *sdata = link_data->sdata; if (__ieee80211_csa_finalize(link_data)) { sdata_info(sdata, "failed to finalize CSA on link %d, disconnecting\n", link_data->link_id); cfg80211_stop_iface(sdata->local->hw.wiphy, &sdata->wdev, GFP_KERNEL); } } void ieee80211_csa_finalize_work(struct wiphy *wiphy, struct wiphy_work *work) { struct ieee80211_link_data *link = container_of(work, struct ieee80211_link_data, csa.finalize_work); struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_local *local = sdata->local; lockdep_assert_wiphy(local->hw.wiphy); /* AP might have been stopped while waiting for the lock. */ if (!link->conf->csa_active) return; if (!ieee80211_sdata_running(sdata)) return; ieee80211_csa_finalize(link); } static int ieee80211_set_csa_beacon(struct ieee80211_link_data *link_data, struct cfg80211_csa_settings *params, u64 *changed) { struct ieee80211_sub_if_data *sdata = link_data->sdata; struct ieee80211_csa_settings csa = {}; int err; switch (sdata->vif.type) { case NL80211_IFTYPE_AP: link_data->u.ap.next_beacon = cfg80211_beacon_dup(¶ms->beacon_after); if (!link_data->u.ap.next_beacon) return -ENOMEM; /* * With a count of 0, we don't have to wait for any * TBTT before switching, so complete the CSA * immediately. In theory, with a count == 1 we * should delay the switch until just before the next * TBTT, but that would complicate things so we switch * immediately too. If we would delay the switch * until the next TBTT, we would have to set the probe * response here. * * TODO: A channel switch with count <= 1 without * sending a CSA action frame is kind of useless, * because the clients won't know we're changing * channels. The action frame must be implemented * either here or in the userspace. */ if (params->count <= 1) break; if ((params->n_counter_offsets_beacon > IEEE80211_MAX_CNTDWN_COUNTERS_NUM) || (params->n_counter_offsets_presp > IEEE80211_MAX_CNTDWN_COUNTERS_NUM)) { ieee80211_free_next_beacon(link_data); return -EINVAL; } csa.counter_offsets_beacon = params->counter_offsets_beacon; csa.counter_offsets_presp = params->counter_offsets_presp; csa.n_counter_offsets_beacon = params->n_counter_offsets_beacon; csa.n_counter_offsets_presp = params->n_counter_offsets_presp; csa.count = params->count; err = ieee80211_assign_beacon(sdata, link_data, ¶ms->beacon_csa, &csa, NULL, changed); if (err < 0) { ieee80211_free_next_beacon(link_data); return err; } break; case NL80211_IFTYPE_ADHOC: if (!sdata->vif.cfg.ibss_joined) return -EINVAL; if (params->chandef.width != sdata->u.ibss.chandef.width) return -EINVAL; switch (params->chandef.width) { case NL80211_CHAN_WIDTH_40: if (cfg80211_get_chandef_type(¶ms->chandef) != cfg80211_get_chandef_type(&sdata->u.ibss.chandef)) return -EINVAL; break; case NL80211_CHAN_WIDTH_5: case NL80211_CHAN_WIDTH_10: case NL80211_CHAN_WIDTH_20_NOHT: case NL80211_CHAN_WIDTH_20: break; default: return -EINVAL; } /* changes into another band are not supported */ if (sdata->u.ibss.chandef.chan->band != params->chandef.chan->band) return -EINVAL; /* see comments in the NL80211_IFTYPE_AP block */ if (params->count > 1) { err = ieee80211_ibss_csa_beacon(sdata, params, changed); if (err < 0) return err; } ieee80211_send_action_csa(sdata, params); break; #ifdef CONFIG_MAC80211_MESH case NL80211_IFTYPE_MESH_POINT: { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; /* changes into another band are not supported */ if (sdata->vif.bss_conf.chanreq.oper.chan->band != params->chandef.chan->band) return -EINVAL; if (ifmsh->csa_role == IEEE80211_MESH_CSA_ROLE_NONE) { ifmsh->csa_role = IEEE80211_MESH_CSA_ROLE_INIT; if (!ifmsh->pre_value) ifmsh->pre_value = 1; else ifmsh->pre_value++; } /* see comments in the NL80211_IFTYPE_AP block */ if (params->count > 1) { err = ieee80211_mesh_csa_beacon(sdata, params, changed); if (err < 0) { ifmsh->csa_role = IEEE80211_MESH_CSA_ROLE_NONE; return err; } } if (ifmsh->csa_role == IEEE80211_MESH_CSA_ROLE_INIT) ieee80211_send_action_csa(sdata, params); break; } #endif default: return -EOPNOTSUPP; } return 0; } static void ieee80211_color_change_abort(struct ieee80211_link_data *link) { link->conf->color_change_active = false; ieee80211_free_next_beacon(link); cfg80211_color_change_aborted_notify(link->sdata->dev, link->link_id); } static int __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_csa_settings *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_chan_req chanreq = { .oper = params->chandef }; struct ieee80211_local *local = sdata->local; struct ieee80211_channel_switch ch_switch = { .link_id = params->link_id, }; struct ieee80211_chanctx_conf *conf; struct ieee80211_chanctx *chanctx; struct ieee80211_bss_conf *link_conf; struct ieee80211_link_data *link_data; u64 changed = 0; u8 link_id = params->link_id; int err; lockdep_assert_wiphy(local->hw.wiphy); if (!list_empty(&local->roc_list) || local->scanning) return -EBUSY; if (sdata->wdev.links[link_id].cac_started) return -EBUSY; if (WARN_ON(link_id >= IEEE80211_MLD_MAX_NUM_LINKS)) return -EINVAL; link_data = wiphy_dereference(wiphy, sdata->link[link_id]); if (!link_data) return -ENOLINK; link_conf = link_data->conf; if (chanreq.oper.punctured && !link_conf->eht_support) return -EINVAL; /* don't allow another channel switch if one is already active. */ if (link_conf->csa_active) return -EBUSY; conf = wiphy_dereference(wiphy, link_conf->chanctx_conf); if (!conf) { err = -EBUSY; goto out; } if (params->chandef.chan->freq_offset) { /* this may work, but is untested */ err = -EOPNOTSUPP; goto out; } chanctx = container_of(conf, struct ieee80211_chanctx, conf); ch_switch.timestamp = 0; ch_switch.device_timestamp = 0; ch_switch.block_tx = params->block_tx; ch_switch.chandef = chanreq.oper; ch_switch.count = params->count; err = drv_pre_channel_switch(sdata, &ch_switch); if (err) goto out; err = ieee80211_link_reserve_chanctx(link_data, &chanreq, chanctx->mode, params->radar_required); if (err) goto out; /* if reservation is invalid then this will fail */ err = ieee80211_check_combinations(sdata, NULL, chanctx->mode, 0, -1); if (err) { ieee80211_link_unreserve_chanctx(link_data); goto out; } /* if there is a color change in progress, abort it */ if (link_conf->color_change_active) ieee80211_color_change_abort(link_data); err = ieee80211_set_csa_beacon(link_data, params, &changed); if (err) { ieee80211_link_unreserve_chanctx(link_data); goto out; } link_data->csa.chanreq = chanreq; link_conf->csa_active = true; if (params->block_tx) ieee80211_vif_block_queues_csa(sdata); cfg80211_ch_switch_started_notify(sdata->dev, &link_data->csa.chanreq.oper, link_id, params->count, params->block_tx); if (changed) { ieee80211_link_info_change_notify(sdata, link_data, changed); drv_channel_switch_beacon(sdata, &link_data->csa.chanreq.oper); } else { /* if the beacon didn't change, we can finalize immediately */ ieee80211_csa_finalize(link_data); } out: return err; } int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_csa_settings *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; lockdep_assert_wiphy(local->hw.wiphy); return __ieee80211_channel_switch(wiphy, dev, params); } u64 ieee80211_mgmt_tx_cookie(struct ieee80211_local *local) { lockdep_assert_wiphy(local->hw.wiphy); local->roc_cookie_counter++; /* wow, you wrapped 64 bits ... more likely a bug */ if (WARN_ON(local->roc_cookie_counter == 0)) local->roc_cookie_counter++; return local->roc_cookie_counter; } int ieee80211_attach_ack_skb(struct ieee80211_local *local, struct sk_buff *skb, u64 *cookie, gfp_t gfp) { unsigned long spin_flags; struct sk_buff *ack_skb; int id; ack_skb = skb_copy(skb, gfp); if (!ack_skb) return -ENOMEM; spin_lock_irqsave(&local->ack_status_lock, spin_flags); id = idr_alloc(&local->ack_status_frames, ack_skb, 1, 0x2000, GFP_ATOMIC); spin_unlock_irqrestore(&local->ack_status_lock, spin_flags); if (id < 0) { kfree_skb(ack_skb); return -ENOMEM; } IEEE80211_SKB_CB(skb)->status_data_idr = 1; IEEE80211_SKB_CB(skb)->status_data = id; *cookie = ieee80211_mgmt_tx_cookie(local); IEEE80211_SKB_CB(ack_skb)->ack.cookie = *cookie; return 0; } static void ieee80211_update_mgmt_frame_registrations(struct wiphy *wiphy, struct wireless_dev *wdev, struct mgmt_frame_regs *upd) { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); u32 preq_mask = BIT(IEEE80211_STYPE_PROBE_REQ >> 4); u32 action_mask = BIT(IEEE80211_STYPE_ACTION >> 4); bool global_change, intf_change; global_change = (local->probe_req_reg != !!(upd->global_stypes & preq_mask)) || (local->rx_mcast_action_reg != !!(upd->global_mcast_stypes & action_mask)); local->probe_req_reg = upd->global_stypes & preq_mask; local->rx_mcast_action_reg = upd->global_mcast_stypes & action_mask; intf_change = (sdata->vif.probe_req_reg != !!(upd->interface_stypes & preq_mask)) || (sdata->vif.rx_mcast_action_reg != !!(upd->interface_mcast_stypes & action_mask)); sdata->vif.probe_req_reg = upd->interface_stypes & preq_mask; sdata->vif.rx_mcast_action_reg = upd->interface_mcast_stypes & action_mask; if (!local->open_count) return; if (intf_change && ieee80211_sdata_running(sdata)) drv_config_iface_filter(local, sdata, sdata->vif.probe_req_reg ? FIF_PROBE_REQ : 0, FIF_PROBE_REQ); if (global_change) ieee80211_configure_filter(local); } static int ieee80211_set_antenna(struct wiphy *wiphy, u32 tx_ant, u32 rx_ant) { struct ieee80211_local *local = wiphy_priv(wiphy); int ret; if (local->started) return -EOPNOTSUPP; ret = drv_set_antenna(local, tx_ant, rx_ant); if (ret) return ret; local->rx_chains = hweight8(rx_ant); return 0; } static int ieee80211_get_antenna(struct wiphy *wiphy, u32 *tx_ant, u32 *rx_ant) { struct ieee80211_local *local = wiphy_priv(wiphy); return drv_get_antenna(local, tx_ant, rx_ant); } static int ieee80211_set_rekey_data(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_gtk_rekey_data *data) { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (!local->ops->set_rekey_data) return -EOPNOTSUPP; drv_set_rekey_data(local, sdata, data); return 0; } static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev, const u8 *peer, u64 *cookie) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; struct ieee80211_qos_hdr *nullfunc; struct sk_buff *skb; int size = sizeof(*nullfunc); __le16 fc; bool qos; struct ieee80211_tx_info *info; struct sta_info *sta; struct ieee80211_chanctx_conf *chanctx_conf; enum nl80211_band band; int ret; /* the lock is needed to assign the cookie later */ lockdep_assert_wiphy(local->hw.wiphy); rcu_read_lock(); sta = sta_info_get_bss(sdata, peer); if (!sta) { ret = -ENOLINK; goto unlock; } qos = sta->sta.wme; chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); if (WARN_ON(!chanctx_conf)) { ret = -EINVAL; goto unlock; } band = chanctx_conf->def.chan->band; if (qos) { fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_QOS_NULLFUNC | IEEE80211_FCTL_FROMDS); } else { size -= 2; fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_NULLFUNC | IEEE80211_FCTL_FROMDS); } skb = dev_alloc_skb(local->hw.extra_tx_headroom + size); if (!skb) { ret = -ENOMEM; goto unlock; } skb->dev = dev; skb_reserve(skb, local->hw.extra_tx_headroom); nullfunc = skb_put(skb, size); nullfunc->frame_control = fc; nullfunc->duration_id = 0; memcpy(nullfunc->addr1, sta->sta.addr, ETH_ALEN); memcpy(nullfunc->addr2, sdata->vif.addr, ETH_ALEN); memcpy(nullfunc->addr3, sdata->vif.addr, ETH_ALEN); nullfunc->seq_ctrl = 0; info = IEEE80211_SKB_CB(skb); info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS | IEEE80211_TX_INTFL_NL80211_FRAME_TX; info->band = band; skb_set_queue_mapping(skb, IEEE80211_AC_VO); skb->priority = 7; if (qos) nullfunc->qos_ctrl = cpu_to_le16(7); ret = ieee80211_attach_ack_skb(local, skb, cookie, GFP_ATOMIC); if (ret) { kfree_skb(skb); goto unlock; } local_bh_disable(); ieee80211_xmit(sdata, sta, skb); local_bh_enable(); ret = 0; unlock: rcu_read_unlock(); return ret; } static int ieee80211_cfg_get_channel(struct wiphy *wiphy, struct wireless_dev *wdev, unsigned int link_id, struct cfg80211_chan_def *chandef) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_chanctx_conf *chanctx_conf; struct ieee80211_link_data *link; int ret = -ENODATA; rcu_read_lock(); link = rcu_dereference(sdata->link[link_id]); if (!link) { ret = -ENOLINK; goto out; } chanctx_conf = rcu_dereference(link->conf->chanctx_conf); if (chanctx_conf) { *chandef = link->conf->chanreq.oper; ret = 0; } else if (local->open_count > 0 && local->open_count == local->monitors && sdata->vif.type == NL80211_IFTYPE_MONITOR) { *chandef = local->monitor_chanreq.oper; ret = 0; } out: rcu_read_unlock(); return ret; } #ifdef CONFIG_PM static void ieee80211_set_wakeup(struct wiphy *wiphy, bool enabled) { drv_set_wakeup(wiphy_priv(wiphy), enabled); } #endif static int ieee80211_set_qos_map(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_qos_map *qos_map) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct mac80211_qos_map *new_qos_map, *old_qos_map; if (qos_map) { new_qos_map = kzalloc(sizeof(*new_qos_map), GFP_KERNEL); if (!new_qos_map) return -ENOMEM; memcpy(&new_qos_map->qos_map, qos_map, sizeof(*qos_map)); } else { /* A NULL qos_map was passed to disable QoS mapping */ new_qos_map = NULL; } old_qos_map = sdata_dereference(sdata->qos_map, sdata); rcu_assign_pointer(sdata->qos_map, new_qos_map); if (old_qos_map) kfree_rcu(old_qos_map, rcu_head); return 0; } static int ieee80211_set_ap_chanwidth(struct wiphy *wiphy, struct net_device *dev, unsigned int link_id, struct cfg80211_chan_def *chandef) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_link_data *link; struct ieee80211_chan_req chanreq = { .oper = *chandef }; int ret; u64 changed = 0; link = sdata_dereference(sdata->link[link_id], sdata); ret = ieee80211_link_change_chanreq(link, &chanreq, &changed); if (ret == 0) ieee80211_link_info_change_notify(sdata, link, changed); return ret; } static int ieee80211_add_tx_ts(struct wiphy *wiphy, struct net_device *dev, u8 tsid, const u8 *peer, u8 up, u16 admitted_time) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; int ac = ieee802_1d_to_ac[up]; if (sdata->vif.type != NL80211_IFTYPE_STATION) return -EOPNOTSUPP; if (!(sdata->wmm_acm & BIT(up))) return -EINVAL; if (ifmgd->tx_tspec[ac].admitted_time) return -EBUSY; if (admitted_time) { ifmgd->tx_tspec[ac].admitted_time = 32 * admitted_time; ifmgd->tx_tspec[ac].tsid = tsid; ifmgd->tx_tspec[ac].up = up; } return 0; } static int ieee80211_del_tx_ts(struct wiphy *wiphy, struct net_device *dev, u8 tsid, const u8 *peer) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_local *local = wiphy_priv(wiphy); int ac; for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { struct ieee80211_sta_tx_tspec *tx_tspec = &ifmgd->tx_tspec[ac]; /* skip unused entries */ if (!tx_tspec->admitted_time) continue; if (tx_tspec->tsid != tsid) continue; /* due to this new packets will be reassigned to non-ACM ACs */ tx_tspec->up = -1; /* Make sure that all packets have been sent to avoid to * restore the QoS params on packets that are still on the * queues. */ synchronize_net(); ieee80211_flush_queues(local, sdata, false); /* restore the normal QoS parameters * (unconditionally to avoid races) */ tx_tspec->action = TX_TSPEC_ACTION_STOP_DOWNGRADE; tx_tspec->downgraded = false; ieee80211_sta_handle_tspec_ac_params(sdata); /* finally clear all the data */ memset(tx_tspec, 0, sizeof(*tx_tspec)); return 0; } return -ENOENT; } void ieee80211_nan_func_terminated(struct ieee80211_vif *vif, u8 inst_id, enum nl80211_nan_func_term_reason reason, gfp_t gfp) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct cfg80211_nan_func *func; u64 cookie; if (WARN_ON(vif->type != NL80211_IFTYPE_NAN)) return; spin_lock_bh(&sdata->u.nan.func_lock); func = idr_find(&sdata->u.nan.function_inst_ids, inst_id); if (WARN_ON(!func)) { spin_unlock_bh(&sdata->u.nan.func_lock); return; } cookie = func->cookie; idr_remove(&sdata->u.nan.function_inst_ids, inst_id); spin_unlock_bh(&sdata->u.nan.func_lock); cfg80211_free_nan_func(func); cfg80211_nan_func_terminated(ieee80211_vif_to_wdev(vif), inst_id, reason, cookie, gfp); } EXPORT_SYMBOL(ieee80211_nan_func_terminated); void ieee80211_nan_func_match(struct ieee80211_vif *vif, struct cfg80211_nan_match_params *match, gfp_t gfp) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct cfg80211_nan_func *func; if (WARN_ON(vif->type != NL80211_IFTYPE_NAN)) return; spin_lock_bh(&sdata->u.nan.func_lock); func = idr_find(&sdata->u.nan.function_inst_ids, match->inst_id); if (WARN_ON(!func)) { spin_unlock_bh(&sdata->u.nan.func_lock); return; } match->cookie = func->cookie; spin_unlock_bh(&sdata->u.nan.func_lock); cfg80211_nan_match(ieee80211_vif_to_wdev(vif), match, gfp); } EXPORT_SYMBOL(ieee80211_nan_func_match); static int ieee80211_set_multicast_to_unicast(struct wiphy *wiphy, struct net_device *dev, const bool enabled) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); sdata->u.ap.multicast_to_unicast = enabled; return 0; } void ieee80211_fill_txq_stats(struct cfg80211_txq_stats *txqstats, struct txq_info *txqi) { if (!(txqstats->filled & BIT(NL80211_TXQ_STATS_BACKLOG_BYTES))) { txqstats->filled |= BIT(NL80211_TXQ_STATS_BACKLOG_BYTES); txqstats->backlog_bytes = txqi->tin.backlog_bytes; } if (!(txqstats->filled & BIT(NL80211_TXQ_STATS_BACKLOG_PACKETS))) { txqstats->filled |= BIT(NL80211_TXQ_STATS_BACKLOG_PACKETS); txqstats->backlog_packets = txqi->tin.backlog_packets; } if (!(txqstats->filled & BIT(NL80211_TXQ_STATS_FLOWS))) { txqstats->filled |= BIT(NL80211_TXQ_STATS_FLOWS); txqstats->flows = txqi->tin.flows; } if (!(txqstats->filled & BIT(NL80211_TXQ_STATS_DROPS))) { txqstats->filled |= BIT(NL80211_TXQ_STATS_DROPS); txqstats->drops = txqi->cstats.drop_count; } if (!(txqstats->filled & BIT(NL80211_TXQ_STATS_ECN_MARKS))) { txqstats->filled |= BIT(NL80211_TXQ_STATS_ECN_MARKS); txqstats->ecn_marks = txqi->cstats.ecn_mark; } if (!(txqstats->filled & BIT(NL80211_TXQ_STATS_OVERLIMIT))) { txqstats->filled |= BIT(NL80211_TXQ_STATS_OVERLIMIT); txqstats->overlimit = txqi->tin.overlimit; } if (!(txqstats->filled & BIT(NL80211_TXQ_STATS_COLLISIONS))) { txqstats->filled |= BIT(NL80211_TXQ_STATS_COLLISIONS); txqstats->collisions = txqi->tin.collisions; } if (!(txqstats->filled & BIT(NL80211_TXQ_STATS_TX_BYTES))) { txqstats->filled |= BIT(NL80211_TXQ_STATS_TX_BYTES); txqstats->tx_bytes = txqi->tin.tx_bytes; } if (!(txqstats->filled & BIT(NL80211_TXQ_STATS_TX_PACKETS))) { txqstats->filled |= BIT(NL80211_TXQ_STATS_TX_PACKETS); txqstats->tx_packets = txqi->tin.tx_packets; } } static int ieee80211_get_txq_stats(struct wiphy *wiphy, struct wireless_dev *wdev, struct cfg80211_txq_stats *txqstats) { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata; int ret = 0; spin_lock_bh(&local->fq.lock); rcu_read_lock(); if (wdev) { sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); if (!sdata->vif.txq) { ret = 1; goto out; } ieee80211_fill_txq_stats(txqstats, to_txq_info(sdata->vif.txq)); } else { /* phy stats */ txqstats->filled |= BIT(NL80211_TXQ_STATS_BACKLOG_PACKETS) | BIT(NL80211_TXQ_STATS_BACKLOG_BYTES) | BIT(NL80211_TXQ_STATS_OVERLIMIT) | BIT(NL80211_TXQ_STATS_OVERMEMORY) | BIT(NL80211_TXQ_STATS_COLLISIONS) | BIT(NL80211_TXQ_STATS_MAX_FLOWS); txqstats->backlog_packets = local->fq.backlog; txqstats->backlog_bytes = local->fq.memory_usage; txqstats->overlimit = local->fq.overlimit; txqstats->overmemory = local->fq.overmemory; txqstats->collisions = local->fq.collisions; txqstats->max_flows = local->fq.flows_cnt; } out: rcu_read_unlock(); spin_unlock_bh(&local->fq.lock); return ret; } static int ieee80211_get_ftm_responder_stats(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_ftm_responder_stats *ftm_stats) { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); return drv_get_ftm_responder_stats(local, sdata, ftm_stats); } static int ieee80211_start_pmsr(struct wiphy *wiphy, struct wireless_dev *dev, struct cfg80211_pmsr_request *request) { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(dev); return drv_start_pmsr(local, sdata, request); } static void ieee80211_abort_pmsr(struct wiphy *wiphy, struct wireless_dev *dev, struct cfg80211_pmsr_request *request) { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(dev); return drv_abort_pmsr(local, sdata, request); } static int ieee80211_set_tid_config(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_tid_config *tid_conf) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct sta_info *sta; lockdep_assert_wiphy(sdata->local->hw.wiphy); if (!sdata->local->ops->set_tid_config) return -EOPNOTSUPP; if (!tid_conf->peer) return drv_set_tid_config(sdata->local, sdata, NULL, tid_conf); sta = sta_info_get_bss(sdata, tid_conf->peer); if (!sta) return -ENOENT; return drv_set_tid_config(sdata->local, sdata, &sta->sta, tid_conf); } static int ieee80211_reset_tid_config(struct wiphy *wiphy, struct net_device *dev, const u8 *peer, u8 tids) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct sta_info *sta; lockdep_assert_wiphy(sdata->local->hw.wiphy); if (!sdata->local->ops->reset_tid_config) return -EOPNOTSUPP; if (!peer) return drv_reset_tid_config(sdata->local, sdata, NULL, tids); sta = sta_info_get_bss(sdata, peer); if (!sta) return -ENOENT; return drv_reset_tid_config(sdata->local, sdata, &sta->sta, tids); } static int ieee80211_set_sar_specs(struct wiphy *wiphy, struct cfg80211_sar_specs *sar) { struct ieee80211_local *local = wiphy_priv(wiphy); if (!local->ops->set_sar_specs) return -EOPNOTSUPP; return local->ops->set_sar_specs(&local->hw, sar); } static int ieee80211_set_after_color_change_beacon(struct ieee80211_link_data *link, u64 *changed) { struct ieee80211_sub_if_data *sdata = link->sdata; switch (sdata->vif.type) { case NL80211_IFTYPE_AP: { int ret; if (!link->u.ap.next_beacon) return -EINVAL; ret = ieee80211_assign_beacon(sdata, link, link->u.ap.next_beacon, NULL, NULL, changed); ieee80211_free_next_beacon(link); if (ret < 0) return ret; break; } default: WARN_ON_ONCE(1); return -EINVAL; } return 0; } static int ieee80211_set_color_change_beacon(struct ieee80211_link_data *link, struct cfg80211_color_change_settings *params, u64 *changed) { struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_color_change_settings color_change = {}; int err; switch (sdata->vif.type) { case NL80211_IFTYPE_AP: link->u.ap.next_beacon = cfg80211_beacon_dup(¶ms->beacon_next); if (!link->u.ap.next_beacon) return -ENOMEM; if (params->count <= 1) break; color_change.counter_offset_beacon = params->counter_offset_beacon; color_change.counter_offset_presp = params->counter_offset_presp; color_change.count = params->count; err = ieee80211_assign_beacon(sdata, link, ¶ms->beacon_color_change, NULL, &color_change, changed); if (err < 0) { ieee80211_free_next_beacon(link); return err; } break; default: return -EOPNOTSUPP; } return 0; } static void ieee80211_color_change_bss_config_notify(struct ieee80211_link_data *link, u8 color, int enable, u64 changed) { struct ieee80211_sub_if_data *sdata = link->sdata; lockdep_assert_wiphy(sdata->local->hw.wiphy); link->conf->he_bss_color.color = color; link->conf->he_bss_color.enabled = enable; changed |= BSS_CHANGED_HE_BSS_COLOR; ieee80211_link_info_change_notify(sdata, link, changed); if (!sdata->vif.bss_conf.nontransmitted && sdata->vif.mbssid_tx_vif) { struct ieee80211_sub_if_data *child; list_for_each_entry(child, &sdata->local->interfaces, list) { if (child != sdata && child->vif.mbssid_tx_vif == &sdata->vif) { child->vif.bss_conf.he_bss_color.color = color; child->vif.bss_conf.he_bss_color.enabled = enable; ieee80211_link_info_change_notify(child, &child->deflink, BSS_CHANGED_HE_BSS_COLOR); } } } } static int ieee80211_color_change_finalize(struct ieee80211_link_data *link) { struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_local *local = sdata->local; u64 changed = 0; int err; lockdep_assert_wiphy(local->hw.wiphy); link->conf->color_change_active = false; err = ieee80211_set_after_color_change_beacon(link, &changed); if (err) { cfg80211_color_change_aborted_notify(sdata->dev, link->link_id); return err; } ieee80211_color_change_bss_config_notify(link, link->conf->color_change_color, 1, changed); cfg80211_color_change_notify(sdata->dev, link->link_id); return 0; } void ieee80211_color_change_finalize_work(struct wiphy *wiphy, struct wiphy_work *work) { struct ieee80211_link_data *link = container_of(work, struct ieee80211_link_data, color_change_finalize_work); struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_bss_conf *link_conf = link->conf; struct ieee80211_local *local = sdata->local; lockdep_assert_wiphy(local->hw.wiphy); /* AP might have been stopped while waiting for the lock. */ if (!link_conf->color_change_active) return; if (!ieee80211_sdata_running(sdata)) return; ieee80211_color_change_finalize(link); } void ieee80211_color_collision_detection_work(struct work_struct *work) { struct delayed_work *delayed_work = to_delayed_work(work); struct ieee80211_link_data *link = container_of(delayed_work, struct ieee80211_link_data, color_collision_detect_work); struct ieee80211_sub_if_data *sdata = link->sdata; cfg80211_obss_color_collision_notify(sdata->dev, link->color_bitmap, link->link_id); } void ieee80211_color_change_finish(struct ieee80211_vif *vif, u8 link_id) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_link_data *link; if (WARN_ON(link_id >= IEEE80211_MLD_MAX_NUM_LINKS)) return; rcu_read_lock(); link = rcu_dereference(sdata->link[link_id]); if (WARN_ON(!link)) { rcu_read_unlock(); return; } wiphy_work_queue(sdata->local->hw.wiphy, &link->color_change_finalize_work); rcu_read_unlock(); } EXPORT_SYMBOL_GPL(ieee80211_color_change_finish); void ieee80211_obss_color_collision_notify(struct ieee80211_vif *vif, u64 color_bitmap, u8 link_id) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_link_data *link; if (WARN_ON(link_id >= IEEE80211_MLD_MAX_NUM_LINKS)) return; rcu_read_lock(); link = rcu_dereference(sdata->link[link_id]); if (WARN_ON(!link)) { rcu_read_unlock(); return; } if (link->conf->color_change_active || link->conf->csa_active) { rcu_read_unlock(); return; } if (delayed_work_pending(&link->color_collision_detect_work)) { rcu_read_unlock(); return; } link->color_bitmap = color_bitmap; /* queue the color collision detection event every 500 ms in order to * avoid sending too much netlink messages to userspace. */ ieee80211_queue_delayed_work(&sdata->local->hw, &link->color_collision_detect_work, msecs_to_jiffies(500)); rcu_read_unlock(); } EXPORT_SYMBOL_GPL(ieee80211_obss_color_collision_notify); static int ieee80211_color_change(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_color_change_settings *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; struct ieee80211_bss_conf *link_conf; struct ieee80211_link_data *link; u8 link_id = params->link_id; u64 changed = 0; int err; lockdep_assert_wiphy(local->hw.wiphy); if (WARN_ON(link_id >= IEEE80211_MLD_MAX_NUM_LINKS)) return -EINVAL; link = wiphy_dereference(wiphy, sdata->link[link_id]); if (!link) return -ENOLINK; link_conf = link->conf; if (link_conf->nontransmitted) return -EINVAL; /* don't allow another color change if one is already active or if csa * is active */ if (link_conf->color_change_active || link_conf->csa_active) { err = -EBUSY; goto out; } err = ieee80211_set_color_change_beacon(link, params, &changed); if (err) goto out; link_conf->color_change_active = true; link_conf->color_change_color = params->color; cfg80211_color_change_started_notify(sdata->dev, params->count, link_id); if (changed) ieee80211_color_change_bss_config_notify(link, 0, 0, changed); else /* if the beacon didn't change, we can finalize immediately */ ieee80211_color_change_finalize(link); out: return err; } static int ieee80211_set_radar_background(struct wiphy *wiphy, struct cfg80211_chan_def *chandef) { struct ieee80211_local *local = wiphy_priv(wiphy); if (!local->ops->set_radar_background) return -EOPNOTSUPP; return local->ops->set_radar_background(&local->hw, chandef); } static int ieee80211_add_intf_link(struct wiphy *wiphy, struct wireless_dev *wdev, unsigned int link_id) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); lockdep_assert_wiphy(sdata->local->hw.wiphy); if (wdev->use_4addr) return -EOPNOTSUPP; return ieee80211_vif_set_links(sdata, wdev->valid_links, 0); } static void ieee80211_del_intf_link(struct wiphy *wiphy, struct wireless_dev *wdev, unsigned int link_id) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); lockdep_assert_wiphy(sdata->local->hw.wiphy); ieee80211_vif_set_links(sdata, wdev->valid_links, 0); } static int ieee80211_add_link_station(struct wiphy *wiphy, struct net_device *dev, struct link_station_parameters *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = wiphy_priv(wiphy); struct sta_info *sta; int ret; lockdep_assert_wiphy(local->hw.wiphy); sta = sta_info_get_bss(sdata, params->mld_mac); if (!sta) return -ENOENT; if (!sta->sta.valid_links) return -EINVAL; if (sta->sta.valid_links & BIT(params->link_id)) return -EALREADY; ret = ieee80211_sta_allocate_link(sta, params->link_id); if (ret) return ret; ret = sta_link_apply_parameters(local, sta, STA_LINK_MODE_NEW, params); if (ret) { ieee80211_sta_free_link(sta, params->link_id); return ret; } /* ieee80211_sta_activate_link frees the link upon failure */ return ieee80211_sta_activate_link(sta, params->link_id); } static int ieee80211_mod_link_station(struct wiphy *wiphy, struct net_device *dev, struct link_station_parameters *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = wiphy_priv(wiphy); struct sta_info *sta; lockdep_assert_wiphy(local->hw.wiphy); sta = sta_info_get_bss(sdata, params->mld_mac); if (!sta) return -ENOENT; if (!(sta->sta.valid_links & BIT(params->link_id))) return -EINVAL; return sta_link_apply_parameters(local, sta, STA_LINK_MODE_LINK_MODIFY, params); } static int ieee80211_del_link_station(struct wiphy *wiphy, struct net_device *dev, struct link_station_del_parameters *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct sta_info *sta; lockdep_assert_wiphy(sdata->local->hw.wiphy); sta = sta_info_get_bss(sdata, params->mld_mac); if (!sta) return -ENOENT; if (!(sta->sta.valid_links & BIT(params->link_id))) return -EINVAL; /* must not create a STA without links */ if (sta->sta.valid_links == BIT(params->link_id)) return -EINVAL; ieee80211_sta_remove_link(sta, params->link_id); return 0; } static int ieee80211_set_hw_timestamp(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_set_hw_timestamp *hwts) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; if (!local->ops->set_hw_timestamp) return -EOPNOTSUPP; if (!check_sdata_in_driver(sdata)) return -EIO; return local->ops->set_hw_timestamp(&local->hw, &sdata->vif, hwts); } static int ieee80211_set_ttlm(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_ttlm_params *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); lockdep_assert_wiphy(sdata->local->hw.wiphy); return ieee80211_req_neg_ttlm(sdata, params); } const struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, .change_virtual_intf = ieee80211_change_iface, .start_p2p_device = ieee80211_start_p2p_device, .stop_p2p_device = ieee80211_stop_p2p_device, .add_key = ieee80211_add_key, .del_key = ieee80211_del_key, .get_key = ieee80211_get_key, .set_default_key = ieee80211_config_default_key, .set_default_mgmt_key = ieee80211_config_default_mgmt_key, .set_default_beacon_key = ieee80211_config_default_beacon_key, .start_ap = ieee80211_start_ap, .change_beacon = ieee80211_change_beacon, .stop_ap = ieee80211_stop_ap, .add_station = ieee80211_add_station, .del_station = ieee80211_del_station, .change_station = ieee80211_change_station, .get_station = ieee80211_get_station, .dump_station = ieee80211_dump_station, .dump_survey = ieee80211_dump_survey, #ifdef CONFIG_MAC80211_MESH .add_mpath = ieee80211_add_mpath, .del_mpath = ieee80211_del_mpath, .change_mpath = ieee80211_change_mpath, .get_mpath = ieee80211_get_mpath, .dump_mpath = ieee80211_dump_mpath, .get_mpp = ieee80211_get_mpp, .dump_mpp = ieee80211_dump_mpp, .update_mesh_config = ieee80211_update_mesh_config, .get_mesh_config = ieee80211_get_mesh_config, .join_mesh = ieee80211_join_mesh, .leave_mesh = ieee80211_leave_mesh, #endif .join_ocb = ieee80211_join_ocb, .leave_ocb = ieee80211_leave_ocb, .change_bss = ieee80211_change_bss, .inform_bss = ieee80211_inform_bss, .set_txq_params = ieee80211_set_txq_params, .set_monitor_channel = ieee80211_set_monitor_channel, .suspend = ieee80211_suspend, .resume = ieee80211_resume, .scan = ieee80211_scan, .abort_scan = ieee80211_abort_scan, .sched_scan_start = ieee80211_sched_scan_start, .sched_scan_stop = ieee80211_sched_scan_stop, .auth = ieee80211_auth, .assoc = ieee80211_assoc, .deauth = ieee80211_deauth, .disassoc = ieee80211_disassoc, .join_ibss = ieee80211_join_ibss, .leave_ibss = ieee80211_leave_ibss, .set_mcast_rate = ieee80211_set_mcast_rate, .set_wiphy_params = ieee80211_set_wiphy_params, .set_tx_power = ieee80211_set_tx_power, .get_tx_power = ieee80211_get_tx_power, .rfkill_poll = ieee80211_rfkill_poll, CFG80211_TESTMODE_CMD(ieee80211_testmode_cmd) CFG80211_TESTMODE_DUMP(ieee80211_testmode_dump) .set_power_mgmt = ieee80211_set_power_mgmt, .set_bitrate_mask = ieee80211_set_bitrate_mask, .remain_on_channel = ieee80211_remain_on_channel, .cancel_remain_on_channel = ieee80211_cancel_remain_on_channel, .mgmt_tx = ieee80211_mgmt_tx, .mgmt_tx_cancel_wait = ieee80211_mgmt_tx_cancel_wait, .set_cqm_rssi_config = ieee80211_set_cqm_rssi_config, .set_cqm_rssi_range_config = ieee80211_set_cqm_rssi_range_config, .update_mgmt_frame_registrations = ieee80211_update_mgmt_frame_registrations, .set_antenna = ieee80211_set_antenna, .get_antenna = ieee80211_get_antenna, .set_rekey_data = ieee80211_set_rekey_data, .tdls_oper = ieee80211_tdls_oper, .tdls_mgmt = ieee80211_tdls_mgmt, .tdls_channel_switch = ieee80211_tdls_channel_switch, .tdls_cancel_channel_switch = ieee80211_tdls_cancel_channel_switch, .probe_client = ieee80211_probe_client, .set_noack_map = ieee80211_set_noack_map, #ifdef CONFIG_PM .set_wakeup = ieee80211_set_wakeup, #endif .get_channel = ieee80211_cfg_get_channel, .start_radar_detection = ieee80211_start_radar_detection, .end_cac = ieee80211_end_cac, .channel_switch = ieee80211_channel_switch, .set_qos_map = ieee80211_set_qos_map, .set_ap_chanwidth = ieee80211_set_ap_chanwidth, .add_tx_ts = ieee80211_add_tx_ts, .del_tx_ts = ieee80211_del_tx_ts, .start_nan = ieee80211_start_nan, .stop_nan = ieee80211_stop_nan, .nan_change_conf = ieee80211_nan_change_conf, .add_nan_func = ieee80211_add_nan_func, .del_nan_func = ieee80211_del_nan_func, .set_multicast_to_unicast = ieee80211_set_multicast_to_unicast, .tx_control_port = ieee80211_tx_control_port, .get_txq_stats = ieee80211_get_txq_stats, .get_ftm_responder_stats = ieee80211_get_ftm_responder_stats, .start_pmsr = ieee80211_start_pmsr, .abort_pmsr = ieee80211_abort_pmsr, .probe_mesh_link = ieee80211_probe_mesh_link, .set_tid_config = ieee80211_set_tid_config, .reset_tid_config = ieee80211_reset_tid_config, .set_sar_specs = ieee80211_set_sar_specs, .color_change = ieee80211_color_change, .set_radar_background = ieee80211_set_radar_background, .add_intf_link = ieee80211_add_intf_link, .del_intf_link = ieee80211_del_intf_link, .add_link_station = ieee80211_add_link_station, .mod_link_station = ieee80211_mod_link_station, .del_link_station = ieee80211_del_link_station, .set_hw_timestamp = ieee80211_set_hw_timestamp, .set_ttlm = ieee80211_set_ttlm, .get_radio_mask = ieee80211_get_radio_mask, }; |
4 4 4 4 4 4 4 4 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 | // SPDX-License-Identifier: GPL-2.0 /* Copyright (C) B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner */ #include "hash.h" #include "main.h" #include <linux/gfp.h> #include <linux/lockdep.h> #include <linux/slab.h> /* clears the hash */ static void batadv_hash_init(struct batadv_hashtable *hash) { u32 i; for (i = 0; i < hash->size; i++) { INIT_HLIST_HEAD(&hash->table[i]); spin_lock_init(&hash->list_locks[i]); } atomic_set(&hash->generation, 0); } /** * batadv_hash_destroy() - Free only the hashtable and the hash itself * @hash: hash object to destroy */ void batadv_hash_destroy(struct batadv_hashtable *hash) { kfree(hash->list_locks); kfree(hash->table); kfree(hash); } /** * batadv_hash_new() - Allocates and clears the hashtable * @size: number of hash buckets to allocate * * Return: newly allocated hashtable, NULL on errors */ struct batadv_hashtable *batadv_hash_new(u32 size) { struct batadv_hashtable *hash; hash = kmalloc(sizeof(*hash), GFP_ATOMIC); if (!hash) return NULL; hash->table = kmalloc_array(size, sizeof(*hash->table), GFP_ATOMIC); if (!hash->table) goto free_hash; hash->list_locks = kmalloc_array(size, sizeof(*hash->list_locks), GFP_ATOMIC); if (!hash->list_locks) goto free_table; hash->size = size; batadv_hash_init(hash); return hash; free_table: kfree(hash->table); free_hash: kfree(hash); return NULL; } /** * batadv_hash_set_lock_class() - Set specific lockdep class for hash spinlocks * @hash: hash object to modify * @key: lockdep class key address */ void batadv_hash_set_lock_class(struct batadv_hashtable *hash, struct lock_class_key *key) { u32 i; for (i = 0; i < hash->size; i++) lockdep_set_class(&hash->list_locks[i], key); } |
2 1 1 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 | // SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> #include <linux/errno.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/mm.h> #include <linux/slab.h> #include <linux/syscalls.h> #include <linux/io_uring.h> #include <uapi/linux/io_uring.h> #include "../fs/internal.h" #include "io_uring.h" #include "truncate.h" struct io_ftrunc { struct file *file; loff_t len; }; int io_ftruncate_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_ftrunc *ft = io_kiocb_to_cmd(req, struct io_ftrunc); if (sqe->rw_flags || sqe->addr || sqe->len || sqe->buf_index || sqe->splice_fd_in || sqe->addr3) return -EINVAL; ft->len = READ_ONCE(sqe->off); req->flags |= REQ_F_FORCE_ASYNC; return 0; } int io_ftruncate(struct io_kiocb *req, unsigned int issue_flags) { struct io_ftrunc *ft = io_kiocb_to_cmd(req, struct io_ftrunc); int ret; WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK); ret = do_ftruncate(req->file, ft->len, 1); io_req_set_res(req, ret, 0); return IOU_OK; } |
3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 4 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 | // SPDX-License-Identifier: GPL-2.0-or-later #include <linux/cfm_bridge.h> #include <uapi/linux/cfm_bridge.h> #include "br_private_cfm.h" static struct br_cfm_mep *br_mep_find(struct net_bridge *br, u32 instance) { struct br_cfm_mep *mep; hlist_for_each_entry(mep, &br->mep_list, head) if (mep->instance == instance) return mep; return NULL; } static struct br_cfm_mep *br_mep_find_ifindex(struct net_bridge *br, u32 ifindex) { struct br_cfm_mep *mep; hlist_for_each_entry_rcu(mep, &br->mep_list, head, lockdep_rtnl_is_held()) if (mep->create.ifindex == ifindex) return mep; return NULL; } static struct br_cfm_peer_mep *br_peer_mep_find(struct br_cfm_mep *mep, u32 mepid) { struct br_cfm_peer_mep *peer_mep; hlist_for_each_entry_rcu(peer_mep, &mep->peer_mep_list, head, lockdep_rtnl_is_held()) if (peer_mep->mepid == mepid) return peer_mep; return NULL; } static struct net_bridge_port *br_mep_get_port(struct net_bridge *br, u32 ifindex) { struct net_bridge_port *port; list_for_each_entry(port, &br->port_list, list) if (port->dev->ifindex == ifindex) return port; return NULL; } /* Calculate the CCM interval in us. */ static u32 interval_to_us(enum br_cfm_ccm_interval interval) { switch (interval) { case BR_CFM_CCM_INTERVAL_NONE: return 0; case BR_CFM_CCM_INTERVAL_3_3_MS: return 3300; case BR_CFM_CCM_INTERVAL_10_MS: return 10 * 1000; case BR_CFM_CCM_INTERVAL_100_MS: return 100 * 1000; case BR_CFM_CCM_INTERVAL_1_SEC: return 1000 * 1000; case BR_CFM_CCM_INTERVAL_10_SEC: return 10 * 1000 * 1000; case BR_CFM_CCM_INTERVAL_1_MIN: return 60 * 1000 * 1000; case BR_CFM_CCM_INTERVAL_10_MIN: return 10 * 60 * 1000 * 1000; } return 0; } /* Convert the interface interval to CCM PDU value. */ static u32 interval_to_pdu(enum br_cfm_ccm_interval interval) { switch (interval) { case BR_CFM_CCM_INTERVAL_NONE: return 0; case BR_CFM_CCM_INTERVAL_3_3_MS: return 1; case BR_CFM_CCM_INTERVAL_10_MS: return 2; case BR_CFM_CCM_INTERVAL_100_MS: return 3; case BR_CFM_CCM_INTERVAL_1_SEC: return 4; case BR_CFM_CCM_INTERVAL_10_SEC: return 5; case BR_CFM_CCM_INTERVAL_1_MIN: return 6; case BR_CFM_CCM_INTERVAL_10_MIN: return 7; } return 0; } /* Convert the CCM PDU value to interval on interface. */ static u32 pdu_to_interval(u32 value) { switch (value) { case 0: return BR_CFM_CCM_INTERVAL_NONE; case 1: return BR_CFM_CCM_INTERVAL_3_3_MS; case 2: return BR_CFM_CCM_INTERVAL_10_MS; case 3: return BR_CFM_CCM_INTERVAL_100_MS; case 4: return BR_CFM_CCM_INTERVAL_1_SEC; case 5: return BR_CFM_CCM_INTERVAL_10_SEC; case 6: return BR_CFM_CCM_INTERVAL_1_MIN; case 7: return BR_CFM_CCM_INTERVAL_10_MIN; } return BR_CFM_CCM_INTERVAL_NONE; } static void ccm_rx_timer_start(struct br_cfm_peer_mep *peer_mep) { u32 interval_us; interval_us = interval_to_us(peer_mep->mep->cc_config.exp_interval); /* Function ccm_rx_dwork must be called with 1/4 * of the configured CC 'expected_interval' * in order to detect CCM defect after 3.25 interval. */ queue_delayed_work(system_wq, &peer_mep->ccm_rx_dwork, usecs_to_jiffies(interval_us / 4)); } static void br_cfm_notify(int event, const struct net_bridge_port *port) { u32 filter = RTEXT_FILTER_CFM_STATUS; br_info_notify(event, port->br, NULL, filter); } static void cc_peer_enable(struct br_cfm_peer_mep *peer_mep) { memset(&peer_mep->cc_status, 0, sizeof(peer_mep->cc_status)); peer_mep->ccm_rx_count_miss = 0; ccm_rx_timer_start(peer_mep); } static void cc_peer_disable(struct br_cfm_peer_mep *peer_mep) { cancel_delayed_work_sync(&peer_mep->ccm_rx_dwork); } static struct sk_buff *ccm_frame_build(struct br_cfm_mep *mep, const struct br_cfm_cc_ccm_tx_info *const tx_info) { struct br_cfm_common_hdr *common_hdr; struct net_bridge_port *b_port; struct br_cfm_maid *maid; u8 *itu_reserved, *e_tlv; struct ethhdr *eth_hdr; struct sk_buff *skb; __be32 *status_tlv; __be32 *snumber; __be16 *mepid; skb = dev_alloc_skb(CFM_CCM_MAX_FRAME_LENGTH); if (!skb) return NULL; rcu_read_lock(); b_port = rcu_dereference(mep->b_port); if (!b_port) { kfree_skb(skb); rcu_read_unlock(); return NULL; } skb->dev = b_port->dev; rcu_read_unlock(); /* The device cannot be deleted until the work_queue functions has * completed. This function is called from ccm_tx_work_expired() * that is a work_queue functions. */ skb->protocol = htons(ETH_P_CFM); skb->priority = CFM_FRAME_PRIO; /* Ethernet header */ eth_hdr = skb_put(skb, sizeof(*eth_hdr)); ether_addr_copy(eth_hdr->h_dest, tx_info->dmac.addr); ether_addr_copy(eth_hdr->h_source, mep->config.unicast_mac.addr); eth_hdr->h_proto = htons(ETH_P_CFM); /* Common CFM Header */ common_hdr = skb_put(skb, sizeof(*common_hdr)); common_hdr->mdlevel_version = mep->config.mdlevel << 5; common_hdr->opcode = BR_CFM_OPCODE_CCM; common_hdr->flags = (mep->rdi << 7) | interval_to_pdu(mep->cc_config.exp_interval); common_hdr->tlv_offset = CFM_CCM_TLV_OFFSET; /* Sequence number */ snumber = skb_put(skb, sizeof(*snumber)); if (tx_info->seq_no_update) { *snumber = cpu_to_be32(mep->ccm_tx_snumber); mep->ccm_tx_snumber += 1; } else { *snumber = 0; } mepid = skb_put(skb, sizeof(*mepid)); *mepid = cpu_to_be16((u16)mep->config.mepid); maid = skb_put(skb, sizeof(*maid)); memcpy(maid->data, mep->cc_config.exp_maid.data, sizeof(maid->data)); /* ITU reserved (CFM_CCM_ITU_RESERVED_SIZE octets) */ itu_reserved = skb_put(skb, CFM_CCM_ITU_RESERVED_SIZE); memset(itu_reserved, 0, CFM_CCM_ITU_RESERVED_SIZE); /* Generel CFM TLV format: * TLV type: one byte * TLV value length: two bytes * TLV value: 'TLV value length' bytes */ /* Port status TLV. The value length is 1. Total of 4 bytes. */ if (tx_info->port_tlv) { status_tlv = skb_put(skb, sizeof(*status_tlv)); *status_tlv = cpu_to_be32((CFM_PORT_STATUS_TLV_TYPE << 24) | (1 << 8) | /* Value length */ (tx_info->port_tlv_value & 0xFF)); } /* Interface status TLV. The value length is 1. Total of 4 bytes. */ if (tx_info->if_tlv) { status_tlv = skb_put(skb, sizeof(*status_tlv)); *status_tlv = cpu_to_be32((CFM_IF_STATUS_TLV_TYPE << 24) | (1 << 8) | /* Value length */ (tx_info->if_tlv_value & 0xFF)); } /* End TLV */ e_tlv = skb_put(skb, sizeof(*e_tlv)); *e_tlv = CFM_ENDE_TLV_TYPE; return skb; } static void ccm_frame_tx(struct sk_buff *skb) { skb_reset_network_header(skb); dev_queue_xmit(skb); } /* This function is called with the configured CC 'expected_interval' * in order to drive CCM transmission when enabled. */ static void ccm_tx_work_expired(struct work_struct *work) { struct delayed_work *del_work; struct br_cfm_mep *mep; struct sk_buff *skb; u32 interval_us; del_work = to_delayed_work(work); mep = container_of(del_work, struct br_cfm_mep, ccm_tx_dwork); if (time_before_eq(mep->ccm_tx_end, jiffies)) { /* Transmission period has ended */ mep->cc_ccm_tx_info.period = 0; return; } skb = ccm_frame_build(mep, &mep->cc_ccm_tx_info); if (skb) ccm_frame_tx(skb); interval_us = interval_to_us(mep->cc_config.exp_interval); queue_delayed_work(system_wq, &mep->ccm_tx_dwork, usecs_to_jiffies(interval_us)); } /* This function is called with 1/4 of the configured CC 'expected_interval' * in order to detect CCM defect after 3.25 interval. */ static void ccm_rx_work_expired(struct work_struct *work) { struct br_cfm_peer_mep *peer_mep; struct net_bridge_port *b_port; struct delayed_work *del_work; del_work = to_delayed_work(work); peer_mep = container_of(del_work, struct br_cfm_peer_mep, ccm_rx_dwork); /* After 13 counts (4 * 3,25) then 3.25 intervals are expired */ if (peer_mep->ccm_rx_count_miss < 13) { /* 3.25 intervals are NOT expired without CCM reception */ peer_mep->ccm_rx_count_miss++; /* Start timer again */ ccm_rx_timer_start(peer_mep); } else { /* 3.25 intervals are expired without CCM reception. * CCM defect detected */ peer_mep->cc_status.ccm_defect = true; /* Change in CCM defect status - notify */ rcu_read_lock(); b_port = rcu_dereference(peer_mep->mep->b_port); if (b_port) br_cfm_notify(RTM_NEWLINK, b_port); rcu_read_unlock(); } } static u32 ccm_tlv_extract(struct sk_buff *skb, u32 index, struct br_cfm_peer_mep *peer_mep) { __be32 *s_tlv; __be32 _s_tlv; u32 h_s_tlv; u8 *e_tlv; u8 _e_tlv; e_tlv = skb_header_pointer(skb, index, sizeof(_e_tlv), &_e_tlv); if (!e_tlv) return 0; /* TLV is present - get the status TLV */ s_tlv = skb_header_pointer(skb, index, sizeof(_s_tlv), &_s_tlv); if (!s_tlv) return 0; h_s_tlv = ntohl(*s_tlv); if ((h_s_tlv >> 24) == CFM_IF_STATUS_TLV_TYPE) { /* Interface status TLV */ peer_mep->cc_status.tlv_seen = true; peer_mep->cc_status.if_tlv_value = (h_s_tlv & 0xFF); } if ((h_s_tlv >> 24) == CFM_PORT_STATUS_TLV_TYPE) { /* Port status TLV */ peer_mep->cc_status.tlv_seen = true; peer_mep->cc_status.port_tlv_value = (h_s_tlv & 0xFF); } /* The Sender ID TLV is not handled */ /* The Organization-Specific TLV is not handled */ /* Return the length of this tlv. * This is the length of the value field plus 3 bytes for size of type * field and length field */ return ((h_s_tlv >> 8) & 0xFFFF) + 3; } /* note: already called with rcu_read_lock */ static int br_cfm_frame_rx(struct net_bridge_port *port, struct sk_buff *skb) { u32 mdlevel, interval, size, index, max; const struct br_cfm_common_hdr *hdr; struct br_cfm_peer_mep *peer_mep; const struct br_cfm_maid *maid; struct br_cfm_common_hdr _hdr; struct br_cfm_maid _maid; struct br_cfm_mep *mep; struct net_bridge *br; __be32 *snumber; __be32 _snumber; __be16 *mepid; __be16 _mepid; if (port->state == BR_STATE_DISABLED) return 0; hdr = skb_header_pointer(skb, 0, sizeof(_hdr), &_hdr); if (!hdr) return 1; br = port->br; mep = br_mep_find_ifindex(br, port->dev->ifindex); if (unlikely(!mep)) /* No MEP on this port - must be forwarded */ return 0; mdlevel = hdr->mdlevel_version >> 5; if (mdlevel > mep->config.mdlevel) /* The level is above this MEP level - must be forwarded */ return 0; if ((hdr->mdlevel_version & 0x1F) != 0) { /* Invalid version */ mep->status.version_unexp_seen = true; return 1; } if (mdlevel < mep->config.mdlevel) { /* The level is below this MEP level */ mep->status.rx_level_low_seen = true; return 1; } if (hdr->opcode == BR_CFM_OPCODE_CCM) { /* CCM PDU received. */ /* MA ID is after common header + sequence number + MEP ID */ maid = skb_header_pointer(skb, CFM_CCM_PDU_MAID_OFFSET, sizeof(_maid), &_maid); if (!maid) return 1; if (memcmp(maid->data, mep->cc_config.exp_maid.data, sizeof(maid->data))) /* MA ID not as expected */ return 1; /* MEP ID is after common header + sequence number */ mepid = skb_header_pointer(skb, CFM_CCM_PDU_MEPID_OFFSET, sizeof(_mepid), &_mepid); if (!mepid) return 1; peer_mep = br_peer_mep_find(mep, (u32)ntohs(*mepid)); if (!peer_mep) return 1; /* Interval is in common header flags */ interval = hdr->flags & 0x07; if (mep->cc_config.exp_interval != pdu_to_interval(interval)) /* Interval not as expected */ return 1; /* A valid CCM frame is received */ if (peer_mep->cc_status.ccm_defect) { peer_mep->cc_status.ccm_defect = false; /* Change in CCM defect status - notify */ br_cfm_notify(RTM_NEWLINK, port); /* Start CCM RX timer */ ccm_rx_timer_start(peer_mep); } peer_mep->cc_status.seen = true; peer_mep->ccm_rx_count_miss = 0; /* RDI is in common header flags */ peer_mep->cc_status.rdi = (hdr->flags & 0x80) ? true : false; /* Sequence number is after common header */ snumber = skb_header_pointer(skb, CFM_CCM_PDU_SEQNR_OFFSET, sizeof(_snumber), &_snumber); if (!snumber) return 1; if (ntohl(*snumber) != (mep->ccm_rx_snumber + 1)) /* Unexpected sequence number */ peer_mep->cc_status.seq_unexp_seen = true; mep->ccm_rx_snumber = ntohl(*snumber); /* TLV end is after common header + sequence number + MEP ID + * MA ID + ITU reserved */ index = CFM_CCM_PDU_TLV_OFFSET; max = 0; do { /* Handle all TLVs */ size = ccm_tlv_extract(skb, index, peer_mep); index += size; max += 1; } while (size != 0 && max < 4); /* Max four TLVs possible */ return 1; } mep->status.opcode_unexp_seen = true; return 1; } static struct br_frame_type cfm_frame_type __read_mostly = { .type = cpu_to_be16(ETH_P_CFM), .frame_handler = br_cfm_frame_rx, }; int br_cfm_mep_create(struct net_bridge *br, const u32 instance, struct br_cfm_mep_create *const create, struct netlink_ext_ack *extack) { struct net_bridge_port *p; struct br_cfm_mep *mep; ASSERT_RTNL(); if (create->domain == BR_CFM_VLAN) { NL_SET_ERR_MSG_MOD(extack, "VLAN domain not supported"); return -EINVAL; } if (create->domain != BR_CFM_PORT) { NL_SET_ERR_MSG_MOD(extack, "Invalid domain value"); return -EINVAL; } if (create->direction == BR_CFM_MEP_DIRECTION_UP) { NL_SET_ERR_MSG_MOD(extack, "Up-MEP not supported"); return -EINVAL; } if (create->direction != BR_CFM_MEP_DIRECTION_DOWN) { NL_SET_ERR_MSG_MOD(extack, "Invalid direction value"); return -EINVAL; } p = br_mep_get_port(br, create->ifindex); if (!p) { NL_SET_ERR_MSG_MOD(extack, "Port is not related to bridge"); return -EINVAL; } mep = br_mep_find(br, instance); if (mep) { NL_SET_ERR_MSG_MOD(extack, "MEP instance already exists"); return -EEXIST; } /* In PORT domain only one instance can be created per port */ if (create->domain == BR_CFM_PORT) { mep = br_mep_find_ifindex(br, create->ifindex); if (mep) { NL_SET_ERR_MSG_MOD(extack, "Only one Port MEP on a port allowed"); return -EINVAL; } } mep = kzalloc(sizeof(*mep), GFP_KERNEL); if (!mep) return -ENOMEM; mep->create = *create; mep->instance = instance; rcu_assign_pointer(mep->b_port, p); INIT_HLIST_HEAD(&mep->peer_mep_list); INIT_DELAYED_WORK(&mep->ccm_tx_dwork, ccm_tx_work_expired); if (hlist_empty(&br->mep_list)) br_add_frame(br, &cfm_frame_type); hlist_add_tail_rcu(&mep->head, &br->mep_list); return 0; } static void mep_delete_implementation(struct net_bridge *br, struct br_cfm_mep *mep) { struct br_cfm_peer_mep *peer_mep; struct hlist_node *n_store; ASSERT_RTNL(); /* Empty and free peer MEP list */ hlist_for_each_entry_safe(peer_mep, n_store, &mep->peer_mep_list, head) { cancel_delayed_work_sync(&peer_mep->ccm_rx_dwork); hlist_del_rcu(&peer_mep->head); kfree_rcu(peer_mep, rcu); } cancel_delayed_work_sync(&mep->ccm_tx_dwork); RCU_INIT_POINTER(mep->b_port, NULL); hlist_del_rcu(&mep->head); kfree_rcu(mep, rcu); if (hlist_empty(&br->mep_list)) br_del_frame(br, &cfm_frame_type); } int br_cfm_mep_delete(struct net_bridge *br, const u32 instance, struct netlink_ext_ack *extack) { struct br_cfm_mep *mep; ASSERT_RTNL(); mep = br_mep_find(br, instance); if (!mep) { NL_SET_ERR_MSG_MOD(extack, "MEP instance does not exists"); return -ENOENT; } mep_delete_implementation(br, mep); return 0; } int br_cfm_mep_config_set(struct net_bridge *br, const u32 instance, const struct br_cfm_mep_config *const config, struct netlink_ext_ack *extack) { struct br_cfm_mep *mep; ASSERT_RTNL(); mep = br_mep_find(br, instance); if (!mep) { NL_SET_ERR_MSG_MOD(extack, "MEP instance does not exists"); return -ENOENT; } mep->config = *config; return 0; } int br_cfm_cc_config_set(struct net_bridge *br, const u32 instance, const struct br_cfm_cc_config *const config, struct netlink_ext_ack *extack) { struct br_cfm_peer_mep *peer_mep; struct br_cfm_mep *mep; ASSERT_RTNL(); mep = br_mep_find(br, instance); if (!mep) { NL_SET_ERR_MSG_MOD(extack, "MEP instance does not exists"); return -ENOENT; } /* Check for no change in configuration */ if (memcmp(config, &mep->cc_config, sizeof(*config)) == 0) return 0; if (config->enable && !mep->cc_config.enable) /* CC is enabled */ hlist_for_each_entry(peer_mep, &mep->peer_mep_list, head) cc_peer_enable(peer_mep); if (!config->enable && mep->cc_config.enable) /* CC is disabled */ hlist_for_each_entry(peer_mep, &mep->peer_mep_list, head) cc_peer_disable(peer_mep); mep->cc_config = *config; mep->ccm_rx_snumber = 0; mep->ccm_tx_snumber = 1; return 0; } int br_cfm_cc_peer_mep_add(struct net_bridge *br, const u32 instance, u32 mepid, struct netlink_ext_ack *extack) { struct br_cfm_peer_mep *peer_mep; struct br_cfm_mep *mep; ASSERT_RTNL(); mep = br_mep_find(br, instance); if (!mep) { NL_SET_ERR_MSG_MOD(extack, "MEP instance does not exists"); return -ENOENT; } peer_mep = br_peer_mep_find(mep, mepid); if (peer_mep) { NL_SET_ERR_MSG_MOD(extack, "Peer MEP-ID already exists"); return -EEXIST; } peer_mep = kzalloc(sizeof(*peer_mep), GFP_KERNEL); if (!peer_mep) return -ENOMEM; peer_mep->mepid = mepid; peer_mep->mep = mep; INIT_DELAYED_WORK(&peer_mep->ccm_rx_dwork, ccm_rx_work_expired); if (mep->cc_config.enable) cc_peer_enable(peer_mep); hlist_add_tail_rcu(&peer_mep->head, &mep->peer_mep_list); return 0; } int br_cfm_cc_peer_mep_remove(struct net_bridge *br, const u32 instance, u32 mepid, struct netlink_ext_ack *extack) { struct br_cfm_peer_mep *peer_mep; struct br_cfm_mep *mep; ASSERT_RTNL(); mep = br_mep_find(br, instance); if (!mep) { NL_SET_ERR_MSG_MOD(extack, "MEP instance does not exists"); return -ENOENT; } peer_mep = br_peer_mep_find(mep, mepid); if (!peer_mep) { NL_SET_ERR_MSG_MOD(extack, "Peer MEP-ID does not exists"); return -ENOENT; } cc_peer_disable(peer_mep); hlist_del_rcu(&peer_mep->head); kfree_rcu(peer_mep, rcu); return 0; } int br_cfm_cc_rdi_set(struct net_bridge *br, const u32 instance, const bool rdi, struct netlink_ext_ack *extack) { struct br_cfm_mep *mep; ASSERT_RTNL(); mep = br_mep_find(br, instance); if (!mep) { NL_SET_ERR_MSG_MOD(extack, "MEP instance does not exists"); return -ENOENT; } mep->rdi = rdi; return 0; } int br_cfm_cc_ccm_tx(struct net_bridge *br, const u32 instance, const struct br_cfm_cc_ccm_tx_info *const tx_info, struct netlink_ext_ack *extack) { struct br_cfm_mep *mep; ASSERT_RTNL(); mep = br_mep_find(br, instance); if (!mep) { NL_SET_ERR_MSG_MOD(extack, "MEP instance does not exists"); return -ENOENT; } if (memcmp(tx_info, &mep->cc_ccm_tx_info, sizeof(*tx_info)) == 0) { /* No change in tx_info. */ if (mep->cc_ccm_tx_info.period == 0) /* Transmission is not enabled - just return */ return 0; /* Transmission is ongoing, the end time is recalculated */ mep->ccm_tx_end = jiffies + usecs_to_jiffies(tx_info->period * 1000000); return 0; } if (tx_info->period == 0 && mep->cc_ccm_tx_info.period == 0) /* Some change in info and transmission is not ongoing */ goto save; if (tx_info->period != 0 && mep->cc_ccm_tx_info.period != 0) { /* Some change in info and transmission is ongoing * The end time is recalculated */ mep->ccm_tx_end = jiffies + usecs_to_jiffies(tx_info->period * 1000000); goto save; } if (tx_info->period == 0 && mep->cc_ccm_tx_info.period != 0) { cancel_delayed_work_sync(&mep->ccm_tx_dwork); goto save; } /* Start delayed work to transmit CCM frames. It is done with zero delay * to send first frame immediately */ mep->ccm_tx_end = jiffies + usecs_to_jiffies(tx_info->period * 1000000); queue_delayed_work(system_wq, &mep->ccm_tx_dwork, 0); save: mep->cc_ccm_tx_info = *tx_info; return 0; } int br_cfm_mep_count(struct net_bridge *br, u32 *count) { struct br_cfm_mep *mep; *count = 0; rcu_read_lock(); hlist_for_each_entry_rcu(mep, &br->mep_list, head) *count += 1; rcu_read_unlock(); return 0; } int br_cfm_peer_mep_count(struct net_bridge *br, u32 *count) { struct br_cfm_peer_mep *peer_mep; struct br_cfm_mep *mep; *count = 0; rcu_read_lock(); hlist_for_each_entry_rcu(mep, &br->mep_list, head) hlist_for_each_entry_rcu(peer_mep, &mep->peer_mep_list, head) *count += 1; rcu_read_unlock(); return 0; } bool br_cfm_created(struct net_bridge *br) { return !hlist_empty(&br->mep_list); } /* Deletes the CFM instances on a specific bridge port */ void br_cfm_port_del(struct net_bridge *br, struct net_bridge_port *port) { struct hlist_node *n_store; struct br_cfm_mep *mep; ASSERT_RTNL(); hlist_for_each_entry_safe(mep, n_store, &br->mep_list, head) if (mep->create.ifindex == port->dev->ifindex) mep_delete_implementation(br, mep); } |
4 2 33 20 49 48 20 22 28 39 1 108 1517 1517 146 296 230 3 235 353 426 196 7 165 854 833 89 15 27 22 1580 439 26 13 11 17 1287 188 213 20 66 2 756 1 1961 1735 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 | /* SPDX-License-Identifier: GPL-2.0-only */ /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com */ #ifndef _LINUX_BPF_H #define _LINUX_BPF_H 1 #include <uapi/linux/bpf.h> #include <uapi/linux/filter.h> #include <linux/workqueue.h> #include <linux/file.h> #include <linux/percpu.h> #include <linux/err.h> #include <linux/rbtree_latch.h> #include <linux/numa.h> #include <linux/mm_types.h> #include <linux/wait.h> #include <linux/refcount.h> #include <linux/mutex.h> #include <linux/module.h> #include <linux/kallsyms.h> #include <linux/capability.h> #include <linux/sched/mm.h> #include <linux/slab.h> #include <linux/percpu-refcount.h> #include <linux/stddef.h> #include <linux/bpfptr.h> #include <linux/btf.h> #include <linux/rcupdate_trace.h> #include <linux/static_call.h> #include <linux/memcontrol.h> #include <linux/cfi.h> struct bpf_verifier_env; struct bpf_verifier_log; struct perf_event; struct bpf_prog; struct bpf_prog_aux; struct bpf_map; struct bpf_arena; struct sock; struct seq_file; struct btf; struct btf_type; struct exception_table_entry; struct seq_operations; struct bpf_iter_aux_info; struct bpf_local_storage; struct bpf_local_storage_map; struct kobject; struct mem_cgroup; struct module; struct bpf_func_state; struct ftrace_ops; struct cgroup; struct bpf_token; struct user_namespace; struct super_block; struct inode; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; extern struct kobject *btf_kobj; extern struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma; extern bool bpf_global_ma_set; typedef u64 (*bpf_callback_t)(u64, u64, u64, u64, u64); typedef int (*bpf_iter_init_seq_priv_t)(void *private_data, struct bpf_iter_aux_info *aux); typedef void (*bpf_iter_fini_seq_priv_t)(void *private_data); typedef unsigned int (*bpf_func_t)(const void *, const struct bpf_insn *); struct bpf_iter_seq_info { const struct seq_operations *seq_ops; bpf_iter_init_seq_priv_t init_seq_private; bpf_iter_fini_seq_priv_t fini_seq_private; u32 seq_priv_size; }; /* map is generic key/value storage optionally accessible by eBPF programs */ struct bpf_map_ops { /* funcs callable from userspace (via syscall) */ int (*map_alloc_check)(union bpf_attr *attr); struct bpf_map *(*map_alloc)(union bpf_attr *attr); void (*map_release)(struct bpf_map *map, struct file *map_file); void (*map_free)(struct bpf_map *map); int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key); void (*map_release_uref)(struct bpf_map *map); void *(*map_lookup_elem_sys_only)(struct bpf_map *map, void *key); int (*map_lookup_batch)(struct bpf_map *map, const union bpf_attr *attr, union bpf_attr __user *uattr); int (*map_lookup_and_delete_elem)(struct bpf_map *map, void *key, void *value, u64 flags); int (*map_lookup_and_delete_batch)(struct bpf_map *map, const union bpf_attr *attr, union bpf_attr __user *uattr); int (*map_update_batch)(struct bpf_map *map, struct file *map_file, const union bpf_attr *attr, union bpf_attr __user *uattr); int (*map_delete_batch)(struct bpf_map *map, const union bpf_attr *attr, union bpf_attr __user *uattr); /* funcs callable from userspace and from eBPF programs */ void *(*map_lookup_elem)(struct bpf_map *map, void *key); long (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags); long (*map_delete_elem)(struct bpf_map *map, void *key); long (*map_push_elem)(struct bpf_map *map, void *value, u64 flags); long (*map_pop_elem)(struct bpf_map *map, void *value); long (*map_peek_elem)(struct bpf_map *map, void *value); void *(*map_lookup_percpu_elem)(struct bpf_map *map, void *key, u32 cpu); /* funcs called by prog_array and perf_event_array map */ void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file, int fd); /* If need_defer is true, the implementation should guarantee that * the to-be-put element is still alive before the bpf program, which * may manipulate it, exists. */ void (*map_fd_put_ptr)(struct bpf_map *map, void *ptr, bool need_defer); int (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf); u32 (*map_fd_sys_lookup_elem)(void *ptr); void (*map_seq_show_elem)(struct bpf_map *map, void *key, struct seq_file *m); int (*map_check_btf)(const struct bpf_map *map, const struct btf *btf, const struct btf_type *key_type, const struct btf_type *value_type); /* Prog poke tracking helpers. */ int (*map_poke_track)(struct bpf_map *map, struct bpf_prog_aux *aux); void (*map_poke_untrack)(struct bpf_map *map, struct bpf_prog_aux *aux); void (*map_poke_run)(struct bpf_map *map, u32 key, struct bpf_prog *old, struct bpf_prog *new); /* Direct value access helpers. */ int (*map_direct_value_addr)(const struct bpf_map *map, u64 *imm, u32 off); int (*map_direct_value_meta)(const struct bpf_map *map, u64 imm, u32 *off); int (*map_mmap)(struct bpf_map *map, struct vm_area_struct *vma); __poll_t (*map_poll)(struct bpf_map *map, struct file *filp, struct poll_table_struct *pts); unsigned long (*map_get_unmapped_area)(struct file *filep, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); /* Functions called by bpf_local_storage maps */ int (*map_local_storage_charge)(struct bpf_local_storage_map *smap, void *owner, u32 size); void (*map_local_storage_uncharge)(struct bpf_local_storage_map *smap, void *owner, u32 size); struct bpf_local_storage __rcu ** (*map_owner_storage_ptr)(void *owner); /* Misc helpers.*/ long (*map_redirect)(struct bpf_map *map, u64 key, u64 flags); /* map_meta_equal must be implemented for maps that can be * used as an inner map. It is a runtime check to ensure * an inner map can be inserted to an outer map. * * Some properties of the inner map has been used during the * verification time. When inserting an inner map at the runtime, * map_meta_equal has to ensure the inserting map has the same * properties that the verifier has used earlier. */ bool (*map_meta_equal)(const struct bpf_map *meta0, const struct bpf_map *meta1); int (*map_set_for_each_callback_args)(struct bpf_verifier_env *env, struct bpf_func_state *caller, struct bpf_func_state *callee); long (*map_for_each_callback)(struct bpf_map *map, bpf_callback_t callback_fn, void *callback_ctx, u64 flags); u64 (*map_mem_usage)(const struct bpf_map *map); /* BTF id of struct allocated by map_alloc */ int *map_btf_id; /* bpf_iter info used to open a seq_file */ const struct bpf_iter_seq_info *iter_seq_info; }; enum { /* Support at most 11 fields in a BTF type */ BTF_FIELDS_MAX = 11, }; enum btf_field_type { BPF_SPIN_LOCK = (1 << 0), BPF_TIMER = (1 << 1), BPF_KPTR_UNREF = (1 << 2), BPF_KPTR_REF = (1 << 3), BPF_KPTR_PERCPU = (1 << 4), BPF_KPTR = BPF_KPTR_UNREF | BPF_KPTR_REF | BPF_KPTR_PERCPU, BPF_LIST_HEAD = (1 << 5), BPF_LIST_NODE = (1 << 6), BPF_RB_ROOT = (1 << 7), BPF_RB_NODE = (1 << 8), BPF_GRAPH_NODE = BPF_RB_NODE | BPF_LIST_NODE, BPF_GRAPH_ROOT = BPF_RB_ROOT | BPF_LIST_HEAD, BPF_REFCOUNT = (1 << 9), BPF_WORKQUEUE = (1 << 10), }; typedef void (*btf_dtor_kfunc_t)(void *); struct btf_field_kptr { struct btf *btf; struct module *module; /* dtor used if btf_is_kernel(btf), otherwise the type is * program-allocated, dtor is NULL, and __bpf_obj_drop_impl is used */ btf_dtor_kfunc_t dtor; u32 btf_id; }; struct btf_field_graph_root { struct btf *btf; u32 value_btf_id; u32 node_offset; struct btf_record *value_rec; }; struct btf_field { u32 offset; u32 size; enum btf_field_type type; union { struct btf_field_kptr kptr; struct btf_field_graph_root graph_root; }; }; struct btf_record { u32 cnt; u32 field_mask; int spin_lock_off; int timer_off; int wq_off; int refcount_off; struct btf_field fields[]; }; /* Non-opaque version of bpf_rb_node in uapi/linux/bpf.h */ struct bpf_rb_node_kern { struct rb_node rb_node; void *owner; } __attribute__((aligned(8))); /* Non-opaque version of bpf_list_node in uapi/linux/bpf.h */ struct bpf_list_node_kern { struct list_head list_head; void *owner; } __attribute__((aligned(8))); struct bpf_map { const struct bpf_map_ops *ops; struct bpf_map *inner_map_meta; #ifdef CONFIG_SECURITY void *security; #endif enum bpf_map_type map_type; u32 key_size; u32 value_size; u32 max_entries; u64 map_extra; /* any per-map-type extra fields */ u32 map_flags; u32 id; struct btf_record *record; int numa_node; u32 btf_key_type_id; u32 btf_value_type_id; u32 btf_vmlinux_value_type_id; struct btf *btf; #ifdef CONFIG_MEMCG struct obj_cgroup *objcg; #endif char name[BPF_OBJ_NAME_LEN]; struct mutex freeze_mutex; atomic64_t refcnt; atomic64_t usercnt; /* rcu is used before freeing and work is only used during freeing */ union { struct work_struct work; struct rcu_head rcu; }; atomic64_t writecnt; /* 'Ownership' of program-containing map is claimed by the first program * that is going to use this map or by the first program which FD is * stored in the map to make sure that all callers and callees have the * same prog type, JITed flag and xdp_has_frags flag. */ struct { const struct btf_type *attach_func_proto; spinlock_t lock; enum bpf_prog_type type; bool jited; bool xdp_has_frags; } owner; bool bypass_spec_v1; bool frozen; /* write-once; write-protected by freeze_mutex */ bool free_after_mult_rcu_gp; bool free_after_rcu_gp; atomic64_t sleepable_refcnt; s64 __percpu *elem_count; }; static inline const char *btf_field_type_name(enum btf_field_type type) { switch (type) { case BPF_SPIN_LOCK: return "bpf_spin_lock"; case BPF_TIMER: return "bpf_timer"; case BPF_WORKQUEUE: return "bpf_wq"; case BPF_KPTR_UNREF: case BPF_KPTR_REF: return "kptr"; case BPF_KPTR_PERCPU: return "percpu_kptr"; case BPF_LIST_HEAD: return "bpf_list_head"; case BPF_LIST_NODE: return "bpf_list_node"; case BPF_RB_ROOT: return "bpf_rb_root"; case BPF_RB_NODE: return "bpf_rb_node"; case BPF_REFCOUNT: return "bpf_refcount"; default: WARN_ON_ONCE(1); return "unknown"; } } static inline u32 btf_field_type_size(enum btf_field_type type) { switch (type) { case BPF_SPIN_LOCK: return sizeof(struct bpf_spin_lock); case BPF_TIMER: return sizeof(struct bpf_timer); case BPF_WORKQUEUE: return sizeof(struct bpf_wq); case BPF_KPTR_UNREF: case BPF_KPTR_REF: case BPF_KPTR_PERCPU: return sizeof(u64); case BPF_LIST_HEAD: return sizeof(struct bpf_list_head); case BPF_LIST_NODE: return sizeof(struct bpf_list_node); case BPF_RB_ROOT: return sizeof(struct bpf_rb_root); case BPF_RB_NODE: return sizeof(struct bpf_rb_node); case BPF_REFCOUNT: return sizeof(struct bpf_refcount); default: WARN_ON_ONCE(1); return 0; } } static inline u32 btf_field_type_align(enum btf_field_type type) { switch (type) { case BPF_SPIN_LOCK: return __alignof__(struct bpf_spin_lock); case BPF_TIMER: return __alignof__(struct bpf_timer); case BPF_WORKQUEUE: return __alignof__(struct bpf_wq); case BPF_KPTR_UNREF: case BPF_KPTR_REF: case BPF_KPTR_PERCPU: return __alignof__(u64); case BPF_LIST_HEAD: return __alignof__(struct bpf_list_head); case BPF_LIST_NODE: return __alignof__(struct bpf_list_node); case BPF_RB_ROOT: return __alignof__(struct bpf_rb_root); case BPF_RB_NODE: return __alignof__(struct bpf_rb_node); case BPF_REFCOUNT: return __alignof__(struct bpf_refcount); default: WARN_ON_ONCE(1); return 0; } } static inline void bpf_obj_init_field(const struct btf_field *field, void *addr) { memset(addr, 0, field->size); switch (field->type) { case BPF_REFCOUNT: refcount_set((refcount_t *)addr, 1); break; case BPF_RB_NODE: RB_CLEAR_NODE((struct rb_node *)addr); break; case BPF_LIST_HEAD: case BPF_LIST_NODE: INIT_LIST_HEAD((struct list_head *)addr); break; case BPF_RB_ROOT: /* RB_ROOT_CACHED 0-inits, no need to do anything after memset */ case BPF_SPIN_LOCK: case BPF_TIMER: case BPF_WORKQUEUE: case BPF_KPTR_UNREF: case BPF_KPTR_REF: case BPF_KPTR_PERCPU: break; default: WARN_ON_ONCE(1); return; } } static inline bool btf_record_has_field(const struct btf_record *rec, enum btf_field_type type) { if (IS_ERR_OR_NULL(rec)) return false; return rec->field_mask & type; } static inline void bpf_obj_init(const struct btf_record *rec, void *obj) { int i; if (IS_ERR_OR_NULL(rec)) return; for (i = 0; i < rec->cnt; i++) bpf_obj_init_field(&rec->fields[i], obj + rec->fields[i].offset); } /* 'dst' must be a temporary buffer and should not point to memory that is being * used in parallel by a bpf program or bpf syscall, otherwise the access from * the bpf program or bpf syscall may be corrupted by the reinitialization, * leading to weird problems. Even 'dst' is newly-allocated from bpf memory * allocator, it is still possible for 'dst' to be used in parallel by a bpf * program or bpf syscall. */ static inline void check_and_init_map_value(struct bpf_map *map, void *dst) { bpf_obj_init(map->record, dst); } /* memcpy that is used with 8-byte aligned pointers, power-of-8 size and * forced to use 'long' read/writes to try to atomically copy long counters. * Best-effort only. No barriers here, since it _will_ race with concurrent * updates from BPF programs. Called from bpf syscall and mostly used with * size 8 or 16 bytes, so ask compiler to inline it. */ static inline void bpf_long_memcpy(void *dst, const void *src, u32 size) { const long *lsrc = src; long *ldst = dst; size /= sizeof(long); while (size--) data_race(*ldst++ = *lsrc++); } /* copy everything but bpf_spin_lock, bpf_timer, and kptrs. There could be one of each. */ static inline void bpf_obj_memcpy(struct btf_record *rec, void *dst, void *src, u32 size, bool long_memcpy) { u32 curr_off = 0; int i; if (IS_ERR_OR_NULL(rec)) { if (long_memcpy) bpf_long_memcpy(dst, src, round_up(size, 8)); else memcpy(dst, src, size); return; } for (i = 0; i < rec->cnt; i++) { u32 next_off = rec->fields[i].offset; u32 sz = next_off - curr_off; memcpy(dst + curr_off, src + curr_off, sz); curr_off += rec->fields[i].size + sz; } memcpy(dst + curr_off, src + curr_off, size - curr_off); } static inline void copy_map_value(struct bpf_map *map, void *dst, void *src) { bpf_obj_memcpy(map->record, dst, src, map->value_size, false); } static inline void copy_map_value_long(struct bpf_map *map, void *dst, void *src) { bpf_obj_memcpy(map->record, dst, src, map->value_size, true); } static inline void bpf_obj_memzero(struct btf_record *rec, void *dst, u32 size) { u32 curr_off = 0; int i; if (IS_ERR_OR_NULL(rec)) { memset(dst, 0, size); return; } for (i = 0; i < rec->cnt; i++) { u32 next_off = rec->fields[i].offset; u32 sz = next_off - curr_off; memset(dst + curr_off, 0, sz); curr_off += rec->fields[i].size + sz; } memset(dst + curr_off, 0, size - curr_off); } static inline void zero_map_value(struct bpf_map *map, void *dst) { bpf_obj_memzero(map->record, dst, map->value_size); } void copy_map_value_locked(struct bpf_map *map, void *dst, void *src, bool lock_src); void bpf_timer_cancel_and_free(void *timer); void bpf_wq_cancel_and_free(void *timer); void bpf_list_head_free(const struct btf_field *field, void *list_head, struct bpf_spin_lock *spin_lock); void bpf_rb_root_free(const struct btf_field *field, void *rb_root, struct bpf_spin_lock *spin_lock); u64 bpf_arena_get_kern_vm_start(struct bpf_arena *arena); u64 bpf_arena_get_user_vm_start(struct bpf_arena *arena); int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size); struct bpf_offload_dev; struct bpf_offloaded_map; struct bpf_map_dev_ops { int (*map_get_next_key)(struct bpf_offloaded_map *map, void *key, void *next_key); int (*map_lookup_elem)(struct bpf_offloaded_map *map, void *key, void *value); int (*map_update_elem)(struct bpf_offloaded_map *map, void *key, void *value, u64 flags); int (*map_delete_elem)(struct bpf_offloaded_map *map, void *key); }; struct bpf_offloaded_map { struct bpf_map map; struct net_device *netdev; const struct bpf_map_dev_ops *dev_ops; void *dev_priv; struct list_head offloads; }; static inline struct bpf_offloaded_map *map_to_offmap(struct bpf_map *map) { return container_of(map, struct bpf_offloaded_map, map); } static inline bool bpf_map_offload_neutral(const struct bpf_map *map) { return map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY; } static inline bool bpf_map_support_seq_show(const struct bpf_map *map) { return (map->btf_value_type_id || map->btf_vmlinux_value_type_id) && map->ops->map_seq_show_elem; } int map_check_no_btf(const struct bpf_map *map, const struct btf *btf, const struct btf_type *key_type, const struct btf_type *value_type); bool bpf_map_meta_equal(const struct bpf_map *meta0, const struct bpf_map *meta1); extern const struct bpf_map_ops bpf_map_offload_ops; /* bpf_type_flag contains a set of flags that are applicable to the values of * arg_type, ret_type and reg_type. For example, a pointer value may be null, * or a memory is read-only. We classify types into two categories: base types * and extended types. Extended types are base types combined with a type flag. * * Currently there are no more than 32 base types in arg_type, ret_type and * reg_types. */ #define BPF_BASE_TYPE_BITS 8 enum bpf_type_flag { /* PTR may be NULL. */ PTR_MAYBE_NULL = BIT(0 + BPF_BASE_TYPE_BITS), /* MEM is read-only. When applied on bpf_arg, it indicates the arg is * compatible with both mutable and immutable memory. */ MEM_RDONLY = BIT(1 + BPF_BASE_TYPE_BITS), /* MEM points to BPF ring buffer reservation. */ MEM_RINGBUF = BIT(2 + BPF_BASE_TYPE_BITS), /* MEM is in user address space. */ MEM_USER = BIT(3 + BPF_BASE_TYPE_BITS), /* MEM is a percpu memory. MEM_PERCPU tags PTR_TO_BTF_ID. When tagged * with MEM_PERCPU, PTR_TO_BTF_ID _cannot_ be directly accessed. In * order to drop this tag, it must be passed into bpf_per_cpu_ptr() * or bpf_this_cpu_ptr(), which will return the pointer corresponding * to the specified cpu. */ MEM_PERCPU = BIT(4 + BPF_BASE_TYPE_BITS), /* Indicates that the argument will be released. */ OBJ_RELEASE = BIT(5 + BPF_BASE_TYPE_BITS), /* PTR is not trusted. This is only used with PTR_TO_BTF_ID, to mark * unreferenced and referenced kptr loaded from map value using a load * instruction, so that they can only be dereferenced but not escape the * BPF program into the kernel (i.e. cannot be passed as arguments to * kfunc or bpf helpers). */ PTR_UNTRUSTED = BIT(6 + BPF_BASE_TYPE_BITS), MEM_UNINIT = BIT(7 + BPF_BASE_TYPE_BITS), /* DYNPTR points to memory local to the bpf program. */ DYNPTR_TYPE_LOCAL = BIT(8 + BPF_BASE_TYPE_BITS), /* DYNPTR points to a kernel-produced ringbuf record. */ DYNPTR_TYPE_RINGBUF = BIT(9 + BPF_BASE_TYPE_BITS), /* Size is known at compile time. */ MEM_FIXED_SIZE = BIT(10 + BPF_BASE_TYPE_BITS), /* MEM is of an allocated object of type in program BTF. This is used to * tag PTR_TO_BTF_ID allocated using bpf_obj_new. */ MEM_ALLOC = BIT(11 + BPF_BASE_TYPE_BITS), /* PTR was passed from the kernel in a trusted context, and may be * passed to KF_TRUSTED_ARGS kfuncs or BPF helper functions. * Confusingly, this is _not_ the opposite of PTR_UNTRUSTED above. * PTR_UNTRUSTED refers to a kptr that was read directly from a map * without invoking bpf_kptr_xchg(). What we really need to know is * whether a pointer is safe to pass to a kfunc or BPF helper function. * While PTR_UNTRUSTED pointers are unsafe to pass to kfuncs and BPF * helpers, they do not cover all possible instances of unsafe * pointers. For example, a pointer that was obtained from walking a * struct will _not_ get the PTR_UNTRUSTED type modifier, despite the * fact that it may be NULL, invalid, etc. This is due to backwards * compatibility requirements, as this was the behavior that was first * introduced when kptrs were added. The behavior is now considered * deprecated, and PTR_UNTRUSTED will eventually be removed. * * PTR_TRUSTED, on the other hand, is a pointer that the kernel * guarantees to be valid and safe to pass to kfuncs and BPF helpers. * For example, pointers passed to tracepoint arguments are considered * PTR_TRUSTED, as are pointers that are passed to struct_ops * callbacks. As alluded to above, pointers that are obtained from * walking PTR_TRUSTED pointers are _not_ trusted. For example, if a * struct task_struct *task is PTR_TRUSTED, then accessing * task->last_wakee will lose the PTR_TRUSTED modifier when it's stored * in a BPF register. Similarly, pointers passed to certain programs * types such as kretprobes are not guaranteed to be valid, as they may * for example contain an object that was recently freed. */ PTR_TRUSTED = BIT(12 + BPF_BASE_TYPE_BITS), /* MEM is tagged with rcu and memory access needs rcu_read_lock protection. */ MEM_RCU = BIT(13 + BPF_BASE_TYPE_BITS), /* Used to tag PTR_TO_BTF_ID | MEM_ALLOC references which are non-owning. * Currently only valid for linked-list and rbtree nodes. If the nodes * have a bpf_refcount_field, they must be tagged MEM_RCU as well. */ NON_OWN_REF = BIT(14 + BPF_BASE_TYPE_BITS), /* DYNPTR points to sk_buff */ DYNPTR_TYPE_SKB = BIT(15 + BPF_BASE_TYPE_BITS), /* DYNPTR points to xdp_buff */ DYNPTR_TYPE_XDP = BIT(16 + BPF_BASE_TYPE_BITS), /* Memory must be aligned on some architectures, used in combination with * MEM_FIXED_SIZE. */ MEM_ALIGNED = BIT(17 + BPF_BASE_TYPE_BITS), __BPF_TYPE_FLAG_MAX, __BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1, }; #define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF | DYNPTR_TYPE_SKB \ | DYNPTR_TYPE_XDP) /* Max number of base types. */ #define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS) /* Max number of all types. */ #define BPF_TYPE_LIMIT (__BPF_TYPE_LAST_FLAG | (__BPF_TYPE_LAST_FLAG - 1)) /* function argument constraints */ enum bpf_arg_type { ARG_DONTCARE = 0, /* unused argument in helper function */ /* the following constraints used to prototype * bpf_map_lookup/update/delete_elem() functions */ ARG_CONST_MAP_PTR, /* const argument used as pointer to bpf_map */ ARG_PTR_TO_MAP_KEY, /* pointer to stack used as map key */ ARG_PTR_TO_MAP_VALUE, /* pointer to stack used as map value */ /* Used to prototype bpf_memcmp() and other functions that access data * on eBPF program stack */ ARG_PTR_TO_MEM, /* pointer to valid memory (stack, packet, map value) */ ARG_PTR_TO_ARENA, ARG_CONST_SIZE, /* number of bytes accessed from memory */ ARG_CONST_SIZE_OR_ZERO, /* number of bytes accessed from memory or 0 */ ARG_PTR_TO_CTX, /* pointer to context */ ARG_ANYTHING, /* any (initialized) argument is ok */ ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */ ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */ ARG_PTR_TO_SOCKET, /* pointer to bpf_sock (fullsock) */ ARG_PTR_TO_BTF_ID, /* pointer to in-kernel struct */ ARG_PTR_TO_RINGBUF_MEM, /* pointer to dynamically reserved ringbuf memory */ ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */ ARG_PTR_TO_BTF_ID_SOCK_COMMON, /* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */ ARG_PTR_TO_PERCPU_BTF_ID, /* pointer to in-kernel percpu type */ ARG_PTR_TO_FUNC, /* pointer to a bpf program function */ ARG_PTR_TO_STACK, /* pointer to stack */ ARG_PTR_TO_CONST_STR, /* pointer to a null terminated read-only string */ ARG_PTR_TO_TIMER, /* pointer to bpf_timer */ ARG_KPTR_XCHG_DEST, /* pointer to destination that kptrs are bpf_kptr_xchg'd into */ ARG_PTR_TO_DYNPTR, /* pointer to bpf_dynptr. See bpf_type_flag for dynptr type */ __BPF_ARG_TYPE_MAX, /* Extended arg_types. */ ARG_PTR_TO_MAP_VALUE_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_MAP_VALUE, ARG_PTR_TO_MEM_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_MEM, ARG_PTR_TO_CTX_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_CTX, ARG_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_SOCKET, ARG_PTR_TO_STACK_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_STACK, ARG_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_BTF_ID, /* pointer to memory does not need to be initialized, helper function must fill * all bytes or clear them in error case. */ ARG_PTR_TO_UNINIT_MEM = MEM_UNINIT | ARG_PTR_TO_MEM, /* Pointer to valid memory of size known at compile time. */ ARG_PTR_TO_FIXED_SIZE_MEM = MEM_FIXED_SIZE | ARG_PTR_TO_MEM, /* This must be the last entry. Its purpose is to ensure the enum is * wide enough to hold the higher bits reserved for bpf_type_flag. */ __BPF_ARG_TYPE_LIMIT = BPF_TYPE_LIMIT, }; static_assert(__BPF_ARG_TYPE_MAX <= BPF_BASE_TYPE_LIMIT); /* type of values returned from helper functions */ enum bpf_return_type { RET_INTEGER, /* function returns integer */ RET_VOID, /* function doesn't return anything */ RET_PTR_TO_MAP_VALUE, /* returns a pointer to map elem value */ RET_PTR_TO_SOCKET, /* returns a pointer to a socket */ RET_PTR_TO_TCP_SOCK, /* returns a pointer to a tcp_sock */ RET_PTR_TO_SOCK_COMMON, /* returns a pointer to a sock_common */ RET_PTR_TO_MEM, /* returns a pointer to memory */ RET_PTR_TO_MEM_OR_BTF_ID, /* returns a pointer to a valid memory or a btf_id */ RET_PTR_TO_BTF_ID, /* returns a pointer to a btf_id */ __BPF_RET_TYPE_MAX, /* Extended ret_types. */ RET_PTR_TO_MAP_VALUE_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_MAP_VALUE, RET_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCKET, RET_PTR_TO_TCP_SOCK_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_TCP_SOCK, RET_PTR_TO_SOCK_COMMON_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCK_COMMON, RET_PTR_TO_RINGBUF_MEM_OR_NULL = PTR_MAYBE_NULL | MEM_RINGBUF | RET_PTR_TO_MEM, RET_PTR_TO_DYNPTR_MEM_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_MEM, RET_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_BTF_ID, RET_PTR_TO_BTF_ID_TRUSTED = PTR_TRUSTED | RET_PTR_TO_BTF_ID, /* This must be the last entry. Its purpose is to ensure the enum is * wide enough to hold the higher bits reserved for bpf_type_flag. */ __BPF_RET_TYPE_LIMIT = BPF_TYPE_LIMIT, }; static_assert(__BPF_RET_TYPE_MAX <= BPF_BASE_TYPE_LIMIT); /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs * to in-kernel helper functions and for adjusting imm32 field in BPF_CALL * instructions after verifying */ struct bpf_func_proto { u64 (*func)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); bool gpl_only; bool pkt_access; bool might_sleep; /* set to true if helper follows contract for llvm * attribute bpf_fastcall: * - void functions do not scratch r0 * - functions taking N arguments scratch only registers r1-rN */ bool allow_fastcall; enum bpf_return_type ret_type; union { struct { enum bpf_arg_type arg1_type; enum bpf_arg_type arg2_type; enum bpf_arg_type arg3_type; enum bpf_arg_type arg4_type; enum bpf_arg_type arg5_type; }; enum bpf_arg_type arg_type[5]; }; union { struct { u32 *arg1_btf_id; u32 *arg2_btf_id; u32 *arg3_btf_id; u32 *arg4_btf_id; u32 *arg5_btf_id; }; u32 *arg_btf_id[5]; struct { size_t arg1_size; size_t arg2_size; size_t arg3_size; size_t arg4_size; size_t arg5_size; }; size_t arg_size[5]; }; int *ret_btf_id; /* return value btf_id */ bool (*allowed)(const struct bpf_prog *prog); }; /* bpf_context is intentionally undefined structure. Pointer to bpf_context is * the first argument to eBPF programs. * For socket filters: 'struct bpf_context *' == 'struct sk_buff *' */ struct bpf_context; enum bpf_access_type { BPF_READ = 1, BPF_WRITE = 2 }; /* types of values stored in eBPF registers */ /* Pointer types represent: * pointer * pointer + imm * pointer + (u16) var * pointer + (u16) var + imm * if (range > 0) then [ptr, ptr + range - off) is safe to access * if (id > 0) means that some 'var' was added * if (off > 0) means that 'imm' was added */ enum bpf_reg_type { NOT_INIT = 0, /* nothing was written into register */ SCALAR_VALUE, /* reg doesn't contain a valid pointer */ PTR_TO_CTX, /* reg points to bpf_context */ CONST_PTR_TO_MAP, /* reg points to struct bpf_map */ PTR_TO_MAP_VALUE, /* reg points to map element value */ PTR_TO_MAP_KEY, /* reg points to a map element key */ PTR_TO_STACK, /* reg == frame_pointer + offset */ PTR_TO_PACKET_META, /* skb->data - meta_len */ PTR_TO_PACKET, /* reg points to skb->data */ PTR_TO_PACKET_END, /* skb->data + headlen */ PTR_TO_FLOW_KEYS, /* reg points to bpf_flow_keys */ PTR_TO_SOCKET, /* reg points to struct bpf_sock */ PTR_TO_SOCK_COMMON, /* reg points to sock_common */ PTR_TO_TCP_SOCK, /* reg points to struct tcp_sock */ PTR_TO_TP_BUFFER, /* reg points to a writable raw tp's buffer */ PTR_TO_XDP_SOCK, /* reg points to struct xdp_sock */ /* PTR_TO_BTF_ID points to a kernel struct that does not need * to be null checked by the BPF program. This does not imply the * pointer is _not_ null and in practice this can easily be a null * pointer when reading pointer chains. The assumption is program * context will handle null pointer dereference typically via fault * handling. The verifier must keep this in mind and can make no * assumptions about null or non-null when doing branch analysis. * Further, when passed into helpers the helpers can not, without * additional context, assume the value is non-null. */ PTR_TO_BTF_ID, /* PTR_TO_BTF_ID_OR_NULL points to a kernel struct that has not * been checked for null. Used primarily to inform the verifier * an explicit null check is required for this struct. */ PTR_TO_MEM, /* reg points to valid memory region */ PTR_TO_ARENA, PTR_TO_BUF, /* reg points to a read/write buffer */ PTR_TO_FUNC, /* reg points to a bpf program function */ CONST_PTR_TO_DYNPTR, /* reg points to a const struct bpf_dynptr */ __BPF_REG_TYPE_MAX, /* Extended reg_types. */ PTR_TO_MAP_VALUE_OR_NULL = PTR_MAYBE_NULL | PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | PTR_TO_SOCKET, PTR_TO_SOCK_COMMON_OR_NULL = PTR_MAYBE_NULL | PTR_TO_SOCK_COMMON, PTR_TO_TCP_SOCK_OR_NULL = PTR_MAYBE_NULL | PTR_TO_TCP_SOCK, PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | PTR_TO_BTF_ID, /* This must be the last entry. Its purpose is to ensure the enum is * wide enough to hold the higher bits reserved for bpf_type_flag. */ __BPF_REG_TYPE_LIMIT = BPF_TYPE_LIMIT, }; static_assert(__BPF_REG_TYPE_MAX <= BPF_BASE_TYPE_LIMIT); /* The information passed from prog-specific *_is_valid_access * back to the verifier. */ struct bpf_insn_access_aux { enum bpf_reg_type reg_type; bool is_ldsx; union { int ctx_field_size; struct { struct btf *btf; u32 btf_id; }; }; struct bpf_verifier_log *log; /* for verbose logs */ bool is_retval; /* is accessing function return value ? */ }; static inline void bpf_ctx_record_field_size(struct bpf_insn_access_aux *aux, u32 size) { aux->ctx_field_size = size; } static bool bpf_is_ldimm64(const struct bpf_insn *insn) { return insn->code == (BPF_LD | BPF_IMM | BPF_DW); } static inline bool bpf_pseudo_func(const struct bpf_insn *insn) { return bpf_is_ldimm64(insn) && insn->src_reg == BPF_PSEUDO_FUNC; } struct bpf_prog_ops { int (*test_run)(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); }; struct bpf_reg_state; struct bpf_verifier_ops { /* return eBPF function prototype for verification */ const struct bpf_func_proto * (*get_func_proto)(enum bpf_func_id func_id, const struct bpf_prog *prog); /* return true if 'size' wide access at offset 'off' within bpf_context * with 'type' (read or write) is allowed */ bool (*is_valid_access)(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info); int (*gen_prologue)(struct bpf_insn *insn, bool direct_write, const struct bpf_prog *prog); int (*gen_epilogue)(struct bpf_insn *insn, const struct bpf_prog *prog, s16 ctx_stack_off); int (*gen_ld_abs)(const struct bpf_insn *orig, struct bpf_insn *insn_buf); u32 (*convert_ctx_access)(enum bpf_access_type type, const struct bpf_insn *src, struct bpf_insn *dst, struct bpf_prog *prog, u32 *target_size); int (*btf_struct_access)(struct bpf_verifier_log *log, const struct bpf_reg_state *reg, int off, int size); }; struct bpf_prog_offload_ops { /* verifier basic callbacks */ int (*insn_hook)(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx); int (*finalize)(struct bpf_verifier_env *env); /* verifier optimization callbacks (called after .finalize) */ int (*replace_insn)(struct bpf_verifier_env *env, u32 off, struct bpf_insn *insn); int (*remove_insns)(struct bpf_verifier_env *env, u32 off, u32 cnt); /* program management callbacks */ int (*prepare)(struct bpf_prog *prog); int (*translate)(struct bpf_prog *prog); void (*destroy)(struct bpf_prog *prog); }; struct bpf_prog_offload { struct bpf_prog *prog; struct net_device *netdev; struct bpf_offload_dev *offdev; void *dev_priv; struct list_head offloads; bool dev_state; bool opt_failed; void *jited_image; u32 jited_len; }; enum bpf_cgroup_storage_type { BPF_CGROUP_STORAGE_SHARED, BPF_CGROUP_STORAGE_PERCPU, __BPF_CGROUP_STORAGE_MAX }; #define MAX_BPF_CGROUP_STORAGE_TYPE __BPF_CGROUP_STORAGE_MAX /* The longest tracepoint has 12 args. * See include/trace/bpf_probe.h */ #define MAX_BPF_FUNC_ARGS 12 /* The maximum number of arguments passed through registers * a single function may have. */ #define MAX_BPF_FUNC_REG_ARGS 5 /* The argument is a structure. */ #define BTF_FMODEL_STRUCT_ARG BIT(0) /* The argument is signed. */ #define BTF_FMODEL_SIGNED_ARG BIT(1) struct btf_func_model { u8 ret_size; u8 ret_flags; u8 nr_args; u8 arg_size[MAX_BPF_FUNC_ARGS]; u8 arg_flags[MAX_BPF_FUNC_ARGS]; }; /* Restore arguments before returning from trampoline to let original function * continue executing. This flag is used for fentry progs when there are no * fexit progs. */ #define BPF_TRAMP_F_RESTORE_REGS BIT(0) /* Call original function after fentry progs, but before fexit progs. * Makes sense for fentry/fexit, normal calls and indirect calls. */ #define BPF_TRAMP_F_CALL_ORIG BIT(1) /* Skip current frame and return to parent. Makes sense for fentry/fexit * programs only. Should not be used with normal calls and indirect calls. */ #define BPF_TRAMP_F_SKIP_FRAME BIT(2) /* Store IP address of the caller on the trampoline stack, * so it's available for trampoline's programs. */ #define BPF_TRAMP_F_IP_ARG BIT(3) /* Return the return value of fentry prog. Only used by bpf_struct_ops. */ #define BPF_TRAMP_F_RET_FENTRY_RET BIT(4) /* Get original function from stack instead of from provided direct address. * Makes sense for trampolines with fexit or fmod_ret programs. */ #define BPF_TRAMP_F_ORIG_STACK BIT(5) /* This trampoline is on a function with another ftrace_ops with IPMODIFY, * e.g., a live patch. This flag is set and cleared by ftrace call backs, */ #define BPF_TRAMP_F_SHARE_IPMODIFY BIT(6) /* Indicate that current trampoline is in a tail call context. Then, it has to * cache and restore tail_call_cnt to avoid infinite tail call loop. */ #define BPF_TRAMP_F_TAIL_CALL_CTX BIT(7) /* * Indicate the trampoline should be suitable to receive indirect calls; * without this indirectly calling the generated code can result in #UD/#CP, * depending on the CFI options. * * Used by bpf_struct_ops. * * Incompatible with FENTRY usage, overloads @func_addr argument. */ #define BPF_TRAMP_F_INDIRECT BIT(8) /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50 * bytes on x86. */ enum { #if defined(__s390x__) BPF_MAX_TRAMP_LINKS = 27, #else BPF_MAX_TRAMP_LINKS = 38, #endif }; struct bpf_tramp_links { struct bpf_tramp_link *links[BPF_MAX_TRAMP_LINKS]; int nr_links; }; struct bpf_tramp_run_ctx; /* Different use cases for BPF trampoline: * 1. replace nop at the function entry (kprobe equivalent) * flags = BPF_TRAMP_F_RESTORE_REGS * fentry = a set of programs to run before returning from trampoline * * 2. replace nop at the function entry (kprobe + kretprobe equivalent) * flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME * orig_call = fentry_ip + MCOUNT_INSN_SIZE * fentry = a set of program to run before calling original function * fexit = a set of program to run after original function * * 3. replace direct call instruction anywhere in the function body * or assign a function pointer for indirect call (like tcp_congestion_ops->cong_avoid) * With flags = 0 * fentry = a set of programs to run before returning from trampoline * With flags = BPF_TRAMP_F_CALL_ORIG * orig_call = original callback addr or direct function addr * fentry = a set of program to run before calling original function * fexit = a set of program to run after original function */ struct bpf_tramp_image; int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end, const struct btf_func_model *m, u32 flags, struct bpf_tramp_links *tlinks, void *func_addr); void *arch_alloc_bpf_trampoline(unsigned int size); void arch_free_bpf_trampoline(void *image, unsigned int size); int __must_check arch_protect_bpf_trampoline(void *image, unsigned int size); int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, struct bpf_tramp_links *tlinks, void *func_addr); u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx); void notrace __bpf_prog_exit_sleepable_recur(struct bpf_prog *prog, u64 start, struct bpf_tramp_run_ctx *run_ctx); void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr); void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr); typedef u64 (*bpf_trampoline_enter_t)(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx); typedef void (*bpf_trampoline_exit_t)(struct bpf_prog *prog, u64 start, struct bpf_tramp_run_ctx *run_ctx); bpf_trampoline_enter_t bpf_trampoline_enter(const struct bpf_prog *prog); bpf_trampoline_exit_t bpf_trampoline_exit(const struct bpf_prog *prog); struct bpf_ksym { unsigned long start; unsigned long end; char name[KSYM_NAME_LEN]; struct list_head lnode; struct latch_tree_node tnode; bool prog; }; enum bpf_tramp_prog_type { BPF_TRAMP_FENTRY, BPF_TRAMP_FEXIT, BPF_TRAMP_MODIFY_RETURN, BPF_TRAMP_MAX, BPF_TRAMP_REPLACE, /* more than MAX */ }; struct bpf_tramp_image { void *image; int size; struct bpf_ksym ksym; struct percpu_ref pcref; void *ip_after_call; void *ip_epilogue; union { struct rcu_head rcu; struct work_struct work; }; }; struct bpf_trampoline { /* hlist for trampoline_table */ struct hlist_node hlist; struct ftrace_ops *fops; /* serializes access to fields of this trampoline */ struct mutex mutex; refcount_t refcnt; u32 flags; u64 key; struct { struct btf_func_model model; void *addr; bool ftrace_managed; } func; /* if !NULL this is BPF_PROG_TYPE_EXT program that extends another BPF * program by replacing one of its functions. func.addr is the address * of the function it replaced. */ struct bpf_prog *extension_prog; /* list of BPF programs using this trampoline */ struct hlist_head progs_hlist[BPF_TRAMP_MAX]; /* Number of attached programs. A counter per kind. */ int progs_cnt[BPF_TRAMP_MAX]; /* Executable image of trampoline */ struct bpf_tramp_image *cur_image; }; struct bpf_attach_target_info { struct btf_func_model fmodel; long tgt_addr; struct module *tgt_mod; const char *tgt_name; const struct btf_type *tgt_type; }; #define BPF_DISPATCHER_MAX 48 /* Fits in 2048B */ struct bpf_dispatcher_prog { struct bpf_prog *prog; refcount_t users; }; struct bpf_dispatcher { /* dispatcher mutex */ struct mutex mutex; void *func; struct bpf_dispatcher_prog progs[BPF_DISPATCHER_MAX]; int num_progs; void *image; void *rw_image; u32 image_off; struct bpf_ksym ksym; #ifdef CONFIG_HAVE_STATIC_CALL struct static_call_key *sc_key; void *sc_tramp; #endif }; #ifndef __bpfcall #define __bpfcall __nocfi #endif static __always_inline __bpfcall unsigned int bpf_dispatcher_nop_func( const void *ctx, const struct bpf_insn *insnsi, bpf_func_t bpf_func) { return bpf_func(ctx, insnsi); } /* the implementation of the opaque uapi struct bpf_dynptr */ struct bpf_dynptr_kern { void *data; /* Size represents the number of usable bytes of dynptr data. * If for example the offset is at 4 for a local dynptr whose data is * of type u64, the number of usable bytes is 4. * * The upper 8 bits are reserved. It is as follows: * Bits 0 - 23 = size * Bits 24 - 30 = dynptr type * Bit 31 = whether dynptr is read-only */ u32 size; u32 offset; } __aligned(8); enum bpf_dynptr_type { BPF_DYNPTR_TYPE_INVALID, /* Points to memory that is local to the bpf program */ BPF_DYNPTR_TYPE_LOCAL, /* Underlying data is a ringbuf record */ BPF_DYNPTR_TYPE_RINGBUF, /* Underlying data is a sk_buff */ BPF_DYNPTR_TYPE_SKB, /* Underlying data is a xdp_buff */ BPF_DYNPTR_TYPE_XDP, }; int bpf_dynptr_check_size(u32 size); u32 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr); const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len); void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u32 len); bool __bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr); #ifdef CONFIG_BPF_JIT int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr); int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr); struct bpf_trampoline *bpf_trampoline_get(u64 key, struct bpf_attach_target_info *tgt_info); void bpf_trampoline_put(struct bpf_trampoline *tr); int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_funcs); /* * When the architecture supports STATIC_CALL replace the bpf_dispatcher_fn * indirection with a direct call to the bpf program. If the architecture does * not have STATIC_CALL, avoid a double-indirection. */ #ifdef CONFIG_HAVE_STATIC_CALL #define __BPF_DISPATCHER_SC_INIT(_name) \ .sc_key = &STATIC_CALL_KEY(_name), \ .sc_tramp = STATIC_CALL_TRAMP_ADDR(_name), #define __BPF_DISPATCHER_SC(name) \ DEFINE_STATIC_CALL(bpf_dispatcher_##name##_call, bpf_dispatcher_nop_func) #define __BPF_DISPATCHER_CALL(name) \ static_call(bpf_dispatcher_##name##_call)(ctx, insnsi, bpf_func) #define __BPF_DISPATCHER_UPDATE(_d, _new) \ __static_call_update((_d)->sc_key, (_d)->sc_tramp, (_new)) #else #define __BPF_DISPATCHER_SC_INIT(name) #define __BPF_DISPATCHER_SC(name) #define __BPF_DISPATCHER_CALL(name) bpf_func(ctx, insnsi) #define __BPF_DISPATCHER_UPDATE(_d, _new) #endif #define BPF_DISPATCHER_INIT(_name) { \ .mutex = __MUTEX_INITIALIZER(_name.mutex), \ .func = &_name##_func, \ .progs = {}, \ .num_progs = 0, \ .image = NULL, \ .image_off = 0, \ .ksym = { \ .name = #_name, \ .lnode = LIST_HEAD_INIT(_name.ksym.lnode), \ }, \ __BPF_DISPATCHER_SC_INIT(_name##_call) \ } #define DEFINE_BPF_DISPATCHER(name) \ __BPF_DISPATCHER_SC(name); \ noinline __bpfcall unsigned int bpf_dispatcher_##name##_func( \ const void *ctx, \ const struct bpf_insn *insnsi, \ bpf_func_t bpf_func) \ { \ return __BPF_DISPATCHER_CALL(name); \ } \ EXPORT_SYMBOL(bpf_dispatcher_##name##_func); \ struct bpf_dispatcher bpf_dispatcher_##name = \ BPF_DISPATCHER_INIT(bpf_dispatcher_##name); #define DECLARE_BPF_DISPATCHER(name) \ unsigned int bpf_dispatcher_##name##_func( \ const void *ctx, \ const struct bpf_insn *insnsi, \ bpf_func_t bpf_func); \ extern struct bpf_dispatcher bpf_dispatcher_##name; #define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_##name##_func #define BPF_DISPATCHER_PTR(name) (&bpf_dispatcher_##name) void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, struct bpf_prog *to); /* Called only from JIT-enabled code, so there's no need for stubs. */ void bpf_image_ksym_add(void *data, unsigned int size, struct bpf_ksym *ksym); void bpf_image_ksym_del(struct bpf_ksym *ksym); void bpf_ksym_add(struct bpf_ksym *ksym); void bpf_ksym_del(struct bpf_ksym *ksym); int bpf_jit_charge_modmem(u32 size); void bpf_jit_uncharge_modmem(u32 size); bool bpf_prog_has_trampoline(const struct bpf_prog *prog); #else static inline int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr) { return -ENOTSUPP; } static inline int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr) { return -ENOTSUPP; } static inline struct bpf_trampoline *bpf_trampoline_get(u64 key, struct bpf_attach_target_info *tgt_info) { return NULL; } static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {} #define DEFINE_BPF_DISPATCHER(name) #define DECLARE_BPF_DISPATCHER(name) #define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_nop_func #define BPF_DISPATCHER_PTR(name) NULL static inline void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, struct bpf_prog *to) {} static inline bool is_bpf_image_address(unsigned long address) { return false; } static inline bool bpf_prog_has_trampoline(const struct bpf_prog *prog) { return false; } #endif struct bpf_func_info_aux { u16 linkage; bool unreliable; bool called : 1; bool verified : 1; }; enum bpf_jit_poke_reason { BPF_POKE_REASON_TAIL_CALL, }; /* Descriptor of pokes pointing /into/ the JITed image. */ struct bpf_jit_poke_descriptor { void *tailcall_target; void *tailcall_bypass; void *bypass_addr; void *aux; union { struct { struct bpf_map *map; u32 key; } tail_call; }; bool tailcall_target_stable; u8 adj_off; u16 reason; u32 insn_idx; }; /* reg_type info for ctx arguments */ struct bpf_ctx_arg_aux { u32 offset; enum bpf_reg_type reg_type; struct btf *btf; u32 btf_id; }; struct btf_mod_pair { struct btf *btf; struct module *module; }; struct bpf_kfunc_desc_tab; struct bpf_prog_aux { atomic64_t refcnt; u32 used_map_cnt; u32 used_btf_cnt; u32 max_ctx_offset; u32 max_pkt_offset; u32 max_tp_access; u32 stack_depth; u32 id; u32 func_cnt; /* used by non-func prog as the number of func progs */ u32 real_func_cnt; /* includes hidden progs, only used for JIT and freeing progs */ u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */ u32 attach_btf_id; /* in-kernel BTF type id to attach to */ u32 ctx_arg_info_size; u32 max_rdonly_access; u32 max_rdwr_access; struct btf *attach_btf; const struct bpf_ctx_arg_aux *ctx_arg_info; struct mutex dst_mutex; /* protects dst_* pointers below, *after* prog becomes visible */ struct bpf_prog *dst_prog; struct bpf_trampoline *dst_trampoline; enum bpf_prog_type saved_dst_prog_type; enum bpf_attach_type saved_dst_attach_type; bool verifier_zext; /* Zero extensions has been inserted by verifier. */ bool dev_bound; /* Program is bound to the netdev. */ bool offload_requested; /* Program is bound and offloaded to the netdev. */ bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */ bool attach_tracing_prog; /* true if tracing another tracing program */ bool func_proto_unreliable; bool tail_call_reachable; bool xdp_has_frags; bool exception_cb; bool exception_boundary; struct bpf_arena *arena; /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ const struct btf_type *attach_func_proto; /* function name for valid attach_btf_id */ const char *attach_func_name; struct bpf_prog **func; void *jit_data; /* JIT specific data. arch dependent */ struct bpf_jit_poke_descriptor *poke_tab; struct bpf_kfunc_desc_tab *kfunc_tab; struct bpf_kfunc_btf_tab *kfunc_btf_tab; u32 size_poke_tab; #ifdef CONFIG_FINEIBT struct bpf_ksym ksym_prefix; #endif struct bpf_ksym ksym; const struct bpf_prog_ops *ops; struct bpf_map **used_maps; struct mutex used_maps_mutex; /* mutex for used_maps and used_map_cnt */ struct btf_mod_pair *used_btfs; struct bpf_prog *prog; struct user_struct *user; u64 load_time; /* ns since boottime */ u32 verified_insns; int cgroup_atype; /* enum cgroup_bpf_attach_type */ struct bpf_map *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]; char name[BPF_OBJ_NAME_LEN]; u64 (*bpf_exception_cb)(u64 cookie, u64 sp, u64 bp, u64, u64); #ifdef CONFIG_SECURITY void *security; #endif struct bpf_token *token; struct bpf_prog_offload *offload; struct btf *btf; struct bpf_func_info *func_info; struct bpf_func_info_aux *func_info_aux; /* bpf_line_info loaded from userspace. linfo->insn_off * has the xlated insn offset. * Both the main and sub prog share the same linfo. * The subprog can access its first linfo by * using the linfo_idx. */ struct bpf_line_info *linfo; /* jited_linfo is the jited addr of the linfo. It has a * one to one mapping to linfo: * jited_linfo[i] is the jited addr for the linfo[i]->insn_off. * Both the main and sub prog share the same jited_linfo. * The subprog can access its first jited_linfo by * using the linfo_idx. */ void **jited_linfo; u32 func_info_cnt; u32 nr_linfo; /* subprog can use linfo_idx to access its first linfo and * jited_linfo. * main prog always has linfo_idx == 0 */ u32 linfo_idx; struct module *mod; u32 num_exentries; struct exception_table_entry *extable; union { struct work_struct work; struct rcu_head rcu; }; }; struct bpf_prog { u16 pages; /* Number of allocated pages */ u16 jited:1, /* Is our filter JIT'ed? */ jit_requested:1,/* archs need to JIT the prog */ gpl_compatible:1, /* Is filter GPL compatible? */ cb_access:1, /* Is control block accessed? */ dst_needed:1, /* Do we need dst entry? */ blinding_requested:1, /* needs constant blinding */ blinded:1, /* Was blinded */ is_func:1, /* program is a bpf function */ kprobe_override:1, /* Do we override a kprobe? */ has_callchain_buf:1, /* callchain buffer allocated? */ enforce_expected_attach_type:1, /* Enforce expected_attach_type checking at attach time */ call_get_stack:1, /* Do we call bpf_get_stack() or bpf_get_stackid() */ call_get_func_ip:1, /* Do we call get_func_ip() */ tstamp_type_access:1, /* Accessed __sk_buff->tstamp_type */ sleepable:1; /* BPF program is sleepable */ enum bpf_prog_type type; /* Type of BPF program */ enum bpf_attach_type expected_attach_type; /* For some prog types */ u32 len; /* Number of filter blocks */ u32 jited_len; /* Size of jited insns in bytes */ u8 tag[BPF_TAG_SIZE]; struct bpf_prog_stats __percpu *stats; int __percpu *active; unsigned int (*bpf_func)(const void *ctx, const struct bpf_insn *insn); struct bpf_prog_aux *aux; /* Auxiliary fields */ struct sock_fprog_kern *orig_prog; /* Original BPF program */ /* Instructions for interpreter */ union { DECLARE_FLEX_ARRAY(struct sock_filter, insns); DECLARE_FLEX_ARRAY(struct bpf_insn, insnsi); }; }; struct bpf_array_aux { /* Programs with direct jumps into programs part of this array. */ struct list_head poke_progs; struct bpf_map *map; struct mutex poke_mutex; struct work_struct work; }; struct bpf_link { atomic64_t refcnt; u32 id; enum bpf_link_type type; const struct bpf_link_ops *ops; struct bpf_prog *prog; /* rcu is used before freeing, work can be used to schedule that * RCU-based freeing before that, so they never overlap */ union { struct rcu_head rcu; struct work_struct work; }; }; struct bpf_link_ops { void (*release)(struct bpf_link *link); /* deallocate link resources callback, called without RCU grace period * waiting */ void (*dealloc)(struct bpf_link *link); /* deallocate link resources callback, called after RCU grace period; * if underlying BPF program is sleepable we go through tasks trace * RCU GP and then "classic" RCU GP */ void (*dealloc_deferred)(struct bpf_link *link); int (*detach)(struct bpf_link *link); int (*update_prog)(struct bpf_link *link, struct bpf_prog *new_prog, struct bpf_prog *old_prog); void (*show_fdinfo)(const struct bpf_link *link, struct seq_file *seq); int (*fill_link_info)(const struct bpf_link *link, struct bpf_link_info *info); int (*update_map)(struct bpf_link *link, struct bpf_map *new_map, struct bpf_map *old_map); __poll_t (*poll)(struct file *file, struct poll_table_struct *pts); }; struct bpf_tramp_link { struct bpf_link link; struct hlist_node tramp_hlist; u64 cookie; }; struct bpf_shim_tramp_link { struct bpf_tramp_link link; struct bpf_trampoline *trampoline; }; struct bpf_tracing_link { struct bpf_tramp_link link; enum bpf_attach_type attach_type; struct bpf_trampoline *trampoline; struct bpf_prog *tgt_prog; }; struct bpf_raw_tp_link { struct bpf_link link; struct bpf_raw_event_map *btp; u64 cookie; }; struct bpf_link_primer { struct bpf_link *link; struct file *file; int fd; u32 id; }; struct bpf_mount_opts { kuid_t uid; kgid_t gid; umode_t mode; /* BPF token-related delegation options */ u64 delegate_cmds; u64 delegate_maps; u64 delegate_progs; u64 delegate_attachs; }; struct bpf_token { struct work_struct work; atomic64_t refcnt; struct user_namespace *userns; u64 allowed_cmds; u64 allowed_maps; u64 allowed_progs; u64 allowed_attachs; #ifdef CONFIG_SECURITY void *security; #endif }; struct bpf_struct_ops_value; struct btf_member; #define BPF_STRUCT_OPS_MAX_NR_MEMBERS 64 /** * struct bpf_struct_ops - A structure of callbacks allowing a subsystem to * define a BPF_MAP_TYPE_STRUCT_OPS map type composed * of BPF_PROG_TYPE_STRUCT_OPS progs. * @verifier_ops: A structure of callbacks that are invoked by the verifier * when determining whether the struct_ops progs in the * struct_ops map are valid. * @init: A callback that is invoked a single time, and before any other * callback, to initialize the structure. A nonzero return value means * the subsystem could not be initialized. * @check_member: When defined, a callback invoked by the verifier to allow * the subsystem to determine if an entry in the struct_ops map * is valid. A nonzero return value means that the map is * invalid and should be rejected by the verifier. * @init_member: A callback that is invoked for each member of the struct_ops * map to allow the subsystem to initialize the member. A nonzero * value means the member could not be initialized. This callback * is exclusive with the @type, @type_id, @value_type, and * @value_id fields. * @reg: A callback that is invoked when the struct_ops map has been * initialized and is being attached to. Zero means the struct_ops map * has been successfully registered and is live. A nonzero return value * means the struct_ops map could not be registered. * @unreg: A callback that is invoked when the struct_ops map should be * unregistered. * @update: A callback that is invoked when the live struct_ops map is being * updated to contain new values. This callback is only invoked when * the struct_ops map is loaded with BPF_F_LINK. If not defined, the * it is assumed that the struct_ops map cannot be updated. * @validate: A callback that is invoked after all of the members have been * initialized. This callback should perform static checks on the * map, meaning that it should either fail or succeed * deterministically. A struct_ops map that has been validated may * not necessarily succeed in being registered if the call to @reg * fails. For example, a valid struct_ops map may be loaded, but * then fail to be registered due to there being another active * struct_ops map on the system in the subsystem already. For this * reason, if this callback is not defined, the check is skipped as * the struct_ops map will have final verification performed in * @reg. * @type: BTF type. * @value_type: Value type. * @name: The name of the struct bpf_struct_ops object. * @func_models: Func models * @type_id: BTF type id. * @value_id: BTF value id. */ struct bpf_struct_ops { const struct bpf_verifier_ops *verifier_ops; int (*init)(struct btf *btf); int (*check_member)(const struct btf_type *t, const struct btf_member *member, const struct bpf_prog *prog); int (*init_member)(const struct btf_type *t, const struct btf_member *member, void *kdata, const void *udata); int (*reg)(void *kdata, struct bpf_link *link); void (*unreg)(void *kdata, struct bpf_link *link); int (*update)(void *kdata, void *old_kdata, struct bpf_link *link); int (*validate)(void *kdata); void *cfi_stubs; struct module *owner; const char *name; struct btf_func_model func_models[BPF_STRUCT_OPS_MAX_NR_MEMBERS]; }; /* Every member of a struct_ops type has an instance even a member is not * an operator (function pointer). The "info" field will be assigned to * prog->aux->ctx_arg_info of BPF struct_ops programs to provide the * argument information required by the verifier to verify the program. * * btf_ctx_access() will lookup prog->aux->ctx_arg_info to find the * corresponding entry for an given argument. */ struct bpf_struct_ops_arg_info { struct bpf_ctx_arg_aux *info; u32 cnt; }; struct bpf_struct_ops_desc { struct bpf_struct_ops *st_ops; const struct btf_type *type; const struct btf_type *value_type; u32 type_id; u32 value_id; /* Collection of argument information for each member */ struct bpf_struct_ops_arg_info *arg_info; }; enum bpf_struct_ops_state { BPF_STRUCT_OPS_STATE_INIT, BPF_STRUCT_OPS_STATE_INUSE, BPF_STRUCT_OPS_STATE_TOBEFREE, BPF_STRUCT_OPS_STATE_READY, }; struct bpf_struct_ops_common_value { refcount_t refcnt; enum bpf_struct_ops_state state; }; #if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL) /* This macro helps developer to register a struct_ops type and generate * type information correctly. Developers should use this macro to register * a struct_ops type instead of calling __register_bpf_struct_ops() directly. */ #define register_bpf_struct_ops(st_ops, type) \ ({ \ struct bpf_struct_ops_##type { \ struct bpf_struct_ops_common_value common; \ struct type data ____cacheline_aligned_in_smp; \ }; \ BTF_TYPE_EMIT(struct bpf_struct_ops_##type); \ __register_bpf_struct_ops(st_ops); \ }) #define BPF_MODULE_OWNER ((void *)((0xeB9FUL << 2) + POISON_POINTER_DELTA)) bool bpf_struct_ops_get(const void *kdata); void bpf_struct_ops_put(const void *kdata); int bpf_struct_ops_supported(const struct bpf_struct_ops *st_ops, u32 moff); int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key, void *value); int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks, struct bpf_tramp_link *link, const struct btf_func_model *model, void *stub_func, void **image, u32 *image_off, bool allow_alloc); void bpf_struct_ops_image_free(void *image); static inline bool bpf_try_module_get(const void *data, struct module *owner) { if (owner == BPF_MODULE_OWNER) return bpf_struct_ops_get(data); else return try_module_get(owner); } static inline void bpf_module_put(const void *data, struct module *owner) { if (owner == BPF_MODULE_OWNER) bpf_struct_ops_put(data); else module_put(owner); } int bpf_struct_ops_link_create(union bpf_attr *attr); #ifdef CONFIG_NET /* Define it here to avoid the use of forward declaration */ struct bpf_dummy_ops_state { int val; }; struct bpf_dummy_ops { int (*test_1)(struct bpf_dummy_ops_state *cb); int (*test_2)(struct bpf_dummy_ops_state *cb, int a1, unsigned short a2, char a3, unsigned long a4); int (*test_sleepable)(struct bpf_dummy_ops_state *cb); }; int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); #endif int bpf_struct_ops_desc_init(struct bpf_struct_ops_desc *st_ops_desc, struct btf *btf, struct bpf_verifier_log *log); void bpf_map_struct_ops_info_fill(struct bpf_map_info *info, struct bpf_map *map); void bpf_struct_ops_desc_release(struct bpf_struct_ops_desc *st_ops_desc); #else #define register_bpf_struct_ops(st_ops, type) ({ (void *)(st_ops); 0; }) static inline bool bpf_try_module_get(const void *data, struct module *owner) { return try_module_get(owner); } static inline void bpf_module_put(const void *data, struct module *owner) { module_put(owner); } static inline int bpf_struct_ops_supported(const struct bpf_struct_ops *st_ops, u32 moff) { return -ENOTSUPP; } static inline int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key, void *value) { return -EINVAL; } static inline int bpf_struct_ops_link_create(union bpf_attr *attr) { return -EOPNOTSUPP; } static inline void bpf_map_struct_ops_info_fill(struct bpf_map_info *info, struct bpf_map *map) { } static inline void bpf_struct_ops_desc_release(struct bpf_struct_ops_desc *st_ops_desc) { } #endif #if defined(CONFIG_CGROUP_BPF) && defined(CONFIG_BPF_LSM) int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog, int cgroup_atype); void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog); #else static inline int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog, int cgroup_atype) { return -EOPNOTSUPP; } static inline void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog) { } #endif struct bpf_array { struct bpf_map map; u32 elem_size; u32 index_mask; struct bpf_array_aux *aux; union { DECLARE_FLEX_ARRAY(char, value) __aligned(8); DECLARE_FLEX_ARRAY(void *, ptrs) __aligned(8); DECLARE_FLEX_ARRAY(void __percpu *, pptrs) __aligned(8); }; }; #define BPF_COMPLEXITY_LIMIT_INSNS 1000000 /* yes. 1M insns */ #define MAX_TAIL_CALL_CNT 33 /* Maximum number of loops for bpf_loop and bpf_iter_num. * It's enum to expose it (and thus make it discoverable) through BTF. */ enum { BPF_MAX_LOOPS = 8 * 1024 * 1024, }; #define BPF_F_ACCESS_MASK (BPF_F_RDONLY | \ BPF_F_RDONLY_PROG | \ BPF_F_WRONLY | \ BPF_F_WRONLY_PROG) #define BPF_MAP_CAN_READ BIT(0) #define BPF_MAP_CAN_WRITE BIT(1) /* Maximum number of user-producer ring buffer samples that can be drained in * a call to bpf_user_ringbuf_drain(). */ #define BPF_MAX_USER_RINGBUF_SAMPLES (128 * 1024) static inline u32 bpf_map_flags_to_cap(struct bpf_map *map) { u32 access_flags = map->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG); /* Combination of BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG is * not possible. */ if (access_flags & BPF_F_RDONLY_PROG) return BPF_MAP_CAN_READ; else if (access_flags & BPF_F_WRONLY_PROG) return BPF_MAP_CAN_WRITE; else return BPF_MAP_CAN_READ | BPF_MAP_CAN_WRITE; } static inline bool bpf_map_flags_access_ok(u32 access_flags) { return (access_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG)) != (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG); } struct bpf_event_entry { struct perf_event *event; struct file *perf_file; struct file *map_file; struct rcu_head rcu; }; static inline bool map_type_contains_progs(struct bpf_map *map) { return map->map_type == BPF_MAP_TYPE_PROG_ARRAY || map->map_type == BPF_MAP_TYPE_DEVMAP || map->map_type == BPF_MAP_TYPE_CPUMAP; } bool bpf_prog_map_compatible(struct bpf_map *map, const struct bpf_prog *fp); int bpf_prog_calc_tag(struct bpf_prog *fp); const struct bpf_func_proto *bpf_get_trace_printk_proto(void); const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void); typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src, unsigned long off, unsigned long len); typedef u32 (*bpf_convert_ctx_access_t)(enum bpf_access_type type, const struct bpf_insn *src, struct bpf_insn *dst, struct bpf_prog *prog, u32 *target_size); u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy); /* an array of programs to be executed under rcu_lock. * * Typical usage: * ret = bpf_prog_run_array(rcu_dereference(&bpf_prog_array), ctx, bpf_prog_run); * * the structure returned by bpf_prog_array_alloc() should be populated * with program pointers and the last pointer must be NULL. * The user has to keep refcnt on the program and make sure the program * is removed from the array before bpf_prog_put(). * The 'struct bpf_prog_array *' should only be replaced with xchg() * since other cpus are walking the array of pointers in parallel. */ struct bpf_prog_array_item { struct bpf_prog *prog; union { struct bpf_cgroup_storage *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]; u64 bpf_cookie; }; }; struct bpf_prog_array { struct rcu_head rcu; struct bpf_prog_array_item items[]; }; struct bpf_empty_prog_array { struct bpf_prog_array hdr; struct bpf_prog *null_prog; }; /* to avoid allocating empty bpf_prog_array for cgroups that * don't have bpf program attached use one global 'bpf_empty_prog_array' * It will not be modified the caller of bpf_prog_array_alloc() * (since caller requested prog_cnt == 0) * that pointer should be 'freed' by bpf_prog_array_free() */ extern struct bpf_empty_prog_array bpf_empty_prog_array; struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags); void bpf_prog_array_free(struct bpf_prog_array *progs); /* Use when traversal over the bpf_prog_array uses tasks_trace rcu */ void bpf_prog_array_free_sleepable(struct bpf_prog_array *progs); int bpf_prog_array_length(struct bpf_prog_array *progs); bool bpf_prog_array_is_empty(struct bpf_prog_array *array); int bpf_prog_array_copy_to_user(struct bpf_prog_array *progs, __u32 __user *prog_ids, u32 cnt); void bpf_prog_array_delete_safe(struct bpf_prog_array *progs, struct bpf_prog *old_prog); int bpf_prog_array_delete_safe_at(struct bpf_prog_array *array, int index); int bpf_prog_array_update_at(struct bpf_prog_array *array, int index, struct bpf_prog *prog); int bpf_prog_array_copy_info(struct bpf_prog_array *array, u32 *prog_ids, u32 request_cnt, u32 *prog_cnt); int bpf_prog_array_copy(struct bpf_prog_array *old_array, struct bpf_prog *exclude_prog, struct bpf_prog *include_prog, u64 bpf_cookie, struct bpf_prog_array **new_array); struct bpf_run_ctx {}; struct bpf_cg_run_ctx { struct bpf_run_ctx run_ctx; const struct bpf_prog_array_item *prog_item; int retval; }; struct bpf_trace_run_ctx { struct bpf_run_ctx run_ctx; u64 bpf_cookie; bool is_uprobe; }; struct bpf_tramp_run_ctx { struct bpf_run_ctx run_ctx; u64 bpf_cookie; struct bpf_run_ctx *saved_run_ctx; }; static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx) { struct bpf_run_ctx *old_ctx = NULL; #ifdef CONFIG_BPF_SYSCALL old_ctx = current->bpf_ctx; current->bpf_ctx = new_ctx; #endif return old_ctx; } static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx) { #ifdef CONFIG_BPF_SYSCALL current->bpf_ctx = old_ctx; #endif } /* BPF program asks to bypass CAP_NET_BIND_SERVICE in bind. */ #define BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE (1 << 0) /* BPF program asks to set CN on the packet. */ #define BPF_RET_SET_CN (1 << 0) typedef u32 (*bpf_prog_run_fn)(const struct bpf_prog *prog, const void *ctx); static __always_inline u32 bpf_prog_run_array(const struct bpf_prog_array *array, const void *ctx, bpf_prog_run_fn run_prog) { const struct bpf_prog_array_item *item; const struct bpf_prog *prog; struct bpf_run_ctx *old_run_ctx; struct bpf_trace_run_ctx run_ctx; u32 ret = 1; RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "no rcu lock held"); if (unlikely(!array)) return ret; run_ctx.is_uprobe = false; migrate_disable(); old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); item = &array->items[0]; while ((prog = READ_ONCE(item->prog))) { run_ctx.bpf_cookie = item->bpf_cookie; ret &= run_prog(prog, ctx); item++; } bpf_reset_run_ctx(old_run_ctx); migrate_enable(); return ret; } /* Notes on RCU design for bpf_prog_arrays containing sleepable programs: * * We use the tasks_trace rcu flavor read section to protect the bpf_prog_array * overall. As a result, we must use the bpf_prog_array_free_sleepable * in order to use the tasks_trace rcu grace period. * * When a non-sleepable program is inside the array, we take the rcu read * section and disable preemption for that program alone, so it can access * rcu-protected dynamically sized maps. */ static __always_inline u32 bpf_prog_run_array_uprobe(const struct bpf_prog_array __rcu *array_rcu, const void *ctx, bpf_prog_run_fn run_prog) { const struct bpf_prog_array_item *item; const struct bpf_prog *prog; const struct bpf_prog_array *array; struct bpf_run_ctx *old_run_ctx; struct bpf_trace_run_ctx run_ctx; u32 ret = 1; might_fault(); rcu_read_lock_trace(); migrate_disable(); run_ctx.is_uprobe = true; array = rcu_dereference_check(array_rcu, rcu_read_lock_trace_held()); if (unlikely(!array)) goto out; old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); item = &array->items[0]; while ((prog = READ_ONCE(item->prog))) { if (!prog->sleepable) rcu_read_lock(); run_ctx.bpf_cookie = item->bpf_cookie; ret &= run_prog(prog, ctx); item++; if (!prog->sleepable) rcu_read_unlock(); } bpf_reset_run_ctx(old_run_ctx); out: migrate_enable(); rcu_read_unlock_trace(); return ret; } #ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); extern struct mutex bpf_stats_enabled_mutex; /* * Block execution of BPF programs attached to instrumentation (perf, * kprobes, tracepoints) to prevent deadlocks on map operations as any of * these events can happen inside a region which holds a map bucket lock * and can deadlock on it. */ static inline void bpf_disable_instrumentation(void) { migrate_disable(); this_cpu_inc(bpf_prog_active); } static inline void bpf_enable_instrumentation(void) { this_cpu_dec(bpf_prog_active); migrate_enable(); } extern const struct super_operations bpf_super_ops; extern const struct file_operations bpf_map_fops; extern const struct file_operations bpf_prog_fops; extern const struct file_operations bpf_iter_fops; #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \ extern const struct bpf_prog_ops _name ## _prog_ops; \ extern const struct bpf_verifier_ops _name ## _verifier_ops; #define BPF_MAP_TYPE(_id, _ops) \ extern const struct bpf_map_ops _ops; #define BPF_LINK_TYPE(_id, _name) #include <linux/bpf_types.h> #undef BPF_PROG_TYPE #undef BPF_MAP_TYPE #undef BPF_LINK_TYPE extern const struct bpf_prog_ops bpf_offload_prog_ops; extern const struct bpf_verifier_ops tc_cls_act_analyzer_ops; extern const struct bpf_verifier_ops xdp_analyzer_ops; struct bpf_prog *bpf_prog_get(u32 ufd); struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, bool attach_drv); void bpf_prog_add(struct bpf_prog *prog, int i); void bpf_prog_sub(struct bpf_prog *prog, int i); void bpf_prog_inc(struct bpf_prog *prog); struct bpf_prog * __must_check bpf_prog_inc_not_zero(struct bpf_prog *prog); void bpf_prog_put(struct bpf_prog *prog); void bpf_prog_free_id(struct bpf_prog *prog); void bpf_map_free_id(struct bpf_map *map); struct btf_field *btf_record_find(const struct btf_record *rec, u32 offset, u32 field_mask); void btf_record_free(struct btf_record *rec); void bpf_map_free_record(struct bpf_map *map); struct btf_record *btf_record_dup(const struct btf_record *rec); bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b); void bpf_obj_free_timer(const struct btf_record *rec, void *obj); void bpf_obj_free_workqueue(const struct btf_record *rec, void *obj); void bpf_obj_free_fields(const struct btf_record *rec, void *obj); void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu); struct bpf_map *bpf_map_get(u32 ufd); struct bpf_map *bpf_map_get_with_uref(u32 ufd); static inline struct bpf_map *__bpf_map_get(struct fd f) { if (fd_empty(f)) return ERR_PTR(-EBADF); if (unlikely(fd_file(f)->f_op != &bpf_map_fops)) return ERR_PTR(-EINVAL); return fd_file(f)->private_data; } void bpf_map_inc(struct bpf_map *map); void bpf_map_inc_with_uref(struct bpf_map *map); struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref); struct bpf_map * __must_check bpf_map_inc_not_zero(struct bpf_map *map); void bpf_map_put_with_uref(struct bpf_map *map); void bpf_map_put(struct bpf_map *map); void *bpf_map_area_alloc(u64 size, int numa_node); void *bpf_map_area_mmapable_alloc(u64 size, int numa_node); void bpf_map_area_free(void *base); bool bpf_map_write_active(const struct bpf_map *map); void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr); int generic_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr, union bpf_attr __user *uattr); int generic_map_update_batch(struct bpf_map *map, struct file *map_file, const union bpf_attr *attr, union bpf_attr __user *uattr); int generic_map_delete_batch(struct bpf_map *map, const union bpf_attr *attr, union bpf_attr __user *uattr); struct bpf_map *bpf_map_get_curr_or_next(u32 *id); struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id); int bpf_map_alloc_pages(const struct bpf_map *map, gfp_t gfp, int nid, unsigned long nr_pages, struct page **page_array); #ifdef CONFIG_MEMCG void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags, int node); void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags); void *bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size, gfp_t flags); void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, size_t align, gfp_t flags); #else /* * These specialized allocators have to be macros for their allocations to be * accounted separately (to have separate alloc_tag). */ #define bpf_map_kmalloc_node(_map, _size, _flags, _node) \ kmalloc_node(_size, _flags, _node) #define bpf_map_kzalloc(_map, _size, _flags) \ kzalloc(_size, _flags) #define bpf_map_kvcalloc(_map, _n, _size, _flags) \ kvcalloc(_n, _size, _flags) #define bpf_map_alloc_percpu(_map, _size, _align, _flags) \ __alloc_percpu_gfp(_size, _align, _flags) #endif static inline int bpf_map_init_elem_count(struct bpf_map *map) { size_t size = sizeof(*map->elem_count), align = size; gfp_t flags = GFP_USER | __GFP_NOWARN; map->elem_count = bpf_map_alloc_percpu(map, size, align, flags); if (!map->elem_count) return -ENOMEM; return 0; } static inline void bpf_map_free_elem_count(struct bpf_map *map) { free_percpu(map->elem_count); } static inline void bpf_map_inc_elem_count(struct bpf_map *map) { this_cpu_inc(*map->elem_count); } static inline void bpf_map_dec_elem_count(struct bpf_map *map) { this_cpu_dec(*map->elem_count); } extern int sysctl_unprivileged_bpf_disabled; bool bpf_token_capable(const struct bpf_token *token, int cap); static inline bool bpf_allow_ptr_leaks(const struct bpf_token *token) { return bpf_token_capable(token, CAP_PERFMON); } static inline bool bpf_allow_uninit_stack(const struct bpf_token *token) { return bpf_token_capable(token, CAP_PERFMON); } static inline bool bpf_bypass_spec_v1(const struct bpf_token *token) { return cpu_mitigations_off() || bpf_token_capable(token, CAP_PERFMON); } static inline bool bpf_bypass_spec_v4(const struct bpf_token *token) { return cpu_mitigations_off() || bpf_token_capable(token, CAP_PERFMON); } int bpf_map_new_fd(struct bpf_map *map, int flags); int bpf_prog_new_fd(struct bpf_prog *prog); void bpf_link_init(struct bpf_link *link, enum bpf_link_type type, const struct bpf_link_ops *ops, struct bpf_prog *prog); int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer); int bpf_link_settle(struct bpf_link_primer *primer); void bpf_link_cleanup(struct bpf_link_primer *primer); void bpf_link_inc(struct bpf_link *link); struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link); void bpf_link_put(struct bpf_link *link); int bpf_link_new_fd(struct bpf_link *link); struct bpf_link *bpf_link_get_from_fd(u32 ufd); struct bpf_link *bpf_link_get_curr_or_next(u32 *id); void bpf_token_inc(struct bpf_token *token); void bpf_token_put(struct bpf_token *token); int bpf_token_create(union bpf_attr *attr); struct bpf_token *bpf_token_get_from_fd(u32 ufd); bool bpf_token_allow_cmd(const struct bpf_token *token, enum bpf_cmd cmd); bool bpf_token_allow_map_type(const struct bpf_token *token, enum bpf_map_type type); bool bpf_token_allow_prog_type(const struct bpf_token *token, enum bpf_prog_type prog_type, enum bpf_attach_type attach_type); int bpf_obj_pin_user(u32 ufd, int path_fd, const char __user *pathname); int bpf_obj_get_user(int path_fd, const char __user *pathname, int flags); struct inode *bpf_get_inode(struct super_block *sb, const struct inode *dir, umode_t mode); #define BPF_ITER_FUNC_PREFIX "bpf_iter_" #define DEFINE_BPF_ITER_FUNC(target, args...) \ extern int bpf_iter_ ## target(args); \ int __init bpf_iter_ ## target(args) { return 0; } /* * The task type of iterators. * * For BPF task iterators, they can be parameterized with various * parameters to visit only some of tasks. * * BPF_TASK_ITER_ALL (default) * Iterate over resources of every task. * * BPF_TASK_ITER_TID * Iterate over resources of a task/tid. * * BPF_TASK_ITER_TGID * Iterate over resources of every task of a process / task group. */ enum bpf_iter_task_type { BPF_TASK_ITER_ALL = 0, BPF_TASK_ITER_TID, BPF_TASK_ITER_TGID, }; struct bpf_iter_aux_info { /* for map_elem iter */ struct bpf_map *map; /* for cgroup iter */ struct { struct cgroup *start; /* starting cgroup */ enum bpf_cgroup_iter_order order; } cgroup; struct { enum bpf_iter_task_type type; u32 pid; } task; }; typedef int (*bpf_iter_attach_target_t)(struct bpf_prog *prog, union bpf_iter_link_info *linfo, struct bpf_iter_aux_info *aux); typedef void (*bpf_iter_detach_target_t)(struct bpf_iter_aux_info *aux); typedef void (*bpf_iter_show_fdinfo_t) (const struct bpf_iter_aux_info *aux, struct seq_file *seq); typedef int (*bpf_iter_fill_link_info_t)(const struct bpf_iter_aux_info *aux, struct bpf_link_info *info); typedef const struct bpf_func_proto * (*bpf_iter_get_func_proto_t)(enum bpf_func_id func_id, const struct bpf_prog *prog); enum bpf_iter_feature { BPF_ITER_RESCHED = BIT(0), }; #define BPF_ITER_CTX_ARG_MAX 2 struct bpf_iter_reg { const char *target; bpf_iter_attach_target_t attach_target; bpf_iter_detach_target_t detach_target; bpf_iter_show_fdinfo_t show_fdinfo; bpf_iter_fill_link_info_t fill_link_info; bpf_iter_get_func_proto_t get_func_proto; u32 ctx_arg_info_size; u32 feature; struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX]; const struct bpf_iter_seq_info *seq_info; }; struct bpf_iter_meta { __bpf_md_ptr(struct seq_file *, seq); u64 session_id; u64 seq_num; }; struct bpf_iter__bpf_map_elem { __bpf_md_ptr(struct bpf_iter_meta *, meta); __bpf_md_ptr(struct bpf_map *, map); __bpf_md_ptr(void *, key); __bpf_md_ptr(void *, value); }; int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info); void bpf_iter_unreg_target(const struct bpf_iter_reg *reg_info); bool bpf_iter_prog_supported(struct bpf_prog *prog); const struct bpf_func_proto * bpf_iter_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog); int bpf_iter_link_attach(const union bpf_attr *attr, bpfptr_t uattr, struct bpf_prog *prog); int bpf_iter_new_fd(struct bpf_link *link); bool bpf_link_is_iter(struct bpf_link *link); struct bpf_prog *bpf_iter_get_info(struct bpf_iter_meta *meta, bool in_stop); int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx); void bpf_iter_map_show_fdinfo(const struct bpf_iter_aux_info *aux, struct seq_file *seq); int bpf_iter_map_fill_link_info(const struct bpf_iter_aux_info *aux, struct bpf_link_info *info); int map_set_for_each_callback_args(struct bpf_verifier_env *env, struct bpf_func_state *caller, struct bpf_func_state *callee); int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value); int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value); int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value, u64 flags); int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value, u64 flags); int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value); int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file, void *key, void *value, u64 map_flags); int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value); int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file, void *key, void *value, u64 map_flags); int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value); int bpf_get_file_flag(int flags); int bpf_check_uarg_tail_zero(bpfptr_t uaddr, size_t expected_size, size_t actual_size); /* verify correctness of eBPF program */ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size); #ifndef CONFIG_BPF_JIT_ALWAYS_ON void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth); #endif struct btf *bpf_get_btf_vmlinux(void); /* Map specifics */ struct xdp_frame; struct sk_buff; struct bpf_dtab_netdev; struct bpf_cpu_map_entry; void __dev_flush(struct list_head *flush_list); int dev_xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf, struct net_device *dev_rx); int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_frame *xdpf, struct net_device *dev_rx); int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx, struct bpf_map *map, bool exclude_ingress); int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, struct bpf_prog *xdp_prog); int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb, struct bpf_prog *xdp_prog, struct bpf_map *map, bool exclude_ingress); void __cpu_map_flush(struct list_head *flush_list); int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf, struct net_device *dev_rx); int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu, struct sk_buff *skb); /* Return map's numa specified by userspace */ static inline int bpf_map_attr_numa_node(const union bpf_attr *attr) { return (attr->map_flags & BPF_F_NUMA_NODE) ? attr->numa_node : NUMA_NO_NODE; } struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type); int array_map_alloc_check(union bpf_attr *attr); int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); int bpf_prog_test_run_tracing(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); int bpf_prog_test_run_raw_tp(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); int bpf_prog_test_run_nf(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); bool btf_ctx_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info); static inline bool bpf_tracing_ctx_access(int off, int size, enum bpf_access_type type) { if (off < 0 || off >= sizeof(__u64) * MAX_BPF_FUNC_ARGS) return false; if (type != BPF_READ) return false; if (off % size != 0) return false; return true; } static inline bool bpf_tracing_btf_ctx_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) { if (!bpf_tracing_ctx_access(off, size, type)) return false; return btf_ctx_access(off, size, type, prog, info); } int btf_struct_access(struct bpf_verifier_log *log, const struct bpf_reg_state *reg, int off, int size, enum bpf_access_type atype, u32 *next_btf_id, enum bpf_type_flag *flag, const char **field_name); bool btf_struct_ids_match(struct bpf_verifier_log *log, const struct btf *btf, u32 id, int off, const struct btf *need_btf, u32 need_type_id, bool strict); int btf_distill_func_proto(struct bpf_verifier_log *log, struct btf *btf, const struct btf_type *func_proto, const char *func_name, struct btf_func_model *m); struct bpf_reg_state; int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog); int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog, struct btf *btf, const struct btf_type *t); const char *btf_find_decl_tag_value(const struct btf *btf, const struct btf_type *pt, int comp_idx, const char *tag_key); int btf_find_next_decl_tag(const struct btf *btf, const struct btf_type *pt, int comp_idx, const char *tag_key, int last_id); struct bpf_prog *bpf_prog_by_id(u32 id); struct bpf_link *bpf_link_by_id(u32 id); const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog); void bpf_task_storage_free(struct task_struct *task); void bpf_cgrp_storage_free(struct cgroup *cgroup); bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog); const struct btf_func_model * bpf_jit_find_kfunc_model(const struct bpf_prog *prog, const struct bpf_insn *insn); int bpf_get_kfunc_addr(const struct bpf_prog *prog, u32 func_id, u16 btf_fd_idx, u8 **func_addr); struct bpf_core_ctx { struct bpf_verifier_log *log; const struct btf *btf; }; bool btf_nested_type_is_trusted(struct bpf_verifier_log *log, const struct bpf_reg_state *reg, const char *field_name, u32 btf_id, const char *suffix); bool btf_type_ids_nocast_alias(struct bpf_verifier_log *log, const struct btf *reg_btf, u32 reg_id, const struct btf *arg_btf, u32 arg_id); int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo, int relo_idx, void *insn); static inline bool unprivileged_ebpf_enabled(void) { return !sysctl_unprivileged_bpf_disabled; } /* Not all bpf prog type has the bpf_ctx. * For the bpf prog type that has initialized the bpf_ctx, * this function can be used to decide if a kernel function * is called by a bpf program. */ static inline bool has_current_bpf_ctx(void) { return !!current->bpf_ctx; } void notrace bpf_prog_inc_misses_counter(struct bpf_prog *prog); void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, enum bpf_dynptr_type type, u32 offset, u32 size); void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr); void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr); #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { return ERR_PTR(-EOPNOTSUPP); } static inline struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, bool attach_drv) { return ERR_PTR(-EOPNOTSUPP); } static inline void bpf_prog_add(struct bpf_prog *prog, int i) { } static inline void bpf_prog_sub(struct bpf_prog *prog, int i) { } static inline void bpf_prog_put(struct bpf_prog *prog) { } static inline void bpf_prog_inc(struct bpf_prog *prog) { } static inline struct bpf_prog *__must_check bpf_prog_inc_not_zero(struct bpf_prog *prog) { return ERR_PTR(-EOPNOTSUPP); } static inline void bpf_link_init(struct bpf_link *link, enum bpf_link_type type, const struct bpf_link_ops *ops, struct bpf_prog *prog) { } static inline int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer) { return -EOPNOTSUPP; } static inline int bpf_link_settle(struct bpf_link_primer *primer) { return -EOPNOTSUPP; } static inline void bpf_link_cleanup(struct bpf_link_primer *primer) { } static inline void bpf_link_inc(struct bpf_link *link) { } static inline struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link) { return NULL; } static inline void bpf_link_put(struct bpf_link *link) { } static inline int bpf_obj_get_user(const char __user *pathname, int flags) { return -EOPNOTSUPP; } static inline bool bpf_token_capable(const struct bpf_token *token, int cap) { return capable(cap) || (cap != CAP_SYS_ADMIN && capable(CAP_SYS_ADMIN)); } static inline void bpf_token_inc(struct bpf_token *token) { } static inline void bpf_token_put(struct bpf_token *token) { } static inline struct bpf_token *bpf_token_get_from_fd(u32 ufd) { return ERR_PTR(-EOPNOTSUPP); } static inline void __dev_flush(struct list_head *flush_list) { } struct xdp_frame; struct bpf_dtab_netdev; struct bpf_cpu_map_entry; static inline int dev_xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf, struct net_device *dev_rx) { return 0; } static inline int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_frame *xdpf, struct net_device *dev_rx) { return 0; } static inline int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx, struct bpf_map *map, bool exclude_ingress) { return 0; } struct sk_buff; static inline int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, struct bpf_prog *xdp_prog) { return 0; } static inline int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb, struct bpf_prog *xdp_prog, struct bpf_map *map, bool exclude_ingress) { return 0; } static inline void __cpu_map_flush(struct list_head *flush_list) { } static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf, struct net_device *dev_rx) { return 0; } static inline int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu, struct sk_buff *skb) { return -EOPNOTSUPP; } static inline struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type) { return ERR_PTR(-EOPNOTSUPP); } static inline int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) { return -ENOTSUPP; } static inline int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) { return -ENOTSUPP; } static inline int bpf_prog_test_run_tracing(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) { return -ENOTSUPP; } static inline int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) { return -ENOTSUPP; } static inline int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) { return -ENOTSUPP; } static inline void bpf_map_put(struct bpf_map *map) { } static inline struct bpf_prog *bpf_prog_by_id(u32 id) { return ERR_PTR(-ENOTSUPP); } static inline int btf_struct_access(struct bpf_verifier_log *log, const struct bpf_reg_state *reg, int off, int size, enum bpf_access_type atype, u32 *next_btf_id, enum bpf_type_flag *flag, const char **field_name) { return -EACCES; } static inline const struct bpf_func_proto * bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { return NULL; } static inline void bpf_task_storage_free(struct task_struct *task) { } static inline bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog) { return false; } static inline const struct btf_func_model * bpf_jit_find_kfunc_model(const struct bpf_prog *prog, const struct bpf_insn *insn) { return NULL; } static inline int bpf_get_kfunc_addr(const struct bpf_prog *prog, u32 func_id, u16 btf_fd_idx, u8 **func_addr) { return -ENOTSUPP; } static inline bool unprivileged_ebpf_enabled(void) { return false; } static inline bool has_current_bpf_ctx(void) { return false; } static inline void bpf_prog_inc_misses_counter(struct bpf_prog *prog) { } static inline void bpf_cgrp_storage_free(struct cgroup *cgroup) { } static inline void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, enum bpf_dynptr_type type, u32 offset, u32 size) { } static inline void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr) { } static inline void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr) { } #endif /* CONFIG_BPF_SYSCALL */ static __always_inline int bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr) { int ret = -EFAULT; if (IS_ENABLED(CONFIG_BPF_EVENTS)) ret = copy_from_kernel_nofault(dst, unsafe_ptr, size); if (unlikely(ret < 0)) memset(dst, 0, size); return ret; } void __bpf_free_used_btfs(struct btf_mod_pair *used_btfs, u32 len); static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type) { return bpf_prog_get_type_dev(ufd, type, false); } void __bpf_free_used_maps(struct bpf_prog_aux *aux, struct bpf_map **used_maps, u32 len); bool bpf_prog_get_ok(struct bpf_prog *, enum bpf_prog_type *, bool); int bpf_prog_offload_compile(struct bpf_prog *prog); void bpf_prog_dev_bound_destroy(struct bpf_prog *prog); int bpf_prog_offload_info_fill(struct bpf_prog_info *info, struct bpf_prog *prog); int bpf_map_offload_info_fill(struct bpf_map_info *info, struct bpf_map *map); int bpf_map_offload_lookup_elem(struct bpf_map *map, void *key, void *value); int bpf_map_offload_update_elem(struct bpf_map *map, void *key, void *value, u64 flags); int bpf_map_offload_delete_elem(struct bpf_map *map, void *key); int bpf_map_offload_get_next_key(struct bpf_map *map, void *key, void *next_key); bool bpf_offload_prog_map_match(struct bpf_prog *prog, struct bpf_map *map); struct bpf_offload_dev * bpf_offload_dev_create(const struct bpf_prog_offload_ops *ops, void *priv); void bpf_offload_dev_destroy(struct bpf_offload_dev *offdev); void *bpf_offload_dev_priv(struct bpf_offload_dev *offdev); int bpf_offload_dev_netdev_register(struct bpf_offload_dev *offdev, struct net_device *netdev); void bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev, struct net_device *netdev); bool bpf_offload_dev_match(struct bpf_prog *prog, struct net_device *netdev); void unpriv_ebpf_notify(int new_state); #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL) int bpf_dev_bound_kfunc_check(struct bpf_verifier_log *log, struct bpf_prog_aux *prog_aux); void *bpf_dev_bound_resolve_kfunc(struct bpf_prog *prog, u32 func_id); int bpf_prog_dev_bound_init(struct bpf_prog *prog, union bpf_attr *attr); int bpf_prog_dev_bound_inherit(struct bpf_prog *new_prog, struct bpf_prog *old_prog); void bpf_dev_bound_netdev_unregister(struct net_device *dev); static inline bool bpf_prog_is_dev_bound(const struct bpf_prog_aux *aux) { return aux->dev_bound; } static inline bool bpf_prog_is_offloaded(const struct bpf_prog_aux *aux) { return aux->offload_requested; } bool bpf_prog_dev_bound_match(const struct bpf_prog *lhs, const struct bpf_prog *rhs); static inline bool bpf_map_is_offloaded(struct bpf_map *map) { return unlikely(map->ops == &bpf_map_offload_ops); } struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr); void bpf_map_offload_map_free(struct bpf_map *map); u64 bpf_map_offload_map_mem_usage(const struct bpf_map *map); int bpf_prog_test_run_syscall(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog); int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype); int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags); int sock_map_bpf_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr); int sock_map_link_create(const union bpf_attr *attr, struct bpf_prog *prog); void sock_map_unhash(struct sock *sk); void sock_map_destroy(struct sock *sk); void sock_map_close(struct sock *sk, long timeout); #else static inline int bpf_dev_bound_kfunc_check(struct bpf_verifier_log *log, struct bpf_prog_aux *prog_aux) { return -EOPNOTSUPP; } static inline void *bpf_dev_bound_resolve_kfunc(struct bpf_prog *prog, u32 func_id) { return NULL; } static inline int bpf_prog_dev_bound_init(struct bpf_prog *prog, union bpf_attr *attr) { return -EOPNOTSUPP; } static inline int bpf_prog_dev_bound_inherit(struct bpf_prog *new_prog, struct bpf_prog *old_prog) { return -EOPNOTSUPP; } static inline void bpf_dev_bound_netdev_unregister(struct net_device *dev) { } static inline bool bpf_prog_is_dev_bound(const struct bpf_prog_aux *aux) { return false; } static inline bool bpf_prog_is_offloaded(struct bpf_prog_aux *aux) { return false; } static inline bool bpf_prog_dev_bound_match(const struct bpf_prog *lhs, const struct bpf_prog *rhs) { return false; } static inline bool bpf_map_is_offloaded(struct bpf_map *map) { return false; } static inline struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr) { return ERR_PTR(-EOPNOTSUPP); } static inline void bpf_map_offload_map_free(struct bpf_map *map) { } static inline u64 bpf_map_offload_map_mem_usage(const struct bpf_map *map) { return 0; } static inline int bpf_prog_test_run_syscall(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) { return -ENOTSUPP; } #ifdef CONFIG_BPF_SYSCALL static inline int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog) { return -EINVAL; } static inline int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype) { return -EOPNOTSUPP; } static inline int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags) { return -EOPNOTSUPP; } static inline int sock_map_bpf_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr) { return -EINVAL; } static inline int sock_map_link_create(const union bpf_attr *attr, struct bpf_prog *prog) { return -EOPNOTSUPP; } #endif /* CONFIG_BPF_SYSCALL */ #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */ static __always_inline void bpf_prog_inc_misses_counters(const struct bpf_prog_array *array) { const struct bpf_prog_array_item *item; struct bpf_prog *prog; if (unlikely(!array)) return; item = &array->items[0]; while ((prog = READ_ONCE(item->prog))) { bpf_prog_inc_misses_counter(prog); item++; } } #if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) void bpf_sk_reuseport_detach(struct sock *sk); int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key, void *value); int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key, void *value, u64 map_flags); #else static inline void bpf_sk_reuseport_detach(struct sock *sk) { } #ifdef CONFIG_BPF_SYSCALL static inline int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key, void *value) { return -EOPNOTSUPP; } static inline int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key, void *value, u64 map_flags) { return -EOPNOTSUPP; } #endif /* CONFIG_BPF_SYSCALL */ #endif /* defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) */ /* verifier prototypes for helper functions called from eBPF programs */ extern const struct bpf_func_proto bpf_map_lookup_elem_proto; extern const struct bpf_func_proto bpf_map_update_elem_proto; extern const struct bpf_func_proto bpf_map_delete_elem_proto; extern const struct bpf_func_proto bpf_map_push_elem_proto; extern const struct bpf_func_proto bpf_map_pop_elem_proto; extern const struct bpf_func_proto bpf_map_peek_elem_proto; extern const struct bpf_func_proto bpf_map_lookup_percpu_elem_proto; extern const struct bpf_func_proto bpf_get_prandom_u32_proto; extern const struct bpf_func_proto bpf_get_smp_processor_id_proto; extern const struct bpf_func_proto bpf_get_numa_node_id_proto; extern const struct bpf_func_proto bpf_tail_call_proto; extern const struct bpf_func_proto bpf_ktime_get_ns_proto; extern const struct bpf_func_proto bpf_ktime_get_boot_ns_proto; extern const struct bpf_func_proto bpf_ktime_get_tai_ns_proto; extern const struct bpf_func_proto bpf_get_current_pid_tgid_proto; extern const struct bpf_func_proto bpf_get_current_uid_gid_proto; extern const struct bpf_func_proto bpf_get_current_comm_proto; extern const struct bpf_func_proto bpf_get_stackid_proto; extern const struct bpf_func_proto bpf_get_stack_proto; extern const struct bpf_func_proto bpf_get_stack_sleepable_proto; extern const struct bpf_func_proto bpf_get_task_stack_proto; extern const struct bpf_func_proto bpf_get_task_stack_sleepable_proto; extern const struct bpf_func_proto bpf_get_stackid_proto_pe; extern const struct bpf_func_proto bpf_get_stack_proto_pe; extern const struct bpf_func_proto bpf_sock_map_update_proto; extern const struct bpf_func_proto bpf_sock_hash_update_proto; extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto; extern const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto; extern const struct bpf_func_proto bpf_get_cgroup_classid_curr_proto; extern const struct bpf_func_proto bpf_current_task_under_cgroup_proto; extern const struct bpf_func_proto bpf_msg_redirect_hash_proto; extern const struct bpf_func_proto bpf_msg_redirect_map_proto; extern const struct bpf_func_proto bpf_sk_redirect_hash_proto; extern const struct bpf_func_proto bpf_sk_redirect_map_proto; extern const struct bpf_func_proto bpf_spin_lock_proto; extern const struct bpf_func_proto bpf_spin_unlock_proto; extern const struct bpf_func_proto bpf_get_local_storage_proto; extern const struct bpf_func_proto bpf_strtol_proto; extern const struct bpf_func_proto bpf_strtoul_proto; extern const struct bpf_func_proto bpf_tcp_sock_proto; extern const struct bpf_func_proto bpf_jiffies64_proto; extern const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto; extern const struct bpf_func_proto bpf_event_output_data_proto; extern const struct bpf_func_proto bpf_ringbuf_output_proto; extern const struct bpf_func_proto bpf_ringbuf_reserve_proto; extern const struct bpf_func_proto bpf_ringbuf_submit_proto; extern const struct bpf_func_proto bpf_ringbuf_discard_proto; extern const struct bpf_func_proto bpf_ringbuf_query_proto; extern const struct bpf_func_proto bpf_ringbuf_reserve_dynptr_proto; extern const struct bpf_func_proto bpf_ringbuf_submit_dynptr_proto; extern const struct bpf_func_proto bpf_ringbuf_discard_dynptr_proto; extern const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto; extern const struct bpf_func_proto bpf_skc_to_tcp_sock_proto; extern const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto; extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto; extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto; extern const struct bpf_func_proto bpf_skc_to_unix_sock_proto; extern const struct bpf_func_proto bpf_skc_to_mptcp_sock_proto; extern const struct bpf_func_proto bpf_copy_from_user_proto; extern const struct bpf_func_proto bpf_snprintf_btf_proto; extern const struct bpf_func_proto bpf_snprintf_proto; extern const struct bpf_func_proto bpf_per_cpu_ptr_proto; extern const struct bpf_func_proto bpf_this_cpu_ptr_proto; extern const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto; extern const struct bpf_func_proto bpf_sock_from_file_proto; extern const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto; extern const struct bpf_func_proto bpf_task_storage_get_recur_proto; extern const struct bpf_func_proto bpf_task_storage_get_proto; extern const struct bpf_func_proto bpf_task_storage_delete_recur_proto; extern const struct bpf_func_proto bpf_task_storage_delete_proto; extern const struct bpf_func_proto bpf_for_each_map_elem_proto; extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto; extern const struct bpf_func_proto bpf_sk_setsockopt_proto; extern const struct bpf_func_proto bpf_sk_getsockopt_proto; extern const struct bpf_func_proto bpf_unlocked_sk_setsockopt_proto; extern const struct bpf_func_proto bpf_unlocked_sk_getsockopt_proto; extern const struct bpf_func_proto bpf_find_vma_proto; extern const struct bpf_func_proto bpf_loop_proto; extern const struct bpf_func_proto bpf_copy_from_user_task_proto; extern const struct bpf_func_proto bpf_set_retval_proto; extern const struct bpf_func_proto bpf_get_retval_proto; extern const struct bpf_func_proto bpf_user_ringbuf_drain_proto; extern const struct bpf_func_proto bpf_cgrp_storage_get_proto; extern const struct bpf_func_proto bpf_cgrp_storage_delete_proto; const struct bpf_func_proto *tracing_prog_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); /* Shared helpers among cBPF and eBPF. */ void bpf_user_rnd_init_once(void); u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); u64 bpf_get_raw_cpu_id(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); #if defined(CONFIG_NET) bool bpf_sock_common_is_valid_access(int off, int size, enum bpf_access_type type, struct bpf_insn_access_aux *info); bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, struct bpf_insn_access_aux *info); u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, struct bpf_prog *prog, u32 *target_size); int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags, struct bpf_dynptr *ptr); #else static inline bool bpf_sock_common_is_valid_access(int off, int size, enum bpf_access_type type, struct bpf_insn_access_aux *info) { return false; } static inline bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, struct bpf_insn_access_aux *info) { return false; } static inline u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, struct bpf_prog *prog, u32 *target_size) { return 0; } static inline int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags, struct bpf_dynptr *ptr) { return -EOPNOTSUPP; } #endif #ifdef CONFIG_INET struct sk_reuseport_kern { struct sk_buff *skb; struct sock *sk; struct sock *selected_sk; struct sock *migrating_sk; void *data_end; u32 hash; u32 reuseport_id; bool bind_inany; }; bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type, struct bpf_insn_access_aux *info); u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, struct bpf_prog *prog, u32 *target_size); bool bpf_xdp_sock_is_valid_access(int off, int size, enum bpf_access_type type, struct bpf_insn_access_aux *info); u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, struct bpf_prog *prog, u32 *target_size); #else static inline bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type, struct bpf_insn_access_aux *info) { return false; } static inline u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, struct bpf_prog *prog, u32 *target_size) { return 0; } static inline bool bpf_xdp_sock_is_valid_access(int off, int size, enum bpf_access_type type, struct bpf_insn_access_aux *info) { return false; } static inline u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, struct bpf_prog *prog, u32 *target_size) { return 0; } #endif /* CONFIG_INET */ enum bpf_text_poke_type { BPF_MOD_CALL, BPF_MOD_JUMP, }; int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, void *addr1, void *addr2); void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke, struct bpf_prog *new, struct bpf_prog *old); void *bpf_arch_text_copy(void *dst, void *src, size_t len); int bpf_arch_text_invalidate(void *dst, size_t len); struct btf_id_set; bool btf_id_set_contains(const struct btf_id_set *set, u32 id); #define MAX_BPRINTF_VARARGS 12 #define MAX_BPRINTF_BUF 1024 struct bpf_bprintf_data { u32 *bin_args; char *buf; bool get_bin_args; bool get_buf; }; int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, u32 num_args, struct bpf_bprintf_data *data); void bpf_bprintf_cleanup(struct bpf_bprintf_data *data); #ifdef CONFIG_BPF_LSM void bpf_cgroup_atype_get(u32 attach_btf_id, int cgroup_atype); void bpf_cgroup_atype_put(int cgroup_atype); #else static inline void bpf_cgroup_atype_get(u32 attach_btf_id, int cgroup_atype) {} static inline void bpf_cgroup_atype_put(int cgroup_atype) {} #endif /* CONFIG_BPF_LSM */ struct key; #ifdef CONFIG_KEYS struct bpf_key { struct key *key; bool has_ref; }; #endif /* CONFIG_KEYS */ static inline bool type_is_alloc(u32 type) { return type & MEM_ALLOC; } static inline gfp_t bpf_memcg_flags(gfp_t flags) { if (memcg_bpf_enabled()) return flags | __GFP_ACCOUNT; return flags; } static inline bool bpf_is_subprog(const struct bpf_prog *prog) { return prog->aux->func_idx != 0; } #endif /* _LINUX_BPF_H */ |
1032 1038 3 1034 1032 19 19 19 19 50 50 43 19 50 52 51 53 28 53 61 61 61 59 31 31 31 30 13 8021 8000 7698 8024 8021 7999 8020 6 5487 5500 5479 7659 7665 7687 7654 7664 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 | // SPDX-License-Identifier: GPL-2.0 /* * mm/pgtable-generic.c * * Generic pgtable methods declared in linux/pgtable.h * * Copyright (C) 2010 Linus Torvalds */ #include <linux/pagemap.h> #include <linux/hugetlb.h> #include <linux/pgtable.h> #include <linux/swap.h> #include <linux/swapops.h> #include <linux/mm_inline.h> #include <asm/pgalloc.h> #include <asm/tlb.h> /* * If a p?d_bad entry is found while walking page tables, report * the error, before resetting entry to p?d_none. Usually (but * very seldom) called out from the p?d_none_or_clear_bad macros. */ void pgd_clear_bad(pgd_t *pgd) { pgd_ERROR(*pgd); pgd_clear(pgd); } #ifndef __PAGETABLE_P4D_FOLDED void p4d_clear_bad(p4d_t *p4d) { p4d_ERROR(*p4d); p4d_clear(p4d); } #endif #ifndef __PAGETABLE_PUD_FOLDED void pud_clear_bad(pud_t *pud) { pud_ERROR(*pud); pud_clear(pud); } #endif /* * Note that the pmd variant below can't be stub'ed out just as for p4d/pud * above. pmd folding is special and typically pmd_* macros refer to upper * level even when folded */ void pmd_clear_bad(pmd_t *pmd) { pmd_ERROR(*pmd); pmd_clear(pmd); } #ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS /* * Only sets the access flags (dirty, accessed), as well as write * permission. Furthermore, we know it always gets set to a "more * permissive" setting, which allows most architectures to optimize * this. We return whether the PTE actually changed, which in turn * instructs the caller to do things like update__mmu_cache. This * used to be done in the caller, but sparc needs minor faults to * force that call on sun4c so we changed this macro slightly */ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, pte_t *ptep, pte_t entry, int dirty) { int changed = !pte_same(ptep_get(ptep), entry); if (changed) { set_pte_at(vma->vm_mm, address, ptep, entry); flush_tlb_fix_spurious_fault(vma, address, ptep); } return changed; } #endif #ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH int ptep_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { int young; young = ptep_test_and_clear_young(vma, address, ptep); if (young) flush_tlb_page(vma, address); return young; } #endif #ifndef __HAVE_ARCH_PTEP_CLEAR_FLUSH pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { struct mm_struct *mm = (vma)->vm_mm; pte_t pte; pte = ptep_get_and_clear(mm, address, ptep); if (pte_accessible(mm, pte)) flush_tlb_page(vma, address); return pte; } #endif #ifdef CONFIG_TRANSPARENT_HUGEPAGE #ifndef __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, pmd_t entry, int dirty) { int changed = !pmd_same(*pmdp, entry); VM_BUG_ON(address & ~HPAGE_PMD_MASK); if (changed) { set_pmd_at(vma->vm_mm, address, pmdp, entry); flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); } return changed; } #endif #ifndef __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { int young; VM_BUG_ON(address & ~HPAGE_PMD_MASK); young = pmdp_test_and_clear_young(vma, address, pmdp); if (young) flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); return young; } #endif #ifndef __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { pmd_t pmd; VM_BUG_ON(address & ~HPAGE_PMD_MASK); VM_BUG_ON(pmd_present(*pmdp) && !pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp)); pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp); flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); return pmd; } #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD pud_t pudp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address, pud_t *pudp) { pud_t pud; VM_BUG_ON(address & ~HPAGE_PUD_MASK); VM_BUG_ON(!pud_trans_huge(*pudp) && !pud_devmap(*pudp)); pud = pudp_huge_get_and_clear(vma->vm_mm, address, pudp); flush_pud_tlb_range(vma, address, address + HPAGE_PUD_SIZE); return pud; } #endif #endif #ifndef __HAVE_ARCH_PGTABLE_DEPOSIT void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, pgtable_t pgtable) { assert_spin_locked(pmd_lockptr(mm, pmdp)); /* FIFO */ if (!pmd_huge_pte(mm, pmdp)) INIT_LIST_HEAD(&pgtable->lru); else list_add(&pgtable->lru, &pmd_huge_pte(mm, pmdp)->lru); pmd_huge_pte(mm, pmdp) = pgtable; } #endif #ifndef __HAVE_ARCH_PGTABLE_WITHDRAW /* no "address" argument so destroys page coloring of some arch */ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) { pgtable_t pgtable; assert_spin_locked(pmd_lockptr(mm, pmdp)); /* FIFO */ pgtable = pmd_huge_pte(mm, pmdp); pmd_huge_pte(mm, pmdp) = list_first_entry_or_null(&pgtable->lru, struct page, lru); if (pmd_huge_pte(mm, pmdp)) list_del(&pgtable->lru); return pgtable; } #endif #ifndef __HAVE_ARCH_PMDP_INVALIDATE pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { VM_WARN_ON_ONCE(!pmd_present(*pmdp)); pmd_t old = pmdp_establish(vma, address, pmdp, pmd_mkinvalid(*pmdp)); flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); return old; } #endif #ifndef __HAVE_ARCH_PMDP_INVALIDATE_AD pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { VM_WARN_ON_ONCE(!pmd_present(*pmdp)); return pmdp_invalidate(vma, address, pmdp); } #endif #ifndef pmdp_collapse_flush pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { /* * pmd and hugepage pte format are same. So we could * use the same function. */ pmd_t pmd; VM_BUG_ON(address & ~HPAGE_PMD_MASK); VM_BUG_ON(pmd_trans_huge(*pmdp)); pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp); /* collapse entails shooting down ptes not pmd */ flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); return pmd; } #endif /* arch define pte_free_defer in asm/pgalloc.h for its own implementation */ #ifndef pte_free_defer static void pte_free_now(struct rcu_head *head) { struct page *page; page = container_of(head, struct page, rcu_head); pte_free(NULL /* mm not passed and not used */, (pgtable_t)page); } void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable) { struct page *page; page = pgtable; call_rcu(&page->rcu_head, pte_free_now); } #endif /* pte_free_defer */ #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #if defined(CONFIG_GUP_GET_PXX_LOW_HIGH) && \ (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RCU)) /* * See the comment above ptep_get_lockless() in include/linux/pgtable.h: * the barriers in pmdp_get_lockless() cannot guarantee that the value in * pmd_high actually belongs with the value in pmd_low; but holding interrupts * off blocks the TLB flush between present updates, which guarantees that a * successful __pte_offset_map() points to a page from matched halves. */ static unsigned long pmdp_get_lockless_start(void) { unsigned long irqflags; local_irq_save(irqflags); return irqflags; } static void pmdp_get_lockless_end(unsigned long irqflags) { local_irq_restore(irqflags); } #else static unsigned long pmdp_get_lockless_start(void) { return 0; } static void pmdp_get_lockless_end(unsigned long irqflags) { } #endif pte_t *__pte_offset_map(pmd_t *pmd, unsigned long addr, pmd_t *pmdvalp) { unsigned long irqflags; pmd_t pmdval; rcu_read_lock(); irqflags = pmdp_get_lockless_start(); pmdval = pmdp_get_lockless(pmd); pmdp_get_lockless_end(irqflags); if (pmdvalp) *pmdvalp = pmdval; if (unlikely(pmd_none(pmdval) || is_pmd_migration_entry(pmdval))) goto nomap; if (unlikely(pmd_trans_huge(pmdval) || pmd_devmap(pmdval))) goto nomap; if (unlikely(pmd_bad(pmdval))) { pmd_clear_bad(pmd); goto nomap; } return __pte_map(&pmdval, addr); nomap: rcu_read_unlock(); return NULL; } pte_t *pte_offset_map_nolock(struct mm_struct *mm, pmd_t *pmd, unsigned long addr, spinlock_t **ptlp) { pmd_t pmdval; pte_t *pte; pte = __pte_offset_map(pmd, addr, &pmdval); if (likely(pte)) *ptlp = pte_lockptr(mm, &pmdval); return pte; } /* * pte_offset_map_lock(mm, pmd, addr, ptlp), and its internal implementation * __pte_offset_map_lock() below, is usually called with the pmd pointer for * addr, reached by walking down the mm's pgd, p4d, pud for addr: either while * holding mmap_lock or vma lock for read or for write; or in truncate or rmap * context, while holding file's i_mmap_lock or anon_vma lock for read (or for * write). In a few cases, it may be used with pmd pointing to a pmd_t already * copied to or constructed on the stack. * * When successful, it returns the pte pointer for addr, with its page table * kmapped if necessary (when CONFIG_HIGHPTE), and locked against concurrent * modification by software, with a pointer to that spinlock in ptlp (in some * configs mm->page_table_lock, in SPLIT_PTLOCK configs a spinlock in table's * struct page). pte_unmap_unlock(pte, ptl) to unlock and unmap afterwards. * * But it is unsuccessful, returning NULL with *ptlp unchanged, if there is no * page table at *pmd: if, for example, the page table has just been removed, * or replaced by the huge pmd of a THP. (When successful, *pmd is rechecked * after acquiring the ptlock, and retried internally if it changed: so that a * page table can be safely removed or replaced by THP while holding its lock.) * * pte_offset_map(pmd, addr), and its internal helper __pte_offset_map() above, * just returns the pte pointer for addr, its page table kmapped if necessary; * or NULL if there is no page table at *pmd. It does not attempt to lock the * page table, so cannot normally be used when the page table is to be updated, * or when entries read must be stable. But it does take rcu_read_lock(): so * that even when page table is racily removed, it remains a valid though empty * and disconnected table. Until pte_unmap(pte) unmaps and rcu_read_unlock()s * afterwards. * * pte_offset_map_nolock(mm, pmd, addr, ptlp), above, is like pte_offset_map(); * but when successful, it also outputs a pointer to the spinlock in ptlp - as * pte_offset_map_lock() does, but in this case without locking it. This helps * the caller to avoid a later pte_lockptr(mm, *pmd), which might by that time * act on a changed *pmd: pte_offset_map_nolock() provides the correct spinlock * pointer for the page table that it returns. In principle, the caller should * recheck *pmd once the lock is taken; in practice, no callsite needs that - * either the mmap_lock for write, or pte_same() check on contents, is enough. * * Note that free_pgtables(), used after unmapping detached vmas, or when * exiting the whole mm, does not take page table lock before freeing a page * table, and may not use RCU at all: "outsiders" like khugepaged should avoid * pte_offset_map() and co once the vma is detached from mm or mm_users is zero. */ pte_t *__pte_offset_map_lock(struct mm_struct *mm, pmd_t *pmd, unsigned long addr, spinlock_t **ptlp) { spinlock_t *ptl; pmd_t pmdval; pte_t *pte; again: pte = __pte_offset_map(pmd, addr, &pmdval); if (unlikely(!pte)) return pte; ptl = pte_lockptr(mm, &pmdval); spin_lock(ptl); if (likely(pmd_same(pmdval, pmdp_get_lockless(pmd)))) { *ptlp = ptl; return pte; } pte_unmap_unlock(pte, ptl); goto again; } |
1 5 4 4 1 4 4 1 3 1 2 2 4 4 7 4 4 4 1 1 3 3 3 3 1 3 2 2 2 2 1 2 1 3 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 | /* * FUSE: Filesystem in Userspace * Copyright (C) 2016 Canonical Ltd. <seth.forshee@canonical.com> * * This program can be distributed under the terms of the GNU GPL. * See the file COPYING. */ #include "fuse_i.h" #include <linux/posix_acl.h> #include <linux/posix_acl_xattr.h> static struct posix_acl *__fuse_get_acl(struct fuse_conn *fc, struct inode *inode, int type, bool rcu) { int size; const char *name; void *value = NULL; struct posix_acl *acl; if (rcu) return ERR_PTR(-ECHILD); if (fuse_is_bad(inode)) return ERR_PTR(-EIO); if (fc->no_getxattr) return NULL; if (type == ACL_TYPE_ACCESS) name = XATTR_NAME_POSIX_ACL_ACCESS; else if (type == ACL_TYPE_DEFAULT) name = XATTR_NAME_POSIX_ACL_DEFAULT; else return ERR_PTR(-EOPNOTSUPP); value = kmalloc(PAGE_SIZE, GFP_KERNEL); if (!value) return ERR_PTR(-ENOMEM); size = fuse_getxattr(inode, name, value, PAGE_SIZE); if (size > 0) acl = posix_acl_from_xattr(fc->user_ns, value, size); else if ((size == 0) || (size == -ENODATA) || (size == -EOPNOTSUPP && fc->no_getxattr)) acl = NULL; else if (size == -ERANGE) acl = ERR_PTR(-E2BIG); else acl = ERR_PTR(size); kfree(value); return acl; } static inline bool fuse_no_acl(const struct fuse_conn *fc, const struct inode *inode) { /* * Refuse interacting with POSIX ACLs for daemons that * don't support FUSE_POSIX_ACL and are not mounted on * the host to retain backwards compatibility. */ return !fc->posix_acl && (i_user_ns(inode) != &init_user_ns); } struct posix_acl *fuse_get_acl(struct mnt_idmap *idmap, struct dentry *dentry, int type) { struct inode *inode = d_inode(dentry); struct fuse_conn *fc = get_fuse_conn(inode); if (fuse_no_acl(fc, inode)) return ERR_PTR(-EOPNOTSUPP); return __fuse_get_acl(fc, inode, type, false); } struct posix_acl *fuse_get_inode_acl(struct inode *inode, int type, bool rcu) { struct fuse_conn *fc = get_fuse_conn(inode); /* * FUSE daemons before FUSE_POSIX_ACL was introduced could get and set * POSIX ACLs without them being used for permission checking by the * vfs. Retain that behavior for backwards compatibility as there are * filesystems that do all permission checking for acls in the daemon * and not in the kernel. */ if (!fc->posix_acl) return NULL; return __fuse_get_acl(fc, inode, type, rcu); } int fuse_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type) { struct inode *inode = d_inode(dentry); struct fuse_conn *fc = get_fuse_conn(inode); const char *name; int ret; if (fuse_is_bad(inode)) return -EIO; if (fc->no_setxattr || fuse_no_acl(fc, inode)) return -EOPNOTSUPP; if (type == ACL_TYPE_ACCESS) name = XATTR_NAME_POSIX_ACL_ACCESS; else if (type == ACL_TYPE_DEFAULT) name = XATTR_NAME_POSIX_ACL_DEFAULT; else return -EINVAL; if (acl) { unsigned int extra_flags = 0; /* * Fuse userspace is responsible for updating access * permissions in the inode, if needed. fuse_setxattr * invalidates the inode attributes, which will force * them to be refreshed the next time they are used, * and it also updates i_ctime. */ size_t size = posix_acl_xattr_size(acl->a_count); void *value; if (size > PAGE_SIZE) return -E2BIG; value = kmalloc(size, GFP_KERNEL); if (!value) return -ENOMEM; ret = posix_acl_to_xattr(fc->user_ns, acl, value, size); if (ret < 0) { kfree(value); return ret; } /* * Fuse daemons without FUSE_POSIX_ACL never changed the passed * through POSIX ACLs. Such daemons don't expect setgid bits to * be stripped. */ if (fc->posix_acl && !in_group_or_capable(idmap, inode, i_gid_into_vfsgid(idmap, inode))) extra_flags |= FUSE_SETXATTR_ACL_KILL_SGID; ret = fuse_setxattr(inode, name, value, size, 0, extra_flags); kfree(value); } else { ret = fuse_removexattr(inode, name); } if (fc->posix_acl) { /* * Fuse daemons without FUSE_POSIX_ACL never cached POSIX ACLs * and didn't invalidate attributes. Retain that behavior. */ forget_all_cached_acls(inode); fuse_invalidate_attr(inode); } return ret; } |
2246 1483 1481 52 525 52 52 2253 2256 2251 2261 2255 2246 58 2259 24 24 24 1480 1481 1483 102 102 101 102 102 102 102 180 100 176 2923 2923 2922 323 323 323 323 324 321 3263 3263 3263 3251 180 174 781 782 770 607 79 180 180 180 179 179 137 139 179 102 102 101 102 180 180 101 64 205 20 20 20 20 4 4 180 181 682 684 684 684 680 195 193 195 195 194 39 39 39 39 5 39 349 348 297 294 293 294 3938 3952 2 2 2 2 349 350 349 308 347 5 2 4 4 350 35 324 350 297 297 348 350 349 690 433 305 692 2 2 693 458 449 350 348 321 39 39 39 1 1 349 694 20 20 20 20 20 20 20 20 20 3655 3663 3646 3056 106 106 106 3066 3055 807 3066 3055 2195 156 3654 3294 3291 2629 3296 3216 558 3294 2966 2338 2250 58 2249 2232 58 2247 2254 2249 1478 1480 1682 1217 1684 172 174 173 26 174 26 26 26 26 26 26 26 26 174 174 221 205 126 79 79 201 202 3 3 220 173 174 174 485 483 234 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 | // SPDX-License-Identifier: GPL-2.0-only /* * fs/fs-writeback.c * * Copyright (C) 2002, Linus Torvalds. * * Contains all the functions related to writing back and waiting * upon dirty inodes against superblocks, and writing back dirty * pages against inodes. ie: data writeback. Writeout of the * inode itself is not handled here. * * 10Apr2002 Andrew Morton * Split out of fs/inode.c * Additions for address_space-based writeback */ #include <linux/kernel.h> #include <linux/export.h> #include <linux/spinlock.h> #include <linux/slab.h> #include <linux/sched.h> #include <linux/fs.h> #include <linux/mm.h> #include <linux/pagemap.h> #include <linux/kthread.h> #include <linux/writeback.h> #include <linux/blkdev.h> #include <linux/backing-dev.h> #include <linux/tracepoint.h> #include <linux/device.h> #include <linux/memcontrol.h> #include "internal.h" /* * 4MB minimal write chunk size */ #define MIN_WRITEBACK_PAGES (4096UL >> (PAGE_SHIFT - 10)) /* * Passed into wb_writeback(), essentially a subset of writeback_control */ struct wb_writeback_work { long nr_pages; struct super_block *sb; enum writeback_sync_modes sync_mode; unsigned int tagged_writepages:1; unsigned int for_kupdate:1; unsigned int range_cyclic:1; unsigned int for_background:1; unsigned int for_sync:1; /* sync(2) WB_SYNC_ALL writeback */ unsigned int auto_free:1; /* free on completion */ enum wb_reason reason; /* why was writeback initiated? */ struct list_head list; /* pending work list */ struct wb_completion *done; /* set if the caller waits */ }; /* * If an inode is constantly having its pages dirtied, but then the * updates stop dirtytime_expire_interval seconds in the past, it's * possible for the worst case time between when an inode has its * timestamps updated and when they finally get written out to be two * dirtytime_expire_intervals. We set the default to 12 hours (in * seconds), which means most of the time inodes will have their * timestamps written to disk after 12 hours, but in the worst case a * few inodes might not their timestamps updated for 24 hours. */ unsigned int dirtytime_expire_interval = 12 * 60 * 60; static inline struct inode *wb_inode(struct list_head *head) { return list_entry(head, struct inode, i_io_list); } /* * Include the creation of the trace points after defining the * wb_writeback_work structure and inline functions so that the definition * remains local to this file. */ #define CREATE_TRACE_POINTS #include <trace/events/writeback.h> EXPORT_TRACEPOINT_SYMBOL_GPL(wbc_writepage); static bool wb_io_lists_populated(struct bdi_writeback *wb) { if (wb_has_dirty_io(wb)) { return false; } else { set_bit(WB_has_dirty_io, &wb->state); WARN_ON_ONCE(!wb->avg_write_bandwidth); atomic_long_add(wb->avg_write_bandwidth, &wb->bdi->tot_write_bandwidth); return true; } } static void wb_io_lists_depopulated(struct bdi_writeback *wb) { if (wb_has_dirty_io(wb) && list_empty(&wb->b_dirty) && list_empty(&wb->b_io) && list_empty(&wb->b_more_io)) { clear_bit(WB_has_dirty_io, &wb->state); WARN_ON_ONCE(atomic_long_sub_return(wb->avg_write_bandwidth, &wb->bdi->tot_write_bandwidth) < 0); } } /** * inode_io_list_move_locked - move an inode onto a bdi_writeback IO list * @inode: inode to be moved * @wb: target bdi_writeback * @head: one of @wb->b_{dirty|io|more_io|dirty_time} * * Move @inode->i_io_list to @list of @wb and set %WB_has_dirty_io. * Returns %true if @inode is the first occupant of the !dirty_time IO * lists; otherwise, %false. */ static bool inode_io_list_move_locked(struct inode *inode, struct bdi_writeback *wb, struct list_head *head) { assert_spin_locked(&wb->list_lock); assert_spin_locked(&inode->i_lock); WARN_ON_ONCE(inode->i_state & I_FREEING); list_move(&inode->i_io_list, head); /* dirty_time doesn't count as dirty_io until expiration */ if (head != &wb->b_dirty_time) return wb_io_lists_populated(wb); wb_io_lists_depopulated(wb); return false; } static void wb_wakeup(struct bdi_writeback *wb) { spin_lock_irq(&wb->work_lock); if (test_bit(WB_registered, &wb->state)) mod_delayed_work(bdi_wq, &wb->dwork, 0); spin_unlock_irq(&wb->work_lock); } /* * This function is used when the first inode for this wb is marked dirty. It * wakes-up the corresponding bdi thread which should then take care of the * periodic background write-out of dirty inodes. Since the write-out would * starts only 'dirty_writeback_interval' centisecs from now anyway, we just * set up a timer which wakes the bdi thread up later. * * Note, we wouldn't bother setting up the timer, but this function is on the * fast-path (used by '__mark_inode_dirty()'), so we save few context switches * by delaying the wake-up. * * We have to be careful not to postpone flush work if it is scheduled for * earlier. Thus we use queue_delayed_work(). */ static void wb_wakeup_delayed(struct bdi_writeback *wb) { unsigned long timeout; timeout = msecs_to_jiffies(dirty_writeback_interval * 10); spin_lock_irq(&wb->work_lock); if (test_bit(WB_registered, &wb->state)) queue_delayed_work(bdi_wq, &wb->dwork, timeout); spin_unlock_irq(&wb->work_lock); } static void finish_writeback_work(struct wb_writeback_work *work) { struct wb_completion *done = work->done; if (work->auto_free) kfree(work); if (done) { wait_queue_head_t *waitq = done->waitq; /* @done can't be accessed after the following dec */ if (atomic_dec_and_test(&done->cnt)) wake_up_all(waitq); } } static void wb_queue_work(struct bdi_writeback *wb, struct wb_writeback_work *work) { trace_writeback_queue(wb, work); if (work->done) atomic_inc(&work->done->cnt); spin_lock_irq(&wb->work_lock); if (test_bit(WB_registered, &wb->state)) { list_add_tail(&work->list, &wb->work_list); mod_delayed_work(bdi_wq, &wb->dwork, 0); } else finish_writeback_work(work); spin_unlock_irq(&wb->work_lock); } /** * wb_wait_for_completion - wait for completion of bdi_writeback_works * @done: target wb_completion * * Wait for one or more work items issued to @bdi with their ->done field * set to @done, which should have been initialized with * DEFINE_WB_COMPLETION(). This function returns after all such work items * are completed. Work items which are waited upon aren't freed * automatically on completion. */ void wb_wait_for_completion(struct wb_completion *done) { atomic_dec(&done->cnt); /* put down the initial count */ wait_event(*done->waitq, !atomic_read(&done->cnt)); } #ifdef CONFIG_CGROUP_WRITEBACK /* * Parameters for foreign inode detection, see wbc_detach_inode() to see * how they're used. * * These paramters are inherently heuristical as the detection target * itself is fuzzy. All we want to do is detaching an inode from the * current owner if it's being written to by some other cgroups too much. * * The current cgroup writeback is built on the assumption that multiple * cgroups writing to the same inode concurrently is very rare and a mode * of operation which isn't well supported. As such, the goal is not * taking too long when a different cgroup takes over an inode while * avoiding too aggressive flip-flops from occasional foreign writes. * * We record, very roughly, 2s worth of IO time history and if more than * half of that is foreign, trigger the switch. The recording is quantized * to 16 slots. To avoid tiny writes from swinging the decision too much, * writes smaller than 1/8 of avg size are ignored. */ #define WB_FRN_TIME_SHIFT 13 /* 1s = 2^13, upto 8 secs w/ 16bit */ #define WB_FRN_TIME_AVG_SHIFT 3 /* avg = avg * 7/8 + new * 1/8 */ #define WB_FRN_TIME_CUT_DIV 8 /* ignore rounds < avg / 8 */ #define WB_FRN_TIME_PERIOD (2 * (1 << WB_FRN_TIME_SHIFT)) /* 2s */ #define WB_FRN_HIST_SLOTS 16 /* inode->i_wb_frn_history is 16bit */ #define WB_FRN_HIST_UNIT (WB_FRN_TIME_PERIOD / WB_FRN_HIST_SLOTS) /* each slot's duration is 2s / 16 */ #define WB_FRN_HIST_THR_SLOTS (WB_FRN_HIST_SLOTS / 2) /* if foreign slots >= 8, switch */ #define WB_FRN_HIST_MAX_SLOTS (WB_FRN_HIST_THR_SLOTS / 2 + 1) /* one round can affect upto 5 slots */ #define WB_FRN_MAX_IN_FLIGHT 1024 /* don't queue too many concurrently */ /* * Maximum inodes per isw. A specific value has been chosen to make * struct inode_switch_wbs_context fit into 1024 bytes kmalloc. */ #define WB_MAX_INODES_PER_ISW ((1024UL - sizeof(struct inode_switch_wbs_context)) \ / sizeof(struct inode *)) static atomic_t isw_nr_in_flight = ATOMIC_INIT(0); static struct workqueue_struct *isw_wq; void __inode_attach_wb(struct inode *inode, struct folio *folio) { struct backing_dev_info *bdi = inode_to_bdi(inode); struct bdi_writeback *wb = NULL; if (inode_cgwb_enabled(inode)) { struct cgroup_subsys_state *memcg_css; if (folio) { memcg_css = mem_cgroup_css_from_folio(folio); wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC); } else { /* must pin memcg_css, see wb_get_create() */ memcg_css = task_get_css(current, memory_cgrp_id); wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC); css_put(memcg_css); } } if (!wb) wb = &bdi->wb; /* * There may be multiple instances of this function racing to * update the same inode. Use cmpxchg() to tell the winner. */ if (unlikely(cmpxchg(&inode->i_wb, NULL, wb))) wb_put(wb); } EXPORT_SYMBOL_GPL(__inode_attach_wb); /** * inode_cgwb_move_to_attached - put the inode onto wb->b_attached list * @inode: inode of interest with i_lock held * @wb: target bdi_writeback * * Remove the inode from wb's io lists and if necessarily put onto b_attached * list. Only inodes attached to cgwb's are kept on this list. */ static void inode_cgwb_move_to_attached(struct inode *inode, struct bdi_writeback *wb) { assert_spin_locked(&wb->list_lock); assert_spin_locked(&inode->i_lock); WARN_ON_ONCE(inode->i_state & I_FREEING); inode->i_state &= ~I_SYNC_QUEUED; if (wb != &wb->bdi->wb) list_move(&inode->i_io_list, &wb->b_attached); else list_del_init(&inode->i_io_list); wb_io_lists_depopulated(wb); } /** * locked_inode_to_wb_and_lock_list - determine a locked inode's wb and lock it * @inode: inode of interest with i_lock held * * Returns @inode's wb with its list_lock held. @inode->i_lock must be * held on entry and is released on return. The returned wb is guaranteed * to stay @inode's associated wb until its list_lock is released. */ static struct bdi_writeback * locked_inode_to_wb_and_lock_list(struct inode *inode) __releases(&inode->i_lock) __acquires(&wb->list_lock) { while (true) { struct bdi_writeback *wb = inode_to_wb(inode); /* * inode_to_wb() association is protected by both * @inode->i_lock and @wb->list_lock but list_lock nests * outside i_lock. Drop i_lock and verify that the * association hasn't changed after acquiring list_lock. */ wb_get(wb); spin_unlock(&inode->i_lock); spin_lock(&wb->list_lock); /* i_wb may have changed inbetween, can't use inode_to_wb() */ if (likely(wb == inode->i_wb)) { wb_put(wb); /* @inode already has ref */ return wb; } spin_unlock(&wb->list_lock); wb_put(wb); cpu_relax(); spin_lock(&inode->i_lock); } } /** * inode_to_wb_and_lock_list - determine an inode's wb and lock it * @inode: inode of interest * * Same as locked_inode_to_wb_and_lock_list() but @inode->i_lock isn't held * on entry. */ static struct bdi_writeback *inode_to_wb_and_lock_list(struct inode *inode) __acquires(&wb->list_lock) { spin_lock(&inode->i_lock); return locked_inode_to_wb_and_lock_list(inode); } struct inode_switch_wbs_context { struct rcu_work work; /* * Multiple inodes can be switched at once. The switching procedure * consists of two parts, separated by a RCU grace period. To make * sure that the second part is executed for each inode gone through * the first part, all inode pointers are placed into a NULL-terminated * array embedded into struct inode_switch_wbs_context. Otherwise * an inode could be left in a non-consistent state. */ struct bdi_writeback *new_wb; struct inode *inodes[]; }; static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi) { down_write(&bdi->wb_switch_rwsem); } static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi) { up_write(&bdi->wb_switch_rwsem); } static bool inode_do_switch_wbs(struct inode *inode, struct bdi_writeback *old_wb, struct bdi_writeback *new_wb) { struct address_space *mapping = inode->i_mapping; XA_STATE(xas, &mapping->i_pages, 0); struct folio *folio; bool switched = false; spin_lock(&inode->i_lock); xa_lock_irq(&mapping->i_pages); /* * Once I_FREEING or I_WILL_FREE are visible under i_lock, the eviction * path owns the inode and we shouldn't modify ->i_io_list. */ if (unlikely(inode->i_state & (I_FREEING | I_WILL_FREE))) goto skip_switch; trace_inode_switch_wbs(inode, old_wb, new_wb); /* * Count and transfer stats. Note that PAGECACHE_TAG_DIRTY points * to possibly dirty folios while PAGECACHE_TAG_WRITEBACK points to * folios actually under writeback. */ xas_for_each_marked(&xas, folio, ULONG_MAX, PAGECACHE_TAG_DIRTY) { if (folio_test_dirty(folio)) { long nr = folio_nr_pages(folio); wb_stat_mod(old_wb, WB_RECLAIMABLE, -nr); wb_stat_mod(new_wb, WB_RECLAIMABLE, nr); } } xas_set(&xas, 0); xas_for_each_marked(&xas, folio, ULONG_MAX, PAGECACHE_TAG_WRITEBACK) { long nr = folio_nr_pages(folio); WARN_ON_ONCE(!folio_test_writeback(folio)); wb_stat_mod(old_wb, WB_WRITEBACK, -nr); wb_stat_mod(new_wb, WB_WRITEBACK, nr); } if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) { atomic_dec(&old_wb->writeback_inodes); atomic_inc(&new_wb->writeback_inodes); } wb_get(new_wb); /* * Transfer to @new_wb's IO list if necessary. If the @inode is dirty, * the specific list @inode was on is ignored and the @inode is put on * ->b_dirty which is always correct including from ->b_dirty_time. * The transfer preserves @inode->dirtied_when ordering. If the @inode * was clean, it means it was on the b_attached list, so move it onto * the b_attached list of @new_wb. */ if (!list_empty(&inode->i_io_list)) { inode->i_wb = new_wb; if (inode->i_state & I_DIRTY_ALL) { struct inode *pos; list_for_each_entry(pos, &new_wb->b_dirty, i_io_list) if (time_after_eq(inode->dirtied_when, pos->dirtied_when)) break; inode_io_list_move_locked(inode, new_wb, pos->i_io_list.prev); } else { inode_cgwb_move_to_attached(inode, new_wb); } } else { inode->i_wb = new_wb; } /* ->i_wb_frn updates may race wbc_detach_inode() but doesn't matter */ inode->i_wb_frn_winner = 0; inode->i_wb_frn_avg_time = 0; inode->i_wb_frn_history = 0; switched = true; skip_switch: /* * Paired with load_acquire in unlocked_inode_to_wb_begin() and * ensures that the new wb is visible if they see !I_WB_SWITCH. */ smp_store_release(&inode->i_state, inode->i_state & ~I_WB_SWITCH); xa_unlock_irq(&mapping->i_pages); spin_unlock(&inode->i_lock); return switched; } static void inode_switch_wbs_work_fn(struct work_struct *work) { struct inode_switch_wbs_context *isw = container_of(to_rcu_work(work), struct inode_switch_wbs_context, work); struct backing_dev_info *bdi = inode_to_bdi(isw->inodes[0]); struct bdi_writeback *old_wb = isw->inodes[0]->i_wb; struct bdi_writeback *new_wb = isw->new_wb; unsigned long nr_switched = 0; struct inode **inodep; /* * If @inode switches cgwb membership while sync_inodes_sb() is * being issued, sync_inodes_sb() might miss it. Synchronize. */ down_read(&bdi->wb_switch_rwsem); /* * By the time control reaches here, RCU grace period has passed * since I_WB_SWITCH assertion and all wb stat update transactions * between unlocked_inode_to_wb_begin/end() are guaranteed to be * synchronizing against the i_pages lock. * * Grabbing old_wb->list_lock, inode->i_lock and the i_pages lock * gives us exclusion against all wb related operations on @inode * including IO list manipulations and stat updates. */ if (old_wb < new_wb) { spin_lock(&old_wb->list_lock); spin_lock_nested(&new_wb->list_lock, SINGLE_DEPTH_NESTING); } else { spin_lock(&new_wb->list_lock); spin_lock_nested(&old_wb->list_lock, SINGLE_DEPTH_NESTING); } for (inodep = isw->inodes; *inodep; inodep++) { WARN_ON_ONCE((*inodep)->i_wb != old_wb); if (inode_do_switch_wbs(*inodep, old_wb, new_wb)) nr_switched++; } spin_unlock(&new_wb->list_lock); spin_unlock(&old_wb->list_lock); up_read(&bdi->wb_switch_rwsem); if (nr_switched) { wb_wakeup(new_wb); wb_put_many(old_wb, nr_switched); } for (inodep = isw->inodes; *inodep; inodep++) iput(*inodep); wb_put(new_wb); kfree(isw); atomic_dec(&isw_nr_in_flight); } static bool inode_prepare_wbs_switch(struct inode *inode, struct bdi_writeback *new_wb) { /* * Paired with smp_mb() in cgroup_writeback_umount(). * isw_nr_in_flight must be increased before checking SB_ACTIVE and * grabbing an inode, otherwise isw_nr_in_flight can be observed as 0 * in cgroup_writeback_umount() and the isw_wq will be not flushed. */ smp_mb(); if (IS_DAX(inode)) return false; /* while holding I_WB_SWITCH, no one else can update the association */ spin_lock(&inode->i_lock); if (!(inode->i_sb->s_flags & SB_ACTIVE) || inode->i_state & (I_WB_SWITCH | I_FREEING | I_WILL_FREE) || inode_to_wb(inode) == new_wb) { spin_unlock(&inode->i_lock); return false; } inode->i_state |= I_WB_SWITCH; __iget(inode); spin_unlock(&inode->i_lock); return true; } /** * inode_switch_wbs - change the wb association of an inode * @inode: target inode * @new_wb_id: ID of the new wb * * Switch @inode's wb association to the wb identified by @new_wb_id. The * switching is performed asynchronously and may fail silently. */ static void inode_switch_wbs(struct inode *inode, int new_wb_id) { struct backing_dev_info *bdi = inode_to_bdi(inode); struct cgroup_subsys_state *memcg_css; struct inode_switch_wbs_context *isw; /* noop if seems to be already in progress */ if (inode->i_state & I_WB_SWITCH) return; /* avoid queueing a new switch if too many are already in flight */ if (atomic_read(&isw_nr_in_flight) > WB_FRN_MAX_IN_FLIGHT) return; isw = kzalloc(struct_size(isw, inodes, 2), GFP_ATOMIC); if (!isw) return; atomic_inc(&isw_nr_in_flight); /* find and pin the new wb */ rcu_read_lock(); memcg_css = css_from_id(new_wb_id, &memory_cgrp_subsys); if (memcg_css && !css_tryget(memcg_css)) memcg_css = NULL; rcu_read_unlock(); if (!memcg_css) goto out_free; isw->new_wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC); css_put(memcg_css); if (!isw->new_wb) goto out_free; if (!inode_prepare_wbs_switch(inode, isw->new_wb)) goto out_free; isw->inodes[0] = inode; /* * In addition to synchronizing among switchers, I_WB_SWITCH tells * the RCU protected stat update paths to grab the i_page * lock so that stat transfer can synchronize against them. * Let's continue after I_WB_SWITCH is guaranteed to be visible. */ INIT_RCU_WORK(&isw->work, inode_switch_wbs_work_fn); queue_rcu_work(isw_wq, &isw->work); return; out_free: atomic_dec(&isw_nr_in_flight); if (isw->new_wb) wb_put(isw->new_wb); kfree(isw); } static bool isw_prepare_wbs_switch(struct inode_switch_wbs_context *isw, struct list_head *list, int *nr) { struct inode *inode; list_for_each_entry(inode, list, i_io_list) { if (!inode_prepare_wbs_switch(inode, isw->new_wb)) continue; isw->inodes[*nr] = inode; (*nr)++; if (*nr >= WB_MAX_INODES_PER_ISW - 1) return true; } return false; } /** * cleanup_offline_cgwb - detach associated inodes * @wb: target wb * * Switch all inodes attached to @wb to a nearest living ancestor's wb in order * to eventually release the dying @wb. Returns %true if not all inodes were * switched and the function has to be restarted. */ bool cleanup_offline_cgwb(struct bdi_writeback *wb) { struct cgroup_subsys_state *memcg_css; struct inode_switch_wbs_context *isw; int nr; bool restart = false; isw = kzalloc(struct_size(isw, inodes, WB_MAX_INODES_PER_ISW), GFP_KERNEL); if (!isw) return restart; atomic_inc(&isw_nr_in_flight); for (memcg_css = wb->memcg_css->parent; memcg_css; memcg_css = memcg_css->parent) { isw->new_wb = wb_get_create(wb->bdi, memcg_css, GFP_KERNEL); if (isw->new_wb) break; } if (unlikely(!isw->new_wb)) isw->new_wb = &wb->bdi->wb; /* wb_get() is noop for bdi's wb */ nr = 0; spin_lock(&wb->list_lock); /* * In addition to the inodes that have completed writeback, also switch * cgwbs for those inodes only with dirty timestamps. Otherwise, those * inodes won't be written back for a long time when lazytime is * enabled, and thus pinning the dying cgwbs. It won't break the * bandwidth restrictions, as writeback of inode metadata is not * accounted for. */ restart = isw_prepare_wbs_switch(isw, &wb->b_attached, &nr); if (!restart) restart = isw_prepare_wbs_switch(isw, &wb->b_dirty_time, &nr); spin_unlock(&wb->list_lock); /* no attached inodes? bail out */ if (nr == 0) { atomic_dec(&isw_nr_in_flight); wb_put(isw->new_wb); kfree(isw); return restart; } /* * In addition to synchronizing among switchers, I_WB_SWITCH tells * the RCU protected stat update paths to grab the i_page * lock so that stat transfer can synchronize against them. * Let's continue after I_WB_SWITCH is guaranteed to be visible. */ INIT_RCU_WORK(&isw->work, inode_switch_wbs_work_fn); queue_rcu_work(isw_wq, &isw->work); return restart; } /** * wbc_attach_and_unlock_inode - associate wbc with target inode and unlock it * @wbc: writeback_control of interest * @inode: target inode * * @inode is locked and about to be written back under the control of @wbc. * Record @inode's writeback context into @wbc and unlock the i_lock. On * writeback completion, wbc_detach_inode() should be called. This is used * to track the cgroup writeback context. */ void wbc_attach_and_unlock_inode(struct writeback_control *wbc, struct inode *inode) { if (!inode_cgwb_enabled(inode)) { spin_unlock(&inode->i_lock); return; } wbc->wb = inode_to_wb(inode); wbc->inode = inode; wbc->wb_id = wbc->wb->memcg_css->id; wbc->wb_lcand_id = inode->i_wb_frn_winner; wbc->wb_tcand_id = 0; wbc->wb_bytes = 0; wbc->wb_lcand_bytes = 0; wbc->wb_tcand_bytes = 0; wb_get(wbc->wb); spin_unlock(&inode->i_lock); /* * A dying wb indicates that either the blkcg associated with the * memcg changed or the associated memcg is dying. In the first * case, a replacement wb should already be available and we should * refresh the wb immediately. In the second case, trying to * refresh will keep failing. */ if (unlikely(wb_dying(wbc->wb) && !css_is_dying(wbc->wb->memcg_css))) inode_switch_wbs(inode, wbc->wb_id); } EXPORT_SYMBOL_GPL(wbc_attach_and_unlock_inode); /** * wbc_detach_inode - disassociate wbc from inode and perform foreign detection * @wbc: writeback_control of the just finished writeback * * To be called after a writeback attempt of an inode finishes and undoes * wbc_attach_and_unlock_inode(). Can be called under any context. * * As concurrent write sharing of an inode is expected to be very rare and * memcg only tracks page ownership on first-use basis severely confining * the usefulness of such sharing, cgroup writeback tracks ownership * per-inode. While the support for concurrent write sharing of an inode * is deemed unnecessary, an inode being written to by different cgroups at * different points in time is a lot more common, and, more importantly, * charging only by first-use can too readily lead to grossly incorrect * behaviors (single foreign page can lead to gigabytes of writeback to be * incorrectly attributed). * * To resolve this issue, cgroup writeback detects the majority dirtier of * an inode and transfers the ownership to it. To avoid unnecessary * oscillation, the detection mechanism keeps track of history and gives * out the switch verdict only if the foreign usage pattern is stable over * a certain amount of time and/or writeback attempts. * * On each writeback attempt, @wbc tries to detect the majority writer * using Boyer-Moore majority vote algorithm. In addition to the byte * count from the majority voting, it also counts the bytes written for the * current wb and the last round's winner wb (max of last round's current * wb, the winner from two rounds ago, and the last round's majority * candidate). Keeping track of the historical winner helps the algorithm * to semi-reliably detect the most active writer even when it's not the * absolute majority. * * Once the winner of the round is determined, whether the winner is * foreign or not and how much IO time the round consumed is recorded in * inode->i_wb_frn_history. If the amount of recorded foreign IO time is * over a certain threshold, the switch verdict is given. */ void wbc_detach_inode(struct writeback_control *wbc) { struct bdi_writeback *wb = wbc->wb; struct inode *inode = wbc->inode; unsigned long avg_time, max_bytes, max_time; u16 history; int max_id; if (!wb) return; history = inode->i_wb_frn_history; avg_time = inode->i_wb_frn_avg_time; /* pick the winner of this round */ if (wbc->wb_bytes >= wbc->wb_lcand_bytes && wbc->wb_bytes >= wbc->wb_tcand_bytes) { max_id = wbc->wb_id; max_bytes = wbc->wb_bytes; } else if (wbc->wb_lcand_bytes >= wbc->wb_tcand_bytes) { max_id = wbc->wb_lcand_id; max_bytes = wbc->wb_lcand_bytes; } else { max_id = wbc->wb_tcand_id; max_bytes = wbc->wb_tcand_bytes; } /* * Calculate the amount of IO time the winner consumed and fold it * into the running average kept per inode. If the consumed IO * time is lower than avag / WB_FRN_TIME_CUT_DIV, ignore it for * deciding whether to switch or not. This is to prevent one-off * small dirtiers from skewing the verdict. */ max_time = DIV_ROUND_UP((max_bytes >> PAGE_SHIFT) << WB_FRN_TIME_SHIFT, wb->avg_write_bandwidth); if (avg_time) avg_time += (max_time >> WB_FRN_TIME_AVG_SHIFT) - (avg_time >> WB_FRN_TIME_AVG_SHIFT); else avg_time = max_time; /* immediate catch up on first run */ if (max_time >= avg_time / WB_FRN_TIME_CUT_DIV) { int slots; /* * The switch verdict is reached if foreign wb's consume * more than a certain proportion of IO time in a * WB_FRN_TIME_PERIOD. This is loosely tracked by 16 slot * history mask where each bit represents one sixteenth of * the period. Determine the number of slots to shift into * history from @max_time. */ slots = min(DIV_ROUND_UP(max_time, WB_FRN_HIST_UNIT), (unsigned long)WB_FRN_HIST_MAX_SLOTS); history <<= slots; if (wbc->wb_id != max_id) history |= (1U << slots) - 1; if (history) trace_inode_foreign_history(inode, wbc, history); /* * Switch if the current wb isn't the consistent winner. * If there are multiple closely competing dirtiers, the * inode may switch across them repeatedly over time, which * is okay. The main goal is avoiding keeping an inode on * the wrong wb for an extended period of time. */ if (hweight16(history) > WB_FRN_HIST_THR_SLOTS) inode_switch_wbs(inode, max_id); } /* * Multiple instances of this function may race to update the * following fields but we don't mind occassional inaccuracies. */ inode->i_wb_frn_winner = max_id; inode->i_wb_frn_avg_time = min(avg_time, (unsigned long)U16_MAX); inode->i_wb_frn_history = history; wb_put(wbc->wb); wbc->wb = NULL; } EXPORT_SYMBOL_GPL(wbc_detach_inode); /** * wbc_account_cgroup_owner - account writeback to update inode cgroup ownership * @wbc: writeback_control of the writeback in progress * @page: page being written out * @bytes: number of bytes being written out * * @bytes from @page are about to written out during the writeback * controlled by @wbc. Keep the book for foreign inode detection. See * wbc_detach_inode(). */ void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page, size_t bytes) { struct folio *folio; struct cgroup_subsys_state *css; int id; /* * pageout() path doesn't attach @wbc to the inode being written * out. This is intentional as we don't want the function to block * behind a slow cgroup. Ultimately, we want pageout() to kick off * regular writeback instead of writing things out itself. */ if (!wbc->wb || wbc->no_cgroup_owner) return; folio = page_folio(page); css = mem_cgroup_css_from_folio(folio); /* dead cgroups shouldn't contribute to inode ownership arbitration */ if (!(css->flags & CSS_ONLINE)) return; id = css->id; if (id == wbc->wb_id) { wbc->wb_bytes += bytes; return; } if (id == wbc->wb_lcand_id) wbc->wb_lcand_bytes += bytes; /* Boyer-Moore majority vote algorithm */ if (!wbc->wb_tcand_bytes) wbc->wb_tcand_id = id; if (id == wbc->wb_tcand_id) wbc->wb_tcand_bytes += bytes; else wbc->wb_tcand_bytes -= min(bytes, wbc->wb_tcand_bytes); } EXPORT_SYMBOL_GPL(wbc_account_cgroup_owner); /** * wb_split_bdi_pages - split nr_pages to write according to bandwidth * @wb: target bdi_writeback to split @nr_pages to * @nr_pages: number of pages to write for the whole bdi * * Split @wb's portion of @nr_pages according to @wb's write bandwidth in * relation to the total write bandwidth of all wb's w/ dirty inodes on * @wb->bdi. */ static long wb_split_bdi_pages(struct bdi_writeback *wb, long nr_pages) { unsigned long this_bw = wb->avg_write_bandwidth; unsigned long tot_bw = atomic_long_read(&wb->bdi->tot_write_bandwidth); if (nr_pages == LONG_MAX) return LONG_MAX; /* * This may be called on clean wb's and proportional distribution * may not make sense, just use the original @nr_pages in those * cases. In general, we wanna err on the side of writing more. */ if (!tot_bw || this_bw >= tot_bw) return nr_pages; else return DIV_ROUND_UP_ULL((u64)nr_pages * this_bw, tot_bw); } /** * bdi_split_work_to_wbs - split a wb_writeback_work to all wb's of a bdi * @bdi: target backing_dev_info * @base_work: wb_writeback_work to issue * @skip_if_busy: skip wb's which already have writeback in progress * * Split and issue @base_work to all wb's (bdi_writeback's) of @bdi which * have dirty inodes. If @base_work->nr_page isn't %LONG_MAX, it's * distributed to the busy wbs according to each wb's proportion in the * total active write bandwidth of @bdi. */ static void bdi_split_work_to_wbs(struct backing_dev_info *bdi, struct wb_writeback_work *base_work, bool skip_if_busy) { struct bdi_writeback *last_wb = NULL; struct bdi_writeback *wb = list_entry(&bdi->wb_list, struct bdi_writeback, bdi_node); might_sleep(); restart: rcu_read_lock(); list_for_each_entry_continue_rcu(wb, &bdi->wb_list, bdi_node) { DEFINE_WB_COMPLETION(fallback_work_done, bdi); struct wb_writeback_work fallback_work; struct wb_writeback_work *work; long nr_pages; if (last_wb) { wb_put(last_wb); last_wb = NULL; } /* SYNC_ALL writes out I_DIRTY_TIME too */ if (!wb_has_dirty_io(wb) && (base_work->sync_mode == WB_SYNC_NONE || list_empty(&wb->b_dirty_time))) continue; if (skip_if_busy && writeback_in_progress(wb)) continue; nr_pages = wb_split_bdi_pages(wb, base_work->nr_pages); work = kmalloc(sizeof(*work), GFP_ATOMIC); if (work) { *work = *base_work; work->nr_pages = nr_pages; work->auto_free = 1; wb_queue_work(wb, work); continue; } /* * If wb_tryget fails, the wb has been shutdown, skip it. * * Pin @wb so that it stays on @bdi->wb_list. This allows * continuing iteration from @wb after dropping and * regrabbing rcu read lock. */ if (!wb_tryget(wb)) continue; /* alloc failed, execute synchronously using on-stack fallback */ work = &fallback_work; *work = *base_work; work->nr_pages = nr_pages; work->auto_free = 0; work->done = &fallback_work_done; wb_queue_work(wb, work); last_wb = wb; rcu_read_unlock(); wb_wait_for_completion(&fallback_work_done); goto restart; } rcu_read_unlock(); if (last_wb) wb_put(last_wb); } /** * cgroup_writeback_by_id - initiate cgroup writeback from bdi and memcg IDs * @bdi_id: target bdi id * @memcg_id: target memcg css id * @reason: reason why some writeback work initiated * @done: target wb_completion * * Initiate flush of the bdi_writeback identified by @bdi_id and @memcg_id * with the specified parameters. */ int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, enum wb_reason reason, struct wb_completion *done) { struct backing_dev_info *bdi; struct cgroup_subsys_state *memcg_css; struct bdi_writeback *wb; struct wb_writeback_work *work; unsigned long dirty; int ret; /* lookup bdi and memcg */ bdi = bdi_get_by_id(bdi_id); if (!bdi) return -ENOENT; rcu_read_lock(); memcg_css = css_from_id(memcg_id, &memory_cgrp_subsys); if (memcg_css && !css_tryget(memcg_css)) memcg_css = NULL; rcu_read_unlock(); if (!memcg_css) { ret = -ENOENT; goto out_bdi_put; } /* * And find the associated wb. If the wb isn't there already * there's nothing to flush, don't create one. */ wb = wb_get_lookup(bdi, memcg_css); if (!wb) { ret = -ENOENT; goto out_css_put; } /* * The caller is attempting to write out most of * the currently dirty pages. Let's take the current dirty page * count and inflate it by 25% which should be large enough to * flush out most dirty pages while avoiding getting livelocked by * concurrent dirtiers. * * BTW the memcg stats are flushed periodically and this is best-effort * estimation, so some potential error is ok. */ dirty = memcg_page_state(mem_cgroup_from_css(memcg_css), NR_FILE_DIRTY); dirty = dirty * 10 / 8; /* issue the writeback work */ work = kzalloc(sizeof(*work), GFP_NOWAIT | __GFP_NOWARN); if (work) { work->nr_pages = dirty; work->sync_mode = WB_SYNC_NONE; work->range_cyclic = 1; work->reason = reason; work->done = done; work->auto_free = 1; wb_queue_work(wb, work); ret = 0; } else { ret = -ENOMEM; } wb_put(wb); out_css_put: css_put(memcg_css); out_bdi_put: bdi_put(bdi); return ret; } /** * cgroup_writeback_umount - flush inode wb switches for umount * @sb: target super_block * * This function is called when a super_block is about to be destroyed and * flushes in-flight inode wb switches. An inode wb switch goes through * RCU and then workqueue, so the two need to be flushed in order to ensure * that all previously scheduled switches are finished. As wb switches are * rare occurrences and synchronize_rcu() can take a while, perform * flushing iff wb switches are in flight. */ void cgroup_writeback_umount(struct super_block *sb) { if (!(sb->s_bdi->capabilities & BDI_CAP_WRITEBACK)) return; /* * SB_ACTIVE should be reliably cleared before checking * isw_nr_in_flight, see generic_shutdown_super(). */ smp_mb(); if (atomic_read(&isw_nr_in_flight)) { /* * Use rcu_barrier() to wait for all pending callbacks to * ensure that all in-flight wb switches are in the workqueue. */ rcu_barrier(); flush_workqueue(isw_wq); } } static int __init cgroup_writeback_init(void) { isw_wq = alloc_workqueue("inode_switch_wbs", 0, 0); if (!isw_wq) return -ENOMEM; return 0; } fs_initcall(cgroup_writeback_init); #else /* CONFIG_CGROUP_WRITEBACK */ static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi) { } static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi) { } static void inode_cgwb_move_to_attached(struct inode *inode, struct bdi_writeback *wb) { assert_spin_locked(&wb->list_lock); assert_spin_locked(&inode->i_lock); WARN_ON_ONCE(inode->i_state & I_FREEING); inode->i_state &= ~I_SYNC_QUEUED; list_del_init(&inode->i_io_list); wb_io_lists_depopulated(wb); } static struct bdi_writeback * locked_inode_to_wb_and_lock_list(struct inode *inode) __releases(&inode->i_lock) __acquires(&wb->list_lock) { struct bdi_writeback *wb = inode_to_wb(inode); spin_unlock(&inode->i_lock); spin_lock(&wb->list_lock); return wb; } static struct bdi_writeback *inode_to_wb_and_lock_list(struct inode *inode) __acquires(&wb->list_lock) { struct bdi_writeback *wb = inode_to_wb(inode); spin_lock(&wb->list_lock); return wb; } static long wb_split_bdi_pages(struct bdi_writeback *wb, long nr_pages) { return nr_pages; } static void bdi_split_work_to_wbs(struct backing_dev_info *bdi, struct wb_writeback_work *base_work, bool skip_if_busy) { might_sleep(); if (!skip_if_busy || !writeback_in_progress(&bdi->wb)) { base_work->auto_free = 0; wb_queue_work(&bdi->wb, base_work); } } #endif /* CONFIG_CGROUP_WRITEBACK */ /* * Add in the number of potentially dirty inodes, because each inode * write can dirty pagecache in the underlying blockdev. */ static unsigned long get_nr_dirty_pages(void) { return global_node_page_state(NR_FILE_DIRTY) + get_nr_dirty_inodes(); } static void wb_start_writeback(struct bdi_writeback *wb, enum wb_reason reason) { if (!wb_has_dirty_io(wb)) return; /* * All callers of this function want to start writeback of all * dirty pages. Places like vmscan can call this at a very * high frequency, causing pointless allocations of tons of * work items and keeping the flusher threads busy retrieving * that work. Ensure that we only allow one of them pending and * inflight at the time. */ if (test_bit(WB_start_all, &wb->state) || test_and_set_bit(WB_start_all, &wb->state)) return; wb->start_all_reason = reason; wb_wakeup(wb); } /** * wb_start_background_writeback - start background writeback * @wb: bdi_writback to write from * * Description: * This makes sure WB_SYNC_NONE background writeback happens. When * this function returns, it is only guaranteed that for given wb * some IO is happening if we are over background dirty threshold. * Caller need not hold sb s_umount semaphore. */ void wb_start_background_writeback(struct bdi_writeback *wb) { /* * We just wake up the flusher thread. It will perform background * writeback as soon as there is no other work to do. */ trace_writeback_wake_background(wb); wb_wakeup(wb); } /* * Remove the inode from the writeback list it is on. */ void inode_io_list_del(struct inode *inode) { struct bdi_writeback *wb; wb = inode_to_wb_and_lock_list(inode); spin_lock(&inode->i_lock); inode->i_state &= ~I_SYNC_QUEUED; list_del_init(&inode->i_io_list); wb_io_lists_depopulated(wb); spin_unlock(&inode->i_lock); spin_unlock(&wb->list_lock); } EXPORT_SYMBOL(inode_io_list_del); /* * mark an inode as under writeback on the sb */ void sb_mark_inode_writeback(struct inode *inode) { struct super_block *sb = inode->i_sb; unsigned long flags; if (list_empty(&inode->i_wb_list)) { spin_lock_irqsave(&sb->s_inode_wblist_lock, flags); if (list_empty(&inode->i_wb_list)) { list_add_tail(&inode->i_wb_list, &sb->s_inodes_wb); trace_sb_mark_inode_writeback(inode); } spin_unlock_irqrestore(&sb->s_inode_wblist_lock, flags); } } /* * clear an inode as under writeback on the sb */ void sb_clear_inode_writeback(struct inode *inode) { struct super_block *sb = inode->i_sb; unsigned long flags; if (!list_empty(&inode->i_wb_list)) { spin_lock_irqsave(&sb->s_inode_wblist_lock, flags); if (!list_empty(&inode->i_wb_list)) { list_del_init(&inode->i_wb_list); trace_sb_clear_inode_writeback(inode); } spin_unlock_irqrestore(&sb->s_inode_wblist_lock, flags); } } /* * Redirty an inode: set its when-it-was dirtied timestamp and move it to the * furthest end of its superblock's dirty-inode list. * * Before stamping the inode's ->dirtied_when, we check to see whether it is * already the most-recently-dirtied inode on the b_dirty list. If that is * the case then the inode must have been redirtied while it was being written * out and we don't reset its dirtied_when. */ static void redirty_tail_locked(struct inode *inode, struct bdi_writeback *wb) { assert_spin_locked(&inode->i_lock); inode->i_state &= ~I_SYNC_QUEUED; /* * When the inode is being freed just don't bother with dirty list * tracking. Flush worker will ignore this inode anyway and it will * trigger assertions in inode_io_list_move_locked(). */ if (inode->i_state & I_FREEING) { list_del_init(&inode->i_io_list); wb_io_lists_depopulated(wb); return; } if (!list_empty(&wb->b_dirty)) { struct inode *tail; tail = wb_inode(wb->b_dirty.next); if (time_before(inode->dirtied_when, tail->dirtied_when)) inode->dirtied_when = jiffies; } inode_io_list_move_locked(inode, wb, &wb->b_dirty); } static void redirty_tail(struct inode *inode, struct bdi_writeback *wb) { spin_lock(&inode->i_lock); redirty_tail_locked(inode, wb); spin_unlock(&inode->i_lock); } /* * requeue inode for re-scanning after bdi->b_io list is exhausted. */ static void requeue_io(struct inode *inode, struct bdi_writeback *wb) { inode_io_list_move_locked(inode, wb, &wb->b_more_io); } static void inode_sync_complete(struct inode *inode) { assert_spin_locked(&inode->i_lock); inode->i_state &= ~I_SYNC; /* If inode is clean an unused, put it into LRU now... */ inode_add_lru(inode); /* Called with inode->i_lock which ensures memory ordering. */ inode_wake_up_bit(inode, __I_SYNC); } static bool inode_dirtied_after(struct inode *inode, unsigned long t) { bool ret = time_after(inode->dirtied_when, t); #ifndef CONFIG_64BIT /* * For inodes being constantly redirtied, dirtied_when can get stuck. * It _appears_ to be in the future, but is actually in distant past. * This test is necessary to prevent such wrapped-around relative times * from permanently stopping the whole bdi writeback. */ ret = ret && time_before_eq(inode->dirtied_when, jiffies); #endif return ret; } /* * Move expired (dirtied before dirtied_before) dirty inodes from * @delaying_queue to @dispatch_queue. */ static int move_expired_inodes(struct list_head *delaying_queue, struct list_head *dispatch_queue, unsigned long dirtied_before) { LIST_HEAD(tmp); struct list_head *pos, *node; struct super_block *sb = NULL; struct inode *inode; int do_sb_sort = 0; int moved = 0; while (!list_empty(delaying_queue)) { inode = wb_inode(delaying_queue->prev); if (inode_dirtied_after(inode, dirtied_before)) break; spin_lock(&inode->i_lock); list_move(&inode->i_io_list, &tmp); moved++; inode->i_state |= I_SYNC_QUEUED; spin_unlock(&inode->i_lock); if (sb_is_blkdev_sb(inode->i_sb)) continue; if (sb && sb != inode->i_sb) do_sb_sort = 1; sb = inode->i_sb; } /* just one sb in list, splice to dispatch_queue and we're done */ if (!do_sb_sort) { list_splice(&tmp, dispatch_queue); goto out; } /* * Although inode's i_io_list is moved from 'tmp' to 'dispatch_queue', * we don't take inode->i_lock here because it is just a pointless overhead. * Inode is already marked as I_SYNC_QUEUED so writeback list handling is * fully under our control. */ while (!list_empty(&tmp)) { sb = wb_inode(tmp.prev)->i_sb; list_for_each_prev_safe(pos, node, &tmp) { inode = wb_inode(pos); if (inode->i_sb == sb) list_move(&inode->i_io_list, dispatch_queue); } } out: return moved; } /* * Queue all expired dirty inodes for io, eldest first. * Before * newly dirtied b_dirty b_io b_more_io * =============> gf edc BA * After * newly dirtied b_dirty b_io b_more_io * =============> g fBAedc * | * +--> dequeue for IO */ static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work, unsigned long dirtied_before) { int moved; unsigned long time_expire_jif = dirtied_before; assert_spin_locked(&wb->list_lock); list_splice_init(&wb->b_more_io, &wb->b_io); moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, dirtied_before); if (!work->for_sync) time_expire_jif = jiffies - dirtytime_expire_interval * HZ; moved += move_expired_inodes(&wb->b_dirty_time, &wb->b_io, time_expire_jif); if (moved) wb_io_lists_populated(wb); trace_writeback_queue_io(wb, work, dirtied_before, moved); } static int write_inode(struct inode *inode, struct writeback_control *wbc) { int ret; if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode)) { trace_writeback_write_inode_start(inode, wbc); ret = inode->i_sb->s_op->write_inode(inode, wbc); trace_writeback_write_inode(inode, wbc); return ret; } return 0; } /* * Wait for writeback on an inode to complete. Called with i_lock held. * Caller must make sure inode cannot go away when we drop i_lock. */ void inode_wait_for_writeback(struct inode *inode) { struct wait_bit_queue_entry wqe; struct wait_queue_head *wq_head; assert_spin_locked(&inode->i_lock); if (!(inode->i_state & I_SYNC)) return; wq_head = inode_bit_waitqueue(&wqe, inode, __I_SYNC); for (;;) { prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE); /* Checking I_SYNC with inode->i_lock guarantees memory ordering. */ if (!(inode->i_state & I_SYNC)) break; spin_unlock(&inode->i_lock); schedule(); spin_lock(&inode->i_lock); } finish_wait(wq_head, &wqe.wq_entry); } /* * Sleep until I_SYNC is cleared. This function must be called with i_lock * held and drops it. It is aimed for callers not holding any inode reference * so once i_lock is dropped, inode can go away. */ static void inode_sleep_on_writeback(struct inode *inode) __releases(inode->i_lock) { struct wait_bit_queue_entry wqe; struct wait_queue_head *wq_head; bool sleep; assert_spin_locked(&inode->i_lock); wq_head = inode_bit_waitqueue(&wqe, inode, __I_SYNC); prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE); /* Checking I_SYNC with inode->i_lock guarantees memory ordering. */ sleep = !!(inode->i_state & I_SYNC); spin_unlock(&inode->i_lock); if (sleep) schedule(); finish_wait(wq_head, &wqe.wq_entry); } /* * Find proper writeback list for the inode depending on its current state and * possibly also change of its state while we were doing writeback. Here we * handle things such as livelock prevention or fairness of writeback among * inodes. This function can be called only by flusher thread - noone else * processes all inodes in writeback lists and requeueing inodes behind flusher * thread's back can have unexpected consequences. */ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb, struct writeback_control *wbc, unsigned long dirtied_before) { if (inode->i_state & I_FREEING) return; /* * Sync livelock prevention. Each inode is tagged and synced in one * shot. If still dirty, it will be redirty_tail()'ed below. Update * the dirty time to prevent enqueue and sync it again. */ if ((inode->i_state & I_DIRTY) && (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)) inode->dirtied_when = jiffies; if (wbc->pages_skipped) { /* * Writeback is not making progress due to locked buffers. * Skip this inode for now. Although having skipped pages * is odd for clean inodes, it can happen for some * filesystems so handle that gracefully. */ if (inode->i_state & I_DIRTY_ALL) redirty_tail_locked(inode, wb); else inode_cgwb_move_to_attached(inode, wb); return; } if (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) { /* * We didn't write back all the pages. nfs_writepages() * sometimes bales out without doing anything. */ if (wbc->nr_to_write <= 0 && !inode_dirtied_after(inode, dirtied_before)) { /* Slice used up. Queue for next turn. */ requeue_io(inode, wb); } else { /* * Writeback blocked by something other than * congestion. Delay the inode for some time to * avoid spinning on the CPU (100% iowait) * retrying writeback of the dirty page/inode * that cannot be performed immediately. */ redirty_tail_locked(inode, wb); } } else if (inode->i_state & I_DIRTY) { /* * Filesystems can dirty the inode during writeback operations, * such as delayed allocation during submission or metadata * updates after data IO completion. */ redirty_tail_locked(inode, wb); } else if (inode->i_state & I_DIRTY_TIME) { inode->dirtied_when = jiffies; inode_io_list_move_locked(inode, wb, &wb->b_dirty_time); inode->i_state &= ~I_SYNC_QUEUED; } else { /* The inode is clean. Remove from writeback lists. */ inode_cgwb_move_to_attached(inode, wb); } } /* * Write out an inode and its dirty pages (or some of its dirty pages, depending * on @wbc->nr_to_write), and clear the relevant dirty flags from i_state. * * This doesn't remove the inode from the writeback list it is on, except * potentially to move it from b_dirty_time to b_dirty due to timestamp * expiration. The caller is otherwise responsible for writeback list handling. * * The caller is also responsible for setting the I_SYNC flag beforehand and * calling inode_sync_complete() to clear it afterwards. */ static int __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) { struct address_space *mapping = inode->i_mapping; long nr_to_write = wbc->nr_to_write; unsigned dirty; int ret; WARN_ON(!(inode->i_state & I_SYNC)); trace_writeback_single_inode_start(inode, wbc, nr_to_write); ret = do_writepages(mapping, wbc); /* * Make sure to wait on the data before writing out the metadata. * This is important for filesystems that modify metadata on data * I/O completion. We don't do it for sync(2) writeback because it has a * separate, external IO completion path and ->sync_fs for guaranteeing * inode metadata is written back correctly. */ if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync) { int err = filemap_fdatawait(mapping); if (ret == 0) ret = err; } /* * If the inode has dirty timestamps and we need to write them, call * mark_inode_dirty_sync() to notify the filesystem about it and to * change I_DIRTY_TIME into I_DIRTY_SYNC. */ if ((inode->i_state & I_DIRTY_TIME) && (wbc->sync_mode == WB_SYNC_ALL || time_after(jiffies, inode->dirtied_time_when + dirtytime_expire_interval * HZ))) { trace_writeback_lazytime(inode); mark_inode_dirty_sync(inode); } /* * Get and clear the dirty flags from i_state. This needs to be done * after calling writepages because some filesystems may redirty the * inode during writepages due to delalloc. It also needs to be done * after handling timestamp expiration, as that may dirty the inode too. */ spin_lock(&inode->i_lock); dirty = inode->i_state & I_DIRTY; inode->i_state &= ~dirty; /* * Paired with smp_mb() in __mark_inode_dirty(). This allows * __mark_inode_dirty() to test i_state without grabbing i_lock - * either they see the I_DIRTY bits cleared or we see the dirtied * inode. * * I_DIRTY_PAGES is always cleared together above even if @mapping * still has dirty pages. The flag is reinstated after smp_mb() if * necessary. This guarantees that either __mark_inode_dirty() * sees clear I_DIRTY_PAGES or we see PAGECACHE_TAG_DIRTY. */ smp_mb(); if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) inode->i_state |= I_DIRTY_PAGES; else if (unlikely(inode->i_state & I_PINNING_NETFS_WB)) { if (!(inode->i_state & I_DIRTY_PAGES)) { inode->i_state &= ~I_PINNING_NETFS_WB; wbc->unpinned_netfs_wb = true; dirty |= I_PINNING_NETFS_WB; /* Cause write_inode */ } } spin_unlock(&inode->i_lock); /* Don't write the inode if only I_DIRTY_PAGES was set */ if (dirty & ~I_DIRTY_PAGES) { int err = write_inode(inode, wbc); if (ret == 0) ret = err; } wbc->unpinned_netfs_wb = false; trace_writeback_single_inode(inode, wbc, nr_to_write); return ret; } /* * Write out an inode's dirty data and metadata on-demand, i.e. separately from * the regular batched writeback done by the flusher threads in * writeback_sb_inodes(). @wbc controls various aspects of the write, such as * whether it is a data-integrity sync (%WB_SYNC_ALL) or not (%WB_SYNC_NONE). * * To prevent the inode from going away, either the caller must have a reference * to the inode, or the inode must have I_WILL_FREE or I_FREEING set. */ static int writeback_single_inode(struct inode *inode, struct writeback_control *wbc) { struct bdi_writeback *wb; int ret = 0; spin_lock(&inode->i_lock); if (!atomic_read(&inode->i_count)) WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING))); else WARN_ON(inode->i_state & I_WILL_FREE); if (inode->i_state & I_SYNC) { /* * Writeback is already running on the inode. For WB_SYNC_NONE, * that's enough and we can just return. For WB_SYNC_ALL, we * must wait for the existing writeback to complete, then do * writeback again if there's anything left. */ if (wbc->sync_mode != WB_SYNC_ALL) goto out; inode_wait_for_writeback(inode); } WARN_ON(inode->i_state & I_SYNC); /* * If the inode is already fully clean, then there's nothing to do. * * For data-integrity syncs we also need to check whether any pages are * still under writeback, e.g. due to prior WB_SYNC_NONE writeback. If * there are any such pages, we'll need to wait for them. */ if (!(inode->i_state & I_DIRTY_ALL) && (wbc->sync_mode != WB_SYNC_ALL || !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK))) goto out; inode->i_state |= I_SYNC; wbc_attach_and_unlock_inode(wbc, inode); ret = __writeback_single_inode(inode, wbc); wbc_detach_inode(wbc); wb = inode_to_wb_and_lock_list(inode); spin_lock(&inode->i_lock); /* * If the inode is freeing, its i_io_list shoudn't be updated * as it can be finally deleted at this moment. */ if (!(inode->i_state & I_FREEING)) { /* * If the inode is now fully clean, then it can be safely * removed from its writeback list (if any). Otherwise the * flusher threads are responsible for the writeback lists. */ if (!(inode->i_state & I_DIRTY_ALL)) inode_cgwb_move_to_attached(inode, wb); else if (!(inode->i_state & I_SYNC_QUEUED)) { if ((inode->i_state & I_DIRTY)) redirty_tail_locked(inode, wb); else if (inode->i_state & I_DIRTY_TIME) { inode->dirtied_when = jiffies; inode_io_list_move_locked(inode, wb, &wb->b_dirty_time); } } } spin_unlock(&wb->list_lock); inode_sync_complete(inode); out: spin_unlock(&inode->i_lock); return ret; } static long writeback_chunk_size(struct bdi_writeback *wb, struct wb_writeback_work *work) { long pages; /* * WB_SYNC_ALL mode does livelock avoidance by syncing dirty * inodes/pages in one big loop. Setting wbc.nr_to_write=LONG_MAX * here avoids calling into writeback_inodes_wb() more than once. * * The intended call sequence for WB_SYNC_ALL writeback is: * * wb_writeback() * writeback_sb_inodes() <== called only once * write_cache_pages() <== called once for each inode * (quickly) tag currently dirty pages * (maybe slowly) sync all tagged pages */ if (work->sync_mode == WB_SYNC_ALL || work->tagged_writepages) pages = LONG_MAX; else { pages = min(wb->avg_write_bandwidth / 2, global_wb_domain.dirty_limit / DIRTY_SCOPE); pages = min(pages, work->nr_pages); pages = round_down(pages + MIN_WRITEBACK_PAGES, MIN_WRITEBACK_PAGES); } return pages; } /* * Write a portion of b_io inodes which belong to @sb. * * Return the number of pages and/or inodes written. * * NOTE! This is called with wb->list_lock held, and will * unlock and relock that for each inode it ends up doing * IO for. */ static long writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb, struct wb_writeback_work *work) { struct writeback_control wbc = { .sync_mode = work->sync_mode, .tagged_writepages = work->tagged_writepages, .for_kupdate = work->for_kupdate, .for_background = work->for_background, .for_sync = work->for_sync, .range_cyclic = work->range_cyclic, .range_start = 0, .range_end = LLONG_MAX, }; unsigned long start_time = jiffies; long write_chunk; long total_wrote = 0; /* count both pages and inodes */ unsigned long dirtied_before = jiffies; if (work->for_kupdate) dirtied_before = jiffies - msecs_to_jiffies(dirty_expire_interval * 10); while (!list_empty(&wb->b_io)) { struct inode *inode = wb_inode(wb->b_io.prev); struct bdi_writeback *tmp_wb; long wrote; if (inode->i_sb != sb) { if (work->sb) { /* * We only want to write back data for this * superblock, move all inodes not belonging * to it back onto the dirty list. */ redirty_tail(inode, wb); continue; } /* * The inode belongs to a different superblock. * Bounce back to the caller to unpin this and * pin the next superblock. */ break; } /* * Don't bother with new inodes or inodes being freed, first * kind does not need periodic writeout yet, and for the latter * kind writeout is handled by the freer. */ spin_lock(&inode->i_lock); if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { redirty_tail_locked(inode, wb); spin_unlock(&inode->i_lock); continue; } if ((inode->i_state & I_SYNC) && wbc.sync_mode != WB_SYNC_ALL) { /* * If this inode is locked for writeback and we are not * doing writeback-for-data-integrity, move it to * b_more_io so that writeback can proceed with the * other inodes on s_io. * * We'll have another go at writing back this inode * when we completed a full scan of b_io. */ requeue_io(inode, wb); spin_unlock(&inode->i_lock); trace_writeback_sb_inodes_requeue(inode); continue; } spin_unlock(&wb->list_lock); /* * We already requeued the inode if it had I_SYNC set and we * are doing WB_SYNC_NONE writeback. So this catches only the * WB_SYNC_ALL case. */ if (inode->i_state & I_SYNC) { /* Wait for I_SYNC. This function drops i_lock... */ inode_sleep_on_writeback(inode); /* Inode may be gone, start again */ spin_lock(&wb->list_lock); continue; } inode->i_state |= I_SYNC; wbc_attach_and_unlock_inode(&wbc, inode); write_chunk = writeback_chunk_size(wb, work); wbc.nr_to_write = write_chunk; wbc.pages_skipped = 0; /* * We use I_SYNC to pin the inode in memory. While it is set * evict_inode() will wait so the inode cannot be freed. */ __writeback_single_inode(inode, &wbc); wbc_detach_inode(&wbc); work->nr_pages -= write_chunk - wbc.nr_to_write; wrote = write_chunk - wbc.nr_to_write - wbc.pages_skipped; wrote = wrote < 0 ? 0 : wrote; total_wrote += wrote; if (need_resched()) { /* * We're trying to balance between building up a nice * long list of IOs to improve our merge rate, and * getting those IOs out quickly for anyone throttling * in balance_dirty_pages(). cond_resched() doesn't * unplug, so get our IOs out the door before we * give up the CPU. */ blk_flush_plug(current->plug, false); cond_resched(); } /* * Requeue @inode if still dirty. Be careful as @inode may * have been switched to another wb in the meantime. */ tmp_wb = inode_to_wb_and_lock_list(inode); spin_lock(&inode->i_lock); if (!(inode->i_state & I_DIRTY_ALL)) total_wrote++; requeue_inode(inode, tmp_wb, &wbc, dirtied_before); inode_sync_complete(inode); spin_unlock(&inode->i_lock); if (unlikely(tmp_wb != wb)) { spin_unlock(&tmp_wb->list_lock); spin_lock(&wb->list_lock); } /* * bail out to wb_writeback() often enough to check * background threshold and other termination conditions. */ if (total_wrote) { if (time_is_before_jiffies(start_time + HZ / 10UL)) break; if (work->nr_pages <= 0) break; } } return total_wrote; } static long __writeback_inodes_wb(struct bdi_writeback *wb, struct wb_writeback_work *work) { unsigned long start_time = jiffies; long wrote = 0; while (!list_empty(&wb->b_io)) { struct inode *inode = wb_inode(wb->b_io.prev); struct super_block *sb = inode->i_sb; if (!super_trylock_shared(sb)) { /* * super_trylock_shared() may fail consistently due to * s_umount being grabbed by someone else. Don't use * requeue_io() to avoid busy retrying the inode/sb. */ redirty_tail(inode, wb); continue; } wrote += writeback_sb_inodes(sb, wb, work); up_read(&sb->s_umount); /* refer to the same tests at the end of writeback_sb_inodes */ if (wrote) { if (time_is_before_jiffies(start_time + HZ / 10UL)) break; if (work->nr_pages <= 0) break; } } /* Leave any unwritten inodes on b_io */ return wrote; } static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages, enum wb_reason reason) { struct wb_writeback_work work = { .nr_pages = nr_pages, .sync_mode = WB_SYNC_NONE, .range_cyclic = 1, .reason = reason, }; struct blk_plug plug; blk_start_plug(&plug); spin_lock(&wb->list_lock); if (list_empty(&wb->b_io)) queue_io(wb, &work, jiffies); __writeback_inodes_wb(wb, &work); spin_unlock(&wb->list_lock); blk_finish_plug(&plug); return nr_pages - work.nr_pages; } /* * Explicit flushing or periodic writeback of "old" data. * * Define "old": the first time one of an inode's pages is dirtied, we mark the * dirtying-time in the inode's address_space. So this periodic writeback code * just walks the superblock inode list, writing back any inodes which are * older than a specific point in time. * * Try to run once per dirty_writeback_interval. But if a writeback event * takes longer than a dirty_writeback_interval interval, then leave a * one-second gap. * * dirtied_before takes precedence over nr_to_write. So we'll only write back * all dirty pages if they are all attached to "old" mappings. */ static long wb_writeback(struct bdi_writeback *wb, struct wb_writeback_work *work) { long nr_pages = work->nr_pages; unsigned long dirtied_before = jiffies; struct inode *inode; long progress; struct blk_plug plug; bool queued = false; blk_start_plug(&plug); for (;;) { /* * Stop writeback when nr_pages has been consumed */ if (work->nr_pages <= 0) break; /* * Background writeout and kupdate-style writeback may * run forever. Stop them if there is other work to do * so that e.g. sync can proceed. They'll be restarted * after the other works are all done. */ if ((work->for_background || work->for_kupdate) && !list_empty(&wb->work_list)) break; /* * For background writeout, stop when we are below the * background dirty threshold */ if (work->for_background && !wb_over_bg_thresh(wb)) break; spin_lock(&wb->list_lock); trace_writeback_start(wb, work); if (list_empty(&wb->b_io)) { /* * Kupdate and background works are special and we want * to include all inodes that need writing. Livelock * avoidance is handled by these works yielding to any * other work so we are safe. */ if (work->for_kupdate) { dirtied_before = jiffies - msecs_to_jiffies(dirty_expire_interval * 10); } else if (work->for_background) dirtied_before = jiffies; queue_io(wb, work, dirtied_before); queued = true; } if (work->sb) progress = writeback_sb_inodes(work->sb, wb, work); else progress = __writeback_inodes_wb(wb, work); trace_writeback_written(wb, work); /* * Did we write something? Try for more * * Dirty inodes are moved to b_io for writeback in batches. * The completion of the current batch does not necessarily * mean the overall work is done. So we keep looping as long * as made some progress on cleaning pages or inodes. */ if (progress || !queued) { spin_unlock(&wb->list_lock); continue; } /* * No more inodes for IO, bail */ if (list_empty(&wb->b_more_io)) { spin_unlock(&wb->list_lock); break; } /* * Nothing written. Wait for some inode to * become available for writeback. Otherwise * we'll just busyloop. */ trace_writeback_wait(wb, work); inode = wb_inode(wb->b_more_io.prev); spin_lock(&inode->i_lock); spin_unlock(&wb->list_lock); /* This function drops i_lock... */ inode_sleep_on_writeback(inode); } blk_finish_plug(&plug); return nr_pages - work->nr_pages; } /* * Return the next wb_writeback_work struct that hasn't been processed yet. */ static struct wb_writeback_work *get_next_work_item(struct bdi_writeback *wb) { struct wb_writeback_work *work = NULL; spin_lock_irq(&wb->work_lock); if (!list_empty(&wb->work_list)) { work = list_entry(wb->work_list.next, struct wb_writeback_work, list); list_del_init(&work->list); } spin_unlock_irq(&wb->work_lock); return work; } static long wb_check_background_flush(struct bdi_writeback *wb) { if (wb_over_bg_thresh(wb)) { struct wb_writeback_work work = { .nr_pages = LONG_MAX, .sync_mode = WB_SYNC_NONE, .for_background = 1, .range_cyclic = 1, .reason = WB_REASON_BACKGROUND, }; return wb_writeback(wb, &work); } return 0; } static long wb_check_old_data_flush(struct bdi_writeback *wb) { unsigned long expired; long nr_pages; /* * When set to zero, disable periodic writeback */ if (!dirty_writeback_interval) return 0; expired = wb->last_old_flush + msecs_to_jiffies(dirty_writeback_interval * 10); if (time_before(jiffies, expired)) return 0; wb->last_old_flush = jiffies; nr_pages = get_nr_dirty_pages(); if (nr_pages) { struct wb_writeback_work work = { .nr_pages = nr_pages, .sync_mode = WB_SYNC_NONE, .for_kupdate = 1, .range_cyclic = 1, .reason = WB_REASON_PERIODIC, }; return wb_writeback(wb, &work); } return 0; } static long wb_check_start_all(struct bdi_writeback *wb) { long nr_pages; if (!test_bit(WB_start_all, &wb->state)) return 0; nr_pages = get_nr_dirty_pages(); if (nr_pages) { struct wb_writeback_work work = { .nr_pages = wb_split_bdi_pages(wb, nr_pages), .sync_mode = WB_SYNC_NONE, .range_cyclic = 1, .reason = wb->start_all_reason, }; nr_pages = wb_writeback(wb, &work); } clear_bit(WB_start_all, &wb->state); return nr_pages; } /* * Retrieve work items and do the writeback they describe */ static long wb_do_writeback(struct bdi_writeback *wb) { struct wb_writeback_work *work; long wrote = 0; set_bit(WB_writeback_running, &wb->state); while ((work = get_next_work_item(wb)) != NULL) { trace_writeback_exec(wb, work); wrote += wb_writeback(wb, work); finish_writeback_work(work); } /* * Check for a flush-everything request */ wrote += wb_check_start_all(wb); /* * Check for periodic writeback, kupdated() style */ wrote += wb_check_old_data_flush(wb); wrote += wb_check_background_flush(wb); clear_bit(WB_writeback_running, &wb->state); return wrote; } /* * Handle writeback of dirty data for the device backed by this bdi. Also * reschedules periodically and does kupdated style flushing. */ void wb_workfn(struct work_struct *work) { struct bdi_writeback *wb = container_of(to_delayed_work(work), struct bdi_writeback, dwork); long pages_written; set_worker_desc("flush-%s", bdi_dev_name(wb->bdi)); if (likely(!current_is_workqueue_rescuer() || !test_bit(WB_registered, &wb->state))) { /* * The normal path. Keep writing back @wb until its * work_list is empty. Note that this path is also taken * if @wb is shutting down even when we're running off the * rescuer as work_list needs to be drained. */ do { pages_written = wb_do_writeback(wb); trace_writeback_pages_written(pages_written); } while (!list_empty(&wb->work_list)); } else { /* * bdi_wq can't get enough workers and we're running off * the emergency worker. Don't hog it. Hopefully, 1024 is * enough for efficient IO. */ pages_written = writeback_inodes_wb(wb, 1024, WB_REASON_FORKER_THREAD); trace_writeback_pages_written(pages_written); } if (!list_empty(&wb->work_list)) wb_wakeup(wb); else if (wb_has_dirty_io(wb) && dirty_writeback_interval) wb_wakeup_delayed(wb); } /* * Start writeback of all dirty pages on this bdi. */ static void __wakeup_flusher_threads_bdi(struct backing_dev_info *bdi, enum wb_reason reason) { struct bdi_writeback *wb; if (!bdi_has_dirty_io(bdi)) return; list_for_each_entry_rcu(wb, &bdi->wb_list, bdi_node) wb_start_writeback(wb, reason); } void wakeup_flusher_threads_bdi(struct backing_dev_info *bdi, enum wb_reason reason) { rcu_read_lock(); __wakeup_flusher_threads_bdi(bdi, reason); rcu_read_unlock(); } /* * Wakeup the flusher threads to start writeback of all currently dirty pages */ void wakeup_flusher_threads(enum wb_reason reason) { struct backing_dev_info *bdi; /* * If we are expecting writeback progress we must submit plugged IO. */ blk_flush_plug(current->plug, true); rcu_read_lock(); list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) __wakeup_flusher_threads_bdi(bdi, reason); rcu_read_unlock(); } /* * Wake up bdi's periodically to make sure dirtytime inodes gets * written back periodically. We deliberately do *not* check the * b_dirtytime list in wb_has_dirty_io(), since this would cause the * kernel to be constantly waking up once there are any dirtytime * inodes on the system. So instead we define a separate delayed work * function which gets called much more rarely. (By default, only * once every 12 hours.) * * If there is any other write activity going on in the file system, * this function won't be necessary. But if the only thing that has * happened on the file system is a dirtytime inode caused by an atime * update, we need this infrastructure below to make sure that inode * eventually gets pushed out to disk. */ static void wakeup_dirtytime_writeback(struct work_struct *w); static DECLARE_DELAYED_WORK(dirtytime_work, wakeup_dirtytime_writeback); static void wakeup_dirtytime_writeback(struct work_struct *w) { struct backing_dev_info *bdi; rcu_read_lock(); list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) { struct bdi_writeback *wb; list_for_each_entry_rcu(wb, &bdi->wb_list, bdi_node) if (!list_empty(&wb->b_dirty_time)) wb_wakeup(wb); } rcu_read_unlock(); schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ); } static int __init start_dirtytime_writeback(void) { schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ); return 0; } __initcall(start_dirtytime_writeback); int dirtytime_interval_handler(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret; ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (ret == 0 && write) mod_delayed_work(system_wq, &dirtytime_work, 0); return ret; } /** * __mark_inode_dirty - internal function to mark an inode dirty * * @inode: inode to mark * @flags: what kind of dirty, e.g. I_DIRTY_SYNC. This can be a combination of * multiple I_DIRTY_* flags, except that I_DIRTY_TIME can't be combined * with I_DIRTY_PAGES. * * Mark an inode as dirty. We notify the filesystem, then update the inode's * dirty flags. Then, if needed we add the inode to the appropriate dirty list. * * Most callers should use mark_inode_dirty() or mark_inode_dirty_sync() * instead of calling this directly. * * CAREFUL! We only add the inode to the dirty list if it is hashed or if it * refers to a blockdev. Unhashed inodes will never be added to the dirty list * even if they are later hashed, as they will have been marked dirty already. * * In short, ensure you hash any inodes _before_ you start marking them dirty. * * Note that for blockdevs, inode->dirtied_when represents the dirtying time of * the block-special inode (/dev/hda1) itself. And the ->dirtied_when field of * the kernel-internal blockdev inode represents the dirtying time of the * blockdev's pages. This is why for I_DIRTY_PAGES we always use * page->mapping->host, so the page-dirtying time is recorded in the internal * blockdev inode. */ void __mark_inode_dirty(struct inode *inode, int flags) { struct super_block *sb = inode->i_sb; int dirtytime = 0; struct bdi_writeback *wb = NULL; trace_writeback_mark_inode_dirty(inode, flags); if (flags & I_DIRTY_INODE) { /* * Inode timestamp update will piggback on this dirtying. * We tell ->dirty_inode callback that timestamps need to * be updated by setting I_DIRTY_TIME in flags. */ if (inode->i_state & I_DIRTY_TIME) { spin_lock(&inode->i_lock); if (inode->i_state & I_DIRTY_TIME) { inode->i_state &= ~I_DIRTY_TIME; flags |= I_DIRTY_TIME; } spin_unlock(&inode->i_lock); } /* * Notify the filesystem about the inode being dirtied, so that * (if needed) it can update on-disk fields and journal the * inode. This is only needed when the inode itself is being * dirtied now. I.e. it's only needed for I_DIRTY_INODE, not * for just I_DIRTY_PAGES or I_DIRTY_TIME. */ trace_writeback_dirty_inode_start(inode, flags); if (sb->s_op->dirty_inode) sb->s_op->dirty_inode(inode, flags & (I_DIRTY_INODE | I_DIRTY_TIME)); trace_writeback_dirty_inode(inode, flags); /* I_DIRTY_INODE supersedes I_DIRTY_TIME. */ flags &= ~I_DIRTY_TIME; } else { /* * Else it's either I_DIRTY_PAGES, I_DIRTY_TIME, or nothing. * (We don't support setting both I_DIRTY_PAGES and I_DIRTY_TIME * in one call to __mark_inode_dirty().) */ dirtytime = flags & I_DIRTY_TIME; WARN_ON_ONCE(dirtytime && flags != I_DIRTY_TIME); } /* * Paired with smp_mb() in __writeback_single_inode() for the * following lockless i_state test. See there for details. */ smp_mb(); if ((inode->i_state & flags) == flags) return; spin_lock(&inode->i_lock); if ((inode->i_state & flags) != flags) { const int was_dirty = inode->i_state & I_DIRTY; inode_attach_wb(inode, NULL); inode->i_state |= flags; /* * Grab inode's wb early because it requires dropping i_lock and we * need to make sure following checks happen atomically with dirty * list handling so that we don't move inodes under flush worker's * hands. */ if (!was_dirty) { wb = locked_inode_to_wb_and_lock_list(inode); spin_lock(&inode->i_lock); } /* * If the inode is queued for writeback by flush worker, just * update its dirty state. Once the flush worker is done with * the inode it will place it on the appropriate superblock * list, based upon its state. */ if (inode->i_state & I_SYNC_QUEUED) goto out_unlock; /* * Only add valid (hashed) inodes to the superblock's * dirty list. Add blockdev inodes as well. */ if (!S_ISBLK(inode->i_mode)) { if (inode_unhashed(inode)) goto out_unlock; } if (inode->i_state & I_FREEING) goto out_unlock; /* * If the inode was already on b_dirty/b_io/b_more_io, don't * reposition it (that would break b_dirty time-ordering). */ if (!was_dirty) { struct list_head *dirty_list; bool wakeup_bdi = false; inode->dirtied_when = jiffies; if (dirtytime) inode->dirtied_time_when = jiffies; if (inode->i_state & I_DIRTY) dirty_list = &wb->b_dirty; else dirty_list = &wb->b_dirty_time; wakeup_bdi = inode_io_list_move_locked(inode, wb, dirty_list); spin_unlock(&wb->list_lock); spin_unlock(&inode->i_lock); trace_writeback_dirty_inode_enqueue(inode); /* * If this is the first dirty inode for this bdi, * we have to wake-up the corresponding bdi thread * to make sure background write-back happens * later. */ if (wakeup_bdi && (wb->bdi->capabilities & BDI_CAP_WRITEBACK)) wb_wakeup_delayed(wb); return; } } out_unlock: if (wb) spin_unlock(&wb->list_lock); spin_unlock(&inode->i_lock); } EXPORT_SYMBOL(__mark_inode_dirty); /* * The @s_sync_lock is used to serialise concurrent sync operations * to avoid lock contention problems with concurrent wait_sb_inodes() calls. * Concurrent callers will block on the s_sync_lock rather than doing contending * walks. The queueing maintains sync(2) required behaviour as all the IO that * has been issued up to the time this function is enter is guaranteed to be * completed by the time we have gained the lock and waited for all IO that is * in progress regardless of the order callers are granted the lock. */ static void wait_sb_inodes(struct super_block *sb) { LIST_HEAD(sync_list); /* * We need to be protected against the filesystem going from * r/o to r/w or vice versa. */ WARN_ON(!rwsem_is_locked(&sb->s_umount)); mutex_lock(&sb->s_sync_lock); /* * Splice the writeback list onto a temporary list to avoid waiting on * inodes that have started writeback after this point. * * Use rcu_read_lock() to keep the inodes around until we have a * reference. s_inode_wblist_lock protects sb->s_inodes_wb as well as * the local list because inodes can be dropped from either by writeback * completion. */ rcu_read_lock(); spin_lock_irq(&sb->s_inode_wblist_lock); list_splice_init(&sb->s_inodes_wb, &sync_list); /* * Data integrity sync. Must wait for all pages under writeback, because * there may have been pages dirtied before our sync call, but which had * writeout started before we write it out. In which case, the inode * may not be on the dirty list, but we still have to wait for that * writeout. */ while (!list_empty(&sync_list)) { struct inode *inode = list_first_entry(&sync_list, struct inode, i_wb_list); struct address_space *mapping = inode->i_mapping; /* * Move each inode back to the wb list before we drop the lock * to preserve consistency between i_wb_list and the mapping * writeback tag. Writeback completion is responsible to remove * the inode from either list once the writeback tag is cleared. */ list_move_tail(&inode->i_wb_list, &sb->s_inodes_wb); /* * The mapping can appear untagged while still on-list since we * do not have the mapping lock. Skip it here, wb completion * will remove it. */ if (!mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) continue; spin_unlock_irq(&sb->s_inode_wblist_lock); spin_lock(&inode->i_lock); if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) { spin_unlock(&inode->i_lock); spin_lock_irq(&sb->s_inode_wblist_lock); continue; } __iget(inode); spin_unlock(&inode->i_lock); rcu_read_unlock(); /* * We keep the error status of individual mapping so that * applications can catch the writeback error using fsync(2). * See filemap_fdatawait_keep_errors() for details. */ filemap_fdatawait_keep_errors(mapping); cond_resched(); iput(inode); rcu_read_lock(); spin_lock_irq(&sb->s_inode_wblist_lock); } spin_unlock_irq(&sb->s_inode_wblist_lock); rcu_read_unlock(); mutex_unlock(&sb->s_sync_lock); } static void __writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr, enum wb_reason reason, bool skip_if_busy) { struct backing_dev_info *bdi = sb->s_bdi; DEFINE_WB_COMPLETION(done, bdi); struct wb_writeback_work work = { .sb = sb, .sync_mode = WB_SYNC_NONE, .tagged_writepages = 1, .done = &done, .nr_pages = nr, .reason = reason, }; if (!bdi_has_dirty_io(bdi) || bdi == &noop_backing_dev_info) return; WARN_ON(!rwsem_is_locked(&sb->s_umount)); bdi_split_work_to_wbs(sb->s_bdi, &work, skip_if_busy); wb_wait_for_completion(&done); } /** * writeback_inodes_sb_nr - writeback dirty inodes from given super_block * @sb: the superblock * @nr: the number of pages to write * @reason: reason why some writeback work initiated * * Start writeback on some inodes on this super_block. No guarantees are made * on how many (if any) will be written, and this function does not wait * for IO completion of submitted IO. */ void writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr, enum wb_reason reason) { __writeback_inodes_sb_nr(sb, nr, reason, false); } EXPORT_SYMBOL(writeback_inodes_sb_nr); /** * writeback_inodes_sb - writeback dirty inodes from given super_block * @sb: the superblock * @reason: reason why some writeback work was initiated * * Start writeback on some inodes on this super_block. No guarantees are made * on how many (if any) will be written, and this function does not wait * for IO completion of submitted IO. */ void writeback_inodes_sb(struct super_block *sb, enum wb_reason reason) { writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason); } EXPORT_SYMBOL(writeback_inodes_sb); /** * try_to_writeback_inodes_sb - try to start writeback if none underway * @sb: the superblock * @reason: reason why some writeback work was initiated * * Invoke __writeback_inodes_sb_nr if no writeback is currently underway. */ void try_to_writeback_inodes_sb(struct super_block *sb, enum wb_reason reason) { if (!down_read_trylock(&sb->s_umount)) return; __writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason, true); up_read(&sb->s_umount); } EXPORT_SYMBOL(try_to_writeback_inodes_sb); /** * sync_inodes_sb - sync sb inode pages * @sb: the superblock * * This function writes and waits on any dirty inode belonging to this * super_block. */ void sync_inodes_sb(struct super_block *sb) { struct backing_dev_info *bdi = sb->s_bdi; DEFINE_WB_COMPLETION(done, bdi); struct wb_writeback_work work = { .sb = sb, .sync_mode = WB_SYNC_ALL, .nr_pages = LONG_MAX, .range_cyclic = 0, .done = &done, .reason = WB_REASON_SYNC, .for_sync = 1, }; /* * Can't skip on !bdi_has_dirty() because we should wait for !dirty * inodes under writeback and I_DIRTY_TIME inodes ignored by * bdi_has_dirty() need to be written out too. */ if (bdi == &noop_backing_dev_info) return; WARN_ON(!rwsem_is_locked(&sb->s_umount)); /* protect against inode wb switch, see inode_switch_wbs_work_fn() */ bdi_down_write_wb_switch_rwsem(bdi); bdi_split_work_to_wbs(bdi, &work, false); wb_wait_for_completion(&done); bdi_up_write_wb_switch_rwsem(bdi); wait_sb_inodes(sb); } EXPORT_SYMBOL(sync_inodes_sb); /** * write_inode_now - write an inode to disk * @inode: inode to write to disk * @sync: whether the write should be synchronous or not * * This function commits an inode to disk immediately if it is dirty. This is * primarily needed by knfsd. * * The caller must either have a ref on the inode or must have set I_WILL_FREE. */ int write_inode_now(struct inode *inode, int sync) { struct writeback_control wbc = { .nr_to_write = LONG_MAX, .sync_mode = sync ? WB_SYNC_ALL : WB_SYNC_NONE, .range_start = 0, .range_end = LLONG_MAX, }; if (!mapping_can_writeback(inode->i_mapping)) wbc.nr_to_write = 0; might_sleep(); return writeback_single_inode(inode, &wbc); } EXPORT_SYMBOL(write_inode_now); /** * sync_inode_metadata - write an inode to disk * @inode: the inode to sync * @wait: wait for I/O to complete. * * Write an inode to disk and adjust its dirty state after completion. * * Note: only writes the actual inode, no associated data or other metadata. */ int sync_inode_metadata(struct inode *inode, int wait) { struct writeback_control wbc = { .sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_NONE, .nr_to_write = 0, /* metadata-only */ }; return writeback_single_inode(inode, &wbc); } EXPORT_SYMBOL(sync_inode_metadata); |
136 137 137 137 136 137 137 137 136 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 | // SPDX-License-Identifier: GPL-2.0-only #include <linux/kernel.h> #include <linux/gcd.h> #include <linux/export.h> /* * This implements the binary GCD algorithm. (Often attributed to Stein, * but as Knuth has noted, appears in a first-century Chinese math text.) * * This is faster than the division-based algorithm even on x86, which * has decent hardware division. */ #if !defined(CONFIG_CPU_NO_EFFICIENT_FFS) /* If __ffs is available, the even/odd algorithm benchmarks slower. */ /** * gcd - calculate and return the greatest common divisor of 2 unsigned longs * @a: first value * @b: second value */ unsigned long gcd(unsigned long a, unsigned long b) { unsigned long r = a | b; if (!a || !b) return r; b >>= __ffs(b); if (b == 1) return r & -r; for (;;) { a >>= __ffs(a); if (a == 1) return r & -r; if (a == b) return a << __ffs(r); if (a < b) swap(a, b); a -= b; } } #else /* If normalization is done by loops, the even/odd algorithm is a win. */ unsigned long gcd(unsigned long a, unsigned long b) { unsigned long r = a | b; if (!a || !b) return r; /* Isolate lsbit of r */ r &= -r; while (!(b & r)) b >>= 1; if (b == r) return r; for (;;) { while (!(a & r)) a >>= 1; if (a == r) return r; if (a == b) return a; if (a < b) swap(a, b); a -= b; a >>= 1; if (a & r) a += b; a >>= 1; } } #endif EXPORT_SYMBOL_GPL(gcd); |
137 137 6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 | /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ #ifndef _BTRFS_CTREE_H_ #define _BTRFS_CTREE_H_ #include <linux/btrfs.h> #include <linux/types.h> #ifdef __KERNEL__ #include <linux/stddef.h> #else #include <stddef.h> #endif /* ASCII for _BHRfS_M, no terminating nul */ #define BTRFS_MAGIC 0x4D5F53665248425FULL #define BTRFS_MAX_LEVEL 8 /* * We can actually store much bigger names, but lets not confuse the rest of * linux. */ #define BTRFS_NAME_LEN 255 /* * Theoretical limit is larger, but we keep this down to a sane value. That * should limit greatly the possibility of collisions on inode ref items. */ #define BTRFS_LINK_MAX 65535U /* * This header contains the structure definitions and constants used * by file system objects that can be retrieved using * the BTRFS_IOC_SEARCH_TREE ioctl. That means basically anything that * is needed to describe a leaf node's key or item contents. */ /* holds pointers to all of the tree roots */ #define BTRFS_ROOT_TREE_OBJECTID 1ULL /* stores information about which extents are in use, and reference counts */ #define BTRFS_EXTENT_TREE_OBJECTID 2ULL /* * chunk tree stores translations from logical -> physical block numbering * the super block points to the chunk tree */ #define BTRFS_CHUNK_TREE_OBJECTID 3ULL /* * stores information about which areas of a given device are in use. * one per device. The tree of tree roots points to the device tree */ #define BTRFS_DEV_TREE_OBJECTID 4ULL /* one per subvolume, storing files and directories */ #define BTRFS_FS_TREE_OBJECTID 5ULL /* directory objectid inside the root tree */ #define BTRFS_ROOT_TREE_DIR_OBJECTID 6ULL /* holds checksums of all the data extents */ #define BTRFS_CSUM_TREE_OBJECTID 7ULL /* holds quota configuration and tracking */ #define BTRFS_QUOTA_TREE_OBJECTID 8ULL /* for storing items that use the BTRFS_UUID_KEY* types */ #define BTRFS_UUID_TREE_OBJECTID 9ULL /* tracks free space in block groups. */ #define BTRFS_FREE_SPACE_TREE_OBJECTID 10ULL /* Holds the block group items for extent tree v2. */ #define BTRFS_BLOCK_GROUP_TREE_OBJECTID 11ULL /* Tracks RAID stripes in block groups. */ #define BTRFS_RAID_STRIPE_TREE_OBJECTID 12ULL /* device stats in the device tree */ #define BTRFS_DEV_STATS_OBJECTID 0ULL /* for storing balance parameters in the root tree */ #define BTRFS_BALANCE_OBJECTID -4ULL /* orphan objectid for tracking unlinked/truncated files */ #define BTRFS_ORPHAN_OBJECTID -5ULL /* does write ahead logging to speed up fsyncs */ #define BTRFS_TREE_LOG_OBJECTID -6ULL #define BTRFS_TREE_LOG_FIXUP_OBJECTID -7ULL /* for space balancing */ #define BTRFS_TREE_RELOC_OBJECTID -8ULL #define BTRFS_DATA_RELOC_TREE_OBJECTID -9ULL /* * extent checksums all have this objectid * this allows them to share the logging tree * for fsyncs */ #define BTRFS_EXTENT_CSUM_OBJECTID -10ULL /* For storing free space cache */ #define BTRFS_FREE_SPACE_OBJECTID -11ULL /* * The inode number assigned to the special inode for storing * free ino cache */ #define BTRFS_FREE_INO_OBJECTID -12ULL /* dummy objectid represents multiple objectids */ #define BTRFS_MULTIPLE_OBJECTIDS -255ULL /* * All files have objectids in this range. */ #define BTRFS_FIRST_FREE_OBJECTID 256ULL #define BTRFS_LAST_FREE_OBJECTID -256ULL #define BTRFS_FIRST_CHUNK_TREE_OBJECTID 256ULL /* * the device items go into the chunk tree. The key is in the form * [ 1 BTRFS_DEV_ITEM_KEY device_id ] */ #define BTRFS_DEV_ITEMS_OBJECTID 1ULL #define BTRFS_BTREE_INODE_OBJECTID 1 #define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID 2 #define BTRFS_DEV_REPLACE_DEVID 0ULL /* * inode items have the data typically returned from stat and store other * info about object characteristics. There is one for every file and dir in * the FS */ #define BTRFS_INODE_ITEM_KEY 1 #define BTRFS_INODE_REF_KEY 12 #define BTRFS_INODE_EXTREF_KEY 13 #define BTRFS_XATTR_ITEM_KEY 24 /* * fs verity items are stored under two different key types on disk. * The descriptor items: * [ inode objectid, BTRFS_VERITY_DESC_ITEM_KEY, offset ] * * At offset 0, we store a btrfs_verity_descriptor_item which tracks the size * of the descriptor item and some extra data for encryption. * Starting at offset 1, these hold the generic fs verity descriptor. The * latter are opaque to btrfs, we just read and write them as a blob for the * higher level verity code. The most common descriptor size is 256 bytes. * * The merkle tree items: * [ inode objectid, BTRFS_VERITY_MERKLE_ITEM_KEY, offset ] * * These also start at offset 0, and correspond to the merkle tree bytes. When * fsverity asks for page 0 of the merkle tree, we pull up one page starting at * offset 0 for this key type. These are also opaque to btrfs, we're blindly * storing whatever fsverity sends down. */ #define BTRFS_VERITY_DESC_ITEM_KEY 36 #define BTRFS_VERITY_MERKLE_ITEM_KEY 37 #define BTRFS_ORPHAN_ITEM_KEY 48 /* reserve 2-15 close to the inode for later flexibility */ /* * dir items are the name -> inode pointers in a directory. There is one * for every name in a directory. BTRFS_DIR_LOG_ITEM_KEY is no longer used * but it's still defined here for documentation purposes and to help avoid * having its numerical value reused in the future. */ #define BTRFS_DIR_LOG_ITEM_KEY 60 #define BTRFS_DIR_LOG_INDEX_KEY 72 #define BTRFS_DIR_ITEM_KEY 84 #define BTRFS_DIR_INDEX_KEY 96 /* * extent data is for file data */ #define BTRFS_EXTENT_DATA_KEY 108 /* * extent csums are stored in a separate tree and hold csums for * an entire extent on disk. */ #define BTRFS_EXTENT_CSUM_KEY 128 /* * root items point to tree roots. They are typically in the root * tree used by the super block to find all the other trees */ #define BTRFS_ROOT_ITEM_KEY 132 /* * root backrefs tie subvols and snapshots to the directory entries that * reference them */ #define BTRFS_ROOT_BACKREF_KEY 144 /* * root refs make a fast index for listing all of the snapshots and * subvolumes referenced by a given root. They point directly to the * directory item in the root that references the subvol */ #define BTRFS_ROOT_REF_KEY 156 /* * extent items are in the extent map tree. These record which blocks * are used, and how many references there are to each block */ #define BTRFS_EXTENT_ITEM_KEY 168 /* * The same as the BTRFS_EXTENT_ITEM_KEY, except it's metadata we already know * the length, so we save the level in key->offset instead of the length. */ #define BTRFS_METADATA_ITEM_KEY 169 /* * Special inline ref key which stores the id of the subvolume which originally * created the extent. This subvolume owns the extent permanently from the * perspective of simple quotas. Needed to know which subvolume to free quota * usage from when the extent is deleted. * * Stored as an inline ref rather to avoid wasting space on a separate item on * top of the existing extent item. However, unlike the other inline refs, * there is one one owner ref per extent rather than one per extent. * * Because of this, it goes at the front of the list of inline refs, and thus * must have a lower type value than any other inline ref type (to satisfy the * disk format rule that inline refs have non-decreasing type). */ #define BTRFS_EXTENT_OWNER_REF_KEY 172 #define BTRFS_TREE_BLOCK_REF_KEY 176 #define BTRFS_EXTENT_DATA_REF_KEY 178 /* * Obsolete key. Defintion removed in 6.6, value may be reused in the future. * * #define BTRFS_EXTENT_REF_V0_KEY 180 */ #define BTRFS_SHARED_BLOCK_REF_KEY 182 #define BTRFS_SHARED_DATA_REF_KEY 184 /* * block groups give us hints into the extent allocation trees. Which * blocks are free etc etc */ #define BTRFS_BLOCK_GROUP_ITEM_KEY 192 /* * Every block group is represented in the free space tree by a free space info * item, which stores some accounting information. It is keyed on * (block_group_start, FREE_SPACE_INFO, block_group_length). */ #define BTRFS_FREE_SPACE_INFO_KEY 198 /* * A free space extent tracks an extent of space that is free in a block group. * It is keyed on (start, FREE_SPACE_EXTENT, length). */ #define BTRFS_FREE_SPACE_EXTENT_KEY 199 /* * When a block group becomes very fragmented, we convert it to use bitmaps * instead of extents. A free space bitmap is keyed on * (start, FREE_SPACE_BITMAP, length); the corresponding item is a bitmap with * (length / sectorsize) bits. */ #define BTRFS_FREE_SPACE_BITMAP_KEY 200 #define BTRFS_DEV_EXTENT_KEY 204 #define BTRFS_DEV_ITEM_KEY 216 #define BTRFS_CHUNK_ITEM_KEY 228 #define BTRFS_RAID_STRIPE_KEY 230 /* * Records the overall state of the qgroups. * There's only one instance of this key present, * (0, BTRFS_QGROUP_STATUS_KEY, 0) */ #define BTRFS_QGROUP_STATUS_KEY 240 /* * Records the currently used space of the qgroup. * One key per qgroup, (0, BTRFS_QGROUP_INFO_KEY, qgroupid). */ #define BTRFS_QGROUP_INFO_KEY 242 /* * Contains the user configured limits for the qgroup. * One key per qgroup, (0, BTRFS_QGROUP_LIMIT_KEY, qgroupid). */ #define BTRFS_QGROUP_LIMIT_KEY 244 /* * Records the child-parent relationship of qgroups. For * each relation, 2 keys are present: * (childid, BTRFS_QGROUP_RELATION_KEY, parentid) * (parentid, BTRFS_QGROUP_RELATION_KEY, childid) */ #define BTRFS_QGROUP_RELATION_KEY 246 /* * Obsolete name, see BTRFS_TEMPORARY_ITEM_KEY. */ #define BTRFS_BALANCE_ITEM_KEY 248 /* * The key type for tree items that are stored persistently, but do not need to * exist for extended period of time. The items can exist in any tree. * * [subtype, BTRFS_TEMPORARY_ITEM_KEY, data] * * Existing items: * * - balance status item * (BTRFS_BALANCE_OBJECTID, BTRFS_TEMPORARY_ITEM_KEY, 0) */ #define BTRFS_TEMPORARY_ITEM_KEY 248 /* * Obsolete name, see BTRFS_PERSISTENT_ITEM_KEY */ #define BTRFS_DEV_STATS_KEY 249 /* * The key type for tree items that are stored persistently and usually exist * for a long period, eg. filesystem lifetime. The item kinds can be status * information, stats or preference values. The item can exist in any tree. * * [subtype, BTRFS_PERSISTENT_ITEM_KEY, data] * * Existing items: * * - device statistics, store IO stats in the device tree, one key for all * stats * (BTRFS_DEV_STATS_OBJECTID, BTRFS_DEV_STATS_KEY, 0) */ #define BTRFS_PERSISTENT_ITEM_KEY 249 /* * Persistently stores the device replace state in the device tree. * The key is built like this: (0, BTRFS_DEV_REPLACE_KEY, 0). */ #define BTRFS_DEV_REPLACE_KEY 250 /* * Stores items that allow to quickly map UUIDs to something else. * These items are part of the filesystem UUID tree. * The key is built like this: * (UUID_upper_64_bits, BTRFS_UUID_KEY*, UUID_lower_64_bits). */ #if BTRFS_UUID_SIZE != 16 #error "UUID items require BTRFS_UUID_SIZE == 16!" #endif #define BTRFS_UUID_KEY_SUBVOL 251 /* for UUIDs assigned to subvols */ #define BTRFS_UUID_KEY_RECEIVED_SUBVOL 252 /* for UUIDs assigned to * received subvols */ /* * string items are for debugging. They just store a short string of * data in the FS */ #define BTRFS_STRING_ITEM_KEY 253 /* Maximum metadata block size (nodesize) */ #define BTRFS_MAX_METADATA_BLOCKSIZE 65536 /* 32 bytes in various csum fields */ #define BTRFS_CSUM_SIZE 32 /* csum types */ enum btrfs_csum_type { BTRFS_CSUM_TYPE_CRC32 = 0, BTRFS_CSUM_TYPE_XXHASH = 1, BTRFS_CSUM_TYPE_SHA256 = 2, BTRFS_CSUM_TYPE_BLAKE2 = 3, }; /* * flags definitions for directory entry item type * * Used by: * struct btrfs_dir_item.type * * Values 0..7 must match common file type values in fs_types.h. */ #define BTRFS_FT_UNKNOWN 0 #define BTRFS_FT_REG_FILE 1 #define BTRFS_FT_DIR 2 #define BTRFS_FT_CHRDEV 3 #define BTRFS_FT_BLKDEV 4 #define BTRFS_FT_FIFO 5 #define BTRFS_FT_SOCK 6 #define BTRFS_FT_SYMLINK 7 #define BTRFS_FT_XATTR 8 #define BTRFS_FT_MAX 9 /* Directory contains encrypted data */ #define BTRFS_FT_ENCRYPTED 0x80 static inline __u8 btrfs_dir_flags_to_ftype(__u8 flags) { return flags & ~BTRFS_FT_ENCRYPTED; } /* * Inode flags */ #define BTRFS_INODE_NODATASUM (1U << 0) #define BTRFS_INODE_NODATACOW (1U << 1) #define BTRFS_INODE_READONLY (1U << 2) #define BTRFS_INODE_NOCOMPRESS (1U << 3) #define BTRFS_INODE_PREALLOC (1U << 4) #define BTRFS_INODE_SYNC (1U << 5) #define BTRFS_INODE_IMMUTABLE (1U << 6) #define BTRFS_INODE_APPEND (1U << 7) #define BTRFS_INODE_NODUMP (1U << 8) #define BTRFS_INODE_NOATIME (1U << 9) #define BTRFS_INODE_DIRSYNC (1U << 10) #define BTRFS_INODE_COMPRESS (1U << 11) #define BTRFS_INODE_ROOT_ITEM_INIT (1U << 31) #define BTRFS_INODE_FLAG_MASK \ (BTRFS_INODE_NODATASUM | \ BTRFS_INODE_NODATACOW | \ BTRFS_INODE_READONLY | \ BTRFS_INODE_NOCOMPRESS | \ BTRFS_INODE_PREALLOC | \ BTRFS_INODE_SYNC | \ BTRFS_INODE_IMMUTABLE | \ BTRFS_INODE_APPEND | \ BTRFS_INODE_NODUMP | \ BTRFS_INODE_NOATIME | \ BTRFS_INODE_DIRSYNC | \ BTRFS_INODE_COMPRESS | \ BTRFS_INODE_ROOT_ITEM_INIT) #define BTRFS_INODE_RO_VERITY (1U << 0) #define BTRFS_INODE_RO_FLAG_MASK (BTRFS_INODE_RO_VERITY) /* * The key defines the order in the tree, and so it also defines (optimal) * block layout. * * objectid corresponds to the inode number. * * type tells us things about the object, and is a kind of stream selector. * so for a given inode, keys with type of 1 might refer to the inode data, * type of 2 may point to file data in the btree and type == 3 may point to * extents. * * offset is the starting byte offset for this key in the stream. * * btrfs_disk_key is in disk byte order. struct btrfs_key is always * in cpu native order. Otherwise they are identical and their sizes * should be the same (ie both packed) */ struct btrfs_disk_key { __le64 objectid; __u8 type; __le64 offset; } __attribute__ ((__packed__)); struct btrfs_key { __u64 objectid; __u8 type; __u64 offset; } __attribute__ ((__packed__)); /* * Every tree block (leaf or node) starts with this header. */ struct btrfs_header { /* These first four must match the super block */ __u8 csum[BTRFS_CSUM_SIZE]; /* FS specific uuid */ __u8 fsid[BTRFS_FSID_SIZE]; /* Which block this node is supposed to live in */ __le64 bytenr; __le64 flags; /* Allowed to be different from the super from here on down */ __u8 chunk_tree_uuid[BTRFS_UUID_SIZE]; __le64 generation; __le64 owner; __le32 nritems; __u8 level; } __attribute__ ((__packed__)); /* * This is a very generous portion of the super block, giving us room to * translate 14 chunks with 3 stripes each. */ #define BTRFS_SYSTEM_CHUNK_ARRAY_SIZE 2048 /* * Just in case we somehow lose the roots and are not able to mount, we store * an array of the roots from previous transactions in the super. */ #define BTRFS_NUM_BACKUP_ROOTS 4 struct btrfs_root_backup { __le64 tree_root; __le64 tree_root_gen; __le64 chunk_root; __le64 chunk_root_gen; __le64 extent_root; __le64 extent_root_gen; __le64 fs_root; __le64 fs_root_gen; __le64 dev_root; __le64 dev_root_gen; __le64 csum_root; __le64 csum_root_gen; __le64 total_bytes; __le64 bytes_used; __le64 num_devices; /* future */ __le64 unused_64[4]; __u8 tree_root_level; __u8 chunk_root_level; __u8 extent_root_level; __u8 fs_root_level; __u8 dev_root_level; __u8 csum_root_level; /* future and to align */ __u8 unused_8[10]; } __attribute__ ((__packed__)); /* * A leaf is full of items. offset and size tell us where to find the item in * the leaf (relative to the start of the data area) */ struct btrfs_item { struct btrfs_disk_key key; __le32 offset; __le32 size; } __attribute__ ((__packed__)); /* * Leaves have an item area and a data area: * [item0, item1....itemN] [free space] [dataN...data1, data0] * * The data is separate from the items to get the keys closer together during * searches. */ struct btrfs_leaf { struct btrfs_header header; struct btrfs_item items[]; } __attribute__ ((__packed__)); /* * All non-leaf blocks are nodes, they hold only keys and pointers to other * blocks. */ struct btrfs_key_ptr { struct btrfs_disk_key key; __le64 blockptr; __le64 generation; } __attribute__ ((__packed__)); struct btrfs_node { struct btrfs_header header; struct btrfs_key_ptr ptrs[]; } __attribute__ ((__packed__)); struct btrfs_dev_item { /* the internal btrfs device id */ __le64 devid; /* size of the device */ __le64 total_bytes; /* bytes used */ __le64 bytes_used; /* optimal io alignment for this device */ __le32 io_align; /* optimal io width for this device */ __le32 io_width; /* minimal io size for this device */ __le32 sector_size; /* type and info about this device */ __le64 type; /* expected generation for this device */ __le64 generation; /* * starting byte of this partition on the device, * to allow for stripe alignment in the future */ __le64 start_offset; /* grouping information for allocation decisions */ __le32 dev_group; /* seek speed 0-100 where 100 is fastest */ __u8 seek_speed; /* bandwidth 0-100 where 100 is fastest */ __u8 bandwidth; /* btrfs generated uuid for this device */ __u8 uuid[BTRFS_UUID_SIZE]; /* uuid of FS who owns this device */ __u8 fsid[BTRFS_UUID_SIZE]; } __attribute__ ((__packed__)); struct btrfs_stripe { __le64 devid; __le64 offset; __u8 dev_uuid[BTRFS_UUID_SIZE]; } __attribute__ ((__packed__)); struct btrfs_chunk { /* size of this chunk in bytes */ __le64 length; /* objectid of the root referencing this chunk */ __le64 owner; __le64 stripe_len; __le64 type; /* optimal io alignment for this chunk */ __le32 io_align; /* optimal io width for this chunk */ __le32 io_width; /* minimal io size for this chunk */ __le32 sector_size; /* 2^16 stripes is quite a lot, a second limit is the size of a single * item in the btree */ __le16 num_stripes; /* sub stripes only matter for raid10 */ __le16 sub_stripes; struct btrfs_stripe stripe; /* additional stripes go here */ } __attribute__ ((__packed__)); /* * The super block basically lists the main trees of the FS. */ struct btrfs_super_block { /* The first 4 fields must match struct btrfs_header */ __u8 csum[BTRFS_CSUM_SIZE]; /* FS specific UUID, visible to user */ __u8 fsid[BTRFS_FSID_SIZE]; /* This block number */ __le64 bytenr; __le64 flags; /* Allowed to be different from the btrfs_header from here own down */ __le64 magic; __le64 generation; __le64 root; __le64 chunk_root; __le64 log_root; /* * This member has never been utilized since the very beginning, thus * it's always 0 regardless of kernel version. We always use * generation + 1 to read log tree root. So here we mark it deprecated. */ __le64 __unused_log_root_transid; __le64 total_bytes; __le64 bytes_used; __le64 root_dir_objectid; __le64 num_devices; __le32 sectorsize; __le32 nodesize; __le32 __unused_leafsize; __le32 stripesize; __le32 sys_chunk_array_size; __le64 chunk_root_generation; __le64 compat_flags; __le64 compat_ro_flags; __le64 incompat_flags; __le16 csum_type; __u8 root_level; __u8 chunk_root_level; __u8 log_root_level; struct btrfs_dev_item dev_item; char label[BTRFS_LABEL_SIZE]; __le64 cache_generation; __le64 uuid_tree_generation; /* The UUID written into btree blocks */ __u8 metadata_uuid[BTRFS_FSID_SIZE]; __u64 nr_global_roots; /* Future expansion */ __le64 reserved[27]; __u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE]; struct btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS]; /* Padded to 4096 bytes */ __u8 padding[565]; } __attribute__ ((__packed__)); #define BTRFS_FREE_SPACE_EXTENT 1 #define BTRFS_FREE_SPACE_BITMAP 2 struct btrfs_free_space_entry { __le64 offset; __le64 bytes; __u8 type; } __attribute__ ((__packed__)); struct btrfs_free_space_header { struct btrfs_disk_key location; __le64 generation; __le64 num_entries; __le64 num_bitmaps; } __attribute__ ((__packed__)); struct btrfs_raid_stride { /* The id of device this raid extent lives on. */ __le64 devid; /* The physical location on disk. */ __le64 physical; } __attribute__ ((__packed__)); struct btrfs_stripe_extent { /* An array of raid strides this stripe is composed of. */ __DECLARE_FLEX_ARRAY(struct btrfs_raid_stride, strides); } __attribute__ ((__packed__)); #define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0) #define BTRFS_HEADER_FLAG_RELOC (1ULL << 1) /* Super block flags */ /* Errors detected */ #define BTRFS_SUPER_FLAG_ERROR (1ULL << 2) #define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32) #define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33) #define BTRFS_SUPER_FLAG_METADUMP_V2 (1ULL << 34) #define BTRFS_SUPER_FLAG_CHANGING_FSID (1ULL << 35) #define BTRFS_SUPER_FLAG_CHANGING_FSID_V2 (1ULL << 36) /* * Those are temporaray flags utilized by btrfs-progs to do offline conversion. * They are rejected by kernel. * But still keep them all here to avoid conflicts. */ #define BTRFS_SUPER_FLAG_CHANGING_BG_TREE (1ULL << 38) #define BTRFS_SUPER_FLAG_CHANGING_DATA_CSUM (1ULL << 39) #define BTRFS_SUPER_FLAG_CHANGING_META_CSUM (1ULL << 40) /* * items in the extent btree are used to record the objectid of the * owner of the block and the number of references */ struct btrfs_extent_item { __le64 refs; __le64 generation; __le64 flags; } __attribute__ ((__packed__)); struct btrfs_extent_item_v0 { __le32 refs; } __attribute__ ((__packed__)); #define BTRFS_EXTENT_FLAG_DATA (1ULL << 0) #define BTRFS_EXTENT_FLAG_TREE_BLOCK (1ULL << 1) /* following flags only apply to tree blocks */ /* use full backrefs for extent pointers in the block */ #define BTRFS_BLOCK_FLAG_FULL_BACKREF (1ULL << 8) #define BTRFS_BACKREF_REV_MAX 256 #define BTRFS_BACKREF_REV_SHIFT 56 #define BTRFS_BACKREF_REV_MASK (((u64)BTRFS_BACKREF_REV_MAX - 1) << \ BTRFS_BACKREF_REV_SHIFT) #define BTRFS_OLD_BACKREF_REV 0 #define BTRFS_MIXED_BACKREF_REV 1 /* * this flag is only used internally by scrub and may be changed at any time * it is only declared here to avoid collisions */ #define BTRFS_EXTENT_FLAG_SUPER (1ULL << 48) struct btrfs_tree_block_info { struct btrfs_disk_key key; __u8 level; } __attribute__ ((__packed__)); struct btrfs_extent_data_ref { __le64 root; __le64 objectid; __le64 offset; __le32 count; } __attribute__ ((__packed__)); struct btrfs_shared_data_ref { __le32 count; } __attribute__ ((__packed__)); struct btrfs_extent_owner_ref { __le64 root_id; } __attribute__ ((__packed__)); struct btrfs_extent_inline_ref { __u8 type; __le64 offset; } __attribute__ ((__packed__)); /* dev extents record free space on individual devices. The owner * field points back to the chunk allocation mapping tree that allocated * the extent. The chunk tree uuid field is a way to double check the owner */ struct btrfs_dev_extent { __le64 chunk_tree; __le64 chunk_objectid; __le64 chunk_offset; __le64 length; __u8 chunk_tree_uuid[BTRFS_UUID_SIZE]; } __attribute__ ((__packed__)); struct btrfs_inode_ref { __le64 index; __le16 name_len; /* name goes here */ } __attribute__ ((__packed__)); struct btrfs_inode_extref { __le64 parent_objectid; __le64 index; __le16 name_len; __u8 name[]; /* name goes here */ } __attribute__ ((__packed__)); struct btrfs_timespec { __le64 sec; __le32 nsec; } __attribute__ ((__packed__)); struct btrfs_inode_item { /* nfs style generation number */ __le64 generation; /* transid that last touched this inode */ __le64 transid; __le64 size; __le64 nbytes; __le64 block_group; __le32 nlink; __le32 uid; __le32 gid; __le32 mode; __le64 rdev; __le64 flags; /* modification sequence number for NFS */ __le64 sequence; /* * a little future expansion, for more than this we can * just grow the inode item and version it */ __le64 reserved[4]; struct btrfs_timespec atime; struct btrfs_timespec ctime; struct btrfs_timespec mtime; struct btrfs_timespec otime; } __attribute__ ((__packed__)); struct btrfs_dir_log_item { __le64 end; } __attribute__ ((__packed__)); struct btrfs_dir_item { struct btrfs_disk_key location; __le64 transid; __le16 data_len; __le16 name_len; __u8 type; } __attribute__ ((__packed__)); #define BTRFS_ROOT_SUBVOL_RDONLY (1ULL << 0) /* * Internal in-memory flag that a subvolume has been marked for deletion but * still visible as a directory */ #define BTRFS_ROOT_SUBVOL_DEAD (1ULL << 48) struct btrfs_root_item { struct btrfs_inode_item inode; __le64 generation; __le64 root_dirid; __le64 bytenr; __le64 byte_limit; __le64 bytes_used; __le64 last_snapshot; __le64 flags; __le32 refs; struct btrfs_disk_key drop_progress; __u8 drop_level; __u8 level; /* * The following fields appear after subvol_uuids+subvol_times * were introduced. */ /* * This generation number is used to test if the new fields are valid * and up to date while reading the root item. Every time the root item * is written out, the "generation" field is copied into this field. If * anyone ever mounted the fs with an older kernel, we will have * mismatching generation values here and thus must invalidate the * new fields. See btrfs_update_root and btrfs_find_last_root for * details. * the offset of generation_v2 is also used as the start for the memset * when invalidating the fields. */ __le64 generation_v2; __u8 uuid[BTRFS_UUID_SIZE]; __u8 parent_uuid[BTRFS_UUID_SIZE]; __u8 received_uuid[BTRFS_UUID_SIZE]; __le64 ctransid; /* updated when an inode changes */ __le64 otransid; /* trans when created */ __le64 stransid; /* trans when sent. non-zero for received subvol */ __le64 rtransid; /* trans when received. non-zero for received subvol */ struct btrfs_timespec ctime; struct btrfs_timespec otime; struct btrfs_timespec stime; struct btrfs_timespec rtime; __le64 reserved[8]; /* for future */ } __attribute__ ((__packed__)); /* * Btrfs root item used to be smaller than current size. The old format ends * at where member generation_v2 is. */ static inline __u32 btrfs_legacy_root_item_size(void) { return offsetof(struct btrfs_root_item, generation_v2); } /* * this is used for both forward and backward root refs */ struct btrfs_root_ref { __le64 dirid; __le64 sequence; __le16 name_len; } __attribute__ ((__packed__)); struct btrfs_disk_balance_args { /* * profiles to operate on, single is denoted by * BTRFS_AVAIL_ALLOC_BIT_SINGLE */ __le64 profiles; /* * usage filter * BTRFS_BALANCE_ARGS_USAGE with a single value means '0..N' * BTRFS_BALANCE_ARGS_USAGE_RANGE - range syntax, min..max */ union { __le64 usage; struct { __le32 usage_min; __le32 usage_max; }; }; /* devid filter */ __le64 devid; /* devid subset filter [pstart..pend) */ __le64 pstart; __le64 pend; /* btrfs virtual address space subset filter [vstart..vend) */ __le64 vstart; __le64 vend; /* * profile to convert to, single is denoted by * BTRFS_AVAIL_ALLOC_BIT_SINGLE */ __le64 target; /* BTRFS_BALANCE_ARGS_* */ __le64 flags; /* * BTRFS_BALANCE_ARGS_LIMIT with value 'limit' * BTRFS_BALANCE_ARGS_LIMIT_RANGE - the extend version can use minimum * and maximum */ union { __le64 limit; struct { __le32 limit_min; __le32 limit_max; }; }; /* * Process chunks that cross stripes_min..stripes_max devices, * BTRFS_BALANCE_ARGS_STRIPES_RANGE */ __le32 stripes_min; __le32 stripes_max; __le64 unused[6]; } __attribute__ ((__packed__)); /* * store balance parameters to disk so that balance can be properly * resumed after crash or unmount */ struct btrfs_balance_item { /* BTRFS_BALANCE_* */ __le64 flags; struct btrfs_disk_balance_args data; struct btrfs_disk_balance_args meta; struct btrfs_disk_balance_args sys; __le64 unused[4]; } __attribute__ ((__packed__)); enum { BTRFS_FILE_EXTENT_INLINE = 0, BTRFS_FILE_EXTENT_REG = 1, BTRFS_FILE_EXTENT_PREALLOC = 2, BTRFS_NR_FILE_EXTENT_TYPES = 3, }; struct btrfs_file_extent_item { /* * transaction id that created this extent */ __le64 generation; /* * max number of bytes to hold this extent in ram * when we split a compressed extent we can't know how big * each of the resulting pieces will be. So, this is * an upper limit on the size of the extent in ram instead of * an exact limit. */ __le64 ram_bytes; /* * 32 bits for the various ways we might encode the data, * including compression and encryption. If any of these * are set to something a given disk format doesn't understand * it is treated like an incompat flag for reading and writing, * but not for stat. */ __u8 compression; __u8 encryption; __le16 other_encoding; /* spare for later use */ /* are we inline data or a real extent? */ __u8 type; /* * disk space consumed by the extent, checksum blocks are included * in these numbers * * At this offset in the structure, the inline extent data start. */ __le64 disk_bytenr; __le64 disk_num_bytes; /* * the logical offset in file blocks (no csums) * this extent record is for. This allows a file extent to point * into the middle of an existing extent on disk, sharing it * between two snapshots (useful if some bytes in the middle of the * extent have changed */ __le64 offset; /* * the logical number of file blocks (no csums included). This * always reflects the size uncompressed and without encoding. */ __le64 num_bytes; } __attribute__ ((__packed__)); struct btrfs_csum_item { __u8 csum; } __attribute__ ((__packed__)); struct btrfs_dev_stats_item { /* * grow this item struct at the end for future enhancements and keep * the existing values unchanged */ __le64 values[BTRFS_DEV_STAT_VALUES_MAX]; } __attribute__ ((__packed__)); #define BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_ALWAYS 0 #define BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_AVOID 1 struct btrfs_dev_replace_item { /* * grow this item struct at the end for future enhancements and keep * the existing values unchanged */ __le64 src_devid; __le64 cursor_left; __le64 cursor_right; __le64 cont_reading_from_srcdev_mode; __le64 replace_state; __le64 time_started; __le64 time_stopped; __le64 num_write_errors; __le64 num_uncorrectable_read_errors; } __attribute__ ((__packed__)); /* different types of block groups (and chunks) */ #define BTRFS_BLOCK_GROUP_DATA (1ULL << 0) #define BTRFS_BLOCK_GROUP_SYSTEM (1ULL << 1) #define BTRFS_BLOCK_GROUP_METADATA (1ULL << 2) #define BTRFS_BLOCK_GROUP_RAID0 (1ULL << 3) #define BTRFS_BLOCK_GROUP_RAID1 (1ULL << 4) #define BTRFS_BLOCK_GROUP_DUP (1ULL << 5) #define BTRFS_BLOCK_GROUP_RAID10 (1ULL << 6) #define BTRFS_BLOCK_GROUP_RAID5 (1ULL << 7) #define BTRFS_BLOCK_GROUP_RAID6 (1ULL << 8) #define BTRFS_BLOCK_GROUP_RAID1C3 (1ULL << 9) #define BTRFS_BLOCK_GROUP_RAID1C4 (1ULL << 10) #define BTRFS_BLOCK_GROUP_RESERVED (BTRFS_AVAIL_ALLOC_BIT_SINGLE | \ BTRFS_SPACE_INFO_GLOBAL_RSV) #define BTRFS_BLOCK_GROUP_TYPE_MASK (BTRFS_BLOCK_GROUP_DATA | \ BTRFS_BLOCK_GROUP_SYSTEM | \ BTRFS_BLOCK_GROUP_METADATA) #define BTRFS_BLOCK_GROUP_PROFILE_MASK (BTRFS_BLOCK_GROUP_RAID0 | \ BTRFS_BLOCK_GROUP_RAID1 | \ BTRFS_BLOCK_GROUP_RAID1C3 | \ BTRFS_BLOCK_GROUP_RAID1C4 | \ BTRFS_BLOCK_GROUP_RAID5 | \ BTRFS_BLOCK_GROUP_RAID6 | \ BTRFS_BLOCK_GROUP_DUP | \ BTRFS_BLOCK_GROUP_RAID10) #define BTRFS_BLOCK_GROUP_RAID56_MASK (BTRFS_BLOCK_GROUP_RAID5 | \ BTRFS_BLOCK_GROUP_RAID6) #define BTRFS_BLOCK_GROUP_RAID1_MASK (BTRFS_BLOCK_GROUP_RAID1 | \ BTRFS_BLOCK_GROUP_RAID1C3 | \ BTRFS_BLOCK_GROUP_RAID1C4) /* * We need a bit for restriper to be able to tell when chunks of type * SINGLE are available. This "extended" profile format is used in * fs_info->avail_*_alloc_bits (in-memory) and balance item fields * (on-disk). The corresponding on-disk bit in chunk.type is reserved * to avoid remappings between two formats in future. */ #define BTRFS_AVAIL_ALLOC_BIT_SINGLE (1ULL << 48) /* * A fake block group type that is used to communicate global block reserve * size to userspace via the SPACE_INFO ioctl. */ #define BTRFS_SPACE_INFO_GLOBAL_RSV (1ULL << 49) #define BTRFS_EXTENDED_PROFILE_MASK (BTRFS_BLOCK_GROUP_PROFILE_MASK | \ BTRFS_AVAIL_ALLOC_BIT_SINGLE) static inline __u64 chunk_to_extended(__u64 flags) { if ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0) flags |= BTRFS_AVAIL_ALLOC_BIT_SINGLE; return flags; } static inline __u64 extended_to_chunk(__u64 flags) { return flags & ~BTRFS_AVAIL_ALLOC_BIT_SINGLE; } struct btrfs_block_group_item { __le64 used; __le64 chunk_objectid; __le64 flags; } __attribute__ ((__packed__)); struct btrfs_free_space_info { __le32 extent_count; __le32 flags; } __attribute__ ((__packed__)); #define BTRFS_FREE_SPACE_USING_BITMAPS (1ULL << 0) #define BTRFS_QGROUP_LEVEL_SHIFT 48 static inline __u16 btrfs_qgroup_level(__u64 qgroupid) { return (__u16)(qgroupid >> BTRFS_QGROUP_LEVEL_SHIFT); } /* * is subvolume quota turned on? */ #define BTRFS_QGROUP_STATUS_FLAG_ON (1ULL << 0) /* * RESCAN is set during the initialization phase */ #define BTRFS_QGROUP_STATUS_FLAG_RESCAN (1ULL << 1) /* * Some qgroup entries are known to be out of date, * either because the configuration has changed in a way that * makes a rescan necessary, or because the fs has been mounted * with a non-qgroup-aware version. * Turning qouta off and on again makes it inconsistent, too. */ #define BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT (1ULL << 2) /* * Whether or not this filesystem is using simple quotas. Not exactly the * incompat bit, because we support using simple quotas, disabling it, then * going back to full qgroup quotas. */ #define BTRFS_QGROUP_STATUS_FLAG_SIMPLE_MODE (1ULL << 3) #define BTRFS_QGROUP_STATUS_FLAGS_MASK (BTRFS_QGROUP_STATUS_FLAG_ON | \ BTRFS_QGROUP_STATUS_FLAG_RESCAN | \ BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT | \ BTRFS_QGROUP_STATUS_FLAG_SIMPLE_MODE) #define BTRFS_QGROUP_STATUS_VERSION 1 struct btrfs_qgroup_status_item { __le64 version; /* * the generation is updated during every commit. As older * versions of btrfs are not aware of qgroups, it will be * possible to detect inconsistencies by checking the * generation on mount time */ __le64 generation; /* flag definitions see above */ __le64 flags; /* * only used during scanning to record the progress * of the scan. It contains a logical address */ __le64 rescan; /* * The generation when quotas were last enabled. Used by simple quotas to * avoid decrementing when freeing an extent that was written before * enable. * * Set only if flags contain BTRFS_QGROUP_STATUS_FLAG_SIMPLE_MODE. */ __le64 enable_gen; } __attribute__ ((__packed__)); struct btrfs_qgroup_info_item { __le64 generation; __le64 rfer; __le64 rfer_cmpr; __le64 excl; __le64 excl_cmpr; } __attribute__ ((__packed__)); struct btrfs_qgroup_limit_item { /* * only updated when any of the other values change */ __le64 flags; __le64 max_rfer; __le64 max_excl; __le64 rsv_rfer; __le64 rsv_excl; } __attribute__ ((__packed__)); struct btrfs_verity_descriptor_item { /* Size of the verity descriptor in bytes */ __le64 size; /* * When we implement support for fscrypt, we will need to encrypt the * Merkle tree for encrypted verity files. These 128 bits are for the * eventual storage of an fscrypt initialization vector. */ __le64 reserved[2]; __u8 encryption; } __attribute__ ((__packed__)); #endif /* _BTRFS_CTREE_H_ */ |
36 51 104 338 254 1 1 1 7119 56 56 10 46 46 56 55 3847 4180 2953 594 1918 2 825 573 228 4 80 15 3581 3114 3574 3577 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_SCHED_MM_H #define _LINUX_SCHED_MM_H #include <linux/kernel.h> #include <linux/atomic.h> #include <linux/sched.h> #include <linux/mm_types.h> #include <linux/gfp.h> #include <linux/sync_core.h> #include <linux/sched/coredump.h> /* * Routines for handling mm_structs */ extern struct mm_struct *mm_alloc(void); /** * mmgrab() - Pin a &struct mm_struct. * @mm: The &struct mm_struct to pin. * * Make sure that @mm will not get freed even after the owning task * exits. This doesn't guarantee that the associated address space * will still exist later on and mmget_not_zero() has to be used before * accessing it. * * This is a preferred way to pin @mm for a longer/unbounded amount * of time. * * Use mmdrop() to release the reference acquired by mmgrab(). * * See also <Documentation/mm/active_mm.rst> for an in-depth explanation * of &mm_struct.mm_count vs &mm_struct.mm_users. */ static inline void mmgrab(struct mm_struct *mm) { atomic_inc(&mm->mm_count); } static inline void smp_mb__after_mmgrab(void) { smp_mb__after_atomic(); } extern void __mmdrop(struct mm_struct *mm); static inline void mmdrop(struct mm_struct *mm) { /* * The implicit full barrier implied by atomic_dec_and_test() is * required by the membarrier system call before returning to * user-space, after storing to rq->curr. */ if (unlikely(atomic_dec_and_test(&mm->mm_count))) __mmdrop(mm); } #ifdef CONFIG_PREEMPT_RT /* * RCU callback for delayed mm drop. Not strictly RCU, but call_rcu() is * by far the least expensive way to do that. */ static inline void __mmdrop_delayed(struct rcu_head *rhp) { struct mm_struct *mm = container_of(rhp, struct mm_struct, delayed_drop); __mmdrop(mm); } /* * Invoked from finish_task_switch(). Delegates the heavy lifting on RT * kernels via RCU. */ static inline void mmdrop_sched(struct mm_struct *mm) { /* Provides a full memory barrier. See mmdrop() */ if (atomic_dec_and_test(&mm->mm_count)) call_rcu(&mm->delayed_drop, __mmdrop_delayed); } #else static inline void mmdrop_sched(struct mm_struct *mm) { mmdrop(mm); } #endif /* Helpers for lazy TLB mm refcounting */ static inline void mmgrab_lazy_tlb(struct mm_struct *mm) { if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_REFCOUNT)) mmgrab(mm); } static inline void mmdrop_lazy_tlb(struct mm_struct *mm) { if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_REFCOUNT)) { mmdrop(mm); } else { /* * mmdrop_lazy_tlb must provide a full memory barrier, see the * membarrier comment finish_task_switch which relies on this. */ smp_mb(); } } static inline void mmdrop_lazy_tlb_sched(struct mm_struct *mm) { if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_REFCOUNT)) mmdrop_sched(mm); else smp_mb(); /* see mmdrop_lazy_tlb() above */ } /** * mmget() - Pin the address space associated with a &struct mm_struct. * @mm: The address space to pin. * * Make sure that the address space of the given &struct mm_struct doesn't * go away. This does not protect against parts of the address space being * modified or freed, however. * * Never use this function to pin this address space for an * unbounded/indefinite amount of time. * * Use mmput() to release the reference acquired by mmget(). * * See also <Documentation/mm/active_mm.rst> for an in-depth explanation * of &mm_struct.mm_count vs &mm_struct.mm_users. */ static inline void mmget(struct mm_struct *mm) { atomic_inc(&mm->mm_users); } static inline bool mmget_not_zero(struct mm_struct *mm) { return atomic_inc_not_zero(&mm->mm_users); } /* mmput gets rid of the mappings and all user-space */ extern void mmput(struct mm_struct *); #ifdef CONFIG_MMU /* same as above but performs the slow path from the async context. Can * be called from the atomic context as well */ void mmput_async(struct mm_struct *); #endif /* Grab a reference to a task's mm, if it is not already going away */ extern struct mm_struct *get_task_mm(struct task_struct *task); /* * Grab a reference to a task's mm, if it is not already going away * and ptrace_may_access with the mode parameter passed to it * succeeds. */ extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode); /* Remove the current tasks stale references to the old mm_struct on exit() */ extern void exit_mm_release(struct task_struct *, struct mm_struct *); /* Remove the current tasks stale references to the old mm_struct on exec() */ extern void exec_mm_release(struct task_struct *, struct mm_struct *); #ifdef CONFIG_MEMCG extern void mm_update_next_owner(struct mm_struct *mm); #else static inline void mm_update_next_owner(struct mm_struct *mm) { } #endif /* CONFIG_MEMCG */ #ifdef CONFIG_MMU #ifndef arch_get_mmap_end #define arch_get_mmap_end(addr, len, flags) (TASK_SIZE) #endif #ifndef arch_get_mmap_base #define arch_get_mmap_base(addr, base) (base) #endif extern void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack); unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags, vm_flags_t vm_flags); unsigned long arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags, vm_flags_t); unsigned long mm_get_unmapped_area(struct mm_struct *mm, struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); unsigned long mm_get_unmapped_area_vmflags(struct mm_struct *mm, struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags, vm_flags_t vm_flags); unsigned long generic_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags, vm_flags_t vm_flags); unsigned long generic_get_unmapped_area_topdown(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags, vm_flags_t vm_flags); #else static inline void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack) {} #endif static inline bool in_vfork(struct task_struct *tsk) { bool ret; /* * need RCU to access ->real_parent if CLONE_VM was used along with * CLONE_PARENT. * * We check real_parent->mm == tsk->mm because CLONE_VFORK does not * imply CLONE_VM * * CLONE_VFORK can be used with CLONE_PARENT/CLONE_THREAD and thus * ->real_parent is not necessarily the task doing vfork(), so in * theory we can't rely on task_lock() if we want to dereference it. * * And in this case we can't trust the real_parent->mm == tsk->mm * check, it can be false negative. But we do not care, if init or * another oom-unkillable task does this it should blame itself. */ rcu_read_lock(); ret = tsk->vfork_done && rcu_dereference(tsk->real_parent)->mm == tsk->mm; rcu_read_unlock(); return ret; } /* * Applies per-task gfp context to the given allocation flags. * PF_MEMALLOC_NOIO implies GFP_NOIO * PF_MEMALLOC_NOFS implies GFP_NOFS * PF_MEMALLOC_PIN implies !GFP_MOVABLE */ static inline gfp_t current_gfp_context(gfp_t flags) { unsigned int pflags = READ_ONCE(current->flags); if (unlikely(pflags & (PF_MEMALLOC_NOIO | PF_MEMALLOC_NOFS | PF_MEMALLOC_NORECLAIM | PF_MEMALLOC_NOWARN | PF_MEMALLOC_PIN))) { /* * Stronger flags before weaker flags: * NORECLAIM implies NOIO, which in turn implies NOFS */ if (pflags & PF_MEMALLOC_NORECLAIM) flags &= ~__GFP_DIRECT_RECLAIM; else if (pflags & PF_MEMALLOC_NOIO) flags &= ~(__GFP_IO | __GFP_FS); else if (pflags & PF_MEMALLOC_NOFS) flags &= ~__GFP_FS; if (pflags & PF_MEMALLOC_NOWARN) flags |= __GFP_NOWARN; if (pflags & PF_MEMALLOC_PIN) flags &= ~__GFP_MOVABLE; } return flags; } #ifdef CONFIG_LOCKDEP extern void __fs_reclaim_acquire(unsigned long ip); extern void __fs_reclaim_release(unsigned long ip); extern void fs_reclaim_acquire(gfp_t gfp_mask); extern void fs_reclaim_release(gfp_t gfp_mask); #else static inline void __fs_reclaim_acquire(unsigned long ip) { } static inline void __fs_reclaim_release(unsigned long ip) { } static inline void fs_reclaim_acquire(gfp_t gfp_mask) { } static inline void fs_reclaim_release(gfp_t gfp_mask) { } #endif /* Any memory-allocation retry loop should use * memalloc_retry_wait(), and pass the flags for the most * constrained allocation attempt that might have failed. * This provides useful documentation of where loops are, * and a central place to fine tune the waiting as the MM * implementation changes. */ static inline void memalloc_retry_wait(gfp_t gfp_flags) { /* We use io_schedule_timeout because waiting for memory * typically included waiting for dirty pages to be * written out, which requires IO. */ __set_current_state(TASK_UNINTERRUPTIBLE); gfp_flags = current_gfp_context(gfp_flags); if (gfpflags_allow_blocking(gfp_flags) && !(gfp_flags & __GFP_NORETRY)) /* Probably waited already, no need for much more */ io_schedule_timeout(1); else /* Probably didn't wait, and has now released a lock, * so now is a good time to wait */ io_schedule_timeout(HZ/50); } /** * might_alloc - Mark possible allocation sites * @gfp_mask: gfp_t flags that would be used to allocate * * Similar to might_sleep() and other annotations, this can be used in functions * that might allocate, but often don't. Compiles to nothing without * CONFIG_LOCKDEP. Includes a conditional might_sleep() if @gfp allows blocking. */ static inline void might_alloc(gfp_t gfp_mask) { fs_reclaim_acquire(gfp_mask); fs_reclaim_release(gfp_mask); might_sleep_if(gfpflags_allow_blocking(gfp_mask)); } /** * memalloc_flags_save - Add a PF_* flag to current->flags, save old value * * This allows PF_* flags to be conveniently added, irrespective of current * value, and then the old version restored with memalloc_flags_restore(). */ static inline unsigned memalloc_flags_save(unsigned flags) { unsigned oldflags = ~current->flags & flags; current->flags |= flags; return oldflags; } static inline void memalloc_flags_restore(unsigned flags) { current->flags &= ~flags; } /** * memalloc_noio_save - Marks implicit GFP_NOIO allocation scope. * * This functions marks the beginning of the GFP_NOIO allocation scope. * All further allocations will implicitly drop __GFP_IO flag and so * they are safe for the IO critical section from the allocation recursion * point of view. Use memalloc_noio_restore to end the scope with flags * returned by this function. * * Context: This function is safe to be used from any context. * Return: The saved flags to be passed to memalloc_noio_restore. */ static inline unsigned int memalloc_noio_save(void) { return memalloc_flags_save(PF_MEMALLOC_NOIO); } /** * memalloc_noio_restore - Ends the implicit GFP_NOIO scope. * @flags: Flags to restore. * * Ends the implicit GFP_NOIO scope started by memalloc_noio_save function. * Always make sure that the given flags is the return value from the * pairing memalloc_noio_save call. */ static inline void memalloc_noio_restore(unsigned int flags) { memalloc_flags_restore(flags); } /** * memalloc_nofs_save - Marks implicit GFP_NOFS allocation scope. * * This functions marks the beginning of the GFP_NOFS allocation scope. * All further allocations will implicitly drop __GFP_FS flag and so * they are safe for the FS critical section from the allocation recursion * point of view. Use memalloc_nofs_restore to end the scope with flags * returned by this function. * * Context: This function is safe to be used from any context. * Return: The saved flags to be passed to memalloc_nofs_restore. */ static inline unsigned int memalloc_nofs_save(void) { return memalloc_flags_save(PF_MEMALLOC_NOFS); } /** * memalloc_nofs_restore - Ends the implicit GFP_NOFS scope. * @flags: Flags to restore. * * Ends the implicit GFP_NOFS scope started by memalloc_nofs_save function. * Always make sure that the given flags is the return value from the * pairing memalloc_nofs_save call. */ static inline void memalloc_nofs_restore(unsigned int flags) { memalloc_flags_restore(flags); } /** * memalloc_noreclaim_save - Marks implicit __GFP_MEMALLOC scope. * * This function marks the beginning of the __GFP_MEMALLOC allocation scope. * All further allocations will implicitly add the __GFP_MEMALLOC flag, which * prevents entering reclaim and allows access to all memory reserves. This * should only be used when the caller guarantees the allocation will allow more * memory to be freed very shortly, i.e. it needs to allocate some memory in * the process of freeing memory, and cannot reclaim due to potential recursion. * * Users of this scope have to be extremely careful to not deplete the reserves * completely and implement a throttling mechanism which controls the * consumption of the reserve based on the amount of freed memory. Usage of a * pre-allocated pool (e.g. mempool) should be always considered before using * this scope. * * Individual allocations under the scope can opt out using __GFP_NOMEMALLOC * * Context: This function should not be used in an interrupt context as that one * does not give PF_MEMALLOC access to reserves. * See __gfp_pfmemalloc_flags(). * Return: The saved flags to be passed to memalloc_noreclaim_restore. */ static inline unsigned int memalloc_noreclaim_save(void) { return memalloc_flags_save(PF_MEMALLOC); } /** * memalloc_noreclaim_restore - Ends the implicit __GFP_MEMALLOC scope. * @flags: Flags to restore. * * Ends the implicit __GFP_MEMALLOC scope started by memalloc_noreclaim_save * function. Always make sure that the given flags is the return value from the * pairing memalloc_noreclaim_save call. */ static inline void memalloc_noreclaim_restore(unsigned int flags) { memalloc_flags_restore(flags); } /** * memalloc_pin_save - Marks implicit ~__GFP_MOVABLE scope. * * This function marks the beginning of the ~__GFP_MOVABLE allocation scope. * All further allocations will implicitly remove the __GFP_MOVABLE flag, which * will constraint the allocations to zones that allow long term pinning, i.e. * not ZONE_MOVABLE zones. * * Return: The saved flags to be passed to memalloc_pin_restore. */ static inline unsigned int memalloc_pin_save(void) { return memalloc_flags_save(PF_MEMALLOC_PIN); } /** * memalloc_pin_restore - Ends the implicit ~__GFP_MOVABLE scope. * @flags: Flags to restore. * * Ends the implicit ~__GFP_MOVABLE scope started by memalloc_pin_save function. * Always make sure that the given flags is the return value from the pairing * memalloc_pin_save call. */ static inline void memalloc_pin_restore(unsigned int flags) { memalloc_flags_restore(flags); } #ifdef CONFIG_MEMCG DECLARE_PER_CPU(struct mem_cgroup *, int_active_memcg); /** * set_active_memcg - Starts the remote memcg charging scope. * @memcg: memcg to charge. * * This function marks the beginning of the remote memcg charging scope. All the * __GFP_ACCOUNT allocations till the end of the scope will be charged to the * given memcg. * * Please, make sure that caller has a reference to the passed memcg structure, * so its lifetime is guaranteed to exceed the scope between two * set_active_memcg() calls. * * NOTE: This function can nest. Users must save the return value and * reset the previous value after their own charging scope is over. */ static inline struct mem_cgroup * set_active_memcg(struct mem_cgroup *memcg) { struct mem_cgroup *old; if (!in_task()) { old = this_cpu_read(int_active_memcg); this_cpu_write(int_active_memcg, memcg); } else { old = current->active_memcg; current->active_memcg = memcg; } return old; } #else static inline struct mem_cgroup * set_active_memcg(struct mem_cgroup *memcg) { return NULL; } #endif #ifdef CONFIG_MEMBARRIER enum { MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY = (1U << 0), MEMBARRIER_STATE_PRIVATE_EXPEDITED = (1U << 1), MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY = (1U << 2), MEMBARRIER_STATE_GLOBAL_EXPEDITED = (1U << 3), MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY = (1U << 4), MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE = (1U << 5), MEMBARRIER_STATE_PRIVATE_EXPEDITED_RSEQ_READY = (1U << 6), MEMBARRIER_STATE_PRIVATE_EXPEDITED_RSEQ = (1U << 7), }; enum { MEMBARRIER_FLAG_SYNC_CORE = (1U << 0), MEMBARRIER_FLAG_RSEQ = (1U << 1), }; #ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS #include <asm/membarrier.h> #endif static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm) { if (current->mm != mm) return; if (likely(!(atomic_read(&mm->membarrier_state) & MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE))) return; sync_core_before_usermode(); } extern void membarrier_exec_mmap(struct mm_struct *mm); extern void membarrier_update_current_mm(struct mm_struct *next_mm); #else #ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS static inline void membarrier_arch_switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { } #endif static inline void membarrier_exec_mmap(struct mm_struct *mm) { } static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm) { } static inline void membarrier_update_current_mm(struct mm_struct *next_mm) { } #endif #endif /* _LINUX_SCHED_MM_H */ |
1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 | // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. */ #include <linux/spinlock.h> #include <linux/completion.h> #include <linux/buffer_head.h> #include <linux/exportfs.h> #include <linux/gfs2_ondisk.h> #include <linux/crc32.h> #include "gfs2.h" #include "incore.h" #include "dir.h" #include "glock.h" #include "glops.h" #include "inode.h" #include "super.h" #include "rgrp.h" #include "util.h" #define GFS2_SMALL_FH_SIZE 4 #define GFS2_LARGE_FH_SIZE 8 #define GFS2_OLD_FH_SIZE 10 static int gfs2_encode_fh(struct inode *inode, __u32 *p, int *len, struct inode *parent) { __be32 *fh = (__force __be32 *)p; struct super_block *sb = inode->i_sb; struct gfs2_inode *ip = GFS2_I(inode); if (parent && (*len < GFS2_LARGE_FH_SIZE)) { *len = GFS2_LARGE_FH_SIZE; return FILEID_INVALID; } else if (*len < GFS2_SMALL_FH_SIZE) { *len = GFS2_SMALL_FH_SIZE; return FILEID_INVALID; } fh[0] = cpu_to_be32(ip->i_no_formal_ino >> 32); fh[1] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF); fh[2] = cpu_to_be32(ip->i_no_addr >> 32); fh[3] = cpu_to_be32(ip->i_no_addr & 0xFFFFFFFF); *len = GFS2_SMALL_FH_SIZE; if (!parent || inode == d_inode(sb->s_root)) return *len; ip = GFS2_I(parent); fh[4] = cpu_to_be32(ip->i_no_formal_ino >> 32); fh[5] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF); fh[6] = cpu_to_be32(ip->i_no_addr >> 32); fh[7] = cpu_to_be32(ip->i_no_addr & 0xFFFFFFFF); *len = GFS2_LARGE_FH_SIZE; return *len; } struct get_name_filldir { struct dir_context ctx; struct gfs2_inum_host inum; char *name; }; static bool get_name_filldir(struct dir_context *ctx, const char *name, int length, loff_t offset, u64 inum, unsigned int type) { struct get_name_filldir *gnfd = container_of(ctx, struct get_name_filldir, ctx); if (inum != gnfd->inum.no_addr) return true; memcpy(gnfd->name, name, length); gnfd->name[length] = 0; return false; } static int gfs2_get_name(struct dentry *parent, char *name, struct dentry *child) { struct inode *dir = d_inode(parent); struct inode *inode = d_inode(child); struct gfs2_inode *dip, *ip; struct get_name_filldir gnfd = { .ctx.actor = get_name_filldir, .name = name }; struct gfs2_holder gh; int error; struct file_ra_state f_ra = { .start = 0 }; if (!dir) return -EINVAL; if (!S_ISDIR(dir->i_mode) || !inode) return -EINVAL; dip = GFS2_I(dir); ip = GFS2_I(inode); *name = 0; gnfd.inum.no_addr = ip->i_no_addr; gnfd.inum.no_formal_ino = ip->i_no_formal_ino; error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &gh); if (error) return error; error = gfs2_dir_read(dir, &gnfd.ctx, &f_ra); gfs2_glock_dq_uninit(&gh); if (!error && !*name) error = -ENOENT; return error; } static struct dentry *gfs2_get_parent(struct dentry *child) { return d_obtain_alias(gfs2_lookupi(d_inode(child), &gfs2_qdotdot, 1)); } static struct dentry *gfs2_get_dentry(struct super_block *sb, struct gfs2_inum_host *inum) { struct gfs2_sbd *sdp = sb->s_fs_info; struct inode *inode; if (!inum->no_formal_ino) return ERR_PTR(-ESTALE); inode = gfs2_lookup_by_inum(sdp, inum->no_addr, inum->no_formal_ino, GFS2_BLKST_DINODE); return d_obtain_alias(inode); } static struct dentry *gfs2_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { struct gfs2_inum_host this; __be32 *fh = (__force __be32 *)fid->raw; switch (fh_type) { case GFS2_SMALL_FH_SIZE: case GFS2_LARGE_FH_SIZE: case GFS2_OLD_FH_SIZE: if (fh_len < GFS2_SMALL_FH_SIZE) return NULL; this.no_formal_ino = ((u64)be32_to_cpu(fh[0])) << 32; this.no_formal_ino |= be32_to_cpu(fh[1]); this.no_addr = ((u64)be32_to_cpu(fh[2])) << 32; this.no_addr |= be32_to_cpu(fh[3]); return gfs2_get_dentry(sb, &this); default: return NULL; } } static struct dentry *gfs2_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { struct gfs2_inum_host parent; __be32 *fh = (__force __be32 *)fid->raw; switch (fh_type) { case GFS2_LARGE_FH_SIZE: case GFS2_OLD_FH_SIZE: if (fh_len < GFS2_LARGE_FH_SIZE) return NULL; parent.no_formal_ino = ((u64)be32_to_cpu(fh[4])) << 32; parent.no_formal_ino |= be32_to_cpu(fh[5]); parent.no_addr = ((u64)be32_to_cpu(fh[6])) << 32; parent.no_addr |= be32_to_cpu(fh[7]); return gfs2_get_dentry(sb, &parent); default: return NULL; } } const struct export_operations gfs2_export_ops = { .encode_fh = gfs2_encode_fh, .fh_to_dentry = gfs2_fh_to_dentry, .fh_to_parent = gfs2_fh_to_parent, .get_name = gfs2_get_name, .get_parent = gfs2_get_parent, .flags = EXPORT_OP_ASYNC_LOCK, }; |
4 1 3 4 4 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 | // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause /* * Copyright (c) Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the * LICENSE file in the root directory of this source tree) and the GPLv2 (found * in the COPYING file in the root directory of this source tree). * You may select, at your option, one of the above-listed licenses. */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/string.h> #include <linux/zstd.h> #include "common/zstd_deps.h" /* Common symbols. zstd_compress must depend on zstd_decompress. */ unsigned int zstd_is_error(size_t code) { return ZSTD_isError(code); } EXPORT_SYMBOL(zstd_is_error); zstd_error_code zstd_get_error_code(size_t code) { return ZSTD_getErrorCode(code); } EXPORT_SYMBOL(zstd_get_error_code); const char *zstd_get_error_name(size_t code) { return ZSTD_getErrorName(code); } EXPORT_SYMBOL(zstd_get_error_name); /* Decompression symbols. */ size_t zstd_dctx_workspace_bound(void) { return ZSTD_estimateDCtxSize(); } EXPORT_SYMBOL(zstd_dctx_workspace_bound); zstd_dctx *zstd_create_dctx_advanced(zstd_custom_mem custom_mem) { return ZSTD_createDCtx_advanced(custom_mem); } EXPORT_SYMBOL(zstd_create_dctx_advanced); size_t zstd_free_dctx(zstd_dctx *dctx) { return ZSTD_freeDCtx(dctx); } EXPORT_SYMBOL(zstd_free_dctx); zstd_ddict *zstd_create_ddict_byreference(const void *dict, size_t dict_size, zstd_custom_mem custom_mem) { return ZSTD_createDDict_advanced(dict, dict_size, ZSTD_dlm_byRef, ZSTD_dct_auto, custom_mem); } EXPORT_SYMBOL(zstd_create_ddict_byreference); size_t zstd_free_ddict(zstd_ddict *ddict) { return ZSTD_freeDDict(ddict); } EXPORT_SYMBOL(zstd_free_ddict); zstd_dctx *zstd_init_dctx(void *workspace, size_t workspace_size) { if (workspace == NULL) return NULL; return ZSTD_initStaticDCtx(workspace, workspace_size); } EXPORT_SYMBOL(zstd_init_dctx); size_t zstd_decompress_dctx(zstd_dctx *dctx, void *dst, size_t dst_capacity, const void *src, size_t src_size) { return ZSTD_decompressDCtx(dctx, dst, dst_capacity, src, src_size); } EXPORT_SYMBOL(zstd_decompress_dctx); size_t zstd_decompress_using_ddict(zstd_dctx *dctx, void *dst, size_t dst_capacity, const void* src, size_t src_size, const zstd_ddict* ddict) { return ZSTD_decompress_usingDDict(dctx, dst, dst_capacity, src, src_size, ddict); } EXPORT_SYMBOL(zstd_decompress_using_ddict); size_t zstd_dstream_workspace_bound(size_t max_window_size) { return ZSTD_estimateDStreamSize(max_window_size); } EXPORT_SYMBOL(zstd_dstream_workspace_bound); zstd_dstream *zstd_init_dstream(size_t max_window_size, void *workspace, size_t workspace_size) { if (workspace == NULL) return NULL; (void)max_window_size; return ZSTD_initStaticDStream(workspace, workspace_size); } EXPORT_SYMBOL(zstd_init_dstream); size_t zstd_reset_dstream(zstd_dstream *dstream) { return ZSTD_resetDStream(dstream); } EXPORT_SYMBOL(zstd_reset_dstream); size_t zstd_decompress_stream(zstd_dstream *dstream, zstd_out_buffer *output, zstd_in_buffer *input) { return ZSTD_decompressStream(dstream, output, input); } EXPORT_SYMBOL(zstd_decompress_stream); size_t zstd_find_frame_compressed_size(const void *src, size_t src_size) { return ZSTD_findFrameCompressedSize(src, src_size); } EXPORT_SYMBOL(zstd_find_frame_compressed_size); size_t zstd_get_frame_header(zstd_frame_header *header, const void *src, size_t src_size) { return ZSTD_getFrameHeader(header, src, src_size); } EXPORT_SYMBOL(zstd_get_frame_header); MODULE_LICENSE("Dual BSD/GPL"); MODULE_DESCRIPTION("Zstd Decompressor"); |
15 40 5 10 10 19 19 10 10 10 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 | #ifndef __NET_SCHED_CODEL_IMPL_H #define __NET_SCHED_CODEL_IMPL_H /* * Codel - The Controlled-Delay Active Queue Management algorithm * * Copyright (C) 2011-2012 Kathleen Nichols <nichols@pollere.com> * Copyright (C) 2011-2012 Van Jacobson <van@pollere.net> * Copyright (C) 2012 Michael D. Taht <dave.taht@bufferbloat.net> * Copyright (C) 2012,2015 Eric Dumazet <edumazet@google.com> * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The names of the authors may not be used to endorse or promote products * derived from this software without specific prior written permission. * * Alternatively, provided that this notice is retained in full, this * software may be distributed under the terms of the GNU General * Public License ("GPL") version 2, in which case the provisions of the * GPL apply INSTEAD OF those given above. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * */ /* Controlling Queue Delay (CoDel) algorithm * ========================================= * Source : Kathleen Nichols and Van Jacobson * http://queue.acm.org/detail.cfm?id=2209336 * * Implemented on linux by Dave Taht and Eric Dumazet */ #include <net/inet_ecn.h> static void codel_params_init(struct codel_params *params) { params->interval = MS2TIME(100); params->target = MS2TIME(5); params->ce_threshold = CODEL_DISABLED_THRESHOLD; params->ce_threshold_mask = 0; params->ce_threshold_selector = 0; params->ecn = false; } static void codel_vars_init(struct codel_vars *vars) { memset(vars, 0, sizeof(*vars)); } static void codel_stats_init(struct codel_stats *stats) { stats->maxpacket = 0; } /* * http://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Iterative_methods_for_reciprocal_square_roots * new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2) * * Here, invsqrt is a fixed point number (< 1.0), 32bit mantissa, aka Q0.32 */ static void codel_Newton_step(struct codel_vars *vars) { u32 invsqrt = ((u32)vars->rec_inv_sqrt) << REC_INV_SQRT_SHIFT; u32 invsqrt2 = ((u64)invsqrt * invsqrt) >> 32; u64 val = (3LL << 32) - ((u64)vars->count * invsqrt2); val >>= 2; /* avoid overflow in following multiply */ val = (val * invsqrt) >> (32 - 2 + 1); vars->rec_inv_sqrt = val >> REC_INV_SQRT_SHIFT; } /* * CoDel control_law is t + interval/sqrt(count) * We maintain in rec_inv_sqrt the reciprocal value of sqrt(count) to avoid * both sqrt() and divide operation. */ static codel_time_t codel_control_law(codel_time_t t, codel_time_t interval, u32 rec_inv_sqrt) { return t + reciprocal_scale(interval, rec_inv_sqrt << REC_INV_SQRT_SHIFT); } static bool codel_should_drop(const struct sk_buff *skb, void *ctx, struct codel_vars *vars, struct codel_params *params, struct codel_stats *stats, codel_skb_len_t skb_len_func, codel_skb_time_t skb_time_func, u32 *backlog, codel_time_t now) { bool ok_to_drop; u32 skb_len; if (!skb) { vars->first_above_time = 0; return false; } skb_len = skb_len_func(skb); vars->ldelay = now - skb_time_func(skb); if (unlikely(skb_len > stats->maxpacket)) stats->maxpacket = skb_len; if (codel_time_before(vars->ldelay, params->target) || *backlog <= params->mtu) { /* went below - stay below for at least interval */ vars->first_above_time = 0; return false; } ok_to_drop = false; if (vars->first_above_time == 0) { /* just went above from below. If we stay above * for at least interval we'll say it's ok to drop */ vars->first_above_time = now + params->interval; } else if (codel_time_after(now, vars->first_above_time)) { ok_to_drop = true; } return ok_to_drop; } static struct sk_buff *codel_dequeue(void *ctx, u32 *backlog, struct codel_params *params, struct codel_vars *vars, struct codel_stats *stats, codel_skb_len_t skb_len_func, codel_skb_time_t skb_time_func, codel_skb_drop_t drop_func, codel_skb_dequeue_t dequeue_func) { struct sk_buff *skb = dequeue_func(vars, ctx); codel_time_t now; bool drop; if (!skb) { vars->dropping = false; return skb; } now = codel_get_time(); drop = codel_should_drop(skb, ctx, vars, params, stats, skb_len_func, skb_time_func, backlog, now); if (vars->dropping) { if (!drop) { /* sojourn time below target - leave dropping state */ vars->dropping = false; } else if (codel_time_after_eq(now, vars->drop_next)) { /* It's time for the next drop. Drop the current * packet and dequeue the next. The dequeue might * take us out of dropping state. * If not, schedule the next drop. * A large backlog might result in drop rates so high * that the next drop should happen now, * hence the while loop. */ while (vars->dropping && codel_time_after_eq(now, vars->drop_next)) { vars->count++; /* dont care of possible wrap * since there is no more divide */ codel_Newton_step(vars); if (params->ecn && INET_ECN_set_ce(skb)) { stats->ecn_mark++; vars->drop_next = codel_control_law(vars->drop_next, params->interval, vars->rec_inv_sqrt); goto end; } stats->drop_len += skb_len_func(skb); drop_func(skb, ctx); stats->drop_count++; skb = dequeue_func(vars, ctx); if (!codel_should_drop(skb, ctx, vars, params, stats, skb_len_func, skb_time_func, backlog, now)) { /* leave dropping state */ vars->dropping = false; } else { /* and schedule the next drop */ vars->drop_next = codel_control_law(vars->drop_next, params->interval, vars->rec_inv_sqrt); } } } } else if (drop) { u32 delta; if (params->ecn && INET_ECN_set_ce(skb)) { stats->ecn_mark++; } else { stats->drop_len += skb_len_func(skb); drop_func(skb, ctx); stats->drop_count++; skb = dequeue_func(vars, ctx); drop = codel_should_drop(skb, ctx, vars, params, stats, skb_len_func, skb_time_func, backlog, now); } vars->dropping = true; /* if min went above target close to when we last went below it * assume that the drop rate that controlled the queue on the * last cycle is a good starting point to control it now. */ delta = vars->count - vars->lastcount; if (delta > 1 && codel_time_before(now - vars->drop_next, 16 * params->interval)) { vars->count = delta; /* we dont care if rec_inv_sqrt approximation * is not very precise : * Next Newton steps will correct it quadratically. */ codel_Newton_step(vars); } else { vars->count = 1; vars->rec_inv_sqrt = ~0U >> REC_INV_SQRT_SHIFT; } vars->lastcount = vars->count; vars->drop_next = codel_control_law(now, params->interval, vars->rec_inv_sqrt); } end: if (skb && codel_time_after(vars->ldelay, params->ce_threshold)) { bool set_ce = true; if (params->ce_threshold_mask) { int dsfield = skb_get_dsfield(skb); set_ce = (dsfield >= 0 && (((u8)dsfield & params->ce_threshold_mask) == params->ce_threshold_selector)); } if (set_ce && INET_ECN_set_ce(skb)) stats->ce_mark++; } return skb; } #endif |
12 7 5 5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 | /* SPDX-License-Identifier: GPL-2.0-only */ /* * File: af_phonet.h * * Phonet sockets kernel definitions * * Copyright (C) 2008 Nokia Corporation. */ #ifndef AF_PHONET_H #define AF_PHONET_H #include <linux/phonet.h> #include <linux/skbuff.h> #include <net/sock.h> /* * The lower layers may not require more space, ever. Make sure it's * enough. */ #define MAX_PHONET_HEADER (8 + MAX_HEADER) /* * Every Phonet* socket has this structure first in its * protocol-specific structure under name c. */ struct pn_sock { struct sock sk; u16 sobject; u16 dobject; u8 resource; }; static inline struct pn_sock *pn_sk(struct sock *sk) { return (struct pn_sock *)sk; } extern const struct proto_ops phonet_dgram_ops; void pn_sock_init(void); struct sock *pn_find_sock_by_sa(struct net *net, const struct sockaddr_pn *sa); void pn_deliver_sock_broadcast(struct net *net, struct sk_buff *skb); void phonet_get_local_port_range(int *min, int *max); int pn_sock_hash(struct sock *sk); void pn_sock_unhash(struct sock *sk); int pn_sock_get_port(struct sock *sk, unsigned short sport); struct sock *pn_find_sock_by_res(struct net *net, u8 res); int pn_sock_bind_res(struct sock *sock, u8 res); int pn_sock_unbind_res(struct sock *sk, u8 res); void pn_sock_unbind_all_res(struct sock *sk); int pn_skb_send(struct sock *sk, struct sk_buff *skb, const struct sockaddr_pn *target); static inline struct phonethdr *pn_hdr(struct sk_buff *skb) { return (struct phonethdr *)skb_network_header(skb); } static inline struct phonetmsg *pn_msg(struct sk_buff *skb) { return (struct phonetmsg *)skb_transport_header(skb); } /* * Get the other party's sockaddr from received skb. The skb begins * with a Phonet header. */ static inline void pn_skb_get_src_sockaddr(struct sk_buff *skb, struct sockaddr_pn *sa) { struct phonethdr *ph = pn_hdr(skb); u16 obj = pn_object(ph->pn_sdev, ph->pn_sobj); sa->spn_family = AF_PHONET; pn_sockaddr_set_object(sa, obj); pn_sockaddr_set_resource(sa, ph->pn_res); memset(sa->spn_zero, 0, sizeof(sa->spn_zero)); } static inline void pn_skb_get_dst_sockaddr(struct sk_buff *skb, struct sockaddr_pn *sa) { struct phonethdr *ph = pn_hdr(skb); u16 obj = pn_object(ph->pn_rdev, ph->pn_robj); sa->spn_family = AF_PHONET; pn_sockaddr_set_object(sa, obj); pn_sockaddr_set_resource(sa, ph->pn_res); memset(sa->spn_zero, 0, sizeof(sa->spn_zero)); } /* Protocols in Phonet protocol family. */ struct phonet_protocol { const struct proto_ops *ops; struct proto *prot; int sock_type; }; int phonet_proto_register(unsigned int protocol, const struct phonet_protocol *pp); void phonet_proto_unregister(unsigned int protocol, const struct phonet_protocol *pp); int phonet_sysctl_init(void); void phonet_sysctl_exit(void); int isi_register(void); void isi_unregister(void); static inline bool sk_is_phonet(struct sock *sk) { return sk->sk_family == PF_PHONET; } static inline int phonet_sk_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) { int karg; switch (cmd) { case SIOCPNADDRESOURCE: case SIOCPNDELRESOURCE: if (get_user(karg, (int __user *)arg)) return -EFAULT; return sk->sk_prot->ioctl(sk, cmd, &karg); } /* A positive return value means that the ioctl was not processed */ return 1; } #endif |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 | // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2000-2005 Silicon Graphics, Inc. * All Rights Reserved. */ #ifndef __XFS_QUOTA_H__ #define __XFS_QUOTA_H__ #include "xfs_quota_defs.h" /* * Kernel only quota definitions and functions */ struct xfs_trans; struct xfs_buf; /* * This check is done typically without holding the inode lock; * that may seem racy, but it is harmless in the context that it is used. * The inode cannot go inactive as long a reference is kept, and * therefore if dquot(s) were attached, they'll stay consistent. * If, for example, the ownership of the inode changes while * we didn't have the inode locked, the appropriate dquot(s) will be * attached atomically. */ #define XFS_NOT_DQATTACHED(mp, ip) \ ((XFS_IS_UQUOTA_ON(mp) && (ip)->i_udquot == NULL) || \ (XFS_IS_GQUOTA_ON(mp) && (ip)->i_gdquot == NULL) || \ (XFS_IS_PQUOTA_ON(mp) && (ip)->i_pdquot == NULL)) #define XFS_QM_NEED_QUOTACHECK(mp) \ ((XFS_IS_UQUOTA_ON(mp) && \ (mp->m_sb.sb_qflags & XFS_UQUOTA_CHKD) == 0) || \ (XFS_IS_GQUOTA_ON(mp) && \ (mp->m_sb.sb_qflags & XFS_GQUOTA_CHKD) == 0) || \ (XFS_IS_PQUOTA_ON(mp) && \ (mp->m_sb.sb_qflags & XFS_PQUOTA_CHKD) == 0)) static inline uint xfs_quota_chkd_flag( xfs_dqtype_t type) { switch (type) { case XFS_DQTYPE_USER: return XFS_UQUOTA_CHKD; case XFS_DQTYPE_GROUP: return XFS_GQUOTA_CHKD; case XFS_DQTYPE_PROJ: return XFS_PQUOTA_CHKD; default: return 0; } } /* * The structure kept inside the xfs_trans_t keep track of dquot changes * within a transaction and apply them later. */ struct xfs_dqtrx { struct xfs_dquot *qt_dquot; /* the dquot this refers to */ uint64_t qt_blk_res; /* blks reserved on a dquot */ int64_t qt_bcount_delta; /* dquot blk count changes */ int64_t qt_delbcnt_delta; /* delayed dquot blk count changes */ uint64_t qt_rtblk_res; /* # blks reserved on a dquot */ uint64_t qt_rtblk_res_used;/* # blks used from reservation */ int64_t qt_rtbcount_delta;/* dquot realtime blk changes */ int64_t qt_delrtb_delta; /* delayed RT blk count changes */ uint64_t qt_ino_res; /* inode reserved on a dquot */ uint64_t qt_ino_res_used; /* inodes used from the reservation */ int64_t qt_icount_delta; /* dquot inode count changes */ }; enum xfs_apply_dqtrx_type { XFS_APPLY_DQTRX_COMMIT = 0, XFS_APPLY_DQTRX_UNRESERVE, }; /* * Parameters for applying dqtrx changes to a dquot. The hook function arg * parameter is enum xfs_apply_dqtrx_type. */ struct xfs_apply_dqtrx_params { uintptr_t tx_id; xfs_ino_t ino; xfs_dqtype_t q_type; xfs_dqid_t q_id; }; #ifdef CONFIG_XFS_QUOTA extern void xfs_trans_dup_dqinfo(struct xfs_trans *, struct xfs_trans *); extern void xfs_trans_free_dqinfo(struct xfs_trans *); extern void xfs_trans_mod_dquot_byino(struct xfs_trans *, struct xfs_inode *, uint, int64_t); extern void xfs_trans_apply_dquot_deltas(struct xfs_trans *); extern void xfs_trans_unreserve_and_mod_dquots(struct xfs_trans *); int xfs_trans_reserve_quota_nblks(struct xfs_trans *tp, struct xfs_inode *ip, int64_t dblocks, int64_t rblocks, bool force); extern int xfs_trans_reserve_quota_bydquots(struct xfs_trans *, struct xfs_mount *, struct xfs_dquot *, struct xfs_dquot *, struct xfs_dquot *, int64_t, long, uint); int xfs_trans_reserve_quota_icreate(struct xfs_trans *tp, struct xfs_dquot *udqp, struct xfs_dquot *gdqp, struct xfs_dquot *pdqp, int64_t dblocks); extern int xfs_qm_vop_dqalloc(struct xfs_inode *, kuid_t, kgid_t, prid_t, uint, struct xfs_dquot **, struct xfs_dquot **, struct xfs_dquot **); extern void xfs_qm_vop_create_dqattach(struct xfs_trans *, struct xfs_inode *, struct xfs_dquot *, struct xfs_dquot *, struct xfs_dquot *); extern int xfs_qm_vop_rename_dqattach(struct xfs_inode **); extern struct xfs_dquot *xfs_qm_vop_chown(struct xfs_trans *, struct xfs_inode *, struct xfs_dquot **, struct xfs_dquot *); extern int xfs_qm_dqattach(struct xfs_inode *); extern int xfs_qm_dqattach_locked(struct xfs_inode *ip, bool doalloc); extern void xfs_qm_dqdetach(struct xfs_inode *); extern void xfs_qm_dqrele(struct xfs_dquot *); extern void xfs_qm_statvfs(struct xfs_inode *, struct kstatfs *); extern int xfs_qm_newmount(struct xfs_mount *, uint *, uint *); extern void xfs_qm_mount_quotas(struct xfs_mount *); extern void xfs_qm_unmount(struct xfs_mount *); extern void xfs_qm_unmount_quotas(struct xfs_mount *); bool xfs_inode_near_dquot_enforcement(struct xfs_inode *ip, xfs_dqtype_t type); # ifdef CONFIG_XFS_LIVE_HOOKS void xfs_trans_mod_ino_dquot(struct xfs_trans *tp, struct xfs_inode *ip, struct xfs_dquot *dqp, unsigned int field, int64_t delta); struct xfs_quotainfo; struct xfs_dqtrx_hook { struct xfs_hook mod_hook; struct xfs_hook apply_hook; }; void xfs_dqtrx_hook_disable(void); void xfs_dqtrx_hook_enable(void); int xfs_dqtrx_hook_add(struct xfs_quotainfo *qi, struct xfs_dqtrx_hook *hook); void xfs_dqtrx_hook_del(struct xfs_quotainfo *qi, struct xfs_dqtrx_hook *hook); void xfs_dqtrx_hook_setup(struct xfs_dqtrx_hook *hook, notifier_fn_t mod_fn, notifier_fn_t apply_fn); # else # define xfs_trans_mod_ino_dquot(tp, ip, dqp, field, delta) \ xfs_trans_mod_dquot((tp), (dqp), (field), (delta)) # endif /* CONFIG_XFS_LIVE_HOOKS */ #else static inline int xfs_qm_vop_dqalloc(struct xfs_inode *ip, kuid_t kuid, kgid_t kgid, prid_t prid, uint flags, struct xfs_dquot **udqp, struct xfs_dquot **gdqp, struct xfs_dquot **pdqp) { *udqp = NULL; *gdqp = NULL; *pdqp = NULL; return 0; } #define xfs_trans_dup_dqinfo(tp, tp2) #define xfs_trans_free_dqinfo(tp) static inline void xfs_trans_mod_dquot_byino(struct xfs_trans *tp, struct xfs_inode *ip, uint field, int64_t delta) { } #define xfs_trans_apply_dquot_deltas(tp) #define xfs_trans_unreserve_and_mod_dquots(tp) static inline int xfs_trans_reserve_quota_nblks(struct xfs_trans *tp, struct xfs_inode *ip, int64_t dblocks, int64_t rblocks, bool force) { return 0; } static inline int xfs_trans_reserve_quota_bydquots(struct xfs_trans *tp, struct xfs_mount *mp, struct xfs_dquot *udqp, struct xfs_dquot *gdqp, struct xfs_dquot *pdqp, int64_t nblks, long nions, uint flags) { return 0; } static inline int xfs_trans_reserve_quota_icreate(struct xfs_trans *tp, struct xfs_dquot *udqp, struct xfs_dquot *gdqp, struct xfs_dquot *pdqp, int64_t dblocks) { return 0; } #define xfs_qm_vop_create_dqattach(tp, ip, u, g, p) #define xfs_qm_vop_rename_dqattach(it) (0) #define xfs_qm_vop_chown(tp, ip, old, new) (NULL) #define xfs_qm_dqattach(ip) (0) #define xfs_qm_dqattach_locked(ip, fl) (0) #define xfs_qm_dqdetach(ip) #define xfs_qm_dqrele(d) do { (d) = (d); } while(0) #define xfs_qm_statvfs(ip, s) do { } while(0) #define xfs_qm_newmount(mp, a, b) (0) #define xfs_qm_mount_quotas(mp) #define xfs_qm_unmount(mp) #define xfs_qm_unmount_quotas(mp) #define xfs_inode_near_dquot_enforcement(ip, type) (false) # ifdef CONFIG_XFS_LIVE_HOOKS # define xfs_dqtrx_hook_enable() ((void)0) # define xfs_dqtrx_hook_disable() ((void)0) # endif /* CONFIG_XFS_LIVE_HOOKS */ #endif /* CONFIG_XFS_QUOTA */ static inline int xfs_quota_reserve_blkres(struct xfs_inode *ip, int64_t blocks) { return xfs_trans_reserve_quota_nblks(NULL, ip, blocks, 0, false); } static inline void xfs_quota_unreserve_blkres(struct xfs_inode *ip, uint64_t blocks) { /* don't return an error as unreserving quotas can't fail */ xfs_quota_reserve_blkres(ip, -(int64_t)blocks); } extern int xfs_mount_reset_sbqflags(struct xfs_mount *); #endif /* __XFS_QUOTA_H__ */ |
1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 | // SPDX-License-Identifier: GPL-2.0 /* * (C) 2001 Clemson University and The University of Chicago * * Changes by Acxiom Corporation to add protocol version to kernel * communication, Copyright Acxiom Corporation, 2005. * * See COPYING in top-level directory. */ #include "protocol.h" #include "orangefs-kernel.h" #include "orangefs-dev-proto.h" #include "orangefs-bufmap.h" #include "orangefs-debugfs.h" #include <linux/debugfs.h> #include <linux/slab.h> /* this file implements the /dev/pvfs2-req device node */ uint32_t orangefs_userspace_version; static int open_access_count; static DEFINE_MUTEX(devreq_mutex); #define DUMP_DEVICE_ERROR() \ do { \ gossip_err("*****************************************************\n");\ gossip_err("ORANGEFS Device Error: You cannot open the device file "); \ gossip_err("\n/dev/%s more than once. Please make sure that\nthere " \ "are no ", ORANGEFS_REQDEVICE_NAME); \ gossip_err("instances of a program using this device\ncurrently " \ "running. (You must verify this!)\n"); \ gossip_err("For example, you can use the lsof program as follows:\n");\ gossip_err("'lsof | grep %s' (run this as root)\n", \ ORANGEFS_REQDEVICE_NAME); \ gossip_err(" open_access_count = %d\n", open_access_count); \ gossip_err("*****************************************************\n");\ } while (0) static int hash_func(__u64 tag, int table_size) { return do_div(tag, (unsigned int)table_size); } static void orangefs_devreq_add_op(struct orangefs_kernel_op_s *op) { int index = hash_func(op->tag, hash_table_size); list_add_tail(&op->list, &orangefs_htable_ops_in_progress[index]); } /* * find the op with this tag and remove it from the in progress * hash table. */ static struct orangefs_kernel_op_s *orangefs_devreq_remove_op(__u64 tag) { struct orangefs_kernel_op_s *op, *next; int index; index = hash_func(tag, hash_table_size); spin_lock(&orangefs_htable_ops_in_progress_lock); list_for_each_entry_safe(op, next, &orangefs_htable_ops_in_progress[index], list) { if (op->tag == tag && !op_state_purged(op) && !op_state_given_up(op)) { list_del_init(&op->list); spin_unlock(&orangefs_htable_ops_in_progress_lock); return op; } } spin_unlock(&orangefs_htable_ops_in_progress_lock); return NULL; } /* Returns whether any FS are still pending remounted */ static int mark_all_pending_mounts(void) { int unmounted = 1; struct orangefs_sb_info_s *orangefs_sb = NULL; spin_lock(&orangefs_superblocks_lock); list_for_each_entry(orangefs_sb, &orangefs_superblocks, list) { /* All of these file system require a remount */ orangefs_sb->mount_pending = 1; unmounted = 0; } spin_unlock(&orangefs_superblocks_lock); return unmounted; } /* * Determine if a given file system needs to be remounted or not * Returns -1 on error * 0 if already mounted * 1 if needs remount */ static int fs_mount_pending(__s32 fsid) { int mount_pending = -1; struct orangefs_sb_info_s *orangefs_sb = NULL; spin_lock(&orangefs_superblocks_lock); list_for_each_entry(orangefs_sb, &orangefs_superblocks, list) { if (orangefs_sb->fs_id == fsid) { mount_pending = orangefs_sb->mount_pending; break; } } spin_unlock(&orangefs_superblocks_lock); return mount_pending; } static int orangefs_devreq_open(struct inode *inode, struct file *file) { int ret = -EINVAL; /* in order to ensure that the filesystem driver sees correct UIDs */ if (file->f_cred->user_ns != &init_user_ns) { gossip_err("%s: device cannot be opened outside init_user_ns\n", __func__); goto out; } if (!(file->f_flags & O_NONBLOCK)) { gossip_err("%s: device cannot be opened in blocking mode\n", __func__); goto out; } ret = -EACCES; gossip_debug(GOSSIP_DEV_DEBUG, "client-core: opening device\n"); mutex_lock(&devreq_mutex); if (open_access_count == 0) { open_access_count = 1; ret = 0; } else { DUMP_DEVICE_ERROR(); } mutex_unlock(&devreq_mutex); out: gossip_debug(GOSSIP_DEV_DEBUG, "pvfs2-client-core: open device complete (ret = %d)\n", ret); return ret; } /* Function for read() callers into the device */ static ssize_t orangefs_devreq_read(struct file *file, char __user *buf, size_t count, loff_t *offset) { struct orangefs_kernel_op_s *op, *temp; __s32 proto_ver = ORANGEFS_KERNEL_PROTO_VERSION; static __s32 magic = ORANGEFS_DEVREQ_MAGIC; struct orangefs_kernel_op_s *cur_op; unsigned long ret; /* We do not support blocking IO. */ if (!(file->f_flags & O_NONBLOCK)) { gossip_err("%s: blocking read from client-core.\n", __func__); return -EINVAL; } /* * The client will do an ioctl to find MAX_DEV_REQ_UPSIZE, then * always read with that size buffer. */ if (count != MAX_DEV_REQ_UPSIZE) { gossip_err("orangefs: client-core tried to read wrong size\n"); return -EINVAL; } /* Check for an empty list before locking. */ if (list_empty(&orangefs_request_list)) return -EAGAIN; restart: cur_op = NULL; /* Get next op (if any) from top of list. */ spin_lock(&orangefs_request_list_lock); list_for_each_entry_safe(op, temp, &orangefs_request_list, list) { __s32 fsid; /* This lock is held past the end of the loop when we break. */ spin_lock(&op->lock); if (unlikely(op_state_purged(op) || op_state_given_up(op))) { spin_unlock(&op->lock); continue; } fsid = fsid_of_op(op); if (fsid != ORANGEFS_FS_ID_NULL) { int ret; /* Skip ops whose filesystem needs to be mounted. */ ret = fs_mount_pending(fsid); if (ret == 1) { gossip_debug(GOSSIP_DEV_DEBUG, "%s: mount pending, skipping op tag " "%llu %s\n", __func__, llu(op->tag), get_opname_string(op)); spin_unlock(&op->lock); continue; /* * Skip ops whose filesystem we don't know about unless * it is being mounted or unmounted. It is possible for * a filesystem we don't know about to be unmounted if * it fails to mount in the kernel after userspace has * been sent the mount request. */ /* XXX: is there a better way to detect this? */ } else if (ret == -1 && !(op->upcall.type == ORANGEFS_VFS_OP_FS_MOUNT || op->upcall.type == ORANGEFS_VFS_OP_GETATTR || op->upcall.type == ORANGEFS_VFS_OP_FS_UMOUNT)) { gossip_debug(GOSSIP_DEV_DEBUG, "orangefs: skipping op tag %llu %s\n", llu(op->tag), get_opname_string(op)); gossip_err( "orangefs: ERROR: fs_mount_pending %d\n", fsid); spin_unlock(&op->lock); continue; } } /* * Either this op does not pertain to a filesystem, is mounting * a filesystem, or pertains to a mounted filesystem. Let it * through. */ cur_op = op; break; } /* * At this point we either have a valid op and can continue or have not * found an op and must ask the client to try again later. */ if (!cur_op) { spin_unlock(&orangefs_request_list_lock); return -EAGAIN; } gossip_debug(GOSSIP_DEV_DEBUG, "%s: reading op tag %llu %s\n", __func__, llu(cur_op->tag), get_opname_string(cur_op)); /* * Such an op should never be on the list in the first place. If so, we * will abort. */ if (op_state_in_progress(cur_op) || op_state_serviced(cur_op)) { gossip_err("orangefs: ERROR: Current op already queued.\n"); list_del_init(&cur_op->list); spin_unlock(&cur_op->lock); spin_unlock(&orangefs_request_list_lock); return -EAGAIN; } list_del_init(&cur_op->list); spin_unlock(&orangefs_request_list_lock); spin_unlock(&cur_op->lock); /* Push the upcall out. */ ret = copy_to_user(buf, &proto_ver, sizeof(__s32)); if (ret != 0) goto error; ret = copy_to_user(buf + sizeof(__s32), &magic, sizeof(__s32)); if (ret != 0) goto error; ret = copy_to_user(buf + 2 * sizeof(__s32), &cur_op->tag, sizeof(__u64)); if (ret != 0) goto error; ret = copy_to_user(buf + 2 * sizeof(__s32) + sizeof(__u64), &cur_op->upcall, sizeof(struct orangefs_upcall_s)); if (ret != 0) goto error; spin_lock(&orangefs_htable_ops_in_progress_lock); spin_lock(&cur_op->lock); if (unlikely(op_state_given_up(cur_op))) { spin_unlock(&cur_op->lock); spin_unlock(&orangefs_htable_ops_in_progress_lock); complete(&cur_op->waitq); goto restart; } /* * Set the operation to be in progress and move it between lists since * it has been sent to the client. */ set_op_state_inprogress(cur_op); gossip_debug(GOSSIP_DEV_DEBUG, "%s: 1 op:%s: op_state:%d: process:%s:\n", __func__, get_opname_string(cur_op), cur_op->op_state, current->comm); orangefs_devreq_add_op(cur_op); spin_unlock(&cur_op->lock); spin_unlock(&orangefs_htable_ops_in_progress_lock); /* The client only asks to read one size buffer. */ return MAX_DEV_REQ_UPSIZE; error: /* * We were unable to copy the op data to the client. Put the op back in * list. If client has crashed, the op will be purged later when the * device is released. */ gossip_err("orangefs: Failed to copy data to user space\n"); spin_lock(&orangefs_request_list_lock); spin_lock(&cur_op->lock); if (likely(!op_state_given_up(cur_op))) { set_op_state_waiting(cur_op); gossip_debug(GOSSIP_DEV_DEBUG, "%s: 2 op:%s: op_state:%d: process:%s:\n", __func__, get_opname_string(cur_op), cur_op->op_state, current->comm); list_add(&cur_op->list, &orangefs_request_list); spin_unlock(&cur_op->lock); } else { spin_unlock(&cur_op->lock); complete(&cur_op->waitq); } spin_unlock(&orangefs_request_list_lock); return -EFAULT; } /* * Function for writev() callers into the device. * * Userspace should have written: * - __u32 version * - __u32 magic * - __u64 tag * - struct orangefs_downcall_s * - trailer buffer (in the case of READDIR operations) */ static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb, struct iov_iter *iter) { ssize_t ret; struct orangefs_kernel_op_s *op = NULL; struct { __u32 version; __u32 magic; __u64 tag; } head; int total = ret = iov_iter_count(iter); int downcall_size = sizeof(struct orangefs_downcall_s); int head_size = sizeof(head); gossip_debug(GOSSIP_DEV_DEBUG, "%s: total:%d: ret:%zd:\n", __func__, total, ret); if (total < MAX_DEV_REQ_DOWNSIZE) { gossip_err("%s: total:%d: must be at least:%u:\n", __func__, total, (unsigned int) MAX_DEV_REQ_DOWNSIZE); return -EFAULT; } if (!copy_from_iter_full(&head, head_size, iter)) { gossip_err("%s: failed to copy head.\n", __func__); return -EFAULT; } if (head.version < ORANGEFS_MINIMUM_USERSPACE_VERSION) { gossip_err("%s: userspace claims version" "%d, minimum version required: %d.\n", __func__, head.version, ORANGEFS_MINIMUM_USERSPACE_VERSION); return -EPROTO; } if (head.magic != ORANGEFS_DEVREQ_MAGIC) { gossip_err("Error: Device magic number does not match.\n"); return -EPROTO; } if (!orangefs_userspace_version) { orangefs_userspace_version = head.version; } else if (orangefs_userspace_version != head.version) { gossip_err("Error: userspace version changes\n"); return -EPROTO; } /* remove the op from the in progress hash table */ op = orangefs_devreq_remove_op(head.tag); if (!op) { gossip_debug(GOSSIP_DEV_DEBUG, "%s: No one's waiting for tag %llu\n", __func__, llu(head.tag)); return ret; } if (!copy_from_iter_full(&op->downcall, downcall_size, iter)) { gossip_err("%s: failed to copy downcall.\n", __func__); goto Efault; } if (op->downcall.status) goto wakeup; /* * We've successfully peeled off the head and the downcall. * Something has gone awry if total doesn't equal the * sum of head_size, downcall_size and trailer_size. */ if ((head_size + downcall_size + op->downcall.trailer_size) != total) { gossip_err("%s: funky write, head_size:%d" ": downcall_size:%d: trailer_size:%lld" ": total size:%d:\n", __func__, head_size, downcall_size, op->downcall.trailer_size, total); goto Efault; } /* Only READDIR operations should have trailers. */ if ((op->downcall.type != ORANGEFS_VFS_OP_READDIR) && (op->downcall.trailer_size != 0)) { gossip_err("%s: %x operation with trailer.", __func__, op->downcall.type); goto Efault; } /* READDIR operations should always have trailers. */ if ((op->downcall.type == ORANGEFS_VFS_OP_READDIR) && (op->downcall.trailer_size == 0)) { gossip_err("%s: %x operation with no trailer.", __func__, op->downcall.type); goto Efault; } if (op->downcall.type != ORANGEFS_VFS_OP_READDIR) goto wakeup; op->downcall.trailer_buf = vzalloc(op->downcall.trailer_size); if (!op->downcall.trailer_buf) goto Enomem; if (!copy_from_iter_full(op->downcall.trailer_buf, op->downcall.trailer_size, iter)) { gossip_err("%s: failed to copy trailer.\n", __func__); vfree(op->downcall.trailer_buf); goto Efault; } wakeup: /* * Return to vfs waitqueue, and back to service_operation * through wait_for_matching_downcall. */ spin_lock(&op->lock); if (unlikely(op_is_cancel(op))) { spin_unlock(&op->lock); put_cancel(op); } else if (unlikely(op_state_given_up(op))) { spin_unlock(&op->lock); complete(&op->waitq); } else { set_op_state_serviced(op); gossip_debug(GOSSIP_DEV_DEBUG, "%s: op:%s: op_state:%d: process:%s:\n", __func__, get_opname_string(op), op->op_state, current->comm); spin_unlock(&op->lock); } return ret; Efault: op->downcall.status = -(ORANGEFS_ERROR_BIT | 9); ret = -EFAULT; goto wakeup; Enomem: op->downcall.status = -(ORANGEFS_ERROR_BIT | 8); ret = -ENOMEM; goto wakeup; } /* * NOTE: gets called when the last reference to this device is dropped. * Using the open_access_count variable, we enforce a reference count * on this file so that it can be opened by only one process at a time. * the devreq_mutex is used to make sure all i/o has completed * before we call orangefs_bufmap_finalize, and similar such tricky * situations */ static int orangefs_devreq_release(struct inode *inode, struct file *file) { int unmounted = 0; gossip_debug(GOSSIP_DEV_DEBUG, "%s:pvfs2-client-core: exiting, closing device\n", __func__); mutex_lock(&devreq_mutex); orangefs_bufmap_finalize(); open_access_count = -1; unmounted = mark_all_pending_mounts(); gossip_debug(GOSSIP_DEV_DEBUG, "ORANGEFS Device Close: Filesystem(s) %s\n", (unmounted ? "UNMOUNTED" : "MOUNTED")); purge_waiting_ops(); purge_inprogress_ops(); orangefs_bufmap_run_down(); gossip_debug(GOSSIP_DEV_DEBUG, "pvfs2-client-core: device close complete\n"); open_access_count = 0; orangefs_userspace_version = 0; mutex_unlock(&devreq_mutex); return 0; } int is_daemon_in_service(void) { int in_service; /* * What this function does is checks if client-core is alive * based on the access count we maintain on the device. */ mutex_lock(&devreq_mutex); in_service = open_access_count == 1 ? 0 : -EIO; mutex_unlock(&devreq_mutex); return in_service; } bool __is_daemon_in_service(void) { return open_access_count == 1; } static inline long check_ioctl_command(unsigned int command) { /* Check for valid ioctl codes */ if (_IOC_TYPE(command) != ORANGEFS_DEV_MAGIC) { gossip_err("device ioctl magic numbers don't match! Did you rebuild pvfs2-client-core/libpvfs2? [cmd %x, magic %x != %x]\n", command, _IOC_TYPE(command), ORANGEFS_DEV_MAGIC); return -EINVAL; } /* and valid ioctl commands */ if (_IOC_NR(command) >= ORANGEFS_DEV_MAXNR || _IOC_NR(command) <= 0) { gossip_err("Invalid ioctl command number [%d >= %d]\n", _IOC_NR(command), ORANGEFS_DEV_MAXNR); return -ENOIOCTLCMD; } return 0; } static long dispatch_ioctl_command(unsigned int command, unsigned long arg) { static __s32 magic = ORANGEFS_DEVREQ_MAGIC; static __s32 max_up_size = MAX_DEV_REQ_UPSIZE; static __s32 max_down_size = MAX_DEV_REQ_DOWNSIZE; struct ORANGEFS_dev_map_desc user_desc; int ret = 0; int upstream_kmod = 1; struct orangefs_sb_info_s *orangefs_sb; /* mtmoore: add locking here */ switch (command) { case ORANGEFS_DEV_GET_MAGIC: return ((put_user(magic, (__s32 __user *) arg) == -EFAULT) ? -EIO : 0); case ORANGEFS_DEV_GET_MAX_UPSIZE: return ((put_user(max_up_size, (__s32 __user *) arg) == -EFAULT) ? -EIO : 0); case ORANGEFS_DEV_GET_MAX_DOWNSIZE: return ((put_user(max_down_size, (__s32 __user *) arg) == -EFAULT) ? -EIO : 0); case ORANGEFS_DEV_MAP: ret = copy_from_user(&user_desc, (struct ORANGEFS_dev_map_desc __user *) arg, sizeof(struct ORANGEFS_dev_map_desc)); /* WTF -EIO and not -EFAULT? */ return ret ? -EIO : orangefs_bufmap_initialize(&user_desc); case ORANGEFS_DEV_REMOUNT_ALL: gossip_debug(GOSSIP_DEV_DEBUG, "%s: got ORANGEFS_DEV_REMOUNT_ALL\n", __func__); /* * remount all mounted orangefs volumes to regain the lost * dynamic mount tables (if any) -- NOTE: this is done * without keeping the superblock list locked due to the * upcall/downcall waiting. also, the request mutex is * used to ensure that no operations will be serviced until * all of the remounts are serviced (to avoid ops between * mounts to fail) */ ret = mutex_lock_interruptible(&orangefs_request_mutex); if (ret < 0) return ret; gossip_debug(GOSSIP_DEV_DEBUG, "%s: priority remount in progress\n", __func__); spin_lock(&orangefs_superblocks_lock); list_for_each_entry(orangefs_sb, &orangefs_superblocks, list) { /* * We have to drop the spinlock, so entries can be * removed. They can't be freed, though, so we just * keep the forward pointers and zero the back ones - * that way we can get to the rest of the list. */ if (!orangefs_sb->list.prev) continue; gossip_debug(GOSSIP_DEV_DEBUG, "%s: Remounting SB %p\n", __func__, orangefs_sb); spin_unlock(&orangefs_superblocks_lock); ret = orangefs_remount(orangefs_sb); spin_lock(&orangefs_superblocks_lock); if (ret) { gossip_debug(GOSSIP_DEV_DEBUG, "SB %p remount failed\n", orangefs_sb); break; } } spin_unlock(&orangefs_superblocks_lock); gossip_debug(GOSSIP_DEV_DEBUG, "%s: priority remount complete\n", __func__); mutex_unlock(&orangefs_request_mutex); return ret; case ORANGEFS_DEV_UPSTREAM: ret = copy_to_user((void __user *)arg, &upstream_kmod, sizeof(upstream_kmod)); if (ret != 0) return -EIO; else return ret; case ORANGEFS_DEV_CLIENT_MASK: return orangefs_debugfs_new_client_mask((void __user *)arg); case ORANGEFS_DEV_CLIENT_STRING: return orangefs_debugfs_new_client_string((void __user *)arg); case ORANGEFS_DEV_DEBUG: return orangefs_debugfs_new_debug((void __user *)arg); default: return -ENOIOCTLCMD; } return -ENOIOCTLCMD; } static long orangefs_devreq_ioctl(struct file *file, unsigned int command, unsigned long arg) { long ret; /* Check for properly constructed commands */ ret = check_ioctl_command(command); if (ret < 0) return (int)ret; return (int)dispatch_ioctl_command(command, arg); } #ifdef CONFIG_COMPAT /* CONFIG_COMPAT is in .config */ /* Compat structure for the ORANGEFS_DEV_MAP ioctl */ struct ORANGEFS_dev_map_desc32 { compat_uptr_t ptr; __s32 total_size; __s32 size; __s32 count; }; /* * 32 bit user-space apps' ioctl handlers when kernel modules * is compiled as a 64 bit one */ static long orangefs_devreq_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long args) { long ret; /* Check for properly constructed commands */ ret = check_ioctl_command(cmd); if (ret < 0) return ret; if (cmd == ORANGEFS_DEV_MAP) { struct ORANGEFS_dev_map_desc desc; struct ORANGEFS_dev_map_desc32 d32; if (copy_from_user(&d32, (void __user *)args, sizeof(d32))) return -EFAULT; desc.ptr = compat_ptr(d32.ptr); desc.total_size = d32.total_size; desc.size = d32.size; desc.count = d32.count; return orangefs_bufmap_initialize(&desc); } /* no other ioctl requires translation */ return dispatch_ioctl_command(cmd, args); } #endif /* CONFIG_COMPAT is in .config */ static __poll_t orangefs_devreq_poll(struct file *file, struct poll_table_struct *poll_table) { __poll_t poll_revent_mask = 0; poll_wait(file, &orangefs_request_list_waitq, poll_table); if (!list_empty(&orangefs_request_list)) poll_revent_mask |= EPOLLIN; return poll_revent_mask; } /* the assigned character device major number */ static int orangefs_dev_major; static const struct file_operations orangefs_devreq_file_operations = { .owner = THIS_MODULE, .read = orangefs_devreq_read, .write_iter = orangefs_devreq_write_iter, .open = orangefs_devreq_open, .release = orangefs_devreq_release, .unlocked_ioctl = orangefs_devreq_ioctl, #ifdef CONFIG_COMPAT /* CONFIG_COMPAT is in .config */ .compat_ioctl = orangefs_devreq_compat_ioctl, #endif .poll = orangefs_devreq_poll }; /* * Initialize orangefs device specific state: * Must be called at module load time only */ int orangefs_dev_init(void) { /* register orangefs-req device */ orangefs_dev_major = register_chrdev(0, ORANGEFS_REQDEVICE_NAME, &orangefs_devreq_file_operations); if (orangefs_dev_major < 0) { gossip_debug(GOSSIP_DEV_DEBUG, "Failed to register /dev/%s (error %d)\n", ORANGEFS_REQDEVICE_NAME, orangefs_dev_major); return orangefs_dev_major; } gossip_debug(GOSSIP_DEV_DEBUG, "*** /dev/%s character device registered ***\n", ORANGEFS_REQDEVICE_NAME); gossip_debug(GOSSIP_DEV_DEBUG, "'mknod /dev/%s c %d 0'.\n", ORANGEFS_REQDEVICE_NAME, orangefs_dev_major); return 0; } void orangefs_dev_cleanup(void) { unregister_chrdev(orangefs_dev_major, ORANGEFS_REQDEVICE_NAME); gossip_debug(GOSSIP_DEV_DEBUG, "*** /dev/%s character device unregistered ***\n", ORANGEFS_REQDEVICE_NAME); } |
7339 743 735 745 9561 37 18 36 4073 4068 7696 7695 7697 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 | // SPDX-License-Identifier: GPL-2.0 #include <linux/export.h> #include <linux/lockref.h> #if USE_CMPXCHG_LOCKREF /* * Note that the "cmpxchg()" reloads the "old" value for the * failure case. */ #define CMPXCHG_LOOP(CODE, SUCCESS) do { \ int retry = 100; \ struct lockref old; \ BUILD_BUG_ON(sizeof(old) != 8); \ old.lock_count = READ_ONCE(lockref->lock_count); \ while (likely(arch_spin_value_unlocked(old.lock.rlock.raw_lock))) { \ struct lockref new = old; \ CODE \ if (likely(try_cmpxchg64_relaxed(&lockref->lock_count, \ &old.lock_count, \ new.lock_count))) { \ SUCCESS; \ } \ if (!--retry) \ break; \ } \ } while (0) #else #define CMPXCHG_LOOP(CODE, SUCCESS) do { } while (0) #endif /** * lockref_get - Increments reference count unconditionally * @lockref: pointer to lockref structure * * This operation is only valid if you already hold a reference * to the object, so you know the count cannot be zero. */ void lockref_get(struct lockref *lockref) { CMPXCHG_LOOP( new.count++; , return; ); spin_lock(&lockref->lock); lockref->count++; spin_unlock(&lockref->lock); } EXPORT_SYMBOL(lockref_get); /** * lockref_get_not_zero - Increments count unless the count is 0 or dead * @lockref: pointer to lockref structure * Return: 1 if count updated successfully or 0 if count was zero */ int lockref_get_not_zero(struct lockref *lockref) { int retval; CMPXCHG_LOOP( new.count++; if (old.count <= 0) return 0; , return 1; ); spin_lock(&lockref->lock); retval = 0; if (lockref->count > 0) { lockref->count++; retval = 1; } spin_unlock(&lockref->lock); return retval; } EXPORT_SYMBOL(lockref_get_not_zero); /** * lockref_put_not_zero - Decrements count unless count <= 1 before decrement * @lockref: pointer to lockref structure * Return: 1 if count updated successfully or 0 if count would become zero */ int lockref_put_not_zero(struct lockref *lockref) { int retval; CMPXCHG_LOOP( new.count--; if (old.count <= 1) return 0; , return 1; ); spin_lock(&lockref->lock); retval = 0; if (lockref->count > 1) { lockref->count--; retval = 1; } spin_unlock(&lockref->lock); return retval; } EXPORT_SYMBOL(lockref_put_not_zero); /** * lockref_put_return - Decrement reference count if possible * @lockref: pointer to lockref structure * * Decrement the reference count and return the new value. * If the lockref was dead or locked, return an error. */ int lockref_put_return(struct lockref *lockref) { CMPXCHG_LOOP( new.count--; if (old.count <= 0) return -1; , return new.count; ); return -1; } EXPORT_SYMBOL(lockref_put_return); /** * lockref_put_or_lock - decrements count unless count <= 1 before decrement * @lockref: pointer to lockref structure * Return: 1 if count updated successfully or 0 if count <= 1 and lock taken */ int lockref_put_or_lock(struct lockref *lockref) { CMPXCHG_LOOP( new.count--; if (old.count <= 1) break; , return 1; ); spin_lock(&lockref->lock); if (lockref->count <= 1) return 0; lockref->count--; spin_unlock(&lockref->lock); return 1; } EXPORT_SYMBOL(lockref_put_or_lock); /** * lockref_mark_dead - mark lockref dead * @lockref: pointer to lockref structure */ void lockref_mark_dead(struct lockref *lockref) { assert_spin_locked(&lockref->lock); lockref->count = -128; } EXPORT_SYMBOL(lockref_mark_dead); /** * lockref_get_not_dead - Increments count unless the ref is dead * @lockref: pointer to lockref structure * Return: 1 if count updated successfully or 0 if lockref was dead */ int lockref_get_not_dead(struct lockref *lockref) { int retval; CMPXCHG_LOOP( new.count++; if (old.count < 0) return 0; , return 1; ); spin_lock(&lockref->lock); retval = 0; if (lockref->count >= 0) { lockref->count++; retval = 1; } spin_unlock(&lockref->lock); return retval; } EXPORT_SYMBOL(lockref_get_not_dead); |
1 1 1 2 1 1 2 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Force feedback support for EMS Trio Linker Plus II * * Copyright (c) 2010 Ignaz Forster <ignaz.forster@gmx.de> */ /* */ #include <linux/hid.h> #include <linux/input.h> #include <linux/module.h> #include "hid-ids.h" struct emsff_device { struct hid_report *report; }; static int emsff_play(struct input_dev *dev, void *data, struct ff_effect *effect) { struct hid_device *hid = input_get_drvdata(dev); struct emsff_device *emsff = data; int weak, strong; weak = effect->u.rumble.weak_magnitude; strong = effect->u.rumble.strong_magnitude; dbg_hid("called with 0x%04x 0x%04x\n", strong, weak); weak = weak * 0xff / 0xffff; strong = strong * 0xff / 0xffff; emsff->report->field[0]->value[1] = weak; emsff->report->field[0]->value[2] = strong; dbg_hid("running with 0x%02x 0x%02x\n", strong, weak); hid_hw_request(hid, emsff->report, HID_REQ_SET_REPORT); return 0; } static int emsff_init(struct hid_device *hid) { struct emsff_device *emsff; struct hid_report *report; struct hid_input *hidinput; struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list; struct input_dev *dev; int error; if (list_empty(&hid->inputs)) { hid_err(hid, "no inputs found\n"); return -ENODEV; } hidinput = list_first_entry(&hid->inputs, struct hid_input, list); dev = hidinput->input; if (list_empty(report_list)) { hid_err(hid, "no output reports found\n"); return -ENODEV; } report = list_first_entry(report_list, struct hid_report, list); if (report->maxfield < 1) { hid_err(hid, "no fields in the report\n"); return -ENODEV; } if (report->field[0]->report_count < 7) { hid_err(hid, "not enough values in the field\n"); return -ENODEV; } emsff = kzalloc(sizeof(struct emsff_device), GFP_KERNEL); if (!emsff) return -ENOMEM; set_bit(FF_RUMBLE, dev->ffbit); error = input_ff_create_memless(dev, emsff, emsff_play); if (error) { kfree(emsff); return error; } emsff->report = report; emsff->report->field[0]->value[0] = 0x01; emsff->report->field[0]->value[1] = 0x00; emsff->report->field[0]->value[2] = 0x00; emsff->report->field[0]->value[3] = 0x00; emsff->report->field[0]->value[4] = 0x00; emsff->report->field[0]->value[5] = 0x00; emsff->report->field[0]->value[6] = 0x00; hid_hw_request(hid, emsff->report, HID_REQ_SET_REPORT); hid_info(hid, "force feedback for EMS based devices by Ignaz Forster <ignaz.forster@gmx.de>\n"); return 0; } static int ems_probe(struct hid_device *hdev, const struct hid_device_id *id) { int ret; ret = hid_parse(hdev); if (ret) { hid_err(hdev, "parse failed\n"); goto err; } ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT & ~HID_CONNECT_FF); if (ret) { hid_err(hdev, "hw start failed\n"); goto err; } ret = emsff_init(hdev); if (ret) { dev_err(&hdev->dev, "force feedback init failed\n"); hid_hw_stop(hdev); goto err; } return 0; err: return ret; } static const struct hid_device_id ems_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_EMS, USB_DEVICE_ID_EMS_TRIO_LINKER_PLUS_II) }, { } }; MODULE_DEVICE_TABLE(hid, ems_devices); static struct hid_driver ems_driver = { .name = "hkems", .id_table = ems_devices, .probe = ems_probe, }; module_hid_driver(ems_driver); MODULE_DESCRIPTION("Force feedback support for EMS Trio Linker Plus II"); MODULE_LICENSE("GPL"); |
3 1 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 | // SPDX-License-Identifier: GPL-2.0-only /* Kernel module to match FRAG parameters. */ /* (C) 2001-2002 Andras Kis-Szabo <kisza@sch.bme.hu> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/skbuff.h> #include <linux/ipv6.h> #include <linux/types.h> #include <net/checksum.h> #include <net/ipv6.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h> #include <linux/netfilter_ipv6/ip6t_frag.h> MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Xtables: IPv6 fragment match"); MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); /* Returns 1 if the id is matched by the range, 0 otherwise */ static inline bool id_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert) { bool r; pr_debug("id_match:%c 0x%x <= 0x%x <= 0x%x\n", invert ? '!' : ' ', min, id, max); r = (id >= min && id <= max) ^ invert; pr_debug(" result %s\n", r ? "PASS" : "FAILED"); return r; } static bool frag_mt6(const struct sk_buff *skb, struct xt_action_param *par) { struct frag_hdr _frag; const struct frag_hdr *fh; const struct ip6t_frag *fraginfo = par->matchinfo; unsigned int ptr = 0; int err; err = ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT, NULL, NULL); if (err < 0) { if (err != -ENOENT) par->hotdrop = true; return false; } fh = skb_header_pointer(skb, ptr, sizeof(_frag), &_frag); if (fh == NULL) { par->hotdrop = true; return false; } pr_debug("INFO %04X ", fh->frag_off); pr_debug("OFFSET %04X ", ntohs(fh->frag_off) & ~0x7); pr_debug("RES %02X %04X", fh->reserved, ntohs(fh->frag_off) & 0x6); pr_debug("MF %04X ", fh->frag_off & htons(IP6_MF)); pr_debug("ID %u %08X\n", ntohl(fh->identification), ntohl(fh->identification)); pr_debug("IPv6 FRAG id %02X ", id_match(fraginfo->ids[0], fraginfo->ids[1], ntohl(fh->identification), !!(fraginfo->invflags & IP6T_FRAG_INV_IDS))); pr_debug("res %02X %02X%04X %02X ", fraginfo->flags & IP6T_FRAG_RES, fh->reserved, ntohs(fh->frag_off) & 0x6, !((fraginfo->flags & IP6T_FRAG_RES) && (fh->reserved || (ntohs(fh->frag_off) & 0x06)))); pr_debug("first %02X %02X %02X ", fraginfo->flags & IP6T_FRAG_FST, ntohs(fh->frag_off) & ~0x7, !((fraginfo->flags & IP6T_FRAG_FST) && (ntohs(fh->frag_off) & ~0x7))); pr_debug("mf %02X %02X %02X ", fraginfo->flags & IP6T_FRAG_MF, ntohs(fh->frag_off) & IP6_MF, !((fraginfo->flags & IP6T_FRAG_MF) && !((ntohs(fh->frag_off) & IP6_MF)))); pr_debug("last %02X %02X %02X\n", fraginfo->flags & IP6T_FRAG_NMF, ntohs(fh->frag_off) & IP6_MF, !((fraginfo->flags & IP6T_FRAG_NMF) && (ntohs(fh->frag_off) & IP6_MF))); return id_match(fraginfo->ids[0], fraginfo->ids[1], ntohl(fh->identification), !!(fraginfo->invflags & IP6T_FRAG_INV_IDS)) && !((fraginfo->flags & IP6T_FRAG_RES) && (fh->reserved || (ntohs(fh->frag_off) & 0x6))) && !((fraginfo->flags & IP6T_FRAG_FST) && (ntohs(fh->frag_off) & ~0x7)) && !((fraginfo->flags & IP6T_FRAG_MF) && !(ntohs(fh->frag_off) & IP6_MF)) && !((fraginfo->flags & IP6T_FRAG_NMF) && (ntohs(fh->frag_off) & IP6_MF)); } static int frag_mt6_check(const struct xt_mtchk_param *par) { const struct ip6t_frag *fraginfo = par->matchinfo; if (fraginfo->invflags & ~IP6T_FRAG_INV_MASK) { pr_debug("unknown flags %X\n", fraginfo->invflags); return -EINVAL; } return 0; } static struct xt_match frag_mt6_reg __read_mostly = { .name = "frag", .family = NFPROTO_IPV6, .match = frag_mt6, .matchsize = sizeof(struct ip6t_frag), .checkentry = frag_mt6_check, .me = THIS_MODULE, }; static int __init frag_mt6_init(void) { return xt_register_match(&frag_mt6_reg); } static void __exit frag_mt6_exit(void) { xt_unregister_match(&frag_mt6_reg); } module_init(frag_mt6_init); module_exit(frag_mt6_exit); |
1100 687 1328 1216 8 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_MMAN_H #define _LINUX_MMAN_H #include <linux/mm.h> #include <linux/percpu_counter.h> #include <linux/atomic.h> #include <uapi/linux/mman.h> /* * Arrange for legacy / undefined architecture specific flags to be * ignored by mmap handling code. */ #ifndef MAP_32BIT #define MAP_32BIT 0 #endif #ifndef MAP_ABOVE4G #define MAP_ABOVE4G 0 #endif #ifndef MAP_HUGE_2MB #define MAP_HUGE_2MB 0 #endif #ifndef MAP_HUGE_1GB #define MAP_HUGE_1GB 0 #endif #ifndef MAP_UNINITIALIZED #define MAP_UNINITIALIZED 0 #endif #ifndef MAP_SYNC #define MAP_SYNC 0 #endif /* * The historical set of flags that all mmap implementations implicitly * support when a ->mmap_validate() op is not provided in file_operations. * * MAP_EXECUTABLE and MAP_DENYWRITE are completely ignored throughout the * kernel. */ #define LEGACY_MAP_MASK (MAP_SHARED \ | MAP_PRIVATE \ | MAP_FIXED \ | MAP_ANONYMOUS \ | MAP_DENYWRITE \ | MAP_EXECUTABLE \ | MAP_UNINITIALIZED \ | MAP_GROWSDOWN \ | MAP_LOCKED \ | MAP_NORESERVE \ | MAP_POPULATE \ | MAP_NONBLOCK \ | MAP_STACK \ | MAP_HUGETLB \ | MAP_32BIT \ | MAP_ABOVE4G \ | MAP_HUGE_2MB \ | MAP_HUGE_1GB) extern int sysctl_overcommit_memory; extern int sysctl_overcommit_ratio; extern unsigned long sysctl_overcommit_kbytes; extern struct percpu_counter vm_committed_as; #ifdef CONFIG_SMP extern s32 vm_committed_as_batch; extern void mm_compute_batch(int overcommit_policy); #else #define vm_committed_as_batch 0 static inline void mm_compute_batch(int overcommit_policy) { } #endif unsigned long vm_memory_committed(void); static inline void vm_acct_memory(long pages) { percpu_counter_add_batch(&vm_committed_as, pages, vm_committed_as_batch); } static inline void vm_unacct_memory(long pages) { vm_acct_memory(-pages); } /* * Allow architectures to handle additional protection and flag bits. The * overriding macros must be defined in the arch-specific asm/mman.h file. */ #ifndef arch_calc_vm_prot_bits #define arch_calc_vm_prot_bits(prot, pkey) 0 #endif #ifndef arch_calc_vm_flag_bits #define arch_calc_vm_flag_bits(flags) 0 #endif #ifndef arch_validate_prot /* * This is called from mprotect(). PROT_GROWSDOWN and PROT_GROWSUP have * already been masked out. * * Returns true if the prot flags are valid */ static inline bool arch_validate_prot(unsigned long prot, unsigned long addr) { return (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC | PROT_SEM)) == 0; } #define arch_validate_prot arch_validate_prot #endif #ifndef arch_validate_flags /* * This is called from mmap() and mprotect() with the updated vma->vm_flags. * * Returns true if the VM_* flags are valid. */ static inline bool arch_validate_flags(unsigned long flags) { return true; } #define arch_validate_flags arch_validate_flags #endif /* * Optimisation macro. It is equivalent to: * (x & bit1) ? bit2 : 0 * but this version is faster. * ("bit1" and "bit2" must be single bits) */ #define _calc_vm_trans(x, bit1, bit2) \ ((!(bit1) || !(bit2)) ? 0 : \ ((bit1) <= (bit2) ? ((x) & (bit1)) * ((bit2) / (bit1)) \ : ((x) & (bit1)) / ((bit1) / (bit2)))) /* * Combine the mmap "prot" argument into "vm_flags" used internally. */ static inline unsigned long calc_vm_prot_bits(unsigned long prot, unsigned long pkey) { return _calc_vm_trans(prot, PROT_READ, VM_READ ) | _calc_vm_trans(prot, PROT_WRITE, VM_WRITE) | _calc_vm_trans(prot, PROT_EXEC, VM_EXEC) | arch_calc_vm_prot_bits(prot, pkey); } /* * Combine the mmap "flags" argument into "vm_flags" used internally. */ static inline unsigned long calc_vm_flag_bits(unsigned long flags) { return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) | _calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ) | _calc_vm_trans(flags, MAP_SYNC, VM_SYNC ) | _calc_vm_trans(flags, MAP_STACK, VM_NOHUGEPAGE) | arch_calc_vm_flag_bits(flags); } unsigned long vm_commit_limit(void); #ifndef arch_memory_deny_write_exec_supported static inline bool arch_memory_deny_write_exec_supported(void) { return true; } #define arch_memory_deny_write_exec_supported arch_memory_deny_write_exec_supported #endif /* * Denies creating a writable executable mapping or gaining executable permissions. * * This denies the following: * * a) mmap(PROT_WRITE | PROT_EXEC) * * b) mmap(PROT_WRITE) * mprotect(PROT_EXEC) * * c) mmap(PROT_WRITE) * mprotect(PROT_READ) * mprotect(PROT_EXEC) * * But allows the following: * * d) mmap(PROT_READ | PROT_EXEC) * mmap(PROT_READ | PROT_EXEC | PROT_BTI) */ static inline bool map_deny_write_exec(struct vm_area_struct *vma, unsigned long vm_flags) { if (!test_bit(MMF_HAS_MDWE, ¤t->mm->flags)) return false; if ((vm_flags & VM_EXEC) && (vm_flags & VM_WRITE)) return true; if (!(vma->vm_flags & VM_EXEC) && (vm_flags & VM_EXEC)) return true; return false; } #endif /* _LINUX_MMAN_H */ |
1 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 | // SPDX-License-Identifier: GPL-2.0 /* * (C) 2001 Clemson University and The University of Chicago * * See COPYING in top-level directory. */ #include "protocol.h" #include "orangefs-kernel.h" /* tags assigned to kernel upcall operations */ static __u64 next_tag_value; static DEFINE_SPINLOCK(next_tag_value_lock); /* the orangefs memory caches */ /* a cache for orangefs upcall/downcall operations */ static struct kmem_cache *op_cache; int op_cache_initialize(void) { op_cache = kmem_cache_create("orangefs_op_cache", sizeof(struct orangefs_kernel_op_s), 0, 0, NULL); if (!op_cache) { gossip_err("Cannot create orangefs_op_cache\n"); return -ENOMEM; } /* initialize our atomic tag counter */ spin_lock(&next_tag_value_lock); next_tag_value = 100; spin_unlock(&next_tag_value_lock); return 0; } int op_cache_finalize(void) { kmem_cache_destroy(op_cache); return 0; } char *get_opname_string(struct orangefs_kernel_op_s *new_op) { if (new_op) { __s32 type = new_op->upcall.type; if (type == ORANGEFS_VFS_OP_FILE_IO) return "OP_FILE_IO"; else if (type == ORANGEFS_VFS_OP_LOOKUP) return "OP_LOOKUP"; else if (type == ORANGEFS_VFS_OP_CREATE) return "OP_CREATE"; else if (type == ORANGEFS_VFS_OP_GETATTR) return "OP_GETATTR"; else if (type == ORANGEFS_VFS_OP_REMOVE) return "OP_REMOVE"; else if (type == ORANGEFS_VFS_OP_MKDIR) return "OP_MKDIR"; else if (type == ORANGEFS_VFS_OP_READDIR) return "OP_READDIR"; else if (type == ORANGEFS_VFS_OP_READDIRPLUS) return "OP_READDIRPLUS"; else if (type == ORANGEFS_VFS_OP_SETATTR) return "OP_SETATTR"; else if (type == ORANGEFS_VFS_OP_SYMLINK) return "OP_SYMLINK"; else if (type == ORANGEFS_VFS_OP_RENAME) return "OP_RENAME"; else if (type == ORANGEFS_VFS_OP_STATFS) return "OP_STATFS"; else if (type == ORANGEFS_VFS_OP_TRUNCATE) return "OP_TRUNCATE"; else if (type == ORANGEFS_VFS_OP_RA_FLUSH) return "OP_RA_FLUSH"; else if (type == ORANGEFS_VFS_OP_FS_MOUNT) return "OP_FS_MOUNT"; else if (type == ORANGEFS_VFS_OP_FS_UMOUNT) return "OP_FS_UMOUNT"; else if (type == ORANGEFS_VFS_OP_GETXATTR) return "OP_GETXATTR"; else if (type == ORANGEFS_VFS_OP_SETXATTR) return "OP_SETXATTR"; else if (type == ORANGEFS_VFS_OP_LISTXATTR) return "OP_LISTXATTR"; else if (type == ORANGEFS_VFS_OP_REMOVEXATTR) return "OP_REMOVEXATTR"; else if (type == ORANGEFS_VFS_OP_PARAM) return "OP_PARAM"; else if (type == ORANGEFS_VFS_OP_PERF_COUNT) return "OP_PERF_COUNT"; else if (type == ORANGEFS_VFS_OP_CANCEL) return "OP_CANCEL"; else if (type == ORANGEFS_VFS_OP_FSYNC) return "OP_FSYNC"; else if (type == ORANGEFS_VFS_OP_FSKEY) return "OP_FSKEY"; else if (type == ORANGEFS_VFS_OP_FEATURES) return "OP_FEATURES"; } return "OP_UNKNOWN?"; } void orangefs_new_tag(struct orangefs_kernel_op_s *op) { spin_lock(&next_tag_value_lock); op->tag = next_tag_value++; if (next_tag_value == 0) next_tag_value = 100; spin_unlock(&next_tag_value_lock); } struct orangefs_kernel_op_s *op_alloc(__s32 type) { struct orangefs_kernel_op_s *new_op = NULL; new_op = kmem_cache_zalloc(op_cache, GFP_KERNEL); if (new_op) { INIT_LIST_HEAD(&new_op->list); spin_lock_init(&new_op->lock); init_completion(&new_op->waitq); new_op->upcall.type = ORANGEFS_VFS_OP_INVALID; new_op->downcall.type = ORANGEFS_VFS_OP_INVALID; new_op->downcall.status = -1; new_op->op_state = OP_VFS_STATE_UNKNOWN; /* initialize the op specific tag and upcall credentials */ orangefs_new_tag(new_op); new_op->upcall.type = type; new_op->attempts = 0; gossip_debug(GOSSIP_CACHE_DEBUG, "Alloced OP (%p: %llu %s)\n", new_op, llu(new_op->tag), get_opname_string(new_op)); new_op->upcall.uid = from_kuid(&init_user_ns, current_fsuid()); new_op->upcall.gid = from_kgid(&init_user_ns, current_fsgid()); } else { gossip_err("op_alloc: kmem_cache_zalloc failed!\n"); } return new_op; } void op_release(struct orangefs_kernel_op_s *orangefs_op) { if (orangefs_op) { gossip_debug(GOSSIP_CACHE_DEBUG, "Releasing OP (%p: %llu)\n", orangefs_op, llu(orangefs_op->tag)); kmem_cache_free(op_cache, orangefs_op); } else { gossip_err("NULL pointer in op_release\n"); } } |
196 197 197 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 | // SPDX-License-Identifier: GPL-2.0 /* Lock down the kernel * * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public Licence * as published by the Free Software Foundation; either version * 2 of the Licence, or (at your option) any later version. */ #include <linux/security.h> #include <linux/export.h> #include <linux/lsm_hooks.h> #include <uapi/linux/lsm.h> static enum lockdown_reason kernel_locked_down; static const enum lockdown_reason lockdown_levels[] = {LOCKDOWN_NONE, LOCKDOWN_INTEGRITY_MAX, LOCKDOWN_CONFIDENTIALITY_MAX}; /* * Put the kernel into lock-down mode. */ static int lock_kernel_down(const char *where, enum lockdown_reason level) { if (kernel_locked_down >= level) return -EPERM; kernel_locked_down = level; pr_notice("Kernel is locked down from %s; see man kernel_lockdown.7\n", where); return 0; } static int __init lockdown_param(char *level) { if (!level) return -EINVAL; if (strcmp(level, "integrity") == 0) lock_kernel_down("command line", LOCKDOWN_INTEGRITY_MAX); else if (strcmp(level, "confidentiality") == 0) lock_kernel_down("command line", LOCKDOWN_CONFIDENTIALITY_MAX); else return -EINVAL; return 0; } early_param("lockdown", lockdown_param); /** * lockdown_is_locked_down - Find out if the kernel is locked down * @what: Tag to use in notice generated if lockdown is in effect */ static int lockdown_is_locked_down(enum lockdown_reason what) { if (WARN(what >= LOCKDOWN_CONFIDENTIALITY_MAX, "Invalid lockdown reason")) return -EPERM; if (kernel_locked_down >= what) { if (lockdown_reasons[what]) pr_notice_ratelimited("Lockdown: %s: %s is restricted; see man kernel_lockdown.7\n", current->comm, lockdown_reasons[what]); return -EPERM; } return 0; } static struct security_hook_list lockdown_hooks[] __ro_after_init = { LSM_HOOK_INIT(locked_down, lockdown_is_locked_down), }; static const struct lsm_id lockdown_lsmid = { .name = "lockdown", .id = LSM_ID_LOCKDOWN, }; static int __init lockdown_lsm_init(void) { #if defined(CONFIG_LOCK_DOWN_KERNEL_FORCE_INTEGRITY) lock_kernel_down("Kernel configuration", LOCKDOWN_INTEGRITY_MAX); #elif defined(CONFIG_LOCK_DOWN_KERNEL_FORCE_CONFIDENTIALITY) lock_kernel_down("Kernel configuration", LOCKDOWN_CONFIDENTIALITY_MAX); #endif security_add_hooks(lockdown_hooks, ARRAY_SIZE(lockdown_hooks), &lockdown_lsmid); return 0; } static ssize_t lockdown_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { char temp[80]; int i, offset = 0; for (i = 0; i < ARRAY_SIZE(lockdown_levels); i++) { enum lockdown_reason level = lockdown_levels[i]; if (lockdown_reasons[level]) { const char *label = lockdown_reasons[level]; if (kernel_locked_down == level) offset += sprintf(temp+offset, "[%s] ", label); else offset += sprintf(temp+offset, "%s ", label); } } /* Convert the last space to a newline if needed. */ if (offset > 0) temp[offset-1] = '\n'; return simple_read_from_buffer(buf, count, ppos, temp, strlen(temp)); } static ssize_t lockdown_write(struct file *file, const char __user *buf, size_t n, loff_t *ppos) { char *state; int i, len, err = -EINVAL; state = memdup_user_nul(buf, n); if (IS_ERR(state)) return PTR_ERR(state); len = strlen(state); if (len && state[len-1] == '\n') { state[len-1] = '\0'; len--; } for (i = 0; i < ARRAY_SIZE(lockdown_levels); i++) { enum lockdown_reason level = lockdown_levels[i]; const char *label = lockdown_reasons[level]; if (label && !strcmp(state, label)) err = lock_kernel_down("securityfs", level); } kfree(state); return err ? err : n; } static const struct file_operations lockdown_ops = { .read = lockdown_read, .write = lockdown_write, }; static int __init lockdown_secfs_init(void) { struct dentry *dentry; dentry = securityfs_create_file("lockdown", 0644, NULL, NULL, &lockdown_ops); return PTR_ERR_OR_ZERO(dentry); } core_initcall(lockdown_secfs_init); #ifdef CONFIG_SECURITY_LOCKDOWN_LSM_EARLY DEFINE_EARLY_LSM(lockdown) = { #else DEFINE_LSM(lockdown) = { #endif .name = "lockdown", .init = lockdown_lsm_init, }; |
52 23 23 23 18 4 21 20 20 1 20 18 20 11 4 7 5 2 1 21 21 1 21 20 21 2 2 21 2 20 19 14 19 6 20 1 1 1 1 1 8 4 4 2 1 4 3 3 1 3 3 2 1 6 3 6 2 1 1 1 1 1 1 1 2 1 92 65 66 66 66 65 65 52 65 90 86 90 8 8 5 32 32 32 24 23 23 21 4 21 4 21 5 5 5 5 3 3 24 25 24 24 12 12 3 18 16 25 24 24 6 24 24 15 24 6 5 1 1 6 32 6 6 6 6 6 6 6 6 6 6 3 6 31 31 30 3 3 3 3 3 3 32 2 2 2 2 2 2 2 2 2 2 24 24 91 91 58 32 2 2 2 2 91 32 32 25 31 32 32 32 31 32 2 31 26 2 26 15 15 1 40 29 29 29 13 13 3 1 1 1 1 12 21 12 5 9 6 6 16 19 6 19 19 17 19 19 19 16 19 19 24 24 24 24 3 3 3 96 96 52 52 52 52 96 48 48 30 21 3 3 3 3 3 3 3 2 3 3 3 3 3 19 19 19 18 1 1 99 99 93 57 7 7 7 7 7 100 101 20 20 20 15 12 3 15 20 20 6 20 20 15 15 15 20 96 91 23 21 4 88 90 91 23 2 21 18 4 4 4 14 8 8 5 5 8 6 6 2 6 6 6 5 1 1 6 6 6 2 2 1 2 2 2 2 27 2 2 101 3 99 101 2 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 | // SPDX-License-Identifier: GPL-2.0 /* Multipath TCP * * Copyright (c) 2017 - 2019, Intel Corporation. */ #define pr_fmt(fmt) "MPTCP: " fmt #include <linux/kernel.h> #include <crypto/sha2.h> #include <net/tcp.h> #include <net/mptcp.h> #include "protocol.h" #include "mib.h" #include <trace/events/mptcp.h> static bool mptcp_cap_flag_sha256(u8 flags) { return (flags & MPTCP_CAP_FLAG_MASK) == MPTCP_CAP_HMAC_SHA256; } static void mptcp_parse_option(const struct sk_buff *skb, const unsigned char *ptr, int opsize, struct mptcp_options_received *mp_opt) { u8 subtype = *ptr >> 4; int expected_opsize; u16 subopt; u8 version; u8 flags; u8 i; switch (subtype) { case MPTCPOPT_MP_CAPABLE: /* strict size checking */ if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) { if (skb->len > tcp_hdr(skb)->doff << 2) expected_opsize = TCPOLEN_MPTCP_MPC_ACK_DATA; else expected_opsize = TCPOLEN_MPTCP_MPC_ACK; subopt = OPTION_MPTCP_MPC_ACK; } else { if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK) { expected_opsize = TCPOLEN_MPTCP_MPC_SYNACK; subopt = OPTION_MPTCP_MPC_SYNACK; } else { expected_opsize = TCPOLEN_MPTCP_MPC_SYN; subopt = OPTION_MPTCP_MPC_SYN; } } /* Cfr RFC 8684 Section 3.3.0: * If a checksum is present but its use had * not been negotiated in the MP_CAPABLE handshake, the receiver MUST * close the subflow with a RST, as it is not behaving as negotiated. * If a checksum is not present when its use has been negotiated, the * receiver MUST close the subflow with a RST, as it is considered * broken * We parse even option with mismatching csum presence, so that * later in subflow_data_ready we can trigger the reset. */ if (opsize != expected_opsize && (expected_opsize != TCPOLEN_MPTCP_MPC_ACK_DATA || opsize != TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM)) break; /* try to be gentle vs future versions on the initial syn */ version = *ptr++ & MPTCP_VERSION_MASK; if (opsize != TCPOLEN_MPTCP_MPC_SYN) { if (version != MPTCP_SUPPORTED_VERSION) break; } else if (version < MPTCP_SUPPORTED_VERSION) { break; } flags = *ptr++; if (!mptcp_cap_flag_sha256(flags) || (flags & MPTCP_CAP_EXTENSIBILITY)) break; /* RFC 6824, Section 3.1: * "For the Checksum Required bit (labeled "A"), if either * host requires the use of checksums, checksums MUST be used. * In other words, the only way for checksums not to be used * is if both hosts in their SYNs set A=0." */ if (flags & MPTCP_CAP_CHECKSUM_REQD) mp_opt->suboptions |= OPTION_MPTCP_CSUMREQD; mp_opt->deny_join_id0 = !!(flags & MPTCP_CAP_DENY_JOIN_ID0); mp_opt->suboptions |= subopt; if (opsize >= TCPOLEN_MPTCP_MPC_SYNACK) { mp_opt->sndr_key = get_unaligned_be64(ptr); ptr += 8; } if (opsize >= TCPOLEN_MPTCP_MPC_ACK) { mp_opt->rcvr_key = get_unaligned_be64(ptr); ptr += 8; } if (opsize >= TCPOLEN_MPTCP_MPC_ACK_DATA) { /* Section 3.1.: * "the data parameters in a MP_CAPABLE are semantically * equivalent to those in a DSS option and can be used * interchangeably." */ mp_opt->suboptions |= OPTION_MPTCP_DSS; mp_opt->use_map = 1; mp_opt->mpc_map = 1; mp_opt->use_ack = 0; mp_opt->data_len = get_unaligned_be16(ptr); ptr += 2; } if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM) { mp_opt->csum = get_unaligned((__force __sum16 *)ptr); mp_opt->suboptions |= OPTION_MPTCP_CSUMREQD; ptr += 2; } pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d csum=%u\n", version, flags, opsize, mp_opt->sndr_key, mp_opt->rcvr_key, mp_opt->data_len, mp_opt->csum); break; case MPTCPOPT_MP_JOIN: if (opsize == TCPOLEN_MPTCP_MPJ_SYN) { mp_opt->suboptions |= OPTION_MPTCP_MPJ_SYN; mp_opt->backup = *ptr++ & MPTCPOPT_BACKUP; mp_opt->join_id = *ptr++; mp_opt->token = get_unaligned_be32(ptr); ptr += 4; mp_opt->nonce = get_unaligned_be32(ptr); ptr += 4; pr_debug("MP_JOIN bkup=%u, id=%u, token=%u, nonce=%u\n", mp_opt->backup, mp_opt->join_id, mp_opt->token, mp_opt->nonce); } else if (opsize == TCPOLEN_MPTCP_MPJ_SYNACK) { mp_opt->suboptions |= OPTION_MPTCP_MPJ_SYNACK; mp_opt->backup = *ptr++ & MPTCPOPT_BACKUP; mp_opt->join_id = *ptr++; mp_opt->thmac = get_unaligned_be64(ptr); ptr += 8; mp_opt->nonce = get_unaligned_be32(ptr); ptr += 4; pr_debug("MP_JOIN bkup=%u, id=%u, thmac=%llu, nonce=%u\n", mp_opt->backup, mp_opt->join_id, mp_opt->thmac, mp_opt->nonce); } else if (opsize == TCPOLEN_MPTCP_MPJ_ACK) { mp_opt->suboptions |= OPTION_MPTCP_MPJ_ACK; ptr += 2; memcpy(mp_opt->hmac, ptr, MPTCPOPT_HMAC_LEN); pr_debug("MP_JOIN hmac\n"); } break; case MPTCPOPT_DSS: pr_debug("DSS\n"); ptr++; /* we must clear 'mpc_map' be able to detect MP_CAPABLE * map vs DSS map in mptcp_incoming_options(), and reconstruct * map info accordingly */ mp_opt->mpc_map = 0; flags = (*ptr++) & MPTCP_DSS_FLAG_MASK; mp_opt->data_fin = (flags & MPTCP_DSS_DATA_FIN) != 0; mp_opt->dsn64 = (flags & MPTCP_DSS_DSN64) != 0; mp_opt->use_map = (flags & MPTCP_DSS_HAS_MAP) != 0; mp_opt->ack64 = (flags & MPTCP_DSS_ACK64) != 0; mp_opt->use_ack = (flags & MPTCP_DSS_HAS_ACK); pr_debug("data_fin=%d dsn64=%d use_map=%d ack64=%d use_ack=%d\n", mp_opt->data_fin, mp_opt->dsn64, mp_opt->use_map, mp_opt->ack64, mp_opt->use_ack); expected_opsize = TCPOLEN_MPTCP_DSS_BASE; if (mp_opt->use_ack) { if (mp_opt->ack64) expected_opsize += TCPOLEN_MPTCP_DSS_ACK64; else expected_opsize += TCPOLEN_MPTCP_DSS_ACK32; } if (mp_opt->use_map) { if (mp_opt->dsn64) expected_opsize += TCPOLEN_MPTCP_DSS_MAP64; else expected_opsize += TCPOLEN_MPTCP_DSS_MAP32; } /* Always parse any csum presence combination, we will enforce * RFC 8684 Section 3.3.0 checks later in subflow_data_ready */ if (opsize != expected_opsize && opsize != expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM) break; mp_opt->suboptions |= OPTION_MPTCP_DSS; if (mp_opt->use_ack) { if (mp_opt->ack64) { mp_opt->data_ack = get_unaligned_be64(ptr); ptr += 8; } else { mp_opt->data_ack = get_unaligned_be32(ptr); ptr += 4; } pr_debug("data_ack=%llu\n", mp_opt->data_ack); } if (mp_opt->use_map) { if (mp_opt->dsn64) { mp_opt->data_seq = get_unaligned_be64(ptr); ptr += 8; } else { mp_opt->data_seq = get_unaligned_be32(ptr); ptr += 4; } mp_opt->subflow_seq = get_unaligned_be32(ptr); ptr += 4; mp_opt->data_len = get_unaligned_be16(ptr); ptr += 2; if (opsize == expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM) { mp_opt->suboptions |= OPTION_MPTCP_CSUMREQD; mp_opt->csum = get_unaligned((__force __sum16 *)ptr); ptr += 2; } pr_debug("data_seq=%llu subflow_seq=%u data_len=%u csum=%d:%u\n", mp_opt->data_seq, mp_opt->subflow_seq, mp_opt->data_len, !!(mp_opt->suboptions & OPTION_MPTCP_CSUMREQD), mp_opt->csum); } break; case MPTCPOPT_ADD_ADDR: mp_opt->echo = (*ptr++) & MPTCP_ADDR_ECHO; if (!mp_opt->echo) { if (opsize == TCPOLEN_MPTCP_ADD_ADDR || opsize == TCPOLEN_MPTCP_ADD_ADDR_PORT) mp_opt->addr.family = AF_INET; #if IS_ENABLED(CONFIG_MPTCP_IPV6) else if (opsize == TCPOLEN_MPTCP_ADD_ADDR6 || opsize == TCPOLEN_MPTCP_ADD_ADDR6_PORT) mp_opt->addr.family = AF_INET6; #endif else break; } else { if (opsize == TCPOLEN_MPTCP_ADD_ADDR_BASE || opsize == TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT) mp_opt->addr.family = AF_INET; #if IS_ENABLED(CONFIG_MPTCP_IPV6) else if (opsize == TCPOLEN_MPTCP_ADD_ADDR6_BASE || opsize == TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT) mp_opt->addr.family = AF_INET6; #endif else break; } mp_opt->suboptions |= OPTION_MPTCP_ADD_ADDR; mp_opt->addr.id = *ptr++; mp_opt->addr.port = 0; mp_opt->ahmac = 0; if (mp_opt->addr.family == AF_INET) { memcpy((u8 *)&mp_opt->addr.addr.s_addr, (u8 *)ptr, 4); ptr += 4; if (opsize == TCPOLEN_MPTCP_ADD_ADDR_PORT || opsize == TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT) { mp_opt->addr.port = htons(get_unaligned_be16(ptr)); ptr += 2; } } #if IS_ENABLED(CONFIG_MPTCP_IPV6) else { memcpy(mp_opt->addr.addr6.s6_addr, (u8 *)ptr, 16); ptr += 16; if (opsize == TCPOLEN_MPTCP_ADD_ADDR6_PORT || opsize == TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT) { mp_opt->addr.port = htons(get_unaligned_be16(ptr)); ptr += 2; } } #endif if (!mp_opt->echo) { mp_opt->ahmac = get_unaligned_be64(ptr); ptr += 8; } pr_debug("ADD_ADDR%s: id=%d, ahmac=%llu, echo=%d, port=%d\n", (mp_opt->addr.family == AF_INET6) ? "6" : "", mp_opt->addr.id, mp_opt->ahmac, mp_opt->echo, ntohs(mp_opt->addr.port)); break; case MPTCPOPT_RM_ADDR: if (opsize < TCPOLEN_MPTCP_RM_ADDR_BASE + 1 || opsize > TCPOLEN_MPTCP_RM_ADDR_BASE + MPTCP_RM_IDS_MAX) break; ptr++; mp_opt->suboptions |= OPTION_MPTCP_RM_ADDR; mp_opt->rm_list.nr = opsize - TCPOLEN_MPTCP_RM_ADDR_BASE; for (i = 0; i < mp_opt->rm_list.nr; i++) mp_opt->rm_list.ids[i] = *ptr++; pr_debug("RM_ADDR: rm_list_nr=%d\n", mp_opt->rm_list.nr); break; case MPTCPOPT_MP_PRIO: if (opsize != TCPOLEN_MPTCP_PRIO) break; mp_opt->suboptions |= OPTION_MPTCP_PRIO; mp_opt->backup = *ptr++ & MPTCP_PRIO_BKUP; pr_debug("MP_PRIO: prio=%d\n", mp_opt->backup); break; case MPTCPOPT_MP_FASTCLOSE: if (opsize != TCPOLEN_MPTCP_FASTCLOSE) break; ptr += 2; mp_opt->rcvr_key = get_unaligned_be64(ptr); ptr += 8; mp_opt->suboptions |= OPTION_MPTCP_FASTCLOSE; pr_debug("MP_FASTCLOSE: recv_key=%llu\n", mp_opt->rcvr_key); break; case MPTCPOPT_RST: if (opsize != TCPOLEN_MPTCP_RST) break; if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) break; mp_opt->suboptions |= OPTION_MPTCP_RST; flags = *ptr++; mp_opt->reset_transient = flags & MPTCP_RST_TRANSIENT; mp_opt->reset_reason = *ptr; pr_debug("MP_RST: transient=%u reason=%u\n", mp_opt->reset_transient, mp_opt->reset_reason); break; case MPTCPOPT_MP_FAIL: if (opsize != TCPOLEN_MPTCP_FAIL) break; ptr += 2; mp_opt->suboptions |= OPTION_MPTCP_FAIL; mp_opt->fail_seq = get_unaligned_be64(ptr); pr_debug("MP_FAIL: data_seq=%llu\n", mp_opt->fail_seq); break; default: break; } } void mptcp_get_options(const struct sk_buff *skb, struct mptcp_options_received *mp_opt) { const struct tcphdr *th = tcp_hdr(skb); const unsigned char *ptr; int length; /* initialize option status */ mp_opt->suboptions = 0; length = (th->doff * 4) - sizeof(struct tcphdr); ptr = (const unsigned char *)(th + 1); while (length > 0) { int opcode = *ptr++; int opsize; switch (opcode) { case TCPOPT_EOL: return; case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */ length--; continue; default: if (length < 2) return; opsize = *ptr++; if (opsize < 2) /* "silly options" */ return; if (opsize > length) return; /* don't parse partial options */ if (opcode == TCPOPT_MPTCP) mptcp_parse_option(skb, ptr, opsize, mp_opt); ptr += opsize - 2; length -= opsize; } } } bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb, unsigned int *size, struct mptcp_out_options *opts) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); /* we will use snd_isn to detect first pkt [re]transmission * in mptcp_established_options_mp() */ subflow->snd_isn = TCP_SKB_CB(skb)->end_seq; if (subflow->request_mptcp) { opts->suboptions = OPTION_MPTCP_MPC_SYN; opts->csum_reqd = mptcp_is_checksum_enabled(sock_net(sk)); opts->allow_join_id0 = mptcp_allow_join_id0(sock_net(sk)); *size = TCPOLEN_MPTCP_MPC_SYN; return true; } else if (subflow->request_join) { pr_debug("remote_token=%u, nonce=%u\n", subflow->remote_token, subflow->local_nonce); opts->suboptions = OPTION_MPTCP_MPJ_SYN; opts->join_id = subflow->local_id; opts->token = subflow->remote_token; opts->nonce = subflow->local_nonce; opts->backup = subflow->request_bkup; *size = TCPOLEN_MPTCP_MPJ_SYN; return true; } return false; } static void clear_3rdack_retransmission(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); sk_stop_timer(sk, &icsk->icsk_delack_timer); icsk->icsk_ack.timeout = 0; icsk->icsk_ack.ato = 0; icsk->icsk_ack.pending &= ~(ICSK_ACK_SCHED | ICSK_ACK_TIMER); } static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb, bool snd_data_fin_enable, unsigned int *size, struct mptcp_out_options *opts) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_sock *msk = mptcp_sk(subflow->conn); struct mptcp_ext *mpext; unsigned int data_len; u8 len; /* When skb is not available, we better over-estimate the emitted * options len. A full DSS option (28 bytes) is longer than * TCPOLEN_MPTCP_MPC_ACK_DATA(22) or TCPOLEN_MPTCP_MPJ_ACK(24), so * tell the caller to defer the estimate to * mptcp_established_options_dss(), which will reserve enough space. */ if (!skb) return false; /* MPC/MPJ needed only on 3rd ack packet, DATA_FIN and TCP shutdown take precedence */ if (subflow->fully_established || snd_data_fin_enable || subflow->snd_isn != TCP_SKB_CB(skb)->seq || sk->sk_state != TCP_ESTABLISHED) return false; if (subflow->mp_capable) { mpext = mptcp_get_ext(skb); data_len = mpext ? mpext->data_len : 0; /* we will check ops->data_len in mptcp_write_options() to * discriminate between TCPOLEN_MPTCP_MPC_ACK_DATA and * TCPOLEN_MPTCP_MPC_ACK */ opts->data_len = data_len; opts->suboptions = OPTION_MPTCP_MPC_ACK; opts->sndr_key = subflow->local_key; opts->rcvr_key = subflow->remote_key; opts->csum_reqd = READ_ONCE(msk->csum_enabled); opts->allow_join_id0 = mptcp_allow_join_id0(sock_net(sk)); /* Section 3.1. * The MP_CAPABLE option is carried on the SYN, SYN/ACK, and ACK * packets that start the first subflow of an MPTCP connection, * as well as the first packet that carries data */ if (data_len > 0) { len = TCPOLEN_MPTCP_MPC_ACK_DATA; if (opts->csum_reqd) { /* we need to propagate more info to csum the pseudo hdr */ opts->data_seq = mpext->data_seq; opts->subflow_seq = mpext->subflow_seq; opts->csum = mpext->csum; len += TCPOLEN_MPTCP_DSS_CHECKSUM; } *size = ALIGN(len, 4); } else { *size = TCPOLEN_MPTCP_MPC_ACK; } pr_debug("subflow=%p, local_key=%llu, remote_key=%llu map_len=%d\n", subflow, subflow->local_key, subflow->remote_key, data_len); return true; } else if (subflow->mp_join) { opts->suboptions = OPTION_MPTCP_MPJ_ACK; memcpy(opts->hmac, subflow->hmac, MPTCPOPT_HMAC_LEN); *size = TCPOLEN_MPTCP_MPJ_ACK; pr_debug("subflow=%p\n", subflow); /* we can use the full delegate action helper only from BH context * If we are in process context - sk is flushing the backlog at * socket lock release time - just set the appropriate flag, will * be handled by the release callback */ if (sock_owned_by_user(sk)) set_bit(MPTCP_DELEGATE_ACK, &subflow->delegated_status); else mptcp_subflow_delegate(subflow, MPTCP_DELEGATE_ACK); return true; } return false; } static void mptcp_write_data_fin(struct mptcp_subflow_context *subflow, struct sk_buff *skb, struct mptcp_ext *ext) { /* The write_seq value has already been incremented, so the actual * sequence number for the DATA_FIN is one less. */ u64 data_fin_tx_seq = READ_ONCE(mptcp_sk(subflow->conn)->write_seq) - 1; if (!ext->use_map || !skb->len) { /* RFC6824 requires a DSS mapping with specific values * if DATA_FIN is set but no data payload is mapped */ ext->data_fin = 1; ext->use_map = 1; ext->dsn64 = 1; ext->data_seq = data_fin_tx_seq; ext->subflow_seq = 0; ext->data_len = 1; } else if (ext->data_seq + ext->data_len == data_fin_tx_seq) { /* If there's an existing DSS mapping and it is the * final mapping, DATA_FIN consumes 1 additional byte of * mapping space. */ ext->data_fin = 1; ext->data_len++; } } static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, bool snd_data_fin_enable, unsigned int *size, struct mptcp_out_options *opts) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_sock *msk = mptcp_sk(subflow->conn); unsigned int dss_size = 0; struct mptcp_ext *mpext; unsigned int ack_size; bool ret = false; u64 ack_seq; opts->csum_reqd = READ_ONCE(msk->csum_enabled); mpext = skb ? mptcp_get_ext(skb) : NULL; if (!skb || (mpext && mpext->use_map) || snd_data_fin_enable) { unsigned int map_size = TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_MAP64; if (mpext) { if (opts->csum_reqd) map_size += TCPOLEN_MPTCP_DSS_CHECKSUM; opts->ext_copy = *mpext; } dss_size = map_size; if (skb && snd_data_fin_enable) mptcp_write_data_fin(subflow, skb, &opts->ext_copy); opts->suboptions = OPTION_MPTCP_DSS; ret = true; } /* passive sockets msk will set the 'can_ack' after accept(), even * if the first subflow may have the already the remote key handy */ opts->ext_copy.use_ack = 0; if (!READ_ONCE(msk->can_ack)) { *size = ALIGN(dss_size, 4); return ret; } ack_seq = READ_ONCE(msk->ack_seq); if (READ_ONCE(msk->use_64bit_ack)) { ack_size = TCPOLEN_MPTCP_DSS_ACK64; opts->ext_copy.data_ack = ack_seq; opts->ext_copy.ack64 = 1; } else { ack_size = TCPOLEN_MPTCP_DSS_ACK32; opts->ext_copy.data_ack32 = (uint32_t)ack_seq; opts->ext_copy.ack64 = 0; } opts->ext_copy.use_ack = 1; opts->suboptions = OPTION_MPTCP_DSS; WRITE_ONCE(msk->old_wspace, __mptcp_space((struct sock *)msk)); /* Add kind/length/subtype/flag overhead if mapping is not populated */ if (dss_size == 0) ack_size += TCPOLEN_MPTCP_DSS_BASE; dss_size += ack_size; *size = ALIGN(dss_size, 4); return true; } static u64 add_addr_generate_hmac(u64 key1, u64 key2, struct mptcp_addr_info *addr) { u16 port = ntohs(addr->port); u8 hmac[SHA256_DIGEST_SIZE]; u8 msg[19]; int i = 0; msg[i++] = addr->id; if (addr->family == AF_INET) { memcpy(&msg[i], &addr->addr.s_addr, 4); i += 4; } #if IS_ENABLED(CONFIG_MPTCP_IPV6) else if (addr->family == AF_INET6) { memcpy(&msg[i], &addr->addr6.s6_addr, 16); i += 16; } #endif msg[i++] = port >> 8; msg[i++] = port & 0xFF; mptcp_crypto_hmac_sha(key1, key2, msg, i, hmac); return get_unaligned_be64(&hmac[SHA256_DIGEST_SIZE - sizeof(u64)]); } static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *skb, unsigned int *size, unsigned int remaining, struct mptcp_out_options *opts) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_sock *msk = mptcp_sk(subflow->conn); bool drop_other_suboptions = false; unsigned int opt_size = *size; bool echo; int len; /* add addr will strip the existing options, be sure to avoid breaking * MPC/MPJ handshakes */ if (!mptcp_pm_should_add_signal(msk) || (opts->suboptions & (OPTION_MPTCP_MPJ_ACK | OPTION_MPTCP_MPC_ACK)) || !mptcp_pm_add_addr_signal(msk, skb, opt_size, remaining, &opts->addr, &echo, &drop_other_suboptions)) return false; if (drop_other_suboptions) remaining += opt_size; len = mptcp_add_addr_len(opts->addr.family, echo, !!opts->addr.port); if (remaining < len) return false; *size = len; if (drop_other_suboptions) { pr_debug("drop other suboptions\n"); opts->suboptions = 0; /* note that e.g. DSS could have written into the memory * aliased by ahmac, we must reset the field here * to avoid appending the hmac even for ADD_ADDR echo * options */ opts->ahmac = 0; *size -= opt_size; } opts->suboptions |= OPTION_MPTCP_ADD_ADDR; if (!echo) { MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDRTX); opts->ahmac = add_addr_generate_hmac(READ_ONCE(msk->local_key), READ_ONCE(msk->remote_key), &opts->addr); } else { MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADDTX); } pr_debug("addr_id=%d, ahmac=%llu, echo=%d, port=%d\n", opts->addr.id, opts->ahmac, echo, ntohs(opts->addr.port)); return true; } static bool mptcp_established_options_rm_addr(struct sock *sk, unsigned int *size, unsigned int remaining, struct mptcp_out_options *opts) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_sock *msk = mptcp_sk(subflow->conn); struct mptcp_rm_list rm_list; int i, len; if (!mptcp_pm_should_rm_signal(msk) || !(mptcp_pm_rm_addr_signal(msk, remaining, &rm_list))) return false; len = mptcp_rm_addr_len(&rm_list); if (len < 0) return false; if (remaining < len) return false; *size = len; opts->suboptions |= OPTION_MPTCP_RM_ADDR; opts->rm_list = rm_list; for (i = 0; i < opts->rm_list.nr; i++) pr_debug("rm_list_ids[%d]=%d\n", i, opts->rm_list.ids[i]); MPTCP_ADD_STATS(sock_net(sk), MPTCP_MIB_RMADDRTX, opts->rm_list.nr); return true; } static bool mptcp_established_options_mp_prio(struct sock *sk, unsigned int *size, unsigned int remaining, struct mptcp_out_options *opts) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); /* can't send MP_PRIO with MPC, as they share the same option space: * 'backup'. Also it makes no sense at all */ if (!subflow->send_mp_prio || (opts->suboptions & OPTIONS_MPTCP_MPC)) return false; /* account for the trailing 'nop' option */ if (remaining < TCPOLEN_MPTCP_PRIO_ALIGN) return false; *size = TCPOLEN_MPTCP_PRIO_ALIGN; opts->suboptions |= OPTION_MPTCP_PRIO; opts->backup = subflow->request_bkup; pr_debug("prio=%d\n", opts->backup); return true; } static noinline bool mptcp_established_options_rst(struct sock *sk, struct sk_buff *skb, unsigned int *size, unsigned int remaining, struct mptcp_out_options *opts) { const struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); if (remaining < TCPOLEN_MPTCP_RST) return false; *size = TCPOLEN_MPTCP_RST; opts->suboptions |= OPTION_MPTCP_RST; opts->reset_transient = subflow->reset_transient; opts->reset_reason = subflow->reset_reason; MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPRSTTX); return true; } static bool mptcp_established_options_fastclose(struct sock *sk, unsigned int *size, unsigned int remaining, struct mptcp_out_options *opts) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_sock *msk = mptcp_sk(subflow->conn); if (likely(!subflow->send_fastclose)) return false; if (remaining < TCPOLEN_MPTCP_FASTCLOSE) return false; *size = TCPOLEN_MPTCP_FASTCLOSE; opts->suboptions |= OPTION_MPTCP_FASTCLOSE; opts->rcvr_key = READ_ONCE(msk->remote_key); pr_debug("FASTCLOSE key=%llu\n", opts->rcvr_key); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFASTCLOSETX); return true; } static bool mptcp_established_options_mp_fail(struct sock *sk, unsigned int *size, unsigned int remaining, struct mptcp_out_options *opts) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); if (likely(!subflow->send_mp_fail)) return false; if (remaining < TCPOLEN_MPTCP_FAIL) return false; *size = TCPOLEN_MPTCP_FAIL; opts->suboptions |= OPTION_MPTCP_FAIL; opts->fail_seq = subflow->map_seq; pr_debug("MP_FAIL fail_seq=%llu\n", opts->fail_seq); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILTX); return true; } bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, unsigned int *size, unsigned int remaining, struct mptcp_out_options *opts) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_sock *msk = mptcp_sk(subflow->conn); unsigned int opt_size = 0; bool snd_data_fin; bool ret = false; opts->suboptions = 0; if (unlikely(__mptcp_check_fallback(msk) && !mptcp_check_infinite_map(skb))) return false; if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) { if (mptcp_established_options_fastclose(sk, &opt_size, remaining, opts) || mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) { *size += opt_size; remaining -= opt_size; } /* MP_RST can be used with MP_FASTCLOSE and MP_FAIL if there is room */ if (mptcp_established_options_rst(sk, skb, &opt_size, remaining, opts)) { *size += opt_size; remaining -= opt_size; } return true; } snd_data_fin = mptcp_data_fin_enabled(msk); if (mptcp_established_options_mp(sk, skb, snd_data_fin, &opt_size, opts)) ret = true; else if (mptcp_established_options_dss(sk, skb, snd_data_fin, &opt_size, opts)) { unsigned int mp_fail_size; ret = true; if (mptcp_established_options_mp_fail(sk, &mp_fail_size, remaining - opt_size, opts)) { *size += opt_size + mp_fail_size; remaining -= opt_size - mp_fail_size; return true; } } /* we reserved enough space for the above options, and exceeding the * TCP option space would be fatal */ if (WARN_ON_ONCE(opt_size > remaining)) return false; *size += opt_size; remaining -= opt_size; if (mptcp_established_options_add_addr(sk, skb, &opt_size, remaining, opts)) { *size += opt_size; remaining -= opt_size; ret = true; } else if (mptcp_established_options_rm_addr(sk, &opt_size, remaining, opts)) { *size += opt_size; remaining -= opt_size; ret = true; } if (mptcp_established_options_mp_prio(sk, &opt_size, remaining, opts)) { *size += opt_size; remaining -= opt_size; ret = true; } return ret; } bool mptcp_synack_options(const struct request_sock *req, unsigned int *size, struct mptcp_out_options *opts) { struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); if (subflow_req->mp_capable) { opts->suboptions = OPTION_MPTCP_MPC_SYNACK; opts->sndr_key = subflow_req->local_key; opts->csum_reqd = subflow_req->csum_reqd; opts->allow_join_id0 = subflow_req->allow_join_id0; *size = TCPOLEN_MPTCP_MPC_SYNACK; pr_debug("subflow_req=%p, local_key=%llu\n", subflow_req, subflow_req->local_key); return true; } else if (subflow_req->mp_join) { opts->suboptions = OPTION_MPTCP_MPJ_SYNACK; opts->backup = subflow_req->request_bkup; opts->join_id = subflow_req->local_id; opts->thmac = subflow_req->thmac; opts->nonce = subflow_req->local_nonce; pr_debug("req=%p, bkup=%u, id=%u, thmac=%llu, nonce=%u\n", subflow_req, opts->backup, opts->join_id, opts->thmac, opts->nonce); *size = TCPOLEN_MPTCP_MPJ_SYNACK; return true; } return false; } static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, struct mptcp_subflow_context *subflow, struct sk_buff *skb, struct mptcp_options_received *mp_opt) { /* here we can process OoO, in-window pkts, only in-sequence 4th ack * will make the subflow fully established */ if (likely(subflow->fully_established)) { /* on passive sockets, check for 3rd ack retransmission * note that msk is always set by subflow_syn_recv_sock() * for mp_join subflows */ if (TCP_SKB_CB(skb)->seq == subflow->ssn_offset + 1 && TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq && subflow->mp_join && (mp_opt->suboptions & OPTIONS_MPTCP_MPJ) && !subflow->request_join) tcp_send_ack(ssk); goto check_notify; } /* we must process OoO packets before the first subflow is fully * established. OoO packets are instead a protocol violation * for MP_JOIN subflows as the peer must not send any data * before receiving the forth ack - cfr. RFC 8684 section 3.2. */ if (TCP_SKB_CB(skb)->seq != subflow->ssn_offset + 1) { if (subflow->mp_join) goto reset; if (subflow->is_mptfo && mp_opt->suboptions & OPTION_MPTCP_MPC_ACK) goto set_fully_established; return subflow->mp_capable; } if (subflow->remote_key_valid && (((mp_opt->suboptions & OPTION_MPTCP_DSS) && mp_opt->use_ack) || ((mp_opt->suboptions & OPTION_MPTCP_ADD_ADDR) && (!mp_opt->echo || subflow->mp_join)))) { /* subflows are fully established as soon as we get any * additional ack, including ADD_ADDR. */ goto set_fully_established; } /* If the first established packet does not contain MP_CAPABLE + data * then fallback to TCP. Fallback scenarios requires a reset for * MP_JOIN subflows. */ if (!(mp_opt->suboptions & OPTIONS_MPTCP_MPC)) { if (subflow->mp_join) goto reset; subflow->mp_capable = 0; pr_fallback(msk); mptcp_do_fallback(ssk); return false; } if (mp_opt->deny_join_id0) WRITE_ONCE(msk->pm.remote_deny_join_id0, true); if (unlikely(!READ_ONCE(msk->pm.server_side))) pr_warn_once("bogus mpc option on established client sk"); set_fully_established: mptcp_data_lock((struct sock *)msk); __mptcp_subflow_fully_established(msk, subflow, mp_opt); mptcp_data_unlock((struct sock *)msk); check_notify: /* if the subflow is not already linked into the conn_list, we can't * notify the PM: this subflow is still on the listener queue * and the PM possibly acquiring the subflow lock could race with * the listener close */ if (likely(subflow->pm_notified) || list_empty(&subflow->node)) return true; subflow->pm_notified = 1; if (subflow->mp_join) { clear_3rdack_retransmission(ssk); mptcp_pm_subflow_established(msk); } else { mptcp_pm_fully_established(msk, ssk); } return true; reset: mptcp_subflow_reset(ssk); return false; } u64 __mptcp_expand_seq(u64 old_seq, u64 cur_seq) { u32 old_seq32, cur_seq32; old_seq32 = (u32)old_seq; cur_seq32 = (u32)cur_seq; cur_seq = (old_seq & GENMASK_ULL(63, 32)) + cur_seq32; if (unlikely(cur_seq32 < old_seq32 && before(old_seq32, cur_seq32))) return cur_seq + (1LL << 32); /* reverse wrap could happen, too */ if (unlikely(cur_seq32 > old_seq32 && after(old_seq32, cur_seq32))) return cur_seq - (1LL << 32); return cur_seq; } static void __mptcp_snd_una_update(struct mptcp_sock *msk, u64 new_snd_una) { msk->bytes_acked += new_snd_una - msk->snd_una; WRITE_ONCE(msk->snd_una, new_snd_una); } static void ack_update_msk(struct mptcp_sock *msk, struct sock *ssk, struct mptcp_options_received *mp_opt) { u64 new_wnd_end, new_snd_una, snd_nxt = READ_ONCE(msk->snd_nxt); struct sock *sk = (struct sock *)msk; u64 old_snd_una; mptcp_data_lock(sk); /* avoid ack expansion on update conflict, to reduce the risk of * wrongly expanding to a future ack sequence number, which is way * more dangerous than missing an ack */ old_snd_una = msk->snd_una; new_snd_una = mptcp_expand_seq(old_snd_una, mp_opt->data_ack, mp_opt->ack64); /* ACK for data not even sent yet? Ignore.*/ if (unlikely(after64(new_snd_una, snd_nxt))) new_snd_una = old_snd_una; new_wnd_end = new_snd_una + tcp_sk(ssk)->snd_wnd; if (after64(new_wnd_end, msk->wnd_end)) WRITE_ONCE(msk->wnd_end, new_wnd_end); /* this assumes mptcp_incoming_options() is invoked after tcp_ack() */ if (after64(msk->wnd_end, snd_nxt)) __mptcp_check_push(sk, ssk); if (after64(new_snd_una, old_snd_una)) { __mptcp_snd_una_update(msk, new_snd_una); __mptcp_data_acked(sk); } msk->last_ack_recv = tcp_jiffies32; mptcp_data_unlock(sk); trace_ack_update_msk(mp_opt->data_ack, old_snd_una, new_snd_una, new_wnd_end, READ_ONCE(msk->wnd_end)); } bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit) { /* Skip if DATA_FIN was already received. * If updating simultaneously with the recvmsg loop, values * should match. If they mismatch, the peer is misbehaving and * we will prefer the most recent information. */ if (READ_ONCE(msk->rcv_data_fin)) return false; WRITE_ONCE(msk->rcv_data_fin_seq, mptcp_expand_seq(READ_ONCE(msk->ack_seq), data_fin_seq, use_64bit)); WRITE_ONCE(msk->rcv_data_fin, 1); return true; } static bool add_addr_hmac_valid(struct mptcp_sock *msk, struct mptcp_options_received *mp_opt) { u64 hmac = 0; if (mp_opt->echo) return true; hmac = add_addr_generate_hmac(READ_ONCE(msk->remote_key), READ_ONCE(msk->local_key), &mp_opt->addr); pr_debug("msk=%p, ahmac=%llu, mp_opt->ahmac=%llu\n", msk, hmac, mp_opt->ahmac); return hmac == mp_opt->ahmac; } /* Return false if a subflow has been reset, else return true */ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_sock *msk = mptcp_sk(subflow->conn); struct mptcp_options_received mp_opt; struct mptcp_ext *mpext; if (__mptcp_check_fallback(msk)) { /* Keep it simple and unconditionally trigger send data cleanup and * pending queue spooling. We will need to acquire the data lock * for more accurate checks, and once the lock is acquired, such * helpers are cheap. */ mptcp_data_lock(subflow->conn); if (sk_stream_memory_free(sk)) __mptcp_check_push(subflow->conn, sk); /* on fallback we just need to ignore the msk-level snd_una, as * this is really plain TCP */ __mptcp_snd_una_update(msk, READ_ONCE(msk->snd_nxt)); __mptcp_data_acked(subflow->conn); mptcp_data_unlock(subflow->conn); return true; } mptcp_get_options(skb, &mp_opt); /* The subflow can be in close state only if check_fully_established() * just sent a reset. If so, tell the caller to ignore the current packet. */ if (!check_fully_established(msk, sk, subflow, skb, &mp_opt)) return sk->sk_state != TCP_CLOSE; if (unlikely(mp_opt.suboptions != OPTION_MPTCP_DSS)) { if ((mp_opt.suboptions & OPTION_MPTCP_FASTCLOSE) && READ_ONCE(msk->local_key) == mp_opt.rcvr_key) { WRITE_ONCE(msk->rcv_fastclose, true); mptcp_schedule_work((struct sock *)msk); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFASTCLOSERX); } if ((mp_opt.suboptions & OPTION_MPTCP_ADD_ADDR) && add_addr_hmac_valid(msk, &mp_opt)) { if (!mp_opt.echo) { mptcp_pm_add_addr_received(sk, &mp_opt.addr); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDR); } else { mptcp_pm_add_addr_echoed(msk, &mp_opt.addr); mptcp_pm_del_add_timer(msk, &mp_opt.addr, true); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADD); } if (mp_opt.addr.port) MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_PORTADD); } if (mp_opt.suboptions & OPTION_MPTCP_RM_ADDR) mptcp_pm_rm_addr_received(msk, &mp_opt.rm_list); if (mp_opt.suboptions & OPTION_MPTCP_PRIO) { mptcp_pm_mp_prio_received(sk, mp_opt.backup); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPPRIORX); } if (mp_opt.suboptions & OPTION_MPTCP_FAIL) { mptcp_pm_mp_fail_received(sk, mp_opt.fail_seq); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILRX); } if (mp_opt.suboptions & OPTION_MPTCP_RST) { subflow->reset_seen = 1; subflow->reset_reason = mp_opt.reset_reason; subflow->reset_transient = mp_opt.reset_transient; MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPRSTRX); } if (!(mp_opt.suboptions & OPTION_MPTCP_DSS)) return true; } /* we can't wait for recvmsg() to update the ack_seq, otherwise * monodirectional flows will stuck */ if (mp_opt.use_ack) ack_update_msk(msk, sk, &mp_opt); /* Zero-data-length packets are dropped by the caller and not * propagated to the MPTCP layer, so the skb extension does not * need to be allocated or populated. DATA_FIN information, if * present, needs to be updated here before the skb is freed. */ if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) { if (mp_opt.data_fin && mp_opt.data_len == 1 && mptcp_update_rcv_data_fin(msk, mp_opt.data_seq, mp_opt.dsn64)) mptcp_schedule_work((struct sock *)msk); return true; } mpext = skb_ext_add(skb, SKB_EXT_MPTCP); if (!mpext) return true; memset(mpext, 0, sizeof(*mpext)); if (likely(mp_opt.use_map)) { if (mp_opt.mpc_map) { /* this is an MP_CAPABLE carrying MPTCP data * we know this map the first chunk of data */ mptcp_crypto_key_sha(subflow->remote_key, NULL, &mpext->data_seq); mpext->data_seq++; mpext->subflow_seq = 1; mpext->dsn64 = 1; mpext->mpc_map = 1; mpext->data_fin = 0; } else { mpext->data_seq = mp_opt.data_seq; mpext->subflow_seq = mp_opt.subflow_seq; mpext->dsn64 = mp_opt.dsn64; mpext->data_fin = mp_opt.data_fin; } mpext->data_len = mp_opt.data_len; mpext->use_map = 1; mpext->csum_reqd = !!(mp_opt.suboptions & OPTION_MPTCP_CSUMREQD); if (mpext->csum_reqd) mpext->csum = mp_opt.csum; } return true; } static void mptcp_set_rwin(struct tcp_sock *tp, struct tcphdr *th) { const struct sock *ssk = (const struct sock *)tp; struct mptcp_subflow_context *subflow; u64 ack_seq, rcv_wnd_old, rcv_wnd_new; struct mptcp_sock *msk; u32 new_win; u64 win; subflow = mptcp_subflow_ctx(ssk); msk = mptcp_sk(subflow->conn); ack_seq = READ_ONCE(msk->ack_seq); rcv_wnd_new = ack_seq + tp->rcv_wnd; rcv_wnd_old = atomic64_read(&msk->rcv_wnd_sent); if (after64(rcv_wnd_new, rcv_wnd_old)) { u64 rcv_wnd; for (;;) { rcv_wnd = atomic64_cmpxchg(&msk->rcv_wnd_sent, rcv_wnd_old, rcv_wnd_new); if (rcv_wnd == rcv_wnd_old) break; rcv_wnd_old = rcv_wnd; if (before64(rcv_wnd_new, rcv_wnd_old)) { MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDCONFLICTUPDATE); goto raise_win; } MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDCONFLICT); } return; } if (rcv_wnd_new != rcv_wnd_old) { raise_win: win = rcv_wnd_old - ack_seq; tp->rcv_wnd = min_t(u64, win, U32_MAX); new_win = tp->rcv_wnd; /* Make sure we do not exceed the maximum possible * scaled window. */ if (unlikely(th->syn)) new_win = min(new_win, 65535U) << tp->rx_opt.rcv_wscale; if (!tp->rx_opt.rcv_wscale && READ_ONCE(sock_net(ssk)->ipv4.sysctl_tcp_workaround_signed_windows)) new_win = min(new_win, MAX_TCP_WINDOW); else new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale)); /* RFC1323 scaling applied */ new_win >>= tp->rx_opt.rcv_wscale; th->window = htons(new_win); MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDSHARED); } } __sum16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum) { struct csum_pseudo_header header; __wsum csum; /* cfr RFC 8684 3.3.1.: * the data sequence number used in the pseudo-header is * always the 64-bit value, irrespective of what length is used in the * DSS option itself. */ header.data_seq = cpu_to_be64(data_seq); header.subflow_seq = htonl(subflow_seq); header.data_len = htons(data_len); header.csum = 0; csum = csum_partial(&header, sizeof(header), sum); return csum_fold(csum); } static __sum16 mptcp_make_csum(const struct mptcp_ext *mpext) { return __mptcp_make_csum(mpext->data_seq, mpext->subflow_seq, mpext->data_len, ~csum_unfold(mpext->csum)); } static void put_len_csum(u16 len, __sum16 csum, void *data) { __sum16 *sumptr = data + 2; __be16 *ptr = data; put_unaligned_be16(len, ptr); put_unaligned(csum, sumptr); } void mptcp_write_options(struct tcphdr *th, __be32 *ptr, struct tcp_sock *tp, struct mptcp_out_options *opts) { const struct sock *ssk = (const struct sock *)tp; struct mptcp_subflow_context *subflow; /* Which options can be used together? * * X: mutually exclusive * O: often used together * C: can be used together in some cases * P: could be used together but we prefer not to (optimisations) * * Opt: | MPC | MPJ | DSS | ADD | RM | PRIO | FAIL | FC | * ------|------|------|------|------|------|------|------|------| * MPC |------|------|------|------|------|------|------|------| * MPJ | X |------|------|------|------|------|------|------| * DSS | X | X |------|------|------|------|------|------| * ADD | X | X | P |------|------|------|------|------| * RM | C | C | C | P |------|------|------|------| * PRIO | X | C | C | C | C |------|------|------| * FAIL | X | X | C | X | X | X |------|------| * FC | X | X | X | X | X | X | X |------| * RST | X | X | X | X | X | X | O | O | * ------|------|------|------|------|------|------|------|------| * * The same applies in mptcp_established_options() function. */ if (likely(OPTION_MPTCP_DSS & opts->suboptions)) { struct mptcp_ext *mpext = &opts->ext_copy; u8 len = TCPOLEN_MPTCP_DSS_BASE; u8 flags = 0; if (mpext->use_ack) { flags = MPTCP_DSS_HAS_ACK; if (mpext->ack64) { len += TCPOLEN_MPTCP_DSS_ACK64; flags |= MPTCP_DSS_ACK64; } else { len += TCPOLEN_MPTCP_DSS_ACK32; } } if (mpext->use_map) { len += TCPOLEN_MPTCP_DSS_MAP64; /* Use only 64-bit mapping flags for now, add * support for optional 32-bit mappings later. */ flags |= MPTCP_DSS_HAS_MAP | MPTCP_DSS_DSN64; if (mpext->data_fin) flags |= MPTCP_DSS_DATA_FIN; if (opts->csum_reqd) len += TCPOLEN_MPTCP_DSS_CHECKSUM; } *ptr++ = mptcp_option(MPTCPOPT_DSS, len, 0, flags); if (mpext->use_ack) { if (mpext->ack64) { put_unaligned_be64(mpext->data_ack, ptr); ptr += 2; } else { put_unaligned_be32(mpext->data_ack32, ptr); ptr += 1; } } if (mpext->use_map) { put_unaligned_be64(mpext->data_seq, ptr); ptr += 2; put_unaligned_be32(mpext->subflow_seq, ptr); ptr += 1; if (opts->csum_reqd) { /* data_len == 0 is reserved for the infinite mapping, * the checksum will also be set to 0. */ put_len_csum(mpext->data_len, (mpext->data_len ? mptcp_make_csum(mpext) : 0), ptr); } else { put_unaligned_be32(mpext->data_len << 16 | TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); } ptr += 1; } /* We might need to add MP_FAIL options in rare cases */ if (unlikely(OPTION_MPTCP_FAIL & opts->suboptions)) goto mp_fail; } else if (OPTIONS_MPTCP_MPC & opts->suboptions) { u8 len, flag = MPTCP_CAP_HMAC_SHA256; if (OPTION_MPTCP_MPC_SYN & opts->suboptions) { len = TCPOLEN_MPTCP_MPC_SYN; } else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions) { len = TCPOLEN_MPTCP_MPC_SYNACK; } else if (opts->data_len) { len = TCPOLEN_MPTCP_MPC_ACK_DATA; if (opts->csum_reqd) len += TCPOLEN_MPTCP_DSS_CHECKSUM; } else { len = TCPOLEN_MPTCP_MPC_ACK; } if (opts->csum_reqd) flag |= MPTCP_CAP_CHECKSUM_REQD; if (!opts->allow_join_id0) flag |= MPTCP_CAP_DENY_JOIN_ID0; *ptr++ = mptcp_option(MPTCPOPT_MP_CAPABLE, len, MPTCP_SUPPORTED_VERSION, flag); if (!((OPTION_MPTCP_MPC_SYNACK | OPTION_MPTCP_MPC_ACK) & opts->suboptions)) goto mp_capable_done; put_unaligned_be64(opts->sndr_key, ptr); ptr += 2; if (!((OPTION_MPTCP_MPC_ACK) & opts->suboptions)) goto mp_capable_done; put_unaligned_be64(opts->rcvr_key, ptr); ptr += 2; if (!opts->data_len) goto mp_capable_done; if (opts->csum_reqd) { put_len_csum(opts->data_len, __mptcp_make_csum(opts->data_seq, opts->subflow_seq, opts->data_len, ~csum_unfold(opts->csum)), ptr); } else { put_unaligned_be32(opts->data_len << 16 | TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); } ptr += 1; /* MPC is additionally mutually exclusive with MP_PRIO */ goto mp_capable_done; } else if (OPTIONS_MPTCP_MPJ & opts->suboptions) { if (OPTION_MPTCP_MPJ_SYN & opts->suboptions) { *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN, TCPOLEN_MPTCP_MPJ_SYN, opts->backup, opts->join_id); put_unaligned_be32(opts->token, ptr); ptr += 1; put_unaligned_be32(opts->nonce, ptr); ptr += 1; } else if (OPTION_MPTCP_MPJ_SYNACK & opts->suboptions) { *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN, TCPOLEN_MPTCP_MPJ_SYNACK, opts->backup, opts->join_id); put_unaligned_be64(opts->thmac, ptr); ptr += 2; put_unaligned_be32(opts->nonce, ptr); ptr += 1; } else { *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN, TCPOLEN_MPTCP_MPJ_ACK, 0, 0); memcpy(ptr, opts->hmac, MPTCPOPT_HMAC_LEN); ptr += 5; } } else if (OPTION_MPTCP_ADD_ADDR & opts->suboptions) { u8 len = TCPOLEN_MPTCP_ADD_ADDR_BASE; u8 echo = MPTCP_ADDR_ECHO; #if IS_ENABLED(CONFIG_MPTCP_IPV6) if (opts->addr.family == AF_INET6) len = TCPOLEN_MPTCP_ADD_ADDR6_BASE; #endif if (opts->addr.port) len += TCPOLEN_MPTCP_PORT_LEN; if (opts->ahmac) { len += sizeof(opts->ahmac); echo = 0; } *ptr++ = mptcp_option(MPTCPOPT_ADD_ADDR, len, echo, opts->addr.id); if (opts->addr.family == AF_INET) { memcpy((u8 *)ptr, (u8 *)&opts->addr.addr.s_addr, 4); ptr += 1; } #if IS_ENABLED(CONFIG_MPTCP_IPV6) else if (opts->addr.family == AF_INET6) { memcpy((u8 *)ptr, opts->addr.addr6.s6_addr, 16); ptr += 4; } #endif if (!opts->addr.port) { if (opts->ahmac) { put_unaligned_be64(opts->ahmac, ptr); ptr += 2; } } else { u16 port = ntohs(opts->addr.port); if (opts->ahmac) { u8 *bptr = (u8 *)ptr; put_unaligned_be16(port, bptr); bptr += 2; put_unaligned_be64(opts->ahmac, bptr); bptr += 8; put_unaligned_be16(TCPOPT_NOP << 8 | TCPOPT_NOP, bptr); ptr += 3; } else { put_unaligned_be32(port << 16 | TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); ptr += 1; } } } else if (unlikely(OPTION_MPTCP_FASTCLOSE & opts->suboptions)) { /* FASTCLOSE is mutually exclusive with others except RST */ *ptr++ = mptcp_option(MPTCPOPT_MP_FASTCLOSE, TCPOLEN_MPTCP_FASTCLOSE, 0, 0); put_unaligned_be64(opts->rcvr_key, ptr); ptr += 2; if (OPTION_MPTCP_RST & opts->suboptions) goto mp_rst; return; } else if (unlikely(OPTION_MPTCP_FAIL & opts->suboptions)) { mp_fail: /* MP_FAIL is mutually exclusive with others except RST */ subflow = mptcp_subflow_ctx(ssk); subflow->send_mp_fail = 0; *ptr++ = mptcp_option(MPTCPOPT_MP_FAIL, TCPOLEN_MPTCP_FAIL, 0, 0); put_unaligned_be64(opts->fail_seq, ptr); ptr += 2; if (OPTION_MPTCP_RST & opts->suboptions) goto mp_rst; return; } else if (unlikely(OPTION_MPTCP_RST & opts->suboptions)) { mp_rst: *ptr++ = mptcp_option(MPTCPOPT_RST, TCPOLEN_MPTCP_RST, opts->reset_transient, opts->reset_reason); return; } if (OPTION_MPTCP_PRIO & opts->suboptions) { subflow = mptcp_subflow_ctx(ssk); subflow->send_mp_prio = 0; *ptr++ = mptcp_option(MPTCPOPT_MP_PRIO, TCPOLEN_MPTCP_PRIO, opts->backup, TCPOPT_NOP); MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPPRIOTX); } mp_capable_done: if (OPTION_MPTCP_RM_ADDR & opts->suboptions) { u8 i = 1; *ptr++ = mptcp_option(MPTCPOPT_RM_ADDR, TCPOLEN_MPTCP_RM_ADDR_BASE + opts->rm_list.nr, 0, opts->rm_list.ids[0]); while (i < opts->rm_list.nr) { u8 id1, id2, id3, id4; id1 = opts->rm_list.ids[i]; id2 = i + 1 < opts->rm_list.nr ? opts->rm_list.ids[i + 1] : TCPOPT_NOP; id3 = i + 2 < opts->rm_list.nr ? opts->rm_list.ids[i + 2] : TCPOPT_NOP; id4 = i + 3 < opts->rm_list.nr ? opts->rm_list.ids[i + 3] : TCPOPT_NOP; put_unaligned_be32(id1 << 24 | id2 << 16 | id3 << 8 | id4, ptr); ptr += 1; i += 4; } } if (tp) mptcp_set_rwin(tp, th); } __be32 mptcp_get_reset_option(const struct sk_buff *skb) { const struct mptcp_ext *ext = mptcp_get_ext(skb); u8 flags, reason; if (ext) { flags = ext->reset_transient; reason = ext->reset_reason; return mptcp_option(MPTCPOPT_RST, TCPOLEN_MPTCP_RST, flags, reason); } return htonl(0u); } EXPORT_SYMBOL_GPL(mptcp_get_reset_option); |
1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 | // SPDX-License-Identifier: GPL-2.0-only /* * Architecture specific (i386/x86_64) functions for kexec based crash dumps. * * Created by: Hariprasad Nellitheertha (hari@in.ibm.com) * * Copyright (C) IBM Corporation, 2004. All rights reserved. * Copyright (C) Red Hat Inc., 2014. All rights reserved. * Authors: * Vivek Goyal <vgoyal@redhat.com> * */ #define pr_fmt(fmt) "kexec: " fmt #include <linux/types.h> #include <linux/kernel.h> #include <linux/smp.h> #include <linux/reboot.h> #include <linux/kexec.h> #include <linux/delay.h> #include <linux/elf.h> #include <linux/elfcore.h> #include <linux/export.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/memblock.h> #include <asm/bootparam.h> #include <asm/processor.h> #include <asm/hardirq.h> #include <asm/nmi.h> #include <asm/hw_irq.h> #include <asm/apic.h> #include <asm/e820/types.h> #include <asm/io_apic.h> #include <asm/hpet.h> #include <linux/kdebug.h> #include <asm/cpu.h> #include <asm/reboot.h> #include <asm/intel_pt.h> #include <asm/crash.h> #include <asm/cmdline.h> #include <asm/sev.h> /* Used while preparing memory map entries for second kernel */ struct crash_memmap_data { struct boot_params *params; /* Type of memory */ unsigned int type; }; #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) static void kdump_nmi_callback(int cpu, struct pt_regs *regs) { crash_save_cpu(regs, cpu); /* * Disable Intel PT to stop its logging */ cpu_emergency_stop_pt(); kdump_sev_callback(); disable_local_APIC(); } void kdump_nmi_shootdown_cpus(void) { nmi_shootdown_cpus(kdump_nmi_callback); disable_local_APIC(); } /* Override the weak function in kernel/panic.c */ void crash_smp_send_stop(void) { static int cpus_stopped; if (cpus_stopped) return; if (smp_ops.crash_stop_other_cpus) smp_ops.crash_stop_other_cpus(); else smp_send_stop(); cpus_stopped = 1; } #else void crash_smp_send_stop(void) { /* There are no cpus to shootdown */ } #endif void native_machine_crash_shutdown(struct pt_regs *regs) { /* This function is only called after the system * has panicked or is otherwise in a critical state. * The minimum amount of code to allow a kexec'd kernel * to run successfully needs to happen here. * * In practice this means shooting down the other cpus in * an SMP system. */ /* The kernel is broken so disable interrupts */ local_irq_disable(); crash_smp_send_stop(); cpu_emergency_disable_virtualization(); /* * Disable Intel PT to stop its logging */ cpu_emergency_stop_pt(); #ifdef CONFIG_X86_IO_APIC /* Prevent crash_kexec() from deadlocking on ioapic_lock. */ ioapic_zap_locks(); clear_IO_APIC(); #endif lapic_shutdown(); restore_boot_irq_mode(); #ifdef CONFIG_HPET_TIMER hpet_disable(); #endif /* * Non-crash kexec calls enc_kexec_begin() while scheduling is still * active. This allows the callback to wait until all in-flight * shared<->private conversions are complete. In a crash scenario, * enc_kexec_begin() gets called after all but one CPU have been shut * down and interrupts have been disabled. This allows the callback to * detect a race with the conversion and report it. */ x86_platform.guest.enc_kexec_begin(); x86_platform.guest.enc_kexec_finish(); crash_save_cpu(regs, safe_smp_processor_id()); } #if defined(CONFIG_KEXEC_FILE) || defined(CONFIG_CRASH_HOTPLUG) static int get_nr_ram_ranges_callback(struct resource *res, void *arg) { unsigned int *nr_ranges = arg; (*nr_ranges)++; return 0; } /* Gather all the required information to prepare elf headers for ram regions */ static struct crash_mem *fill_up_crash_elf_data(void) { unsigned int nr_ranges = 0; struct crash_mem *cmem; walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback); if (!nr_ranges) return NULL; /* * Exclusion of crash region and/or crashk_low_res may cause * another range split. So add extra two slots here. */ nr_ranges += 2; cmem = vzalloc(struct_size(cmem, ranges, nr_ranges)); if (!cmem) return NULL; cmem->max_nr_ranges = nr_ranges; cmem->nr_ranges = 0; return cmem; } /* * Look for any unwanted ranges between mstart, mend and remove them. This * might lead to split and split ranges are put in cmem->ranges[] array */ static int elf_header_exclude_ranges(struct crash_mem *cmem) { int ret = 0; /* Exclude the low 1M because it is always reserved */ ret = crash_exclude_mem_range(cmem, 0, SZ_1M - 1); if (ret) return ret; /* Exclude crashkernel region */ ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); if (ret) return ret; if (crashk_low_res.end) ret = crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end); return ret; } static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg) { struct crash_mem *cmem = arg; cmem->ranges[cmem->nr_ranges].start = res->start; cmem->ranges[cmem->nr_ranges].end = res->end; cmem->nr_ranges++; return 0; } /* Prepare elf headers. Return addr and size */ static int prepare_elf_headers(void **addr, unsigned long *sz, unsigned long *nr_mem_ranges) { struct crash_mem *cmem; int ret; cmem = fill_up_crash_elf_data(); if (!cmem) return -ENOMEM; ret = walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback); if (ret) goto out; /* Exclude unwanted mem ranges */ ret = elf_header_exclude_ranges(cmem); if (ret) goto out; /* Return the computed number of memory ranges, for hotplug usage */ *nr_mem_ranges = cmem->nr_ranges; /* By default prepare 64bit headers */ ret = crash_prepare_elf64_headers(cmem, IS_ENABLED(CONFIG_X86_64), addr, sz); out: vfree(cmem); return ret; } #endif #ifdef CONFIG_KEXEC_FILE static int add_e820_entry(struct boot_params *params, struct e820_entry *entry) { unsigned int nr_e820_entries; nr_e820_entries = params->e820_entries; if (nr_e820_entries >= E820_MAX_ENTRIES_ZEROPAGE) return 1; memcpy(¶ms->e820_table[nr_e820_entries], entry, sizeof(struct e820_entry)); params->e820_entries++; return 0; } static int memmap_entry_callback(struct resource *res, void *arg) { struct crash_memmap_data *cmd = arg; struct boot_params *params = cmd->params; struct e820_entry ei; ei.addr = res->start; ei.size = resource_size(res); ei.type = cmd->type; add_e820_entry(params, &ei); return 0; } static int memmap_exclude_ranges(struct kimage *image, struct crash_mem *cmem, unsigned long long mstart, unsigned long long mend) { unsigned long start, end; cmem->ranges[0].start = mstart; cmem->ranges[0].end = mend; cmem->nr_ranges = 1; /* Exclude elf header region */ start = image->elf_load_addr; end = start + image->elf_headers_sz - 1; return crash_exclude_mem_range(cmem, start, end); } /* Prepare memory map for crash dump kernel */ int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params) { int i, ret = 0; unsigned long flags; struct e820_entry ei; struct crash_memmap_data cmd; struct crash_mem *cmem; cmem = vzalloc(struct_size(cmem, ranges, 1)); if (!cmem) return -ENOMEM; memset(&cmd, 0, sizeof(struct crash_memmap_data)); cmd.params = params; /* Add the low 1M */ cmd.type = E820_TYPE_RAM; flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; walk_iomem_res_desc(IORES_DESC_NONE, flags, 0, (1<<20)-1, &cmd, memmap_entry_callback); /* Add ACPI tables */ cmd.type = E820_TYPE_ACPI; flags = IORESOURCE_MEM | IORESOURCE_BUSY; walk_iomem_res_desc(IORES_DESC_ACPI_TABLES, flags, 0, -1, &cmd, memmap_entry_callback); /* Add ACPI Non-volatile Storage */ cmd.type = E820_TYPE_NVS; walk_iomem_res_desc(IORES_DESC_ACPI_NV_STORAGE, flags, 0, -1, &cmd, memmap_entry_callback); /* Add e820 reserved ranges */ cmd.type = E820_TYPE_RESERVED; flags = IORESOURCE_MEM; walk_iomem_res_desc(IORES_DESC_RESERVED, flags, 0, -1, &cmd, memmap_entry_callback); /* Add crashk_low_res region */ if (crashk_low_res.end) { ei.addr = crashk_low_res.start; ei.size = resource_size(&crashk_low_res); ei.type = E820_TYPE_RAM; add_e820_entry(params, &ei); } /* Exclude some ranges from crashk_res and add rest to memmap */ ret = memmap_exclude_ranges(image, cmem, crashk_res.start, crashk_res.end); if (ret) goto out; for (i = 0; i < cmem->nr_ranges; i++) { ei.size = cmem->ranges[i].end - cmem->ranges[i].start + 1; /* If entry is less than a page, skip it */ if (ei.size < PAGE_SIZE) continue; ei.addr = cmem->ranges[i].start; ei.type = E820_TYPE_RAM; add_e820_entry(params, &ei); } out: vfree(cmem); return ret; } int crash_load_segments(struct kimage *image) { int ret; unsigned long pnum = 0; struct kexec_buf kbuf = { .image = image, .buf_min = 0, .buf_max = ULONG_MAX, .top_down = false }; /* Prepare elf headers and add a segment */ ret = prepare_elf_headers(&kbuf.buffer, &kbuf.bufsz, &pnum); if (ret) return ret; image->elf_headers = kbuf.buffer; image->elf_headers_sz = kbuf.bufsz; kbuf.memsz = kbuf.bufsz; #ifdef CONFIG_CRASH_HOTPLUG /* * The elfcorehdr segment size accounts for VMCOREINFO, kernel_map, * maximum CPUs and maximum memory ranges. */ if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG)) pnum = 2 + CONFIG_NR_CPUS_DEFAULT + CONFIG_CRASH_MAX_MEMORY_RANGES; else pnum += 2 + CONFIG_NR_CPUS_DEFAULT; if (pnum < (unsigned long)PN_XNUM) { kbuf.memsz = pnum * sizeof(Elf64_Phdr); kbuf.memsz += sizeof(Elf64_Ehdr); image->elfcorehdr_index = image->nr_segments; /* Mark as usable to crash kernel, else crash kernel fails on boot */ image->elf_headers_sz = kbuf.memsz; } else { pr_err("number of Phdrs %lu exceeds max\n", pnum); } #endif kbuf.buf_align = ELF_CORE_HEADER_ALIGN; kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; ret = kexec_add_buffer(&kbuf); if (ret) return ret; image->elf_load_addr = kbuf.mem; kexec_dprintk("Loaded ELF headers at 0x%lx bufsz=0x%lx memsz=0x%lx\n", image->elf_load_addr, kbuf.bufsz, kbuf.memsz); return ret; } #endif /* CONFIG_KEXEC_FILE */ #ifdef CONFIG_CRASH_HOTPLUG #undef pr_fmt #define pr_fmt(fmt) "crash hp: " fmt int arch_crash_hotplug_support(struct kimage *image, unsigned long kexec_flags) { #ifdef CONFIG_KEXEC_FILE if (image->file_mode) return 1; #endif /* * Initially, crash hotplug support for kexec_load was added * with the KEXEC_UPDATE_ELFCOREHDR flag. Later, this * functionality was expanded to accommodate multiple kexec * segment updates, leading to the introduction of the * KEXEC_CRASH_HOTPLUG_SUPPORT kexec flag bit. Consequently, * when the kexec tool sends either of these flags, it indicates * that the required kexec segment (elfcorehdr) is excluded from * the SHA calculation. */ return (kexec_flags & KEXEC_UPDATE_ELFCOREHDR || kexec_flags & KEXEC_CRASH_HOTPLUG_SUPPORT); } unsigned int arch_crash_get_elfcorehdr_size(void) { unsigned int sz; /* kernel_map, VMCOREINFO and maximum CPUs */ sz = 2 + CONFIG_NR_CPUS_DEFAULT; if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG)) sz += CONFIG_CRASH_MAX_MEMORY_RANGES; sz *= sizeof(Elf64_Phdr); return sz; } /** * arch_crash_handle_hotplug_event() - Handle hotplug elfcorehdr changes * @image: a pointer to kexec_crash_image * @arg: struct memory_notify handler for memory hotplug case and * NULL for CPU hotplug case. * * Prepare the new elfcorehdr and replace the existing elfcorehdr. */ void arch_crash_handle_hotplug_event(struct kimage *image, void *arg) { void *elfbuf = NULL, *old_elfcorehdr; unsigned long nr_mem_ranges; unsigned long mem, memsz; unsigned long elfsz = 0; /* * As crash_prepare_elf64_headers() has already described all * possible CPUs, there is no need to update the elfcorehdr * for additional CPU changes. */ if ((image->file_mode || image->elfcorehdr_updated) && ((image->hp_action == KEXEC_CRASH_HP_ADD_CPU) || (image->hp_action == KEXEC_CRASH_HP_REMOVE_CPU))) return; /* * Create the new elfcorehdr reflecting the changes to CPU and/or * memory resources. */ if (prepare_elf_headers(&elfbuf, &elfsz, &nr_mem_ranges)) { pr_err("unable to create new elfcorehdr"); goto out; } /* * Obtain address and size of the elfcorehdr segment, and * check it against the new elfcorehdr buffer. */ mem = image->segment[image->elfcorehdr_index].mem; memsz = image->segment[image->elfcorehdr_index].memsz; if (elfsz > memsz) { pr_err("update elfcorehdr elfsz %lu > memsz %lu", elfsz, memsz); goto out; } /* * Copy new elfcorehdr over the old elfcorehdr at destination. */ old_elfcorehdr = kmap_local_page(pfn_to_page(mem >> PAGE_SHIFT)); if (!old_elfcorehdr) { pr_err("mapping elfcorehdr segment failed\n"); goto out; } /* * Temporarily invalidate the crash image while the * elfcorehdr is updated. */ xchg(&kexec_crash_image, NULL); memcpy_flushcache(old_elfcorehdr, elfbuf, elfsz); xchg(&kexec_crash_image, image); kunmap_local(old_elfcorehdr); pr_debug("updated elfcorehdr\n"); out: vfree(elfbuf); } #endif |
2 1 2 1229 7 7 6 2 2 1 2 1 1 5 1 3 1 3 1 3 3 3 3 110 7 110 109 1 110 668 650 651 2 2 6 8 8 8 33 33 33 3 30 1228 1230 1228 1229 1226 1226 1 1229 1227 1227 8 8 8 9 1230 1229 1229 862 774 92 866 863 865 862 866 848 77 1198 1195 1197 1192 1198 5 5 5 5 459 651 459 651 716 649 33 3 3 1 1 1 1 1 1 1 1 1 1 1249 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 | // SPDX-License-Identifier: GPL-2.0 /* * inode.c - part of debugfs, a tiny little debug file system * * Copyright (C) 2004,2019 Greg Kroah-Hartman <greg@kroah.com> * Copyright (C) 2004 IBM Inc. * Copyright (C) 2019 Linux Foundation <gregkh@linuxfoundation.org> * * debugfs is for people to use instead of /proc or /sys. * See ./Documentation/core-api/kernel-api.rst for more details. */ #define pr_fmt(fmt) "debugfs: " fmt #include <linux/module.h> #include <linux/fs.h> #include <linux/fs_context.h> #include <linux/fs_parser.h> #include <linux/pagemap.h> #include <linux/init.h> #include <linux/kobject.h> #include <linux/namei.h> #include <linux/debugfs.h> #include <linux/fsnotify.h> #include <linux/string.h> #include <linux/seq_file.h> #include <linux/magic.h> #include <linux/slab.h> #include <linux/security.h> #include "internal.h" #define DEBUGFS_DEFAULT_MODE 0700 static struct vfsmount *debugfs_mount; static int debugfs_mount_count; static bool debugfs_registered; static unsigned int debugfs_allow __ro_after_init = DEFAULT_DEBUGFS_ALLOW_BITS; /* * Don't allow access attributes to be changed whilst the kernel is locked down * so that we can use the file mode as part of a heuristic to determine whether * to lock down individual files. */ static int debugfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *ia) { int ret; if (ia->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID)) { ret = security_locked_down(LOCKDOWN_DEBUGFS); if (ret) return ret; } return simple_setattr(&nop_mnt_idmap, dentry, ia); } static const struct inode_operations debugfs_file_inode_operations = { .setattr = debugfs_setattr, }; static const struct inode_operations debugfs_dir_inode_operations = { .lookup = simple_lookup, .setattr = debugfs_setattr, }; static const struct inode_operations debugfs_symlink_inode_operations = { .get_link = simple_get_link, .setattr = debugfs_setattr, }; static struct inode *debugfs_get_inode(struct super_block *sb) { struct inode *inode = new_inode(sb); if (inode) { inode->i_ino = get_next_ino(); simple_inode_init_ts(inode); } return inode; } struct debugfs_fs_info { kuid_t uid; kgid_t gid; umode_t mode; /* Opt_* bitfield. */ unsigned int opts; }; enum { Opt_uid, Opt_gid, Opt_mode, Opt_source, }; static const struct fs_parameter_spec debugfs_param_specs[] = { fsparam_gid ("gid", Opt_gid), fsparam_u32oct ("mode", Opt_mode), fsparam_uid ("uid", Opt_uid), fsparam_string ("source", Opt_source), {} }; static int debugfs_parse_param(struct fs_context *fc, struct fs_parameter *param) { struct debugfs_fs_info *opts = fc->s_fs_info; struct fs_parse_result result; int opt; opt = fs_parse(fc, debugfs_param_specs, param, &result); if (opt < 0) { /* * We might like to report bad mount options here; but * traditionally debugfs has ignored all mount options */ if (opt == -ENOPARAM) return 0; return opt; } switch (opt) { case Opt_uid: opts->uid = result.uid; break; case Opt_gid: opts->gid = result.gid; break; case Opt_mode: opts->mode = result.uint_32 & S_IALLUGO; break; case Opt_source: if (fc->source) return invalfc(fc, "Multiple sources specified"); fc->source = param->string; param->string = NULL; break; /* * We might like to report bad mount options here; * but traditionally debugfs has ignored all mount options */ } opts->opts |= BIT(opt); return 0; } static void _debugfs_apply_options(struct super_block *sb, bool remount) { struct debugfs_fs_info *fsi = sb->s_fs_info; struct inode *inode = d_inode(sb->s_root); /* * On remount, only reset mode/uid/gid if they were provided as mount * options. */ if (!remount || fsi->opts & BIT(Opt_mode)) { inode->i_mode &= ~S_IALLUGO; inode->i_mode |= fsi->mode; } if (!remount || fsi->opts & BIT(Opt_uid)) inode->i_uid = fsi->uid; if (!remount || fsi->opts & BIT(Opt_gid)) inode->i_gid = fsi->gid; } static void debugfs_apply_options(struct super_block *sb) { _debugfs_apply_options(sb, false); } static void debugfs_apply_options_remount(struct super_block *sb) { _debugfs_apply_options(sb, true); } static int debugfs_reconfigure(struct fs_context *fc) { struct super_block *sb = fc->root->d_sb; struct debugfs_fs_info *sb_opts = sb->s_fs_info; struct debugfs_fs_info *new_opts = fc->s_fs_info; sync_filesystem(sb); /* structure copy of new mount options to sb */ *sb_opts = *new_opts; debugfs_apply_options_remount(sb); return 0; } static int debugfs_show_options(struct seq_file *m, struct dentry *root) { struct debugfs_fs_info *fsi = root->d_sb->s_fs_info; if (!uid_eq(fsi->uid, GLOBAL_ROOT_UID)) seq_printf(m, ",uid=%u", from_kuid_munged(&init_user_ns, fsi->uid)); if (!gid_eq(fsi->gid, GLOBAL_ROOT_GID)) seq_printf(m, ",gid=%u", from_kgid_munged(&init_user_ns, fsi->gid)); if (fsi->mode != DEBUGFS_DEFAULT_MODE) seq_printf(m, ",mode=%o", fsi->mode); return 0; } static void debugfs_free_inode(struct inode *inode) { if (S_ISLNK(inode->i_mode)) kfree(inode->i_link); free_inode_nonrcu(inode); } static const struct super_operations debugfs_super_operations = { .statfs = simple_statfs, .show_options = debugfs_show_options, .free_inode = debugfs_free_inode, }; static void debugfs_release_dentry(struct dentry *dentry) { struct debugfs_fsdata *fsd = dentry->d_fsdata; if ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT) return; /* check it wasn't a dir (no fsdata) or automount (no real_fops) */ if (fsd && fsd->real_fops) { WARN_ON(!list_empty(&fsd->cancellations)); mutex_destroy(&fsd->cancellations_mtx); } kfree(fsd); } static struct vfsmount *debugfs_automount(struct path *path) { struct debugfs_fsdata *fsd = path->dentry->d_fsdata; return fsd->automount(path->dentry, d_inode(path->dentry)->i_private); } static const struct dentry_operations debugfs_dops = { .d_delete = always_delete_dentry, .d_release = debugfs_release_dentry, .d_automount = debugfs_automount, }; static int debugfs_fill_super(struct super_block *sb, struct fs_context *fc) { static const struct tree_descr debug_files[] = {{""}}; int err; err = simple_fill_super(sb, DEBUGFS_MAGIC, debug_files); if (err) return err; sb->s_op = &debugfs_super_operations; sb->s_d_op = &debugfs_dops; debugfs_apply_options(sb); return 0; } static int debugfs_get_tree(struct fs_context *fc) { if (!(debugfs_allow & DEBUGFS_ALLOW_API)) return -EPERM; return get_tree_single(fc, debugfs_fill_super); } static void debugfs_free_fc(struct fs_context *fc) { kfree(fc->s_fs_info); } static const struct fs_context_operations debugfs_context_ops = { .free = debugfs_free_fc, .parse_param = debugfs_parse_param, .get_tree = debugfs_get_tree, .reconfigure = debugfs_reconfigure, }; static int debugfs_init_fs_context(struct fs_context *fc) { struct debugfs_fs_info *fsi; fsi = kzalloc(sizeof(struct debugfs_fs_info), GFP_KERNEL); if (!fsi) return -ENOMEM; fsi->mode = DEBUGFS_DEFAULT_MODE; fc->s_fs_info = fsi; fc->ops = &debugfs_context_ops; return 0; } static struct file_system_type debug_fs_type = { .owner = THIS_MODULE, .name = "debugfs", .init_fs_context = debugfs_init_fs_context, .parameters = debugfs_param_specs, .kill_sb = kill_litter_super, }; MODULE_ALIAS_FS("debugfs"); /** * debugfs_lookup() - look up an existing debugfs file * @name: a pointer to a string containing the name of the file to look up. * @parent: a pointer to the parent dentry of the file. * * This function will return a pointer to a dentry if it succeeds. If the file * doesn't exist or an error occurs, %NULL will be returned. The returned * dentry must be passed to dput() when it is no longer needed. * * If debugfs is not enabled in the kernel, the value -%ENODEV will be * returned. */ struct dentry *debugfs_lookup(const char *name, struct dentry *parent) { struct dentry *dentry; if (!debugfs_initialized() || IS_ERR_OR_NULL(name) || IS_ERR(parent)) return NULL; if (!parent) parent = debugfs_mount->mnt_root; dentry = lookup_positive_unlocked(name, parent, strlen(name)); if (IS_ERR(dentry)) return NULL; return dentry; } EXPORT_SYMBOL_GPL(debugfs_lookup); static struct dentry *start_creating(const char *name, struct dentry *parent) { struct dentry *dentry; int error; if (!(debugfs_allow & DEBUGFS_ALLOW_API)) return ERR_PTR(-EPERM); if (!debugfs_initialized()) return ERR_PTR(-ENOENT); pr_debug("creating file '%s'\n", name); if (IS_ERR(parent)) return parent; error = simple_pin_fs(&debug_fs_type, &debugfs_mount, &debugfs_mount_count); if (error) { pr_err("Unable to pin filesystem for file '%s'\n", name); return ERR_PTR(error); } /* If the parent is not specified, we create it in the root. * We need the root dentry to do this, which is in the super * block. A pointer to that is in the struct vfsmount that we * have around. */ if (!parent) parent = debugfs_mount->mnt_root; inode_lock(d_inode(parent)); if (unlikely(IS_DEADDIR(d_inode(parent)))) dentry = ERR_PTR(-ENOENT); else dentry = lookup_one_len(name, parent, strlen(name)); if (!IS_ERR(dentry) && d_really_is_positive(dentry)) { if (d_is_dir(dentry)) pr_err("Directory '%s' with parent '%s' already present!\n", name, parent->d_name.name); else pr_err("File '%s' in directory '%s' already present!\n", name, parent->d_name.name); dput(dentry); dentry = ERR_PTR(-EEXIST); } if (IS_ERR(dentry)) { inode_unlock(d_inode(parent)); simple_release_fs(&debugfs_mount, &debugfs_mount_count); } return dentry; } static struct dentry *failed_creating(struct dentry *dentry) { inode_unlock(d_inode(dentry->d_parent)); dput(dentry); simple_release_fs(&debugfs_mount, &debugfs_mount_count); return ERR_PTR(-ENOMEM); } static struct dentry *end_creating(struct dentry *dentry) { inode_unlock(d_inode(dentry->d_parent)); return dentry; } static struct dentry *__debugfs_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *proxy_fops, const struct file_operations *real_fops) { struct dentry *dentry; struct inode *inode; if (!(mode & S_IFMT)) mode |= S_IFREG; BUG_ON(!S_ISREG(mode)); dentry = start_creating(name, parent); if (IS_ERR(dentry)) return dentry; if (!(debugfs_allow & DEBUGFS_ALLOW_API)) { failed_creating(dentry); return ERR_PTR(-EPERM); } inode = debugfs_get_inode(dentry->d_sb); if (unlikely(!inode)) { pr_err("out of free dentries, can not create file '%s'\n", name); return failed_creating(dentry); } inode->i_mode = mode; inode->i_private = data; inode->i_op = &debugfs_file_inode_operations; inode->i_fop = proxy_fops; dentry->d_fsdata = (void *)((unsigned long)real_fops | DEBUGFS_FSDATA_IS_REAL_FOPS_BIT); d_instantiate(dentry, inode); fsnotify_create(d_inode(dentry->d_parent), dentry); return end_creating(dentry); } /** * debugfs_create_file - create a file in the debugfs filesystem * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have. * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is NULL, then the * file will be created in the root of the debugfs filesystem. * @data: a pointer to something that the caller will want to get to later * on. The inode.i_private pointer will point to this value on * the open() call. * @fops: a pointer to a struct file_operations that should be used for * this file. * * This is the basic "create a file" function for debugfs. It allows for a * wide range of flexibility in creating a file, or a directory (if you want * to create a directory, the debugfs_create_dir() function is * recommended to be used instead.) * * This function will return a pointer to a dentry if it succeeds. This * pointer must be passed to the debugfs_remove() function when the file is * to be removed (no automatic cleanup happens if your module is unloaded, * you are responsible here.) If an error occurs, ERR_PTR(-ERROR) will be * returned. * * If debugfs is not enabled in the kernel, the value -%ENODEV will be * returned. * * NOTE: it's expected that most callers should _ignore_ the errors returned * by this function. Other debugfs functions handle the fact that the "dentry" * passed to them could be an error and they don't crash in that case. * Drivers should generally work fine even if debugfs fails to init anyway. */ struct dentry *debugfs_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops) { return __debugfs_create_file(name, mode, parent, data, fops ? &debugfs_full_proxy_file_operations : &debugfs_noop_file_operations, fops); } EXPORT_SYMBOL_GPL(debugfs_create_file); /** * debugfs_create_file_unsafe - create a file in the debugfs filesystem * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have. * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is NULL, then the * file will be created in the root of the debugfs filesystem. * @data: a pointer to something that the caller will want to get to later * on. The inode.i_private pointer will point to this value on * the open() call. * @fops: a pointer to a struct file_operations that should be used for * this file. * * debugfs_create_file_unsafe() is completely analogous to * debugfs_create_file(), the only difference being that the fops * handed it will not get protected against file removals by the * debugfs core. * * It is your responsibility to protect your struct file_operation * methods against file removals by means of debugfs_file_get() * and debugfs_file_put(). ->open() is still protected by * debugfs though. * * Any struct file_operations defined by means of * DEFINE_DEBUGFS_ATTRIBUTE() is protected against file removals and * thus, may be used here. */ struct dentry *debugfs_create_file_unsafe(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops) { return __debugfs_create_file(name, mode, parent, data, fops ? &debugfs_open_proxy_file_operations : &debugfs_noop_file_operations, fops); } EXPORT_SYMBOL_GPL(debugfs_create_file_unsafe); /** * debugfs_create_file_size - create a file in the debugfs filesystem * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have. * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is NULL, then the * file will be created in the root of the debugfs filesystem. * @data: a pointer to something that the caller will want to get to later * on. The inode.i_private pointer will point to this value on * the open() call. * @fops: a pointer to a struct file_operations that should be used for * this file. * @file_size: initial file size * * This is the basic "create a file" function for debugfs. It allows for a * wide range of flexibility in creating a file, or a directory (if you want * to create a directory, the debugfs_create_dir() function is * recommended to be used instead.) */ void debugfs_create_file_size(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops, loff_t file_size) { struct dentry *de = debugfs_create_file(name, mode, parent, data, fops); if (!IS_ERR(de)) d_inode(de)->i_size = file_size; } EXPORT_SYMBOL_GPL(debugfs_create_file_size); /** * debugfs_create_dir - create a directory in the debugfs filesystem * @name: a pointer to a string containing the name of the directory to * create. * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is NULL, then the * directory will be created in the root of the debugfs filesystem. * * This function creates a directory in debugfs with the given name. * * This function will return a pointer to a dentry if it succeeds. This * pointer must be passed to the debugfs_remove() function when the file is * to be removed (no automatic cleanup happens if your module is unloaded, * you are responsible here.) If an error occurs, ERR_PTR(-ERROR) will be * returned. * * If debugfs is not enabled in the kernel, the value -%ENODEV will be * returned. * * NOTE: it's expected that most callers should _ignore_ the errors returned * by this function. Other debugfs functions handle the fact that the "dentry" * passed to them could be an error and they don't crash in that case. * Drivers should generally work fine even if debugfs fails to init anyway. */ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent) { struct dentry *dentry = start_creating(name, parent); struct inode *inode; if (IS_ERR(dentry)) return dentry; if (!(debugfs_allow & DEBUGFS_ALLOW_API)) { failed_creating(dentry); return ERR_PTR(-EPERM); } inode = debugfs_get_inode(dentry->d_sb); if (unlikely(!inode)) { pr_err("out of free dentries, can not create directory '%s'\n", name); return failed_creating(dentry); } inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; inode->i_op = &debugfs_dir_inode_operations; inode->i_fop = &simple_dir_operations; /* directory inodes start off with i_nlink == 2 (for "." entry) */ inc_nlink(inode); d_instantiate(dentry, inode); inc_nlink(d_inode(dentry->d_parent)); fsnotify_mkdir(d_inode(dentry->d_parent), dentry); return end_creating(dentry); } EXPORT_SYMBOL_GPL(debugfs_create_dir); /** * debugfs_create_automount - create automount point in the debugfs filesystem * @name: a pointer to a string containing the name of the file to create. * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is NULL, then the * file will be created in the root of the debugfs filesystem. * @f: function to be called when pathname resolution steps on that one. * @data: opaque argument to pass to f(). * * @f should return what ->d_automount() would. */ struct dentry *debugfs_create_automount(const char *name, struct dentry *parent, debugfs_automount_t f, void *data) { struct dentry *dentry = start_creating(name, parent); struct debugfs_fsdata *fsd; struct inode *inode; if (IS_ERR(dentry)) return dentry; fsd = kzalloc(sizeof(*fsd), GFP_KERNEL); if (!fsd) { failed_creating(dentry); return ERR_PTR(-ENOMEM); } fsd->automount = f; if (!(debugfs_allow & DEBUGFS_ALLOW_API)) { failed_creating(dentry); kfree(fsd); return ERR_PTR(-EPERM); } inode = debugfs_get_inode(dentry->d_sb); if (unlikely(!inode)) { pr_err("out of free dentries, can not create automount '%s'\n", name); kfree(fsd); return failed_creating(dentry); } make_empty_dir_inode(inode); inode->i_flags |= S_AUTOMOUNT; inode->i_private = data; dentry->d_fsdata = fsd; /* directory inodes start off with i_nlink == 2 (for "." entry) */ inc_nlink(inode); d_instantiate(dentry, inode); inc_nlink(d_inode(dentry->d_parent)); fsnotify_mkdir(d_inode(dentry->d_parent), dentry); return end_creating(dentry); } EXPORT_SYMBOL(debugfs_create_automount); /** * debugfs_create_symlink- create a symbolic link in the debugfs filesystem * @name: a pointer to a string containing the name of the symbolic link to * create. * @parent: a pointer to the parent dentry for this symbolic link. This * should be a directory dentry if set. If this parameter is NULL, * then the symbolic link will be created in the root of the debugfs * filesystem. * @target: a pointer to a string containing the path to the target of the * symbolic link. * * This function creates a symbolic link with the given name in debugfs that * links to the given target path. * * This function will return a pointer to a dentry if it succeeds. This * pointer must be passed to the debugfs_remove() function when the symbolic * link is to be removed (no automatic cleanup happens if your module is * unloaded, you are responsible here.) If an error occurs, ERR_PTR(-ERROR) * will be returned. * * If debugfs is not enabled in the kernel, the value -%ENODEV will be * returned. */ struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent, const char *target) { struct dentry *dentry; struct inode *inode; char *link = kstrdup(target, GFP_KERNEL); if (!link) return ERR_PTR(-ENOMEM); dentry = start_creating(name, parent); if (IS_ERR(dentry)) { kfree(link); return dentry; } inode = debugfs_get_inode(dentry->d_sb); if (unlikely(!inode)) { pr_err("out of free dentries, can not create symlink '%s'\n", name); kfree(link); return failed_creating(dentry); } inode->i_mode = S_IFLNK | S_IRWXUGO; inode->i_op = &debugfs_symlink_inode_operations; inode->i_link = link; d_instantiate(dentry, inode); return end_creating(dentry); } EXPORT_SYMBOL_GPL(debugfs_create_symlink); static void __debugfs_file_removed(struct dentry *dentry) { struct debugfs_fsdata *fsd; /* * Paired with the closing smp_mb() implied by a successful * cmpxchg() in debugfs_file_get(): either * debugfs_file_get() must see a dead dentry or we must see a * debugfs_fsdata instance at ->d_fsdata here (or both). */ smp_mb(); fsd = READ_ONCE(dentry->d_fsdata); if ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT) return; /* if this was the last reference, we're done */ if (refcount_dec_and_test(&fsd->active_users)) return; /* * If there's still a reference, the code that obtained it can * be in different states: * - The common case of not using cancellations, or already * after debugfs_leave_cancellation(), where we just need * to wait for debugfs_file_put() which signals the completion; * - inside a cancellation section, i.e. between * debugfs_enter_cancellation() and debugfs_leave_cancellation(), * in which case we need to trigger the ->cancel() function, * and then wait for debugfs_file_put() just like in the * previous case; * - before debugfs_enter_cancellation() (but obviously after * debugfs_file_get()), in which case we may not see the * cancellation in the list on the first round of the loop, * but debugfs_enter_cancellation() signals the completion * after adding it, so this code gets woken up to call the * ->cancel() function. */ while (refcount_read(&fsd->active_users)) { struct debugfs_cancellation *c; /* * Lock the cancellations. Note that the cancellations * structs are meant to be on the stack, so we need to * ensure we either use them here or don't touch them, * and debugfs_leave_cancellation() will wait for this * to be finished processing before exiting one. It may * of course win and remove the cancellation, but then * chances are we never even got into this bit, we only * do if the refcount isn't zero already. */ mutex_lock(&fsd->cancellations_mtx); while ((c = list_first_entry_or_null(&fsd->cancellations, typeof(*c), list))) { list_del_init(&c->list); c->cancel(dentry, c->cancel_data); } mutex_unlock(&fsd->cancellations_mtx); wait_for_completion(&fsd->active_users_drained); } } static void remove_one(struct dentry *victim) { if (d_is_reg(victim)) __debugfs_file_removed(victim); simple_release_fs(&debugfs_mount, &debugfs_mount_count); } /** * debugfs_remove - recursively removes a directory * @dentry: a pointer to a the dentry of the directory to be removed. If this * parameter is NULL or an error value, nothing will be done. * * This function recursively removes a directory tree in debugfs that * was previously created with a call to another debugfs function * (like debugfs_create_file() or variants thereof.) * * This function is required to be called in order for the file to be * removed, no automatic cleanup of files will happen when a module is * removed, you are responsible here. */ void debugfs_remove(struct dentry *dentry) { if (IS_ERR_OR_NULL(dentry)) return; simple_pin_fs(&debug_fs_type, &debugfs_mount, &debugfs_mount_count); simple_recursive_removal(dentry, remove_one); simple_release_fs(&debugfs_mount, &debugfs_mount_count); } EXPORT_SYMBOL_GPL(debugfs_remove); /** * debugfs_lookup_and_remove - lookup a directory or file and recursively remove it * @name: a pointer to a string containing the name of the item to look up. * @parent: a pointer to the parent dentry of the item. * * This is the equlivant of doing something like * debugfs_remove(debugfs_lookup(..)) but with the proper reference counting * handled for the directory being looked up. */ void debugfs_lookup_and_remove(const char *name, struct dentry *parent) { struct dentry *dentry; dentry = debugfs_lookup(name, parent); if (!dentry) return; debugfs_remove(dentry); dput(dentry); } EXPORT_SYMBOL_GPL(debugfs_lookup_and_remove); /** * debugfs_rename - rename a file/directory in the debugfs filesystem * @old_dir: a pointer to the parent dentry for the renamed object. This * should be a directory dentry. * @old_dentry: dentry of an object to be renamed. * @new_dir: a pointer to the parent dentry where the object should be * moved. This should be a directory dentry. * @new_name: a pointer to a string containing the target name. * * This function renames a file/directory in debugfs. The target must not * exist for rename to succeed. * * This function will return a pointer to old_dentry (which is updated to * reflect renaming) if it succeeds. If an error occurs, ERR_PTR(-ERROR) * will be returned. * * If debugfs is not enabled in the kernel, the value -%ENODEV will be * returned. */ struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, struct dentry *new_dir, const char *new_name) { int error; struct dentry *dentry = NULL, *trap; struct name_snapshot old_name; if (IS_ERR(old_dir)) return old_dir; if (IS_ERR(new_dir)) return new_dir; if (IS_ERR_OR_NULL(old_dentry)) return old_dentry; trap = lock_rename(new_dir, old_dir); /* Source or destination directories don't exist? */ if (d_really_is_negative(old_dir) || d_really_is_negative(new_dir)) goto exit; /* Source does not exist, cyclic rename, or mountpoint? */ if (d_really_is_negative(old_dentry) || old_dentry == trap || d_mountpoint(old_dentry)) goto exit; dentry = lookup_one_len(new_name, new_dir, strlen(new_name)); /* Lookup failed, cyclic rename or target exists? */ if (IS_ERR(dentry) || dentry == trap || d_really_is_positive(dentry)) goto exit; take_dentry_name_snapshot(&old_name, old_dentry); error = simple_rename(&nop_mnt_idmap, d_inode(old_dir), old_dentry, d_inode(new_dir), dentry, 0); if (error) { release_dentry_name_snapshot(&old_name); goto exit; } d_move(old_dentry, dentry); fsnotify_move(d_inode(old_dir), d_inode(new_dir), &old_name.name, d_is_dir(old_dentry), NULL, old_dentry); release_dentry_name_snapshot(&old_name); unlock_rename(new_dir, old_dir); dput(dentry); return old_dentry; exit: if (dentry && !IS_ERR(dentry)) dput(dentry); unlock_rename(new_dir, old_dir); if (IS_ERR(dentry)) return dentry; return ERR_PTR(-EINVAL); } EXPORT_SYMBOL_GPL(debugfs_rename); /** * debugfs_initialized - Tells whether debugfs has been registered */ bool debugfs_initialized(void) { return debugfs_registered; } EXPORT_SYMBOL_GPL(debugfs_initialized); static int __init debugfs_kernel(char *str) { if (str) { if (!strcmp(str, "on")) debugfs_allow = DEBUGFS_ALLOW_API | DEBUGFS_ALLOW_MOUNT; else if (!strcmp(str, "no-mount")) debugfs_allow = DEBUGFS_ALLOW_API; else if (!strcmp(str, "off")) debugfs_allow = 0; } return 0; } early_param("debugfs", debugfs_kernel); static int __init debugfs_init(void) { int retval; if (!(debugfs_allow & DEBUGFS_ALLOW_MOUNT)) return -EPERM; retval = sysfs_create_mount_point(kernel_kobj, "debug"); if (retval) return retval; retval = register_filesystem(&debug_fs_type); if (retval) sysfs_remove_mount_point(kernel_kobj, "debug"); else debugfs_registered = true; return retval; } core_initcall(debugfs_init); |
1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 | // SPDX-License-Identifier: GPL-2.0 /* * Xsens MT USB driver * * Copyright (C) 2013 Xsens <info@xsens.com> */ #include <linux/kernel.h> #include <linux/tty.h> #include <linux/module.h> #include <linux/usb.h> #include <linux/usb/serial.h> #include <linux/uaccess.h> #define XSENS_VID 0x2639 #define MTi_10_IMU_PID 0x0001 #define MTi_20_VRU_PID 0x0002 #define MTi_30_AHRS_PID 0x0003 #define MTi_100_IMU_PID 0x0011 #define MTi_200_VRU_PID 0x0012 #define MTi_300_AHRS_PID 0x0013 #define MTi_G_700_GPS_INS_PID 0x0017 static const struct usb_device_id id_table[] = { { USB_DEVICE(XSENS_VID, MTi_10_IMU_PID) }, { USB_DEVICE(XSENS_VID, MTi_20_VRU_PID) }, { USB_DEVICE(XSENS_VID, MTi_30_AHRS_PID) }, { USB_DEVICE(XSENS_VID, MTi_100_IMU_PID) }, { USB_DEVICE(XSENS_VID, MTi_200_VRU_PID) }, { USB_DEVICE(XSENS_VID, MTi_300_AHRS_PID) }, { USB_DEVICE(XSENS_VID, MTi_G_700_GPS_INS_PID) }, { }, }; MODULE_DEVICE_TABLE(usb, id_table); static int xsens_mt_probe(struct usb_serial *serial, const struct usb_device_id *id) { if (serial->interface->cur_altsetting->desc.bInterfaceNumber == 1) return 0; return -ENODEV; } static struct usb_serial_driver xsens_mt_device = { .driver = { .name = "xsens_mt", }, .id_table = id_table, .num_ports = 1, .probe = xsens_mt_probe, }; static struct usb_serial_driver * const serial_drivers[] = { &xsens_mt_device, NULL }; module_usb_serial_driver(serial_drivers, id_table); MODULE_AUTHOR("Frans Klaver <frans.klaver@xsens.com>"); MODULE_DESCRIPTION("USB-serial driver for Xsens motion trackers"); MODULE_LICENSE("GPL v2"); |
1 2 1 1 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 | // SPDX-License-Identifier: GPL-2.0-only #include "netlink.h" #include "common.h" #include "bitset.h" struct stats_req_info { struct ethnl_req_info base; DECLARE_BITMAP(stat_mask, __ETHTOOL_STATS_CNT); enum ethtool_mac_stats_src src; }; #define STATS_REQINFO(__req_base) \ container_of(__req_base, struct stats_req_info, base) struct stats_reply_data { struct ethnl_reply_data base; struct_group(stats, struct ethtool_eth_phy_stats phy_stats; struct ethtool_eth_mac_stats mac_stats; struct ethtool_eth_ctrl_stats ctrl_stats; struct ethtool_rmon_stats rmon_stats; ); const struct ethtool_rmon_hist_range *rmon_ranges; }; #define STATS_REPDATA(__reply_base) \ container_of(__reply_base, struct stats_reply_data, base) const char stats_std_names[__ETHTOOL_STATS_CNT][ETH_GSTRING_LEN] = { [ETHTOOL_STATS_ETH_PHY] = "eth-phy", [ETHTOOL_STATS_ETH_MAC] = "eth-mac", [ETHTOOL_STATS_ETH_CTRL] = "eth-ctrl", [ETHTOOL_STATS_RMON] = "rmon", }; const char stats_eth_phy_names[__ETHTOOL_A_STATS_ETH_PHY_CNT][ETH_GSTRING_LEN] = { [ETHTOOL_A_STATS_ETH_PHY_5_SYM_ERR] = "SymbolErrorDuringCarrier", }; const char stats_eth_mac_names[__ETHTOOL_A_STATS_ETH_MAC_CNT][ETH_GSTRING_LEN] = { [ETHTOOL_A_STATS_ETH_MAC_2_TX_PKT] = "FramesTransmittedOK", [ETHTOOL_A_STATS_ETH_MAC_3_SINGLE_COL] = "SingleCollisionFrames", [ETHTOOL_A_STATS_ETH_MAC_4_MULTI_COL] = "MultipleCollisionFrames", [ETHTOOL_A_STATS_ETH_MAC_5_RX_PKT] = "FramesReceivedOK", [ETHTOOL_A_STATS_ETH_MAC_6_FCS_ERR] = "FrameCheckSequenceErrors", [ETHTOOL_A_STATS_ETH_MAC_7_ALIGN_ERR] = "AlignmentErrors", [ETHTOOL_A_STATS_ETH_MAC_8_TX_BYTES] = "OctetsTransmittedOK", [ETHTOOL_A_STATS_ETH_MAC_9_TX_DEFER] = "FramesWithDeferredXmissions", [ETHTOOL_A_STATS_ETH_MAC_10_LATE_COL] = "LateCollisions", [ETHTOOL_A_STATS_ETH_MAC_11_XS_COL] = "FramesAbortedDueToXSColls", [ETHTOOL_A_STATS_ETH_MAC_12_TX_INT_ERR] = "FramesLostDueToIntMACXmitError", [ETHTOOL_A_STATS_ETH_MAC_13_CS_ERR] = "CarrierSenseErrors", [ETHTOOL_A_STATS_ETH_MAC_14_RX_BYTES] = "OctetsReceivedOK", [ETHTOOL_A_STATS_ETH_MAC_15_RX_INT_ERR] = "FramesLostDueToIntMACRcvError", [ETHTOOL_A_STATS_ETH_MAC_18_TX_MCAST] = "MulticastFramesXmittedOK", [ETHTOOL_A_STATS_ETH_MAC_19_TX_BCAST] = "BroadcastFramesXmittedOK", [ETHTOOL_A_STATS_ETH_MAC_20_XS_DEFER] = "FramesWithExcessiveDeferral", [ETHTOOL_A_STATS_ETH_MAC_21_RX_MCAST] = "MulticastFramesReceivedOK", [ETHTOOL_A_STATS_ETH_MAC_22_RX_BCAST] = "BroadcastFramesReceivedOK", [ETHTOOL_A_STATS_ETH_MAC_23_IR_LEN_ERR] = "InRangeLengthErrors", [ETHTOOL_A_STATS_ETH_MAC_24_OOR_LEN] = "OutOfRangeLengthField", [ETHTOOL_A_STATS_ETH_MAC_25_TOO_LONG_ERR] = "FrameTooLongErrors", }; const char stats_eth_ctrl_names[__ETHTOOL_A_STATS_ETH_CTRL_CNT][ETH_GSTRING_LEN] = { [ETHTOOL_A_STATS_ETH_CTRL_3_TX] = "MACControlFramesTransmitted", [ETHTOOL_A_STATS_ETH_CTRL_4_RX] = "MACControlFramesReceived", [ETHTOOL_A_STATS_ETH_CTRL_5_RX_UNSUP] = "UnsupportedOpcodesReceived", }; const char stats_rmon_names[__ETHTOOL_A_STATS_RMON_CNT][ETH_GSTRING_LEN] = { [ETHTOOL_A_STATS_RMON_UNDERSIZE] = "etherStatsUndersizePkts", [ETHTOOL_A_STATS_RMON_OVERSIZE] = "etherStatsOversizePkts", [ETHTOOL_A_STATS_RMON_FRAG] = "etherStatsFragments", [ETHTOOL_A_STATS_RMON_JABBER] = "etherStatsJabbers", }; const struct nla_policy ethnl_stats_get_policy[ETHTOOL_A_STATS_SRC + 1] = { [ETHTOOL_A_STATS_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), [ETHTOOL_A_STATS_GROUPS] = { .type = NLA_NESTED }, [ETHTOOL_A_STATS_SRC] = NLA_POLICY_MAX(NLA_U32, ETHTOOL_MAC_STATS_SRC_PMAC), }; static int stats_parse_request(struct ethnl_req_info *req_base, struct nlattr **tb, struct netlink_ext_ack *extack) { enum ethtool_mac_stats_src src = ETHTOOL_MAC_STATS_SRC_AGGREGATE; struct stats_req_info *req_info = STATS_REQINFO(req_base); bool mod = false; int err; err = ethnl_update_bitset(req_info->stat_mask, __ETHTOOL_STATS_CNT, tb[ETHTOOL_A_STATS_GROUPS], stats_std_names, extack, &mod); if (err) return err; if (!mod) { NL_SET_ERR_MSG(extack, "no stats requested"); return -EINVAL; } if (tb[ETHTOOL_A_STATS_SRC]) src = nla_get_u32(tb[ETHTOOL_A_STATS_SRC]); req_info->src = src; return 0; } static int stats_prepare_data(const struct ethnl_req_info *req_base, struct ethnl_reply_data *reply_base, const struct genl_info *info) { const struct stats_req_info *req_info = STATS_REQINFO(req_base); struct stats_reply_data *data = STATS_REPDATA(reply_base); enum ethtool_mac_stats_src src = req_info->src; struct net_device *dev = reply_base->dev; int ret; ret = ethnl_ops_begin(dev); if (ret < 0) return ret; if ((src == ETHTOOL_MAC_STATS_SRC_EMAC || src == ETHTOOL_MAC_STATS_SRC_PMAC) && !__ethtool_dev_mm_supported(dev)) { NL_SET_ERR_MSG_MOD(info->extack, "Device does not support MAC merge layer"); ethnl_ops_complete(dev); return -EOPNOTSUPP; } /* Mark all stats as unset (see ETHTOOL_STAT_NOT_SET) to prevent them * from being reported to user space in case driver did not set them. */ memset(&data->stats, 0xff, sizeof(data->stats)); data->phy_stats.src = src; data->mac_stats.src = src; data->ctrl_stats.src = src; data->rmon_stats.src = src; if (test_bit(ETHTOOL_STATS_ETH_PHY, req_info->stat_mask) && dev->ethtool_ops->get_eth_phy_stats) dev->ethtool_ops->get_eth_phy_stats(dev, &data->phy_stats); if (test_bit(ETHTOOL_STATS_ETH_MAC, req_info->stat_mask) && dev->ethtool_ops->get_eth_mac_stats) dev->ethtool_ops->get_eth_mac_stats(dev, &data->mac_stats); if (test_bit(ETHTOOL_STATS_ETH_CTRL, req_info->stat_mask) && dev->ethtool_ops->get_eth_ctrl_stats) dev->ethtool_ops->get_eth_ctrl_stats(dev, &data->ctrl_stats); if (test_bit(ETHTOOL_STATS_RMON, req_info->stat_mask) && dev->ethtool_ops->get_rmon_stats) dev->ethtool_ops->get_rmon_stats(dev, &data->rmon_stats, &data->rmon_ranges); ethnl_ops_complete(dev); return 0; } static int stats_reply_size(const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { const struct stats_req_info *req_info = STATS_REQINFO(req_base); unsigned int n_grps = 0, n_stats = 0; int len = 0; len += nla_total_size(sizeof(u32)); /* _STATS_SRC */ if (test_bit(ETHTOOL_STATS_ETH_PHY, req_info->stat_mask)) { n_stats += sizeof(struct ethtool_eth_phy_stats) / sizeof(u64); n_grps++; } if (test_bit(ETHTOOL_STATS_ETH_MAC, req_info->stat_mask)) { n_stats += sizeof(struct ethtool_eth_mac_stats) / sizeof(u64); n_grps++; } if (test_bit(ETHTOOL_STATS_ETH_CTRL, req_info->stat_mask)) { n_stats += sizeof(struct ethtool_eth_ctrl_stats) / sizeof(u64); n_grps++; } if (test_bit(ETHTOOL_STATS_RMON, req_info->stat_mask)) { n_stats += sizeof(struct ethtool_rmon_stats) / sizeof(u64); n_grps++; /* Above includes the space for _A_STATS_GRP_HIST_VALs */ len += (nla_total_size(0) + /* _A_STATS_GRP_HIST */ nla_total_size(4) + /* _A_STATS_GRP_HIST_BKT_LOW */ nla_total_size(4)) * /* _A_STATS_GRP_HIST_BKT_HI */ ETHTOOL_RMON_HIST_MAX * 2; } len += n_grps * (nla_total_size(0) + /* _A_STATS_GRP */ nla_total_size(4) + /* _A_STATS_GRP_ID */ nla_total_size(4)); /* _A_STATS_GRP_SS_ID */ len += n_stats * (nla_total_size(0) + /* _A_STATS_GRP_STAT */ nla_total_size_64bit(sizeof(u64))); return len; } static int stat_put(struct sk_buff *skb, u16 attrtype, u64 val) { struct nlattr *nest; int ret; if (val == ETHTOOL_STAT_NOT_SET) return 0; /* We want to start stats attr types from 0, so we don't have a type * for pad inside ETHTOOL_A_STATS_GRP_STAT. Pad things on the outside * of ETHTOOL_A_STATS_GRP_STAT. Since we're one nest away from the * actual attr we're 4B off - nla_need_padding_for_64bit() & co. * can't be used. */ #ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS if (!IS_ALIGNED((unsigned long)skb_tail_pointer(skb), 8)) if (!nla_reserve(skb, ETHTOOL_A_STATS_GRP_PAD, 0)) return -EMSGSIZE; #endif nest = nla_nest_start(skb, ETHTOOL_A_STATS_GRP_STAT); if (!nest) return -EMSGSIZE; ret = nla_put_u64_64bit(skb, attrtype, val, -1 /* not used */); if (ret) { nla_nest_cancel(skb, nest); return ret; } nla_nest_end(skb, nest); return 0; } static int stats_put_phy_stats(struct sk_buff *skb, const struct stats_reply_data *data) { if (stat_put(skb, ETHTOOL_A_STATS_ETH_PHY_5_SYM_ERR, data->phy_stats.SymbolErrorDuringCarrier)) return -EMSGSIZE; return 0; } static int stats_put_mac_stats(struct sk_buff *skb, const struct stats_reply_data *data) { if (stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_2_TX_PKT, data->mac_stats.FramesTransmittedOK) || stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_3_SINGLE_COL, data->mac_stats.SingleCollisionFrames) || stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_4_MULTI_COL, data->mac_stats.MultipleCollisionFrames) || stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_5_RX_PKT, data->mac_stats.FramesReceivedOK) || stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_6_FCS_ERR, data->mac_stats.FrameCheckSequenceErrors) || stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_7_ALIGN_ERR, data->mac_stats.AlignmentErrors) || stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_8_TX_BYTES, data->mac_stats.OctetsTransmittedOK) || stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_9_TX_DEFER, data->mac_stats.FramesWithDeferredXmissions) || stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_10_LATE_COL, data->mac_stats.LateCollisions) || stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_11_XS_COL, data->mac_stats.FramesAbortedDueToXSColls) || stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_12_TX_INT_ERR, data->mac_stats.FramesLostDueToIntMACXmitError) || stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_13_CS_ERR, data->mac_stats.CarrierSenseErrors) || stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_14_RX_BYTES, data->mac_stats.OctetsReceivedOK) || stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_15_RX_INT_ERR, data->mac_stats.FramesLostDueToIntMACRcvError) || stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_18_TX_MCAST, data->mac_stats.MulticastFramesXmittedOK) || stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_19_TX_BCAST, data->mac_stats.BroadcastFramesXmittedOK) || stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_20_XS_DEFER, data->mac_stats.FramesWithExcessiveDeferral) || stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_21_RX_MCAST, data->mac_stats.MulticastFramesReceivedOK) || stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_22_RX_BCAST, data->mac_stats.BroadcastFramesReceivedOK) || stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_23_IR_LEN_ERR, data->mac_stats.InRangeLengthErrors) || stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_24_OOR_LEN, data->mac_stats.OutOfRangeLengthField) || stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_25_TOO_LONG_ERR, data->mac_stats.FrameTooLongErrors)) return -EMSGSIZE; return 0; } static int stats_put_ctrl_stats(struct sk_buff *skb, const struct stats_reply_data *data) { if (stat_put(skb, ETHTOOL_A_STATS_ETH_CTRL_3_TX, data->ctrl_stats.MACControlFramesTransmitted) || stat_put(skb, ETHTOOL_A_STATS_ETH_CTRL_4_RX, data->ctrl_stats.MACControlFramesReceived) || stat_put(skb, ETHTOOL_A_STATS_ETH_CTRL_5_RX_UNSUP, data->ctrl_stats.UnsupportedOpcodesReceived)) return -EMSGSIZE; return 0; } static int stats_put_rmon_hist(struct sk_buff *skb, u32 attr, const u64 *hist, const struct ethtool_rmon_hist_range *ranges) { struct nlattr *nest; int i; if (!ranges) return 0; for (i = 0; i < ETHTOOL_RMON_HIST_MAX; i++) { if (!ranges[i].low && !ranges[i].high) break; if (hist[i] == ETHTOOL_STAT_NOT_SET) continue; nest = nla_nest_start(skb, attr); if (!nest) return -EMSGSIZE; if (nla_put_u32(skb, ETHTOOL_A_STATS_GRP_HIST_BKT_LOW, ranges[i].low) || nla_put_u32(skb, ETHTOOL_A_STATS_GRP_HIST_BKT_HI, ranges[i].high) || nla_put_u64_64bit(skb, ETHTOOL_A_STATS_GRP_HIST_VAL, hist[i], ETHTOOL_A_STATS_GRP_PAD)) goto err_cancel_hist; nla_nest_end(skb, nest); } return 0; err_cancel_hist: nla_nest_cancel(skb, nest); return -EMSGSIZE; } static int stats_put_rmon_stats(struct sk_buff *skb, const struct stats_reply_data *data) { if (stats_put_rmon_hist(skb, ETHTOOL_A_STATS_GRP_HIST_RX, data->rmon_stats.hist, data->rmon_ranges) || stats_put_rmon_hist(skb, ETHTOOL_A_STATS_GRP_HIST_TX, data->rmon_stats.hist_tx, data->rmon_ranges)) return -EMSGSIZE; if (stat_put(skb, ETHTOOL_A_STATS_RMON_UNDERSIZE, data->rmon_stats.undersize_pkts) || stat_put(skb, ETHTOOL_A_STATS_RMON_OVERSIZE, data->rmon_stats.oversize_pkts) || stat_put(skb, ETHTOOL_A_STATS_RMON_FRAG, data->rmon_stats.fragments) || stat_put(skb, ETHTOOL_A_STATS_RMON_JABBER, data->rmon_stats.jabbers)) return -EMSGSIZE; return 0; } static int stats_put_stats(struct sk_buff *skb, const struct stats_reply_data *data, u32 id, u32 ss_id, int (*cb)(struct sk_buff *skb, const struct stats_reply_data *data)) { struct nlattr *nest; nest = nla_nest_start(skb, ETHTOOL_A_STATS_GRP); if (!nest) return -EMSGSIZE; if (nla_put_u32(skb, ETHTOOL_A_STATS_GRP_ID, id) || nla_put_u32(skb, ETHTOOL_A_STATS_GRP_SS_ID, ss_id)) goto err_cancel; if (cb(skb, data)) goto err_cancel; nla_nest_end(skb, nest); return 0; err_cancel: nla_nest_cancel(skb, nest); return -EMSGSIZE; } static int stats_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { const struct stats_req_info *req_info = STATS_REQINFO(req_base); const struct stats_reply_data *data = STATS_REPDATA(reply_base); int ret = 0; if (nla_put_u32(skb, ETHTOOL_A_STATS_SRC, req_info->src)) return -EMSGSIZE; if (!ret && test_bit(ETHTOOL_STATS_ETH_PHY, req_info->stat_mask)) ret = stats_put_stats(skb, data, ETHTOOL_STATS_ETH_PHY, ETH_SS_STATS_ETH_PHY, stats_put_phy_stats); if (!ret && test_bit(ETHTOOL_STATS_ETH_MAC, req_info->stat_mask)) ret = stats_put_stats(skb, data, ETHTOOL_STATS_ETH_MAC, ETH_SS_STATS_ETH_MAC, stats_put_mac_stats); if (!ret && test_bit(ETHTOOL_STATS_ETH_CTRL, req_info->stat_mask)) ret = stats_put_stats(skb, data, ETHTOOL_STATS_ETH_CTRL, ETH_SS_STATS_ETH_CTRL, stats_put_ctrl_stats); if (!ret && test_bit(ETHTOOL_STATS_RMON, req_info->stat_mask)) ret = stats_put_stats(skb, data, ETHTOOL_STATS_RMON, ETH_SS_STATS_RMON, stats_put_rmon_stats); return ret; } const struct ethnl_request_ops ethnl_stats_request_ops = { .request_cmd = ETHTOOL_MSG_STATS_GET, .reply_cmd = ETHTOOL_MSG_STATS_GET_REPLY, .hdr_attr = ETHTOOL_A_STATS_HEADER, .req_info_size = sizeof(struct stats_req_info), .reply_data_size = sizeof(struct stats_reply_data), .parse_request = stats_parse_request, .prepare_data = stats_prepare_data, .reply_size = stats_reply_size, .fill_reply = stats_fill_reply, }; static u64 ethtool_stats_sum(u64 a, u64 b) { if (a == ETHTOOL_STAT_NOT_SET) return b; if (b == ETHTOOL_STAT_NOT_SET) return a; return a + b; } /* Avoid modifying the aggregation procedure every time a new counter is added * by treating the structures as an array of u64 statistics. */ static void ethtool_aggregate_stats(void *aggr_stats, const void *emac_stats, const void *pmac_stats, size_t stats_size, size_t stats_offset) { size_t num_stats = stats_size / sizeof(u64); const u64 *s1 = emac_stats + stats_offset; const u64 *s2 = pmac_stats + stats_offset; u64 *s = aggr_stats + stats_offset; int i; for (i = 0; i < num_stats; i++) s[i] = ethtool_stats_sum(s1[i], s2[i]); } void ethtool_aggregate_mac_stats(struct net_device *dev, struct ethtool_eth_mac_stats *mac_stats) { const struct ethtool_ops *ops = dev->ethtool_ops; struct ethtool_eth_mac_stats pmac, emac; memset(&emac, 0xff, sizeof(emac)); memset(&pmac, 0xff, sizeof(pmac)); emac.src = ETHTOOL_MAC_STATS_SRC_EMAC; pmac.src = ETHTOOL_MAC_STATS_SRC_PMAC; ops->get_eth_mac_stats(dev, &emac); ops->get_eth_mac_stats(dev, &pmac); ethtool_aggregate_stats(mac_stats, &emac, &pmac, sizeof(mac_stats->stats), offsetof(struct ethtool_eth_mac_stats, stats)); } EXPORT_SYMBOL(ethtool_aggregate_mac_stats); void ethtool_aggregate_phy_stats(struct net_device *dev, struct ethtool_eth_phy_stats *phy_stats) { const struct ethtool_ops *ops = dev->ethtool_ops; struct ethtool_eth_phy_stats pmac, emac; memset(&emac, 0xff, sizeof(emac)); memset(&pmac, 0xff, sizeof(pmac)); emac.src = ETHTOOL_MAC_STATS_SRC_EMAC; pmac.src = ETHTOOL_MAC_STATS_SRC_PMAC; ops->get_eth_phy_stats(dev, &emac); ops->get_eth_phy_stats(dev, &pmac); ethtool_aggregate_stats(phy_stats, &emac, &pmac, sizeof(phy_stats->stats), offsetof(struct ethtool_eth_phy_stats, stats)); } EXPORT_SYMBOL(ethtool_aggregate_phy_stats); void ethtool_aggregate_ctrl_stats(struct net_device *dev, struct ethtool_eth_ctrl_stats *ctrl_stats) { const struct ethtool_ops *ops = dev->ethtool_ops; struct ethtool_eth_ctrl_stats pmac, emac; memset(&emac, 0xff, sizeof(emac)); memset(&pmac, 0xff, sizeof(pmac)); emac.src = ETHTOOL_MAC_STATS_SRC_EMAC; pmac.src = ETHTOOL_MAC_STATS_SRC_PMAC; ops->get_eth_ctrl_stats(dev, &emac); ops->get_eth_ctrl_stats(dev, &pmac); ethtool_aggregate_stats(ctrl_stats, &emac, &pmac, sizeof(ctrl_stats->stats), offsetof(struct ethtool_eth_ctrl_stats, stats)); } EXPORT_SYMBOL(ethtool_aggregate_ctrl_stats); void ethtool_aggregate_pause_stats(struct net_device *dev, struct ethtool_pause_stats *pause_stats) { const struct ethtool_ops *ops = dev->ethtool_ops; struct ethtool_pause_stats pmac, emac; memset(&emac, 0xff, sizeof(emac)); memset(&pmac, 0xff, sizeof(pmac)); emac.src = ETHTOOL_MAC_STATS_SRC_EMAC; pmac.src = ETHTOOL_MAC_STATS_SRC_PMAC; ops->get_pause_stats(dev, &emac); ops->get_pause_stats(dev, &pmac); ethtool_aggregate_stats(pause_stats, &emac, &pmac, sizeof(pause_stats->stats), offsetof(struct ethtool_pause_stats, stats)); } EXPORT_SYMBOL(ethtool_aggregate_pause_stats); void ethtool_aggregate_rmon_stats(struct net_device *dev, struct ethtool_rmon_stats *rmon_stats) { const struct ethtool_ops *ops = dev->ethtool_ops; const struct ethtool_rmon_hist_range *dummy; struct ethtool_rmon_stats pmac, emac; memset(&emac, 0xff, sizeof(emac)); memset(&pmac, 0xff, sizeof(pmac)); emac.src = ETHTOOL_MAC_STATS_SRC_EMAC; pmac.src = ETHTOOL_MAC_STATS_SRC_PMAC; ops->get_rmon_stats(dev, &emac, &dummy); ops->get_rmon_stats(dev, &pmac, &dummy); ethtool_aggregate_stats(rmon_stats, &emac, &pmac, sizeof(rmon_stats->stats), offsetof(struct ethtool_rmon_stats, stats)); } EXPORT_SYMBOL(ethtool_aggregate_rmon_stats); |
449 89 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_RATELIMIT_H #define _LINUX_RATELIMIT_H #include <linux/ratelimit_types.h> #include <linux/sched.h> #include <linux/spinlock.h> static inline void ratelimit_state_init(struct ratelimit_state *rs, int interval, int burst) { memset(rs, 0, sizeof(*rs)); raw_spin_lock_init(&rs->lock); rs->interval = interval; rs->burst = burst; } static inline void ratelimit_default_init(struct ratelimit_state *rs) { return ratelimit_state_init(rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); } static inline void ratelimit_state_exit(struct ratelimit_state *rs) { if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE)) return; if (rs->missed) { pr_warn("%s: %d output lines suppressed due to ratelimiting\n", current->comm, rs->missed); rs->missed = 0; } } static inline void ratelimit_set_flags(struct ratelimit_state *rs, unsigned long flags) { rs->flags = flags; } extern struct ratelimit_state printk_ratelimit_state; #ifdef CONFIG_PRINTK #define WARN_ON_RATELIMIT(condition, state) ({ \ bool __rtn_cond = !!(condition); \ WARN_ON(__rtn_cond && __ratelimit(state)); \ __rtn_cond; \ }) #define WARN_RATELIMIT(condition, format, ...) \ ({ \ static DEFINE_RATELIMIT_STATE(_rs, \ DEFAULT_RATELIMIT_INTERVAL, \ DEFAULT_RATELIMIT_BURST); \ int rtn = !!(condition); \ \ if (unlikely(rtn && __ratelimit(&_rs))) \ WARN(rtn, format, ##__VA_ARGS__); \ \ rtn; \ }) #else #define WARN_ON_RATELIMIT(condition, state) \ WARN_ON(condition) #define WARN_RATELIMIT(condition, format, ...) \ ({ \ int rtn = WARN(condition, format, ##__VA_ARGS__); \ rtn; \ }) #endif #endif /* _LINUX_RATELIMIT_H */ |
2142 2142 2147 106 106 105 3692 3700 3699 3765 3758 3765 3764 3765 3769 811 811 89 89 8 8 8 8 2142 2145 28 28 28 2143 6 6 6 8 8 8 8 8 8 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 | // SPDX-License-Identifier: GPL-2.0 #include "blk-rq-qos.h" /* * Increment 'v', if 'v' is below 'below'. Returns true if we succeeded, * false if 'v' + 1 would be bigger than 'below'. */ static bool atomic_inc_below(atomic_t *v, unsigned int below) { unsigned int cur = atomic_read(v); do { if (cur >= below) return false; } while (!atomic_try_cmpxchg(v, &cur, cur + 1)); return true; } bool rq_wait_inc_below(struct rq_wait *rq_wait, unsigned int limit) { return atomic_inc_below(&rq_wait->inflight, limit); } void __rq_qos_cleanup(struct rq_qos *rqos, struct bio *bio) { do { if (rqos->ops->cleanup) rqos->ops->cleanup(rqos, bio); rqos = rqos->next; } while (rqos); } void __rq_qos_done(struct rq_qos *rqos, struct request *rq) { do { if (rqos->ops->done) rqos->ops->done(rqos, rq); rqos = rqos->next; } while (rqos); } void __rq_qos_issue(struct rq_qos *rqos, struct request *rq) { do { if (rqos->ops->issue) rqos->ops->issue(rqos, rq); rqos = rqos->next; } while (rqos); } void __rq_qos_requeue(struct rq_qos *rqos, struct request *rq) { do { if (rqos->ops->requeue) rqos->ops->requeue(rqos, rq); rqos = rqos->next; } while (rqos); } void __rq_qos_throttle(struct rq_qos *rqos, struct bio *bio) { do { if (rqos->ops->throttle) rqos->ops->throttle(rqos, bio); rqos = rqos->next; } while (rqos); } void __rq_qos_track(struct rq_qos *rqos, struct request *rq, struct bio *bio) { do { if (rqos->ops->track) rqos->ops->track(rqos, rq, bio); rqos = rqos->next; } while (rqos); } void __rq_qos_merge(struct rq_qos *rqos, struct request *rq, struct bio *bio) { do { if (rqos->ops->merge) rqos->ops->merge(rqos, rq, bio); rqos = rqos->next; } while (rqos); } void __rq_qos_done_bio(struct rq_qos *rqos, struct bio *bio) { do { if (rqos->ops->done_bio) rqos->ops->done_bio(rqos, bio); rqos = rqos->next; } while (rqos); } void __rq_qos_queue_depth_changed(struct rq_qos *rqos) { do { if (rqos->ops->queue_depth_changed) rqos->ops->queue_depth_changed(rqos); rqos = rqos->next; } while (rqos); } /* * Return true, if we can't increase the depth further by scaling */ bool rq_depth_calc_max_depth(struct rq_depth *rqd) { unsigned int depth; bool ret = false; /* * For QD=1 devices, this is a special case. It's important for those * to have one request ready when one completes, so force a depth of * 2 for those devices. On the backend, it'll be a depth of 1 anyway, * since the device can't have more than that in flight. If we're * scaling down, then keep a setting of 1/1/1. */ if (rqd->queue_depth == 1) { if (rqd->scale_step > 0) rqd->max_depth = 1; else { rqd->max_depth = 2; ret = true; } } else { /* * scale_step == 0 is our default state. If we have suffered * latency spikes, step will be > 0, and we shrink the * allowed write depths. If step is < 0, we're only doing * writes, and we allow a temporarily higher depth to * increase performance. */ depth = min_t(unsigned int, rqd->default_depth, rqd->queue_depth); if (rqd->scale_step > 0) depth = 1 + ((depth - 1) >> min(31, rqd->scale_step)); else if (rqd->scale_step < 0) { unsigned int maxd = 3 * rqd->queue_depth / 4; depth = 1 + ((depth - 1) << -rqd->scale_step); if (depth > maxd) { depth = maxd; ret = true; } } rqd->max_depth = depth; } return ret; } /* Returns true on success and false if scaling up wasn't possible */ bool rq_depth_scale_up(struct rq_depth *rqd) { /* * Hit max in previous round, stop here */ if (rqd->scaled_max) return false; rqd->scale_step--; rqd->scaled_max = rq_depth_calc_max_depth(rqd); return true; } /* * Scale rwb down. If 'hard_throttle' is set, do it quicker, since we * had a latency violation. Returns true on success and returns false if * scaling down wasn't possible. */ bool rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle) { /* * Stop scaling down when we've hit the limit. This also prevents * ->scale_step from going to crazy values, if the device can't * keep up. */ if (rqd->max_depth == 1) return false; if (rqd->scale_step < 0 && hard_throttle) rqd->scale_step = 0; else rqd->scale_step++; rqd->scaled_max = false; rq_depth_calc_max_depth(rqd); return true; } struct rq_qos_wait_data { struct wait_queue_entry wq; struct task_struct *task; struct rq_wait *rqw; acquire_inflight_cb_t *cb; void *private_data; bool got_token; }; static int rq_qos_wake_function(struct wait_queue_entry *curr, unsigned int mode, int wake_flags, void *key) { struct rq_qos_wait_data *data = container_of(curr, struct rq_qos_wait_data, wq); /* * If we fail to get a budget, return -1 to interrupt the wake up loop * in __wake_up_common. */ if (!data->cb(data->rqw, data->private_data)) return -1; data->got_token = true; smp_wmb(); list_del_init(&curr->entry); wake_up_process(data->task); return 1; } /** * rq_qos_wait - throttle on a rqw if we need to * @rqw: rqw to throttle on * @private_data: caller provided specific data * @acquire_inflight_cb: inc the rqw->inflight counter if we can * @cleanup_cb: the callback to cleanup in case we race with a waker * * This provides a uniform place for the rq_qos users to do their throttling. * Since you can end up with a lot of things sleeping at once, this manages the * waking up based on the resources available. The acquire_inflight_cb should * inc the rqw->inflight if we have the ability to do so, or return false if not * and then we will sleep until the room becomes available. * * cleanup_cb is in case that we race with a waker and need to cleanup the * inflight count accordingly. */ void rq_qos_wait(struct rq_wait *rqw, void *private_data, acquire_inflight_cb_t *acquire_inflight_cb, cleanup_cb_t *cleanup_cb) { struct rq_qos_wait_data data = { .wq = { .func = rq_qos_wake_function, .entry = LIST_HEAD_INIT(data.wq.entry), }, .task = current, .rqw = rqw, .cb = acquire_inflight_cb, .private_data = private_data, }; bool has_sleeper; has_sleeper = wq_has_sleeper(&rqw->wait); if (!has_sleeper && acquire_inflight_cb(rqw, private_data)) return; has_sleeper = !prepare_to_wait_exclusive(&rqw->wait, &data.wq, TASK_UNINTERRUPTIBLE); do { /* The memory barrier in set_current_state saves us here. */ if (data.got_token) break; if (!has_sleeper && acquire_inflight_cb(rqw, private_data)) { finish_wait(&rqw->wait, &data.wq); /* * We raced with rq_qos_wake_function() getting a token, * which means we now have two. Put our local token * and wake anyone else potentially waiting for one. */ smp_rmb(); if (data.got_token) cleanup_cb(rqw, private_data); break; } io_schedule(); has_sleeper = true; set_current_state(TASK_UNINTERRUPTIBLE); } while (1); finish_wait(&rqw->wait, &data.wq); } void rq_qos_exit(struct request_queue *q) { mutex_lock(&q->rq_qos_mutex); while (q->rq_qos) { struct rq_qos *rqos = q->rq_qos; q->rq_qos = rqos->next; rqos->ops->exit(rqos); } mutex_unlock(&q->rq_qos_mutex); } int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id, const struct rq_qos_ops *ops) { struct request_queue *q = disk->queue; lockdep_assert_held(&q->rq_qos_mutex); rqos->disk = disk; rqos->id = id; rqos->ops = ops; /* * No IO can be in-flight when adding rqos, so freeze queue, which * is fine since we only support rq_qos for blk-mq queue. */ blk_mq_freeze_queue(q); if (rq_qos_id(q, rqos->id)) goto ebusy; rqos->next = q->rq_qos; q->rq_qos = rqos; blk_mq_unfreeze_queue(q); if (rqos->ops->debugfs_attrs) { mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_register_rqos(rqos); mutex_unlock(&q->debugfs_mutex); } return 0; ebusy: blk_mq_unfreeze_queue(q); return -EBUSY; } void rq_qos_del(struct rq_qos *rqos) { struct request_queue *q = rqos->disk->queue; struct rq_qos **cur; lockdep_assert_held(&q->rq_qos_mutex); blk_mq_freeze_queue(q); for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) { if (*cur == rqos) { *cur = rqos->next; break; } } blk_mq_unfreeze_queue(q); mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_unregister_rqos(rqos); mutex_unlock(&q->debugfs_mutex); } |
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 | // SPDX-License-Identifier: GPL-2.0-only /* * NFC Digital Protocol stack * Copyright (c) 2013, Intel Corporation. */ #define pr_fmt(fmt) "digital: %s: " fmt, __func__ #include <linux/module.h> #include "digital.h" #define DIGITAL_PROTO_NFCA_RF_TECH \ (NFC_PROTO_JEWEL_MASK | NFC_PROTO_MIFARE_MASK | \ NFC_PROTO_NFC_DEP_MASK | NFC_PROTO_ISO14443_MASK) #define DIGITAL_PROTO_NFCB_RF_TECH NFC_PROTO_ISO14443_B_MASK #define DIGITAL_PROTO_NFCF_RF_TECH \ (NFC_PROTO_FELICA_MASK | NFC_PROTO_NFC_DEP_MASK) #define DIGITAL_PROTO_ISO15693_RF_TECH NFC_PROTO_ISO15693_MASK /* Delay between each poll frame (ms) */ #define DIGITAL_POLL_INTERVAL 10 struct digital_cmd { struct list_head queue; u8 type; u8 pending; u16 timeout; struct sk_buff *req; struct sk_buff *resp; struct digital_tg_mdaa_params *mdaa_params; nfc_digital_cmd_complete_t cmd_cb; void *cb_context; }; struct sk_buff *digital_skb_alloc(struct nfc_digital_dev *ddev, unsigned int len) { struct sk_buff *skb; skb = alloc_skb(len + ddev->tx_headroom + ddev->tx_tailroom, GFP_KERNEL); if (skb) skb_reserve(skb, ddev->tx_headroom); return skb; } void digital_skb_add_crc(struct sk_buff *skb, crc_func_t crc_func, u16 init, u8 bitwise_inv, u8 msb_first) { u16 crc; crc = crc_func(init, skb->data, skb->len); if (bitwise_inv) crc = ~crc; if (msb_first) crc = __fswab16(crc); skb_put_u8(skb, crc & 0xFF); skb_put_u8(skb, (crc >> 8) & 0xFF); } int digital_skb_check_crc(struct sk_buff *skb, crc_func_t crc_func, u16 crc_init, u8 bitwise_inv, u8 msb_first) { int rc; u16 crc; if (skb->len <= 2) return -EIO; crc = crc_func(crc_init, skb->data, skb->len - 2); if (bitwise_inv) crc = ~crc; if (msb_first) crc = __swab16(crc); rc = (skb->data[skb->len - 2] - (crc & 0xFF)) + (skb->data[skb->len - 1] - ((crc >> 8) & 0xFF)); if (rc) return -EIO; skb_trim(skb, skb->len - 2); return 0; } static inline void digital_switch_rf(struct nfc_digital_dev *ddev, bool on) { ddev->ops->switch_rf(ddev, on); } static inline void digital_abort_cmd(struct nfc_digital_dev *ddev) { ddev->ops->abort_cmd(ddev); } static void digital_wq_cmd_complete(struct work_struct *work) { struct digital_cmd *cmd; struct nfc_digital_dev *ddev = container_of(work, struct nfc_digital_dev, cmd_complete_work); mutex_lock(&ddev->cmd_lock); cmd = list_first_entry_or_null(&ddev->cmd_queue, struct digital_cmd, queue); if (!cmd) { mutex_unlock(&ddev->cmd_lock); return; } list_del(&cmd->queue); mutex_unlock(&ddev->cmd_lock); if (!IS_ERR(cmd->resp)) print_hex_dump_debug("DIGITAL RX: ", DUMP_PREFIX_NONE, 16, 1, cmd->resp->data, cmd->resp->len, false); cmd->cmd_cb(ddev, cmd->cb_context, cmd->resp); kfree(cmd->mdaa_params); kfree(cmd); schedule_work(&ddev->cmd_work); } static void digital_send_cmd_complete(struct nfc_digital_dev *ddev, void *arg, struct sk_buff *resp) { struct digital_cmd *cmd = arg; cmd->resp = resp; schedule_work(&ddev->cmd_complete_work); } static void digital_wq_cmd(struct work_struct *work) { int rc; struct digital_cmd *cmd; struct digital_tg_mdaa_params *params; struct nfc_digital_dev *ddev = container_of(work, struct nfc_digital_dev, cmd_work); mutex_lock(&ddev->cmd_lock); cmd = list_first_entry_or_null(&ddev->cmd_queue, struct digital_cmd, queue); if (!cmd || cmd->pending) { mutex_unlock(&ddev->cmd_lock); return; } cmd->pending = 1; mutex_unlock(&ddev->cmd_lock); if (cmd->req) print_hex_dump_debug("DIGITAL TX: ", DUMP_PREFIX_NONE, 16, 1, cmd->req->data, cmd->req->len, false); switch (cmd->type) { case DIGITAL_CMD_IN_SEND: rc = ddev->ops->in_send_cmd(ddev, cmd->req, cmd->timeout, digital_send_cmd_complete, cmd); break; case DIGITAL_CMD_TG_SEND: rc = ddev->ops->tg_send_cmd(ddev, cmd->req, cmd->timeout, digital_send_cmd_complete, cmd); break; case DIGITAL_CMD_TG_LISTEN: rc = ddev->ops->tg_listen(ddev, cmd->timeout, digital_send_cmd_complete, cmd); break; case DIGITAL_CMD_TG_LISTEN_MDAA: params = cmd->mdaa_params; rc = ddev->ops->tg_listen_mdaa(ddev, params, cmd->timeout, digital_send_cmd_complete, cmd); break; case DIGITAL_CMD_TG_LISTEN_MD: rc = ddev->ops->tg_listen_md(ddev, cmd->timeout, digital_send_cmd_complete, cmd); break; default: pr_err("Unknown cmd type %d\n", cmd->type); return; } if (!rc) return; pr_err("in_send_command returned err %d\n", rc); mutex_lock(&ddev->cmd_lock); list_del(&cmd->queue); mutex_unlock(&ddev->cmd_lock); kfree_skb(cmd->req); kfree(cmd->mdaa_params); kfree(cmd); schedule_work(&ddev->cmd_work); } int digital_send_cmd(struct nfc_digital_dev *ddev, u8 cmd_type, struct sk_buff *skb, struct digital_tg_mdaa_params *params, u16 timeout, nfc_digital_cmd_complete_t cmd_cb, void *cb_context) { struct digital_cmd *cmd; cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); if (!cmd) return -ENOMEM; cmd->type = cmd_type; cmd->timeout = timeout; cmd->req = skb; cmd->mdaa_params = params; cmd->cmd_cb = cmd_cb; cmd->cb_context = cb_context; INIT_LIST_HEAD(&cmd->queue); mutex_lock(&ddev->cmd_lock); list_add_tail(&cmd->queue, &ddev->cmd_queue); mutex_unlock(&ddev->cmd_lock); schedule_work(&ddev->cmd_work); return 0; } int digital_in_configure_hw(struct nfc_digital_dev *ddev, int type, int param) { int rc; rc = ddev->ops->in_configure_hw(ddev, type, param); if (rc) pr_err("in_configure_hw failed: %d\n", rc); return rc; } int digital_tg_configure_hw(struct nfc_digital_dev *ddev, int type, int param) { int rc; rc = ddev->ops->tg_configure_hw(ddev, type, param); if (rc) pr_err("tg_configure_hw failed: %d\n", rc); return rc; } static int digital_tg_listen_mdaa(struct nfc_digital_dev *ddev, u8 rf_tech) { struct digital_tg_mdaa_params *params; int rc; params = kzalloc(sizeof(*params), GFP_KERNEL); if (!params) return -ENOMEM; params->sens_res = DIGITAL_SENS_RES_NFC_DEP; get_random_bytes(params->nfcid1, sizeof(params->nfcid1)); params->sel_res = DIGITAL_SEL_RES_NFC_DEP; params->nfcid2[0] = DIGITAL_SENSF_NFCID2_NFC_DEP_B1; params->nfcid2[1] = DIGITAL_SENSF_NFCID2_NFC_DEP_B2; get_random_bytes(params->nfcid2 + 2, NFC_NFCID2_MAXSIZE - 2); params->sc = DIGITAL_SENSF_FELICA_SC; rc = digital_send_cmd(ddev, DIGITAL_CMD_TG_LISTEN_MDAA, NULL, params, 500, digital_tg_recv_atr_req, NULL); if (rc) kfree(params); return rc; } static int digital_tg_listen_md(struct nfc_digital_dev *ddev, u8 rf_tech) { return digital_send_cmd(ddev, DIGITAL_CMD_TG_LISTEN_MD, NULL, NULL, 500, digital_tg_recv_md_req, NULL); } int digital_target_found(struct nfc_digital_dev *ddev, struct nfc_target *target, u8 protocol) { int rc; u8 framing; u8 rf_tech; u8 poll_tech_count; int (*check_crc)(struct sk_buff *skb); void (*add_crc)(struct sk_buff *skb); rf_tech = ddev->poll_techs[ddev->poll_tech_index].rf_tech; switch (protocol) { case NFC_PROTO_JEWEL: framing = NFC_DIGITAL_FRAMING_NFCA_T1T; check_crc = digital_skb_check_crc_b; add_crc = digital_skb_add_crc_b; break; case NFC_PROTO_MIFARE: framing = NFC_DIGITAL_FRAMING_NFCA_T2T; check_crc = digital_skb_check_crc_a; add_crc = digital_skb_add_crc_a; break; case NFC_PROTO_FELICA: framing = NFC_DIGITAL_FRAMING_NFCF_T3T; check_crc = digital_skb_check_crc_f; add_crc = digital_skb_add_crc_f; break; case NFC_PROTO_NFC_DEP: if (rf_tech == NFC_DIGITAL_RF_TECH_106A) { framing = NFC_DIGITAL_FRAMING_NFCA_NFC_DEP; check_crc = digital_skb_check_crc_a; add_crc = digital_skb_add_crc_a; } else { framing = NFC_DIGITAL_FRAMING_NFCF_NFC_DEP; check_crc = digital_skb_check_crc_f; add_crc = digital_skb_add_crc_f; } break; case NFC_PROTO_ISO15693: framing = NFC_DIGITAL_FRAMING_ISO15693_T5T; check_crc = digital_skb_check_crc_b; add_crc = digital_skb_add_crc_b; break; case NFC_PROTO_ISO14443: framing = NFC_DIGITAL_FRAMING_NFCA_T4T; check_crc = digital_skb_check_crc_a; add_crc = digital_skb_add_crc_a; break; case NFC_PROTO_ISO14443_B: framing = NFC_DIGITAL_FRAMING_NFCB_T4T; check_crc = digital_skb_check_crc_b; add_crc = digital_skb_add_crc_b; break; default: pr_err("Invalid protocol %d\n", protocol); return -EINVAL; } pr_debug("rf_tech=%d, protocol=%d\n", rf_tech, protocol); ddev->curr_rf_tech = rf_tech; if (DIGITAL_DRV_CAPS_IN_CRC(ddev)) { ddev->skb_add_crc = digital_skb_add_crc_none; ddev->skb_check_crc = digital_skb_check_crc_none; } else { ddev->skb_add_crc = add_crc; ddev->skb_check_crc = check_crc; } rc = digital_in_configure_hw(ddev, NFC_DIGITAL_CONFIG_FRAMING, framing); if (rc) return rc; target->supported_protocols = (1 << protocol); poll_tech_count = ddev->poll_tech_count; ddev->poll_tech_count = 0; rc = nfc_targets_found(ddev->nfc_dev, target, 1); if (rc) { ddev->poll_tech_count = poll_tech_count; return rc; } return 0; } void digital_poll_next_tech(struct nfc_digital_dev *ddev) { u8 rand_mod; digital_switch_rf(ddev, 0); mutex_lock(&ddev->poll_lock); if (!ddev->poll_tech_count) { mutex_unlock(&ddev->poll_lock); return; } get_random_bytes(&rand_mod, sizeof(rand_mod)); ddev->poll_tech_index = rand_mod % ddev->poll_tech_count; mutex_unlock(&ddev->poll_lock); schedule_delayed_work(&ddev->poll_work, msecs_to_jiffies(DIGITAL_POLL_INTERVAL)); } static void digital_wq_poll(struct work_struct *work) { int rc; struct digital_poll_tech *poll_tech; struct nfc_digital_dev *ddev = container_of(work, struct nfc_digital_dev, poll_work.work); mutex_lock(&ddev->poll_lock); if (!ddev->poll_tech_count) { mutex_unlock(&ddev->poll_lock); return; } poll_tech = &ddev->poll_techs[ddev->poll_tech_index]; mutex_unlock(&ddev->poll_lock); rc = poll_tech->poll_func(ddev, poll_tech->rf_tech); if (rc) digital_poll_next_tech(ddev); } static void digital_add_poll_tech(struct nfc_digital_dev *ddev, u8 rf_tech, digital_poll_t poll_func) { struct digital_poll_tech *poll_tech; if (ddev->poll_tech_count >= NFC_DIGITAL_POLL_MODE_COUNT_MAX) return; poll_tech = &ddev->poll_techs[ddev->poll_tech_count++]; poll_tech->rf_tech = rf_tech; poll_tech->poll_func = poll_func; } /** * digital_start_poll - start_poll operation * @nfc_dev: device to be polled * @im_protocols: bitset of nfc initiator protocols to be used for polling * @tm_protocols: bitset of nfc transport protocols to be used for polling * * For every supported protocol, the corresponding polling function is added * to the table of polling technologies (ddev->poll_techs[]) using * digital_add_poll_tech(). * When a polling function fails (by timeout or protocol error) the next one is * schedule by digital_poll_next_tech() on the poll workqueue (ddev->poll_work). */ static int digital_start_poll(struct nfc_dev *nfc_dev, __u32 im_protocols, __u32 tm_protocols) { struct nfc_digital_dev *ddev = nfc_get_drvdata(nfc_dev); u32 matching_im_protocols, matching_tm_protocols; pr_debug("protocols: im 0x%x, tm 0x%x, supported 0x%x\n", im_protocols, tm_protocols, ddev->protocols); matching_im_protocols = ddev->protocols & im_protocols; matching_tm_protocols = ddev->protocols & tm_protocols; if (!matching_im_protocols && !matching_tm_protocols) { pr_err("Unknown protocol\n"); return -EINVAL; } if (ddev->poll_tech_count) { pr_err("Already polling\n"); return -EBUSY; } if (ddev->curr_protocol) { pr_err("A target is already active\n"); return -EBUSY; } ddev->poll_tech_count = 0; ddev->poll_tech_index = 0; if (matching_im_protocols & DIGITAL_PROTO_NFCA_RF_TECH) digital_add_poll_tech(ddev, NFC_DIGITAL_RF_TECH_106A, digital_in_send_sens_req); if (matching_im_protocols & DIGITAL_PROTO_NFCB_RF_TECH) digital_add_poll_tech(ddev, NFC_DIGITAL_RF_TECH_106B, digital_in_send_sensb_req); if (matching_im_protocols & DIGITAL_PROTO_NFCF_RF_TECH) { digital_add_poll_tech(ddev, NFC_DIGITAL_RF_TECH_212F, digital_in_send_sensf_req); digital_add_poll_tech(ddev, NFC_DIGITAL_RF_TECH_424F, digital_in_send_sensf_req); } if (matching_im_protocols & DIGITAL_PROTO_ISO15693_RF_TECH) digital_add_poll_tech(ddev, NFC_DIGITAL_RF_TECH_ISO15693, digital_in_send_iso15693_inv_req); if (matching_tm_protocols & NFC_PROTO_NFC_DEP_MASK) { if (ddev->ops->tg_listen_mdaa) { digital_add_poll_tech(ddev, 0, digital_tg_listen_mdaa); } else if (ddev->ops->tg_listen_md) { digital_add_poll_tech(ddev, 0, digital_tg_listen_md); } else { digital_add_poll_tech(ddev, NFC_DIGITAL_RF_TECH_106A, digital_tg_listen_nfca); digital_add_poll_tech(ddev, NFC_DIGITAL_RF_TECH_212F, digital_tg_listen_nfcf); digital_add_poll_tech(ddev, NFC_DIGITAL_RF_TECH_424F, digital_tg_listen_nfcf); } } if (!ddev->poll_tech_count) { pr_err("Unsupported protocols: im=0x%x, tm=0x%x\n", matching_im_protocols, matching_tm_protocols); return -EINVAL; } schedule_delayed_work(&ddev->poll_work, 0); return 0; } static void digital_stop_poll(struct nfc_dev *nfc_dev) { struct nfc_digital_dev *ddev = nfc_get_drvdata(nfc_dev); mutex_lock(&ddev->poll_lock); if (!ddev->poll_tech_count) { pr_err("Polling operation was not running\n"); mutex_unlock(&ddev->poll_lock); return; } ddev->poll_tech_count = 0; mutex_unlock(&ddev->poll_lock); cancel_delayed_work_sync(&ddev->poll_work); digital_abort_cmd(ddev); } static int digital_dev_up(struct nfc_dev *nfc_dev) { struct nfc_digital_dev *ddev = nfc_get_drvdata(nfc_dev); digital_switch_rf(ddev, 1); return 0; } static int digital_dev_down(struct nfc_dev *nfc_dev) { struct nfc_digital_dev *ddev = nfc_get_drvdata(nfc_dev); digital_switch_rf(ddev, 0); return 0; } static int digital_dep_link_up(struct nfc_dev *nfc_dev, struct nfc_target *target, __u8 comm_mode, __u8 *gb, size_t gb_len) { struct nfc_digital_dev *ddev = nfc_get_drvdata(nfc_dev); int rc; rc = digital_in_send_atr_req(ddev, target, comm_mode, gb, gb_len); if (!rc) ddev->curr_protocol = NFC_PROTO_NFC_DEP; return rc; } static int digital_dep_link_down(struct nfc_dev *nfc_dev) { struct nfc_digital_dev *ddev = nfc_get_drvdata(nfc_dev); digital_abort_cmd(ddev); ddev->curr_protocol = 0; return 0; } static int digital_activate_target(struct nfc_dev *nfc_dev, struct nfc_target *target, __u32 protocol) { struct nfc_digital_dev *ddev = nfc_get_drvdata(nfc_dev); if (ddev->poll_tech_count) { pr_err("Can't activate a target while polling\n"); return -EBUSY; } if (ddev->curr_protocol) { pr_err("A target is already active\n"); return -EBUSY; } ddev->curr_protocol = protocol; return 0; } static void digital_deactivate_target(struct nfc_dev *nfc_dev, struct nfc_target *target, u8 mode) { struct nfc_digital_dev *ddev = nfc_get_drvdata(nfc_dev); if (!ddev->curr_protocol) { pr_err("No active target\n"); return; } digital_abort_cmd(ddev); ddev->curr_protocol = 0; } static int digital_tg_send(struct nfc_dev *dev, struct sk_buff *skb) { struct nfc_digital_dev *ddev = nfc_get_drvdata(dev); return digital_tg_send_dep_res(ddev, skb); } static void digital_in_send_complete(struct nfc_digital_dev *ddev, void *arg, struct sk_buff *resp) { struct digital_data_exch *data_exch = arg; int rc; if (IS_ERR(resp)) { rc = PTR_ERR(resp); resp = NULL; goto done; } if (ddev->curr_protocol == NFC_PROTO_MIFARE) { rc = digital_in_recv_mifare_res(resp); /* crc check is done in digital_in_recv_mifare_res() */ goto done; } if ((ddev->curr_protocol == NFC_PROTO_ISO14443) || (ddev->curr_protocol == NFC_PROTO_ISO14443_B)) { rc = digital_in_iso_dep_pull_sod(ddev, resp); if (rc) goto done; } rc = ddev->skb_check_crc(resp); done: if (rc) { kfree_skb(resp); resp = NULL; } data_exch->cb(data_exch->cb_context, resp, rc); kfree(data_exch); } static int digital_in_send(struct nfc_dev *nfc_dev, struct nfc_target *target, struct sk_buff *skb, data_exchange_cb_t cb, void *cb_context) { struct nfc_digital_dev *ddev = nfc_get_drvdata(nfc_dev); struct digital_data_exch *data_exch; int rc; data_exch = kzalloc(sizeof(*data_exch), GFP_KERNEL); if (!data_exch) return -ENOMEM; data_exch->cb = cb; data_exch->cb_context = cb_context; if (ddev->curr_protocol == NFC_PROTO_NFC_DEP) { rc = digital_in_send_dep_req(ddev, target, skb, data_exch); goto exit; } if ((ddev->curr_protocol == NFC_PROTO_ISO14443) || (ddev->curr_protocol == NFC_PROTO_ISO14443_B)) { rc = digital_in_iso_dep_push_sod(ddev, skb); if (rc) goto exit; } ddev->skb_add_crc(skb); rc = digital_in_send_cmd(ddev, skb, 500, digital_in_send_complete, data_exch); exit: if (rc) kfree(data_exch); return rc; } static const struct nfc_ops digital_nfc_ops = { .dev_up = digital_dev_up, .dev_down = digital_dev_down, .start_poll = digital_start_poll, .stop_poll = digital_stop_poll, .dep_link_up = digital_dep_link_up, .dep_link_down = digital_dep_link_down, .activate_target = digital_activate_target, .deactivate_target = digital_deactivate_target, .tm_send = digital_tg_send, .im_transceive = digital_in_send, }; struct nfc_digital_dev *nfc_digital_allocate_device(const struct nfc_digital_ops *ops, __u32 supported_protocols, __u32 driver_capabilities, int tx_headroom, int tx_tailroom) { struct nfc_digital_dev *ddev; if (!ops->in_configure_hw || !ops->in_send_cmd || !ops->tg_listen || !ops->tg_configure_hw || !ops->tg_send_cmd || !ops->abort_cmd || !ops->switch_rf || (ops->tg_listen_md && !ops->tg_get_rf_tech)) return NULL; ddev = kzalloc(sizeof(*ddev), GFP_KERNEL); if (!ddev) return NULL; ddev->driver_capabilities = driver_capabilities; ddev->ops = ops; mutex_init(&ddev->cmd_lock); INIT_LIST_HEAD(&ddev->cmd_queue); INIT_WORK(&ddev->cmd_work, digital_wq_cmd); INIT_WORK(&ddev->cmd_complete_work, digital_wq_cmd_complete); mutex_init(&ddev->poll_lock); INIT_DELAYED_WORK(&ddev->poll_work, digital_wq_poll); if (supported_protocols & NFC_PROTO_JEWEL_MASK) ddev->protocols |= NFC_PROTO_JEWEL_MASK; if (supported_protocols & NFC_PROTO_MIFARE_MASK) ddev->protocols |= NFC_PROTO_MIFARE_MASK; if (supported_protocols & NFC_PROTO_FELICA_MASK) ddev->protocols |= NFC_PROTO_FELICA_MASK; if (supported_protocols & NFC_PROTO_NFC_DEP_MASK) ddev->protocols |= NFC_PROTO_NFC_DEP_MASK; if (supported_protocols & NFC_PROTO_ISO15693_MASK) ddev->protocols |= NFC_PROTO_ISO15693_MASK; if (supported_protocols & NFC_PROTO_ISO14443_MASK) ddev->protocols |= NFC_PROTO_ISO14443_MASK; if (supported_protocols & NFC_PROTO_ISO14443_B_MASK) ddev->protocols |= NFC_PROTO_ISO14443_B_MASK; ddev->tx_headroom = tx_headroom + DIGITAL_MAX_HEADER_LEN; ddev->tx_tailroom = tx_tailroom + DIGITAL_CRC_LEN; ddev->nfc_dev = nfc_allocate_device(&digital_nfc_ops, ddev->protocols, ddev->tx_headroom, ddev->tx_tailroom); if (!ddev->nfc_dev) { pr_err("nfc_allocate_device failed\n"); goto free_dev; } nfc_set_drvdata(ddev->nfc_dev, ddev); return ddev; free_dev: kfree(ddev); return NULL; } EXPORT_SYMBOL(nfc_digital_allocate_device); void nfc_digital_free_device(struct nfc_digital_dev *ddev) { nfc_free_device(ddev->nfc_dev); kfree(ddev); } EXPORT_SYMBOL(nfc_digital_free_device); int nfc_digital_register_device(struct nfc_digital_dev *ddev) { return nfc_register_device(ddev->nfc_dev); } EXPORT_SYMBOL(nfc_digital_register_device); void nfc_digital_unregister_device(struct nfc_digital_dev *ddev) { struct digital_cmd *cmd, *n; nfc_unregister_device(ddev->nfc_dev); mutex_lock(&ddev->poll_lock); ddev->poll_tech_count = 0; mutex_unlock(&ddev->poll_lock); cancel_delayed_work_sync(&ddev->poll_work); cancel_work_sync(&ddev->cmd_work); cancel_work_sync(&ddev->cmd_complete_work); list_for_each_entry_safe(cmd, n, &ddev->cmd_queue, queue) { list_del(&cmd->queue); /* Call the command callback if any and pass it a ENODEV error. * This gives a chance to the command issuer to free any * allocated buffer. */ if (cmd->cmd_cb) cmd->cmd_cb(ddev, cmd->cb_context, ERR_PTR(-ENODEV)); kfree(cmd->mdaa_params); kfree(cmd); } } EXPORT_SYMBOL(nfc_digital_unregister_device); MODULE_DESCRIPTION("NFC Digital protocol stack"); MODULE_LICENSE("GPL"); |
238 19 19 19 19 19 19 32 26 19 1 26 19 19 43 43 43 29 29 29 29 29 29 1 29 29 29 29 28 28 32 2 32 31 32 32 32 32 32 32 461 29 460 363 32 363 457 149 149 145 144 148 363 448 28 26 363 19 28 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 | // SPDX-License-Identifier: GPL-2.0 /* * Ldisc rw semaphore * * The ldisc semaphore is semantically a rw_semaphore but which enforces * an alternate policy, namely: * 1) Supports lock wait timeouts * 2) Write waiter has priority * 3) Downgrading is not supported * * Implementation notes: * 1) Upper half of semaphore count is a wait count (differs from rwsem * in that rwsem normalizes the upper half to the wait bias) * 2) Lacks overflow checking * * The generic counting was copied and modified from include/asm-generic/rwsem.h * by Paul Mackerras <paulus@samba.org>. * * The scheduling policy was copied and modified from lib/rwsem.c * Written by David Howells (dhowells@redhat.com). * * This implementation incorporates the write lock stealing work of * Michel Lespinasse <walken@google.com>. * * Copyright (C) 2013 Peter Hurley <peter@hurleysoftware.com> */ #include <linux/list.h> #include <linux/spinlock.h> #include <linux/atomic.h> #include <linux/tty.h> #include <linux/sched.h> #include <linux/sched/debug.h> #include <linux/sched/task.h> #if BITS_PER_LONG == 64 # define LDSEM_ACTIVE_MASK 0xffffffffL #else # define LDSEM_ACTIVE_MASK 0x0000ffffL #endif #define LDSEM_UNLOCKED 0L #define LDSEM_ACTIVE_BIAS 1L #define LDSEM_WAIT_BIAS (-LDSEM_ACTIVE_MASK-1) #define LDSEM_READ_BIAS LDSEM_ACTIVE_BIAS #define LDSEM_WRITE_BIAS (LDSEM_WAIT_BIAS + LDSEM_ACTIVE_BIAS) struct ldsem_waiter { struct list_head list; struct task_struct *task; }; /* * Initialize an ldsem: */ void __init_ldsem(struct ld_semaphore *sem, const char *name, struct lock_class_key *key) { #ifdef CONFIG_DEBUG_LOCK_ALLOC /* * Make sure we are not reinitializing a held semaphore: */ debug_check_no_locks_freed((void *)sem, sizeof(*sem)); lockdep_init_map(&sem->dep_map, name, key, 0); #endif atomic_long_set(&sem->count, LDSEM_UNLOCKED); sem->wait_readers = 0; raw_spin_lock_init(&sem->wait_lock); INIT_LIST_HEAD(&sem->read_wait); INIT_LIST_HEAD(&sem->write_wait); } static void __ldsem_wake_readers(struct ld_semaphore *sem) { struct ldsem_waiter *waiter, *next; struct task_struct *tsk; long adjust, count; /* * Try to grant read locks to all readers on the read wait list. * Note the 'active part' of the count is incremented by * the number of readers before waking any processes up. */ adjust = sem->wait_readers * (LDSEM_ACTIVE_BIAS - LDSEM_WAIT_BIAS); count = atomic_long_add_return(adjust, &sem->count); do { if (count > 0) break; if (atomic_long_try_cmpxchg(&sem->count, &count, count - adjust)) return; } while (1); list_for_each_entry_safe(waiter, next, &sem->read_wait, list) { tsk = waiter->task; smp_store_release(&waiter->task, NULL); wake_up_process(tsk); put_task_struct(tsk); } INIT_LIST_HEAD(&sem->read_wait); sem->wait_readers = 0; } static inline int writer_trylock(struct ld_semaphore *sem) { /* * Only wake this writer if the active part of the count can be * transitioned from 0 -> 1 */ long count = atomic_long_add_return(LDSEM_ACTIVE_BIAS, &sem->count); do { if ((count & LDSEM_ACTIVE_MASK) == LDSEM_ACTIVE_BIAS) return 1; if (atomic_long_try_cmpxchg(&sem->count, &count, count - LDSEM_ACTIVE_BIAS)) return 0; } while (1); } static void __ldsem_wake_writer(struct ld_semaphore *sem) { struct ldsem_waiter *waiter; waiter = list_entry(sem->write_wait.next, struct ldsem_waiter, list); wake_up_process(waiter->task); } /* * handle the lock release when processes blocked on it that can now run * - if we come here from up_xxxx(), then: * - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed) * - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so) * - the spinlock must be held by the caller * - woken process blocks are discarded from the list after having task zeroed */ static void __ldsem_wake(struct ld_semaphore *sem) { if (!list_empty(&sem->write_wait)) __ldsem_wake_writer(sem); else if (!list_empty(&sem->read_wait)) __ldsem_wake_readers(sem); } static void ldsem_wake(struct ld_semaphore *sem) { unsigned long flags; raw_spin_lock_irqsave(&sem->wait_lock, flags); __ldsem_wake(sem); raw_spin_unlock_irqrestore(&sem->wait_lock, flags); } /* * wait for the read lock to be granted */ static struct ld_semaphore __sched * down_read_failed(struct ld_semaphore *sem, long count, long timeout) { struct ldsem_waiter waiter; long adjust = -LDSEM_ACTIVE_BIAS + LDSEM_WAIT_BIAS; /* set up my own style of waitqueue */ raw_spin_lock_irq(&sem->wait_lock); /* * Try to reverse the lock attempt but if the count has changed * so that reversing fails, check if there are no waiters, * and early-out if not */ do { if (atomic_long_try_cmpxchg(&sem->count, &count, count + adjust)) { count += adjust; break; } if (count > 0) { raw_spin_unlock_irq(&sem->wait_lock); return sem; } } while (1); list_add_tail(&waiter.list, &sem->read_wait); sem->wait_readers++; waiter.task = current; get_task_struct(current); /* if there are no active locks, wake the new lock owner(s) */ if ((count & LDSEM_ACTIVE_MASK) == 0) __ldsem_wake(sem); raw_spin_unlock_irq(&sem->wait_lock); /* wait to be given the lock */ for (;;) { set_current_state(TASK_UNINTERRUPTIBLE); if (!smp_load_acquire(&waiter.task)) break; if (!timeout) break; timeout = schedule_timeout(timeout); } __set_current_state(TASK_RUNNING); if (!timeout) { /* * Lock timed out but check if this task was just * granted lock ownership - if so, pretend there * was no timeout; otherwise, cleanup lock wait. */ raw_spin_lock_irq(&sem->wait_lock); if (waiter.task) { atomic_long_add_return(-LDSEM_WAIT_BIAS, &sem->count); sem->wait_readers--; list_del(&waiter.list); raw_spin_unlock_irq(&sem->wait_lock); put_task_struct(waiter.task); return NULL; } raw_spin_unlock_irq(&sem->wait_lock); } return sem; } /* * wait for the write lock to be granted */ static struct ld_semaphore __sched * down_write_failed(struct ld_semaphore *sem, long count, long timeout) { struct ldsem_waiter waiter; long adjust = -LDSEM_ACTIVE_BIAS; int locked = 0; /* set up my own style of waitqueue */ raw_spin_lock_irq(&sem->wait_lock); /* * Try to reverse the lock attempt but if the count has changed * so that reversing fails, check if the lock is now owned, * and early-out if so. */ do { if (atomic_long_try_cmpxchg(&sem->count, &count, count + adjust)) break; if ((count & LDSEM_ACTIVE_MASK) == LDSEM_ACTIVE_BIAS) { raw_spin_unlock_irq(&sem->wait_lock); return sem; } } while (1); list_add_tail(&waiter.list, &sem->write_wait); waiter.task = current; set_current_state(TASK_UNINTERRUPTIBLE); for (;;) { if (!timeout) break; raw_spin_unlock_irq(&sem->wait_lock); timeout = schedule_timeout(timeout); raw_spin_lock_irq(&sem->wait_lock); set_current_state(TASK_UNINTERRUPTIBLE); locked = writer_trylock(sem); if (locked) break; } if (!locked) atomic_long_add_return(-LDSEM_WAIT_BIAS, &sem->count); list_del(&waiter.list); /* * In case of timeout, wake up every reader who gave the right of way * to writer. Prevent separation readers into two groups: * one that helds semaphore and another that sleeps. * (in case of no contention with a writer) */ if (!locked && list_empty(&sem->write_wait)) __ldsem_wake_readers(sem); raw_spin_unlock_irq(&sem->wait_lock); __set_current_state(TASK_RUNNING); /* lock wait may have timed out */ if (!locked) return NULL; return sem; } static int __ldsem_down_read_nested(struct ld_semaphore *sem, int subclass, long timeout) { long count; rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_); count = atomic_long_add_return(LDSEM_READ_BIAS, &sem->count); if (count <= 0) { lock_contended(&sem->dep_map, _RET_IP_); if (!down_read_failed(sem, count, timeout)) { rwsem_release(&sem->dep_map, _RET_IP_); return 0; } } lock_acquired(&sem->dep_map, _RET_IP_); return 1; } static int __ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, long timeout) { long count; rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_); count = atomic_long_add_return(LDSEM_WRITE_BIAS, &sem->count); if ((count & LDSEM_ACTIVE_MASK) != LDSEM_ACTIVE_BIAS) { lock_contended(&sem->dep_map, _RET_IP_); if (!down_write_failed(sem, count, timeout)) { rwsem_release(&sem->dep_map, _RET_IP_); return 0; } } lock_acquired(&sem->dep_map, _RET_IP_); return 1; } /* * lock for reading -- returns 1 if successful, 0 if timed out */ int __sched ldsem_down_read(struct ld_semaphore *sem, long timeout) { might_sleep(); return __ldsem_down_read_nested(sem, 0, timeout); } /* * trylock for reading -- returns 1 if successful, 0 if contention */ int ldsem_down_read_trylock(struct ld_semaphore *sem) { long count = atomic_long_read(&sem->count); while (count >= 0) { if (atomic_long_try_cmpxchg(&sem->count, &count, count + LDSEM_READ_BIAS)) { rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_); lock_acquired(&sem->dep_map, _RET_IP_); return 1; } } return 0; } /* * lock for writing -- returns 1 if successful, 0 if timed out */ int __sched ldsem_down_write(struct ld_semaphore *sem, long timeout) { might_sleep(); return __ldsem_down_write_nested(sem, 0, timeout); } /* * trylock for writing -- returns 1 if successful, 0 if contention */ int ldsem_down_write_trylock(struct ld_semaphore *sem) { long count = atomic_long_read(&sem->count); while ((count & LDSEM_ACTIVE_MASK) == 0) { if (atomic_long_try_cmpxchg(&sem->count, &count, count + LDSEM_WRITE_BIAS)) { rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_); lock_acquired(&sem->dep_map, _RET_IP_); return 1; } } return 0; } /* * release a read lock */ void ldsem_up_read(struct ld_semaphore *sem) { long count; rwsem_release(&sem->dep_map, _RET_IP_); count = atomic_long_add_return(-LDSEM_READ_BIAS, &sem->count); if (count < 0 && (count & LDSEM_ACTIVE_MASK) == 0) ldsem_wake(sem); } /* * release a write lock */ void ldsem_up_write(struct ld_semaphore *sem) { long count; rwsem_release(&sem->dep_map, _RET_IP_); count = atomic_long_add_return(-LDSEM_WRITE_BIAS, &sem->count); if (count < 0) ldsem_wake(sem); } #ifdef CONFIG_DEBUG_LOCK_ALLOC int ldsem_down_read_nested(struct ld_semaphore *sem, int subclass, long timeout) { might_sleep(); return __ldsem_down_read_nested(sem, subclass, timeout); } int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, long timeout) { might_sleep(); return __ldsem_down_write_nested(sem, subclass, timeout); } #endif |
166 167 45 90 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 | // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. * All Rights Reserved. */ #ifndef __XFS_LOG_FORMAT_H__ #define __XFS_LOG_FORMAT_H__ struct xfs_mount; struct xfs_trans_res; /* * On-disk Log Format definitions. * * This file contains all the on-disk format definitions used within the log. It * includes the physical log structure itself, as well as all the log item * format structures that are written into the log and intepreted by log * recovery. We start with the physical log format definitions, and then work * through all the log items definitions and everything they encode into the * log. */ typedef uint32_t xlog_tid_t; #define XLOG_MIN_ICLOGS 2 #define XLOG_MAX_ICLOGS 8 #define XLOG_HEADER_MAGIC_NUM 0xFEEDbabe /* Invalid cycle number */ #define XLOG_VERSION_1 1 #define XLOG_VERSION_2 2 /* Large IClogs, Log sunit */ #define XLOG_VERSION_OKBITS (XLOG_VERSION_1 | XLOG_VERSION_2) #define XLOG_MIN_RECORD_BSIZE (16*1024) /* eventually 32k */ #define XLOG_BIG_RECORD_BSIZE (32*1024) /* 32k buffers */ #define XLOG_MAX_RECORD_BSIZE (256*1024) #define XLOG_HEADER_CYCLE_SIZE (32*1024) /* cycle data in header */ #define XLOG_MIN_RECORD_BSHIFT 14 /* 16384 == 1 << 14 */ #define XLOG_BIG_RECORD_BSHIFT 15 /* 32k == 1 << 15 */ #define XLOG_MAX_RECORD_BSHIFT 18 /* 256k == 1 << 18 */ #define XLOG_HEADER_SIZE 512 /* Minimum number of transactions that must fit in the log (defined by mkfs) */ #define XFS_MIN_LOG_FACTOR 3 #define XLOG_REC_SHIFT(log) \ BTOBB(1 << (xfs_has_logv2(log->l_mp) ? \ XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT)) #define XLOG_TOTAL_REC_SHIFT(log) \ BTOBB(XLOG_MAX_ICLOGS << (xfs_has_logv2(log->l_mp) ? \ XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT)) /* get lsn fields */ #define CYCLE_LSN(lsn) ((uint)((lsn)>>32)) #define BLOCK_LSN(lsn) ((uint)(lsn)) /* this is used in a spot where we might otherwise double-endian-flip */ #define CYCLE_LSN_DISK(lsn) (((__be32 *)&(lsn))[0]) static inline xfs_lsn_t xlog_assign_lsn(uint cycle, uint block) { return ((xfs_lsn_t)cycle << 32) | block; } static inline uint xlog_get_cycle(char *ptr) { if (be32_to_cpu(*(__be32 *)ptr) == XLOG_HEADER_MAGIC_NUM) return be32_to_cpu(*((__be32 *)ptr + 1)); else return be32_to_cpu(*(__be32 *)ptr); } /* Log Clients */ #define XFS_TRANSACTION 0x69 #define XFS_LOG 0xaa #define XLOG_UNMOUNT_TYPE 0x556e /* Un for Unmount */ /* * Log item for unmount records. * * The unmount record used to have a string "Unmount filesystem--" in the * data section where the "Un" was really a magic number (XLOG_UNMOUNT_TYPE). * We just write the magic number now; see xfs_log_unmount_write. */ struct xfs_unmount_log_format { uint16_t magic; /* XLOG_UNMOUNT_TYPE */ uint16_t pad1; uint32_t pad2; /* may as well make it 64 bits */ }; /* Region types for iovec's i_type */ #define XLOG_REG_TYPE_BFORMAT 1 #define XLOG_REG_TYPE_BCHUNK 2 #define XLOG_REG_TYPE_EFI_FORMAT 3 #define XLOG_REG_TYPE_EFD_FORMAT 4 #define XLOG_REG_TYPE_IFORMAT 5 #define XLOG_REG_TYPE_ICORE 6 #define XLOG_REG_TYPE_IEXT 7 #define XLOG_REG_TYPE_IBROOT 8 #define XLOG_REG_TYPE_ILOCAL 9 #define XLOG_REG_TYPE_IATTR_EXT 10 #define XLOG_REG_TYPE_IATTR_BROOT 11 #define XLOG_REG_TYPE_IATTR_LOCAL 12 #define XLOG_REG_TYPE_QFORMAT 13 #define XLOG_REG_TYPE_DQUOT 14 #define XLOG_REG_TYPE_QUOTAOFF 15 #define XLOG_REG_TYPE_LRHEADER 16 #define XLOG_REG_TYPE_UNMOUNT 17 #define XLOG_REG_TYPE_COMMIT 18 #define XLOG_REG_TYPE_TRANSHDR 19 #define XLOG_REG_TYPE_ICREATE 20 #define XLOG_REG_TYPE_RUI_FORMAT 21 #define XLOG_REG_TYPE_RUD_FORMAT 22 #define XLOG_REG_TYPE_CUI_FORMAT 23 #define XLOG_REG_TYPE_CUD_FORMAT 24 #define XLOG_REG_TYPE_BUI_FORMAT 25 #define XLOG_REG_TYPE_BUD_FORMAT 26 #define XLOG_REG_TYPE_ATTRI_FORMAT 27 #define XLOG_REG_TYPE_ATTRD_FORMAT 28 #define XLOG_REG_TYPE_ATTR_NAME 29 #define XLOG_REG_TYPE_ATTR_VALUE 30 #define XLOG_REG_TYPE_XMI_FORMAT 31 #define XLOG_REG_TYPE_XMD_FORMAT 32 #define XLOG_REG_TYPE_ATTR_NEWNAME 33 #define XLOG_REG_TYPE_ATTR_NEWVALUE 34 #define XLOG_REG_TYPE_MAX 34 /* * Flags to log operation header * * The first write of a new transaction will be preceded with a start * record, XLOG_START_TRANS. Once a transaction is committed, a commit * record is written, XLOG_COMMIT_TRANS. If a single region can not fit into * the remainder of the current active in-core log, it is split up into * multiple regions. Each partial region will be marked with a * XLOG_CONTINUE_TRANS until the last one, which gets marked with XLOG_END_TRANS. * */ #define XLOG_START_TRANS 0x01 /* Start a new transaction */ #define XLOG_COMMIT_TRANS 0x02 /* Commit this transaction */ #define XLOG_CONTINUE_TRANS 0x04 /* Cont this trans into new region */ #define XLOG_WAS_CONT_TRANS 0x08 /* Cont this trans into new region */ #define XLOG_END_TRANS 0x10 /* End a continued transaction */ #define XLOG_UNMOUNT_TRANS 0x20 /* Unmount a filesystem transaction */ typedef struct xlog_op_header { __be32 oh_tid; /* transaction id of operation : 4 b */ __be32 oh_len; /* bytes in data region : 4 b */ __u8 oh_clientid; /* who sent me this : 1 b */ __u8 oh_flags; /* : 1 b */ __u16 oh_res2; /* 32 bit align : 2 b */ } xlog_op_header_t; /* valid values for h_fmt */ #define XLOG_FMT_UNKNOWN 0 #define XLOG_FMT_LINUX_LE 1 #define XLOG_FMT_LINUX_BE 2 #define XLOG_FMT_IRIX_BE 3 /* our fmt */ #ifdef XFS_NATIVE_HOST #define XLOG_FMT XLOG_FMT_LINUX_BE #else #define XLOG_FMT XLOG_FMT_LINUX_LE #endif typedef struct xlog_rec_header { __be32 h_magicno; /* log record (LR) identifier : 4 */ __be32 h_cycle; /* write cycle of log : 4 */ __be32 h_version; /* LR version : 4 */ __be32 h_len; /* len in bytes; should be 64-bit aligned: 4 */ __be64 h_lsn; /* lsn of this LR : 8 */ __be64 h_tail_lsn; /* lsn of 1st LR w/ buffers not committed: 8 */ __le32 h_crc; /* crc of log record : 4 */ __be32 h_prev_block; /* block number to previous LR : 4 */ __be32 h_num_logops; /* number of log operations in this LR : 4 */ __be32 h_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE]; /* new fields */ __be32 h_fmt; /* format of log record : 4 */ uuid_t h_fs_uuid; /* uuid of FS : 16 */ __be32 h_size; /* iclog size : 4 */ } xlog_rec_header_t; typedef struct xlog_rec_ext_header { __be32 xh_cycle; /* write cycle of log : 4 */ __be32 xh_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE]; /* : 256 */ } xlog_rec_ext_header_t; /* * Quite misnamed, because this union lays out the actual on-disk log buffer. */ typedef union xlog_in_core2 { xlog_rec_header_t hic_header; xlog_rec_ext_header_t hic_xheader; char hic_sector[XLOG_HEADER_SIZE]; } xlog_in_core_2_t; /* not an on-disk structure, but needed by log recovery in userspace */ typedef struct xfs_log_iovec { void *i_addr; /* beginning address of region */ int i_len; /* length in bytes of region */ uint i_type; /* type of region */ } xfs_log_iovec_t; /* * Transaction Header definitions. * * This is the structure written in the log at the head of every transaction. It * identifies the type and id of the transaction, and contains the number of * items logged by the transaction so we know how many to expect during * recovery. * * Do not change the below structure without redoing the code in * xlog_recover_add_to_trans() and xlog_recover_add_to_cont_trans(). */ typedef struct xfs_trans_header { uint th_magic; /* magic number */ uint th_type; /* transaction type */ int32_t th_tid; /* transaction id (unused) */ uint th_num_items; /* num items logged by trans */ } xfs_trans_header_t; #define XFS_TRANS_HEADER_MAGIC 0x5452414e /* TRAN */ /* * The only type valid for th_type in CIL-enabled file system logs: */ #define XFS_TRANS_CHECKPOINT 40 /* * Log item types. */ #define XFS_LI_EFI 0x1236 #define XFS_LI_EFD 0x1237 #define XFS_LI_IUNLINK 0x1238 #define XFS_LI_INODE 0x123b /* aligned ino chunks, var-size ibufs */ #define XFS_LI_BUF 0x123c /* v2 bufs, variable sized inode bufs */ #define XFS_LI_DQUOT 0x123d #define XFS_LI_QUOTAOFF 0x123e #define XFS_LI_ICREATE 0x123f #define XFS_LI_RUI 0x1240 /* rmap update intent */ #define XFS_LI_RUD 0x1241 #define XFS_LI_CUI 0x1242 /* refcount update intent */ #define XFS_LI_CUD 0x1243 #define XFS_LI_BUI 0x1244 /* bmbt update intent */ #define XFS_LI_BUD 0x1245 #define XFS_LI_ATTRI 0x1246 /* attr set/remove intent*/ #define XFS_LI_ATTRD 0x1247 /* attr set/remove done */ #define XFS_LI_XMI 0x1248 /* mapping exchange intent */ #define XFS_LI_XMD 0x1249 /* mapping exchange done */ #define XFS_LI_TYPE_DESC \ { XFS_LI_EFI, "XFS_LI_EFI" }, \ { XFS_LI_EFD, "XFS_LI_EFD" }, \ { XFS_LI_IUNLINK, "XFS_LI_IUNLINK" }, \ { XFS_LI_INODE, "XFS_LI_INODE" }, \ { XFS_LI_BUF, "XFS_LI_BUF" }, \ { XFS_LI_DQUOT, "XFS_LI_DQUOT" }, \ { XFS_LI_QUOTAOFF, "XFS_LI_QUOTAOFF" }, \ { XFS_LI_ICREATE, "XFS_LI_ICREATE" }, \ { XFS_LI_RUI, "XFS_LI_RUI" }, \ { XFS_LI_RUD, "XFS_LI_RUD" }, \ { XFS_LI_CUI, "XFS_LI_CUI" }, \ { XFS_LI_CUD, "XFS_LI_CUD" }, \ { XFS_LI_BUI, "XFS_LI_BUI" }, \ { XFS_LI_BUD, "XFS_LI_BUD" }, \ { XFS_LI_ATTRI, "XFS_LI_ATTRI" }, \ { XFS_LI_ATTRD, "XFS_LI_ATTRD" }, \ { XFS_LI_XMI, "XFS_LI_XMI" }, \ { XFS_LI_XMD, "XFS_LI_XMD" } /* * Inode Log Item Format definitions. * * This is the structure used to lay out an inode log item in the * log. The size of the inline data/extents/b-tree root to be logged * (if any) is indicated in the ilf_dsize field. Changes to this structure * must be added on to the end. */ struct xfs_inode_log_format { uint16_t ilf_type; /* inode log item type */ uint16_t ilf_size; /* size of this item */ uint32_t ilf_fields; /* flags for fields logged */ uint16_t ilf_asize; /* size of attr d/ext/root */ uint16_t ilf_dsize; /* size of data/ext/root */ uint32_t ilf_pad; /* pad for 64 bit boundary */ uint64_t ilf_ino; /* inode number */ union { uint32_t ilfu_rdev; /* rdev value for dev inode*/ uint8_t __pad[16]; /* unused */ } ilf_u; int64_t ilf_blkno; /* blkno of inode buffer */ int32_t ilf_len; /* len of inode buffer */ int32_t ilf_boffset; /* off of inode in buffer */ }; /* * Old 32 bit systems will log in this format without the 64 bit * alignment padding. Recovery will detect this and convert it to the * correct format. */ struct xfs_inode_log_format_32 { uint16_t ilf_type; /* inode log item type */ uint16_t ilf_size; /* size of this item */ uint32_t ilf_fields; /* flags for fields logged */ uint16_t ilf_asize; /* size of attr d/ext/root */ uint16_t ilf_dsize; /* size of data/ext/root */ uint64_t ilf_ino; /* inode number */ union { uint32_t ilfu_rdev; /* rdev value for dev inode*/ uint8_t __pad[16]; /* unused */ } ilf_u; int64_t ilf_blkno; /* blkno of inode buffer */ int32_t ilf_len; /* len of inode buffer */ int32_t ilf_boffset; /* off of inode in buffer */ } __attribute__((packed)); /* * Flags for xfs_trans_log_inode flags field. */ #define XFS_ILOG_CORE 0x001 /* log standard inode fields */ #define XFS_ILOG_DDATA 0x002 /* log i_df.if_data */ #define XFS_ILOG_DEXT 0x004 /* log i_df.if_extents */ #define XFS_ILOG_DBROOT 0x008 /* log i_df.i_broot */ #define XFS_ILOG_DEV 0x010 /* log the dev field */ #define XFS_ILOG_UUID 0x020 /* added long ago, but never used */ #define XFS_ILOG_ADATA 0x040 /* log i_af.if_data */ #define XFS_ILOG_AEXT 0x080 /* log i_af.if_extents */ #define XFS_ILOG_ABROOT 0x100 /* log i_af.i_broot */ #define XFS_ILOG_DOWNER 0x200 /* change the data fork owner on replay */ #define XFS_ILOG_AOWNER 0x400 /* change the attr fork owner on replay */ /* * The timestamps are dirty, but not necessarily anything else in the inode * core. Unlike the other fields above this one must never make it to disk * in the ilf_fields of the inode_log_format, but is purely store in-memory in * ili_fields in the inode_log_item. */ #define XFS_ILOG_TIMESTAMP 0x4000 /* * The version field has been changed, but not necessarily anything else of * interest. This must never make it to disk - it is used purely to ensure that * the inode item ->precommit operation can update the fsync flag triggers * in the inode item correctly. */ #define XFS_ILOG_IVERSION 0x8000 #define XFS_ILOG_NONCORE (XFS_ILOG_DDATA | XFS_ILOG_DEXT | \ XFS_ILOG_DBROOT | XFS_ILOG_DEV | \ XFS_ILOG_ADATA | XFS_ILOG_AEXT | \ XFS_ILOG_ABROOT | XFS_ILOG_DOWNER | \ XFS_ILOG_AOWNER) #define XFS_ILOG_DFORK (XFS_ILOG_DDATA | XFS_ILOG_DEXT | \ XFS_ILOG_DBROOT) #define XFS_ILOG_AFORK (XFS_ILOG_ADATA | XFS_ILOG_AEXT | \ XFS_ILOG_ABROOT) #define XFS_ILOG_ALL (XFS_ILOG_CORE | XFS_ILOG_DDATA | \ XFS_ILOG_DEXT | XFS_ILOG_DBROOT | \ XFS_ILOG_DEV | XFS_ILOG_ADATA | \ XFS_ILOG_AEXT | XFS_ILOG_ABROOT | \ XFS_ILOG_TIMESTAMP | XFS_ILOG_DOWNER | \ XFS_ILOG_AOWNER) static inline int xfs_ilog_fbroot(int w) { return (w == XFS_DATA_FORK ? XFS_ILOG_DBROOT : XFS_ILOG_ABROOT); } static inline int xfs_ilog_fext(int w) { return (w == XFS_DATA_FORK ? XFS_ILOG_DEXT : XFS_ILOG_AEXT); } static inline int xfs_ilog_fdata(int w) { return (w == XFS_DATA_FORK ? XFS_ILOG_DDATA : XFS_ILOG_ADATA); } /* * Incore version of the on-disk inode core structures. We log this directly * into the journal in host CPU format (for better or worse) and as such * directly mirrors the xfs_dinode structure as it must contain all the same * information. */ typedef uint64_t xfs_log_timestamp_t; /* Legacy timestamp encoding format. */ struct xfs_log_legacy_timestamp { int32_t t_sec; /* timestamp seconds */ int32_t t_nsec; /* timestamp nanoseconds */ }; /* * Define the format of the inode core that is logged. This structure must be * kept identical to struct xfs_dinode except for the endianness annotations. */ struct xfs_log_dinode { uint16_t di_magic; /* inode magic # = XFS_DINODE_MAGIC */ uint16_t di_mode; /* mode and type of file */ int8_t di_version; /* inode version */ int8_t di_format; /* format of di_c data */ uint8_t di_pad3[2]; /* unused in v2/3 inodes */ uint32_t di_uid; /* owner's user id */ uint32_t di_gid; /* owner's group id */ uint32_t di_nlink; /* number of links to file */ uint16_t di_projid_lo; /* lower part of owner's project id */ uint16_t di_projid_hi; /* higher part of owner's project id */ union { /* Number of data fork extents if NREXT64 is set */ uint64_t di_big_nextents; /* Padding for V3 inodes without NREXT64 set. */ uint64_t di_v3_pad; /* Padding and inode flush counter for V2 inodes. */ struct { uint8_t di_v2_pad[6]; /* V2 inode zeroed space */ uint16_t di_flushiter; /* V2 inode incremented on flush */ }; }; xfs_log_timestamp_t di_atime; /* time last accessed */ xfs_log_timestamp_t di_mtime; /* time last modified */ xfs_log_timestamp_t di_ctime; /* time created/inode modified */ xfs_fsize_t di_size; /* number of bytes in file */ xfs_rfsblock_t di_nblocks; /* # of direct & btree blocks used */ xfs_extlen_t di_extsize; /* basic/minimum extent size for file */ union { /* * For V2 inodes and V3 inodes without NREXT64 set, this * is the number of data and attr fork extents. */ struct { uint32_t di_nextents; uint16_t di_anextents; } __packed; /* Number of attr fork extents if NREXT64 is set. */ struct { uint32_t di_big_anextents; uint16_t di_nrext64_pad; } __packed; } __packed; uint8_t di_forkoff; /* attr fork offs, <<3 for 64b align */ int8_t di_aformat; /* format of attr fork's data */ uint32_t di_dmevmask; /* DMIG event mask */ uint16_t di_dmstate; /* DMIG state info */ uint16_t di_flags; /* random flags, XFS_DIFLAG_... */ uint32_t di_gen; /* generation number */ /* di_next_unlinked is the only non-core field in the old dinode */ xfs_agino_t di_next_unlinked;/* agi unlinked list ptr */ /* start of the extended dinode, writable fields */ uint32_t di_crc; /* CRC of the inode */ uint64_t di_changecount; /* number of attribute changes */ /* * The LSN we write to this field during formatting is not a reflection * of the current on-disk LSN. It should never be used for recovery * sequencing, nor should it be recovered into the on-disk inode at all. * See xlog_recover_inode_commit_pass2() and xfs_log_dinode_to_disk() * for details. */ xfs_lsn_t di_lsn; uint64_t di_flags2; /* more random flags */ uint32_t di_cowextsize; /* basic cow extent size for file */ uint8_t di_pad2[12]; /* more padding for future expansion */ /* fields only written to during inode creation */ xfs_log_timestamp_t di_crtime; /* time created */ xfs_ino_t di_ino; /* inode number */ uuid_t di_uuid; /* UUID of the filesystem */ /* structure must be padded to 64 bit alignment */ }; #define xfs_log_dinode_size(mp) \ (xfs_has_v3inodes((mp)) ? \ sizeof(struct xfs_log_dinode) : \ offsetof(struct xfs_log_dinode, di_next_unlinked)) /* * Buffer Log Format definitions * * These are the physical dirty bitmap definitions for the log format structure. */ #define XFS_BLF_CHUNK 128 #define XFS_BLF_SHIFT 7 #define BIT_TO_WORD_SHIFT 5 #define NBWORD (NBBY * sizeof(unsigned int)) /* * This flag indicates that the buffer contains on disk inodes * and requires special recovery handling. */ #define XFS_BLF_INODE_BUF (1<<0) /* * This flag indicates that the buffer should not be replayed * during recovery because its blocks are being freed. */ #define XFS_BLF_CANCEL (1<<1) /* * This flag indicates that the buffer contains on disk * user or group dquots and may require special recovery handling. */ #define XFS_BLF_UDQUOT_BUF (1<<2) #define XFS_BLF_PDQUOT_BUF (1<<3) #define XFS_BLF_GDQUOT_BUF (1<<4) /* * This is the structure used to lay out a buf log item in the log. The data * map describes which 128 byte chunks of the buffer have been logged. * * The placement of blf_map_size causes blf_data_map to start at an odd * multiple of sizeof(unsigned int) offset within the struct. Because the data * bitmap size will always be an even number, the end of the data_map (and * therefore the structure) will also be at an odd multiple of sizeof(unsigned * int). Some 64-bit compilers will insert padding at the end of the struct to * ensure 64-bit alignment of blf_blkno, but 32-bit ones will not. Therefore, * XFS_BLF_DATAMAP_SIZE must be an odd number to make the padding explicit and * keep the structure size consistent between 32-bit and 64-bit platforms. */ #define __XFS_BLF_DATAMAP_SIZE ((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) / NBWORD) #define XFS_BLF_DATAMAP_SIZE (__XFS_BLF_DATAMAP_SIZE + 1) typedef struct xfs_buf_log_format { unsigned short blf_type; /* buf log item type indicator */ unsigned short blf_size; /* size of this item */ unsigned short blf_flags; /* misc state */ unsigned short blf_len; /* number of blocks in this buf */ int64_t blf_blkno; /* starting blkno of this buf */ unsigned int blf_map_size; /* used size of data bitmap in words */ unsigned int blf_data_map[XFS_BLF_DATAMAP_SIZE]; /* dirty bitmap */ } xfs_buf_log_format_t; /* * All buffers now need to tell recovery where the magic number * is so that it can verify and calculate the CRCs on the buffer correctly * once the changes have been replayed into the buffer. * * The type value is held in the upper 5 bits of the blf_flags field, which is * an unsigned 16 bit field. Hence we need to shift it 11 bits up and down. */ #define XFS_BLFT_BITS 5 #define XFS_BLFT_SHIFT 11 #define XFS_BLFT_MASK (((1 << XFS_BLFT_BITS) - 1) << XFS_BLFT_SHIFT) enum xfs_blft { XFS_BLFT_UNKNOWN_BUF = 0, XFS_BLFT_UDQUOT_BUF, XFS_BLFT_PDQUOT_BUF, XFS_BLFT_GDQUOT_BUF, XFS_BLFT_BTREE_BUF, XFS_BLFT_AGF_BUF, XFS_BLFT_AGFL_BUF, XFS_BLFT_AGI_BUF, XFS_BLFT_DINO_BUF, XFS_BLFT_SYMLINK_BUF, XFS_BLFT_DIR_BLOCK_BUF, XFS_BLFT_DIR_DATA_BUF, XFS_BLFT_DIR_FREE_BUF, XFS_BLFT_DIR_LEAF1_BUF, XFS_BLFT_DIR_LEAFN_BUF, XFS_BLFT_DA_NODE_BUF, XFS_BLFT_ATTR_LEAF_BUF, XFS_BLFT_ATTR_RMT_BUF, XFS_BLFT_SB_BUF, XFS_BLFT_RTBITMAP_BUF, XFS_BLFT_RTSUMMARY_BUF, XFS_BLFT_MAX_BUF = (1 << XFS_BLFT_BITS), }; static inline void xfs_blft_to_flags(struct xfs_buf_log_format *blf, enum xfs_blft type) { ASSERT(type > XFS_BLFT_UNKNOWN_BUF && type < XFS_BLFT_MAX_BUF); blf->blf_flags &= ~XFS_BLFT_MASK; blf->blf_flags |= ((type << XFS_BLFT_SHIFT) & XFS_BLFT_MASK); } static inline uint16_t xfs_blft_from_flags(struct xfs_buf_log_format *blf) { return (blf->blf_flags & XFS_BLFT_MASK) >> XFS_BLFT_SHIFT; } /* * EFI/EFD log format definitions */ typedef struct xfs_extent { xfs_fsblock_t ext_start; xfs_extlen_t ext_len; } xfs_extent_t; /* * Since an xfs_extent_t has types (start:64, len: 32) * there are different alignments on 32 bit and 64 bit kernels. * So we provide the different variants for use by a * conversion routine. */ typedef struct xfs_extent_32 { uint64_t ext_start; uint32_t ext_len; } __attribute__((packed)) xfs_extent_32_t; typedef struct xfs_extent_64 { uint64_t ext_start; uint32_t ext_len; uint32_t ext_pad; } xfs_extent_64_t; /* * This is the structure used to lay out an efi log item in the * log. The efi_extents field is a variable size array whose * size is given by efi_nextents. */ typedef struct xfs_efi_log_format { uint16_t efi_type; /* efi log item type */ uint16_t efi_size; /* size of this item */ uint32_t efi_nextents; /* # extents to free */ uint64_t efi_id; /* efi identifier */ xfs_extent_t efi_extents[]; /* array of extents to free */ } xfs_efi_log_format_t; static inline size_t xfs_efi_log_format_sizeof( unsigned int nr) { return sizeof(struct xfs_efi_log_format) + nr * sizeof(struct xfs_extent); } typedef struct xfs_efi_log_format_32 { uint16_t efi_type; /* efi log item type */ uint16_t efi_size; /* size of this item */ uint32_t efi_nextents; /* # extents to free */ uint64_t efi_id; /* efi identifier */ xfs_extent_32_t efi_extents[]; /* array of extents to free */ } __attribute__((packed)) xfs_efi_log_format_32_t; static inline size_t xfs_efi_log_format32_sizeof( unsigned int nr) { return sizeof(struct xfs_efi_log_format_32) + nr * sizeof(struct xfs_extent_32); } typedef struct xfs_efi_log_format_64 { uint16_t efi_type; /* efi log item type */ uint16_t efi_size; /* size of this item */ uint32_t efi_nextents; /* # extents to free */ uint64_t efi_id; /* efi identifier */ xfs_extent_64_t efi_extents[]; /* array of extents to free */ } xfs_efi_log_format_64_t; static inline size_t xfs_efi_log_format64_sizeof( unsigned int nr) { return sizeof(struct xfs_efi_log_format_64) + nr * sizeof(struct xfs_extent_64); } /* * This is the structure used to lay out an efd log item in the * log. The efd_extents array is a variable size array whose * size is given by efd_nextents; */ typedef struct xfs_efd_log_format { uint16_t efd_type; /* efd log item type */ uint16_t efd_size; /* size of this item */ uint32_t efd_nextents; /* # of extents freed */ uint64_t efd_efi_id; /* id of corresponding efi */ xfs_extent_t efd_extents[]; /* array of extents freed */ } xfs_efd_log_format_t; static inline size_t xfs_efd_log_format_sizeof( unsigned int nr) { return sizeof(struct xfs_efd_log_format) + nr * sizeof(struct xfs_extent); } typedef struct xfs_efd_log_format_32 { uint16_t efd_type; /* efd log item type */ uint16_t efd_size; /* size of this item */ uint32_t efd_nextents; /* # of extents freed */ uint64_t efd_efi_id; /* id of corresponding efi */ xfs_extent_32_t efd_extents[]; /* array of extents freed */ } __attribute__((packed)) xfs_efd_log_format_32_t; static inline size_t xfs_efd_log_format32_sizeof( unsigned int nr) { return sizeof(struct xfs_efd_log_format_32) + nr * sizeof(struct xfs_extent_32); } typedef struct xfs_efd_log_format_64 { uint16_t efd_type; /* efd log item type */ uint16_t efd_size; /* size of this item */ uint32_t efd_nextents; /* # of extents freed */ uint64_t efd_efi_id; /* id of corresponding efi */ xfs_extent_64_t efd_extents[]; /* array of extents freed */ } xfs_efd_log_format_64_t; static inline size_t xfs_efd_log_format64_sizeof( unsigned int nr) { return sizeof(struct xfs_efd_log_format_64) + nr * sizeof(struct xfs_extent_64); } /* * RUI/RUD (reverse mapping) log format definitions */ struct xfs_map_extent { uint64_t me_owner; uint64_t me_startblock; uint64_t me_startoff; uint32_t me_len; uint32_t me_flags; }; /* rmap me_flags: upper bits are flags, lower byte is type code */ #define XFS_RMAP_EXTENT_MAP 1 #define XFS_RMAP_EXTENT_MAP_SHARED 2 #define XFS_RMAP_EXTENT_UNMAP 3 #define XFS_RMAP_EXTENT_UNMAP_SHARED 4 #define XFS_RMAP_EXTENT_CONVERT 5 #define XFS_RMAP_EXTENT_CONVERT_SHARED 6 #define XFS_RMAP_EXTENT_ALLOC 7 #define XFS_RMAP_EXTENT_FREE 8 #define XFS_RMAP_EXTENT_TYPE_MASK 0xFF #define XFS_RMAP_EXTENT_ATTR_FORK (1U << 31) #define XFS_RMAP_EXTENT_BMBT_BLOCK (1U << 30) #define XFS_RMAP_EXTENT_UNWRITTEN (1U << 29) #define XFS_RMAP_EXTENT_FLAGS (XFS_RMAP_EXTENT_TYPE_MASK | \ XFS_RMAP_EXTENT_ATTR_FORK | \ XFS_RMAP_EXTENT_BMBT_BLOCK | \ XFS_RMAP_EXTENT_UNWRITTEN) /* * This is the structure used to lay out an rui log item in the * log. The rui_extents field is a variable size array whose * size is given by rui_nextents. */ struct xfs_rui_log_format { uint16_t rui_type; /* rui log item type */ uint16_t rui_size; /* size of this item */ uint32_t rui_nextents; /* # extents to free */ uint64_t rui_id; /* rui identifier */ struct xfs_map_extent rui_extents[]; /* array of extents to rmap */ }; static inline size_t xfs_rui_log_format_sizeof( unsigned int nr) { return sizeof(struct xfs_rui_log_format) + nr * sizeof(struct xfs_map_extent); } /* * This is the structure used to lay out an rud log item in the * log. The rud_extents array is a variable size array whose * size is given by rud_nextents; */ struct xfs_rud_log_format { uint16_t rud_type; /* rud log item type */ uint16_t rud_size; /* size of this item */ uint32_t __pad; uint64_t rud_rui_id; /* id of corresponding rui */ }; /* * CUI/CUD (refcount update) log format definitions */ struct xfs_phys_extent { uint64_t pe_startblock; uint32_t pe_len; uint32_t pe_flags; }; /* refcount pe_flags: upper bits are flags, lower byte is type code */ /* Type codes are taken directly from enum xfs_refcount_intent_type. */ #define XFS_REFCOUNT_EXTENT_TYPE_MASK 0xFF #define XFS_REFCOUNT_EXTENT_FLAGS (XFS_REFCOUNT_EXTENT_TYPE_MASK) /* * This is the structure used to lay out a cui log item in the * log. The cui_extents field is a variable size array whose * size is given by cui_nextents. */ struct xfs_cui_log_format { uint16_t cui_type; /* cui log item type */ uint16_t cui_size; /* size of this item */ uint32_t cui_nextents; /* # extents to free */ uint64_t cui_id; /* cui identifier */ struct xfs_phys_extent cui_extents[]; /* array of extents */ }; static inline size_t xfs_cui_log_format_sizeof( unsigned int nr) { return sizeof(struct xfs_cui_log_format) + nr * sizeof(struct xfs_phys_extent); } /* * This is the structure used to lay out a cud log item in the * log. The cud_extents array is a variable size array whose * size is given by cud_nextents; */ struct xfs_cud_log_format { uint16_t cud_type; /* cud log item type */ uint16_t cud_size; /* size of this item */ uint32_t __pad; uint64_t cud_cui_id; /* id of corresponding cui */ }; /* * BUI/BUD (inode block mapping) log format definitions */ /* bmbt me_flags: upper bits are flags, lower byte is type code */ /* Type codes are taken directly from enum xfs_bmap_intent_type. */ #define XFS_BMAP_EXTENT_TYPE_MASK 0xFF #define XFS_BMAP_EXTENT_ATTR_FORK (1U << 31) #define XFS_BMAP_EXTENT_UNWRITTEN (1U << 30) #define XFS_BMAP_EXTENT_REALTIME (1U << 29) #define XFS_BMAP_EXTENT_FLAGS (XFS_BMAP_EXTENT_TYPE_MASK | \ XFS_BMAP_EXTENT_ATTR_FORK | \ XFS_BMAP_EXTENT_UNWRITTEN | \ XFS_BMAP_EXTENT_REALTIME) /* * This is the structure used to lay out an bui log item in the * log. The bui_extents field is a variable size array whose * size is given by bui_nextents. */ struct xfs_bui_log_format { uint16_t bui_type; /* bui log item type */ uint16_t bui_size; /* size of this item */ uint32_t bui_nextents; /* # extents to free */ uint64_t bui_id; /* bui identifier */ struct xfs_map_extent bui_extents[]; /* array of extents to bmap */ }; static inline size_t xfs_bui_log_format_sizeof( unsigned int nr) { return sizeof(struct xfs_bui_log_format) + nr * sizeof(struct xfs_map_extent); } /* * This is the structure used to lay out an bud log item in the * log. The bud_extents array is a variable size array whose * size is given by bud_nextents; */ struct xfs_bud_log_format { uint16_t bud_type; /* bud log item type */ uint16_t bud_size; /* size of this item */ uint32_t __pad; uint64_t bud_bui_id; /* id of corresponding bui */ }; /* * XMI/XMD (file mapping exchange) log format definitions */ /* This is the structure used to lay out an mapping exchange log item. */ struct xfs_xmi_log_format { uint16_t xmi_type; /* xmi log item type */ uint16_t xmi_size; /* size of this item */ uint32_t __pad; /* must be zero */ uint64_t xmi_id; /* xmi identifier */ uint64_t xmi_inode1; /* inumber of first file */ uint64_t xmi_inode2; /* inumber of second file */ uint32_t xmi_igen1; /* generation of first file */ uint32_t xmi_igen2; /* generation of second file */ uint64_t xmi_startoff1; /* block offset into file1 */ uint64_t xmi_startoff2; /* block offset into file2 */ uint64_t xmi_blockcount; /* number of blocks */ uint64_t xmi_flags; /* XFS_EXCHMAPS_* */ uint64_t xmi_isize1; /* intended file1 size */ uint64_t xmi_isize2; /* intended file2 size */ }; /* Exchange mappings between extended attribute forks instead of data forks. */ #define XFS_EXCHMAPS_ATTR_FORK (1ULL << 0) /* Set the file sizes when finished. */ #define XFS_EXCHMAPS_SET_SIZES (1ULL << 1) /* * Exchange the mappings of the two files only if the file allocation units * mapped to file1's range have been written. */ #define XFS_EXCHMAPS_INO1_WRITTEN (1ULL << 2) /* Clear the reflink flag from inode1 after the operation. */ #define XFS_EXCHMAPS_CLEAR_INO1_REFLINK (1ULL << 3) /* Clear the reflink flag from inode2 after the operation. */ #define XFS_EXCHMAPS_CLEAR_INO2_REFLINK (1ULL << 4) #define XFS_EXCHMAPS_LOGGED_FLAGS (XFS_EXCHMAPS_ATTR_FORK | \ XFS_EXCHMAPS_SET_SIZES | \ XFS_EXCHMAPS_INO1_WRITTEN | \ XFS_EXCHMAPS_CLEAR_INO1_REFLINK | \ XFS_EXCHMAPS_CLEAR_INO2_REFLINK) /* This is the structure used to lay out an mapping exchange done log item. */ struct xfs_xmd_log_format { uint16_t xmd_type; /* xmd log item type */ uint16_t xmd_size; /* size of this item */ uint32_t __pad; uint64_t xmd_xmi_id; /* id of corresponding xmi */ }; /* * Dquot Log format definitions. * * The first two fields must be the type and size fitting into * 32 bits : log_recovery code assumes that. */ typedef struct xfs_dq_logformat { uint16_t qlf_type; /* dquot log item type */ uint16_t qlf_size; /* size of this item */ xfs_dqid_t qlf_id; /* usr/grp/proj id : 32 bits */ int64_t qlf_blkno; /* blkno of dquot buffer */ int32_t qlf_len; /* len of dquot buffer */ uint32_t qlf_boffset; /* off of dquot in buffer */ } xfs_dq_logformat_t; /* * log format struct for QUOTAOFF records. * The first two fields must be the type and size fitting into * 32 bits : log_recovery code assumes that. * We write two LI_QUOTAOFF logitems per quotaoff, the last one keeps a pointer * to the first and ensures that the first logitem is taken out of the AIL * only when the last one is securely committed. */ typedef struct xfs_qoff_logformat { unsigned short qf_type; /* quotaoff log item type */ unsigned short qf_size; /* size of this item */ unsigned int qf_flags; /* USR and/or GRP */ char qf_pad[12]; /* padding for future */ } xfs_qoff_logformat_t; /* * Disk quotas status in m_qflags, and also sb_qflags. 16 bits. */ #define XFS_UQUOTA_ACCT 0x0001 /* user quota accounting ON */ #define XFS_UQUOTA_ENFD 0x0002 /* user quota limits enforced */ #define XFS_UQUOTA_CHKD 0x0004 /* quotacheck run on usr quotas */ #define XFS_PQUOTA_ACCT 0x0008 /* project quota accounting ON */ #define XFS_OQUOTA_ENFD 0x0010 /* other (grp/prj) quota limits enforced */ #define XFS_OQUOTA_CHKD 0x0020 /* quotacheck run on other (grp/prj) quotas */ #define XFS_GQUOTA_ACCT 0x0040 /* group quota accounting ON */ /* * Conversion to and from the combined OQUOTA flag (if necessary) * is done only in xfs_sb_qflags_to_disk() and xfs_sb_qflags_from_disk() */ #define XFS_GQUOTA_ENFD 0x0080 /* group quota limits enforced */ #define XFS_GQUOTA_CHKD 0x0100 /* quotacheck run on group quotas */ #define XFS_PQUOTA_ENFD 0x0200 /* project quota limits enforced */ #define XFS_PQUOTA_CHKD 0x0400 /* quotacheck run on project quotas */ #define XFS_ALL_QUOTA_ACCT \ (XFS_UQUOTA_ACCT | XFS_GQUOTA_ACCT | XFS_PQUOTA_ACCT) #define XFS_ALL_QUOTA_ENFD \ (XFS_UQUOTA_ENFD | XFS_GQUOTA_ENFD | XFS_PQUOTA_ENFD) #define XFS_ALL_QUOTA_CHKD \ (XFS_UQUOTA_CHKD | XFS_GQUOTA_CHKD | XFS_PQUOTA_CHKD) #define XFS_MOUNT_QUOTA_ALL (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\ XFS_UQUOTA_CHKD|XFS_GQUOTA_ACCT|\ XFS_GQUOTA_ENFD|XFS_GQUOTA_CHKD|\ XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD|\ XFS_PQUOTA_CHKD) /* * Inode create log item structure * * Log recovery assumes the first two entries are the type and size and they fit * in 32 bits. Also in host order (ugh) so they have to be 32 bit aligned so * decoding can be done correctly. */ struct xfs_icreate_log { uint16_t icl_type; /* type of log format structure */ uint16_t icl_size; /* size of log format structure */ __be32 icl_ag; /* ag being allocated in */ __be32 icl_agbno; /* start block of inode range */ __be32 icl_count; /* number of inodes to initialise */ __be32 icl_isize; /* size of inodes */ __be32 icl_length; /* length of extent to initialise */ __be32 icl_gen; /* inode generation number to use */ }; /* * Flags for deferred attribute operations. * Upper bits are flags, lower byte is type code */ #define XFS_ATTRI_OP_FLAGS_SET 1 /* Set the attribute */ #define XFS_ATTRI_OP_FLAGS_REMOVE 2 /* Remove the attribute */ #define XFS_ATTRI_OP_FLAGS_REPLACE 3 /* Replace the attribute */ #define XFS_ATTRI_OP_FLAGS_PPTR_SET 4 /* Set parent pointer */ #define XFS_ATTRI_OP_FLAGS_PPTR_REMOVE 5 /* Remove parent pointer */ #define XFS_ATTRI_OP_FLAGS_PPTR_REPLACE 6 /* Replace parent pointer */ #define XFS_ATTRI_OP_FLAGS_TYPE_MASK 0xFF /* Flags type mask */ /* * alfi_attr_filter captures the state of xfs_da_args.attr_filter, so it should * never have any other bits set. */ #define XFS_ATTRI_FILTER_MASK (XFS_ATTR_ROOT | \ XFS_ATTR_SECURE | \ XFS_ATTR_PARENT | \ XFS_ATTR_INCOMPLETE) /* * This is the structure used to lay out an attr log item in the * log. */ struct xfs_attri_log_format { uint16_t alfi_type; /* attri log item type */ uint16_t alfi_size; /* size of this item */ uint32_t alfi_igen; /* generation of alfi_ino for pptr ops */ uint64_t alfi_id; /* attri identifier */ uint64_t alfi_ino; /* the inode for this attr operation */ uint32_t alfi_op_flags; /* marks the op as a set or remove */ union { uint32_t alfi_name_len; /* attr name length */ struct { /* * For PPTR_REPLACE, these are the lengths of the old * and new attr names. The new and old values must * have the same length. */ uint16_t alfi_old_name_len; uint16_t alfi_new_name_len; }; }; uint32_t alfi_value_len; /* attr value length */ uint32_t alfi_attr_filter;/* attr filter flags */ }; struct xfs_attrd_log_format { uint16_t alfd_type; /* attrd log item type */ uint16_t alfd_size; /* size of this item */ uint32_t __pad; /* pad to 64 bit aligned */ uint64_t alfd_alf_id; /* id of corresponding attri */ }; #endif /* __XFS_LOG_FORMAT_H__ */ |
134 154 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 104 105 105 105 1 104 104 103 104 104 104 104 104 104 90 1 90 2 2 1 1 1 1 1 1 1 1 1 1 4 4 4 4 4 4 4 4 4 4 4 4 4 23 23 23 11 11 11 11 11 11 11 10 10 10 10 10 10 10 10 10 10 10 101 100 90 90 105 103 105 1 104 104 104 1 1 103 103 103 101 101 101 101 101 101 101 101 105 1 1 90 89 89 89 89 89 1 89 79 88 89 89 89 1 89 79 90 89 90 90 90 90 90 90 90 89 90 90 90 90 89 36 1 35 1 34 36 34 34 33 34 33 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 96 96 1 96 2 96 96 7 96 2 96 96 2 1 1 1 1 1 1 1 1 98 98 98 98 98 98 98 98 2 2 2 1 2 2 2 2 6 6 6 6 6 6 6 6 6 6 6 1 1 1 1 1 11 11 10 11 11 11 11 11 4 10 3 10 1 10 3 9 1 11 9 11 11 11 10 6 6 6 6 6 6 6 6 1 6 6 1 1 1 1 1 1 1 1 1 1 1 4 4 4 4 4 4 4 4 4 4 4 4 4 94 1 93 93 94 6 8 3 3 3 2 1 1 1 2 1 1 1 1 1 1 1 1 1 1 3 3 3 3 261 261 261 261 261 261 199 12 3 12 7 7 7 32 32 32 6 6 6 6 6 1 23 23 23 11 1 1 64 64 64 7 7 7 7 7 7 7 7 7 6 7 6 7 7 7 7 7 7 98 97 3 1 1 2 2 3 3 3 3 96 97 94 96 2 97 97 97 23 23 97 96 2 2 97 97 97 3 3 94 95 96 97 97 97 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580 5581 5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643 5644 5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 5672 5673 5674 5675 5676 5677 5678 5679 5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706 5707 5708 5709 5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729 5730 5731 5732 5733 5734 5735 5736 5737 5738 5739 5740 5741 5742 5743 5744 5745 5746 5747 5748 5749 5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807 5808 5809 5810 5811 5812 5813 5814 5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833 5834 5835 5836 5837 5838 5839 5840 5841 5842 5843 5844 5845 5846 5847 5848 5849 5850 5851 5852 5853 5854 5855 5856 5857 5858 5859 5860 5861 5862 5863 5864 5865 5866 5867 5868 5869 5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897 5898 5899 5900 5901 5902 5903 5904 5905 5906 5907 5908 5909 5910 5911 5912 5913 5914 5915 5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042 6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 6104 6105 6106 6107 6108 6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 6122 6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145 6146 6147 6148 6149 6150 6151 6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 6180 6181 6182 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 6199 6200 6201 6202 6203 6204 6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271 6272 6273 6274 6275 6276 6277 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287 6288 6289 6290 6291 6292 6293 6294 6295 6296 6297 6298 6299 6300 6301 6302 6303 6304 6305 6306 6307 6308 6309 6310 6311 6312 6313 6314 6315 6316 6317 6318 6319 6320 6321 6322 6323 6324 6325 6326 6327 6328 6329 6330 6331 6332 6333 6334 6335 6336 6337 6338 6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366 6367 6368 6369 6370 6371 6372 6373 6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400 6401 6402 6403 6404 6405 6406 6407 6408 6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 6419 6420 6421 6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 6438 6439 6440 6441 6442 6443 6444 6445 6446 6447 6448 6449 6450 6451 6452 6453 6454 6455 6456 6457 6458 6459 6460 6461 6462 6463 6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 6536 6537 6538 6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549 6550 6551 6552 6553 6554 6555 6556 6557 6558 6559 6560 6561 6562 6563 6564 6565 6566 6567 6568 6569 6570 6571 6572 6573 6574 6575 6576 6577 6578 6579 6580 6581 6582 6583 6584 6585 6586 6587 6588 6589 6590 6591 6592 6593 6594 6595 6596 6597 6598 6599 6600 6601 6602 6603 6604 6605 6606 6607 6608 6609 6610 6611 6612 6613 6614 6615 6616 6617 6618 6619 6620 6621 6622 6623 6624 6625 6626 6627 6628 6629 6630 6631 6632 6633 6634 6635 6636 6637 6638 6639 6640 6641 6642 6643 6644 6645 6646 6647 6648 6649 6650 6651 6652 6653 6654 6655 6656 6657 6658 6659 6660 6661 6662 6663 6664 6665 6666 6667 6668 6669 6670 6671 6672 6673 6674 6675 6676 6677 6678 6679 6680 6681 6682 6683 6684 6685 6686 | // SPDX-License-Identifier: GPL-2.0-or-later /* SCTP kernel implementation * (C) Copyright IBM Corp. 2001, 2004 * Copyright (c) 1999-2000 Cisco, Inc. * Copyright (c) 1999-2001 Motorola, Inc. * Copyright (c) 2001-2002 Intel Corp. * Copyright (c) 2002 Nokia Corp. * * This is part of the SCTP Linux Kernel Implementation. * * These are the state functions for the state machine. * * Please send any bug reports or fixes you make to the * email address(es): * lksctp developers <linux-sctp@vger.kernel.org> * * Written or modified by: * La Monte H.P. Yarroll <piggy@acm.org> * Karl Knutson <karl@athena.chicago.il.us> * Mathew Kotowsky <kotowsky@sctp.org> * Sridhar Samudrala <samudrala@us.ibm.com> * Jon Grimm <jgrimm@us.ibm.com> * Hui Huang <hui.huang@nokia.com> * Dajiang Zhang <dajiang.zhang@nokia.com> * Daisy Chang <daisyc@us.ibm.com> * Ardelle Fan <ardelle.fan@intel.com> * Ryan Layer <rmlayer@us.ibm.com> * Kevin Gao <kevin.gao@intel.com> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/types.h> #include <linux/kernel.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/net.h> #include <linux/inet.h> #include <linux/slab.h> #include <net/sock.h> #include <net/proto_memory.h> #include <net/inet_ecn.h> #include <linux/skbuff.h> #include <net/sctp/sctp.h> #include <net/sctp/sm.h> #include <net/sctp/structs.h> #define CREATE_TRACE_POINTS #include <trace/events/sctp.h> static struct sctp_packet *sctp_abort_pkt_new( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, struct sctp_chunk *chunk, const void *payload, size_t paylen); static int sctp_eat_data(const struct sctp_association *asoc, struct sctp_chunk *chunk, struct sctp_cmd_seq *commands); static struct sctp_packet *sctp_ootb_pkt_new( struct net *net, const struct sctp_association *asoc, const struct sctp_chunk *chunk); static void sctp_send_stale_cookie_err(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const struct sctp_chunk *chunk, struct sctp_cmd_seq *commands, struct sctp_chunk *err_chunk); static enum sctp_disposition sctp_sf_do_5_2_6_stale( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const union sctp_subtype type, void *arg, struct sctp_cmd_seq *commands); static enum sctp_disposition sctp_sf_shut_8_4_5( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const union sctp_subtype type, void *arg, struct sctp_cmd_seq *commands); static enum sctp_disposition sctp_sf_tabort_8_4_8( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const union sctp_subtype type, void *arg, struct sctp_cmd_seq *commands); static enum sctp_disposition sctp_sf_new_encap_port( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const union sctp_subtype type, void *arg, struct sctp_cmd_seq *commands); static struct sctp_sackhdr *sctp_sm_pull_sack(struct sctp_chunk *chunk); static enum sctp_disposition sctp_stop_t1_and_abort( struct net *net, struct sctp_cmd_seq *commands, __be16 error, int sk_err, const struct sctp_association *asoc, struct sctp_transport *transport); static enum sctp_disposition sctp_sf_abort_violation( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, void *arg, struct sctp_cmd_seq *commands, const __u8 *payload, const size_t paylen); static enum sctp_disposition sctp_sf_violation_chunklen( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const union sctp_subtype type, void *arg, struct sctp_cmd_seq *commands); static enum sctp_disposition sctp_sf_violation_paramlen( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const union sctp_subtype type, void *arg, void *ext, struct sctp_cmd_seq *commands); static enum sctp_disposition sctp_sf_violation_ctsn( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const union sctp_subtype type, void *arg, struct sctp_cmd_seq *commands); static enum sctp_disposition sctp_sf_violation_chunk( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const union sctp_subtype type, void *arg, struct sctp_cmd_seq *commands); static enum sctp_ierror sctp_sf_authenticate( const struct sctp_association *asoc, struct sctp_chunk *chunk); static enum sctp_disposition __sctp_sf_do_9_1_abort( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const union sctp_subtype type, void *arg, struct sctp_cmd_seq *commands); static enum sctp_disposition __sctp_sf_do_9_2_reshutack(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const union sctp_subtype type, void *arg, struct sctp_cmd_seq *commands); /* Small helper function that checks if the chunk length * is of the appropriate length. The 'required_length' argument * is set to be the size of a specific chunk we are testing. * Return Values: true = Valid length * false = Invalid length * */ static inline bool sctp_chunk_length_valid(struct sctp_chunk *chunk, __u16 required_length) { __u16 chunk_length = ntohs(chunk->chunk_hdr->length); /* Previously already marked? */ if (unlikely(chunk->pdiscard)) return false; if (unlikely(chunk_length < required_length)) return false; return true; } /* Check for format error in an ABORT chunk */ static inline bool sctp_err_chunk_valid(struct sctp_chunk *chunk) { struct sctp_errhdr *err; sctp_walk_errors(err, chunk->chunk_hdr); return (void *)err == (void *)chunk->chunk_end; } /********************************************************** * These are the state functions for handling chunk events. **********************************************************/ /* * Process the final SHUTDOWN COMPLETE. * * Section: 4 (C) (diagram), 9.2 * Upon reception of the SHUTDOWN COMPLETE chunk the endpoint will verify * that it is in SHUTDOWN-ACK-SENT state, if it is not the chunk should be * discarded. If the endpoint is in the SHUTDOWN-ACK-SENT state the endpoint * should stop the T2-shutdown timer and remove all knowledge of the * association (and thus the association enters the CLOSED state). * * Verification Tag: 8.5.1(C), sctpimpguide 2.41. * C) Rules for packet carrying SHUTDOWN COMPLETE: * ... * - The receiver of a SHUTDOWN COMPLETE shall accept the packet * if the Verification Tag field of the packet matches its own tag and * the T bit is not set * OR * it is set to its peer's tag and the T bit is set in the Chunk * Flags. * Otherwise, the receiver MUST silently discard the packet * and take no further action. An endpoint MUST ignore the * SHUTDOWN COMPLETE if it is not in the SHUTDOWN-ACK-SENT state. * * Inputs * (endpoint, asoc, chunk) * * Outputs * (asoc, reply_msg, msg_up, timers, counters) * * The return value is the disposition of the chunk. */ enum sctp_disposition sctp_sf_do_4_C(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const union sctp_subtype type, void *arg, struct sctp_cmd_seq *commands) { struct sctp_chunk *chunk = arg; struct sctp_ulpevent *ev; if (!sctp_vtag_verify_either(chunk, asoc)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* RFC 2960 6.10 Bundling * * An endpoint MUST NOT bundle INIT, INIT ACK or * SHUTDOWN COMPLETE with any other chunks. */ if (!chunk->singleton) return sctp_sf_violation_chunk(net, ep, asoc, type, arg, commands); /* Make sure that the SHUTDOWN_COMPLETE chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* RFC 2960 10.2 SCTP-to-ULP * * H) SHUTDOWN COMPLETE notification * * When SCTP completes the shutdown procedures (section 9.2) this * notification is passed to the upper layer. */ ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_SHUTDOWN_COMP, 0, 0, 0, NULL, GFP_ATOMIC); if (ev) sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); /* Upon reception of the SHUTDOWN COMPLETE chunk the endpoint * will verify that it is in SHUTDOWN-ACK-SENT state, if it is * not the chunk should be discarded. If the endpoint is in * the SHUTDOWN-ACK-SENT state the endpoint should stop the * T2-shutdown timer and remove all knowledge of the * association (and thus the association enters the CLOSED * state). */ sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_T2_SHUTDOWN)); sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD)); sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_CLOSED)); SCTP_INC_STATS(net, SCTP_MIB_SHUTDOWNS); SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL()); return SCTP_DISPOSITION_DELETE_TCB; } /* * Respond to a normal INIT chunk. * We are the side that is being asked for an association. * * Section: 5.1 Normal Establishment of an Association, B * B) "Z" shall respond immediately with an INIT ACK chunk. The * destination IP address of the INIT ACK MUST be set to the source * IP address of the INIT to which this INIT ACK is responding. In * the response, besides filling in other parameters, "Z" must set the * Verification Tag field to Tag_A, and also provide its own * Verification Tag (Tag_Z) in the Initiate Tag field. * * Verification Tag: Must be 0. * * Inputs * (endpoint, asoc, chunk) * * Outputs * (asoc, reply_msg, msg_up, timers, counters) * * The return value is the disposition of the chunk. */ enum sctp_disposition sctp_sf_do_5_1B_init(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const union sctp_subtype type, void *arg, struct sctp_cmd_seq *commands) { struct sctp_chunk *chunk = arg, *repl, *err_chunk; struct sctp_unrecognized_param *unk_param; struct sctp_association *new_asoc; struct sctp_packet *packet; int len; /* 6.10 Bundling * An endpoint MUST NOT bundle INIT, INIT ACK or * SHUTDOWN COMPLETE with any other chunks. * * IG Section 2.11.2 * Furthermore, we require that the receiver of an INIT chunk MUST * enforce these rules by silently discarding an arriving packet * with an INIT chunk that is bundled with other chunks. */ if (!chunk->singleton) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the INIT chunk has a valid length. * Normally, this would cause an ABORT with a Protocol Violation * error, but since we don't have an association, we'll * just discard the packet. */ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk))) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* If the packet is an OOTB packet which is temporarily on the * control endpoint, respond with an ABORT. */ if (ep == sctp_sk(net->sctp.ctl_sock)->ep) { SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES); return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); } /* 3.1 A packet containing an INIT chunk MUST have a zero Verification * Tag. */ if (chunk->sctp_hdr->vtag != 0) return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); /* If the INIT is coming toward a closing socket, we'll send back * and ABORT. Essentially, this catches the race of INIT being * backloged to the socket at the same time as the user issues close(). * Since the socket and all its associations are going away, we * can treat this OOTB */ if (sctp_sstate(ep->base.sk, CLOSING)) return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); /* Verify the INIT chunk before processing it. */ err_chunk = NULL; if (!sctp_verify_init(net, ep, asoc, chunk->chunk_hdr->type, (struct sctp_init_chunk *)chunk->chunk_hdr, chunk, &err_chunk)) { /* This chunk contains fatal error. It is to be discarded. * Send an ABORT, with causes if there is any. */ if (err_chunk) { packet = sctp_abort_pkt_new(net, ep, asoc, arg, (__u8 *)(err_chunk->chunk_hdr) + sizeof(struct sctp_chunkhdr), ntohs(err_chunk->chunk_hdr->length) - sizeof(struct sctp_chunkhdr)); sctp_chunk_free(err_chunk); if (packet) { sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT, SCTP_PACKET(packet)); SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); return SCTP_DISPOSITION_CONSUME; } else { return SCTP_DISPOSITION_NOMEM; } } else { return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); } } /* Grab the INIT header. */ chunk->subh.init_hdr = (struct sctp_inithdr *)chunk->skb->data; /* Tag the variable length parameters. */ chunk->param_hdr.v = skb_pull(chunk->skb, sizeof(struct sctp_inithdr)); new_asoc = sctp_make_temp_asoc(ep, chunk, GFP_ATOMIC); if (!new_asoc) goto nomem; /* Update socket peer label if first association. */ if (security_sctp_assoc_request(new_asoc, chunk->skb)) { sctp_association_free(new_asoc); return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } if (sctp_assoc_set_bind_addr_from_ep(new_asoc, sctp_scope(sctp_source(chunk)), GFP_ATOMIC) < 0) goto nomem_init; /* The call, sctp_process_init(), can fail on memory allocation. */ if (!sctp_process_init(new_asoc, chunk, sctp_source(chunk), (struct sctp_init_chunk *)chunk->chunk_hdr, GFP_ATOMIC)) goto nomem_init; /* B) "Z" shall respond immediately with an INIT ACK chunk. */ /* If there are errors need to be reported for unknown parameters, * make sure to reserve enough room in the INIT ACK for them. */ len = 0; if (err_chunk) len = ntohs(err_chunk->chunk_hdr->length) - sizeof(struct sctp_chunkhdr); repl = sctp_make_init_ack(new_asoc, chunk, GFP_ATOMIC, len); if (!repl) goto nomem_init; /* If there are errors need to be reported for unknown parameters, * include them in the outgoing INIT ACK as "Unrecognized parameter" * parameter. */ if (err_chunk) { /* Get the "Unrecognized parameter" parameter(s) out of the * ERROR chunk generated by sctp_verify_init(). Since the * error cause code for "unknown parameter" and the * "Unrecognized parameter" type is the same, we can * construct the parameters in INIT ACK by copying the * ERROR causes over. */ unk_param = (struct sctp_unrecognized_param *) ((__u8 *)(err_chunk->chunk_hdr) + sizeof(struct sctp_chunkhdr)); /* Replace the cause code with the "Unrecognized parameter" * parameter type. */ sctp_addto_chunk(repl, len, unk_param); sctp_chunk_free(err_chunk); } sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc)); sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); /* * Note: After sending out INIT ACK with the State Cookie parameter, * "Z" MUST NOT allocate any resources, nor keep any states for the * new association. Otherwise, "Z" will be vulnerable to resource * attacks. */ sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL()); return SCTP_DISPOSITION_DELETE_TCB; nomem_init: sctp_association_free(new_asoc); nomem: if (err_chunk) sctp_chunk_free(err_chunk); return SCTP_DISPOSITION_NOMEM; } /* * Respond to a normal INIT ACK chunk. * We are the side that is initiating the association. * * Section: 5.1 Normal Establishment of an Association, C * C) Upon reception of the INIT ACK from "Z", "A" shall stop the T1-init * timer and leave COOKIE-WAIT state. "A" shall then send the State * Cookie received in the INIT ACK chunk in a COOKIE ECHO chunk, start * the T1-cookie timer, and enter the COOKIE-ECHOED state. * * Note: The COOKIE ECHO chunk can be bundled with any pending outbound * DATA chunks, but it MUST be the first chunk in the packet and * until the COOKIE ACK is returned the sender MUST NOT send any * other packets to the peer. * * Verification Tag: 3.3.3 * If the value of the Initiate Tag in a received INIT ACK chunk is * found to be 0, the receiver MUST treat it as an error and close the * association by transmitting an ABORT. * * Inputs * (endpoint, asoc, chunk) * * Outputs * (asoc, reply_msg, msg_up, timers, counters) * * The return value is the disposition of the chunk. */ enum sctp_disposition sctp_sf_do_5_1C_ack(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const union sctp_subtype type, void *arg, struct sctp_cmd_seq *commands) { struct sctp_init_chunk *initchunk; struct sctp_chunk *chunk = arg; struct sctp_chunk *err_chunk; struct sctp_packet *packet; if (!sctp_vtag_verify(chunk, asoc)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* 6.10 Bundling * An endpoint MUST NOT bundle INIT, INIT ACK or * SHUTDOWN COMPLETE with any other chunks. */ if (!chunk->singleton) return sctp_sf_violation_chunk(net, ep, asoc, type, arg, commands); /* Make sure that the INIT-ACK chunk has a valid length */ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_initack_chunk))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* Grab the INIT header. */ chunk->subh.init_hdr = (struct sctp_inithdr *)chunk->skb->data; /* Verify the INIT chunk before processing it. */ err_chunk = NULL; if (!sctp_verify_init(net, ep, asoc, chunk->chunk_hdr->type, (struct sctp_init_chunk *)chunk->chunk_hdr, chunk, &err_chunk)) { enum sctp_error error = SCTP_ERROR_NO_RESOURCE; /* This chunk contains fatal error. It is to be discarded. * Send an ABORT, with causes. If there are no causes, * then there wasn't enough memory. Just terminate * the association. */ if (err_chunk) { packet = sctp_abort_pkt_new(net, ep, asoc, arg, (__u8 *)(err_chunk->chunk_hdr) + sizeof(struct sctp_chunkhdr), ntohs(err_chunk->chunk_hdr->length) - sizeof(struct sctp_chunkhdr)); sctp_chunk_free(err_chunk); if (packet) { sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT, SCTP_PACKET(packet)); SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); error = SCTP_ERROR_INV_PARAM; } } /* SCTP-AUTH, Section 6.3: * It should be noted that if the receiver wants to tear * down an association in an authenticated way only, the * handling of malformed packets should not result in * tearing down the association. * * This means that if we only want to abort associations * in an authenticated way (i.e AUTH+ABORT), then we * can't destroy this association just because the packet * was malformed. */ if (sctp_auth_recv_cid(SCTP_CID_ABORT, asoc)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); return sctp_stop_t1_and_abort(net, commands, error, ECONNREFUSED, asoc, chunk->transport); } /* Tag the variable length parameters. Note that we never * convert the parameters in an INIT chunk. */ chunk->param_hdr.v = skb_pull(chunk->skb, sizeof(struct sctp_inithdr)); initchunk = (struct sctp_init_chunk *)chunk->chunk_hdr; sctp_add_cmd_sf(commands, SCTP_CMD_PEER_INIT, SCTP_PEER_INIT(initchunk)); /* Reset init error count upon receipt of INIT-ACK. */ sctp_add_cmd_sf(commands, SCTP_CMD_INIT_COUNTER_RESET, SCTP_NULL()); /* 5.1 C) "A" shall stop the T1-init timer and leave * COOKIE-WAIT state. "A" shall then ... start the T1-cookie * timer, and enter the COOKIE-ECHOED state. */ sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT)); sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START, SCTP_TO(SCTP_EVENT_TIMEOUT_T1_COOKIE)); sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_COOKIE_ECHOED)); /* SCTP-AUTH: generate the association shared keys so that * we can potentially sign the COOKIE-ECHO. */ sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_SHKEY, SCTP_NULL()); /* 5.1 C) "A" shall then send the State Cookie received in the * INIT ACK chunk in a COOKIE ECHO chunk, ... */ /* If there is any errors to report, send the ERROR chunk generated * for unknown parameters as well. */ sctp_add_cmd_sf(commands, SCTP_CMD_GEN_COOKIE_ECHO, SCTP_CHUNK(err_chunk)); return SCTP_DISPOSITION_CONSUME; } static bool sctp_auth_chunk_verify(struct net *net, struct sctp_chunk *chunk, const struct sctp_association *asoc) { struct sctp_chunk auth; if (!chunk->auth_chunk) return true; /* SCTP-AUTH: auth_chunk pointer is only set when the cookie-echo * is supposed to be authenticated and we have to do delayed * authentication. We've just recreated the association using * the information in the cookie and now it's much easier to * do the authentication. */ /* Make sure that we and the peer are AUTH capable */ if (!net->sctp.auth_enable || !asoc->peer.auth_capable) return false; /* set-up our fake chunk so that we can process it */ auth.skb = chunk->auth_chunk; auth.asoc = chunk->asoc; auth.sctp_hdr = chunk->sctp_hdr; auth.chunk_hdr = (struct sctp_chunkhdr *) skb_push(chunk->auth_chunk, sizeof(struct sctp_chunkhdr)); skb_pull(chunk->auth_chunk, sizeof(struct sctp_chunkhdr)); auth.transport = chunk->transport; return sctp_sf_authenticate(asoc, &auth) == SCTP_IERROR_NO_ERROR; } /* * Respond to a normal COOKIE ECHO chunk. * We are the side that is being asked for an association. * * Section: 5.1 Normal Establishment of an Association, D * D) Upon reception of the COOKIE ECHO chunk, Endpoint "Z" will reply * with a COOKIE ACK chunk after building a TCB and moving to * the ESTABLISHED state. A COOKIE ACK chunk may be bundled with * any pending DATA chunks (and/or SACK chunks), but the COOKIE ACK * chunk MUST be the first chunk in the packet. * * IMPLEMENTATION NOTE: An implementation may choose to send the * Communication Up notification to the SCTP user upon reception * of a valid COOKIE ECHO chunk. * * Verification Tag: 8.5.1 Exceptions in Verification Tag Rules * D) Rules for packet carrying a COOKIE ECHO * * - When sending a COOKIE ECHO, the endpoint MUST use the value of the * Initial Tag received in the INIT ACK. * * - The receiver of a COOKIE ECHO follows the procedures in Section 5. * * Inputs * (endpoint, asoc, chunk) * * Outputs * (asoc, reply_msg, msg_up, timers, counters) * * The return value is the disposition of the chunk. */ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const union sctp_subtype type, void *arg, struct sctp_cmd_seq *commands) { struct sctp_ulpevent *ev, *ai_ev = NULL, *auth_ev = NULL; struct sctp_association *new_asoc; struct sctp_init_chunk *peer_init; struct sctp_chunk *chunk = arg; struct sctp_chunk *err_chk_p; struct sctp_chunk *repl; struct sock *sk; int error = 0; if (asoc && !sctp_vtag_verify(chunk, asoc)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* If the packet is an OOTB packet which is temporarily on the * control endpoint, respond with an ABORT. */ if (ep == sctp_sk(net->sctp.ctl_sock)->ep) { SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES); return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); } /* Make sure that the COOKIE_ECHO chunk has a valid length. * In this case, we check that we have enough for at least a * chunk header. More detailed verification is done * in sctp_unpack_cookie(). */ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* If the endpoint is not listening or if the number of associations * on the TCP-style socket exceed the max backlog, respond with an * ABORT. */ sk = ep->base.sk; if (!sctp_sstate(sk, LISTENING) || (sctp_style(sk, TCP) && sk_acceptq_is_full(sk))) return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); /* "Decode" the chunk. We have no optional parameters so we * are in good shape. */ chunk->subh.cookie_hdr = (struct sctp_signed_cookie *)chunk->skb->data; if (!pskb_pull(chunk->skb, ntohs(chunk->chunk_hdr->length) - sizeof(struct sctp_chunkhdr))) goto nomem; /* 5.1 D) Upon reception of the COOKIE ECHO chunk, Endpoint * "Z" will reply with a COOKIE ACK chunk after building a TCB * and moving to the ESTABLISHED state. */ new_asoc = sctp_unpack_cookie(ep, asoc, chunk, GFP_ATOMIC, &error, &err_chk_p); /* FIXME: * If the re-build failed, what is the proper error path * from here? * * [We should abort the association. --piggy] */ if (!new_asoc) { /* FIXME: Several errors are possible. A bad cookie should * be silently discarded, but think about logging it too. */ switch (error) { case -SCTP_IERROR_NOMEM: goto nomem; case -SCTP_IERROR_STALE_COOKIE: sctp_send_stale_cookie_err(net, ep, asoc, chunk, commands, err_chk_p); return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); case -SCTP_IERROR_BAD_SIG: default: return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } } if (security_sctp_assoc_request(new_asoc, chunk->head_skb ?: chunk->skb)) { sctp_association_free(new_asoc); return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } /* Delay state machine commands until later. * * Re-build the bind address for the association is done in * the sctp_unpack_cookie() already. */ /* This is a brand-new association, so these are not yet side * effects--it is safe to run them here. */ peer_init = (struct sctp_init_chunk *)(chunk->subh.cookie_hdr + 1); if (!sctp_process_init(new_asoc, chunk, &chunk->subh.cookie_hdr->c.peer_addr, peer_init, GFP_ATOMIC)) goto nomem_init; /* SCTP-AUTH: Now that we've populate required fields in * sctp_process_init, set up the association shared keys as * necessary so that we can potentially authenticate the ACK */ error = sctp_auth_asoc_init_active_key(new_asoc, GFP_ATOMIC); if (error) goto nomem_init; if (!sctp_auth_chunk_verify(net, chunk, new_asoc)) { sctp_association_free(new_asoc); return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } repl = sctp_make_cookie_ack(new_asoc, chunk); if (!repl) goto nomem_init; /* RFC 2960 5.1 Normal Establishment of an Association * * D) IMPLEMENTATION NOTE: An implementation may choose to * send the Communication Up notification to the SCTP user * upon reception of a valid COOKIE ECHO chunk. */ ev = sctp_ulpevent_make_assoc_change(new_asoc, 0, SCTP_COMM_UP, 0, new_asoc->c.sinit_num_ostreams, new_asoc->c.sinit_max_instreams, NULL, GFP_ATOMIC); if (!ev) goto nomem_ev; /* Sockets API Draft Section 5.3.1.6 * When a peer sends a Adaptation Layer Indication parameter , SCTP * delivers this notification to inform the application that of the * peers requested adaptation layer. */ if (new_asoc->peer.adaptation_ind) { ai_ev = sctp_ulpevent_make_adaptation_indication(new_asoc, GFP_ATOMIC); if (!ai_ev) goto nomem_aiev; } if (!new_asoc->peer.auth_capable) { auth_ev = sctp_ulpevent_make_authkey(new_asoc, 0, SCTP_AUTH_NO_AUTH, GFP_ATOMIC); if (!auth_ev) goto nomem_authev; } /* Add all the state machine commands now since we've created * everything. This way we don't introduce memory corruptions * during side-effect processing and correctly count established * associations. */ sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc)); sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_ESTABLISHED)); SCTP_INC_STATS(net, SCTP_MIB_CURRESTAB); SCTP_INC_STATS(net, SCTP_MIB_PASSIVEESTABS); sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL()); if (new_asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START, SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); /* This will send the COOKIE ACK */ sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); /* Queue the ASSOC_CHANGE event */ sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); /* Send up the Adaptation Layer Indication event */ if (ai_ev) sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ai_ev)); if (auth_ev) sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(auth_ev)); return SCTP_DISPOSITION_CONSUME; nomem_authev: sctp_ulpevent_free(ai_ev); nomem_aiev: sctp_ulpevent_free(ev); nomem_ev: sctp_chunk_free(repl); nomem_init: sctp_association_free(new_asoc); nomem: return SCTP_DISPOSITION_NOMEM; } /* * Respond to a normal COOKIE ACK chunk. * We are the side that is asking for an association. * * RFC 2960 5.1 Normal Establishment of an Association * * E) Upon reception of the COOKIE ACK, endpoint "A" will move from the * COOKIE-ECHOED state to the ESTABLISHED state, stopping the T1-cookie * timer. It may also notify its ULP about the successful * establishment of the association with a Communication Up * notification (see Section 10). * * Verification Tag: * Inputs * (endpoint, asoc, chunk) * * Outputs * (asoc, reply_msg, msg_up, timers, counters) * * The return value is the disposition of the chunk. */ enum sctp_disposition sctp_sf_do_5_1E_ca(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const union sctp_subtype type, void *arg, struct sctp_cmd_seq *commands) { struct sctp_chunk *chunk = arg; struct sctp_ulpevent *ev; if (!sctp_vtag_verify(chunk, asoc)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Set peer label for connection. */ if (security_sctp_assoc_established((struct sctp_association *)asoc, chunk->head_skb ?: chunk->skb)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Verify that the chunk length for the COOKIE-ACK is OK. * If we don't do this, any bundled chunks may be junked. */ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* Reset init error count upon receipt of COOKIE-ACK, * to avoid problems with the management of this * counter in stale cookie situations when a transition back * from the COOKIE-ECHOED state to the COOKIE-WAIT * state is performed. */ sctp_add_cmd_sf(commands, SCTP_CMD_INIT_COUNTER_RESET, SCTP_NULL()); /* RFC 2960 5.1 Normal Establishment of an Association * * E) Upon reception of the COOKIE ACK, endpoint "A" will move * from the COOKIE-ECHOED state to the ESTABLISHED state, * stopping the T1-cookie timer. */ sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_T1_COOKIE)); sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_ESTABLISHED)); SCTP_INC_STATS(net, SCTP_MIB_CURRESTAB); SCTP_INC_STATS(net, SCTP_MIB_ACTIVEESTABS); sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL()); if (asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START, SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); /* It may also notify its ULP about the successful * establishment of the association with a Communication Up * notification (see Section 10). */ ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_COMM_UP, 0, asoc->c.sinit_num_ostreams, asoc->c.sinit_max_instreams, NULL, GFP_ATOMIC); if (!ev) goto nomem; sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); /* Sockets API Draft Section 5.3.1.6 * When a peer sends a Adaptation Layer Indication parameter , SCTP * delivers this notification to inform the application that of the * peers requested adaptation layer. */ if (asoc->peer.adaptation_ind) { ev = sctp_ulpevent_make_adaptation_indication(asoc, GFP_ATOMIC); if (!ev) goto nomem; sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); } if (!asoc->peer.auth_capable) { ev = sctp_ulpevent_make_authkey(asoc, 0, SCTP_AUTH_NO_AUTH, GFP_ATOMIC); if (!ev) goto nomem; sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); } return SCTP_DISPOSITION_CONSUME; nomem: return SCTP_DISPOSITION_NOMEM; } /* Generate and sendout a heartbeat packet. */ static enum sctp_disposition sctp_sf_heartbeat( const struct sctp_endpoint *ep, const struct sctp_association *asoc, const union sctp_subtype type, void *arg, struct sctp_cmd_seq *commands) { struct sctp_transport *transport = (struct sctp_transport *) arg; struct sctp_chunk *reply; /* Send a heartbeat to our peer. */ reply = sctp_make_heartbeat(asoc, transport, 0); if (!reply) return SCTP_DISPOSITION_NOMEM; /* Set rto_pending indicating that an RTT measurement * is started with this heartbeat chunk. */ sctp_add_cmd_sf(commands, SCTP_CMD_RTO_PENDING, SCTP_TRANSPORT(transport)); sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(reply)); return SCTP_DISPOSITION_CONSUME; } /* Generate a HEARTBEAT packet on the given transport. */ enum sctp_disposition sctp_sf_sendbeat_8_3(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const union sctp_subtype type, void *arg, struct sctp_cmd_seq *commands) { struct sctp_transport *transport = (struct sctp_transport *) arg; if (asoc->overall_error_count >= asoc->max_retrans) { sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ETIMEDOUT)); /* CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */ sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_NO_ERROR)); SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); return SCTP_DISPOSITION_DELETE_TCB; } /* Section 3.3.5. * The Sender-specific Heartbeat Info field should normally include * information about the sender's current time when this HEARTBEAT * chunk is sent and the destination transport address to which this * HEARTBEAT is sent (see Section 8.3). */ if (transport->param_flags & SPP_HB_ENABLE) { if (SCTP_DISPOSITION_NOMEM == sctp_sf_heartbeat(ep, asoc, type, arg, commands)) return SCTP_DISPOSITION_NOMEM; /* Set transport error counter and association error counter * when sending heartbeat. */ sctp_add_cmd_sf(commands, SCTP_CMD_TRANSPORT_HB_SENT, SCTP_TRANSPORT(transport)); } sctp_add_cmd_sf(commands, SCTP_CMD_TRANSPORT_IDLE, SCTP_TRANSPORT(transport)); sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMER_UPDATE, SCTP_TRANSPORT(transport)); return SCTP_DISPOSITION_CONSUME; } /* resend asoc strreset_chunk. */ enum sctp_disposition sctp_sf_send_reconf(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const union sctp_subtype type, void *arg, struct sctp_cmd_seq *commands) { struct sctp_transport *transport = arg; if (asoc->overall_error_count >= asoc->max_retrans) { sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ETIMEDOUT)); /* CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */ sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_NO_ERROR)); SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); return SCTP_DISPOSITION_DELETE_TCB; } sctp_chunk_hold(asoc->strreset_chunk); sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(asoc->strreset_chunk)); sctp_add_cmd_sf(commands, SCTP_CMD_STRIKE, SCTP_TRANSPORT(transport)); return SCTP_DISPOSITION_CONSUME; } /* send hb chunk with padding for PLPMUTD. */ enum sctp_disposition sctp_sf_send_probe(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const union sctp_subtype type, void *arg, struct sctp_cmd_seq *commands) { struct sctp_transport *transport = (struct sctp_transport *)arg; struct sctp_chunk *reply; if (!sctp_transport_pl_enabled(transport)) return SCTP_DISPOSITION_CONSUME; sctp_transport_pl_send(transport); reply = sctp_make_heartbeat(asoc, transport, transport->pl.probe_size); if (!reply) return SCTP_DISPOSITION_NOMEM; sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(reply)); sctp_add_cmd_sf(commands, SCTP_CMD_PROBE_TIMER_UPDATE, SCTP_TRANSPORT(transport)); return SCTP_DISPOSITION_CONSUME; } /* * Process an heartbeat request. * * Section: 8.3 Path Heartbeat * The receiver of the HEARTBEAT should immediately respond with a * HEARTBEAT ACK that contains the Heartbeat Information field copied * from the received HEARTBEAT chunk. * * Verification Tag: 8.5 Verification Tag [Normal verification] * When receiving an SCTP packet, the endpoint MUST ensure that the * value in the Verification Tag field of the received SCTP packet * matches its own Tag. If the received Verification Tag value does not * match the receiver's own tag value, the receiver shall silently * discard the packet and shall not process it any further except for * those cases listed in Section 8.5.1 below. * * Inputs * (endpoint, asoc, chunk) * * Outputs * (asoc, reply_msg, msg_up, timers, counters) * * The return value is the disposition of the chunk. */ enum sctp_disposition sctp_sf_beat_8_3(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const union sctp_subtype type, void *arg, struct sctp_cmd_seq *commands) { struct sctp_paramhdr *param_hdr; struct sctp_chunk *chunk = arg; struct sctp_chunk *reply; size_t paylen = 0; if (!sctp_vtag_verify(chunk, asoc)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the HEARTBEAT chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_heartbeat_chunk))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* 8.3 The receiver of the HEARTBEAT should immediately * respond with a HEARTBEAT ACK that contains the Heartbeat * Information field copied from the received HEARTBEAT chunk. */ chunk->subh.hb_hdr = (struct sctp_heartbeathdr *)chunk->skb->data; param_hdr = (struct sctp_paramhdr *)chunk->subh.hb_hdr; paylen = ntohs(chunk->chunk_hdr->length) - sizeof(struct sctp_chunkhdr); if (ntohs(param_hdr->length) > paylen) return sctp_sf_violation_paramlen(net, ep, asoc, type, arg, param_hdr, commands); if (!pskb_pull(chunk->skb, paylen)) goto nomem; reply = sctp_make_heartbeat_ack(asoc, chunk, param_hdr, paylen); if (!reply) goto nomem; sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(reply)); return SCTP_DISPOSITION_CONSUME; nomem: return SCTP_DISPOSITION_NOMEM; } /* * Process the returning HEARTBEAT ACK. * * Section: 8.3 Path Heartbeat * Upon the receipt of the HEARTBEAT ACK, the sender of the HEARTBEAT * should clear the error counter of the destination transport * address to which the HEARTBEAT was sent, and mark the destination * transport address as active if it is not so marked. The endpoint may * optionally report to the upper layer when an inactive destination * address is marked as active due to the reception of the latest * HEARTBEAT ACK. The receiver of the HEARTBEAT ACK must also * clear the association overall error count as well (as defined * in section 8.1). * * The receiver of the HEARTBEAT ACK should also perform an RTT * measurement for that destination transport address using the time * value carried in the HEARTBEAT ACK chunk. * * Verification Tag: 8.5 Verification Tag [Normal verification] * * Inputs * (endpoint, asoc, chunk) * * Outputs * (asoc, reply_msg, msg_up, timers, counters) * * The return value is the disposition of the chunk. */ enum sctp_disposition sctp_sf_backbeat_8_3(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const union sctp_subtype type, void *arg, struct sctp_cmd_seq *commands) { struct sctp_sender_hb_info *hbinfo; struct sctp_chunk *chunk = arg; struct sctp_transport *link; unsigned long max_interval; union sctp_addr from_addr; if (!sctp_vtag_verify(chunk, asoc)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the HEARTBEAT-ACK chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr) + sizeof(*hbinfo))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); hbinfo = (struct sctp_sender_hb_info *)chunk->skb->data; /* Make sure that the length of the parameter is what we expect */ if (ntohs(hbinfo->param_hdr.length) != sizeof(*hbinfo)) return SCTP_DISPOSITION_DISCARD; from_addr = hbinfo->daddr; link = sctp_assoc_lookup_paddr(asoc, &from_addr); /* This should never happen, but lets log it if so. */ if (unlikely(!link)) { if (from_addr.sa.sa_family == AF_INET6) { net_warn_ratelimited("%s association %p could not find address %pI6\n", __func__, asoc, &from_addr.v6.sin6_addr); } else { net_warn_ratelimited("%s association %p could not find address %pI4\n", __func__, asoc, &from_addr.v4.sin_addr.s_addr); } return SCTP_DISPOSITION_DISCARD; } /* Validate the 64-bit random nonce. */ if (hbinfo->hb_nonce != link->hb_nonce) return SCTP_DISPOSITION_DISCARD; if (hbinfo->probe_size) { if (hbinfo->probe_size != link->pl.probe_size || !sctp_transport_pl_enabled(link)) return SCTP_DISPOSITION_DISCARD; if (sctp_transport_pl_recv(link)) return SCTP_DISPOSITION_CONSUME; return sctp_sf_send_probe(net, ep, asoc, type, link, commands); } max_interval = link->hbinterval + link->rto; /* Check if the timestamp looks valid. */ if (time_after(hbinfo->sent_at, jiffies) || time_after(jiffies, hbinfo->sent_at + max_interval)) { pr_debug("%s: HEARTBEAT ACK with invalid timestamp received " "for transport:%p\n", __func__, link); return SCTP_DISPOSITION_DISCARD; } /* 8.3 Upon the receipt of the HEARTBEAT ACK, the sender of * the HEARTBEAT should clear the error counter of the * destination transport address to which the HEARTBEAT was * sent and mark the destination transport address as active if * it is not so marked. */ sctp_add_cmd_sf(commands, SCTP_CMD_TRANSPORT_ON, SCTP_TRANSPORT(link)); return SCTP_DISPOSITION_CONSUME; } /* Helper function to send out an abort for the restart * condition. */ static int sctp_sf_send_restart_abort(struct net *net, union sctp_addr *ssa, struct sctp_chunk *init, struct sctp_cmd_seq *commands) { struct sctp_af *af = sctp_get_af_specific(ssa->v4.sin_family); union sctp_addr_param *addrparm; struct sctp_errhdr *errhdr; char buffer[sizeof(*errhdr) + sizeof(*addrparm)]; struct sctp_endpoint *ep; struct sctp_packet *pkt; int len; /* Build the error on the stack. We are way to malloc crazy * throughout the code today. */ errhdr = (struct sctp_errhdr *)buffer; addrparm = (union sctp_addr_param *)(errhdr + 1); /* Copy into a parm format. */ len = af->to_addr_param(ssa, addrparm); len += sizeof(*errhdr); errhdr->cause = SCTP_ERROR_RESTART; errhdr->length = htons(len); /* Assign to the control socket. */ ep = sctp_sk(net->sctp.ctl_sock)->ep; /* Association is NULL since this may be a restart attack and we * want to send back the attacker's vtag. */ pkt = sctp_abort_pkt_new(net, ep, NULL, init, errhdr, len); if (!pkt) goto out; sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT, SCTP_PACKET(pkt)); SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); /* Discard the rest of the inbound packet. */ sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET, SCTP_NULL()); out: /* Even if there is no memory, treat as a failure so * the packet will get dropped. */ return 0; } static bool list_has_sctp_addr(const struct list_head *list, union sctp_addr *ipaddr) { struct sctp_transport *addr; list_for_each_entry(addr, list, transports) { if (sctp_cmp_addr_exact(ipaddr, &addr->ipaddr)) return true; } return false; } /* A restart is occurring, check to make sure no new addresses * are being added as we may be under a takeover attack. */ static int sctp_sf_check_restart_addrs(const struct sctp_association *new_asoc, const struct sctp_association *asoc, struct sctp_chunk *init, struct sctp_cmd_seq *commands) { struct net *net = new_asoc->base.net; struct sctp_transport *new_addr; int ret = 1; /* Implementor's Guide - Section 5.2.2 * ... * Before responding the endpoint MUST check to see if the * unexpected INIT adds new addresses to the association. If new * addresses are added to the association, the endpoint MUST respond * with an ABORT.. */ /* Search through all current addresses and make sure * we aren't adding any new ones. */ list_for_each_entry(new_addr, &new_asoc->peer.transport_addr_list, transports) { if (!list_has_sctp_addr(&asoc->peer.transport_addr_list, &new_addr->ipaddr)) { sctp_sf_send_restart_abort(net, &new_addr->ipaddr, init, commands); ret = 0; break; } } /* Return success if all addresses were found. */ return ret; } /* Populate the verification/tie tags based on overlapping INIT * scenario. * * Note: Do not use in CLOSED or SHUTDOWN-ACK-SENT state. */ static void sctp_tietags_populate(struct sctp_association *new_asoc, const struct sctp_association *asoc) { switch (asoc->state) { /* 5.2.1 INIT received in COOKIE-WAIT or COOKIE-ECHOED State */ case SCTP_STATE_COOKIE_WAIT: new_asoc->c.my_vtag = asoc->c.my_vtag; new_asoc->c.my_ttag = asoc->c.my_vtag; new_asoc->c.peer_ttag = 0; break; case SCTP_STATE_COOKIE_ECHOED: new_asoc->c.my_vtag = asoc->c.my_vtag; new_asoc->c.my_ttag = asoc->c.my_vtag; new_asoc->c.peer_ttag = asoc->c.peer_vtag; break; /* 5.2.2 Unexpected INIT in States Other than CLOSED, COOKIE-ECHOED, * COOKIE-WAIT and SHUTDOWN-ACK-SENT */ default: new_asoc->c.my_ttag = asoc->c.my_vtag; new_asoc->c.peer_ttag = asoc->c.peer_vtag; break; } /* Other parameters for the endpoint SHOULD be copied from the * existing parameters of the association (e.g. number of * outbound streams) into the INIT ACK and cookie. */ new_asoc->rwnd = asoc->rwnd; new_asoc->c.sinit_num_ostreams = asoc->c.sinit_num_ostreams; new_asoc->c.sinit_max_instreams = asoc->c.sinit_max_instreams; new_asoc->c.initial_tsn = asoc->c.initial_tsn; } /* * Compare vtag/tietag values to determine unexpected COOKIE-ECHO * handling action. * * RFC 2960 5.2.4 Handle a COOKIE ECHO when a TCB exists. * * Returns value representing action to be taken. These action values * correspond to Action/Description values in RFC 2960, Table 2. */ static char sctp_tietags_compare(struct sctp_association *new_asoc, const struct sctp_association *asoc) { /* In this case, the peer may have restarted. */ if ((asoc->c.my_vtag != new_asoc->c.my_vtag) && (asoc->c.peer_vtag != new_asoc->c.peer_vtag) && (asoc->c.my_vtag == new_asoc->c.my_ttag) && (asoc->c.peer_vtag == new_asoc->c.peer_ttag)) return 'A'; /* Collision case B. */ if ((asoc->c.my_vtag == new_asoc->c.my_vtag) && ((asoc->c.peer_vtag != new_asoc->c.peer_vtag) || (0 == asoc->c.peer_vtag))) { return 'B'; } /* Collision case D. */ if ((asoc->c.my_vtag == new_asoc->c.my_vtag) && (asoc->c.peer_vtag == new_asoc->c.peer_vtag)) return 'D'; /* Collision case C. */ if ((asoc->c.my_vtag != new_asoc->c.my_vtag) && (asoc->c.peer_vtag == new_asoc->c.peer_vtag) && (0 == new_asoc->c.my_ttag) && (0 == new_asoc->c.peer_ttag)) return 'C'; /* No match to any of the special cases; discard this packet. */ return 'E'; } /* Common helper routine for both duplicate and simultaneous INIT * chunk handling. */ static enum sctp_disposition sctp_sf_do_unexpected_init( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const union sctp_subtype type, void *arg, struct sctp_cmd_seq *commands) { struct sctp_chunk *chunk = arg, *repl, *err_chunk; struct sctp_unrecognized_param *unk_param; struct sctp_association *new_asoc; enum sctp_disposition retval; struct sctp_packet *packet; int len; /* 6.10 Bundling * An endpoint MUST NOT bundle INIT, INIT ACK or * SHUTDOWN COMPLETE with any other chunks. * * IG Section 2.11.2 * Furthermore, we require that the receiver of an INIT chunk MUST * enforce these rules by silently discarding an arriving packet * with an INIT chunk that is bundled with other chunks. */ if (!chunk->singleton) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the INIT chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk))) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* 3.1 A packet containing an INIT chunk MUST have a zero Verification * Tag. */ if (chunk->sctp_hdr->vtag != 0) return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); if (SCTP_INPUT_CB(chunk->skb)->encap_port != chunk->transport->encap_port) return sctp_sf_new_encap_port(net, ep, asoc, type, arg, commands); /* Grab the INIT header. */ chunk->subh.init_hdr = (struct sctp_inithdr *)chunk->skb->data; /* Tag the variable length parameters. */ chunk->param_hdr.v = skb_pull(chunk->skb, sizeof(struct sctp_inithdr)); /* Verify the INIT chunk before processing it. */ err_chunk = NULL; if (!sctp_verify_init(net, ep, asoc, chunk->chunk_hdr->type, (struct sctp_init_chunk *)chunk->chunk_hdr, chunk, &err_chunk)) { /* This chunk contains fatal error. It is to be discarded. * Send an ABORT, with causes if there is any. */ if (err_chunk) { packet = sctp_abort_pkt_new(net, ep, asoc, arg, (__u8 *)(err_chunk->chunk_hdr) + sizeof(struct sctp_chunkhdr), ntohs(err_chunk->chunk_hdr->length) - sizeof(struct sctp_chunkhdr)); if (packet) { sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT, SCTP_PACKET(packet)); SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); retval = SCTP_DISPOSITION_CONSUME; } else { retval = SCTP_DISPOSITION_NOMEM; } goto cleanup; } else { return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); } } /* * Other parameters for the endpoint SHOULD be copied from the * existing parameters of the association (e.g. number of * outbound streams) into the INIT ACK and cookie. * FIXME: We are copying parameters from the endpoint not the * association. */ new_asoc = sctp_make_temp_asoc(ep, chunk, GFP_ATOMIC); if (!new_asoc) goto nomem; /* Update socket peer label if first association. */ if (security_sctp_assoc_request(new_asoc, chunk->skb)) { sctp_association_free(new_asoc); return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } if (sctp_assoc_set_bind_addr_from_ep(new_asoc, sctp_scope(sctp_source(chunk)), GFP_ATOMIC) < 0) goto nomem; /* In the outbound INIT ACK the endpoint MUST copy its current * Verification Tag and Peers Verification tag into a reserved * place (local tie-tag and per tie-tag) within the state cookie. */ if (!sctp_process_init(new_asoc, chunk, sctp_source(chunk), (struct sctp_init_chunk *)chunk->chunk_hdr, GFP_ATOMIC)) goto nomem; /* Make sure no new addresses are being added during the * restart. Do not do this check for COOKIE-WAIT state, * since there are no peer addresses to check against. * Upon return an ABORT will have been sent if needed. */ if (!sctp_state(asoc, COOKIE_WAIT)) { if (!sctp_sf_check_restart_addrs(new_asoc, asoc, chunk, commands)) { retval = SCTP_DISPOSITION_CONSUME; goto nomem_retval; } } sctp_tietags_populate(new_asoc, asoc); /* B) "Z" shall respond immediately with an INIT ACK chunk. */ /* If there are errors need to be reported for unknown parameters, * make sure to reserve enough room in the INIT ACK for them. */ len = 0; if (err_chunk) { len = ntohs(err_chunk->chunk_hdr->length) - sizeof(struct sctp_chunkhdr); } repl = sctp_make_init_ack(new_asoc, chunk, GFP_ATOMIC, len); if (!repl) goto nomem; /* If there are errors need to be reported for unknown parameters, * include them in the outgoing INIT ACK as "Unrecognized parameter" * parameter. */ if (err_chunk) { /* Get the "Unrecognized parameter" parameter(s) out of the * ERROR chunk generated by sctp_verify_init(). Since the * error cause code for "unknown parameter" and the * "Unrecognized parameter" type is the same, we can * construct the parameters in INIT ACK by copying the * ERROR causes over. */ unk_param = (struct sctp_unrecognized_param *) ((__u8 *)(err_chunk->chunk_hdr) + sizeof(struct sctp_chunkhdr)); /* Replace the cause code with the "Unrecognized parameter" * parameter type. */ sctp_addto_chunk(repl, len, unk_param); } sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc)); sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); /* * Note: After sending out INIT ACK with the State Cookie parameter, * "Z" MUST NOT allocate any resources for this new association. * Otherwise, "Z" will be vulnerable to resource attacks. */ sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL()); retval = SCTP_DISPOSITION_CONSUME; return retval; nomem: retval = SCTP_DISPOSITION_NOMEM; nomem_retval: if (new_asoc) sctp_association_free(new_asoc); cleanup: if (err_chunk) sctp_chunk_free(err_chunk); return retval; } /* * Handle simultaneous INIT. * This means we started an INIT and then we got an INIT request from * our peer. * * Section: 5.2.1 INIT received in COOKIE-WAIT or COOKIE-ECHOED State (Item B) * This usually indicates an initialization collision, i.e., each * endpoint is attempting, at about the same time, to establish an * association with the other endpoint. * * Upon receipt of an INIT in the COOKIE-WAIT or COOKIE-ECHOED state, an * endpoint MUST respond with an INIT ACK using the same parameters it * sent in its original INIT chunk (including its Verification Tag, * unchanged). These original parameters are combined with those from the * newly received INIT chunk. The endpoint shall also generate a State * Cookie with the INIT ACK. The endpoint uses the parameters sent in its * INIT to calculate the State Cookie. * * After that, the endpoint MUST NOT change its state, the T1-init * timer shall be left running and the corresponding TCB MUST NOT be * destroyed. The normal procedures for handling State Cookies when * a TCB exists will resolve the duplicate INITs to a single association. * * For an endpoint that is in the COOKIE-ECHOED state it MUST populate * its Tie-Tags with the Tag information of itself and its peer (see * section 5.2.2 for a description of the Tie-Tags). * * Verification Tag: Not explicit, but an INIT can not have a valid * verification tag, so we skip the check. * * Inputs * (endpoint, asoc, chunk) * * Outputs * (asoc, reply_msg, msg_up, timers, counters) * * The return value is the disposition of the chunk. */ enum sctp_disposition sctp_sf_do_5_2_1_siminit( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const union sctp_subtype type, void *arg, struct sctp_cmd_seq *commands) { /* Call helper to do the real work for both simultaneous and * duplicate INIT chunk handling. */ return sctp_sf_do_unexpected_init(net, ep, asoc, type, arg, commands); } /* * Handle duplicated INIT messages. These are usually delayed * restransmissions. * * Section: 5.2.2 Unexpected INIT in States Other than CLOSED, * COOKIE-ECHOED and COOKIE-WAIT * * Unless otherwise stated, upon reception of an unexpected INIT for * this association, the endpoint shall generate an INIT ACK with a * State Cookie. In the outbound INIT ACK the endpoint MUST copy its * current Verification Tag and peer's Verification Tag into a reserved * place within the state cookie. We shall refer to these locations as * the Peer's-Tie-Tag and the Local-Tie-Tag. The outbound SCTP packet * containing this INIT ACK MUST carry a Verification Tag value equal to * the Initiation Tag found in the unexpected INIT. And the INIT ACK * MUST contain a new Initiation Tag (randomly generated see Section * 5.3.1). Other parameters for the endpoint SHOULD be copied from the * existing parameters of the association (e.g. number of outbound * streams) into the INIT ACK and cookie. * * After sending out the INIT ACK, the endpoint shall take no further * actions, i.e., the existing association, including its current state, * and the corresponding TCB MUST NOT be changed. * * Note: Only when a TCB exists and the association is not in a COOKIE- * WAIT state are the Tie-Tags populated. For a normal association INIT * (i.e. the endpoint is in a COOKIE-WAIT state), the Tie-Tags MUST be * set to 0 (indicating that no previous TCB existed). The INIT ACK and * State Cookie are populated as specified in section 5.2.1. * * Verification Tag: Not specified, but an INIT has no way of knowing * what the verification tag could be, so we ignore it. * * Inputs * (endpoint, asoc, chunk) * * Outputs * (asoc, reply_msg, msg_up, timers, counters) * * The return value is the disposition of the chunk. */ enum sctp_disposition sctp_sf_do_5_2_2_dupinit( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const union sctp_subtype type, void *arg, struct sctp_cmd_seq *commands) { /* Call helper to do the real work for both simultaneous and * duplicate INIT chunk handling. */ return sctp_sf_do_unexpected_init(net, ep, asoc, type, arg, commands); } /* * Unexpected INIT-ACK handler. * * Section 5.2.3 * If an INIT ACK received by an endpoint in any state other than the * COOKIE-WAIT state, the endpoint should discard the INIT ACK chunk. * An unexpected INIT ACK usually indicates the processing of an old or * duplicated INIT chunk. */ enum sctp_disposition sctp_sf_do_5_2_3_initack( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const union sctp_subtype type, void *arg, struct sctp_cmd_seq *commands) { /* Per the above section, we'll discard the chunk if we have an * endpoint. If this is an OOTB INIT-ACK, treat it as such. */ if (ep == sctp_sk(net->sctp.ctl_sock)->ep) return sctp_sf_ootb(net, ep, asoc, type, arg, commands); else return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); } static int sctp_sf_do_assoc_update(struct sctp_association *asoc, struct sctp_association *new, struct sctp_cmd_seq *cmds) { struct net *net = asoc->base.net; struct sctp_chunk *abort; if (!sctp_assoc_update(asoc, new)) return 0; abort = sctp_make_abort(asoc, NULL, sizeof(struct sctp_errhdr)); if (abort) { sctp_init_cause(abort, SCTP_ERROR_RSRC_LOW, 0); sctp_add_cmd_sf(cmds, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); } sctp_add_cmd_sf(cmds, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNABORTED)); sctp_add_cmd_sf(cmds, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_RSRC_LOW)); SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); return -ENOMEM; } /* Unexpected COOKIE-ECHO handler for peer restart (Table 2, action 'A') * * Section 5.2.4 * A) In this case, the peer may have restarted. */ static enum sctp_disposition sctp_sf_do_dupcook_a( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, struct sctp_chunk *chunk, struct sctp_cmd_seq *commands, struct sctp_association *new_asoc) { struct sctp_init_chunk *peer_init; enum sctp_disposition disposition; struct sctp_ulpevent *ev; struct sctp_chunk *repl; struct sctp_chunk *err; /* new_asoc is a brand-new association, so these are not yet * side effects--it is safe to run them here. */ peer_init = (struct sctp_init_chunk *)(chunk->subh.cookie_hdr + 1); if (!sctp_process_init(new_asoc, chunk, sctp_source(chunk), peer_init, GFP_ATOMIC)) goto nomem; if (sctp_auth_asoc_init_active_key(new_asoc, GFP_ATOMIC)) goto nomem; if (!sctp_auth_chunk_verify(net, chunk, new_asoc)) return SCTP_DISPOSITION_DISCARD; /* Make sure no new addresses are being added during the * restart. Though this is a pretty complicated attack * since you'd have to get inside the cookie. */ if (!sctp_sf_check_restart_addrs(new_asoc, asoc, chunk, commands)) return SCTP_DISPOSITION_CONSUME; /* If the endpoint is in the SHUTDOWN-ACK-SENT state and recognizes * the peer has restarted (Action A), it MUST NOT setup a new * association but instead resend the SHUTDOWN ACK and send an ERROR * chunk with a "Cookie Received while Shutting Down" error cause to * its peer. */ if (sctp_state(asoc, SHUTDOWN_ACK_SENT)) { disposition = __sctp_sf_do_9_2_reshutack(net, ep, asoc, SCTP_ST_CHUNK(chunk->chunk_hdr->type), chunk, commands); if (SCTP_DISPOSITION_NOMEM == disposition) goto nomem; err = sctp_make_op_error(asoc, chunk, SCTP_ERROR_COOKIE_IN_SHUTDOWN, NULL, 0, 0); if (err) sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(err)); return SCTP_DISPOSITION_CONSUME; } /* For now, stop pending T3-rtx and SACK timers, fail any unsent/unacked * data. Consider the optional choice of resending of this data. */ sctp_add_cmd_sf(commands, SCTP_CMD_T3_RTX_TIMERS_STOP, SCTP_NULL()); sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_SACK)); sctp_add_cmd_sf(commands, SCTP_CMD_PURGE_OUTQUEUE, SCTP_NULL()); /* Stop pending T4-rto timer, teardown ASCONF queue, ASCONF-ACK queue * and ASCONF-ACK cache. */ sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_T4_RTO)); sctp_add_cmd_sf(commands, SCTP_CMD_PURGE_ASCONF_QUEUE, SCTP_NULL()); /* Update the content of current association. */ if (sctp_sf_do_assoc_update((struct sctp_association *)asoc, new_asoc, commands)) goto nomem; repl = sctp_make_cookie_ack(asoc, chunk); if (!repl) goto nomem; /* Report association restart to upper layer. */ ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_RESTART, 0, asoc->c.sinit_num_ostreams, asoc->c.sinit_max_instreams, NULL, GFP_ATOMIC); if (!ev) goto nomem_ev; sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); if ((sctp_state(asoc, SHUTDOWN_PENDING) || sctp_state(asoc, SHUTDOWN_SENT)) && (sctp_sstate(asoc->base.sk, CLOSING) || sock_flag(asoc->base.sk, SOCK_DEAD))) { /* If the socket has been closed by user, don't * transition to ESTABLISHED. Instead trigger SHUTDOWN * bundled with COOKIE_ACK. */ sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); return sctp_sf_do_9_2_start_shutdown(net, ep, asoc, SCTP_ST_CHUNK(0), repl, commands); } else { sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_ESTABLISHED)); sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); } return SCTP_DISPOSITION_CONSUME; nomem_ev: sctp_chunk_free(repl); nomem: return SCTP_DISPOSITION_NOMEM; } /* Unexpected COOKIE-ECHO handler for setup collision (Table 2, action 'B') * * Section 5.2.4 * B) In this case, both sides may be attempting to start an association * at about the same time but the peer endpoint started its INIT * after responding to the local endpoint's INIT */ /* This case represents an initialization collision. */ static enum sctp_disposition sctp_sf_do_dupcook_b( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, struct sctp_chunk *chunk, struct sctp_cmd_seq *commands, struct sctp_association *new_asoc) { struct sctp_init_chunk *peer_init; struct sctp_chunk *repl; /* new_asoc is a brand-new association, so these are not yet * side effects--it is safe to run them here. */ peer_init = (struct sctp_init_chunk *)(chunk->subh.cookie_hdr + 1); if (!sctp_process_init(new_asoc, chunk, sctp_source(chunk), peer_init, GFP_ATOMIC)) goto nomem; if (sctp_auth_asoc_init_active_key(new_asoc, GFP_ATOMIC)) goto nomem; if (!sctp_auth_chunk_verify(net, chunk, new_asoc)) return SCTP_DISPOSITION_DISCARD; sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_ESTABLISHED)); if (asoc->state < SCTP_STATE_ESTABLISHED) SCTP_INC_STATS(net, SCTP_MIB_CURRESTAB); sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL()); /* Update the content of current association. */ if (sctp_sf_do_assoc_update((struct sctp_association *)asoc, new_asoc, commands)) goto nomem; repl = sctp_make_cookie_ack(asoc, chunk); if (!repl) goto nomem; sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); /* RFC 2960 5.1 Normal Establishment of an Association * * D) IMPLEMENTATION NOTE: An implementation may choose to * send the Communication Up notification to the SCTP user * upon reception of a valid COOKIE ECHO chunk. * * Sadly, this needs to be implemented as a side-effect, because * we are not guaranteed to have set the association id of the real * association and so these notifications need to be delayed until * the association id is allocated. */ sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_CHANGE, SCTP_U8(SCTP_COMM_UP)); /* Sockets API Draft Section 5.3.1.6 * When a peer sends a Adaptation Layer Indication parameter , SCTP * delivers this notification to inform the application that of the * peers requested adaptation layer. * * This also needs to be done as a side effect for the same reason as * above. */ if (asoc->peer.adaptation_ind) sctp_add_cmd_sf(commands, SCTP_CMD_ADAPTATION_IND, SCTP_NULL()); if (!asoc->peer.auth_capable) sctp_add_cmd_sf(commands, SCTP_CMD_PEER_NO_AUTH, SCTP_NULL()); return SCTP_DISPOSITION_CONSUME; nomem: return SCTP_DISPOSITION_NOMEM; } /* Unexpected COOKIE-ECHO handler for setup collision (Table 2, action 'C') * * Section 5.2.4 * C) In this case, the local endpoint's cookie has arrived late. * Before it arrived, the local endpoint sent an INIT and received an * INIT-ACK and finally sent a COOKIE ECHO with the peer's same tag * but a new tag of its own. */ /* This case represents an initialization collision. */ static enum sctp_disposition sctp_sf_do_dupcook_c( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, struct sctp_chunk *chunk, struct sctp_cmd_seq *commands, struct sctp_association *new_asoc) { /* The cookie should be silently discarded. * The endpoint SHOULD NOT change states and should leave * any timers running. */ return SCTP_DISPOSITION_DISCARD; } /* Unexpected COOKIE-ECHO handler lost chunk (Table 2, action 'D') * * Section 5.2.4 * * D) When both local and remote tags match the endpoint should always * enter the ESTABLISHED state, if it has not already done so. */ /* This case represents an initialization collision. */ static enum sctp_disposition sctp_sf_do_dupcook_d( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, struct sctp_chunk *chunk, struct sctp_cmd_seq *commands, struct sctp_association *new_asoc) { struct sctp_ulpevent *ev = NULL, *ai_ev = NULL, *auth_ev = NULL; struct sctp_chunk *repl; /* Clarification from Implementor's Guide: * D) When both local and remote tags match the endpoint should * enter the ESTABLISHED state, if it is in the COOKIE-ECHOED state. * It should stop any cookie timer that may be running and send * a COOKIE ACK. */ if (!sctp_auth_chunk_verify(net, chunk, asoc)) return SCTP_DISPOSITION_DISCARD; /* Don't accidentally move back into established state. */ if (asoc->state < SCTP_STATE_ESTABLISHED) { sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_T1_COOKIE)); sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_ESTABLISHED)); SCTP_INC_STATS(net, SCTP_MIB_CURRESTAB); sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL()); /* RFC 2960 5.1 Normal Establishment of an Association * * D) IMPLEMENTATION NOTE: An implementation may choose * to send the Communication Up notification to the * SCTP user upon reception of a valid COOKIE * ECHO chunk. */ ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_COMM_UP, 0, asoc->c.sinit_num_ostreams, asoc->c.sinit_max_instreams, NULL, GFP_ATOMIC); if (!ev) goto nomem; /* Sockets API Draft Section 5.3.1.6 * When a peer sends a Adaptation Layer Indication parameter, * SCTP delivers this notification to inform the application * that of the peers requested adaptation layer. */ if (asoc->peer.adaptation_ind) { ai_ev = sctp_ulpevent_make_adaptation_indication(asoc, GFP_ATOMIC); if (!ai_ev) goto nomem; } if (!asoc->peer.auth_capable) { auth_ev = sctp_ulpevent_make_authkey(asoc, 0, SCTP_AUTH_NO_AUTH, GFP_ATOMIC); if (!auth_ev) goto nomem; } } repl = sctp_make_cookie_ack(asoc, chunk); if (!repl) goto nomem; sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); if (ev) sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); if (ai_ev) sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ai_ev)); if (auth_ev) sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(auth_ev)); return SCTP_DISPOSITION_CONSUME; nomem: if (auth_ev) sctp_ulpevent_free(auth_ev); if (ai_ev) sctp_ulpevent_free(ai_ev); if (ev) sctp_ulpevent_free(ev); return SCTP_DISPOSITION_NOMEM; } /* * Handle a duplicate COOKIE-ECHO. This usually means a cookie-carrying * chunk was retransmitted and then delayed in the network. * * Section: 5.2.4 Handle a COOKIE ECHO when a TCB exists * * Verification Tag: None. Do cookie validation. * * Inputs * (endpoint, asoc, chunk) * * Outputs * (asoc, reply_msg, msg_up, timers, counters) * * The return value is the disposition of the chunk. */ enum sctp_disposition sctp_sf_do_5_2_4_dupcook( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const union sctp_subtype type, void *arg, struct sctp_cmd_seq *commands) { struct sctp_association *new_asoc; struct sctp_chunk *chunk = arg; enum sctp_disposition retval; struct sctp_chunk *err_chk_p; int error = 0; char action; /* Make sure that the chunk has a valid length from the protocol * perspective. In this case check to make sure we have at least * enough for the chunk header. Cookie length verification is * done later. */ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) { if (!sctp_vtag_verify(chunk, asoc)) asoc = NULL; return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); } /* "Decode" the chunk. We have no optional parameters so we * are in good shape. */ chunk->subh.cookie_hdr = (struct sctp_signed_cookie *)chunk->skb->data; if (!pskb_pull(chunk->skb, ntohs(chunk->chunk_hdr->length) - sizeof(struct sctp_chunkhdr))) goto nomem; /* In RFC 2960 5.2.4 3, if both Verification Tags in the State Cookie * of a duplicate COOKIE ECHO match the Verification Tags of the * current association, consider the State Cookie valid even if * the lifespan is exceeded. */ new_asoc = sctp_unpack_cookie(ep, asoc, chunk, GFP_ATOMIC, &error, &err_chk_p); /* FIXME: * If the re-build failed, what is the proper error path * from here? * * [We should abort the association. --piggy] */ if (!new_asoc) { /* FIXME: Several errors are possible. A bad cookie should * be silently discarded, but think about logging it too. */ switch (error) { case -SCTP_IERROR_NOMEM: goto nomem; case -SCTP_IERROR_STALE_COOKIE: sctp_send_stale_cookie_err(net, ep, asoc, chunk, commands, err_chk_p); return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); case -SCTP_IERROR_BAD_SIG: default: return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } } /* Set temp so that it won't be added into hashtable */ new_asoc->temp = 1; /* Compare the tie_tag in cookie with the verification tag of * current association. */ action = sctp_tietags_compare(new_asoc, asoc); /* In cases C and E the association doesn't enter the ESTABLISHED * state, so there is no need to call security_sctp_assoc_request(). */ switch (action) { case 'A': /* Association restart. */ case 'B': /* Collision case B. */ case 'D': /* Collision case D. */ /* Update socket peer label if first association. */ if (security_sctp_assoc_request((struct sctp_association *)asoc, chunk->head_skb ?: chunk->skb)) { sctp_association_free(new_asoc); return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } break; } switch (action) { case 'A': /* Association restart. */ retval = sctp_sf_do_dupcook_a(net, ep, asoc, chunk, commands, new_asoc); break; case 'B': /* Collision case B. */ retval = sctp_sf_do_dupcook_b(net, ep, asoc, chunk, commands, new_asoc); break; case 'C': /* Collision case C. */ retval = sctp_sf_do_dupcook_c(net, ep, asoc, chunk, commands, new_asoc); break; case 'D': /* Collision case D. */ retval = sctp_sf_do_dupcook_d(net, ep, asoc, chunk, commands, new_asoc); break; default: /* Discard packet for all others. */ retval = sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); break; } /* Delete the temporary new association. */ sctp_add_cmd_sf(commands, SCTP_CMD_SET_ASOC, SCTP_ASOC(new_asoc)); sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL()); /* Restore association pointer to provide SCTP command interpreter * with a valid context in case it needs to manipulate * the queues */ sctp_add_cmd_sf(commands, SCTP_CMD_SET_ASOC, SCTP_ASOC((struct sctp_association *)asoc)); return retval; nomem: return SCTP_DISPOSITION_NOMEM; } /* * Process an ABORT. (SHUTDOWN-PENDING state) * * See sctp_sf_do_9_1_abort(). */ enum sctp_disposition sctp_sf_shutdown_pending_abort( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const union sctp_subtype type, void *arg, struct sctp_cmd_seq *commands) { struct sctp_chunk *chunk = arg; if (!sctp_vtag_verify_either(chunk, asoc)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the ABORT chunk has a valid length. * Since this is an ABORT chunk, we have to discard it * because of the following text: * RFC 2960, Section 3.3.7 * If an endpoint receives an ABORT with a format error or for an * association that doesn't exist, it MUST silently discard it. * Because the length is "invalid", we can't really discard just * as we do not know its true length. So, to be safe, discard the * packet. */ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_abort_chunk))) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* ADD-IP: Special case for ABORT chunks * F4) One special consideration is that ABORT Chunks arriving * destined to the IP address being deleted MUST be * ignored (see Section 5.3.1 for further details). */ if (SCTP_ADDR_DEL == sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); if (!sctp_err_chunk_valid(chunk)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands); } /* * Process an ABORT. (SHUTDOWN-SENT state) * * See sctp_sf_do_9_1_abort(). */ enum sctp_disposition sctp_sf_shutdown_sent_abort( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const union sctp_subtype type, void *arg, struct sctp_cmd_seq *commands) { struct sctp_chunk *chunk = arg; if (!sctp_vtag_verify_either(chunk, asoc)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the ABORT chunk has a valid length. * Since this is an ABORT chunk, we have to discard it * because of the following text: * RFC 2960, Section 3.3.7 * If an endpoint receives an ABORT with a format error or for an * association that doesn't exist, it MUST silently discard it. * Because the length is "invalid", we can't really discard just * as we do not know its true length. So, to be safe, discard the * packet. */ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_abort_chunk))) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* ADD-IP: Special case for ABORT chunks * F4) One special consideration is that ABORT Chunks arriving * destined to the IP address being deleted MUST be * ignored (see Section 5.3.1 for further details). */ if (SCTP_ADDR_DEL == sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); if (!sctp_err_chunk_valid(chunk)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Stop the T2-shutdown timer. */ sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_T2_SHUTDOWN)); /* Stop the T5-shutdown guard timer. */ sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD)); return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands); } /* * Process an ABORT. (SHUTDOWN-ACK-SENT state) * * See sctp_sf_do_9_1_abort(). */ enum sctp_disposition sctp_sf_shutdown_ack_sent_abort( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const union sctp_subtype type, void *arg, struct sctp_cmd_seq *commands) { /* The same T2 timer, so we should be able to use * common function with the SHUTDOWN-SENT state. */ return sctp_sf_shutdown_sent_abort(net, ep, asoc, type, arg, commands); } /* * Handle an Error received in COOKIE_ECHOED state. * * Only handle the error type of stale COOKIE Error, the other errors will * be ignored. * * Inputs * (endpoint, asoc, chunk) * * Outputs * (asoc, reply_msg, msg_up, timers, counters) * * The return value is the disposition of the chunk. */ enum sctp_disposition sctp_sf_cookie_echoed_err( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const union sctp_subtype type, void *arg, struct sctp_cmd_seq *commands) { struct sctp_chunk *chunk = arg; struct sctp_errhdr *err; if (!sctp_vtag_verify(chunk, asoc)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the ERROR chunk has a valid length. * The parameter walking depends on this as well. */ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_operr_chunk))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* Process the error here */ /* FUTURE FIXME: When PR-SCTP related and other optional * parms are emitted, this will have to change to handle multiple * errors. */ sctp_walk_errors(err, chunk->chunk_hdr) { if (SCTP_ERROR_STALE_COOKIE == err->cause) return sctp_sf_do_5_2_6_stale(net, ep, asoc, type, arg, commands); } /* It is possible |