Total coverage: 221281 (12%)of 1897308
3 6 4 2 2 3 5 1 4 4 2 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 // SPDX-License-Identifier: GPL-2.0-or-later /* * IPV6 GSO/GRO offload support * Linux INET6 implementation * * TCPv6 GSO/GRO support */ #include <linux/indirect_call_wrapper.h> #include <linux/skbuff.h> #include <net/inet6_hashtables.h> #include <net/gro.h> #include <net/protocol.h> #include <net/tcp.h> #include <net/ip6_checksum.h> #include "ip6_offload.h" static void tcp6_check_fraglist_gro(struct list_head *head, struct sk_buff *skb, struct tcphdr *th) { #if IS_ENABLED(CONFIG_IPV6) const struct ipv6hdr *hdr; struct sk_buff *p; struct sock *sk; struct net *net; int iif, sdif; if (likely(!(skb->dev->features & NETIF_F_GRO_FRAGLIST))) return; p = tcp_gro_lookup(head, th); if (p) { NAPI_GRO_CB(skb)->is_flist = NAPI_GRO_CB(p)->is_flist; return; } inet6_get_iif_sdif(skb, &iif, &sdif); hdr = skb_gro_network_header(skb); net = dev_net_rcu(skb->dev); sk = __inet6_lookup_established(net, &hdr->saddr, th->source, &hdr->daddr, ntohs(th->dest), iif, sdif); NAPI_GRO_CB(skb)->is_flist = !sk; if (sk) sock_gen_put(sk); #endif /* IS_ENABLED(CONFIG_IPV6) */ } INDIRECT_CALLABLE_SCOPE struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb) { struct tcphdr *th; /* Don't bother verifying checksum if we're going to flush anyway. */ if (!NAPI_GRO_CB(skb)->flush && skb_gro_checksum_validate(skb, IPPROTO_TCP, ip6_gro_compute_pseudo)) goto flush; th = tcp_gro_pull_header(skb); if (!th) goto flush; tcp6_check_fraglist_gro(head, skb, th); return tcp_gro_receive(head, skb, th); flush: NAPI_GRO_CB(skb)->flush = 1; return NULL; } INDIRECT_CALLABLE_SCOPE int tcp6_gro_complete(struct sk_buff *skb, int thoff) { const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation]; const struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + offset); struct tcphdr *th = tcp_hdr(skb); if (unlikely(NAPI_GRO_CB(skb)->is_flist)) { skb_shinfo(skb)->gso_type |= SKB_GSO_FRAGLIST | SKB_GSO_TCPV6; skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; __skb_incr_checksum_unnecessary(skb); return 0; } th->check = ~tcp_v6_check(skb->len - thoff, &iph->saddr, &iph->daddr, 0); skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6; tcp_gro_complete(skb); return 0; } static void __tcpv6_gso_segment_csum(struct sk_buff *seg, struct in6_addr *oldip, const struct in6_addr *newip, __be16 *oldport, __be16 newport) { struct tcphdr *th = tcp_hdr(seg); if (!ipv6_addr_equal(oldip, newip)) { inet_proto_csum_replace16(&th->check, seg, oldip->s6_addr32, newip->s6_addr32, true); *oldip = *newip; } if (*oldport == newport) return; inet_proto_csum_replace2(&th->check, seg, *oldport, newport, false); *oldport = newport; } static struct sk_buff *__tcpv6_gso_segment_list_csum(struct sk_buff *segs) { const struct tcphdr *th; const struct ipv6hdr *iph; struct sk_buff *seg; struct tcphdr *th2; struct ipv6hdr *iph2; seg = segs; th = tcp_hdr(seg); iph = ipv6_hdr(seg); th2 = tcp_hdr(seg->next); iph2 = ipv6_hdr(seg->next); if (!(*(const u32 *)&th->source ^ *(const u32 *)&th2->source) && ipv6_addr_equal(&iph->saddr, &iph2->saddr) && ipv6_addr_equal(&iph->daddr, &iph2->daddr)) return segs; while ((seg = seg->next)) { th2 = tcp_hdr(seg); iph2 = ipv6_hdr(seg); __tcpv6_gso_segment_csum(seg, &iph2->saddr, &iph->saddr, &th2->source, th->source); __tcpv6_gso_segment_csum(seg, &iph2->daddr, &iph->daddr, &th2->dest, th->dest); } return segs; } static struct sk_buff *__tcp6_gso_segment_list(struct sk_buff *skb, netdev_features_t features) { skb = skb_segment_list(skb, features, skb_mac_header_len(skb)); if (IS_ERR(skb)) return skb; return __tcpv6_gso_segment_list_csum(skb); } static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb, netdev_features_t features) { struct tcphdr *th; if (!(skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)) return ERR_PTR(-EINVAL); if (!pskb_may_pull(skb, sizeof(*th))) return ERR_PTR(-EINVAL); if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST) { struct tcphdr *th = tcp_hdr(skb); if (skb_pagelen(skb) - th->doff * 4 == skb_shinfo(skb)->gso_size) return __tcp6_gso_segment_list(skb, features); skb->ip_summed = CHECKSUM_NONE; } if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { const struct ipv6hdr *ipv6h = ipv6_hdr(skb); struct tcphdr *th = tcp_hdr(skb); /* Set up pseudo header, usually expect stack to have done * this. */ th->check = 0; skb->ip_summed = CHECKSUM_PARTIAL; __tcp_v6_send_check(skb, &ipv6h->saddr, &ipv6h->daddr); } return tcp_gso_segment(skb, features); } int __init tcpv6_offload_init(void) { net_hotdata.tcpv6_offload = (struct net_offload) { .callbacks = { .gso_segment = tcp6_gso_segment, .gro_receive = tcp6_gro_receive, .gro_complete = tcp6_gro_complete, }, }; return inet6_add_offload(&net_hotdata.tcpv6_offload, IPPROTO_TCP); }
176 855 3 143 8741 57 4 46 6 1 290 2005 1169 130 1048 1192 9089 8898 105 8778 8966 547 370 237 239 108 329 6 25 26 128 128 128 52 52 19 186 1 188 169 184 1 1 1 1 879 880 1 842 52 1 1 1 186 127 39 1 6 1 12 21 133 113 21 114 207 210 173 173 5 5 13 13 2 2 9 8 38 13 17 9 5 5 33 445 303 5 121 129 129 127 16 119 129 127 35 35 6 2 2 106 7 35 68 2 3 5 7658 2 737 1733 1192 1 588 158 740 732 2 2 7662 5 7647 9227 7729 4 1720 3 1 704 37 1719 9229 907 7 8505 2 2 1 1 33 33 33 15 1 33 295 294 14 288 324 298 33 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 // SPDX-License-Identifier: GPL-2.0-only #include <linux/export.h> #include <linux/bvec.h> #include <linux/fault-inject-usercopy.h> #include <linux/uio.h> #include <linux/pagemap.h> #include <linux/highmem.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/splice.h> #include <linux/compat.h> #include <linux/scatterlist.h> #include <linux/instrumented.h> #include <linux/iov_iter.h> static __always_inline size_t copy_to_user_iter(void __user *iter_to, size_t progress, size_t len, void *from, void *priv2) { if (should_fail_usercopy()) return len; if (access_ok(iter_to, len)) { from += progress; instrument_copy_to_user(iter_to, from, len); len = raw_copy_to_user(iter_to, from, len); } return len; } static __always_inline size_t copy_to_user_iter_nofault(void __user *iter_to, size_t progress, size_t len, void *from, void *priv2) { ssize_t res; if (should_fail_usercopy()) return len; from += progress; res = copy_to_user_nofault(iter_to, from, len); return res < 0 ? len : res; } static __always_inline size_t copy_from_user_iter(void __user *iter_from, size_t progress, size_t len, void *to, void *priv2) { size_t res = len; if (should_fail_usercopy()) return len; if (access_ok(iter_from, len)) { to += progress; instrument_copy_from_user_before(to, iter_from, len); res = raw_copy_from_user(to, iter_from, len); instrument_copy_from_user_after(to, iter_from, len, res); } return res; } static __always_inline size_t memcpy_to_iter(void *iter_to, size_t progress, size_t len, void *from, void *priv2) { memcpy(iter_to, from + progress, len); return 0; } static __always_inline size_t memcpy_from_iter(void *iter_from, size_t progress, size_t len, void *to, void *priv2) { memcpy(to + progress, iter_from, len); return 0; } /* * fault_in_iov_iter_readable - fault in iov iterator for reading * @i: iterator * @size: maximum length * * Fault in one or more iovecs of the given iov_iter, to a maximum length of * @size. For each iovec, fault in each page that constitutes the iovec. * * Returns the number of bytes not faulted in (like copy_to_user() and * copy_from_user()). * * Always returns 0 for non-userspace iterators. */ size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t size) { if (iter_is_ubuf(i)) { size_t n = min(size, iov_iter_count(i)); n -= fault_in_readable(i->ubuf + i->iov_offset, n); return size - n; } else if (iter_is_iovec(i)) { size_t count = min(size, iov_iter_count(i)); const struct iovec *p; size_t skip; size -= count; for (p = iter_iov(i), skip = i->iov_offset; count; p++, skip = 0) { size_t len = min(count, p->iov_len - skip); size_t ret; if (unlikely(!len)) continue; ret = fault_in_readable(p->iov_base + skip, len); count -= len - ret; if (ret) break; } return count + size; } return 0; } EXPORT_SYMBOL(fault_in_iov_iter_readable); /* * fault_in_iov_iter_writeable - fault in iov iterator for writing * @i: iterator * @size: maximum length * * Faults in the iterator using get_user_pages(), i.e., without triggering * hardware page faults. This is primarily useful when we already know that * some or all of the pages in @i aren't in memory. * * Returns the number of bytes not faulted in, like copy_to_user() and * copy_from_user(). * * Always returns 0 for non-user-space iterators. */ size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t size) { if (iter_is_ubuf(i)) { size_t n = min(size, iov_iter_count(i)); n -= fault_in_safe_writeable(i->ubuf + i->iov_offset, n); return size - n; } else if (iter_is_iovec(i)) { size_t count = min(size, iov_iter_count(i)); const struct iovec *p; size_t skip; size -= count; for (p = iter_iov(i), skip = i->iov_offset; count; p++, skip = 0) { size_t len = min(count, p->iov_len - skip); size_t ret; if (unlikely(!len)) continue; ret = fault_in_safe_writeable(p->iov_base + skip, len); count -= len - ret; if (ret) break; } return count + size; } return 0; } EXPORT_SYMBOL(fault_in_iov_iter_writeable); void iov_iter_init(struct iov_iter *i, unsigned int direction, const struct iovec *iov, unsigned long nr_segs, size_t count) { WARN_ON(direction & ~(READ | WRITE)); *i = (struct iov_iter) { .iter_type = ITER_IOVEC, .nofault = false, .data_source = direction, .__iov = iov, .nr_segs = nr_segs, .iov_offset = 0, .count = count }; } EXPORT_SYMBOL(iov_iter_init); size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { if (WARN_ON_ONCE(i->data_source)) return 0; if (user_backed_iter(i)) might_fault(); return iterate_and_advance(i, bytes, (void *)addr, copy_to_user_iter, memcpy_to_iter); } EXPORT_SYMBOL(_copy_to_iter); #ifdef CONFIG_ARCH_HAS_COPY_MC static __always_inline size_t copy_to_user_iter_mc(void __user *iter_to, size_t progress, size_t len, void *from, void *priv2) { if (access_ok(iter_to, len)) { from += progress; instrument_copy_to_user(iter_to, from, len); len = copy_mc_to_user(iter_to, from, len); } return len; } static __always_inline size_t memcpy_to_iter_mc(void *iter_to, size_t progress, size_t len, void *from, void *priv2) { return copy_mc_to_kernel(iter_to, from + progress, len); } /** * _copy_mc_to_iter - copy to iter with source memory error exception handling * @addr: source kernel address * @bytes: total transfer length * @i: destination iterator * * The pmem driver deploys this for the dax operation * (dax_copy_to_iter()) for dax reads (bypass page-cache and the * block-layer). Upon #MC read(2) aborts and returns EIO or the bytes * successfully copied. * * The main differences between this and typical _copy_to_iter(). * * * Typical tail/residue handling after a fault retries the copy * byte-by-byte until the fault happens again. Re-triggering machine * checks is potentially fatal so the implementation uses source * alignment and poison alignment assumptions to avoid re-triggering * hardware exceptions. * * * ITER_KVEC and ITER_BVEC can return short copies. Compare to * copy_to_iter() where only ITER_IOVEC attempts might return a short copy. * * Return: number of bytes copied (may be %0) */ size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { if (WARN_ON_ONCE(i->data_source)) return 0; if (user_backed_iter(i)) might_fault(); return iterate_and_advance(i, bytes, (void *)addr, copy_to_user_iter_mc, memcpy_to_iter_mc); } EXPORT_SYMBOL_GPL(_copy_mc_to_iter); #endif /* CONFIG_ARCH_HAS_COPY_MC */ static __always_inline size_t __copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) { return iterate_and_advance(i, bytes, addr, copy_from_user_iter, memcpy_from_iter); } size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) { if (WARN_ON_ONCE(!i->data_source)) return 0; if (user_backed_iter(i)) might_fault(); return __copy_from_iter(addr, bytes, i); } EXPORT_SYMBOL(_copy_from_iter); static __always_inline size_t copy_from_user_iter_nocache(void __user *iter_from, size_t progress, size_t len, void *to, void *priv2) { return __copy_from_user_inatomic_nocache(to + progress, iter_from, len); } size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) { if (WARN_ON_ONCE(!i->data_source)) return 0; return iterate_and_advance(i, bytes, addr, copy_from_user_iter_nocache, memcpy_from_iter); } EXPORT_SYMBOL(_copy_from_iter_nocache); #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE static __always_inline size_t copy_from_user_iter_flushcache(void __user *iter_from, size_t progress, size_t len, void *to, void *priv2) { return __copy_from_user_flushcache(to + progress, iter_from, len); } static __always_inline size_t memcpy_from_iter_flushcache(void *iter_from, size_t progress, size_t len, void *to, void *priv2) { memcpy_flushcache(to + progress, iter_from, len); return 0; } /** * _copy_from_iter_flushcache - write destination through cpu cache * @addr: destination kernel address * @bytes: total transfer length * @i: source iterator * * The pmem driver arranges for filesystem-dax to use this facility via * dax_copy_from_iter() for ensuring that writes to persistent memory * are flushed through the CPU cache. It is differentiated from * _copy_from_iter_nocache() in that guarantees all data is flushed for * all iterator types. The _copy_from_iter_nocache() only attempts to * bypass the cache for the ITER_IOVEC case, and on some archs may use * instructions that strand dirty-data in the cache. * * Return: number of bytes copied (may be %0) */ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i) { if (WARN_ON_ONCE(!i->data_source)) return 0; return iterate_and_advance(i, bytes, addr, copy_from_user_iter_flushcache, memcpy_from_iter_flushcache); } EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache); #endif static inline bool page_copy_sane(struct page *page, size_t offset, size_t n) { struct page *head; size_t v = n + offset; /* * The general case needs to access the page order in order * to compute the page size. * However, we mostly deal with order-0 pages and thus can * avoid a possible cache line miss for requests that fit all * page orders. */ if (n <= v && v <= PAGE_SIZE) return true; head = compound_head(page); v += (page - head) << PAGE_SHIFT; if (WARN_ON(n > v || v > page_size(head))) return false; return true; } size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i) { size_t res = 0; if (!page_copy_sane(page, offset, bytes)) return 0; if (WARN_ON_ONCE(i->data_source)) return 0; page += offset / PAGE_SIZE; // first subpage offset %= PAGE_SIZE; while (1) { void *kaddr = kmap_local_page(page); size_t n = min(bytes, (size_t)PAGE_SIZE - offset); n = _copy_to_iter(kaddr + offset, n, i); kunmap_local(kaddr); res += n; bytes -= n; if (!bytes || !n) break; offset += n; if (offset == PAGE_SIZE) { page++; offset = 0; } } return res; } EXPORT_SYMBOL(copy_page_to_iter); size_t copy_page_to_iter_nofault(struct page *page, unsigned offset, size_t bytes, struct iov_iter *i) { size_t res = 0; if (!page_copy_sane(page, offset, bytes)) return 0; if (WARN_ON_ONCE(i->data_source)) return 0; page += offset / PAGE_SIZE; // first subpage offset %= PAGE_SIZE; while (1) { void *kaddr = kmap_local_page(page); size_t n = min(bytes, (size_t)PAGE_SIZE - offset); n = iterate_and_advance(i, n, kaddr + offset, copy_to_user_iter_nofault, memcpy_to_iter); kunmap_local(kaddr); res += n; bytes -= n; if (!bytes || !n) break; offset += n; if (offset == PAGE_SIZE) { page++; offset = 0; } } return res; } EXPORT_SYMBOL(copy_page_to_iter_nofault); size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i) { size_t res = 0; if (!page_copy_sane(page, offset, bytes)) return 0; page += offset / PAGE_SIZE; // first subpage offset %= PAGE_SIZE; while (1) { void *kaddr = kmap_local_page(page); size_t n = min(bytes, (size_t)PAGE_SIZE - offset); n = _copy_from_iter(kaddr + offset, n, i); kunmap_local(kaddr); res += n; bytes -= n; if (!bytes || !n) break; offset += n; if (offset == PAGE_SIZE) { page++; offset = 0; } } return res; } EXPORT_SYMBOL(copy_page_from_iter); static __always_inline size_t zero_to_user_iter(void __user *iter_to, size_t progress, size_t len, void *priv, void *priv2) { return clear_user(iter_to, len); } static __always_inline size_t zero_to_iter(void *iter_to, size_t progress, size_t len, void *priv, void *priv2) { memset(iter_to, 0, len); return 0; } size_t iov_iter_zero(size_t bytes, struct iov_iter *i) { return iterate_and_advance(i, bytes, NULL, zero_to_user_iter, zero_to_iter); } EXPORT_SYMBOL(iov_iter_zero); size_t copy_folio_from_iter_atomic(struct folio *folio, size_t offset, size_t bytes, struct iov_iter *i) { size_t n, copied = 0; if (!page_copy_sane(&folio->page, offset, bytes)) return 0; if (WARN_ON_ONCE(!i->data_source)) return 0; do { char *to = kmap_local_folio(folio, offset); n = bytes - copied; if (folio_test_partial_kmap(folio) && n > PAGE_SIZE - offset_in_page(offset)) n = PAGE_SIZE - offset_in_page(offset); pagefault_disable(); n = __copy_from_iter(to, n, i); pagefault_enable(); kunmap_local(to); copied += n; offset += n; } while (copied != bytes && n > 0); return copied; } EXPORT_SYMBOL(copy_folio_from_iter_atomic); static void iov_iter_bvec_advance(struct iov_iter *i, size_t size) { const struct bio_vec *bvec, *end; if (!i->count) return; i->count -= size; size += i->iov_offset; for (bvec = i->bvec, end = bvec + i->nr_segs; bvec < end; bvec++) { if (likely(size < bvec->bv_len)) break; size -= bvec->bv_len; } i->iov_offset = size; i->nr_segs -= bvec - i->bvec; i->bvec = bvec; } static void iov_iter_iovec_advance(struct iov_iter *i, size_t size) { const struct iovec *iov, *end; if (!i->count) return; i->count -= size; size += i->iov_offset; // from beginning of current segment for (iov = iter_iov(i), end = iov + i->nr_segs; iov < end; iov++) { if (likely(size < iov->iov_len)) break; size -= iov->iov_len; } i->iov_offset = size; i->nr_segs -= iov - iter_iov(i); i->__iov = iov; } static void iov_iter_folioq_advance(struct iov_iter *i, size_t size) { const struct folio_queue *folioq = i->folioq; unsigned int slot = i->folioq_slot; if (!i->count) return; i->count -= size; if (slot >= folioq_nr_slots(folioq)) { folioq = folioq->next; slot = 0; } size += i->iov_offset; /* From beginning of current segment. */ do { size_t fsize = folioq_folio_size(folioq, slot); if (likely(size < fsize)) break; size -= fsize; slot++; if (slot >= folioq_nr_slots(folioq) && folioq->next) { folioq = folioq->next; slot = 0; } } while (size); i->iov_offset = size; i->folioq_slot = slot; i->folioq = folioq; } void iov_iter_advance(struct iov_iter *i, size_t size) { if (unlikely(i->count < size)) size = i->count; if (likely(iter_is_ubuf(i)) || unlikely(iov_iter_is_xarray(i))) { i->iov_offset += size; i->count -= size; } else if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) { /* iovec and kvec have identical layouts */ iov_iter_iovec_advance(i, size); } else if (iov_iter_is_bvec(i)) { iov_iter_bvec_advance(i, size); } else if (iov_iter_is_folioq(i)) { iov_iter_folioq_advance(i, size); } else if (iov_iter_is_discard(i)) { i->count -= size; } } EXPORT_SYMBOL(iov_iter_advance); static void iov_iter_folioq_revert(struct iov_iter *i, size_t unroll) { const struct folio_queue *folioq = i->folioq; unsigned int slot = i->folioq_slot; for (;;) { size_t fsize; if (slot == 0) { folioq = folioq->prev; slot = folioq_nr_slots(folioq); } slot--; fsize = folioq_folio_size(folioq, slot); if (unroll <= fsize) { i->iov_offset = fsize - unroll; break; } unroll -= fsize; } i->folioq_slot = slot; i->folioq = folioq; } void iov_iter_revert(struct iov_iter *i, size_t unroll) { if (!unroll) return; if (WARN_ON(unroll > MAX_RW_COUNT)) return; i->count += unroll; if (unlikely(iov_iter_is_discard(i))) return; if (unroll <= i->iov_offset) { i->iov_offset -= unroll; return; } unroll -= i->iov_offset; if (iov_iter_is_xarray(i) || iter_is_ubuf(i)) { BUG(); /* We should never go beyond the start of the specified * range since we might then be straying into pages that * aren't pinned. */ } else if (iov_iter_is_bvec(i)) { const struct bio_vec *bvec = i->bvec; while (1) { size_t n = (--bvec)->bv_len; i->nr_segs++; if (unroll <= n) { i->bvec = bvec; i->iov_offset = n - unroll; return; } unroll -= n; } } else if (iov_iter_is_folioq(i)) { i->iov_offset = 0; iov_iter_folioq_revert(i, unroll); } else { /* same logics for iovec and kvec */ const struct iovec *iov = iter_iov(i); while (1) { size_t n = (--iov)->iov_len; i->nr_segs++; if (unroll <= n) { i->__iov = iov; i->iov_offset = n - unroll; return; } unroll -= n; } } } EXPORT_SYMBOL(iov_iter_revert); /* * Return the count of just the current iov_iter segment. */ size_t iov_iter_single_seg_count(const struct iov_iter *i) { if (i->nr_segs > 1) { if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) return min(i->count, iter_iov(i)->iov_len - i->iov_offset); if (iov_iter_is_bvec(i)) return min(i->count, i->bvec->bv_len - i->iov_offset); } if (unlikely(iov_iter_is_folioq(i))) return !i->count ? 0 : umin(folioq_folio_size(i->folioq, i->folioq_slot), i->count); return i->count; } EXPORT_SYMBOL(iov_iter_single_seg_count); void iov_iter_kvec(struct iov_iter *i, unsigned int direction, const struct kvec *kvec, unsigned long nr_segs, size_t count) { WARN_ON(direction & ~(READ | WRITE)); *i = (struct iov_iter){ .iter_type = ITER_KVEC, .data_source = direction, .kvec = kvec, .nr_segs = nr_segs, .iov_offset = 0, .count = count }; } EXPORT_SYMBOL(iov_iter_kvec); void iov_iter_bvec(struct iov_iter *i, unsigned int direction, const struct bio_vec *bvec, unsigned long nr_segs, size_t count) { WARN_ON(direction & ~(READ | WRITE)); *i = (struct iov_iter){ .iter_type = ITER_BVEC, .data_source = direction, .bvec = bvec, .nr_segs = nr_segs, .iov_offset = 0, .count = count }; } EXPORT_SYMBOL(iov_iter_bvec); /** * iov_iter_folio_queue - Initialise an I/O iterator to use the folios in a folio queue * @i: The iterator to initialise. * @direction: The direction of the transfer. * @folioq: The starting point in the folio queue. * @first_slot: The first slot in the folio queue to use * @offset: The offset into the folio in the first slot to start at * @count: The size of the I/O buffer in bytes. * * Set up an I/O iterator to either draw data out of the pages attached to an * inode or to inject data into those pages. The pages *must* be prevented * from evaporation, either by taking a ref on them or locking them by the * caller. */ void iov_iter_folio_queue(struct iov_iter *i, unsigned int direction, const struct folio_queue *folioq, unsigned int first_slot, unsigned int offset, size_t count) { BUG_ON(direction & ~1); *i = (struct iov_iter) { .iter_type = ITER_FOLIOQ, .data_source = direction, .folioq = folioq, .folioq_slot = first_slot, .count = count, .iov_offset = offset, }; } EXPORT_SYMBOL(iov_iter_folio_queue); /** * iov_iter_xarray - Initialise an I/O iterator to use the pages in an xarray * @i: The iterator to initialise. * @direction: The direction of the transfer. * @xarray: The xarray to access. * @start: The start file position. * @count: The size of the I/O buffer in bytes. * * Set up an I/O iterator to either draw data out of the pages attached to an * inode or to inject data into those pages. The pages *must* be prevented * from evaporation, either by taking a ref on them or locking them by the * caller. */ void iov_iter_xarray(struct iov_iter *i, unsigned int direction, struct xarray *xarray, loff_t start, size_t count) { BUG_ON(direction & ~1); *i = (struct iov_iter) { .iter_type = ITER_XARRAY, .data_source = direction, .xarray = xarray, .xarray_start = start, .count = count, .iov_offset = 0 }; } EXPORT_SYMBOL(iov_iter_xarray); /** * iov_iter_discard - Initialise an I/O iterator that discards data * @i: The iterator to initialise. * @direction: The direction of the transfer. * @count: The size of the I/O buffer in bytes. * * Set up an I/O iterator that just discards everything that's written to it. * It's only available as a READ iterator. */ void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count) { BUG_ON(direction != READ); *i = (struct iov_iter){ .iter_type = ITER_DISCARD, .data_source = false, .count = count, .iov_offset = 0 }; } EXPORT_SYMBOL(iov_iter_discard); static bool iov_iter_aligned_iovec(const struct iov_iter *i, unsigned addr_mask, unsigned len_mask) { const struct iovec *iov = iter_iov(i); size_t size = i->count; size_t skip = i->iov_offset; do { size_t len = iov->iov_len - skip; if (len > size) len = size; if (len & len_mask) return false; if ((unsigned long)(iov->iov_base + skip) & addr_mask) return false; iov++; size -= len; skip = 0; } while (size); return true; } static bool iov_iter_aligned_bvec(const struct iov_iter *i, unsigned addr_mask, unsigned len_mask) { const struct bio_vec *bvec = i->bvec; unsigned skip = i->iov_offset; size_t size = i->count; do { size_t len = bvec->bv_len - skip; if (len > size) len = size; if (len & len_mask) return false; if ((unsigned long)(bvec->bv_offset + skip) & addr_mask) return false; bvec++; size -= len; skip = 0; } while (size); return true; } /** * iov_iter_is_aligned() - Check if the addresses and lengths of each segments * are aligned to the parameters. * * @i: &struct iov_iter to restore * @addr_mask: bit mask to check against the iov element's addresses * @len_mask: bit mask to check against the iov element's lengths * * Return: false if any addresses or lengths intersect with the provided masks */ bool iov_iter_is_aligned(const struct iov_iter *i, unsigned addr_mask, unsigned len_mask) { if (likely(iter_is_ubuf(i))) { if (i->count & len_mask) return false; if ((unsigned long)(i->ubuf + i->iov_offset) & addr_mask) return false; return true; } if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) return iov_iter_aligned_iovec(i, addr_mask, len_mask); if (iov_iter_is_bvec(i)) return iov_iter_aligned_bvec(i, addr_mask, len_mask); /* With both xarray and folioq types, we're dealing with whole folios. */ if (iov_iter_is_xarray(i)) { if (i->count & len_mask) return false; if ((i->xarray_start + i->iov_offset) & addr_mask) return false; } if (iov_iter_is_folioq(i)) { if (i->count & len_mask) return false; if (i->iov_offset & addr_mask) return false; } return true; } EXPORT_SYMBOL_GPL(iov_iter_is_aligned); static unsigned long iov_iter_alignment_iovec(const struct iov_iter *i) { const struct iovec *iov = iter_iov(i); unsigned long res = 0; size_t size = i->count; size_t skip = i->iov_offset; do { size_t len = iov->iov_len - skip; if (len) { res |= (unsigned long)iov->iov_base + skip; if (len > size) len = size; res |= len; size -= len; } iov++; skip = 0; } while (size); return res; } static unsigned long iov_iter_alignment_bvec(const struct iov_iter *i) { const struct bio_vec *bvec = i->bvec; unsigned res = 0; size_t size = i->count; unsigned skip = i->iov_offset; do { size_t len = bvec->bv_len - skip; res |= (unsigned long)bvec->bv_offset + skip; if (len > size) len = size; res |= len; bvec++; size -= len; skip = 0; } while (size); return res; } unsigned long iov_iter_alignment(const struct iov_iter *i) { if (likely(iter_is_ubuf(i))) { size_t size = i->count; if (size) return ((unsigned long)i->ubuf + i->iov_offset) | size; return 0; } /* iovec and kvec have identical layouts */ if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) return iov_iter_alignment_iovec(i); if (iov_iter_is_bvec(i)) return iov_iter_alignment_bvec(i); /* With both xarray and folioq types, we're dealing with whole folios. */ if (iov_iter_is_folioq(i)) return i->iov_offset | i->count; if (iov_iter_is_xarray(i)) return (i->xarray_start + i->iov_offset) | i->count; return 0; } EXPORT_SYMBOL(iov_iter_alignment); unsigned long iov_iter_gap_alignment(const struct iov_iter *i) { unsigned long res = 0; unsigned long v = 0; size_t size = i->count; unsigned k; if (iter_is_ubuf(i)) return 0; if (WARN_ON(!iter_is_iovec(i))) return ~0U; for (k = 0; k < i->nr_segs; k++) { const struct iovec *iov = iter_iov(i) + k; if (iov->iov_len) { unsigned long base = (unsigned long)iov->iov_base; if (v) // if not the first one res |= base | v; // this start | previous end v = base + iov->iov_len; if (size <= iov->iov_len) break; size -= iov->iov_len; } } return res; } EXPORT_SYMBOL(iov_iter_gap_alignment); static int want_pages_array(struct page ***res, size_t size, size_t start, unsigned int maxpages) { unsigned int count = DIV_ROUND_UP(size + start, PAGE_SIZE); if (count > maxpages) count = maxpages; WARN_ON(!count); // caller should've prevented that if (!*res) { *res = kvmalloc_array(count, sizeof(struct page *), GFP_KERNEL); if (!*res) return 0; } return count; } static ssize_t iter_folioq_get_pages(struct iov_iter *iter, struct page ***ppages, size_t maxsize, unsigned maxpages, size_t *_start_offset) { const struct folio_queue *folioq = iter->folioq; struct page **pages; unsigned int slot = iter->folioq_slot; size_t extracted = 0, count = iter->count, iov_offset = iter->iov_offset; if (slot >= folioq_nr_slots(folioq)) { folioq = folioq->next; slot = 0; if (WARN_ON(iov_offset != 0)) return -EIO; } maxpages = want_pages_array(ppages, maxsize, iov_offset & ~PAGE_MASK, maxpages); if (!maxpages) return -ENOMEM; *_start_offset = iov_offset & ~PAGE_MASK; pages = *ppages; for (;;) { struct folio *folio = folioq_folio(folioq, slot); size_t offset = iov_offset, fsize = folioq_folio_size(folioq, slot); size_t part = PAGE_SIZE - offset % PAGE_SIZE; if (offset < fsize) { part = umin(part, umin(maxsize - extracted, fsize - offset)); count -= part; iov_offset += part; extracted += part; *pages = folio_page(folio, offset / PAGE_SIZE); get_page(*pages); pages++; maxpages--; } if (maxpages == 0 || extracted >= maxsize) break; if (iov_offset >= fsize) { iov_offset = 0; slot++; if (slot == folioq_nr_slots(folioq) && folioq->next) { folioq = folioq->next; slot = 0; } } } iter->count = count; iter->iov_offset = iov_offset; iter->folioq = folioq; iter->folioq_slot = slot; return extracted; } static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa, pgoff_t index, unsigned int nr_pages) { XA_STATE(xas, xa, index); struct folio *folio; unsigned int ret = 0; rcu_read_lock(); for (folio = xas_load(&xas); folio; folio = xas_next(&xas)) { if (xas_retry(&xas, folio)) continue; /* Has the folio moved or been split? */ if (unlikely(folio != xas_reload(&xas))) { xas_reset(&xas); continue; } pages[ret] = folio_file_page(folio, xas.xa_index); folio_get(folio); if (++ret == nr_pages) break; } rcu_read_unlock(); return ret; } static ssize_t iter_xarray_get_pages(struct iov_iter *i, struct page ***pages, size_t maxsize, unsigned maxpages, size_t *_start_offset) { unsigned nr, offset, count; pgoff_t index; loff_t pos; pos = i->xarray_start + i->iov_offset; index = pos >> PAGE_SHIFT; offset = pos & ~PAGE_MASK; *_start_offset = offset; count = want_pages_array(pages, maxsize, offset, maxpages); if (!count) return -ENOMEM; nr = iter_xarray_populate_pages(*pages, i->xarray, index, count); if (nr == 0) return 0; maxsize = min_t(size_t, nr * PAGE_SIZE - offset, maxsize); i->iov_offset += maxsize; i->count -= maxsize; return maxsize; } /* must be done on non-empty ITER_UBUF or ITER_IOVEC one */ static unsigned long first_iovec_segment(const struct iov_iter *i, size_t *size) { size_t skip; long k; if (iter_is_ubuf(i)) return (unsigned long)i->ubuf + i->iov_offset; for (k = 0, skip = i->iov_offset; k < i->nr_segs; k++, skip = 0) { const struct iovec *iov = iter_iov(i) + k; size_t len = iov->iov_len - skip; if (unlikely(!len)) continue; if (*size > len) *size = len; return (unsigned long)iov->iov_base + skip; } BUG(); // if it had been empty, we wouldn't get called } /* must be done on non-empty ITER_BVEC one */ static struct page *first_bvec_segment(const struct iov_iter *i, size_t *size, size_t *start) { struct page *page; size_t skip = i->iov_offset, len; len = i->bvec->bv_len - skip; if (*size > len) *size = len; skip += i->bvec->bv_offset; page = i->bvec->bv_page + skip / PAGE_SIZE; *start = skip % PAGE_SIZE; return page; } static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages, size_t maxsize, unsigned int maxpages, size_t *start) { unsigned int n, gup_flags = 0; if (maxsize > i->count) maxsize = i->count; if (!maxsize) return 0; if (maxsize > MAX_RW_COUNT) maxsize = MAX_RW_COUNT; if (likely(user_backed_iter(i))) { unsigned long addr; int res; if (iov_iter_rw(i) != WRITE) gup_flags |= FOLL_WRITE; if (i->nofault) gup_flags |= FOLL_NOFAULT; addr = first_iovec_segment(i, &maxsize); *start = addr % PAGE_SIZE; addr &= PAGE_MASK; n = want_pages_array(pages, maxsize, *start, maxpages); if (!n) return -ENOMEM; res = get_user_pages_fast(addr, n, gup_flags, *pages); if (unlikely(res <= 0)) return res; maxsize = min_t(size_t, maxsize, res * PAGE_SIZE - *start); iov_iter_advance(i, maxsize); return maxsize; } if (iov_iter_is_bvec(i)) { struct page **p; struct page *page; page = first_bvec_segment(i, &maxsize, start); n = want_pages_array(pages, maxsize, *start, maxpages); if (!n) return -ENOMEM; p = *pages; for (int k = 0; k < n; k++) { struct folio *folio = page_folio(page + k); p[k] = page + k; if (!folio_test_slab(folio)) folio_get(folio); } maxsize = min_t(size_t, maxsize, n * PAGE_SIZE - *start); i->count -= maxsize; i->iov_offset += maxsize; if (i->iov_offset == i->bvec->bv_len) { i->iov_offset = 0; i->bvec++; i->nr_segs--; } return maxsize; } if (iov_iter_is_folioq(i)) return iter_folioq_get_pages(i, pages, maxsize, maxpages, start); if (iov_iter_is_xarray(i)) return iter_xarray_get_pages(i, pages, maxsize, maxpages, start); return -EFAULT; } ssize_t iov_iter_get_pages2(struct iov_iter *i, struct page **pages, size_t maxsize, unsigned maxpages, size_t *start) { if (!maxpages) return 0; BUG_ON(!pages); return __iov_iter_get_pages_alloc(i, &pages, maxsize, maxpages, start); } EXPORT_SYMBOL(iov_iter_get_pages2); ssize_t iov_iter_get_pages_alloc2(struct iov_iter *i, struct page ***pages, size_t maxsize, size_t *start) { ssize_t len; *pages = NULL; len = __iov_iter_get_pages_alloc(i, pages, maxsize, ~0U, start); if (len <= 0) { kvfree(*pages); *pages = NULL; } return len; } EXPORT_SYMBOL(iov_iter_get_pages_alloc2); static int iov_npages(const struct iov_iter *i, int maxpages) { size_t skip = i->iov_offset, size = i->count; const struct iovec *p; int npages = 0; for (p = iter_iov(i); size; skip = 0, p++) { unsigned offs = offset_in_page(p->iov_base + skip); size_t len = min(p->iov_len - skip, size); if (len) { size -= len; npages += DIV_ROUND_UP(offs + len, PAGE_SIZE); if (unlikely(npages > maxpages)) return maxpages; } } return npages; } static int bvec_npages(const struct iov_iter *i, int maxpages) { size_t skip = i->iov_offset, size = i->count; const struct bio_vec *p; int npages = 0; for (p = i->bvec; size; skip = 0, p++) { unsigned offs = (p->bv_offset + skip) % PAGE_SIZE; size_t len = min(p->bv_len - skip, size); size -= len; npages += DIV_ROUND_UP(offs + len, PAGE_SIZE); if (unlikely(npages > maxpages)) return maxpages; } return npages; } int iov_iter_npages(const struct iov_iter *i, int maxpages) { if (unlikely(!i->count)) return 0; if (likely(iter_is_ubuf(i))) { unsigned offs = offset_in_page(i->ubuf + i->iov_offset); int npages = DIV_ROUND_UP(offs + i->count, PAGE_SIZE); return min(npages, maxpages); } /* iovec and kvec have identical layouts */ if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) return iov_npages(i, maxpages); if (iov_iter_is_bvec(i)) return bvec_npages(i, maxpages); if (iov_iter_is_folioq(i)) { unsigned offset = i->iov_offset % PAGE_SIZE; int npages = DIV_ROUND_UP(offset + i->count, PAGE_SIZE); return min(npages, maxpages); } if (iov_iter_is_xarray(i)) { unsigned offset = (i->xarray_start + i->iov_offset) % PAGE_SIZE; int npages = DIV_ROUND_UP(offset + i->count, PAGE_SIZE); return min(npages, maxpages); } return 0; } EXPORT_SYMBOL(iov_iter_npages); const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags) { *new = *old; if (iov_iter_is_bvec(new)) return new->bvec = kmemdup(new->bvec, new->nr_segs * sizeof(struct bio_vec), flags); else if (iov_iter_is_kvec(new) || iter_is_iovec(new)) /* iovec and kvec have identical layout */ return new->__iov = kmemdup(new->__iov, new->nr_segs * sizeof(struct iovec), flags); return NULL; } EXPORT_SYMBOL(dup_iter); static __noclone int copy_compat_iovec_from_user(struct iovec *iov, const struct iovec __user *uvec, u32 nr_segs) { const struct compat_iovec __user *uiov = (const struct compat_iovec __user *)uvec; int ret = -EFAULT; u32 i; if (!user_access_begin(uiov, nr_segs * sizeof(*uiov))) return -EFAULT; for (i = 0; i < nr_segs; i++) { compat_uptr_t buf; compat_ssize_t len; unsafe_get_user(len, &uiov[i].iov_len, uaccess_end); unsafe_get_user(buf, &uiov[i].iov_base, uaccess_end); /* check for compat_size_t not fitting in compat_ssize_t .. */ if (len < 0) { ret = -EINVAL; goto uaccess_end; } iov[i].iov_base = compat_ptr(buf); iov[i].iov_len = len; } ret = 0; uaccess_end: user_access_end(); return ret; } static __noclone int copy_iovec_from_user(struct iovec *iov, const struct iovec __user *uiov, unsigned long nr_segs) { int ret = -EFAULT; if (!user_access_begin(uiov, nr_segs * sizeof(*uiov))) return -EFAULT; do { void __user *buf; ssize_t len; unsafe_get_user(len, &uiov->iov_len, uaccess_end); unsafe_get_user(buf, &uiov->iov_base, uaccess_end); /* check for size_t not fitting in ssize_t .. */ if (unlikely(len < 0)) { ret = -EINVAL; goto uaccess_end; } iov->iov_base = buf; iov->iov_len = len; uiov++; iov++; } while (--nr_segs); ret = 0; uaccess_end: user_access_end(); return ret; } struct iovec *iovec_from_user(const struct iovec __user *uvec, unsigned long nr_segs, unsigned long fast_segs, struct iovec *fast_iov, bool compat) { struct iovec *iov = fast_iov; int ret; /* * SuS says "The readv() function *may* fail if the iovcnt argument was * less than or equal to 0, or greater than {IOV_MAX}. Linux has * traditionally returned zero for zero segments, so... */ if (nr_segs == 0) return iov; if (nr_segs > UIO_MAXIOV) return ERR_PTR(-EINVAL); if (nr_segs > fast_segs) { iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL); if (!iov) return ERR_PTR(-ENOMEM); } if (unlikely(compat)) ret = copy_compat_iovec_from_user(iov, uvec, nr_segs); else ret = copy_iovec_from_user(iov, uvec, nr_segs); if (ret) { if (iov != fast_iov) kfree(iov); return ERR_PTR(ret); } return iov; } /* * Single segment iovec supplied by the user, import it as ITER_UBUF. */ static ssize_t __import_iovec_ubuf(int type, const struct iovec __user *uvec, struct iovec **iovp, struct iov_iter *i, bool compat) { struct iovec *iov = *iovp; ssize_t ret; *iovp = NULL; if (compat) ret = copy_compat_iovec_from_user(iov, uvec, 1); else ret = copy_iovec_from_user(iov, uvec, 1); if (unlikely(ret)) return ret; ret = import_ubuf(type, iov->iov_base, iov->iov_len, i); if (unlikely(ret)) return ret; return i->count; } ssize_t __import_iovec(int type, const struct iovec __user *uvec, unsigned nr_segs, unsigned fast_segs, struct iovec **iovp, struct iov_iter *i, bool compat) { ssize_t total_len = 0; unsigned long seg; struct iovec *iov; if (nr_segs == 1) return __import_iovec_ubuf(type, uvec, iovp, i, compat); iov = iovec_from_user(uvec, nr_segs, fast_segs, *iovp, compat); if (IS_ERR(iov)) { *iovp = NULL; return PTR_ERR(iov); } /* * According to the Single Unix Specification we should return EINVAL if * an element length is < 0 when cast to ssize_t or if the total length * would overflow the ssize_t return value of the system call. * * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the * overflow case. */ for (seg = 0; seg < nr_segs; seg++) { ssize_t len = (ssize_t)iov[seg].iov_len; if (!access_ok(iov[seg].iov_base, len)) { if (iov != *iovp) kfree(iov); *iovp = NULL; return -EFAULT; } if (len > MAX_RW_COUNT - total_len) { len = MAX_RW_COUNT - total_len; iov[seg].iov_len = len; } total_len += len; } iov_iter_init(i, type, iov, nr_segs, total_len); if (iov == *iovp) *iovp = NULL; else *iovp = iov; return total_len; } /** * import_iovec() - Copy an array of &struct iovec from userspace * into the kernel, check that it is valid, and initialize a new * &struct iov_iter iterator to access it. * * @type: One of %READ or %WRITE. * @uvec: Pointer to the userspace array. * @nr_segs: Number of elements in userspace array. * @fast_segs: Number of elements in @iov. * @iovp: (input and output parameter) Pointer to pointer to (usually small * on-stack) kernel array. * @i: Pointer to iterator that will be initialized on success. * * If the array pointed to by *@iov is large enough to hold all @nr_segs, * then this function places %NULL in *@iov on return. Otherwise, a new * array will be allocated and the result placed in *@iov. This means that * the caller may call kfree() on *@iov regardless of whether the small * on-stack array was used or not (and regardless of whether this function * returns an error or not). * * Return: Negative error code on error, bytes imported on success */ ssize_t import_iovec(int type, const struct iovec __user *uvec, unsigned nr_segs, unsigned fast_segs, struct iovec **iovp, struct iov_iter *i) { return __import_iovec(type, uvec, nr_segs, fast_segs, iovp, i, in_compat_syscall()); } EXPORT_SYMBOL(import_iovec); int import_ubuf(int rw, void __user *buf, size_t len, struct iov_iter *i) { if (len > MAX_RW_COUNT) len = MAX_RW_COUNT; if (unlikely(!access_ok(buf, len))) return -EFAULT; iov_iter_ubuf(i, rw, buf, len); return 0; } EXPORT_SYMBOL_GPL(import_ubuf); /** * iov_iter_restore() - Restore a &struct iov_iter to the same state as when * iov_iter_save_state() was called. * * @i: &struct iov_iter to restore * @state: state to restore from * * Used after iov_iter_save_state() to bring restore @i, if operations may * have advanced it. * * Note: only works on ITER_IOVEC, ITER_BVEC, and ITER_KVEC */ void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state) { if (WARN_ON_ONCE(!iov_iter_is_bvec(i) && !iter_is_iovec(i) && !iter_is_ubuf(i)) && !iov_iter_is_kvec(i)) return; i->iov_offset = state->iov_offset; i->count = state->count; if (iter_is_ubuf(i)) return; /* * For the *vec iters, nr_segs + iov is constant - if we increment * the vec, then we also decrement the nr_segs count. Hence we don't * need to track both of these, just one is enough and we can deduct * the other from that. ITER_KVEC and ITER_IOVEC are the same struct * size, so we can just increment the iov pointer as they are unionzed. * ITER_BVEC _may_ be the same size on some archs, but on others it is * not. Be safe and handle it separately. */ BUILD_BUG_ON(sizeof(struct iovec) != sizeof(struct kvec)); if (iov_iter_is_bvec(i)) i->bvec -= state->nr_segs - i->nr_segs; else i->__iov -= state->nr_segs - i->nr_segs; i->nr_segs = state->nr_segs; } /* * Extract a list of contiguous pages from an ITER_FOLIOQ iterator. This does * not get references on the pages, nor does it get a pin on them. */ static ssize_t iov_iter_extract_folioq_pages(struct iov_iter *i, struct page ***pages, size_t maxsize, unsigned int maxpages, iov_iter_extraction_t extraction_flags, size_t *offset0) { const struct folio_queue *folioq = i->folioq; struct page **p; unsigned int nr = 0; size_t extracted = 0, offset, slot = i->folioq_slot; if (slot >= folioq_nr_slots(folioq)) { folioq = folioq->next; slot = 0; if (WARN_ON(i->iov_offset != 0)) return -EIO; } offset = i->iov_offset & ~PAGE_MASK; *offset0 = offset; maxpages = want_pages_array(pages, maxsize, offset, maxpages); if (!maxpages) return -ENOMEM; p = *pages; for (;;) { struct folio *folio = folioq_folio(folioq, slot); size_t offset = i->iov_offset, fsize = folioq_folio_size(folioq, slot); size_t part = PAGE_SIZE - offset % PAGE_SIZE; if (offset < fsize) { part = umin(part, umin(maxsize - extracted, fsize - offset)); i->count -= part; i->iov_offset += part; extracted += part; p[nr++] = folio_page(folio, offset / PAGE_SIZE); } if (nr >= maxpages || extracted >= maxsize) break; if (i->iov_offset >= fsize) { i->iov_offset = 0; slot++; if (slot == folioq_nr_slots(folioq) && folioq->next) { folioq = folioq->next; slot = 0; } } } i->folioq = folioq; i->folioq_slot = slot; return extracted; } /* * Extract a list of contiguous pages from an ITER_XARRAY iterator. This does not * get references on the pages, nor does it get a pin on them. */ static ssize_t iov_iter_extract_xarray_pages(struct iov_iter *i, struct page ***pages, size_t maxsize, unsigned int maxpages, iov_iter_extraction_t extraction_flags, size_t *offset0) { struct page **p; struct folio *folio; unsigned int nr = 0, offset; loff_t pos = i->xarray_start + i->iov_offset; XA_STATE(xas, i->xarray, pos >> PAGE_SHIFT); offset = pos & ~PAGE_MASK; *offset0 = offset; maxpages = want_pages_array(pages, maxsize, offset, maxpages); if (!maxpages) return -ENOMEM; p = *pages; rcu_read_lock(); for (folio = xas_load(&xas); folio; folio = xas_next(&xas)) { if (xas_retry(&xas, folio)) continue; /* Has the folio moved or been split? */ if (unlikely(folio != xas_reload(&xas))) { xas_reset(&xas); continue; } p[nr++] = folio_file_page(folio, xas.xa_index); if (nr == maxpages) break; } rcu_read_unlock(); maxsize = min_t(size_t, nr * PAGE_SIZE - offset, maxsize); iov_iter_advance(i, maxsize); return maxsize; } /* * Extract a list of virtually contiguous pages from an ITER_BVEC iterator. * This does not get references on the pages, nor does it get a pin on them. */ static ssize_t iov_iter_extract_bvec_pages(struct iov_iter *i, struct page ***pages, size_t maxsize, unsigned int maxpages, iov_iter_extraction_t extraction_flags, size_t *offset0) { size_t skip = i->iov_offset, size = 0; struct bvec_iter bi; int k = 0; if (i->nr_segs == 0) return 0; if (i->iov_offset == i->bvec->bv_len) { i->iov_offset = 0; i->nr_segs--; i->bvec++; skip = 0; } bi.bi_idx = 0; bi.bi_size = maxsize; bi.bi_bvec_done = skip; maxpages = want_pages_array(pages, maxsize, skip, maxpages); while (bi.bi_size && bi.bi_idx < i->nr_segs) { struct bio_vec bv = bvec_iter_bvec(i->bvec, bi); /* * The iov_iter_extract_pages interface only allows an offset * into the first page. Break out of the loop if we see an * offset into subsequent pages, the caller will have to call * iov_iter_extract_pages again for the reminder. */ if (k) { if (bv.bv_offset) break; } else { *offset0 = bv.bv_offset; } (*pages)[k++] = bv.bv_page; size += bv.bv_len; if (k >= maxpages) break; /* * We are done when the end of the bvec doesn't align to a page * boundary as that would create a hole in the returned space. * The caller will handle this with another call to * iov_iter_extract_pages. */ if (bv.bv_offset + bv.bv_len != PAGE_SIZE) break; bvec_iter_advance_single(i->bvec, &bi, bv.bv_len); } iov_iter_advance(i, size); return size; } /* * Extract a list of virtually contiguous pages from an ITER_KVEC iterator. * This does not get references on the pages, nor does it get a pin on them. */ static ssize_t iov_iter_extract_kvec_pages(struct iov_iter *i, struct page ***pages, size_t maxsize, unsigned int maxpages, iov_iter_extraction_t extraction_flags, size_t *offset0) { struct page **p, *page; const void *kaddr; size_t skip = i->iov_offset, offset, len, size; int k; for (;;) { if (i->nr_segs == 0) return 0; size = min(maxsize, i->kvec->iov_len - skip); if (size) break; i->iov_offset = 0; i->nr_segs--; i->kvec++; skip = 0; } kaddr = i->kvec->iov_base + skip; offset = (unsigned long)kaddr & ~PAGE_MASK; *offset0 = offset; maxpages = want_pages_array(pages, size, offset, maxpages); if (!maxpages) return -ENOMEM; p = *pages; kaddr -= offset; len = offset + size; for (k = 0; k < maxpages; k++) { size_t seg = min_t(size_t, len, PAGE_SIZE); if (is_vmalloc_or_module_addr(kaddr)) page = vmalloc_to_page(kaddr); else page = virt_to_page(kaddr); p[k] = page; len -= seg; kaddr += PAGE_SIZE; } size = min_t(size_t, size, maxpages * PAGE_SIZE - offset); iov_iter_advance(i, size); return size; } /* * Extract a list of contiguous pages from a user iterator and get a pin on * each of them. This should only be used if the iterator is user-backed * (IOBUF/UBUF). * * It does not get refs on the pages, but the pages must be unpinned by the * caller once the transfer is complete. * * This is safe to be used where background IO/DMA *is* going to be modifying * the buffer; using a pin rather than a ref makes forces fork() to give the * child a copy of the page. */ static ssize_t iov_iter_extract_user_pages(struct iov_iter *i, struct page ***pages, size_t maxsize, unsigned int maxpages, iov_iter_extraction_t extraction_flags, size_t *offset0) { unsigned long addr; unsigned int gup_flags = 0; size_t offset; int res; if (i->data_source == ITER_DEST) gup_flags |= FOLL_WRITE; if (extraction_flags & ITER_ALLOW_P2PDMA) gup_flags |= FOLL_PCI_P2PDMA; if (i->nofault) gup_flags |= FOLL_NOFAULT; addr = first_iovec_segment(i, &maxsize); *offset0 = offset = addr % PAGE_SIZE; addr &= PAGE_MASK; maxpages = want_pages_array(pages, maxsize, offset, maxpages); if (!maxpages) return -ENOMEM; res = pin_user_pages_fast(addr, maxpages, gup_flags, *pages); if (unlikely(res <= 0)) return res; maxsize = min_t(size_t, maxsize, res * PAGE_SIZE - offset); iov_iter_advance(i, maxsize); return maxsize; } /** * iov_iter_extract_pages - Extract a list of contiguous pages from an iterator * @i: The iterator to extract from * @pages: Where to return the list of pages * @maxsize: The maximum amount of iterator to extract * @maxpages: The maximum size of the list of pages * @extraction_flags: Flags to qualify request * @offset0: Where to return the starting offset into (*@pages)[0] * * Extract a list of contiguous pages from the current point of the iterator, * advancing the iterator. The maximum number of pages and the maximum amount * of page contents can be set. * * If *@pages is NULL, a page list will be allocated to the required size and * *@pages will be set to its base. If *@pages is not NULL, it will be assumed * that the caller allocated a page list at least @maxpages in size and this * will be filled in. * * @extraction_flags can have ITER_ALLOW_P2PDMA set to request peer-to-peer DMA * be allowed on the pages extracted. * * The iov_iter_extract_will_pin() function can be used to query how cleanup * should be performed. * * Extra refs or pins on the pages may be obtained as follows: * * (*) If the iterator is user-backed (ITER_IOVEC/ITER_UBUF), pins will be * added to the pages, but refs will not be taken. * iov_iter_extract_will_pin() will return true. * * (*) If the iterator is ITER_KVEC, ITER_BVEC, ITER_FOLIOQ or ITER_XARRAY, the * pages are merely listed; no extra refs or pins are obtained. * iov_iter_extract_will_pin() will return 0. * * Note also: * * (*) Use with ITER_DISCARD is not supported as that has no content. * * On success, the function sets *@pages to the new pagelist, if allocated, and * sets *offset0 to the offset into the first page. * * It may also return -ENOMEM and -EFAULT. */ ssize_t iov_iter_extract_pages(struct iov_iter *i, struct page ***pages, size_t maxsize, unsigned int maxpages, iov_iter_extraction_t extraction_flags, size_t *offset0) { maxsize = min_t(size_t, min_t(size_t, maxsize, i->count), MAX_RW_COUNT); if (!maxsize) return 0; if (likely(user_backed_iter(i))) return iov_iter_extract_user_pages(i, pages, maxsize, maxpages, extraction_flags, offset0); if (iov_iter_is_kvec(i)) return iov_iter_extract_kvec_pages(i, pages, maxsize, maxpages, extraction_flags, offset0); if (iov_iter_is_bvec(i)) return iov_iter_extract_bvec_pages(i, pages, maxsize, maxpages, extraction_flags, offset0); if (iov_iter_is_folioq(i)) return iov_iter_extract_folioq_pages(i, pages, maxsize, maxpages, extraction_flags, offset0); if (iov_iter_is_xarray(i)) return iov_iter_extract_xarray_pages(i, pages, maxsize, maxpages, extraction_flags, offset0); return -EFAULT; } EXPORT_SYMBOL_GPL(iov_iter_extract_pages);
6794 216 216 5 212 14 7 5674 213 5673 7443 501 5673 5832 5835 5840 63 64 64 7429 7 5868 7 5815 5856 7420 216 7429 7452 2557 4568 6016 5970 55 7183 6802 6823 6870 6829 1026 5672 5678 191 5673 4 5 5706 25 52 4039 2 10 10 4078 5673 5662 5696 5674 65 5669 30 30 369 16 55 51 5444 70 104 496 496 491 16 4 481 317 314 4 37 483 4 379 219 495 495 497 17 496 207 320 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 /* SPDX-License-Identifier: GPL-2.0 */ /* * Resizable, Scalable, Concurrent Hash Table * * Copyright (c) 2015-2016 Herbert Xu <herbert@gondor.apana.org.au> * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch> * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net> * * Code partially derived from nft_hash * Rewritten with rehash code from br_multicast plus single list * pointer as suggested by Josh Triplett * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ #ifndef _LINUX_RHASHTABLE_H #define _LINUX_RHASHTABLE_H #include <linux/err.h> #include <linux/errno.h> #include <linux/jhash.h> #include <linux/list_nulls.h> #include <linux/workqueue.h> #include <linux/rculist.h> #include <linux/bit_spinlock.h> #include <linux/rhashtable-types.h> /* * Objects in an rhashtable have an embedded struct rhash_head * which is linked into as hash chain from the hash table - or one * of two or more hash tables when the rhashtable is being resized. * The end of the chain is marked with a special nulls marks which has * the least significant bit set but otherwise stores the address of * the hash bucket. This allows us to be sure we've found the end * of the right list. * The value stored in the hash bucket has BIT(0) used as a lock bit. * This bit must be atomically set before any changes are made to * the chain. To avoid dereferencing this pointer without clearing * the bit first, we use an opaque 'struct rhash_lock_head *' for the * pointer stored in the bucket. This struct needs to be defined so * that rcu_dereference() works on it, but it has no content so a * cast is needed for it to be useful. This ensures it isn't * used by mistake with clearing the lock bit first. */ struct rhash_lock_head {}; /* Maximum chain length before rehash * * The maximum (not average) chain length grows with the size of the hash * table, at a rate of (log N)/(log log N). * * The value of 16 is selected so that even if the hash table grew to * 2^32 you would not expect the maximum chain length to exceed it * unless we are under attack (or extremely unlucky). * * As this limit is only to detect attacks, we don't need to set it to a * lower value as you'd need the chain length to vastly exceed 16 to have * any real effect on the system. */ #define RHT_ELASTICITY 16u /** * struct bucket_table - Table of hash buckets * @size: Number of hash buckets * @nest: Number of bits of first-level nested table. * @rehash: Current bucket being rehashed * @hash_rnd: Random seed to fold into hash * @walkers: List of active walkers * @rcu: RCU structure for freeing the table * @future_tbl: Table under construction during rehashing * @ntbl: Nested table used when out of memory. * @buckets: size * hash buckets */ struct bucket_table { unsigned int size; unsigned int nest; u32 hash_rnd; struct list_head walkers; struct rcu_head rcu; struct bucket_table __rcu *future_tbl; struct lockdep_map dep_map; struct rhash_lock_head __rcu *buckets[] ____cacheline_aligned_in_smp; }; /* * NULLS_MARKER() expects a hash value with the low * bits mostly likely to be significant, and it discards * the msb. * We give it an address, in which the bottom bit is * always 0, and the msb might be significant. * So we shift the address down one bit to align with * expectations and avoid losing a significant bit. * * We never store the NULLS_MARKER in the hash table * itself as we need the lsb for locking. * Instead we store a NULL */ #define RHT_NULLS_MARKER(ptr) \ ((void *)NULLS_MARKER(((unsigned long) (ptr)) >> 1)) #define INIT_RHT_NULLS_HEAD(ptr) \ ((ptr) = NULL) static inline bool rht_is_a_nulls(const struct rhash_head *ptr) { return ((unsigned long) ptr & 1); } static inline void *rht_obj(const struct rhashtable *ht, const struct rhash_head *he) { return (char *)he - ht->p.head_offset; } static inline unsigned int rht_bucket_index(const struct bucket_table *tbl, unsigned int hash) { return hash & (tbl->size - 1); } static __always_inline unsigned int rht_key_get_hash(struct rhashtable *ht, const void *key, const struct rhashtable_params params, unsigned int hash_rnd) { unsigned int hash; /* params must be equal to ht->p if it isn't constant. */ if (!__builtin_constant_p(params.key_len)) hash = ht->p.hashfn(key, ht->key_len, hash_rnd); else if (params.key_len) { unsigned int key_len = params.key_len; if (params.hashfn) hash = params.hashfn(key, key_len, hash_rnd); else if (key_len & (sizeof(u32) - 1)) hash = jhash(key, key_len, hash_rnd); else hash = jhash2(key, key_len / sizeof(u32), hash_rnd); } else { unsigned int key_len = ht->p.key_len; if (params.hashfn) hash = params.hashfn(key, key_len, hash_rnd); else hash = jhash(key, key_len, hash_rnd); } return hash; } static __always_inline unsigned int rht_key_hashfn( struct rhashtable *ht, const struct bucket_table *tbl, const void *key, const struct rhashtable_params params) { unsigned int hash = rht_key_get_hash(ht, key, params, tbl->hash_rnd); return rht_bucket_index(tbl, hash); } static __always_inline unsigned int rht_head_hashfn( struct rhashtable *ht, const struct bucket_table *tbl, const struct rhash_head *he, const struct rhashtable_params params) { const char *ptr = rht_obj(ht, he); return likely(params.obj_hashfn) ? rht_bucket_index(tbl, params.obj_hashfn(ptr, params.key_len ?: ht->p.key_len, tbl->hash_rnd)) : rht_key_hashfn(ht, tbl, ptr + params.key_offset, params); } /** * rht_grow_above_75 - returns true if nelems > 0.75 * table-size * @ht: hash table * @tbl: current table */ static inline bool rht_grow_above_75(const struct rhashtable *ht, const struct bucket_table *tbl) { /* Expand table when exceeding 75% load */ return atomic_read(&ht->nelems) > (tbl->size / 4 * 3) && (!ht->p.max_size || tbl->size < ht->p.max_size); } /** * rht_shrink_below_30 - returns true if nelems < 0.3 * table-size * @ht: hash table * @tbl: current table */ static inline bool rht_shrink_below_30(const struct rhashtable *ht, const struct bucket_table *tbl) { /* Shrink table beneath 30% load */ return atomic_read(&ht->nelems) < (tbl->size * 3 / 10) && tbl->size > ht->p.min_size; } /** * rht_grow_above_100 - returns true if nelems > table-size * @ht: hash table * @tbl: current table */ static inline bool rht_grow_above_100(const struct rhashtable *ht, const struct bucket_table *tbl) { return atomic_read(&ht->nelems) > tbl->size && (!ht->p.max_size || tbl->size < ht->p.max_size); } /** * rht_grow_above_max - returns true if table is above maximum * @ht: hash table * @tbl: current table */ static inline bool rht_grow_above_max(const struct rhashtable *ht, const struct bucket_table *tbl) { return atomic_read(&ht->nelems) >= ht->max_elems; } #ifdef CONFIG_PROVE_LOCKING int lockdep_rht_mutex_is_held(struct rhashtable *ht); int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash); #else static inline int lockdep_rht_mutex_is_held(struct rhashtable *ht) { return 1; } static inline int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash) { return 1; } #endif /* CONFIG_PROVE_LOCKING */ void *rhashtable_insert_slow(struct rhashtable *ht, const void *key, struct rhash_head *obj); void rhashtable_walk_enter(struct rhashtable *ht, struct rhashtable_iter *iter); void rhashtable_walk_exit(struct rhashtable_iter *iter); int rhashtable_walk_start_check(struct rhashtable_iter *iter) __acquires(RCU); static inline void rhashtable_walk_start(struct rhashtable_iter *iter) { (void)rhashtable_walk_start_check(iter); } void *rhashtable_walk_next(struct rhashtable_iter *iter); void *rhashtable_walk_peek(struct rhashtable_iter *iter); void rhashtable_walk_stop(struct rhashtable_iter *iter) __releases(RCU); void rhashtable_free_and_destroy(struct rhashtable *ht, void (*free_fn)(void *ptr, void *arg), void *arg); void rhashtable_destroy(struct rhashtable *ht); struct rhash_lock_head __rcu **rht_bucket_nested( const struct bucket_table *tbl, unsigned int hash); struct rhash_lock_head __rcu **__rht_bucket_nested( const struct bucket_table *tbl, unsigned int hash); struct rhash_lock_head __rcu **rht_bucket_nested_insert( struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash); #define rht_dereference(p, ht) \ rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht)) #define rht_dereference_rcu(p, ht) \ rcu_dereference_check(p, lockdep_rht_mutex_is_held(ht)) #define rht_dereference_bucket(p, tbl, hash) \ rcu_dereference_protected(p, lockdep_rht_bucket_is_held(tbl, hash)) #define rht_dereference_bucket_rcu(p, tbl, hash) \ rcu_dereference_check(p, lockdep_rht_bucket_is_held(tbl, hash)) #define rht_entry(tpos, pos, member) \ ({ tpos = container_of(pos, typeof(*tpos), member); 1; }) static inline struct rhash_lock_head __rcu *const *rht_bucket( const struct bucket_table *tbl, unsigned int hash) { return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) : &tbl->buckets[hash]; } static inline struct rhash_lock_head __rcu **rht_bucket_var( struct bucket_table *tbl, unsigned int hash) { return unlikely(tbl->nest) ? __rht_bucket_nested(tbl, hash) : &tbl->buckets[hash]; } static inline struct rhash_lock_head __rcu **rht_bucket_insert( struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash) { return unlikely(tbl->nest) ? rht_bucket_nested_insert(ht, tbl, hash) : &tbl->buckets[hash]; } /* * We lock a bucket by setting BIT(0) in the pointer - this is always * zero in real pointers. The NULLS mark is never stored in the bucket, * rather we store NULL if the bucket is empty. * bit_spin_locks do not handle contention well, but the whole point * of the hashtable design is to achieve minimum per-bucket contention. * A nested hash table might not have a bucket pointer. In that case * we cannot get a lock. For remove and replace the bucket cannot be * interesting and doesn't need locking. * For insert we allocate the bucket if this is the last bucket_table, * and then take the lock. * Sometimes we unlock a bucket by writing a new pointer there. In that * case we don't need to unlock, but we do need to reset state such as * local_bh. For that we have rht_assign_unlock(). As rcu_assign_pointer() * provides the same release semantics that bit_spin_unlock() provides, * this is safe. * When we write to a bucket without unlocking, we use rht_assign_locked(). */ static inline unsigned long rht_lock(struct bucket_table *tbl, struct rhash_lock_head __rcu **bkt) { unsigned long flags; local_irq_save(flags); bit_spin_lock(0, (unsigned long *)bkt); lock_map_acquire(&tbl->dep_map); return flags; } static inline unsigned long rht_lock_nested(struct bucket_table *tbl, struct rhash_lock_head __rcu **bucket, unsigned int subclass) { unsigned long flags; local_irq_save(flags); bit_spin_lock(0, (unsigned long *)bucket); lock_acquire_exclusive(&tbl->dep_map, subclass, 0, NULL, _THIS_IP_); return flags; } static inline void rht_unlock(struct bucket_table *tbl, struct rhash_lock_head __rcu **bkt, unsigned long flags) { lock_map_release(&tbl->dep_map); bit_spin_unlock(0, (unsigned long *)bkt); local_irq_restore(flags); } static inline struct rhash_head *__rht_ptr( struct rhash_lock_head *p, struct rhash_lock_head __rcu *const *bkt) { return (struct rhash_head *) ((unsigned long)p & ~BIT(0) ?: (unsigned long)RHT_NULLS_MARKER(bkt)); } /* * Where 'bkt' is a bucket and might be locked: * rht_ptr_rcu() dereferences that pointer and clears the lock bit. * rht_ptr() dereferences in a context where the bucket is locked. * rht_ptr_exclusive() dereferences in a context where exclusive * access is guaranteed, such as when destroying the table. */ static inline struct rhash_head *rht_ptr_rcu( struct rhash_lock_head __rcu *const *bkt) { return __rht_ptr(rcu_dereference(*bkt), bkt); } static inline struct rhash_head *rht_ptr( struct rhash_lock_head __rcu *const *bkt, struct bucket_table *tbl, unsigned int hash) { return __rht_ptr(rht_dereference_bucket(*bkt, tbl, hash), bkt); } static inline struct rhash_head *rht_ptr_exclusive( struct rhash_lock_head __rcu *const *bkt) { return __rht_ptr(rcu_dereference_protected(*bkt, 1), bkt); } static inline void rht_assign_locked(struct rhash_lock_head __rcu **bkt, struct rhash_head *obj) { if (rht_is_a_nulls(obj)) obj = NULL; rcu_assign_pointer(*bkt, (void *)((unsigned long)obj | BIT(0))); } static inline void rht_assign_unlock(struct bucket_table *tbl, struct rhash_lock_head __rcu **bkt, struct rhash_head *obj, unsigned long flags) { if (rht_is_a_nulls(obj)) obj = NULL; lock_map_release(&tbl->dep_map); rcu_assign_pointer(*bkt, (void *)obj); preempt_enable(); __release(bitlock); local_irq_restore(flags); } /** * rht_for_each_from - iterate over hash chain from given head * @pos: the &struct rhash_head to use as a loop cursor. * @head: the &struct rhash_head to start from * @tbl: the &struct bucket_table * @hash: the hash value / bucket index */ #define rht_for_each_from(pos, head, tbl, hash) \ for (pos = head; \ !rht_is_a_nulls(pos); \ pos = rht_dereference_bucket((pos)->next, tbl, hash)) /** * rht_for_each - iterate over hash chain * @pos: the &struct rhash_head to use as a loop cursor. * @tbl: the &struct bucket_table * @hash: the hash value / bucket index */ #define rht_for_each(pos, tbl, hash) \ rht_for_each_from(pos, rht_ptr(rht_bucket(tbl, hash), tbl, hash), \ tbl, hash) /** * rht_for_each_entry_from - iterate over hash chain from given head * @tpos: the type * to use as a loop cursor. * @pos: the &struct rhash_head to use as a loop cursor. * @head: the &struct rhash_head to start from * @tbl: the &struct bucket_table * @hash: the hash value / bucket index * @member: name of the &struct rhash_head within the hashable struct. */ #define rht_for_each_entry_from(tpos, pos, head, tbl, hash, member) \ for (pos = head; \ (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ pos = rht_dereference_bucket((pos)->next, tbl, hash)) /** * rht_for_each_entry - iterate over hash chain of given type * @tpos: the type * to use as a loop cursor. * @pos: the &struct rhash_head to use as a loop cursor. * @tbl: the &struct bucket_table * @hash: the hash value / bucket index * @member: name of the &struct rhash_head within the hashable struct. */ #define rht_for_each_entry(tpos, pos, tbl, hash, member) \ rht_for_each_entry_from(tpos, pos, \ rht_ptr(rht_bucket(tbl, hash), tbl, hash), \ tbl, hash, member) /** * rht_for_each_entry_safe - safely iterate over hash chain of given type * @tpos: the type * to use as a loop cursor. * @pos: the &struct rhash_head to use as a loop cursor. * @next: the &struct rhash_head to use as next in loop cursor. * @tbl: the &struct bucket_table * @hash: the hash value / bucket index * @member: name of the &struct rhash_head within the hashable struct. * * This hash chain list-traversal primitive allows for the looped code to * remove the loop cursor from the list. */ #define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member) \ for (pos = rht_ptr(rht_bucket(tbl, hash), tbl, hash), \ next = !rht_is_a_nulls(pos) ? \ rht_dereference_bucket(pos->next, tbl, hash) : NULL; \ (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ pos = next, \ next = !rht_is_a_nulls(pos) ? \ rht_dereference_bucket(pos->next, tbl, hash) : NULL) /** * rht_for_each_rcu_from - iterate over rcu hash chain from given head * @pos: the &struct rhash_head to use as a loop cursor. * @head: the &struct rhash_head to start from * @tbl: the &struct bucket_table * @hash: the hash value / bucket index * * This hash chain list-traversal primitive may safely run concurrently with * the _rcu mutation primitives such as rhashtable_insert() as long as the * traversal is guarded by rcu_read_lock(). */ #define rht_for_each_rcu_from(pos, head, tbl, hash) \ for (({barrier(); }), \ pos = head; \ !rht_is_a_nulls(pos); \ pos = rcu_dereference_raw(pos->next)) /** * rht_for_each_rcu - iterate over rcu hash chain * @pos: the &struct rhash_head to use as a loop cursor. * @tbl: the &struct bucket_table * @hash: the hash value / bucket index * * This hash chain list-traversal primitive may safely run concurrently with * the _rcu mutation primitives such as rhashtable_insert() as long as the * traversal is guarded by rcu_read_lock(). */ #define rht_for_each_rcu(pos, tbl, hash) \ for (({barrier(); }), \ pos = rht_ptr_rcu(rht_bucket(tbl, hash)); \ !rht_is_a_nulls(pos); \ pos = rcu_dereference_raw(pos->next)) /** * rht_for_each_entry_rcu_from - iterated over rcu hash chain from given head * @tpos: the type * to use as a loop cursor. * @pos: the &struct rhash_head to use as a loop cursor. * @head: the &struct rhash_head to start from * @tbl: the &struct bucket_table * @hash: the hash value / bucket index * @member: name of the &struct rhash_head within the hashable struct. * * This hash chain list-traversal primitive may safely run concurrently with * the _rcu mutation primitives such as rhashtable_insert() as long as the * traversal is guarded by rcu_read_lock(). */ #define rht_for_each_entry_rcu_from(tpos, pos, head, tbl, hash, member) \ for (({barrier(); }), \ pos = head; \ (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ pos = rht_dereference_bucket_rcu(pos->next, tbl, hash)) /** * rht_for_each_entry_rcu - iterate over rcu hash chain of given type * @tpos: the type * to use as a loop cursor. * @pos: the &struct rhash_head to use as a loop cursor. * @tbl: the &struct bucket_table * @hash: the hash value / bucket index * @member: name of the &struct rhash_head within the hashable struct. * * This hash chain list-traversal primitive may safely run concurrently with * the _rcu mutation primitives such as rhashtable_insert() as long as the * traversal is guarded by rcu_read_lock(). */ #define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member) \ rht_for_each_entry_rcu_from(tpos, pos, \ rht_ptr_rcu(rht_bucket(tbl, hash)), \ tbl, hash, member) /** * rhl_for_each_rcu - iterate over rcu hash table list * @pos: the &struct rlist_head to use as a loop cursor. * @list: the head of the list * * This hash chain list-traversal primitive should be used on the * list returned by rhltable_lookup. */ #define rhl_for_each_rcu(pos, list) \ for (pos = list; pos; pos = rcu_dereference_raw(pos->next)) /** * rhl_for_each_entry_rcu - iterate over rcu hash table list of given type * @tpos: the type * to use as a loop cursor. * @pos: the &struct rlist_head to use as a loop cursor. * @list: the head of the list * @member: name of the &struct rlist_head within the hashable struct. * * This hash chain list-traversal primitive should be used on the * list returned by rhltable_lookup. */ #define rhl_for_each_entry_rcu(tpos, pos, list, member) \ for (pos = list; pos && rht_entry(tpos, pos, member); \ pos = rcu_dereference_raw(pos->next)) static inline int rhashtable_compare(struct rhashtable_compare_arg *arg, const void *obj) { struct rhashtable *ht = arg->ht; const char *ptr = obj; return memcmp(ptr + ht->p.key_offset, arg->key, ht->p.key_len); } /* Internal function, do not use. */ static __always_inline struct rhash_head *__rhashtable_lookup( struct rhashtable *ht, const void *key, const struct rhashtable_params params) { struct rhashtable_compare_arg arg = { .ht = ht, .key = key, }; struct rhash_lock_head __rcu *const *bkt; struct bucket_table *tbl; struct rhash_head *he; unsigned int hash; tbl = rht_dereference_rcu(ht->tbl, ht); restart: hash = rht_key_hashfn(ht, tbl, key, params); bkt = rht_bucket(tbl, hash); do { rht_for_each_rcu_from(he, rht_ptr_rcu(bkt), tbl, hash) { if (params.obj_cmpfn ? params.obj_cmpfn(&arg, rht_obj(ht, he)) : rhashtable_compare(&arg, rht_obj(ht, he))) continue; return he; } /* An object might have been moved to a different hash chain, * while we walk along it - better check and retry. */ } while (he != RHT_NULLS_MARKER(bkt)); /* Ensure we see any new tables. */ smp_rmb(); tbl = rht_dereference_rcu(tbl->future_tbl, ht); if (unlikely(tbl)) goto restart; return NULL; } /** * rhashtable_lookup - search hash table * @ht: hash table * @key: the pointer to the key * @params: hash table parameters * * Computes the hash value for the key and traverses the bucket chain looking * for an entry with an identical key. The first matching entry is returned. * * This must only be called under the RCU read lock. * * Returns the first entry on which the compare function returned true. */ static __always_inline void *rhashtable_lookup( struct rhashtable *ht, const void *key, const struct rhashtable_params params) { struct rhash_head *he = __rhashtable_lookup(ht, key, params); return he ? rht_obj(ht, he) : NULL; } /** * rhashtable_lookup_fast - search hash table, without RCU read lock * @ht: hash table * @key: the pointer to the key * @params: hash table parameters * * Computes the hash value for the key and traverses the bucket chain looking * for an entry with an identical key. The first matching entry is returned. * * Only use this function when you have other mechanisms guaranteeing * that the object won't go away after the RCU read lock is released. * * Returns the first entry on which the compare function returned true. */ static __always_inline void *rhashtable_lookup_fast( struct rhashtable *ht, const void *key, const struct rhashtable_params params) { void *obj; rcu_read_lock(); obj = rhashtable_lookup(ht, key, params); rcu_read_unlock(); return obj; } /** * rhltable_lookup - search hash list table * @hlt: hash table * @key: the pointer to the key * @params: hash table parameters * * Computes the hash value for the key and traverses the bucket chain looking * for an entry with an identical key. All matching entries are returned * in a list. * * This must only be called under the RCU read lock. * * Returns the list of entries that match the given key. */ static __always_inline struct rhlist_head *rhltable_lookup( struct rhltable *hlt, const void *key, const struct rhashtable_params params) { struct rhash_head *he = __rhashtable_lookup(&hlt->ht, key, params); return he ? container_of(he, struct rhlist_head, rhead) : NULL; } /* Internal function, please use rhashtable_insert_fast() instead. This * function returns the existing element already in hashes if there is a clash, * otherwise it returns an error via ERR_PTR(). */ static __always_inline void *__rhashtable_insert_fast( struct rhashtable *ht, const void *key, struct rhash_head *obj, const struct rhashtable_params params, bool rhlist) { struct rhashtable_compare_arg arg = { .ht = ht, .key = key, }; struct rhash_lock_head __rcu **bkt; struct rhash_head __rcu **pprev; struct bucket_table *tbl; struct rhash_head *head; unsigned long flags; unsigned int hash; int elasticity; void *data; rcu_read_lock(); tbl = rht_dereference_rcu(ht->tbl, ht); hash = rht_head_hashfn(ht, tbl, obj, params); elasticity = RHT_ELASTICITY; bkt = rht_bucket_insert(ht, tbl, hash); data = ERR_PTR(-ENOMEM); if (!bkt) goto out; pprev = NULL; flags = rht_lock(tbl, bkt); if (unlikely(rcu_access_pointer(tbl->future_tbl))) { slow_path: rht_unlock(tbl, bkt, flags); rcu_read_unlock(); return rhashtable_insert_slow(ht, key, obj); } rht_for_each_from(head, rht_ptr(bkt, tbl, hash), tbl, hash) { struct rhlist_head *plist; struct rhlist_head *list; elasticity--; if (!key || (params.obj_cmpfn ? params.obj_cmpfn(&arg, rht_obj(ht, head)) : rhashtable_compare(&arg, rht_obj(ht, head)))) { pprev = &head->next; continue; } data = rht_obj(ht, head); if (!rhlist) goto out_unlock; list = container_of(obj, struct rhlist_head, rhead); plist = container_of(head, struct rhlist_head, rhead); RCU_INIT_POINTER(list->next, plist); head = rht_dereference_bucket(head->next, tbl, hash); RCU_INIT_POINTER(list->rhead.next, head); if (pprev) { rcu_assign_pointer(*pprev, obj); rht_unlock(tbl, bkt, flags); } else rht_assign_unlock(tbl, bkt, obj, flags); data = NULL; goto out; } if (elasticity <= 0) goto slow_path; data = ERR_PTR(-E2BIG); if (unlikely(rht_grow_above_max(ht, tbl))) goto out_unlock; if (unlikely(rht_grow_above_100(ht, tbl))) goto slow_path; /* Inserting at head of list makes unlocking free. */ head = rht_ptr(bkt, tbl, hash); RCU_INIT_POINTER(obj->next, head); if (rhlist) { struct rhlist_head *list; list = container_of(obj, struct rhlist_head, rhead); RCU_INIT_POINTER(list->next, NULL); } atomic_inc(&ht->nelems); rht_assign_unlock(tbl, bkt, obj, flags); if (rht_grow_above_75(ht, tbl)) schedule_work(&ht->run_work); data = NULL; out: rcu_read_unlock(); return data; out_unlock: rht_unlock(tbl, bkt, flags); goto out; } /** * rhashtable_insert_fast - insert object into hash table * @ht: hash table * @obj: pointer to hash head inside object * @params: hash table parameters * * Will take the per bucket bitlock to protect against mutual mutations * on the same bucket. Multiple insertions may occur in parallel unless * they map to the same bucket. * * It is safe to call this function from atomic context. * * Will trigger an automatic deferred table resizing if residency in the * table grows beyond 70%. */ static __always_inline int rhashtable_insert_fast( struct rhashtable *ht, struct rhash_head *obj, const struct rhashtable_params params) { void *ret; ret = __rhashtable_insert_fast(ht, NULL, obj, params, false); if (IS_ERR(ret)) return PTR_ERR(ret); return ret == NULL ? 0 : -EEXIST; } /** * rhltable_insert_key - insert object into hash list table * @hlt: hash list table * @key: the pointer to the key * @list: pointer to hash list head inside object * @params: hash table parameters * * Will take the per bucket bitlock to protect against mutual mutations * on the same bucket. Multiple insertions may occur in parallel unless * they map to the same bucket. * * It is safe to call this function from atomic context. * * Will trigger an automatic deferred table resizing if residency in the * table grows beyond 70%. */ static __always_inline int rhltable_insert_key( struct rhltable *hlt, const void *key, struct rhlist_head *list, const struct rhashtable_params params) { return PTR_ERR(__rhashtable_insert_fast(&hlt->ht, key, &list->rhead, params, true)); } /** * rhltable_insert - insert object into hash list table * @hlt: hash list table * @list: pointer to hash list head inside object * @params: hash table parameters * * Will take the per bucket bitlock to protect against mutual mutations * on the same bucket. Multiple insertions may occur in parallel unless * they map to the same bucket. * * It is safe to call this function from atomic context. * * Will trigger an automatic deferred table resizing if residency in the * table grows beyond 70%. */ static __always_inline int rhltable_insert( struct rhltable *hlt, struct rhlist_head *list, const struct rhashtable_params params) { const char *key = rht_obj(&hlt->ht, &list->rhead); key += params.key_offset; return rhltable_insert_key(hlt, key, list, params); } /** * rhashtable_lookup_insert_fast - lookup and insert object into hash table * @ht: hash table * @obj: pointer to hash head inside object * @params: hash table parameters * * This lookup function may only be used for fixed key hash table (key_len * parameter set). It will BUG() if used inappropriately. * * It is safe to call this function from atomic context. * * Will trigger an automatic deferred table resizing if residency in the * table grows beyond 70%. */ static __always_inline int rhashtable_lookup_insert_fast( struct rhashtable *ht, struct rhash_head *obj, const struct rhashtable_params params) { const char *key = rht_obj(ht, obj); void *ret; BUG_ON(ht->p.obj_hashfn); ret = __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params, false); if (IS_ERR(ret)) return PTR_ERR(ret); return ret == NULL ? 0 : -EEXIST; } /** * rhashtable_lookup_get_insert_fast - lookup and insert object into hash table * @ht: hash table * @obj: pointer to hash head inside object * @params: hash table parameters * * Just like rhashtable_lookup_insert_fast(), but this function returns the * object if it exists, NULL if it did not and the insertion was successful, * and an ERR_PTR otherwise. */ static __always_inline void *rhashtable_lookup_get_insert_fast( struct rhashtable *ht, struct rhash_head *obj, const struct rhashtable_params params) { const char *key = rht_obj(ht, obj); BUG_ON(ht->p.obj_hashfn); return __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params, false); } /** * rhashtable_lookup_insert_key - search and insert object to hash table * with explicit key * @ht: hash table * @key: key * @obj: pointer to hash head inside object * @params: hash table parameters * * Lookups may occur in parallel with hashtable mutations and resizing. * * Will trigger an automatic deferred table resizing if residency in the * table grows beyond 70%. * * Returns zero on success. */ static __always_inline int rhashtable_lookup_insert_key( struct rhashtable *ht, const void *key, struct rhash_head *obj, const struct rhashtable_params params) { void *ret; BUG_ON(!ht->p.obj_hashfn || !key); ret = __rhashtable_insert_fast(ht, key, obj, params, false); if (IS_ERR(ret)) return PTR_ERR(ret); return ret == NULL ? 0 : -EEXIST; } /** * rhashtable_lookup_get_insert_key - lookup and insert object into hash table * @ht: hash table * @key: key * @obj: pointer to hash head inside object * @params: hash table parameters * * Just like rhashtable_lookup_insert_key(), but this function returns the * object if it exists, NULL if it does not and the insertion was successful, * and an ERR_PTR otherwise. */ static __always_inline void *rhashtable_lookup_get_insert_key( struct rhashtable *ht, const void *key, struct rhash_head *obj, const struct rhashtable_params params) { BUG_ON(!ht->p.obj_hashfn || !key); return __rhashtable_insert_fast(ht, key, obj, params, false); } /* Internal function, please use rhashtable_remove_fast() instead */ static __always_inline int __rhashtable_remove_fast_one( struct rhashtable *ht, struct bucket_table *tbl, struct rhash_head *obj, const struct rhashtable_params params, bool rhlist) { struct rhash_lock_head __rcu **bkt; struct rhash_head __rcu **pprev; struct rhash_head *he; unsigned long flags; unsigned int hash; int err = -ENOENT; hash = rht_head_hashfn(ht, tbl, obj, params); bkt = rht_bucket_var(tbl, hash); if (!bkt) return -ENOENT; pprev = NULL; flags = rht_lock(tbl, bkt); rht_for_each_from(he, rht_ptr(bkt, tbl, hash), tbl, hash) { struct rhlist_head *list; list = container_of(he, struct rhlist_head, rhead); if (he != obj) { struct rhlist_head __rcu **lpprev; pprev = &he->next; if (!rhlist) continue; do { lpprev = &list->next; list = rht_dereference_bucket(list->next, tbl, hash); } while (list && obj != &list->rhead); if (!list) continue; list = rht_dereference_bucket(list->next, tbl, hash); RCU_INIT_POINTER(*lpprev, list); err = 0; break; } obj = rht_dereference_bucket(obj->next, tbl, hash); err = 1; if (rhlist) { list = rht_dereference_bucket(list->next, tbl, hash); if (list) { RCU_INIT_POINTER(list->rhead.next, obj); obj = &list->rhead; err = 0; } } if (pprev) { rcu_assign_pointer(*pprev, obj); rht_unlock(tbl, bkt, flags); } else { rht_assign_unlock(tbl, bkt, obj, flags); } goto unlocked; } rht_unlock(tbl, bkt, flags); unlocked: if (err > 0) { atomic_dec(&ht->nelems); if (unlikely(ht->p.automatic_shrinking && rht_shrink_below_30(ht, tbl))) schedule_work(&ht->run_work); err = 0; } return err; } /* Internal function, please use rhashtable_remove_fast() instead */ static __always_inline int __rhashtable_remove_fast( struct rhashtable *ht, struct rhash_head *obj, const struct rhashtable_params params, bool rhlist) { struct bucket_table *tbl; int err; rcu_read_lock(); tbl = rht_dereference_rcu(ht->tbl, ht); /* Because we have already taken (and released) the bucket * lock in old_tbl, if we find that future_tbl is not yet * visible then that guarantees the entry to still be in * the old tbl if it exists. */ while ((err = __rhashtable_remove_fast_one(ht, tbl, obj, params, rhlist)) && (tbl = rht_dereference_rcu(tbl->future_tbl, ht))) ; rcu_read_unlock(); return err; } /** * rhashtable_remove_fast - remove object from hash table * @ht: hash table * @obj: pointer to hash head inside object * @params: hash table parameters * * Since the hash chain is single linked, the removal operation needs to * walk the bucket chain upon removal. The removal operation is thus * considerable slow if the hash table is not correctly sized. * * Will automatically shrink the table if permitted when residency drops * below 30%. * * Returns zero on success, -ENOENT if the entry could not be found. */ static __always_inline int rhashtable_remove_fast( struct rhashtable *ht, struct rhash_head *obj, const struct rhashtable_params params) { return __rhashtable_remove_fast(ht, obj, params, false); } /** * rhltable_remove - remove object from hash list table * @hlt: hash list table * @list: pointer to hash list head inside object * @params: hash table parameters * * Since the hash chain is single linked, the removal operation needs to * walk the bucket chain upon removal. The removal operation is thus * considerably slower if the hash table is not correctly sized. * * Will automatically shrink the table if permitted when residency drops * below 30% * * Returns zero on success, -ENOENT if the entry could not be found. */ static __always_inline int rhltable_remove( struct rhltable *hlt, struct rhlist_head *list, const struct rhashtable_params params) { return __rhashtable_remove_fast(&hlt->ht, &list->rhead, params, true); } /* Internal function, please use rhashtable_replace_fast() instead */ static __always_inline int __rhashtable_replace_fast( struct rhashtable *ht, struct bucket_table *tbl, struct rhash_head *obj_old, struct rhash_head *obj_new, const struct rhashtable_params params) { struct rhash_lock_head __rcu **bkt; struct rhash_head __rcu **pprev; struct rhash_head *he; unsigned long flags; unsigned int hash; int err = -ENOENT; /* Minimally, the old and new objects must have same hash * (which should mean identifiers are the same). */ hash = rht_head_hashfn(ht, tbl, obj_old, params); if (hash != rht_head_hashfn(ht, tbl, obj_new, params)) return -EINVAL; bkt = rht_bucket_var(tbl, hash); if (!bkt) return -ENOENT; pprev = NULL; flags = rht_lock(tbl, bkt); rht_for_each_from(he, rht_ptr(bkt, tbl, hash), tbl, hash) { if (he != obj_old) { pprev = &he->next; continue; } rcu_assign_pointer(obj_new->next, obj_old->next); if (pprev) { rcu_assign_pointer(*pprev, obj_new); rht_unlock(tbl, bkt, flags); } else { rht_assign_unlock(tbl, bkt, obj_new, flags); } err = 0; goto unlocked; } rht_unlock(tbl, bkt, flags); unlocked: return err; } /** * rhashtable_replace_fast - replace an object in hash table * @ht: hash table * @obj_old: pointer to hash head inside object being replaced * @obj_new: pointer to hash head inside object which is new * @params: hash table parameters * * Replacing an object doesn't affect the number of elements in the hash table * or bucket, so we don't need to worry about shrinking or expanding the * table here. * * Returns zero on success, -ENOENT if the entry could not be found, * -EINVAL if hash is not the same for the old and new objects. */ static __always_inline int rhashtable_replace_fast( struct rhashtable *ht, struct rhash_head *obj_old, struct rhash_head *obj_new, const struct rhashtable_params params) { struct bucket_table *tbl; int err; rcu_read_lock(); tbl = rht_dereference_rcu(ht->tbl, ht); /* Because we have already taken (and released) the bucket * lock in old_tbl, if we find that future_tbl is not yet * visible then that guarantees the entry to still be in * the old tbl if it exists. */ while ((err = __rhashtable_replace_fast(ht, tbl, obj_old, obj_new, params)) && (tbl = rht_dereference_rcu(tbl->future_tbl, ht))) ; rcu_read_unlock(); return err; } /** * rhltable_walk_enter - Initialise an iterator * @hlt: Table to walk over * @iter: Hash table Iterator * * This function prepares a hash table walk. * * Note that if you restart a walk after rhashtable_walk_stop you * may see the same object twice. Also, you may miss objects if * there are removals in between rhashtable_walk_stop and the next * call to rhashtable_walk_start. * * For a completely stable walk you should construct your own data * structure outside the hash table. * * This function may be called from any process context, including * non-preemptable context, but cannot be called from softirq or * hardirq context. * * You must call rhashtable_walk_exit after this function returns. */ static inline void rhltable_walk_enter(struct rhltable *hlt, struct rhashtable_iter *iter) { rhashtable_walk_enter(&hlt->ht, iter); } /** * rhltable_free_and_destroy - free elements and destroy hash list table * @hlt: the hash list table to destroy * @free_fn: callback to release resources of element * @arg: pointer passed to free_fn * * See documentation for rhashtable_free_and_destroy. */ static inline void rhltable_free_and_destroy(struct rhltable *hlt, void (*free_fn)(void *ptr, void *arg), void *arg) { rhashtable_free_and_destroy(&hlt->ht, free_fn, arg); } static inline void rhltable_destroy(struct rhltable *hlt) { rhltable_free_and_destroy(hlt, NULL, NULL); } #endif /* _LINUX_RHASHTABLE_H */
13 12 8 1 8 1 1 1 1 1 1 1 1 2 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 // SPDX-License-Identifier: GPL-2.0-or-later /* * * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) */ #include <linux/capability.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/in.h> #include <linux/kernel.h> #include <linux/timer.h> #include <linux/string.h> #include <linux/sockios.h> #include <linux/net.h> #include <linux/spinlock.h> #include <linux/slab.h> #include <net/ax25.h> #include <linux/inet.h> #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/skbuff.h> #include <net/sock.h> #include <linux/uaccess.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/list.h> #include <linux/notifier.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/stat.h> #include <linux/sysctl.h> #include <linux/export.h> #include <net/ip.h> #include <net/arp.h> /* * Callsign/UID mapper. This is in kernel space for security on multi-amateur machines. */ static HLIST_HEAD(ax25_uid_list); static DEFINE_RWLOCK(ax25_uid_lock); int ax25_uid_policy; EXPORT_SYMBOL(ax25_uid_policy); ax25_uid_assoc *ax25_findbyuid(kuid_t uid) { ax25_uid_assoc *ax25_uid, *res = NULL; read_lock(&ax25_uid_lock); ax25_uid_for_each(ax25_uid, &ax25_uid_list) { if (uid_eq(ax25_uid->uid, uid)) { ax25_uid_hold(ax25_uid); res = ax25_uid; break; } } read_unlock(&ax25_uid_lock); return res; } EXPORT_SYMBOL(ax25_findbyuid); int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax) { ax25_uid_assoc *ax25_uid; ax25_uid_assoc *user; unsigned long res; switch (cmd) { case SIOCAX25GETUID: res = -ENOENT; read_lock(&ax25_uid_lock); ax25_uid_for_each(ax25_uid, &ax25_uid_list) { if (ax25cmp(&sax->sax25_call, &ax25_uid->call) == 0) { res = from_kuid_munged(current_user_ns(), ax25_uid->uid); break; } } read_unlock(&ax25_uid_lock); return res; case SIOCAX25ADDUID: { kuid_t sax25_kuid; if (!capable(CAP_NET_ADMIN)) return -EPERM; sax25_kuid = make_kuid(current_user_ns(), sax->sax25_uid); if (!uid_valid(sax25_kuid)) return -EINVAL; user = ax25_findbyuid(sax25_kuid); if (user) { ax25_uid_put(user); return -EEXIST; } if (sax->sax25_uid == 0) return -EINVAL; if ((ax25_uid = kmalloc(sizeof(*ax25_uid), GFP_KERNEL)) == NULL) return -ENOMEM; refcount_set(&ax25_uid->refcount, 1); ax25_uid->uid = sax25_kuid; ax25_uid->call = sax->sax25_call; write_lock(&ax25_uid_lock); hlist_add_head(&ax25_uid->uid_node, &ax25_uid_list); write_unlock(&ax25_uid_lock); return 0; } case SIOCAX25DELUID: if (!capable(CAP_NET_ADMIN)) return -EPERM; ax25_uid = NULL; write_lock(&ax25_uid_lock); ax25_uid_for_each(ax25_uid, &ax25_uid_list) { if (ax25cmp(&sax->sax25_call, &ax25_uid->call) == 0) break; } if (ax25_uid == NULL) { write_unlock(&ax25_uid_lock); return -ENOENT; } hlist_del_init(&ax25_uid->uid_node); ax25_uid_put(ax25_uid); write_unlock(&ax25_uid_lock); return 0; default: return -EINVAL; } return -EINVAL; /*NOTREACHED */ } #ifdef CONFIG_PROC_FS static void *ax25_uid_seq_start(struct seq_file *seq, loff_t *pos) __acquires(ax25_uid_lock) { read_lock(&ax25_uid_lock); return seq_hlist_start_head(&ax25_uid_list, *pos); } static void *ax25_uid_seq_next(struct seq_file *seq, void *v, loff_t *pos) { return seq_hlist_next(v, &ax25_uid_list, pos); } static void ax25_uid_seq_stop(struct seq_file *seq, void *v) __releases(ax25_uid_lock) { read_unlock(&ax25_uid_lock); } static int ax25_uid_seq_show(struct seq_file *seq, void *v) { char buf[11]; if (v == SEQ_START_TOKEN) seq_printf(seq, "Policy: %d\n", ax25_uid_policy); else { struct ax25_uid_assoc *pt; pt = hlist_entry(v, struct ax25_uid_assoc, uid_node); seq_printf(seq, "%6d %s\n", from_kuid_munged(seq_user_ns(seq), pt->uid), ax2asc(buf, &pt->call)); } return 0; } const struct seq_operations ax25_uid_seqops = { .start = ax25_uid_seq_start, .next = ax25_uid_seq_next, .stop = ax25_uid_seq_stop, .show = ax25_uid_seq_show, }; #endif /* * Free all memory associated with UID/Callsign structures. */ void __exit ax25_uid_free(void) { ax25_uid_assoc *ax25_uid; write_lock(&ax25_uid_lock); again: ax25_uid_for_each(ax25_uid, &ax25_uid_list) { hlist_del_init(&ax25_uid->uid_node); ax25_uid_put(ax25_uid); goto again; } write_unlock(&ax25_uid_lock); }
1 1 7 1 5 4 1 3 1 3 3 3 3 1 3 1 4 5 5 5 5 1 1 1 6 2 2 179 179 170 12 5 4 4 3 2 1 1 2 154 59 28 12 17 17 579 575 154 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 /* SPDX-License-Identifier: GPL-2.0 */ #include <linux/init.h> #include <linux/module.h> #include <linux/netfilter.h> #include <net/flow_offload.h> #include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_offload.h> #include <net/pkt_cls.h> static struct nft_flow_rule *nft_flow_rule_alloc(int num_actions) { struct nft_flow_rule *flow; flow = kzalloc(sizeof(struct nft_flow_rule), GFP_KERNEL); if (!flow) return NULL; flow->rule = flow_rule_alloc(num_actions); if (!flow->rule) { kfree(flow); return NULL; } flow->rule->match.dissector = &flow->match.dissector; flow->rule->match.mask = &flow->match.mask; flow->rule->match.key = &flow->match.key; return flow; } void nft_flow_rule_set_addr_type(struct nft_flow_rule *flow, enum flow_dissector_key_id addr_type) { struct nft_flow_match *match = &flow->match; struct nft_flow_key *mask = &match->mask; struct nft_flow_key *key = &match->key; if (match->dissector.used_keys & BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL)) return; key->control.addr_type = addr_type; mask->control.addr_type = 0xffff; match->dissector.used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL); match->dissector.offset[FLOW_DISSECTOR_KEY_CONTROL] = offsetof(struct nft_flow_key, control); } struct nft_offload_ethertype { __be16 value; __be16 mask; }; static void nft_flow_rule_transfer_vlan(struct nft_offload_ctx *ctx, struct nft_flow_rule *flow) { struct nft_flow_match *match = &flow->match; struct nft_offload_ethertype ethertype = { .value = match->key.basic.n_proto, .mask = match->mask.basic.n_proto, }; if (match->dissector.used_keys & BIT_ULL(FLOW_DISSECTOR_KEY_VLAN) && (match->key.vlan.vlan_tpid == htons(ETH_P_8021Q) || match->key.vlan.vlan_tpid == htons(ETH_P_8021AD))) { match->key.basic.n_proto = match->key.cvlan.vlan_tpid; match->mask.basic.n_proto = match->mask.cvlan.vlan_tpid; match->key.cvlan.vlan_tpid = match->key.vlan.vlan_tpid; match->mask.cvlan.vlan_tpid = match->mask.vlan.vlan_tpid; match->key.vlan.vlan_tpid = ethertype.value; match->mask.vlan.vlan_tpid = ethertype.mask; match->dissector.offset[FLOW_DISSECTOR_KEY_CVLAN] = offsetof(struct nft_flow_key, cvlan); match->dissector.used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_CVLAN); } else if (match->dissector.used_keys & BIT_ULL(FLOW_DISSECTOR_KEY_BASIC) && (match->key.basic.n_proto == htons(ETH_P_8021Q) || match->key.basic.n_proto == htons(ETH_P_8021AD))) { match->key.basic.n_proto = match->key.vlan.vlan_tpid; match->mask.basic.n_proto = match->mask.vlan.vlan_tpid; match->key.vlan.vlan_tpid = ethertype.value; match->mask.vlan.vlan_tpid = ethertype.mask; match->dissector.offset[FLOW_DISSECTOR_KEY_VLAN] = offsetof(struct nft_flow_key, vlan); match->dissector.used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_VLAN); } } struct nft_flow_rule *nft_flow_rule_create(struct net *net, const struct nft_rule *rule) { struct nft_offload_ctx *ctx; struct nft_flow_rule *flow; int num_actions = 0, err; struct nft_expr *expr; expr = nft_expr_first(rule); while (nft_expr_more(rule, expr)) { if (expr->ops->offload_action && expr->ops->offload_action(expr)) num_actions++; expr = nft_expr_next(expr); } if (num_actions == 0) return ERR_PTR(-EOPNOTSUPP); flow = nft_flow_rule_alloc(num_actions); if (!flow) return ERR_PTR(-ENOMEM); expr = nft_expr_first(rule); ctx = kzalloc(sizeof(struct nft_offload_ctx), GFP_KERNEL); if (!ctx) { err = -ENOMEM; goto err_out; } ctx->net = net; ctx->dep.type = NFT_OFFLOAD_DEP_UNSPEC; while (nft_expr_more(rule, expr)) { if (!expr->ops->offload) { err = -EOPNOTSUPP; goto err_out; } err = expr->ops->offload(ctx, flow, expr); if (err < 0) goto err_out; expr = nft_expr_next(expr); } nft_flow_rule_transfer_vlan(ctx, flow); flow->proto = ctx->dep.l3num; kfree(ctx); return flow; err_out: kfree(ctx); nft_flow_rule_destroy(flow); return ERR_PTR(err); } void nft_flow_rule_destroy(struct nft_flow_rule *flow) { struct flow_action_entry *entry; int i; flow_action_for_each(i, entry, &flow->rule->action) { switch (entry->id) { case FLOW_ACTION_REDIRECT: case FLOW_ACTION_MIRRED: dev_put(entry->dev); break; default: break; } } kfree(flow->rule); kfree(flow); } void nft_offload_set_dependency(struct nft_offload_ctx *ctx, enum nft_offload_dep_type type) { ctx->dep.type = type; } void nft_offload_update_dependency(struct nft_offload_ctx *ctx, const void *data, u32 len) { switch (ctx->dep.type) { case NFT_OFFLOAD_DEP_NETWORK: WARN_ON(len != sizeof(__u16)); memcpy(&ctx->dep.l3num, data, sizeof(__u16)); break; case NFT_OFFLOAD_DEP_TRANSPORT: WARN_ON(len != sizeof(__u8)); memcpy(&ctx->dep.protonum, data, sizeof(__u8)); break; default: break; } ctx->dep.type = NFT_OFFLOAD_DEP_UNSPEC; } static void nft_flow_offload_common_init(struct flow_cls_common_offload *common, __be16 proto, int priority, struct netlink_ext_ack *extack) { common->protocol = proto; common->prio = priority; common->extack = extack; } static int nft_setup_cb_call(enum tc_setup_type type, void *type_data, struct list_head *cb_list) { struct flow_block_cb *block_cb; int err; list_for_each_entry(block_cb, cb_list, list) { err = block_cb->cb(type, type_data, block_cb->cb_priv); if (err < 0) return err; } return 0; } static int nft_chain_offload_priority(const struct nft_base_chain *basechain) { if (basechain->ops.priority <= 0 || basechain->ops.priority > USHRT_MAX) return -1; return 0; } bool nft_chain_offload_support(const struct nft_base_chain *basechain) { struct nf_hook_ops *ops; struct net_device *dev; struct nft_hook *hook; if (nft_chain_offload_priority(basechain) < 0) return false; list_for_each_entry(hook, &basechain->hook_list, list) { list_for_each_entry(ops, &hook->ops_list, list) { if (ops->pf != NFPROTO_NETDEV || ops->hooknum != NF_NETDEV_INGRESS) return false; dev = ops->dev; if (!dev->netdev_ops->ndo_setup_tc && !flow_indr_dev_exists()) return false; } } return true; } static void nft_flow_cls_offload_setup(struct flow_cls_offload *cls_flow, const struct nft_base_chain *basechain, const struct nft_rule *rule, const struct nft_flow_rule *flow, struct netlink_ext_ack *extack, enum flow_cls_command command) { __be16 proto = ETH_P_ALL; memset(cls_flow, 0, sizeof(*cls_flow)); if (flow) proto = flow->proto; nft_flow_offload_common_init(&cls_flow->common, proto, basechain->ops.priority, extack); cls_flow->command = command; cls_flow->cookie = (unsigned long) rule; if (flow) cls_flow->rule = flow->rule; } static int nft_flow_offload_cmd(const struct nft_chain *chain, const struct nft_rule *rule, struct nft_flow_rule *flow, enum flow_cls_command command, struct flow_cls_offload *cls_flow) { struct netlink_ext_ack extack = {}; struct nft_base_chain *basechain; if (!nft_is_base_chain(chain)) return -EOPNOTSUPP; basechain = nft_base_chain(chain); nft_flow_cls_offload_setup(cls_flow, basechain, rule, flow, &extack, command); return nft_setup_cb_call(TC_SETUP_CLSFLOWER, cls_flow, &basechain->flow_block.cb_list); } static int nft_flow_offload_rule(const struct nft_chain *chain, struct nft_rule *rule, struct nft_flow_rule *flow, enum flow_cls_command command) { struct flow_cls_offload cls_flow; return nft_flow_offload_cmd(chain, rule, flow, command, &cls_flow); } int nft_flow_rule_stats(const struct nft_chain *chain, const struct nft_rule *rule) { struct flow_cls_offload cls_flow = {}; struct nft_expr *expr, *next; int err; err = nft_flow_offload_cmd(chain, rule, NULL, FLOW_CLS_STATS, &cls_flow); if (err < 0) return err; nft_rule_for_each_expr(expr, next, rule) { if (expr->ops->offload_stats) expr->ops->offload_stats(expr, &cls_flow.stats); } return 0; } static int nft_flow_offload_bind(struct flow_block_offload *bo, struct nft_base_chain *basechain) { list_splice(&bo->cb_list, &basechain->flow_block.cb_list); return 0; } static int nft_flow_offload_unbind(struct flow_block_offload *bo, struct nft_base_chain *basechain) { struct flow_block_cb *block_cb, *next; struct flow_cls_offload cls_flow; struct netlink_ext_ack extack; struct nft_chain *chain; struct nft_rule *rule; chain = &basechain->chain; list_for_each_entry(rule, &chain->rules, list) { memset(&extack, 0, sizeof(extack)); nft_flow_cls_offload_setup(&cls_flow, basechain, rule, NULL, &extack, FLOW_CLS_DESTROY); nft_setup_cb_call(TC_SETUP_CLSFLOWER, &cls_flow, &bo->cb_list); } list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) { list_del(&block_cb->list); flow_block_cb_free(block_cb); } return 0; } static int nft_block_setup(struct nft_base_chain *basechain, struct flow_block_offload *bo, enum flow_block_command cmd) { int err; switch (cmd) { case FLOW_BLOCK_BIND: err = nft_flow_offload_bind(bo, basechain); break; case FLOW_BLOCK_UNBIND: err = nft_flow_offload_unbind(bo, basechain); break; default: WARN_ON_ONCE(1); err = -EOPNOTSUPP; } return err; } static void nft_flow_block_offload_init(struct flow_block_offload *bo, struct net *net, enum flow_block_command cmd, struct nft_base_chain *basechain, struct netlink_ext_ack *extack) { memset(bo, 0, sizeof(*bo)); bo->net = net; bo->block = &basechain->flow_block; bo->command = cmd; bo->binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS; bo->extack = extack; bo->cb_list_head = &basechain->flow_block.cb_list; INIT_LIST_HEAD(&bo->cb_list); } static int nft_block_offload_cmd(struct nft_base_chain *chain, struct net_device *dev, enum flow_block_command cmd) { struct netlink_ext_ack extack = {}; struct flow_block_offload bo; int err; nft_flow_block_offload_init(&bo, dev_net(dev), cmd, chain, &extack); err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo); if (err < 0) return err; return nft_block_setup(chain, &bo, cmd); } static void nft_indr_block_cleanup(struct flow_block_cb *block_cb) { struct nft_base_chain *basechain = block_cb->indr.data; struct net_device *dev = block_cb->indr.dev; struct netlink_ext_ack extack = {}; struct nftables_pernet *nft_net; struct net *net = dev_net(dev); struct flow_block_offload bo; nft_flow_block_offload_init(&bo, dev_net(dev), FLOW_BLOCK_UNBIND, basechain, &extack); nft_net = nft_pernet(net); mutex_lock(&nft_net->commit_mutex); list_del(&block_cb->driver_list); list_move(&block_cb->list, &bo.cb_list); nft_flow_offload_unbind(&bo, basechain); mutex_unlock(&nft_net->commit_mutex); } static int nft_indr_block_offload_cmd(struct nft_base_chain *basechain, struct net_device *dev, enum flow_block_command cmd) { struct netlink_ext_ack extack = {}; struct flow_block_offload bo; int err; nft_flow_block_offload_init(&bo, dev_net(dev), cmd, basechain, &extack); err = flow_indr_dev_setup_offload(dev, NULL, TC_SETUP_BLOCK, basechain, &bo, nft_indr_block_cleanup); if (err < 0) return err; if (list_empty(&bo.cb_list)) return -EOPNOTSUPP; return nft_block_setup(basechain, &bo, cmd); } static int nft_chain_offload_cmd(struct nft_base_chain *basechain, struct net_device *dev, enum flow_block_command cmd) { int err; if (dev->netdev_ops->ndo_setup_tc) err = nft_block_offload_cmd(basechain, dev, cmd); else err = nft_indr_block_offload_cmd(basechain, dev, cmd); return err; } static int nft_flow_block_chain(struct nft_base_chain *basechain, const struct net_device *this_dev, enum flow_block_command cmd) { struct nf_hook_ops *ops; struct nft_hook *hook; int err, i = 0; list_for_each_entry(hook, &basechain->hook_list, list) { list_for_each_entry(ops, &hook->ops_list, list) { if (this_dev && this_dev != ops->dev) continue; err = nft_chain_offload_cmd(basechain, ops->dev, cmd); if (err < 0 && cmd == FLOW_BLOCK_BIND) { if (!this_dev) goto err_flow_block; return err; } i++; } } return 0; err_flow_block: list_for_each_entry(hook, &basechain->hook_list, list) { list_for_each_entry(ops, &hook->ops_list, list) { if (i-- <= 0) break; nft_chain_offload_cmd(basechain, ops->dev, FLOW_BLOCK_UNBIND); } } return err; } static int nft_flow_offload_chain(struct nft_chain *chain, u8 *ppolicy, enum flow_block_command cmd) { struct nft_base_chain *basechain; u8 policy; if (!nft_is_base_chain(chain)) return -EOPNOTSUPP; basechain = nft_base_chain(chain); policy = ppolicy ? *ppolicy : basechain->policy; /* Only default policy to accept is supported for now. */ if (cmd == FLOW_BLOCK_BIND && policy == NF_DROP) return -EOPNOTSUPP; return nft_flow_block_chain(basechain, NULL, cmd); } static void nft_flow_rule_offload_abort(struct net *net, struct nft_trans *trans) { struct nftables_pernet *nft_net = nft_pernet(net); int err = 0; list_for_each_entry_continue_reverse(trans, &nft_net->commit_list, list) { if (trans->table->family != NFPROTO_NETDEV) continue; switch (trans->msg_type) { case NFT_MSG_NEWCHAIN: if (!(nft_trans_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) || nft_trans_chain_update(trans)) continue; err = nft_flow_offload_chain(nft_trans_chain(trans), NULL, FLOW_BLOCK_UNBIND); break; case NFT_MSG_DELCHAIN: if (!(nft_trans_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)) continue; err = nft_flow_offload_chain(nft_trans_chain(trans), NULL, FLOW_BLOCK_BIND); break; case NFT_MSG_NEWRULE: if (!(nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)) continue; err = nft_flow_offload_rule(nft_trans_rule_chain(trans), nft_trans_rule(trans), NULL, FLOW_CLS_DESTROY); break; case NFT_MSG_DELRULE: if (!(nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)) continue; err = nft_flow_offload_rule(nft_trans_rule_chain(trans), nft_trans_rule(trans), nft_trans_flow_rule(trans), FLOW_CLS_REPLACE); break; } if (WARN_ON_ONCE(err)) break; } } int nft_flow_rule_offload_commit(struct net *net) { struct nftables_pernet *nft_net = nft_pernet(net); struct nft_trans *trans; int err = 0; u8 policy; list_for_each_entry(trans, &nft_net->commit_list, list) { if (trans->table->family != NFPROTO_NETDEV) continue; switch (trans->msg_type) { case NFT_MSG_NEWCHAIN: if (!(nft_trans_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) || nft_trans_chain_update(trans)) continue; policy = nft_trans_chain_policy(trans); err = nft_flow_offload_chain(nft_trans_chain(trans), &policy, FLOW_BLOCK_BIND); break; case NFT_MSG_DELCHAIN: if (!(nft_trans_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)) continue; policy = nft_trans_chain_policy(trans); err = nft_flow_offload_chain(nft_trans_chain(trans), &policy, FLOW_BLOCK_UNBIND); break; case NFT_MSG_NEWRULE: if (!(nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)) continue; if (trans->flags & NLM_F_REPLACE || !(trans->flags & NLM_F_APPEND)) { err = -EOPNOTSUPP; break; } err = nft_flow_offload_rule(nft_trans_rule_chain(trans), nft_trans_rule(trans), nft_trans_flow_rule(trans), FLOW_CLS_REPLACE); break; case NFT_MSG_DELRULE: if (!(nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)) continue; err = nft_flow_offload_rule(nft_trans_rule_chain(trans), nft_trans_rule(trans), NULL, FLOW_CLS_DESTROY); break; } if (err) { nft_flow_rule_offload_abort(net, trans); break; } } return err; } static struct nft_chain *__nft_offload_get_chain(const struct nftables_pernet *nft_net, struct net_device *dev) { struct nft_base_chain *basechain; struct nft_hook *hook, *found; const struct nft_table *table; struct nft_chain *chain; list_for_each_entry(table, &nft_net->tables, list) { if (table->family != NFPROTO_NETDEV) continue; list_for_each_entry(chain, &table->chains, list) { if (!nft_is_base_chain(chain) || !(chain->flags & NFT_CHAIN_HW_OFFLOAD)) continue; found = NULL; basechain = nft_base_chain(chain); list_for_each_entry(hook, &basechain->hook_list, list) { if (!nft_hook_find_ops(hook, dev)) continue; found = hook; break; } if (!found) continue; return chain; } } return NULL; } static int nft_offload_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct nftables_pernet *nft_net; struct net *net = dev_net(dev); struct nft_chain *chain; if (event != NETDEV_UNREGISTER) return NOTIFY_DONE; nft_net = nft_pernet(net); mutex_lock(&nft_net->commit_mutex); chain = __nft_offload_get_chain(nft_net, dev); if (chain) nft_flow_block_chain(nft_base_chain(chain), dev, FLOW_BLOCK_UNBIND); mutex_unlock(&nft_net->commit_mutex); return NOTIFY_DONE; } static struct notifier_block nft_offload_netdev_notifier = { .notifier_call = nft_offload_netdev_event, }; int nft_offload_init(void) { return register_netdevice_notifier(&nft_offload_netdev_notifier); } void nft_offload_exit(void) { unregister_netdevice_notifier(&nft_offload_netdev_notifier); }
3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 // SPDX-License-Identifier: GPL-2.0-or-later /* * * Copyright (C) Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk) * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) * Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de) * Copyright (C) Frederic Rible F1OAT (frible@teaser.fr) */ #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/in.h> #include <linux/kernel.h> #include <linux/timer.h> #include <linux/string.h> #include <linux/sockios.h> #include <linux/net.h> #include <linux/slab.h> #include <net/ax25.h> #include <linux/inet.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <net/sock.h> #include <net/tcp_states.h> #include <linux/uaccess.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> /* * This routine purges all the queues of frames. */ void ax25_clear_queues(ax25_cb *ax25) { skb_queue_purge(&ax25->write_queue); skb_queue_purge(&ax25->ack_queue); skb_queue_purge(&ax25->reseq_queue); skb_queue_purge(&ax25->frag_queue); } /* * This routine purges the input queue of those frames that have been * acknowledged. This replaces the boxes labelled "V(a) <- N(r)" on the * SDL diagram. */ void ax25_frames_acked(ax25_cb *ax25, unsigned short nr) { struct sk_buff *skb; /* * Remove all the ack-ed frames from the ack queue. */ if (ax25->va != nr) { while (skb_peek(&ax25->ack_queue) != NULL && ax25->va != nr) { skb = skb_dequeue(&ax25->ack_queue); kfree_skb(skb); ax25->va = (ax25->va + 1) % ax25->modulus; } } } void ax25_requeue_frames(ax25_cb *ax25) { struct sk_buff *skb; /* * Requeue all the un-ack-ed frames on the output queue to be picked * up by ax25_kick called from the timer. This arrangement handles the * possibility of an empty output queue. */ while ((skb = skb_dequeue_tail(&ax25->ack_queue)) != NULL) skb_queue_head(&ax25->write_queue, skb); } /* * Validate that the value of nr is between va and vs. Return true or * false for testing. */ int ax25_validate_nr(ax25_cb *ax25, unsigned short nr) { unsigned short vc = ax25->va; while (vc != ax25->vs) { if (nr == vc) return 1; vc = (vc + 1) % ax25->modulus; } if (nr == ax25->vs) return 1; return 0; } /* * This routine is the centralised routine for parsing the control * information for the different frame formats. */ int ax25_decode(ax25_cb *ax25, struct sk_buff *skb, int *ns, int *nr, int *pf) { unsigned char *frame; int frametype = AX25_ILLEGAL; frame = skb->data; *ns = *nr = *pf = 0; if (ax25->modulus == AX25_MODULUS) { if ((frame[0] & AX25_S) == 0) { frametype = AX25_I; /* I frame - carries NR/NS/PF */ *ns = (frame[0] >> 1) & 0x07; *nr = (frame[0] >> 5) & 0x07; *pf = frame[0] & AX25_PF; } else if ((frame[0] & AX25_U) == 1) { /* S frame - take out PF/NR */ frametype = frame[0] & 0x0F; *nr = (frame[0] >> 5) & 0x07; *pf = frame[0] & AX25_PF; } else if ((frame[0] & AX25_U) == 3) { /* U frame - take out PF */ frametype = frame[0] & ~AX25_PF; *pf = frame[0] & AX25_PF; } skb_pull(skb, 1); } else { if ((frame[0] & AX25_S) == 0) { frametype = AX25_I; /* I frame - carries NR/NS/PF */ *ns = (frame[0] >> 1) & 0x7F; *nr = (frame[1] >> 1) & 0x7F; *pf = frame[1] & AX25_EPF; skb_pull(skb, 2); } else if ((frame[0] & AX25_U) == 1) { /* S frame - take out PF/NR */ frametype = frame[0] & 0x0F; *nr = (frame[1] >> 1) & 0x7F; *pf = frame[1] & AX25_EPF; skb_pull(skb, 2); } else if ((frame[0] & AX25_U) == 3) { /* U frame - take out PF */ frametype = frame[0] & ~AX25_PF; *pf = frame[0] & AX25_PF; skb_pull(skb, 1); } } return frametype; } /* * This routine is called when the HDLC layer internally generates a * command or response for the remote machine ( eg. RR, UA etc. ). * Only supervisory or unnumbered frames are processed. */ void ax25_send_control(ax25_cb *ax25, int frametype, int poll_bit, int type) { struct sk_buff *skb; unsigned char *dptr; if ((skb = alloc_skb(ax25->ax25_dev->dev->hard_header_len + 2, GFP_ATOMIC)) == NULL) return; skb_reserve(skb, ax25->ax25_dev->dev->hard_header_len); skb_reset_network_header(skb); /* Assume a response - address structure for DTE */ if (ax25->modulus == AX25_MODULUS) { dptr = skb_put(skb, 1); *dptr = frametype; *dptr |= (poll_bit) ? AX25_PF : 0; if ((frametype & AX25_U) == AX25_S) /* S frames carry NR */ *dptr |= (ax25->vr << 5); } else { if ((frametype & AX25_U) == AX25_U) { dptr = skb_put(skb, 1); *dptr = frametype; *dptr |= (poll_bit) ? AX25_PF : 0; } else { dptr = skb_put(skb, 2); dptr[0] = frametype; dptr[1] = (ax25->vr << 1); dptr[1] |= (poll_bit) ? AX25_EPF : 0; } } ax25_transmit_buffer(ax25, skb, type); } /* * Send a 'DM' to an unknown connection attempt, or an invalid caller. * * Note: src here is the sender, thus it's the target of the DM */ void ax25_return_dm(struct net_device *dev, ax25_address *src, ax25_address *dest, ax25_digi *digi) { struct sk_buff *skb; char *dptr; ax25_digi retdigi; if (dev == NULL) return; if ((skb = alloc_skb(dev->hard_header_len + 1, GFP_ATOMIC)) == NULL) return; /* Next SABM will get DM'd */ skb_reserve(skb, dev->hard_header_len); skb_reset_network_header(skb); ax25_digi_invert(digi, &retdigi); dptr = skb_put(skb, 1); *dptr = AX25_DM | AX25_PF; /* * Do the address ourselves */ dptr = skb_push(skb, ax25_addr_size(digi)); dptr += ax25_addr_build(dptr, dest, src, &retdigi, AX25_RESPONSE, AX25_MODULUS); ax25_queue_xmit(skb, dev); } /* * Exponential backoff for AX.25 */ void ax25_calculate_t1(ax25_cb *ax25) { int n, t = 2; switch (ax25->backoff) { case 0: break; case 1: t += 2 * ax25->n2count; break; case 2: for (n = 0; n < ax25->n2count; n++) t *= 2; if (t > 8) t = 8; break; } ax25->t1 = t * ax25->rtt; } /* * Calculate the Round Trip Time */ void ax25_calculate_rtt(ax25_cb *ax25) { if (ax25->backoff == 0) return; if (ax25_t1timer_running(ax25) && ax25->n2count == 0) ax25->rtt = (9 * ax25->rtt + ax25->t1 - ax25_display_timer(&ax25->t1timer)) / 10; if (ax25->rtt < AX25_T1CLAMPLO) ax25->rtt = AX25_T1CLAMPLO; if (ax25->rtt > AX25_T1CLAMPHI) ax25->rtt = AX25_T1CLAMPHI; } void ax25_disconnect(ax25_cb *ax25, int reason) { ax25_clear_queues(ax25); if (reason == ENETUNREACH) { timer_delete_sync(&ax25->timer); timer_delete_sync(&ax25->t1timer); timer_delete_sync(&ax25->t2timer); timer_delete_sync(&ax25->t3timer); timer_delete_sync(&ax25->idletimer); } else { if (ax25->sk && !sock_flag(ax25->sk, SOCK_DESTROY)) ax25_stop_heartbeat(ax25); ax25_stop_t1timer(ax25); ax25_stop_t2timer(ax25); ax25_stop_t3timer(ax25); ax25_stop_idletimer(ax25); } ax25->state = AX25_STATE_0; ax25_link_failed(ax25, reason); if (ax25->sk != NULL) { local_bh_disable(); bh_lock_sock(ax25->sk); ax25->sk->sk_state = TCP_CLOSE; ax25->sk->sk_err = reason; ax25->sk->sk_shutdown |= SEND_SHUTDOWN; if (!sock_flag(ax25->sk, SOCK_DEAD)) { ax25->sk->sk_state_change(ax25->sk); sock_set_flag(ax25->sk, SOCK_DEAD); } bh_unlock_sock(ax25->sk); local_bh_enable(); } }
123 126 69 69 5 1 18 18 18 22 22 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 // SPDX-License-Identifier: GPL-2.0-or-later /* * Digital Audio (PCM) abstract layer * Copyright (c) by Jaroslav Kysela <perex@perex.cz> */ #include <linux/time.h> #include <linux/gcd.h> #include <sound/core.h> #include <sound/pcm.h> #include <sound/timer.h> #include "pcm_local.h" /* * Timer functions */ void snd_pcm_timer_resolution_change(struct snd_pcm_substream *substream) { unsigned long rate, mult, fsize, l, post; struct snd_pcm_runtime *runtime = substream->runtime; mult = 1000000000; rate = runtime->rate; if (snd_BUG_ON(!rate)) return; l = gcd(mult, rate); mult /= l; rate /= l; fsize = runtime->period_size; if (snd_BUG_ON(!fsize)) return; l = gcd(rate, fsize); rate /= l; fsize /= l; post = 1; while ((mult * fsize) / fsize != mult) { mult /= 2; post *= 2; } if (rate == 0) { pcm_err(substream->pcm, "pcm timer resolution out of range (rate = %u, period_size = %lu)\n", runtime->rate, runtime->period_size); runtime->timer_resolution = -1; return; } runtime->timer_resolution = (mult * fsize / rate) * post; } static unsigned long snd_pcm_timer_resolution(struct snd_timer * timer) { struct snd_pcm_substream *substream; substream = timer->private_data; return substream->runtime ? substream->runtime->timer_resolution : 0; } static int snd_pcm_timer_start(struct snd_timer * timer) { struct snd_pcm_substream *substream; substream = snd_timer_chip(timer); substream->timer_running = 1; return 0; } static int snd_pcm_timer_stop(struct snd_timer * timer) { struct snd_pcm_substream *substream; substream = snd_timer_chip(timer); substream->timer_running = 0; return 0; } static const struct snd_timer_hardware snd_pcm_timer = { .flags = SNDRV_TIMER_HW_AUTO | SNDRV_TIMER_HW_SLAVE, .resolution = 0, .ticks = 1, .c_resolution = snd_pcm_timer_resolution, .start = snd_pcm_timer_start, .stop = snd_pcm_timer_stop, }; /* * Init functions */ static void snd_pcm_timer_free(struct snd_timer *timer) { struct snd_pcm_substream *substream = timer->private_data; substream->timer = NULL; } void snd_pcm_timer_init(struct snd_pcm_substream *substream) { struct snd_timer_id tid; struct snd_timer *timer; tid.dev_sclass = SNDRV_TIMER_SCLASS_NONE; tid.dev_class = SNDRV_TIMER_CLASS_PCM; tid.card = substream->pcm->card->number; tid.device = substream->pcm->device; tid.subdevice = (substream->number << 1) | (substream->stream & 1); if (snd_timer_new(substream->pcm->card, "PCM", &tid, &timer) < 0) return; sprintf(timer->name, "PCM %s %i-%i-%i", snd_pcm_direction_name(substream->stream), tid.card, tid.device, tid.subdevice); timer->hw = snd_pcm_timer; if (snd_device_register(timer->card, timer) < 0) { snd_device_free(timer->card, timer); return; } timer->private_data = substream; timer->private_free = snd_pcm_timer_free; substream->timer = timer; } void snd_pcm_timer_done(struct snd_pcm_substream *substream) { if (substream->timer) { snd_device_free(substream->pcm->card, substream->timer); substream->timer = NULL; } }
5 5 14 14 14 14 14 5 5 5 5 5 1 1 1 1 1 5 5 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* Authors: Bernard Metzler <bmt@zurich.ibm.com> */ /* Copyright (c) 2008-2019, IBM Corporation */ #include <linux/errno.h> #include <linux/types.h> #include <linux/uaccess.h> #include <linux/vmalloc.h> #include <linux/xarray.h> #include <net/addrconf.h> #include <rdma/iw_cm.h> #include <rdma/ib_verbs.h> #include <rdma/ib_user_verbs.h> #include <rdma/uverbs_ioctl.h> #include "siw.h" #include "siw_verbs.h" #include "siw_mem.h" static int siw_qp_state_to_ib_qp_state[SIW_QP_STATE_COUNT] = { [SIW_QP_STATE_IDLE] = IB_QPS_INIT, [SIW_QP_STATE_RTR] = IB_QPS_RTR, [SIW_QP_STATE_RTS] = IB_QPS_RTS, [SIW_QP_STATE_CLOSING] = IB_QPS_SQD, [SIW_QP_STATE_TERMINATE] = IB_QPS_SQE, [SIW_QP_STATE_ERROR] = IB_QPS_ERR }; static int ib_qp_state_to_siw_qp_state[IB_QPS_ERR + 1] = { [IB_QPS_RESET] = SIW_QP_STATE_IDLE, [IB_QPS_INIT] = SIW_QP_STATE_IDLE, [IB_QPS_RTR] = SIW_QP_STATE_RTR, [IB_QPS_RTS] = SIW_QP_STATE_RTS, [IB_QPS_SQD] = SIW_QP_STATE_CLOSING, [IB_QPS_SQE] = SIW_QP_STATE_TERMINATE, [IB_QPS_ERR] = SIW_QP_STATE_ERROR }; static char ib_qp_state_to_string[IB_QPS_ERR + 1][sizeof("RESET")] = { [IB_QPS_RESET] = "RESET", [IB_QPS_INIT] = "INIT", [IB_QPS_RTR] = "RTR", [IB_QPS_RTS] = "RTS", [IB_QPS_SQD] = "SQD", [IB_QPS_SQE] = "SQE", [IB_QPS_ERR] = "ERR" }; void siw_mmap_free(struct rdma_user_mmap_entry *rdma_entry) { struct siw_user_mmap_entry *entry = to_siw_mmap_entry(rdma_entry); kfree(entry); } int siw_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma) { struct siw_ucontext *uctx = to_siw_ctx(ctx); size_t size = vma->vm_end - vma->vm_start; struct rdma_user_mmap_entry *rdma_entry; struct siw_user_mmap_entry *entry; int rv = -EINVAL; /* * Must be page aligned */ if (vma->vm_start & (PAGE_SIZE - 1)) { pr_warn("siw: mmap not page aligned\n"); return -EINVAL; } rdma_entry = rdma_user_mmap_entry_get(&uctx->base_ucontext, vma); if (!rdma_entry) { siw_dbg(&uctx->sdev->base_dev, "mmap lookup failed: %lu, %#zx\n", vma->vm_pgoff, size); return -EINVAL; } entry = to_siw_mmap_entry(rdma_entry); rv = remap_vmalloc_range(vma, entry->address, 0); if (rv) pr_warn("remap_vmalloc_range failed: %lu, %zu\n", vma->vm_pgoff, size); rdma_user_mmap_entry_put(rdma_entry); return rv; } int siw_alloc_ucontext(struct ib_ucontext *base_ctx, struct ib_udata *udata) { struct siw_device *sdev = to_siw_dev(base_ctx->device); struct siw_ucontext *ctx = to_siw_ctx(base_ctx); struct siw_uresp_alloc_ctx uresp = {}; int rv; if (atomic_inc_return(&sdev->num_ctx) > SIW_MAX_CONTEXT) { rv = -ENOMEM; goto err_out; } ctx->sdev = sdev; uresp.dev_id = sdev->vendor_part_id; if (udata->outlen < sizeof(uresp)) { rv = -EINVAL; goto err_out; } rv = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (rv) goto err_out; siw_dbg(base_ctx->device, "success. now %d context(s)\n", atomic_read(&sdev->num_ctx)); return 0; err_out: atomic_dec(&sdev->num_ctx); siw_dbg(base_ctx->device, "failure %d. now %d context(s)\n", rv, atomic_read(&sdev->num_ctx)); return rv; } void siw_dealloc_ucontext(struct ib_ucontext *base_ctx) { struct siw_ucontext *uctx = to_siw_ctx(base_ctx); atomic_dec(&uctx->sdev->num_ctx); } int siw_query_device(struct ib_device *base_dev, struct ib_device_attr *attr, struct ib_udata *udata) { struct siw_device *sdev = to_siw_dev(base_dev); if (udata->inlen || udata->outlen) return -EINVAL; memset(attr, 0, sizeof(*attr)); /* Revisit atomic caps if RFC 7306 gets supported */ attr->atomic_cap = 0; attr->device_cap_flags = IB_DEVICE_MEM_MGT_EXTENSIONS; attr->kernel_cap_flags = IBK_ALLOW_USER_UNREG; attr->max_cq = sdev->attrs.max_cq; attr->max_cqe = sdev->attrs.max_cqe; attr->max_fast_reg_page_list_len = SIW_MAX_SGE_PBL; attr->max_mr = sdev->attrs.max_mr; attr->max_mw = sdev->attrs.max_mw; attr->max_mr_size = ~0ull; attr->max_pd = sdev->attrs.max_pd; attr->max_qp = sdev->attrs.max_qp; attr->max_qp_init_rd_atom = sdev->attrs.max_ird; attr->max_qp_rd_atom = sdev->attrs.max_ord; attr->max_qp_wr = sdev->attrs.max_qp_wr; attr->max_recv_sge = sdev->attrs.max_sge; attr->max_res_rd_atom = sdev->attrs.max_qp * sdev->attrs.max_ird; attr->max_send_sge = sdev->attrs.max_sge; attr->max_sge_rd = sdev->attrs.max_sge_rd; attr->max_srq = sdev->attrs.max_srq; attr->max_srq_sge = sdev->attrs.max_srq_sge; attr->max_srq_wr = sdev->attrs.max_srq_wr; attr->page_size_cap = PAGE_SIZE; attr->vendor_id = SIW_VENDOR_ID; attr->vendor_part_id = sdev->vendor_part_id; addrconf_addr_eui48((u8 *)&attr->sys_image_guid, sdev->raw_gid); return 0; } int siw_query_port(struct ib_device *base_dev, u32 port, struct ib_port_attr *attr) { struct net_device *ndev; int rv; memset(attr, 0, sizeof(*attr)); rv = ib_get_eth_speed(base_dev, port, &attr->active_speed, &attr->active_width); if (rv) return rv; ndev = ib_device_get_netdev(base_dev, SIW_PORT); if (!ndev) return -ENODEV; attr->gid_tbl_len = 1; attr->max_msg_sz = -1; attr->max_mtu = ib_mtu_int_to_enum(ndev->max_mtu); attr->active_mtu = ib_mtu_int_to_enum(READ_ONCE(ndev->mtu)); attr->state = ib_get_curr_port_state(ndev); attr->phys_state = attr->state == IB_PORT_ACTIVE ? IB_PORT_PHYS_STATE_LINK_UP : IB_PORT_PHYS_STATE_DISABLED; attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP; /* * All zero * * attr->lid = 0; * attr->bad_pkey_cntr = 0; * attr->qkey_viol_cntr = 0; * attr->sm_lid = 0; * attr->lmc = 0; * attr->max_vl_num = 0; * attr->sm_sl = 0; * attr->subnet_timeout = 0; * attr->init_type_repy = 0; */ dev_put(ndev); return rv; } int siw_get_port_immutable(struct ib_device *base_dev, u32 port, struct ib_port_immutable *port_immutable) { struct ib_port_attr attr; int rv = siw_query_port(base_dev, port, &attr); if (rv) return rv; port_immutable->gid_tbl_len = attr.gid_tbl_len; port_immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; return 0; } int siw_query_gid(struct ib_device *base_dev, u32 port, int idx, union ib_gid *gid) { struct siw_device *sdev = to_siw_dev(base_dev); /* subnet_prefix == interface_id == 0; */ memset(gid, 0, sizeof(*gid)); memcpy(gid->raw, sdev->raw_gid, ETH_ALEN); return 0; } int siw_alloc_pd(struct ib_pd *pd, struct ib_udata *udata) { struct siw_device *sdev = to_siw_dev(pd->device); if (atomic_inc_return(&sdev->num_pd) > SIW_MAX_PD) { atomic_dec(&sdev->num_pd); return -ENOMEM; } siw_dbg_pd(pd, "now %d PD's(s)\n", atomic_read(&sdev->num_pd)); return 0; } int siw_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) { struct siw_device *sdev = to_siw_dev(pd->device); siw_dbg_pd(pd, "free PD\n"); atomic_dec(&sdev->num_pd); return 0; } void siw_qp_get_ref(struct ib_qp *base_qp) { siw_qp_get(to_siw_qp(base_qp)); } void siw_qp_put_ref(struct ib_qp *base_qp) { siw_qp_put(to_siw_qp(base_qp)); } static struct rdma_user_mmap_entry * siw_mmap_entry_insert(struct siw_ucontext *uctx, void *address, size_t length, u64 *offset) { struct siw_user_mmap_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL); int rv; *offset = SIW_INVAL_UOBJ_KEY; if (!entry) return NULL; entry->address = address; rv = rdma_user_mmap_entry_insert(&uctx->base_ucontext, &entry->rdma_entry, length); if (rv) { kfree(entry); return NULL; } *offset = rdma_user_mmap_get_offset(&entry->rdma_entry); return &entry->rdma_entry; } /* * siw_create_qp() * * Create QP of requested size on given device. * * @qp: Queue pait * @attrs: Initial QP attributes. * @udata: used to provide QP ID, SQ and RQ size back to user. */ int siw_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs, struct ib_udata *udata) { struct ib_pd *pd = ibqp->pd; struct siw_qp *qp = to_siw_qp(ibqp); struct ib_device *base_dev = pd->device; struct siw_device *sdev = to_siw_dev(base_dev); struct siw_ucontext *uctx = rdma_udata_to_drv_context(udata, struct siw_ucontext, base_ucontext); unsigned long flags; int num_sqe, num_rqe, rv = 0; size_t length; siw_dbg(base_dev, "create new QP\n"); if (attrs->create_flags) return -EOPNOTSUPP; if (atomic_inc_return(&sdev->num_qp) > SIW_MAX_QP) { siw_dbg(base_dev, "too many QP's\n"); rv = -ENOMEM; goto err_atomic; } if (attrs->qp_type != IB_QPT_RC) { siw_dbg(base_dev, "only RC QP's supported\n"); rv = -EOPNOTSUPP; goto err_atomic; } if ((attrs->cap.max_send_wr > SIW_MAX_QP_WR) || (attrs->cap.max_recv_wr > SIW_MAX_QP_WR) || (attrs->cap.max_send_sge > SIW_MAX_SGE) || (attrs->cap.max_recv_sge > SIW_MAX_SGE)) { siw_dbg(base_dev, "QP size error\n"); rv = -EINVAL; goto err_atomic; } if (attrs->cap.max_inline_data > SIW_MAX_INLINE) { siw_dbg(base_dev, "max inline send: %d > %d\n", attrs->cap.max_inline_data, (int)SIW_MAX_INLINE); rv = -EINVAL; goto err_atomic; } /* * NOTE: we don't allow for a QP unable to hold any SQ WQE */ if (attrs->cap.max_send_wr == 0) { siw_dbg(base_dev, "QP must have send queue\n"); rv = -EINVAL; goto err_atomic; } if (!attrs->send_cq || (!attrs->recv_cq && !attrs->srq)) { siw_dbg(base_dev, "send CQ or receive CQ invalid\n"); rv = -EINVAL; goto err_atomic; } init_rwsem(&qp->state_lock); spin_lock_init(&qp->sq_lock); spin_lock_init(&qp->rq_lock); spin_lock_init(&qp->orq_lock); rv = siw_qp_add(sdev, qp); if (rv) goto err_atomic; /* All queue indices are derived from modulo operations * on a free running 'get' (consumer) and 'put' (producer) * unsigned counter. Having queue sizes at power of two * avoids handling counter wrap around. */ num_sqe = roundup_pow_of_two(attrs->cap.max_send_wr); num_rqe = attrs->cap.max_recv_wr; if (num_rqe) num_rqe = roundup_pow_of_two(num_rqe); if (udata) qp->sendq = vmalloc_user(num_sqe * sizeof(struct siw_sqe)); else qp->sendq = vcalloc(num_sqe, sizeof(struct siw_sqe)); if (qp->sendq == NULL) { rv = -ENOMEM; goto err_out_xa; } if (attrs->sq_sig_type != IB_SIGNAL_REQ_WR) { if (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) qp->attrs.flags |= SIW_SIGNAL_ALL_WR; else { rv = -EINVAL; goto err_out_xa; } } qp->pd = pd; qp->scq = to_siw_cq(attrs->send_cq); qp->rcq = to_siw_cq(attrs->recv_cq); if (attrs->srq) { /* * SRQ support. * Verbs 6.3.7: ignore RQ size, if SRQ present * Verbs 6.3.5: do not check PD of SRQ against PD of QP */ qp->srq = to_siw_srq(attrs->srq); qp->attrs.rq_size = 0; siw_dbg(base_dev, "QP [%u]: SRQ attached\n", qp->base_qp.qp_num); } else if (num_rqe) { if (udata) qp->recvq = vmalloc_user(num_rqe * sizeof(struct siw_rqe)); else qp->recvq = vcalloc(num_rqe, sizeof(struct siw_rqe)); if (qp->recvq == NULL) { rv = -ENOMEM; goto err_out_xa; } qp->attrs.rq_size = num_rqe; } qp->attrs.sq_size = num_sqe; qp->attrs.sq_max_sges = attrs->cap.max_send_sge; qp->attrs.rq_max_sges = attrs->cap.max_recv_sge; /* Make those two tunables fixed for now. */ qp->tx_ctx.gso_seg_limit = 1; qp->tx_ctx.zcopy_tx = zcopy_tx; qp->attrs.state = SIW_QP_STATE_IDLE; if (udata) { struct siw_uresp_create_qp uresp = {}; uresp.num_sqe = num_sqe; uresp.num_rqe = num_rqe; uresp.qp_id = qp_id(qp); if (qp->sendq) { length = num_sqe * sizeof(struct siw_sqe); qp->sq_entry = siw_mmap_entry_insert(uctx, qp->sendq, length, &uresp.sq_key); if (!qp->sq_entry) { rv = -ENOMEM; goto err_out_xa; } } if (qp->recvq) { length = num_rqe * sizeof(struct siw_rqe); qp->rq_entry = siw_mmap_entry_insert(uctx, qp->recvq, length, &uresp.rq_key); if (!qp->rq_entry) { uresp.sq_key = SIW_INVAL_UOBJ_KEY; rv = -ENOMEM; goto err_out_xa; } } if (udata->outlen < sizeof(uresp)) { rv = -EINVAL; goto err_out_xa; } rv = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (rv) goto err_out_xa; } qp->tx_cpu = siw_get_tx_cpu(sdev); if (qp->tx_cpu < 0) { rv = -EINVAL; goto err_out_xa; } INIT_LIST_HEAD(&qp->devq); spin_lock_irqsave(&sdev->lock, flags); list_add_tail(&qp->devq, &sdev->qp_list); spin_unlock_irqrestore(&sdev->lock, flags); init_completion(&qp->qp_free); return 0; err_out_xa: xa_erase(&sdev->qp_xa, qp_id(qp)); if (uctx) { rdma_user_mmap_entry_remove(qp->sq_entry); rdma_user_mmap_entry_remove(qp->rq_entry); } vfree(qp->sendq); vfree(qp->recvq); err_atomic: atomic_dec(&sdev->num_qp); return rv; } /* * Minimum siw_query_qp() verb interface. * * @qp_attr_mask is not used but all available information is provided */ int siw_query_qp(struct ib_qp *base_qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) { struct siw_qp *qp; struct net_device *ndev; if (base_qp && qp_attr && qp_init_attr) qp = to_siw_qp(base_qp); else return -EINVAL; ndev = ib_device_get_netdev(base_qp->device, SIW_PORT); if (!ndev) return -ENODEV; qp_attr->qp_state = siw_qp_state_to_ib_qp_state[qp->attrs.state]; qp_attr->cap.max_inline_data = SIW_MAX_INLINE; qp_attr->cap.max_send_wr = qp->attrs.sq_size; qp_attr->cap.max_send_sge = qp->attrs.sq_max_sges; qp_attr->cap.max_recv_wr = qp->attrs.rq_size; qp_attr->cap.max_recv_sge = qp->attrs.rq_max_sges; qp_attr->path_mtu = ib_mtu_int_to_enum(READ_ONCE(ndev->mtu)); qp_attr->max_rd_atomic = qp->attrs.irq_size; qp_attr->max_dest_rd_atomic = qp->attrs.orq_size; qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ; qp_init_attr->qp_type = base_qp->qp_type; qp_init_attr->send_cq = base_qp->send_cq; qp_init_attr->recv_cq = base_qp->recv_cq; qp_init_attr->srq = base_qp->srq; qp_init_attr->cap = qp_attr->cap; dev_put(ndev); return 0; } int siw_verbs_modify_qp(struct ib_qp *base_qp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { struct siw_qp_attrs new_attrs; enum siw_qp_attr_mask siw_attr_mask = 0; struct siw_qp *qp = to_siw_qp(base_qp); int rv = 0; if (!attr_mask) return 0; if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) return -EOPNOTSUPP; memset(&new_attrs, 0, sizeof(new_attrs)); if (attr_mask & IB_QP_ACCESS_FLAGS) { siw_attr_mask = SIW_QP_ATTR_ACCESS_FLAGS; if (attr->qp_access_flags & IB_ACCESS_REMOTE_READ) new_attrs.flags |= SIW_RDMA_READ_ENABLED; if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE) new_attrs.flags |= SIW_RDMA_WRITE_ENABLED; if (attr->qp_access_flags & IB_ACCESS_MW_BIND) new_attrs.flags |= SIW_RDMA_BIND_ENABLED; } if (attr_mask & IB_QP_STATE) { siw_dbg_qp(qp, "desired IB QP state: %s\n", ib_qp_state_to_string[attr->qp_state]); new_attrs.state = ib_qp_state_to_siw_qp_state[attr->qp_state]; if (new_attrs.state > SIW_QP_STATE_RTS) qp->tx_ctx.tx_suspend = 1; siw_attr_mask |= SIW_QP_ATTR_STATE; } if (!siw_attr_mask) goto out; down_write(&qp->state_lock); rv = siw_qp_modify(qp, &new_attrs, siw_attr_mask); up_write(&qp->state_lock); out: return rv; } int siw_destroy_qp(struct ib_qp *base_qp, struct ib_udata *udata) { struct siw_qp *qp = to_siw_qp(base_qp); struct siw_ucontext *uctx = rdma_udata_to_drv_context(udata, struct siw_ucontext, base_ucontext); struct siw_qp_attrs qp_attrs; siw_dbg_qp(qp, "state %d\n", qp->attrs.state); /* * Mark QP as in process of destruction to prevent from * any async callbacks to RDMA core */ qp->attrs.flags |= SIW_QP_IN_DESTROY; qp->rx_stream.rx_suspend = 1; if (uctx) { rdma_user_mmap_entry_remove(qp->sq_entry); rdma_user_mmap_entry_remove(qp->rq_entry); } down_write(&qp->state_lock); qp_attrs.state = SIW_QP_STATE_ERROR; siw_qp_modify(qp, &qp_attrs, SIW_QP_ATTR_STATE); if (qp->cep) { siw_cep_put(qp->cep); qp->cep = NULL; } up_write(&qp->state_lock); qp->scq = qp->rcq = NULL; siw_qp_put(qp); wait_for_completion(&qp->qp_free); return 0; } /* * siw_copy_inline_sgl() * * Prepare sgl of inlined data for sending. For userland callers * function checks if given buffer addresses and len's are within * process context bounds. * Data from all provided sge's are copied together into the wqe, * referenced by a single sge. */ static int siw_copy_inline_sgl(const struct ib_send_wr *core_wr, struct siw_sqe *sqe) { struct ib_sge *core_sge = core_wr->sg_list; void *kbuf = &sqe->sge[1]; int num_sge = core_wr->num_sge, bytes = 0; sqe->sge[0].laddr = (uintptr_t)kbuf; sqe->sge[0].lkey = 0; while (num_sge--) { if (!core_sge->length) { core_sge++; continue; } bytes += core_sge->length; if (bytes > SIW_MAX_INLINE) { bytes = -EINVAL; break; } memcpy(kbuf, ib_virt_dma_to_ptr(core_sge->addr), core_sge->length); kbuf += core_sge->length; core_sge++; } sqe->sge[0].length = max(bytes, 0); sqe->num_sge = bytes > 0 ? 1 : 0; return bytes; } /* Complete SQ WR's without processing */ static int siw_sq_flush_wr(struct siw_qp *qp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr) { int rv = 0; while (wr) { struct siw_sqe sqe = {}; switch (wr->opcode) { case IB_WR_RDMA_WRITE: sqe.opcode = SIW_OP_WRITE; break; case IB_WR_RDMA_READ: sqe.opcode = SIW_OP_READ; break; case IB_WR_RDMA_READ_WITH_INV: sqe.opcode = SIW_OP_READ_LOCAL_INV; break; case IB_WR_SEND: sqe.opcode = SIW_OP_SEND; break; case IB_WR_SEND_WITH_IMM: sqe.opcode = SIW_OP_SEND_WITH_IMM; break; case IB_WR_SEND_WITH_INV: sqe.opcode = SIW_OP_SEND_REMOTE_INV; break; case IB_WR_LOCAL_INV: sqe.opcode = SIW_OP_INVAL_STAG; break; case IB_WR_REG_MR: sqe.opcode = SIW_OP_REG_MR; break; default: rv = -EINVAL; break; } if (!rv) { sqe.id = wr->wr_id; rv = siw_sqe_complete(qp, &sqe, 0, SIW_WC_WR_FLUSH_ERR); } if (rv) { if (bad_wr) *bad_wr = wr; break; } wr = wr->next; } return rv; } /* Complete RQ WR's without processing */ static int siw_rq_flush_wr(struct siw_qp *qp, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr) { struct siw_rqe rqe = {}; int rv = 0; while (wr) { rqe.id = wr->wr_id; rv = siw_rqe_complete(qp, &rqe, 0, 0, SIW_WC_WR_FLUSH_ERR); if (rv) { if (bad_wr) *bad_wr = wr; break; } wr = wr->next; } return rv; } /* * siw_post_send() * * Post a list of S-WR's to a SQ. * * @base_qp: Base QP contained in siw QP * @wr: Null terminated list of user WR's * @bad_wr: Points to failing WR in case of synchronous failure. */ int siw_post_send(struct ib_qp *base_qp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr) { struct siw_qp *qp = to_siw_qp(base_qp); struct siw_wqe *wqe = tx_wqe(qp); unsigned long flags; int rv = 0; if (wr && !rdma_is_kernel_res(&qp->base_qp.res)) { siw_dbg_qp(qp, "wr must be empty for user mapped sq\n"); *bad_wr = wr; return -EINVAL; } /* * Try to acquire QP state lock. Must be non-blocking * to accommodate kernel clients needs. */ if (!down_read_trylock(&qp->state_lock)) { if (qp->attrs.state == SIW_QP_STATE_ERROR) { /* * ERROR state is final, so we can be sure * this state will not change as long as the QP * exists. * * This handles an ib_drain_sq() call with * a concurrent request to set the QP state * to ERROR. */ rv = siw_sq_flush_wr(qp, wr, bad_wr); } else { siw_dbg_qp(qp, "QP locked, state %d\n", qp->attrs.state); *bad_wr = wr; rv = -ENOTCONN; } return rv; } if (unlikely(qp->attrs.state != SIW_QP_STATE_RTS)) { if (qp->attrs.state == SIW_QP_STATE_ERROR) { /* * Immediately flush this WR to CQ, if QP * is in ERROR state. SQ is guaranteed to * be empty, so WR complets in-order. * * Typically triggered by ib_drain_sq(). */ rv = siw_sq_flush_wr(qp, wr, bad_wr); } else { siw_dbg_qp(qp, "QP out of state %d\n", qp->attrs.state); *bad_wr = wr; rv = -ENOTCONN; } up_read(&qp->state_lock); return rv; } spin_lock_irqsave(&qp->sq_lock, flags); while (wr) { u32 idx = qp->sq_put % qp->attrs.sq_size; struct siw_sqe *sqe = &qp->sendq[idx]; if (sqe->flags) { siw_dbg_qp(qp, "sq full\n"); rv = -ENOMEM; break; } if (wr->num_sge > qp->attrs.sq_max_sges) { siw_dbg_qp(qp, "too many sge's: %d\n", wr->num_sge); rv = -EINVAL; break; } sqe->id = wr->wr_id; if ((wr->send_flags & IB_SEND_SIGNALED) || (qp->attrs.flags & SIW_SIGNAL_ALL_WR)) sqe->flags |= SIW_WQE_SIGNALLED; if (wr->send_flags & IB_SEND_FENCE) sqe->flags |= SIW_WQE_READ_FENCE; switch (wr->opcode) { case IB_WR_SEND: case IB_WR_SEND_WITH_INV: if (wr->send_flags & IB_SEND_SOLICITED) sqe->flags |= SIW_WQE_SOLICITED; if (!(wr->send_flags & IB_SEND_INLINE)) { siw_copy_sgl(wr->sg_list, sqe->sge, wr->num_sge); sqe->num_sge = wr->num_sge; } else { rv = siw_copy_inline_sgl(wr, sqe); if (rv <= 0) { rv = -EINVAL; break; } sqe->flags |= SIW_WQE_INLINE; sqe->num_sge = 1; } if (wr->opcode == IB_WR_SEND) sqe->opcode = SIW_OP_SEND; else { sqe->opcode = SIW_OP_SEND_REMOTE_INV; sqe->rkey = wr->ex.invalidate_rkey; } break; case IB_WR_RDMA_READ_WITH_INV: case IB_WR_RDMA_READ: /* * iWarp restricts RREAD sink to SGL containing * 1 SGE only. we could relax to SGL with multiple * elements referring the SAME ltag or even sending * a private per-rreq tag referring to a checked * local sgl with MULTIPLE ltag's. */ if (unlikely(wr->num_sge != 1)) { rv = -EINVAL; break; } siw_copy_sgl(wr->sg_list, &sqe->sge[0], 1); /* * NOTE: zero length RREAD is allowed! */ sqe->raddr = rdma_wr(wr)->remote_addr; sqe->rkey = rdma_wr(wr)->rkey; sqe->num_sge = 1; if (wr->opcode == IB_WR_RDMA_READ) sqe->opcode = SIW_OP_READ; else sqe->opcode = SIW_OP_READ_LOCAL_INV; break; case IB_WR_RDMA_WRITE: if (!(wr->send_flags & IB_SEND_INLINE)) { siw_copy_sgl(wr->sg_list, &sqe->sge[0], wr->num_sge); sqe->num_sge = wr->num_sge; } else { rv = siw_copy_inline_sgl(wr, sqe); if (unlikely(rv < 0)) { rv = -EINVAL; break; } sqe->flags |= SIW_WQE_INLINE; sqe->num_sge = 1; } sqe->raddr = rdma_wr(wr)->remote_addr; sqe->rkey = rdma_wr(wr)->rkey; sqe->opcode = SIW_OP_WRITE; break; case IB_WR_REG_MR: sqe->base_mr = (uintptr_t)reg_wr(wr)->mr; sqe->rkey = reg_wr(wr)->key; sqe->access = reg_wr(wr)->access & IWARP_ACCESS_MASK; sqe->opcode = SIW_OP_REG_MR; break; case IB_WR_LOCAL_INV: sqe->rkey = wr->ex.invalidate_rkey; sqe->opcode = SIW_OP_INVAL_STAG; break; default: siw_dbg_qp(qp, "ib wr type %d unsupported\n", wr->opcode); rv = -EINVAL; break; } siw_dbg_qp(qp, "opcode %d, flags 0x%x, wr_id 0x%p\n", sqe->opcode, sqe->flags, (void *)(uintptr_t)sqe->id); if (unlikely(rv < 0)) break; /* make SQE only valid after completely written */ smp_wmb(); sqe->flags |= SIW_WQE_VALID; qp->sq_put++; wr = wr->next; } /* * Send directly if SQ processing is not in progress. * Eventual immediate errors (rv < 0) do not affect the involved * RI resources (Verbs, 8.3.1) and thus do not prevent from SQ * processing, if new work is already pending. But rv must be passed * to caller. */ if (wqe->wr_status != SIW_WR_IDLE) { spin_unlock_irqrestore(&qp->sq_lock, flags); goto skip_direct_sending; } rv = siw_activate_tx(qp); spin_unlock_irqrestore(&qp->sq_lock, flags); if (rv <= 0) goto skip_direct_sending; if (rdma_is_kernel_res(&qp->base_qp.res)) { rv = siw_sq_start(qp); } else { qp->tx_ctx.in_syscall = 1; if (siw_qp_sq_process(qp) != 0 && !(qp->tx_ctx.tx_suspend)) siw_qp_cm_drop(qp, 0); qp->tx_ctx.in_syscall = 0; } skip_direct_sending: up_read(&qp->state_lock); if (rv >= 0) return 0; /* * Immediate error */ siw_dbg_qp(qp, "error %d\n", rv); *bad_wr = wr; return rv; } /* * siw_post_receive() * * Post a list of R-WR's to a RQ. * * @base_qp: Base QP contained in siw QP * @wr: Null terminated list of user WR's * @bad_wr: Points to failing WR in case of synchronous failure. */ int siw_post_receive(struct ib_qp *base_qp, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr) { struct siw_qp *qp = to_siw_qp(base_qp); unsigned long flags; int rv = 0; if (qp->srq || qp->attrs.rq_size == 0) { *bad_wr = wr; return -EINVAL; } if (!rdma_is_kernel_res(&qp->base_qp.res)) { siw_dbg_qp(qp, "no kernel post_recv for user mapped rq\n"); *bad_wr = wr; return -EINVAL; } /* * Try to acquire QP state lock. Must be non-blocking * to accommodate kernel clients needs. */ if (!down_read_trylock(&qp->state_lock)) { if (qp->attrs.state == SIW_QP_STATE_ERROR) { /* * ERROR state is final, so we can be sure * this state will not change as long as the QP * exists. * * This handles an ib_drain_rq() call with * a concurrent request to set the QP state * to ERROR. */ rv = siw_rq_flush_wr(qp, wr, bad_wr); } else { siw_dbg_qp(qp, "QP locked, state %d\n", qp->attrs.state); *bad_wr = wr; rv = -ENOTCONN; } return rv; } if (qp->attrs.state > SIW_QP_STATE_RTS) { if (qp->attrs.state == SIW_QP_STATE_ERROR) { /* * Immediately flush this WR to CQ, if QP * is in ERROR state. RQ is guaranteed to * be empty, so WR complets in-order. * * Typically triggered by ib_drain_rq(). */ rv = siw_rq_flush_wr(qp, wr, bad_wr); } else { siw_dbg_qp(qp, "QP out of state %d\n", qp->attrs.state); *bad_wr = wr; rv = -ENOTCONN; } up_read(&qp->state_lock); return rv; } /* * Serialize potentially multiple producers. * Not needed for single threaded consumer side. */ spin_lock_irqsave(&qp->rq_lock, flags); while (wr) { u32 idx = qp->rq_put % qp->attrs.rq_size; struct siw_rqe *rqe = &qp->recvq[idx]; if (rqe->flags) { siw_dbg_qp(qp, "RQ full\n"); rv = -ENOMEM; break; } if (wr->num_sge > qp->attrs.rq_max_sges) { siw_dbg_qp(qp, "too many sge's: %d\n", wr->num_sge); rv = -EINVAL; break; } rqe->id = wr->wr_id; rqe->num_sge = wr->num_sge; siw_copy_sgl(wr->sg_list, rqe->sge, wr->num_sge); /* make sure RQE is completely written before valid */ smp_wmb(); rqe->flags = SIW_WQE_VALID; qp->rq_put++; wr = wr->next; } spin_unlock_irqrestore(&qp->rq_lock, flags); up_read(&qp->state_lock); if (rv < 0) { siw_dbg_qp(qp, "error %d\n", rv); *bad_wr = wr; } return rv; } int siw_destroy_cq(struct ib_cq *base_cq, struct ib_udata *udata) { struct siw_cq *cq = to_siw_cq(base_cq); struct siw_device *sdev = to_siw_dev(base_cq->device); struct siw_ucontext *ctx = rdma_udata_to_drv_context(udata, struct siw_ucontext, base_ucontext); siw_dbg_cq(cq, "free CQ resources\n"); siw_cq_flush(cq); if (ctx) rdma_user_mmap_entry_remove(cq->cq_entry); atomic_dec(&sdev->num_cq); vfree(cq->queue); return 0; } /* * siw_create_cq() * * Populate CQ of requested size * * @base_cq: CQ as allocated by RDMA midlayer * @attr: Initial CQ attributes * @attrs: uverbs bundle */ int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr, struct uverbs_attr_bundle *attrs) { struct ib_udata *udata = &attrs->driver_udata; struct siw_device *sdev = to_siw_dev(base_cq->device); struct siw_cq *cq = to_siw_cq(base_cq); int rv, size = attr->cqe; if (attr->flags) return -EOPNOTSUPP; if (atomic_inc_return(&sdev->num_cq) > SIW_MAX_CQ) { siw_dbg(base_cq->device, "too many CQ's\n"); rv = -ENOMEM; goto err_out; } if (size < 1 || size > sdev->attrs.max_cqe) { siw_dbg(base_cq->device, "CQ size error: %d\n", size); rv = -EINVAL; goto err_out; } size = roundup_pow_of_two(size); cq->base_cq.cqe = size; cq->num_cqe = size; if (udata) cq->queue = vmalloc_user(size * sizeof(struct siw_cqe) + sizeof(struct siw_cq_ctrl)); else cq->queue = vzalloc(size * sizeof(struct siw_cqe) + sizeof(struct siw_cq_ctrl)); if (cq->queue == NULL) { rv = -ENOMEM; goto err_out; } get_random_bytes(&cq->id, 4); siw_dbg(base_cq->device, "new CQ [%u]\n", cq->id); spin_lock_init(&cq->lock); cq->notify = (struct siw_cq_ctrl *)&cq->queue[size]; if (udata) { struct siw_uresp_create_cq uresp = {}; struct siw_ucontext *ctx = rdma_udata_to_drv_context(udata, struct siw_ucontext, base_ucontext); size_t length = size * sizeof(struct siw_cqe) + sizeof(struct siw_cq_ctrl); cq->cq_entry = siw_mmap_entry_insert(ctx, cq->queue, length, &uresp.cq_key); if (!cq->cq_entry) { rv = -ENOMEM; goto err_out; } uresp.cq_id = cq->id; uresp.num_cqe = size; if (udata->outlen < sizeof(uresp)) { rv = -EINVAL; goto err_out; } rv = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (rv) goto err_out; } return 0; err_out: siw_dbg(base_cq->device, "CQ creation failed: %d", rv); if (cq->queue) { struct siw_ucontext *ctx = rdma_udata_to_drv_context(udata, struct siw_ucontext, base_ucontext); if (ctx) rdma_user_mmap_entry_remove(cq->cq_entry); vfree(cq->queue); } atomic_dec(&sdev->num_cq); return rv; } /* * siw_poll_cq() * * Reap CQ entries if available and copy work completion status into * array of WC's provided by caller. Returns number of reaped CQE's. * * @base_cq: Base CQ contained in siw CQ. * @num_cqe: Maximum number of CQE's to reap. * @wc: Array of work completions to be filled by siw. */ int siw_poll_cq(struct ib_cq *base_cq, int num_cqe, struct ib_wc *wc) { struct siw_cq *cq = to_siw_cq(base_cq); int i; for (i = 0; i < num_cqe; i++) { if (!siw_reap_cqe(cq, wc)) break; wc++; } return i; } /* * siw_req_notify_cq() * * Request notification for new CQE's added to that CQ. * Defined flags: * o SIW_CQ_NOTIFY_SOLICITED lets siw trigger a notification * event if a WQE with notification flag set enters the CQ * o SIW_CQ_NOTIFY_NEXT_COMP lets siw trigger a notification * event if a WQE enters the CQ. * o IB_CQ_REPORT_MISSED_EVENTS: return value will provide the * number of not reaped CQE's regardless of its notification * type and current or new CQ notification settings. * * @base_cq: Base CQ contained in siw CQ. * @flags: Requested notification flags. */ int siw_req_notify_cq(struct ib_cq *base_cq, enum ib_cq_notify_flags flags) { struct siw_cq *cq = to_siw_cq(base_cq); siw_dbg_cq(cq, "flags: 0x%02x\n", flags); if ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED) /* * Enable CQ event for next solicited completion. * and make it visible to all associated producers. */ smp_store_mb(cq->notify->flags, SIW_NOTIFY_SOLICITED); else /* * Enable CQ event for any signalled completion. * and make it visible to all associated producers. */ smp_store_mb(cq->notify->flags, SIW_NOTIFY_ALL); if (flags & IB_CQ_REPORT_MISSED_EVENTS) return cq->cq_put - cq->cq_get; return 0; } /* * siw_dereg_mr() * * Release Memory Region. * * @base_mr: Base MR contained in siw MR. * @udata: points to user context, unused. */ int siw_dereg_mr(struct ib_mr *base_mr, struct ib_udata *udata) { struct siw_mr *mr = to_siw_mr(base_mr); struct siw_device *sdev = to_siw_dev(base_mr->device); siw_dbg_mem(mr->mem, "deregister MR\n"); atomic_dec(&sdev->num_mr); siw_mr_drop_mem(mr); kfree_rcu(mr, rcu); return 0; } /* * siw_reg_user_mr() * * Register Memory Region. * * @pd: Protection Domain * @start: starting address of MR (virtual address) * @len: len of MR * @rnic_va: not used by siw * @rights: MR access rights * @dmah: dma handle * @udata: user buffer to communicate STag and Key. */ struct ib_mr *siw_reg_user_mr(struct ib_pd *pd, u64 start, u64 len, u64 rnic_va, int rights, struct ib_dmah *dmah, struct ib_udata *udata) { struct siw_mr *mr = NULL; struct siw_umem *umem = NULL; struct siw_ureq_reg_mr ureq; struct siw_device *sdev = to_siw_dev(pd->device); int rv; siw_dbg_pd(pd, "start: 0x%p, va: 0x%p, len: %llu\n", (void *)(uintptr_t)start, (void *)(uintptr_t)rnic_va, (unsigned long long)len); if (dmah) return ERR_PTR(-EOPNOTSUPP); if (atomic_inc_return(&sdev->num_mr) > SIW_MAX_MR) { siw_dbg_pd(pd, "too many mr's\n"); rv = -ENOMEM; goto err_out; } if (!len) { rv = -EINVAL; goto err_out; } umem = siw_umem_get(pd->device, start, len, rights); if (IS_ERR(umem)) { rv = PTR_ERR(umem); siw_dbg_pd(pd, "getting user memory failed: %d\n", rv); umem = NULL; goto err_out; } mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) { rv = -ENOMEM; goto err_out; } rv = siw_mr_add_mem(mr, pd, umem, start, len, rights); if (rv) goto err_out; if (udata) { struct siw_uresp_reg_mr uresp = {}; struct siw_mem *mem = mr->mem; if (udata->inlen < sizeof(ureq)) { rv = -EINVAL; goto err_out; } rv = ib_copy_from_udata(&ureq, udata, sizeof(ureq)); if (rv) goto err_out; mr->base_mr.lkey |= ureq.stag_key; mr->base_mr.rkey |= ureq.stag_key; mem->stag |= ureq.stag_key; uresp.stag = mem->stag; if (udata->outlen < sizeof(uresp)) { rv = -EINVAL; goto err_out; } rv = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (rv) goto err_out; } mr->mem->stag_valid = 1; return &mr->base_mr; err_out: atomic_dec(&sdev->num_mr); if (mr) { if (mr->mem) siw_mr_drop_mem(mr); kfree_rcu(mr, rcu); } else { if (umem) siw_umem_release(umem); } return ERR_PTR(rv); } struct ib_mr *siw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_sge) { struct siw_device *sdev = to_siw_dev(pd->device); struct siw_mr *mr = NULL; struct siw_pbl *pbl = NULL; int rv; if (atomic_inc_return(&sdev->num_mr) > SIW_MAX_MR) { siw_dbg_pd(pd, "too many mr's\n"); rv = -ENOMEM; goto err_out; } if (mr_type != IB_MR_TYPE_MEM_REG) { siw_dbg_pd(pd, "mr type %d unsupported\n", mr_type); rv = -EOPNOTSUPP; goto err_out; } if (max_sge > SIW_MAX_SGE_PBL) { siw_dbg_pd(pd, "too many sge's: %d\n", max_sge); rv = -ENOMEM; goto err_out; } pbl = siw_pbl_alloc(max_sge); if (IS_ERR(pbl)) { rv = PTR_ERR(pbl); siw_dbg_pd(pd, "pbl allocation failed: %d\n", rv); pbl = NULL; goto err_out; } mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) { rv = -ENOMEM; goto err_out; } rv = siw_mr_add_mem(mr, pd, pbl, 0, max_sge * PAGE_SIZE, 0); if (rv) goto err_out; mr->mem->is_pbl = 1; siw_dbg_pd(pd, "[MEM %u]: success\n", mr->mem->stag); return &mr->base_mr; err_out: atomic_dec(&sdev->num_mr); if (!mr) { kfree(pbl); } else { if (mr->mem) siw_mr_drop_mem(mr); kfree_rcu(mr, rcu); } siw_dbg_pd(pd, "failed: %d\n", rv); return ERR_PTR(rv); } /* Just used to count number of pages being mapped */ static int siw_set_pbl_page(struct ib_mr *base_mr, u64 buf_addr) { return 0; } int siw_map_mr_sg(struct ib_mr *base_mr, struct scatterlist *sl, int num_sle, unsigned int *sg_off) { struct scatterlist *slp; struct siw_mr *mr = to_siw_mr(base_mr); struct siw_mem *mem = mr->mem; struct siw_pbl *pbl = mem->pbl; struct siw_pble *pble; unsigned long pbl_size; int i, rv; if (!pbl) { siw_dbg_mem(mem, "no PBL allocated\n"); return -EINVAL; } pble = pbl->pbe; if (pbl->max_buf < num_sle) { siw_dbg_mem(mem, "too many SGE's: %d > %d\n", num_sle, pbl->max_buf); return -ENOMEM; } for_each_sg(sl, slp, num_sle, i) { if (sg_dma_len(slp) == 0) { siw_dbg_mem(mem, "empty SGE\n"); return -EINVAL; } if (i == 0) { pble->addr = sg_dma_address(slp); pble->size = sg_dma_len(slp); pble->pbl_off = 0; pbl_size = pble->size; pbl->num_buf = 1; } else { /* Merge PBL entries if adjacent */ if (pble->addr + pble->size == sg_dma_address(slp)) { pble->size += sg_dma_len(slp); } else { pble++; pbl->num_buf++; pble->addr = sg_dma_address(slp); pble->size = sg_dma_len(slp); pble->pbl_off = pbl_size; } pbl_size += sg_dma_len(slp); } siw_dbg_mem(mem, "sge[%d], size %u, addr 0x%p, total %lu\n", i, pble->size, ib_virt_dma_to_ptr(pble->addr), pbl_size); } rv = ib_sg_to_pages(base_mr, sl, num_sle, sg_off, siw_set_pbl_page); if (rv > 0) { mem->len = base_mr->length; mem->va = base_mr->iova; siw_dbg_mem(mem, "%llu bytes, start 0x%p, %u SLE to %u entries\n", mem->len, (void *)(uintptr_t)mem->va, num_sle, pbl->num_buf); } return rv; } /* * siw_get_dma_mr() * * Create a (empty) DMA memory region, where no umem is attached. */ struct ib_mr *siw_get_dma_mr(struct ib_pd *pd, int rights) { struct siw_device *sdev = to_siw_dev(pd->device); struct siw_mr *mr = NULL; int rv; if (atomic_inc_return(&sdev->num_mr) > SIW_MAX_MR) { siw_dbg_pd(pd, "too many mr's\n"); rv = -ENOMEM; goto err_out; } mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) { rv = -ENOMEM; goto err_out; } rv = siw_mr_add_mem(mr, pd, NULL, 0, ULONG_MAX, rights); if (rv) goto err_out; mr->mem->stag_valid = 1; siw_dbg_pd(pd, "[MEM %u]: success\n", mr->mem->stag); return &mr->base_mr; err_out: if (rv) kfree(mr); atomic_dec(&sdev->num_mr); return ERR_PTR(rv); } /* * siw_create_srq() * * Create Shared Receive Queue of attributes @init_attrs * within protection domain given by @pd. * * @base_srq: Base SRQ contained in siw SRQ. * @init_attrs: SRQ init attributes. * @udata: points to user context */ int siw_create_srq(struct ib_srq *base_srq, struct ib_srq_init_attr *init_attrs, struct ib_udata *udata) { struct siw_srq *srq = to_siw_srq(base_srq); struct ib_srq_attr *attrs = &init_attrs->attr; struct siw_device *sdev = to_siw_dev(base_srq->device); struct siw_ucontext *ctx = rdma_udata_to_drv_context(udata, struct siw_ucontext, base_ucontext); int rv; if (init_attrs->srq_type != IB_SRQT_BASIC) return -EOPNOTSUPP; if (atomic_inc_return(&sdev->num_srq) > SIW_MAX_SRQ) { siw_dbg_pd(base_srq->pd, "too many SRQ's\n"); rv = -ENOMEM; goto err_out; } if (attrs->max_wr == 0 || attrs->max_wr > SIW_MAX_SRQ_WR || attrs->max_sge > SIW_MAX_SGE || attrs->srq_limit > attrs->max_wr) { rv = -EINVAL; goto err_out; } srq->max_sge = attrs->max_sge; srq->num_rqe = roundup_pow_of_two(attrs->max_wr); srq->limit = attrs->srq_limit; if (srq->limit) srq->armed = true; srq->is_kernel_res = !udata; if (udata) srq->recvq = vmalloc_user(srq->num_rqe * sizeof(struct siw_rqe)); else srq->recvq = vcalloc(srq->num_rqe, sizeof(struct siw_rqe)); if (srq->recvq == NULL) { rv = -ENOMEM; goto err_out; } if (udata) { struct siw_uresp_create_srq uresp = {}; size_t length = srq->num_rqe * sizeof(struct siw_rqe); srq->srq_entry = siw_mmap_entry_insert(ctx, srq->recvq, length, &uresp.srq_key); if (!srq->srq_entry) { rv = -ENOMEM; goto err_out; } uresp.num_rqe = srq->num_rqe; if (udata->outlen < sizeof(uresp)) { rv = -EINVAL; goto err_out; } rv = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (rv) goto err_out; } spin_lock_init(&srq->lock); siw_dbg_pd(base_srq->pd, "[SRQ]: success\n"); return 0; err_out: if (srq->recvq) { if (ctx) rdma_user_mmap_entry_remove(srq->srq_entry); vfree(srq->recvq); } atomic_dec(&sdev->num_srq); return rv; } /* * siw_modify_srq() * * Modify SRQ. The caller may resize SRQ and/or set/reset notification * limit and (re)arm IB_EVENT_SRQ_LIMIT_REACHED notification. * * NOTE: it is unclear if RDMA core allows for changing the MAX_SGE * parameter. siw_modify_srq() does not check the attrs->max_sge param. */ int siw_modify_srq(struct ib_srq *base_srq, struct ib_srq_attr *attrs, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata) { struct siw_srq *srq = to_siw_srq(base_srq); unsigned long flags; int rv = 0; spin_lock_irqsave(&srq->lock, flags); if (attr_mask & IB_SRQ_MAX_WR) { /* resize request not yet supported */ rv = -EOPNOTSUPP; goto out; } if (attr_mask & IB_SRQ_LIMIT) { if (attrs->srq_limit) { if (unlikely(attrs->srq_limit > srq->num_rqe)) { rv = -EINVAL; goto out; } srq->armed = true; } else { srq->armed = false; } srq->limit = attrs->srq_limit; } out: spin_unlock_irqrestore(&srq->lock, flags); return rv; } /* * siw_query_srq() * * Query SRQ attributes. */ int siw_query_srq(struct ib_srq *base_srq, struct ib_srq_attr *attrs) { struct siw_srq *srq = to_siw_srq(base_srq); unsigned long flags; spin_lock_irqsave(&srq->lock, flags); attrs->max_wr = srq->num_rqe; attrs->max_sge = srq->max_sge; attrs->srq_limit = srq->limit; spin_unlock_irqrestore(&srq->lock, flags); return 0; } /* * siw_destroy_srq() * * Destroy SRQ. * It is assumed that the SRQ is not referenced by any * QP anymore - the code trusts the RDMA core environment to keep track * of QP references. */ int siw_destroy_srq(struct ib_srq *base_srq, struct ib_udata *udata) { struct siw_srq *srq = to_siw_srq(base_srq); struct siw_device *sdev = to_siw_dev(base_srq->device); struct siw_ucontext *ctx = rdma_udata_to_drv_context(udata, struct siw_ucontext, base_ucontext); if (ctx) rdma_user_mmap_entry_remove(srq->srq_entry); vfree(srq->recvq); atomic_dec(&sdev->num_srq); return 0; } /* * siw_post_srq_recv() * * Post a list of receive queue elements to SRQ. * NOTE: The function does not check or lock a certain SRQ state * during the post operation. The code simply trusts the * RDMA core environment. * * @base_srq: Base SRQ contained in siw SRQ * @wr: List of R-WR's * @bad_wr: Updated to failing WR if posting fails. */ int siw_post_srq_recv(struct ib_srq *base_srq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr) { struct siw_srq *srq = to_siw_srq(base_srq); unsigned long flags; int rv = 0; if (unlikely(!srq->is_kernel_res)) { siw_dbg_pd(base_srq->pd, "[SRQ]: no kernel post_recv for mapped srq\n"); rv = -EINVAL; goto out; } /* * Serialize potentially multiple producers. * Also needed to serialize potentially multiple * consumers. */ spin_lock_irqsave(&srq->lock, flags); while (wr) { u32 idx = srq->rq_put % srq->num_rqe; struct siw_rqe *rqe = &srq->recvq[idx]; if (rqe->flags) { siw_dbg_pd(base_srq->pd, "SRQ full\n"); rv = -ENOMEM; break; } if (unlikely(wr->num_sge > srq->max_sge)) { siw_dbg_pd(base_srq->pd, "[SRQ]: too many sge's: %d\n", wr->num_sge); rv = -EINVAL; break; } rqe->id = wr->wr_id; rqe->num_sge = wr->num_sge; siw_copy_sgl(wr->sg_list, rqe->sge, wr->num_sge); /* Make sure S-RQE is completely written before valid */ smp_wmb(); rqe->flags = SIW_WQE_VALID; srq->rq_put++; wr = wr->next; } spin_unlock_irqrestore(&srq->lock, flags); out: if (unlikely(rv < 0)) { siw_dbg_pd(base_srq->pd, "[SRQ]: error %d\n", rv); *bad_wr = wr; } return rv; } void siw_qp_event(struct siw_qp *qp, enum ib_event_type etype) { struct ib_event event; struct ib_qp *base_qp = &qp->base_qp; /* * Do not report asynchronous errors on QP which gets * destroyed via verbs interface (siw_destroy_qp()) */ if (qp->attrs.flags & SIW_QP_IN_DESTROY) return; event.event = etype; event.device = base_qp->device; event.element.qp = base_qp; if (base_qp->event_handler) { siw_dbg_qp(qp, "reporting event %d\n", etype); base_qp->event_handler(&event, base_qp->qp_context); } } void siw_cq_event(struct siw_cq *cq, enum ib_event_type etype) { struct ib_event event; struct ib_cq *base_cq = &cq->base_cq; event.event = etype; event.device = base_cq->device; event.element.cq = base_cq; if (base_cq->event_handler) { siw_dbg_cq(cq, "reporting CQ event %d\n", etype); base_cq->event_handler(&event, base_cq->cq_context); } } void siw_srq_event(struct siw_srq *srq, enum ib_event_type etype) { struct ib_event event; struct ib_srq *base_srq = &srq->base_srq; event.event = etype; event.device = base_srq->device; event.element.srq = base_srq; if (base_srq->event_handler) { siw_dbg_pd(srq->base_srq.pd, "reporting SRQ event %d\n", etype); base_srq->event_handler(&event, base_srq->srq_context); } } void siw_port_event(struct siw_device *sdev, u32 port, enum ib_event_type etype) { struct ib_event event; event.event = etype; event.device = &sdev->base_dev; event.element.port_num = port; siw_dbg(&sdev->base_dev, "reporting port event %d\n", etype); ib_dispatch_event(&event); }
2 3 3 3 1 2 2 2 3 6 3 2 1 10 10 10 10 17 11 6 2 3 2 3 3 2 3 1 2 8 8 8 2 1 1 1 1 7 1 1 3 1 1 1 2 1 2 2 2 3 3 12 1 1 10 3 3 3 1 1 2 3 3 4 7 1 1 1 1 2 1 2 2 2 3 6 1 1 3 1 1 1 1 1 1 1 4 1 1 2 2 5 1 1 1 1 1 1 18 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 // SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/cls_api.c Packet classifier API. * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * * Changes: * * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support */ #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/err.h> #include <linux/skbuff.h> #include <linux/init.h> #include <linux/kmod.h> #include <linux/slab.h> #include <linux/idr.h> #include <linux/jhash.h> #include <linux/rculist.h> #include <linux/rhashtable.h> #include <net/net_namespace.h> #include <net/sock.h> #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/pkt_cls.h> #include <net/tc_act/tc_pedit.h> #include <net/tc_act/tc_mirred.h> #include <net/tc_act/tc_vlan.h> #include <net/tc_act/tc_tunnel_key.h> #include <net/tc_act/tc_csum.h> #include <net/tc_act/tc_gact.h> #include <net/tc_act/tc_police.h> #include <net/tc_act/tc_sample.h> #include <net/tc_act/tc_skbedit.h> #include <net/tc_act/tc_ct.h> #include <net/tc_act/tc_mpls.h> #include <net/tc_act/tc_gate.h> #include <net/flow_offload.h> #include <net/tc_wrapper.h> /* The list of all installed classifier types */ static LIST_HEAD(tcf_proto_base); /* Protects list of registered TC modules. It is pure SMP lock. */ static DEFINE_RWLOCK(cls_mod_lock); static struct xarray tcf_exts_miss_cookies_xa; struct tcf_exts_miss_cookie_node { const struct tcf_chain *chain; const struct tcf_proto *tp; const struct tcf_exts *exts; u32 chain_index; u32 tp_prio; u32 handle; u32 miss_cookie_base; struct rcu_head rcu; }; /* Each tc action entry cookie will be comprised of 32bit miss_cookie_base + * action index in the exts tc actions array. */ union tcf_exts_miss_cookie { struct { u32 miss_cookie_base; u32 act_index; }; u64 miss_cookie; }; #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) static int tcf_exts_miss_cookie_base_alloc(struct tcf_exts *exts, struct tcf_proto *tp, u32 handle) { struct tcf_exts_miss_cookie_node *n; static u32 next; int err; if (WARN_ON(!handle || !tp->ops->get_exts)) return -EINVAL; n = kzalloc(sizeof(*n), GFP_KERNEL); if (!n) return -ENOMEM; n->chain_index = tp->chain->index; n->chain = tp->chain; n->tp_prio = tp->prio; n->tp = tp; n->exts = exts; n->handle = handle; err = xa_alloc_cyclic(&tcf_exts_miss_cookies_xa, &n->miss_cookie_base, n, xa_limit_32b, &next, GFP_KERNEL); if (err < 0) goto err_xa_alloc; exts->miss_cookie_node = n; return 0; err_xa_alloc: kfree(n); return err; } static void tcf_exts_miss_cookie_base_destroy(struct tcf_exts *exts) { struct tcf_exts_miss_cookie_node *n; if (!exts->miss_cookie_node) return; n = exts->miss_cookie_node; xa_erase(&tcf_exts_miss_cookies_xa, n->miss_cookie_base); kfree_rcu(n, rcu); } static struct tcf_exts_miss_cookie_node * tcf_exts_miss_cookie_lookup(u64 miss_cookie, int *act_index) { union tcf_exts_miss_cookie mc = { .miss_cookie = miss_cookie, }; *act_index = mc.act_index; return xa_load(&tcf_exts_miss_cookies_xa, mc.miss_cookie_base); } #else /* IS_ENABLED(CONFIG_NET_TC_SKB_EXT) */ static int tcf_exts_miss_cookie_base_alloc(struct tcf_exts *exts, struct tcf_proto *tp, u32 handle) { return 0; } static void tcf_exts_miss_cookie_base_destroy(struct tcf_exts *exts) { } #endif /* IS_ENABLED(CONFIG_NET_TC_SKB_EXT) */ static u64 tcf_exts_miss_cookie_get(u32 miss_cookie_base, int act_index) { union tcf_exts_miss_cookie mc = { .act_index = act_index, }; if (!miss_cookie_base) return 0; mc.miss_cookie_base = miss_cookie_base; return mc.miss_cookie; } #ifdef CONFIG_NET_CLS_ACT DEFINE_STATIC_KEY_FALSE(tc_skb_ext_tc); EXPORT_SYMBOL(tc_skb_ext_tc); void tc_skb_ext_tc_enable(void) { static_branch_inc(&tc_skb_ext_tc); } EXPORT_SYMBOL(tc_skb_ext_tc_enable); void tc_skb_ext_tc_disable(void) { static_branch_dec(&tc_skb_ext_tc); } EXPORT_SYMBOL(tc_skb_ext_tc_disable); #endif static u32 destroy_obj_hashfn(const struct tcf_proto *tp) { return jhash_3words(tp->chain->index, tp->prio, (__force __u32)tp->protocol, 0); } static void tcf_proto_signal_destroying(struct tcf_chain *chain, struct tcf_proto *tp) { struct tcf_block *block = chain->block; mutex_lock(&block->proto_destroy_lock); hash_add_rcu(block->proto_destroy_ht, &tp->destroy_ht_node, destroy_obj_hashfn(tp)); mutex_unlock(&block->proto_destroy_lock); } static bool tcf_proto_cmp(const struct tcf_proto *tp1, const struct tcf_proto *tp2) { return tp1->chain->index == tp2->chain->index && tp1->prio == tp2->prio && tp1->protocol == tp2->protocol; } static bool tcf_proto_exists_destroying(struct tcf_chain *chain, struct tcf_proto *tp) { u32 hash = destroy_obj_hashfn(tp); struct tcf_proto *iter; bool found = false; rcu_read_lock(); hash_for_each_possible_rcu(chain->block->proto_destroy_ht, iter, destroy_ht_node, hash) { if (tcf_proto_cmp(tp, iter)) { found = true; break; } } rcu_read_unlock(); return found; } static void tcf_proto_signal_destroyed(struct tcf_chain *chain, struct tcf_proto *tp) { struct tcf_block *block = chain->block; mutex_lock(&block->proto_destroy_lock); if (hash_hashed(&tp->destroy_ht_node)) hash_del_rcu(&tp->destroy_ht_node); mutex_unlock(&block->proto_destroy_lock); } /* Find classifier type by string name */ static const struct tcf_proto_ops *__tcf_proto_lookup_ops(const char *kind) { const struct tcf_proto_ops *t, *res = NULL; if (kind) { read_lock(&cls_mod_lock); list_for_each_entry(t, &tcf_proto_base, head) { if (strcmp(kind, t->kind) == 0) { if (try_module_get(t->owner)) res = t; break; } } read_unlock(&cls_mod_lock); } return res; } static const struct tcf_proto_ops * tcf_proto_lookup_ops(const char *kind, bool rtnl_held, struct netlink_ext_ack *extack) { const struct tcf_proto_ops *ops; ops = __tcf_proto_lookup_ops(kind); if (ops) return ops; #ifdef CONFIG_MODULES if (rtnl_held) rtnl_unlock(); request_module(NET_CLS_ALIAS_PREFIX "%s", kind); if (rtnl_held) rtnl_lock(); ops = __tcf_proto_lookup_ops(kind); /* We dropped the RTNL semaphore in order to perform * the module load. So, even if we succeeded in loading * the module we have to replay the request. We indicate * this using -EAGAIN. */ if (ops) { module_put(ops->owner); return ERR_PTR(-EAGAIN); } #endif NL_SET_ERR_MSG(extack, "TC classifier not found"); return ERR_PTR(-ENOENT); } /* Register(unregister) new classifier type */ int register_tcf_proto_ops(struct tcf_proto_ops *ops) { struct tcf_proto_ops *t; int rc = -EEXIST; write_lock(&cls_mod_lock); list_for_each_entry(t, &tcf_proto_base, head) if (!strcmp(ops->kind, t->kind)) goto out; list_add_tail(&ops->head, &tcf_proto_base); rc = 0; out: write_unlock(&cls_mod_lock); return rc; } EXPORT_SYMBOL(register_tcf_proto_ops); static struct workqueue_struct *tc_filter_wq; void unregister_tcf_proto_ops(struct tcf_proto_ops *ops) { struct tcf_proto_ops *t; int rc = -ENOENT; /* Wait for outstanding call_rcu()s, if any, from a * tcf_proto_ops's destroy() handler. */ rcu_barrier(); flush_workqueue(tc_filter_wq); write_lock(&cls_mod_lock); list_for_each_entry(t, &tcf_proto_base, head) { if (t == ops) { list_del(&t->head); rc = 0; break; } } write_unlock(&cls_mod_lock); WARN(rc, "unregister tc filter kind(%s) failed %d\n", ops->kind, rc); } EXPORT_SYMBOL(unregister_tcf_proto_ops); bool tcf_queue_work(struct rcu_work *rwork, work_func_t func) { INIT_RCU_WORK(rwork, func); return queue_rcu_work(tc_filter_wq, rwork); } EXPORT_SYMBOL(tcf_queue_work); /* Select new prio value from the range, managed by kernel. */ static inline u32 tcf_auto_prio(struct tcf_proto *tp) { u32 first = TC_H_MAKE(0xC0000000U, 0U); if (tp) first = tp->prio - 1; return TC_H_MAJ(first); } static bool tcf_proto_check_kind(struct nlattr *kind, char *name) { if (kind) return nla_strscpy(name, kind, IFNAMSIZ) < 0; memset(name, 0, IFNAMSIZ); return false; } static bool tcf_proto_is_unlocked(const char *kind) { const struct tcf_proto_ops *ops; bool ret; if (strlen(kind) == 0) return false; ops = tcf_proto_lookup_ops(kind, false, NULL); /* On error return false to take rtnl lock. Proto lookup/create * functions will perform lookup again and properly handle errors. */ if (IS_ERR(ops)) return false; ret = !!(ops->flags & TCF_PROTO_OPS_DOIT_UNLOCKED); module_put(ops->owner); return ret; } static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol, u32 prio, struct tcf_chain *chain, bool rtnl_held, struct netlink_ext_ack *extack) { struct tcf_proto *tp; int err; tp = kzalloc(sizeof(*tp), GFP_KERNEL); if (!tp) return ERR_PTR(-ENOBUFS); tp->ops = tcf_proto_lookup_ops(kind, rtnl_held, extack); if (IS_ERR(tp->ops)) { err = PTR_ERR(tp->ops); goto errout; } tp->classify = tp->ops->classify; tp->protocol = protocol; tp->prio = prio; tp->chain = chain; tp->usesw = !tp->ops->reoffload; spin_lock_init(&tp->lock); refcount_set(&tp->refcnt, 1); err = tp->ops->init(tp); if (err) { module_put(tp->ops->owner); goto errout; } return tp; errout: kfree(tp); return ERR_PTR(err); } static void tcf_proto_get(struct tcf_proto *tp) { refcount_inc(&tp->refcnt); } static void tcf_proto_count_usesw(struct tcf_proto *tp, bool add) { #ifdef CONFIG_NET_CLS_ACT struct tcf_block *block = tp->chain->block; bool counted = false; if (!add) { if (tp->usesw && tp->counted) { if (!atomic_dec_return(&block->useswcnt)) static_branch_dec(&tcf_sw_enabled_key); tp->counted = false; } return; } spin_lock(&tp->lock); if (tp->usesw && !tp->counted) { counted = true; tp->counted = true; } spin_unlock(&tp->lock); if (counted && atomic_inc_return(&block->useswcnt) == 1) static_branch_inc(&tcf_sw_enabled_key); #endif } static void tcf_chain_put(struct tcf_chain *chain); static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held, bool sig_destroy, struct netlink_ext_ack *extack) { tp->ops->destroy(tp, rtnl_held, extack); tcf_proto_count_usesw(tp, false); if (sig_destroy) tcf_proto_signal_destroyed(tp->chain, tp); tcf_chain_put(tp->chain); module_put(tp->ops->owner); kfree_rcu(tp, rcu); } static void tcf_proto_put(struct tcf_proto *tp, bool rtnl_held, struct netlink_ext_ack *extack) { if (refcount_dec_and_test(&tp->refcnt)) tcf_proto_destroy(tp, rtnl_held, true, extack); } static bool tcf_proto_check_delete(struct tcf_proto *tp) { if (tp->ops->delete_empty) return tp->ops->delete_empty(tp); tp->deleting = true; return tp->deleting; } static void tcf_proto_mark_delete(struct tcf_proto *tp) { spin_lock(&tp->lock); tp->deleting = true; spin_unlock(&tp->lock); } static bool tcf_proto_is_deleting(struct tcf_proto *tp) { bool deleting; spin_lock(&tp->lock); deleting = tp->deleting; spin_unlock(&tp->lock); return deleting; } #define ASSERT_BLOCK_LOCKED(block) \ lockdep_assert_held(&(block)->lock) struct tcf_filter_chain_list_item { struct list_head list; tcf_chain_head_change_t *chain_head_change; void *chain_head_change_priv; }; static struct tcf_chain *tcf_chain_create(struct tcf_block *block, u32 chain_index) { struct tcf_chain *chain; ASSERT_BLOCK_LOCKED(block); chain = kzalloc(sizeof(*chain), GFP_KERNEL); if (!chain) return NULL; list_add_tail_rcu(&chain->list, &block->chain_list); mutex_init(&chain->filter_chain_lock); chain->block = block; chain->index = chain_index; chain->refcnt = 1; if (!chain->index) block->chain0.chain = chain; return chain; } static void tcf_chain_head_change_item(struct tcf_filter_chain_list_item *item, struct tcf_proto *tp_head) { if (item->chain_head_change) item->chain_head_change(tp_head, item->chain_head_change_priv); } static void tcf_chain0_head_change(struct tcf_chain *chain, struct tcf_proto *tp_head) { struct tcf_filter_chain_list_item *item; struct tcf_block *block = chain->block; if (chain->index) return; mutex_lock(&block->lock); list_for_each_entry(item, &block->chain0.filter_chain_list, list) tcf_chain_head_change_item(item, tp_head); mutex_unlock(&block->lock); } /* Returns true if block can be safely freed. */ static bool tcf_chain_detach(struct tcf_chain *chain) { struct tcf_block *block = chain->block; ASSERT_BLOCK_LOCKED(block); list_del_rcu(&chain->list); if (!chain->index) block->chain0.chain = NULL; if (list_empty(&block->chain_list) && refcount_read(&block->refcnt) == 0) return true; return false; } static void tcf_block_destroy(struct tcf_block *block) { mutex_destroy(&block->lock); mutex_destroy(&block->proto_destroy_lock); xa_destroy(&block->ports); kfree_rcu(block, rcu); } static void tcf_chain_destroy(struct tcf_chain *chain, bool free_block) { struct tcf_block *block = chain->block; mutex_destroy(&chain->filter_chain_lock); kfree_rcu(chain, rcu); if (free_block) tcf_block_destroy(block); } static void tcf_chain_hold(struct tcf_chain *chain) { ASSERT_BLOCK_LOCKED(chain->block); ++chain->refcnt; } static bool tcf_chain_held_by_acts_only(struct tcf_chain *chain) { ASSERT_BLOCK_LOCKED(chain->block); /* In case all the references are action references, this * chain should not be shown to the user. */ return chain->refcnt == chain->action_refcnt; } static struct tcf_chain *tcf_chain_lookup(struct tcf_block *block, u32 chain_index) { struct tcf_chain *chain; ASSERT_BLOCK_LOCKED(block); list_for_each_entry(chain, &block->chain_list, list) { if (chain->index == chain_index) return chain; } return NULL; } #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) static struct tcf_chain *tcf_chain_lookup_rcu(const struct tcf_block *block, u32 chain_index) { struct tcf_chain *chain; list_for_each_entry_rcu(chain, &block->chain_list, list) { if (chain->index == chain_index) return chain; } return NULL; } #endif static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb, u32 seq, u16 flags, int event, bool unicast, struct netlink_ext_ack *extack); static struct tcf_chain *__tcf_chain_get(struct tcf_block *block, u32 chain_index, bool create, bool by_act) { struct tcf_chain *chain = NULL; bool is_first_reference; mutex_lock(&block->lock); chain = tcf_chain_lookup(block, chain_index); if (chain) { tcf_chain_hold(chain); } else { if (!create) goto errout; chain = tcf_chain_create(block, chain_index); if (!chain) goto errout; } if (by_act) ++chain->action_refcnt; is_first_reference = chain->refcnt - chain->action_refcnt == 1; mutex_unlock(&block->lock); /* Send notification only in case we got the first * non-action reference. Until then, the chain acts only as * a placeholder for actions pointing to it and user ought * not know about them. */ if (is_first_reference && !by_act) tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL, RTM_NEWCHAIN, false, NULL); return chain; errout: mutex_unlock(&block->lock); return chain; } static struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index, bool create) { return __tcf_chain_get(block, chain_index, create, false); } struct tcf_chain *tcf_chain_get_by_act(struct tcf_block *block, u32 chain_index) { return __tcf_chain_get(block, chain_index, true, true); } EXPORT_SYMBOL(tcf_chain_get_by_act); static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops, void *tmplt_priv); static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops, void *tmplt_priv, u32 chain_index, struct tcf_block *block, struct sk_buff *oskb, u32 seq, u16 flags); static void __tcf_chain_put(struct tcf_chain *chain, bool by_act, bool explicitly_created) { struct tcf_block *block = chain->block; const struct tcf_proto_ops *tmplt_ops; unsigned int refcnt, non_act_refcnt; bool free_block = false; void *tmplt_priv; mutex_lock(&block->lock); if (explicitly_created) { if (!chain->explicitly_created) { mutex_unlock(&block->lock); return; } chain->explicitly_created = false; } if (by_act) chain->action_refcnt--; /* tc_chain_notify_delete can't be called while holding block lock. * However, when block is unlocked chain can be changed concurrently, so * save these to temporary variables. */ refcnt = --chain->refcnt; non_act_refcnt = refcnt - chain->action_refcnt; tmplt_ops = chain->tmplt_ops; tmplt_priv = chain->tmplt_priv; if (non_act_refcnt == chain->explicitly_created && !by_act) { if (non_act_refcnt == 0) tc_chain_notify_delete(tmplt_ops, tmplt_priv, chain->index, block, NULL, 0, 0); /* Last reference to chain, no need to lock. */ chain->flushing = false; } if (refcnt == 0) free_block = tcf_chain_detach(chain); mutex_unlock(&block->lock); if (refcnt == 0) { tc_chain_tmplt_del(tmplt_ops, tmplt_priv); tcf_chain_destroy(chain, free_block); } } static void tcf_chain_put(struct tcf_chain *chain) { __tcf_chain_put(chain, false, false); } void tcf_chain_put_by_act(struct tcf_chain *chain) { __tcf_chain_put(chain, true, false); } EXPORT_SYMBOL(tcf_chain_put_by_act); static void tcf_chain_put_explicitly_created(struct tcf_chain *chain) { __tcf_chain_put(chain, false, true); } static void tcf_chain_flush(struct tcf_chain *chain, bool rtnl_held) { struct tcf_proto *tp, *tp_next; mutex_lock(&chain->filter_chain_lock); tp = tcf_chain_dereference(chain->filter_chain, chain); while (tp) { tp_next = rcu_dereference_protected(tp->next, 1); tcf_proto_signal_destroying(chain, tp); tp = tp_next; } tp = tcf_chain_dereference(chain->filter_chain, chain); RCU_INIT_POINTER(chain->filter_chain, NULL); tcf_chain0_head_change(chain, NULL); chain->flushing = true; mutex_unlock(&chain->filter_chain_lock); while (tp) { tp_next = rcu_dereference_protected(tp->next, 1); tcf_proto_put(tp, rtnl_held, NULL); tp = tp_next; } } static int tcf_block_setup(struct tcf_block *block, struct flow_block_offload *bo); static void tcf_block_offload_init(struct flow_block_offload *bo, struct net_device *dev, struct Qdisc *sch, enum flow_block_command command, enum flow_block_binder_type binder_type, struct flow_block *flow_block, bool shared, struct netlink_ext_ack *extack) { bo->net = dev_net(dev); bo->command = command; bo->binder_type = binder_type; bo->block = flow_block; bo->block_shared = shared; bo->extack = extack; bo->sch = sch; bo->cb_list_head = &flow_block->cb_list; INIT_LIST_HEAD(&bo->cb_list); } static void tcf_block_unbind(struct tcf_block *block, struct flow_block_offload *bo); static void tc_block_indr_cleanup(struct flow_block_cb *block_cb) { struct tcf_block *block = block_cb->indr.data; struct net_device *dev = block_cb->indr.dev; struct Qdisc *sch = block_cb->indr.sch; struct netlink_ext_ack extack = {}; struct flow_block_offload bo = {}; tcf_block_offload_init(&bo, dev, sch, FLOW_BLOCK_UNBIND, block_cb->indr.binder_type, &block->flow_block, tcf_block_shared(block), &extack); rtnl_lock(); down_write(&block->cb_lock); list_del(&block_cb->driver_list); list_move(&block_cb->list, &bo.cb_list); tcf_block_unbind(block, &bo); up_write(&block->cb_lock); rtnl_unlock(); } static bool tcf_block_offload_in_use(struct tcf_block *block) { return atomic_read(&block->offloadcnt); } static int tcf_block_offload_cmd(struct tcf_block *block, struct net_device *dev, struct Qdisc *sch, struct tcf_block_ext_info *ei, enum flow_block_command command, struct netlink_ext_ack *extack) { struct flow_block_offload bo = {}; tcf_block_offload_init(&bo, dev, sch, command, ei->binder_type, &block->flow_block, tcf_block_shared(block), extack); if (dev->netdev_ops->ndo_setup_tc) { int err; err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo); if (err < 0) { if (err != -EOPNOTSUPP) NL_SET_ERR_MSG(extack, "Driver ndo_setup_tc failed"); return err; } return tcf_block_setup(block, &bo); } flow_indr_dev_setup_offload(dev, sch, TC_SETUP_BLOCK, block, &bo, tc_block_indr_cleanup); tcf_block_setup(block, &bo); return -EOPNOTSUPP; } static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q, struct tcf_block_ext_info *ei, struct netlink_ext_ack *extack) { struct net_device *dev = q->dev_queue->dev; int err; down_write(&block->cb_lock); /* If tc offload feature is disabled and the block we try to bind * to already has some offloaded filters, forbid to bind. */ if (dev->netdev_ops->ndo_setup_tc && !tc_can_offload(dev) && tcf_block_offload_in_use(block)) { NL_SET_ERR_MSG(extack, "Bind to offloaded block failed as dev has offload disabled"); err = -EOPNOTSUPP; goto err_unlock; } err = tcf_block_offload_cmd(block, dev, q, ei, FLOW_BLOCK_BIND, extack); if (err == -EOPNOTSUPP) goto no_offload_dev_inc; if (err) goto err_unlock; up_write(&block->cb_lock); return 0; no_offload_dev_inc: if (tcf_block_offload_in_use(block)) goto err_unlock; err = 0; block->nooffloaddevcnt++; err_unlock: up_write(&block->cb_lock); return err; } static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q, struct tcf_block_ext_info *ei) { struct net_device *dev = q->dev_queue->dev; int err; down_write(&block->cb_lock); err = tcf_block_offload_cmd(block, dev, q, ei, FLOW_BLOCK_UNBIND, NULL); if (err == -EOPNOTSUPP) goto no_offload_dev_dec; up_write(&block->cb_lock); return; no_offload_dev_dec: WARN_ON(block->nooffloaddevcnt-- == 0); up_write(&block->cb_lock); } static int tcf_chain0_head_change_cb_add(struct tcf_block *block, struct tcf_block_ext_info *ei, struct netlink_ext_ack *extack) { struct tcf_filter_chain_list_item *item; struct tcf_chain *chain0; item = kmalloc(sizeof(*item), GFP_KERNEL); if (!item) { NL_SET_ERR_MSG(extack, "Memory allocation for head change callback item failed"); return -ENOMEM; } item->chain_head_change = ei->chain_head_change; item->chain_head_change_priv = ei->chain_head_change_priv; mutex_lock(&block->lock); chain0 = block->chain0.chain; if (chain0) tcf_chain_hold(chain0); else list_add(&item->list, &block->chain0.filter_chain_list); mutex_unlock(&block->lock); if (chain0) { struct tcf_proto *tp_head; mutex_lock(&chain0->filter_chain_lock); tp_head = tcf_chain_dereference(chain0->filter_chain, chain0); if (tp_head) tcf_chain_head_change_item(item, tp_head); mutex_lock(&block->lock); list_add(&item->list, &block->chain0.filter_chain_list); mutex_unlock(&block->lock); mutex_unlock(&chain0->filter_chain_lock); tcf_chain_put(chain0); } return 0; } static void tcf_chain0_head_change_cb_del(struct tcf_block *block, struct tcf_block_ext_info *ei) { struct tcf_filter_chain_list_item *item; mutex_lock(&block->lock); list_for_each_entry(item, &block->chain0.filter_chain_list, list) { if ((!ei->chain_head_change && !ei->chain_head_change_priv) || (item->chain_head_change == ei->chain_head_change && item->chain_head_change_priv == ei->chain_head_change_priv)) { if (block->chain0.chain) tcf_chain_head_change_item(item, NULL); list_del(&item->list); mutex_unlock(&block->lock); kfree(item); return; } } mutex_unlock(&block->lock); WARN_ON(1); } struct tcf_net { spinlock_t idr_lock; /* Protects idr */ struct idr idr; }; static unsigned int tcf_net_id; static int tcf_block_insert(struct tcf_block *block, struct net *net, struct netlink_ext_ack *extack) { struct tcf_net *tn = net_generic(net, tcf_net_id); int err; idr_preload(GFP_KERNEL); spin_lock(&tn->idr_lock); err = idr_alloc_u32(&tn->idr, block, &block->index, block->index, GFP_NOWAIT); spin_unlock(&tn->idr_lock); idr_preload_end(); return err; } static void tcf_block_remove(struct tcf_block *block, struct net *net) { struct tcf_net *tn = net_generic(net, tcf_net_id); spin_lock(&tn->idr_lock); idr_remove(&tn->idr, block->index); spin_unlock(&tn->idr_lock); } static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q, u32 block_index, struct netlink_ext_ack *extack) { struct tcf_block *block; block = kzalloc(sizeof(*block), GFP_KERNEL); if (!block) { NL_SET_ERR_MSG(extack, "Memory allocation for block failed"); return ERR_PTR(-ENOMEM); } mutex_init(&block->lock); mutex_init(&block->proto_destroy_lock); init_rwsem(&block->cb_lock); flow_block_init(&block->flow_block); INIT_LIST_HEAD(&block->chain_list); INIT_LIST_HEAD(&block->owner_list); INIT_LIST_HEAD(&block->chain0.filter_chain_list); refcount_set(&block->refcnt, 1); block->net = net; block->index = block_index; xa_init(&block->ports); /* Don't store q pointer for blocks which are shared */ if (!tcf_block_shared(block)) block->q = q; return block; } struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index) { struct tcf_net *tn = net_generic(net, tcf_net_id); return idr_find(&tn->idr, block_index); } EXPORT_SYMBOL(tcf_block_lookup); static struct tcf_block *tcf_block_refcnt_get(struct net *net, u32 block_index) { struct tcf_block *block; rcu_read_lock(); block = tcf_block_lookup(net, block_index); if (block && !refcount_inc_not_zero(&block->refcnt)) block = NULL; rcu_read_unlock(); return block; } static struct tcf_chain * __tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain) { mutex_lock(&block->lock); if (chain) chain = list_is_last(&chain->list, &block->chain_list) ? NULL : list_next_entry(chain, list); else chain = list_first_entry_or_null(&block->chain_list, struct tcf_chain, list); /* skip all action-only chains */ while (chain && tcf_chain_held_by_acts_only(chain)) chain = list_is_last(&chain->list, &block->chain_list) ? NULL : list_next_entry(chain, list); if (chain) tcf_chain_hold(chain); mutex_unlock(&block->lock); return chain; } /* Function to be used by all clients that want to iterate over all chains on * block. It properly obtains block->lock and takes reference to chain before * returning it. Users of this function must be tolerant to concurrent chain * insertion/deletion or ensure that no concurrent chain modification is * possible. Note that all netlink dump callbacks cannot guarantee to provide * consistent dump because rtnl lock is released each time skb is filled with * data and sent to user-space. */ struct tcf_chain * tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain) { struct tcf_chain *chain_next = __tcf_get_next_chain(block, chain); if (chain) tcf_chain_put(chain); return chain_next; } EXPORT_SYMBOL(tcf_get_next_chain); static struct tcf_proto * __tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp) { u32 prio = 0; ASSERT_RTNL(); mutex_lock(&chain->filter_chain_lock); if (!tp) { tp = tcf_chain_dereference(chain->filter_chain, chain); } else if (tcf_proto_is_deleting(tp)) { /* 'deleting' flag is set and chain->filter_chain_lock was * unlocked, which means next pointer could be invalid. Restart * search. */ prio = tp->prio + 1; tp = tcf_chain_dereference(chain->filter_chain, chain); for (; tp; tp = tcf_chain_dereference(tp->next, chain)) if (!tp->deleting && tp->prio >= prio) break; } else { tp = tcf_chain_dereference(tp->next, chain); } if (tp) tcf_proto_get(tp); mutex_unlock(&chain->filter_chain_lock); return tp; } /* Function to be used by all clients that want to iterate over all tp's on * chain. Users of this function must be tolerant to concurrent tp * insertion/deletion or ensure that no concurrent chain modification is * possible. Note that all netlink dump callbacks cannot guarantee to provide * consistent dump because rtnl lock is released each time skb is filled with * data and sent to user-space. */ struct tcf_proto * tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp) { struct tcf_proto *tp_next = __tcf_get_next_proto(chain, tp); if (tp) tcf_proto_put(tp, true, NULL); return tp_next; } EXPORT_SYMBOL(tcf_get_next_proto); static void tcf_block_flush_all_chains(struct tcf_block *block, bool rtnl_held) { struct tcf_chain *chain; /* Last reference to block. At this point chains cannot be added or * removed concurrently. */ for (chain = tcf_get_next_chain(block, NULL); chain; chain = tcf_get_next_chain(block, chain)) { tcf_chain_put_explicitly_created(chain); tcf_chain_flush(chain, rtnl_held); } } /* Lookup Qdisc and increments its reference counter. * Set parent, if necessary. */ static int __tcf_qdisc_find(struct net *net, struct Qdisc **q, u32 *parent, int ifindex, bool rtnl_held, struct netlink_ext_ack *extack) { const struct Qdisc_class_ops *cops; struct net_device *dev; int err = 0; if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) return 0; rcu_read_lock(); /* Find link */ dev = dev_get_by_index_rcu(net, ifindex); if (!dev) { rcu_read_unlock(); return -ENODEV; } /* Find qdisc */ if (!*parent) { *q = rcu_dereference(dev->qdisc); *parent = (*q)->handle; } else { *q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent)); if (!*q) { NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists"); err = -EINVAL; goto errout_rcu; } } *q = qdisc_refcount_inc_nz(*q); if (!*q) { NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists"); err = -EINVAL; goto errout_rcu; } /* Is it classful? */ cops = (*q)->ops->cl_ops; if (!cops) { NL_SET_ERR_MSG(extack, "Qdisc not classful"); err = -EINVAL; goto errout_qdisc; } if (!cops->tcf_block) { NL_SET_ERR_MSG(extack, "Class doesn't support blocks"); err = -EOPNOTSUPP; goto errout_qdisc; } errout_rcu: /* At this point we know that qdisc is not noop_qdisc, * which means that qdisc holds a reference to net_device * and we hold a reference to qdisc, so it is safe to release * rcu read lock. */ rcu_read_unlock(); return err; errout_qdisc: rcu_read_unlock(); if (rtnl_held) qdisc_put(*q); else qdisc_put_unlocked(*q); *q = NULL; return err; } static int __tcf_qdisc_cl_find(struct Qdisc *q, u32 parent, unsigned long *cl, int ifindex, struct netlink_ext_ack *extack) { if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) return 0; /* Do we search for filter, attached to class? */ if (TC_H_MIN(parent)) { const struct Qdisc_class_ops *cops = q->ops->cl_ops; *cl = cops->find(q, parent); if (*cl == 0) { NL_SET_ERR_MSG(extack, "Specified class doesn't exist"); return -ENOENT; } } return 0; } static struct tcf_block *__tcf_block_find(struct net *net, struct Qdisc *q, unsigned long cl, int ifindex, u32 block_index, struct netlink_ext_ack *extack) { struct tcf_block *block; if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) { block = tcf_block_refcnt_get(net, block_index); if (!block) { NL_SET_ERR_MSG(extack, "Block of given index was not found"); return ERR_PTR(-EINVAL); } } else { const struct Qdisc_class_ops *cops = q->ops->cl_ops; block = cops->tcf_block(q, cl, extack); if (!block) return ERR_PTR(-EINVAL); if (tcf_block_shared(block)) { NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters"); return ERR_PTR(-EOPNOTSUPP); } /* Always take reference to block in order to support execution * of rules update path of cls API without rtnl lock. Caller * must release block when it is finished using it. 'if' block * of this conditional obtain reference to block by calling * tcf_block_refcnt_get(). */ refcount_inc(&block->refcnt); } return block; } static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q, struct tcf_block_ext_info *ei, bool rtnl_held) { if (refcount_dec_and_mutex_lock(&block->refcnt, &block->lock)) { /* Flushing/putting all chains will cause the block to be * deallocated when last chain is freed. However, if chain_list * is empty, block has to be manually deallocated. After block * reference counter reached 0, it is no longer possible to * increment it or add new chains to block. */ bool free_block = list_empty(&block->chain_list); mutex_unlock(&block->lock); if (tcf_block_shared(block)) tcf_block_remove(block, block->net); if (q) tcf_block_offload_unbind(block, q, ei); if (free_block) tcf_block_destroy(block); else tcf_block_flush_all_chains(block, rtnl_held); } else if (q) { tcf_block_offload_unbind(block, q, ei); } } static void tcf_block_refcnt_put(struct tcf_block *block, bool rtnl_held) { __tcf_block_put(block, NULL, NULL, rtnl_held); } /* Find tcf block. * Set q, parent, cl when appropriate. */ static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q, u32 *parent, unsigned long *cl, int ifindex, u32 block_index, struct netlink_ext_ack *extack) { struct tcf_block *block; int err = 0; ASSERT_RTNL(); err = __tcf_qdisc_find(net, q, parent, ifindex, true, extack); if (err) goto errout; err = __tcf_qdisc_cl_find(*q, *parent, cl, ifindex, extack); if (err) goto errout_qdisc; block = __tcf_block_find(net, *q, *cl, ifindex, block_index, extack); if (IS_ERR(block)) { err = PTR_ERR(block); goto errout_qdisc; } return block; errout_qdisc: if (*q) qdisc_put(*q); errout: *q = NULL; return ERR_PTR(err); } static void tcf_block_release(struct Qdisc *q, struct tcf_block *block, bool rtnl_held) { if (!IS_ERR_OR_NULL(block)) tcf_block_refcnt_put(block, rtnl_held); if (q) { if (rtnl_held) qdisc_put(q); else qdisc_put_unlocked(q); } } struct tcf_block_owner_item { struct list_head list; struct Qdisc *q; enum flow_block_binder_type binder_type; }; static void tcf_block_owner_netif_keep_dst(struct tcf_block *block, struct Qdisc *q, enum flow_block_binder_type binder_type) { if (block->keep_dst && binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS && binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS) netif_keep_dst(qdisc_dev(q)); } void tcf_block_netif_keep_dst(struct tcf_block *block) { struct tcf_block_owner_item *item; block->keep_dst = true; list_for_each_entry(item, &block->owner_list, list) tcf_block_owner_netif_keep_dst(block, item->q, item->binder_type); } EXPORT_SYMBOL(tcf_block_netif_keep_dst); static int tcf_block_owner_add(struct tcf_block *block, struct Qdisc *q, enum flow_block_binder_type binder_type) { struct tcf_block_owner_item *item; item = kmalloc(sizeof(*item), GFP_KERNEL); if (!item) return -ENOMEM; item->q = q; item->binder_type = binder_type; list_add(&item->list, &block->owner_list); return 0; } static void tcf_block_owner_del(struct tcf_block *block, struct Qdisc *q, enum flow_block_binder_type binder_type) { struct tcf_block_owner_item *item; list_for_each_entry(item, &block->owner_list, list) { if (item->q == q && item->binder_type == binder_type) { list_del(&item->list); kfree(item); return; } } WARN_ON(1); } static bool tcf_block_tracks_dev(struct tcf_block *block, struct tcf_block_ext_info *ei) { return tcf_block_shared(block) && (ei->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS || ei->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS); } int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q, struct tcf_block_ext_info *ei, struct netlink_ext_ack *extack) { struct net_device *dev = qdisc_dev(q); struct net *net = qdisc_net(q); struct tcf_block *block = NULL; int err; if (ei->block_index) /* block_index not 0 means the shared block is requested */ block = tcf_block_refcnt_get(net, ei->block_index); if (!block) { block = tcf_block_create(net, q, ei->block_index, extack); if (IS_ERR(block)) return PTR_ERR(block); if (tcf_block_shared(block)) { err = tcf_block_insert(block, net, extack); if (err) goto err_block_insert; } } err = tcf_block_owner_add(block, q, ei->binder_type); if (err) goto err_block_owner_add; tcf_block_owner_netif_keep_dst(block, q, ei->binder_type); err = tcf_chain0_head_change_cb_add(block, ei, extack); if (err) goto err_chain0_head_change_cb_add; err = tcf_block_offload_bind(block, q, ei, extack); if (err) goto err_block_offload_bind; if (tcf_block_tracks_dev(block, ei)) { err = xa_insert(&block->ports, dev->ifindex, dev, GFP_KERNEL); if (err) { NL_SET_ERR_MSG(extack, "block dev insert failed"); goto err_dev_insert; } } *p_block = block; return 0; err_dev_insert: tcf_block_offload_unbind(block, q, ei); err_block_offload_bind: tcf_chain0_head_change_cb_del(block, ei); err_chain0_head_change_cb_add: tcf_block_owner_del(block, q, ei->binder_type); err_block_owner_add: err_block_insert: tcf_block_refcnt_put(block, true); return err; } EXPORT_SYMBOL(tcf_block_get_ext); static void tcf_chain_head_change_dflt(struct tcf_proto *tp_head, void *priv) { struct tcf_proto __rcu **p_filter_chain = priv; rcu_assign_pointer(*p_filter_chain, tp_head); } int tcf_block_get(struct tcf_block **p_block, struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q, struct netlink_ext_ack *extack) { struct tcf_block_ext_info ei = { .chain_head_change = tcf_chain_head_change_dflt, .chain_head_change_priv = p_filter_chain, }; WARN_ON(!p_filter_chain); return tcf_block_get_ext(p_block, q, &ei, extack); } EXPORT_SYMBOL(tcf_block_get); /* XXX: Standalone actions are not allowed to jump to any chain, and bound * actions should be all removed after flushing. */ void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q, struct tcf_block_ext_info *ei) { struct net_device *dev = qdisc_dev(q); if (!block) return; if (tcf_block_tracks_dev(block, ei)) xa_erase(&block->ports, dev->ifindex); tcf_chain0_head_change_cb_del(block, ei); tcf_block_owner_del(block, q, ei->binder_type); __tcf_block_put(block, q, ei, true); } EXPORT_SYMBOL(tcf_block_put_ext); void tcf_block_put(struct tcf_block *block) { struct tcf_block_ext_info ei = {0, }; if (!block) return; tcf_block_put_ext(block, block->q, &ei); } EXPORT_SYMBOL(tcf_block_put); static int tcf_block_playback_offloads(struct tcf_block *block, flow_setup_cb_t *cb, void *cb_priv, bool add, bool offload_in_use, struct netlink_ext_ack *extack) { struct tcf_chain *chain, *chain_prev; struct tcf_proto *tp, *tp_prev; int err; lockdep_assert_held(&block->cb_lock); for (chain = __tcf_get_next_chain(block, NULL); chain; chain_prev = chain, chain = __tcf_get_next_chain(block, chain), tcf_chain_put(chain_prev)) { if (chain->tmplt_ops && add) chain->tmplt_ops->tmplt_reoffload(chain, true, cb, cb_priv); for (tp = __tcf_get_next_proto(chain, NULL); tp; tp_prev = tp, tp = __tcf_get_next_proto(chain, tp), tcf_proto_put(tp_prev, true, NULL)) { if (tp->ops->reoffload) { err = tp->ops->reoffload(tp, add, cb, cb_priv, extack); if (err && add) goto err_playback_remove; } else if (add && offload_in_use) { err = -EOPNOTSUPP; NL_SET_ERR_MSG(extack, "Filter HW offload failed - classifier without re-offloading support"); goto err_playback_remove; } } if (chain->tmplt_ops && !add) chain->tmplt_ops->tmplt_reoffload(chain, false, cb, cb_priv); } return 0; err_playback_remove: tcf_proto_put(tp, true, NULL); tcf_chain_put(chain); tcf_block_playback_offloads(block, cb, cb_priv, false, offload_in_use, extack); return err; } static int tcf_block_bind(struct tcf_block *block, struct flow_block_offload *bo) { struct flow_block_cb *block_cb, *next; int err, i = 0; lockdep_assert_held(&block->cb_lock); list_for_each_entry(block_cb, &bo->cb_list, list) { err = tcf_block_playback_offloads(block, block_cb->cb, block_cb->cb_priv, true, tcf_block_offload_in_use(block), bo->extack); if (err) goto err_unroll; if (!bo->unlocked_driver_cb) block->lockeddevcnt++; i++; } list_splice(&bo->cb_list, &block->flow_block.cb_list); return 0; err_unroll: list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) { list_del(&block_cb->driver_list); if (i-- > 0) { list_del(&block_cb->list); tcf_block_playback_offloads(block, block_cb->cb, block_cb->cb_priv, false, tcf_block_offload_in_use(block), NULL); if (!bo->unlocked_driver_cb) block->lockeddevcnt--; } flow_block_cb_free(block_cb); } return err; } static void tcf_block_unbind(struct tcf_block *block, struct flow_block_offload *bo) { struct flow_block_cb *block_cb, *next; lockdep_assert_held(&block->cb_lock); list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) { tcf_block_playback_offloads(block, block_cb->cb, block_cb->cb_priv, false, tcf_block_offload_in_use(block), NULL); list_del(&block_cb->list); flow_block_cb_free(block_cb); if (!bo->unlocked_driver_cb) block->lockeddevcnt--; } } static int tcf_block_setup(struct tcf_block *block, struct flow_block_offload *bo) { int err; switch (bo->command) { case FLOW_BLOCK_BIND: err = tcf_block_bind(block, bo); break; case FLOW_BLOCK_UNBIND: err = 0; tcf_block_unbind(block, bo); break; default: WARN_ON_ONCE(1); err = -EOPNOTSUPP; } return err; } /* Main classifier routine: scans classifier chain attached * to this qdisc, (optionally) tests for protocol and asks * specific classifiers. */ static inline int __tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, const struct tcf_proto *orig_tp, struct tcf_result *res, bool compat_mode, struct tcf_exts_miss_cookie_node *n, int act_index, u32 *last_executed_chain) { #ifdef CONFIG_NET_CLS_ACT const int max_reclassify_loop = 16; const struct tcf_proto *first_tp; int limit = 0; reclassify: #endif for (; tp; tp = rcu_dereference_bh(tp->next)) { __be16 protocol = skb_protocol(skb, false); int err = 0; if (n) { struct tcf_exts *exts; if (n->tp_prio != tp->prio) continue; /* We re-lookup the tp and chain based on index instead * of having hard refs and locks to them, so do a sanity * check if any of tp,chain,exts was replaced by the * time we got here with a cookie from hardware. */ if (unlikely(n->tp != tp || n->tp->chain != n->chain || !tp->ops->get_exts)) { tcf_set_drop_reason(skb, SKB_DROP_REASON_TC_COOKIE_ERROR); return TC_ACT_SHOT; } exts = tp->ops->get_exts(tp, n->handle); if (unlikely(!exts || n->exts != exts)) { tcf_set_drop_reason(skb, SKB_DROP_REASON_TC_COOKIE_ERROR); return TC_ACT_SHOT; } n = NULL; err = tcf_exts_exec_ex(skb, exts, act_index, res); } else { if (tp->protocol != protocol && tp->protocol != htons(ETH_P_ALL)) continue; err = tc_classify(skb, tp, res); } #ifdef CONFIG_NET_CLS_ACT if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) { first_tp = orig_tp; *last_executed_chain = first_tp->chain->index; goto reset; } else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) { first_tp = res->goto_tp; *last_executed_chain = err & TC_ACT_EXT_VAL_MASK; goto reset; } #endif if (err >= 0) return err; } if (unlikely(n)) { tcf_set_drop_reason(skb, SKB_DROP_REASON_TC_COOKIE_ERROR); return TC_ACT_SHOT; } return TC_ACT_UNSPEC; /* signal: continue lookup */ #ifdef CONFIG_NET_CLS_ACT reset: if (unlikely(limit++ >= max_reclassify_loop)) { net_notice_ratelimited("%u: reclassify loop, rule prio %u, protocol %02x\n", tp->chain->block->index, tp->prio & 0xffff, ntohs(tp->protocol)); tcf_set_drop_reason(skb, SKB_DROP_REASON_TC_RECLASSIFY_LOOP); return TC_ACT_SHOT; } tp = first_tp; goto reclassify; #endif } int tcf_classify(struct sk_buff *skb, const struct tcf_block *block, const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode) { #if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT) u32 last_executed_chain = 0; return __tcf_classify(skb, tp, tp, res, compat_mode, NULL, 0, &last_executed_chain); #else u32 last_executed_chain = tp ? tp->chain->index : 0; struct tcf_exts_miss_cookie_node *n = NULL; const struct tcf_proto *orig_tp = tp; struct tc_skb_ext *ext; int act_index = 0; int ret; if (block) { ext = skb_ext_find(skb, TC_SKB_EXT); if (ext && (ext->chain || ext->act_miss)) { struct tcf_chain *fchain; u32 chain; if (ext->act_miss) { n = tcf_exts_miss_cookie_lookup(ext->act_miss_cookie, &act_index); if (!n) { tcf_set_drop_reason(skb, SKB_DROP_REASON_TC_COOKIE_ERROR); return TC_ACT_SHOT; } chain = n->chain_index; } else { chain = ext->chain; } fchain = tcf_chain_lookup_rcu(block, chain); if (!fchain) { tcf_set_drop_reason(skb, SKB_DROP_REASON_TC_CHAIN_NOTFOUND); return TC_ACT_SHOT; } /* Consume, so cloned/redirect skbs won't inherit ext */ skb_ext_del(skb, TC_SKB_EXT); tp = rcu_dereference_bh(fchain->filter_chain); last_executed_chain = fchain->index; } } ret = __tcf_classify(skb, tp, orig_tp, res, compat_mode, n, act_index, &last_executed_chain); if (tc_skb_ext_tc_enabled()) { /* If we missed on some chain */ if (ret == TC_ACT_UNSPEC && last_executed_chain) { struct tc_skb_cb *cb = tc_skb_cb(skb); ext = tc_skb_ext_alloc(skb); if (WARN_ON_ONCE(!ext)) { tcf_set_drop_reason(skb, SKB_DROP_REASON_NOMEM); return TC_ACT_SHOT; } ext->chain = last_executed_chain; ext->mru = cb->mru; ext->post_ct = cb->post_ct; ext->post_ct_snat = cb->post_ct_snat; ext->post_ct_dnat = cb->post_ct_dnat; ext->zone = cb->zone; } } return ret; #endif } EXPORT_SYMBOL(tcf_classify); struct tcf_chain_info { struct tcf_proto __rcu **pprev; struct tcf_proto __rcu *next; }; static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain *chain, struct tcf_chain_info *chain_info) { return tcf_chain_dereference(*chain_info->pprev, chain); } static int tcf_chain_tp_insert(struct tcf_chain *chain, struct tcf_chain_info *chain_info, struct tcf_proto *tp) { if (chain->flushing) return -EAGAIN; RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info)); if (*chain_info->pprev == chain->filter_chain) tcf_chain0_head_change(chain, tp); tcf_proto_get(tp); rcu_assign_pointer(*chain_info->pprev, tp); return 0; } static void tcf_chain_tp_remove(struct tcf_chain *chain, struct tcf_chain_info *chain_info, struct tcf_proto *tp) { struct tcf_proto *next = tcf_chain_dereference(chain_info->next, chain); tcf_proto_mark_delete(tp); if (tp == chain->filter_chain) tcf_chain0_head_change(chain, next); RCU_INIT_POINTER(*chain_info->pprev, next); } static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain, struct tcf_chain_info *chain_info, u32 protocol, u32 prio, bool prio_allocate, struct netlink_ext_ack *extack); /* Try to insert new proto. * If proto with specified priority already exists, free new proto * and return existing one. */ static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain, struct tcf_proto *tp_new, u32 protocol, u32 prio, bool rtnl_held) { struct tcf_chain_info chain_info; struct tcf_proto *tp; int err = 0; mutex_lock(&chain->filter_chain_lock); if (tcf_proto_exists_destroying(chain, tp_new)) { mutex_unlock(&chain->filter_chain_lock); tcf_proto_destroy(tp_new, rtnl_held, false, NULL); return ERR_PTR(-EAGAIN); } tp = tcf_chain_tp_find(chain, &chain_info, protocol, prio, false, NULL); if (!tp) err = tcf_chain_tp_insert(chain, &chain_info, tp_new); mutex_unlock(&chain->filter_chain_lock); if (tp) { tcf_proto_destroy(tp_new, rtnl_held, false, NULL); tp_new = tp; } else if (err) { tcf_proto_destroy(tp_new, rtnl_held, false, NULL); tp_new = ERR_PTR(err); } return tp_new; } static void tcf_chain_tp_delete_empty(struct tcf_chain *chain, struct tcf_proto *tp, bool rtnl_held, struct netlink_ext_ack *extack) { struct tcf_chain_info chain_info; struct tcf_proto *tp_iter; struct tcf_proto **pprev; struct tcf_proto *next; mutex_lock(&chain->filter_chain_lock); /* Atomically find and remove tp from chain. */ for (pprev = &chain->filter_chain; (tp_iter = tcf_chain_dereference(*pprev, chain)); pprev = &tp_iter->next) { if (tp_iter == tp) { chain_info.pprev = pprev; chain_info.next = tp_iter->next; WARN_ON(tp_iter->deleting); break; } } /* Verify that tp still exists and no new filters were inserted * concurrently. * Mark tp for deletion if it is empty. */ if (!tp_iter || !tcf_proto_check_delete(tp)) { mutex_unlock(&chain->filter_chain_lock); return; } tcf_proto_signal_destroying(chain, tp); next = tcf_chain_dereference(chain_info.next, chain); if (tp == chain->filter_chain) tcf_chain0_head_change(chain, next); RCU_INIT_POINTER(*chain_info.pprev, next); mutex_unlock(&chain->filter_chain_lock); tcf_proto_put(tp, rtnl_held, extack); } static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain, struct tcf_chain_info *chain_info, u32 protocol, u32 prio, bool prio_allocate, struct netlink_ext_ack *extack) { struct tcf_proto **pprev; struct tcf_proto *tp; /* Check the chain for existence of proto-tcf with this priority */ for (pprev = &chain->filter_chain; (tp = tcf_chain_dereference(*pprev, chain)); pprev = &tp->next) { if (tp->prio >= prio) { if (tp->prio == prio) { if (prio_allocate) { NL_SET_ERR_MSG(extack, "Lowest ID from auto-alloc range already in use"); return ERR_PTR(-ENOSPC); } if (tp->protocol != protocol && protocol) { NL_SET_ERR_MSG(extack, "Protocol mismatch for filter with specified priority"); return ERR_PTR(-EINVAL); } } else { tp = NULL; } break; } } chain_info->pprev = pprev; if (tp) { chain_info->next = tp->next; tcf_proto_get(tp); } else { chain_info->next = NULL; } return tp; } static int tcf_fill_node(struct net *net, struct sk_buff *skb, struct tcf_proto *tp, struct tcf_block *block, struct Qdisc *q, u32 parent, void *fh, u32 portid, u32 seq, u16 flags, int event, bool terse_dump, bool rtnl_held, struct netlink_ext_ack *extack) { struct tcmsg *tcm; struct nlmsghdr *nlh; unsigned char *b = skb_tail_pointer(skb); int ret = -EMSGSIZE; nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); if (!nlh) goto out_nlmsg_trim; tcm = nlmsg_data(nlh); tcm->tcm_family = AF_UNSPEC; tcm->tcm__pad1 = 0; tcm->tcm__pad2 = 0; if (q) { tcm->tcm_ifindex = qdisc_dev(q)->ifindex; tcm->tcm_parent = parent; } else { tcm->tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK; tcm->tcm_block_index = block->index; } tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol); if (nla_put_string(skb, TCA_KIND, tp->ops->kind)) goto nla_put_failure; if (nla_put_u32(skb, TCA_CHAIN, tp->chain->index)) goto nla_put_failure; if (!fh) { tcm->tcm_handle = 0; } else if (terse_dump) { if (tp->ops->terse_dump) { if (tp->ops->terse_dump(net, tp, fh, skb, tcm, rtnl_held) < 0) goto nla_put_failure; } else { goto cls_op_not_supp; } } else { if (tp->ops->dump && tp->ops->dump(net, tp, fh, skb, tcm, rtnl_held) < 0) goto nla_put_failure; } if (extack && extack->_msg && nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg)) goto nla_put_failure; nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; cls_op_not_supp: ret = -EOPNOTSUPP; out_nlmsg_trim: nla_put_failure: nlmsg_trim(skb, b); return ret; } static struct sk_buff *tfilter_notify_prep(struct net *net, struct sk_buff *oskb, struct nlmsghdr *n, struct tcf_proto *tp, struct tcf_block *block, struct Qdisc *q, u32 parent, void *fh, int event, u32 portid, bool rtnl_held, struct netlink_ext_ack *extack) { unsigned int size = oskb ? max(NLMSG_GOODSIZE, oskb->len) : NLMSG_GOODSIZE; struct sk_buff *skb; int ret; retry: skb = alloc_skb(size, GFP_KERNEL); if (!skb) return ERR_PTR(-ENOBUFS); ret = tcf_fill_node(net, skb, tp, block, q, parent, fh, portid, n->nlmsg_seq, n->nlmsg_flags, event, false, rtnl_held, extack); if (ret <= 0) { kfree_skb(skb); if (ret == -EMSGSIZE) { size += NLMSG_GOODSIZE; goto retry; } return ERR_PTR(-EINVAL); } return skb; } static int tfilter_notify(struct net *net, struct sk_buff *oskb, struct nlmsghdr *n, struct tcf_proto *tp, struct tcf_block *block, struct Qdisc *q, u32 parent, void *fh, int event, bool unicast, bool rtnl_held, struct netlink_ext_ack *extack) { struct sk_buff *skb; u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; int err = 0; if (!unicast && !rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) return 0; skb = tfilter_notify_prep(net, oskb, n, tp, block, q, parent, fh, event, portid, rtnl_held, extack); if (IS_ERR(skb)) return PTR_ERR(skb); if (unicast) err = rtnl_unicast(skb, net, portid); else err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); return err; } static int tfilter_del_notify(struct net *net, struct sk_buff *oskb, struct nlmsghdr *n, struct tcf_proto *tp, struct tcf_block *block, struct Qdisc *q, u32 parent, void *fh, bool *last, bool rtnl_held, struct netlink_ext_ack *extack) { struct sk_buff *skb; u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; int err; if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) return tp->ops->delete(tp, fh, last, rtnl_held, extack); skb = tfilter_notify_prep(net, oskb, n, tp, block, q, parent, fh, RTM_DELTFILTER, portid, rtnl_held, extack); if (IS_ERR(skb)) { NL_SET_ERR_MSG(extack, "Failed to build del event notification"); return PTR_ERR(skb); } err = tp->ops->delete(tp, fh, last, rtnl_held, extack); if (err) { kfree_skb(skb); return err; } err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); if (err < 0) NL_SET_ERR_MSG(extack, "Failed to send filter delete notification"); return err; } static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb, struct tcf_block *block, struct Qdisc *q, u32 parent, struct nlmsghdr *n, struct tcf_chain *chain, int event, struct netlink_ext_ack *extack) { struct tcf_proto *tp; for (tp = tcf_get_next_proto(chain, NULL); tp; tp = tcf_get_next_proto(chain, tp)) tfilter_notify(net, oskb, n, tp, block, q, parent, NULL, event, false, true, extack); } static void tfilter_put(struct tcf_proto *tp, void *fh) { if (tp->ops->put && fh) tp->ops->put(tp, fh); } static bool is_qdisc_ingress(__u32 classid) { return (TC_H_MIN(classid) == TC_H_MIN(TC_H_MIN_INGRESS)); } static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_MAX + 1]; char name[IFNAMSIZ]; struct tcmsg *t; u32 protocol; u32 prio; bool prio_allocate; u32 parent; u32 chain_index; struct Qdisc *q; struct tcf_chain_info chain_info; struct tcf_chain *chain; struct tcf_block *block; struct tcf_proto *tp; unsigned long cl; void *fh; int err; int tp_created; bool rtnl_held = false; u32 flags; replay: tp_created = 0; err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) return err; t = nlmsg_data(n); protocol = TC_H_MIN(t->tcm_info); prio = TC_H_MAJ(t->tcm_info); prio_allocate = false; parent = t->tcm_parent; tp = NULL; cl = 0; block = NULL; q = NULL; chain = NULL; flags = 0; if (prio == 0) { /* If no priority is provided by the user, * we allocate one. */ if (n->nlmsg_flags & NLM_F_CREATE) { prio = TC_H_MAKE(0x80000000U, 0U); prio_allocate = true; } else { NL_SET_ERR_MSG(extack, "Invalid filter command with priority of zero"); return -ENOENT; } } /* Find head of filter chain. */ err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack); if (err) return err; if (tcf_proto_check_kind(tca[TCA_KIND], name)) { NL_SET_ERR_MSG(extack, "Specified TC filter name too long"); err = -EINVAL; goto errout; } /* Take rtnl mutex if rtnl_held was set to true on previous iteration, * block is shared (no qdisc found), qdisc is not unlocked, classifier * type is not specified, classifier is not unlocked. */ if (rtnl_held || (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) || !tcf_proto_is_unlocked(name)) { rtnl_held = true; rtnl_lock(); } err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack); if (err) goto errout; block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index, extack); if (IS_ERR(block)) { err = PTR_ERR(block); goto errout; } block->classid = parent; chain_index = nla_get_u32_default(tca[TCA_CHAIN], 0); if (chain_index > TC_ACT_EXT_VAL_MASK) { NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit"); err = -EINVAL; goto errout; } chain = tcf_chain_get(block, chain_index, true); if (!chain) { NL_SET_ERR_MSG(extack, "Cannot create specified filter chain"); err = -ENOMEM; goto errout; } mutex_lock(&chain->filter_chain_lock); tp = tcf_chain_tp_find(chain, &chain_info, protocol, prio, prio_allocate, extack); if (IS_ERR(tp)) { err = PTR_ERR(tp); goto errout_locked; } if (tp == NULL) { struct tcf_proto *tp_new = NULL; if (chain->flushing) { err = -EAGAIN; goto errout_locked; } /* Proto-tcf does not exist, create new one */ if (tca[TCA_KIND] == NULL || !protocol) { NL_SET_ERR_MSG(extack, "Filter kind and protocol must be specified"); err = -EINVAL; goto errout_locked; } if (!(n->nlmsg_flags & NLM_F_CREATE)) { NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter"); err = -ENOENT; goto errout_locked; } if (prio_allocate) prio = tcf_auto_prio(tcf_chain_tp_prev(chain, &chain_info)); mutex_unlock(&chain->filter_chain_lock); tp_new = tcf_proto_create(name, protocol, prio, chain, rtnl_held, extack); if (IS_ERR(tp_new)) { err = PTR_ERR(tp_new); goto errout_tp; } tp_created = 1; tp = tcf_chain_tp_insert_unique(chain, tp_new, protocol, prio, rtnl_held); if (IS_ERR(tp)) { err = PTR_ERR(tp); goto errout_tp; } } else { mutex_unlock(&chain->filter_chain_lock); } if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) { NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one"); err = -EINVAL; goto errout; } fh = tp->ops->get(tp, t->tcm_handle); if (!fh) { if (!(n->nlmsg_flags & NLM_F_CREATE)) { NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter"); err = -ENOENT; goto errout; } } else if (n->nlmsg_flags & NLM_F_EXCL) { tfilter_put(tp, fh); NL_SET_ERR_MSG(extack, "Filter already exists"); err = -EEXIST; goto errout; } if (chain->tmplt_ops && chain->tmplt_ops != tp->ops) { tfilter_put(tp, fh); NL_SET_ERR_MSG(extack, "Chain template is set to a different filter kind"); err = -EINVAL; goto errout; } if (!(n->nlmsg_flags & NLM_F_CREATE)) flags |= TCA_ACT_FLAGS_REPLACE; if (!rtnl_held) flags |= TCA_ACT_FLAGS_NO_RTNL; if (is_qdisc_ingress(parent)) flags |= TCA_ACT_FLAGS_AT_INGRESS; err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh, flags, extack); if (err == 0) { tfilter_notify(net, skb, n, tp, block, q, parent, fh, RTM_NEWTFILTER, false, rtnl_held, extack); tfilter_put(tp, fh); tcf_proto_count_usesw(tp, true); /* q pointer is NULL for shared blocks */ if (q) q->flags &= ~TCQ_F_CAN_BYPASS; } errout: if (err && tp_created) tcf_chain_tp_delete_empty(chain, tp, rtnl_held, NULL); errout_tp: if (chain) { if (tp && !IS_ERR(tp)) tcf_proto_put(tp, rtnl_held, NULL); if (!tp_created) tcf_chain_put(chain); } tcf_block_release(q, block, rtnl_held); if (rtnl_held) rtnl_unlock(); if (err == -EAGAIN) { /* Take rtnl lock in case EAGAIN is caused by concurrent flush * of target chain. */ rtnl_held = true; /* Replay the request. */ goto replay; } return err; errout_locked: mutex_unlock(&chain->filter_chain_lock); goto errout; } static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_MAX + 1]; char name[IFNAMSIZ]; struct tcmsg *t; u32 protocol; u32 prio; u32 parent; u32 chain_index; struct Qdisc *q = NULL; struct tcf_chain_info chain_info; struct tcf_chain *chain = NULL; struct tcf_block *block = NULL; struct tcf_proto *tp = NULL; unsigned long cl = 0; void *fh = NULL; int err; bool rtnl_held = false; err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) return err; t = nlmsg_data(n); protocol = TC_H_MIN(t->tcm_info); prio = TC_H_MAJ(t->tcm_info); parent = t->tcm_parent; if (prio == 0 && (protocol || t->tcm_handle || tca[TCA_KIND])) { NL_SET_ERR_MSG(extack, "Cannot flush filters with protocol, handle or kind set"); return -ENOENT; } /* Find head of filter chain. */ err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack); if (err) return err; if (tcf_proto_check_kind(tca[TCA_KIND], name)) { NL_SET_ERR_MSG(extack, "Specified TC filter name too long"); err = -EINVAL; goto errout; } /* Take rtnl mutex if flushing whole chain, block is shared (no qdisc * found), qdisc is not unlocked, classifier type is not specified, * classifier is not unlocked. */ if (!prio || (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) || !tcf_proto_is_unlocked(name)) { rtnl_held = true; rtnl_lock(); } err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack); if (err) goto errout; block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index, extack); if (IS_ERR(block)) { err = PTR_ERR(block); goto errout; } chain_index = nla_get_u32_default(tca[TCA_CHAIN], 0); if (chain_index > TC_ACT_EXT_VAL_MASK) { NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit"); err = -EINVAL; goto errout; } chain = tcf_chain_get(block, chain_index, false); if (!chain) { /* User requested flush on non-existent chain. Nothing to do, * so just return success. */ if (prio == 0) { err = 0; goto errout; } NL_SET_ERR_MSG(extack, "Cannot find specified filter chain"); err = -ENOENT; goto errout; } if (prio == 0) { tfilter_notify_chain(net, skb, block, q, parent, n, chain, RTM_DELTFILTER, extack); tcf_chain_flush(chain, rtnl_held); err = 0; goto errout; } mutex_lock(&chain->filter_chain_lock); tp = tcf_chain_tp_find(chain, &chain_info, protocol, prio, false, extack); if (!tp) { err = -ENOENT; NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found"); goto errout_locked; } else if (IS_ERR(tp)) { err = PTR_ERR(tp); goto errout_locked; } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) { NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one"); err = -EINVAL; goto errout_locked; } else if (t->tcm_handle == 0) { tcf_proto_signal_destroying(chain, tp); tcf_chain_tp_remove(chain, &chain_info, tp); mutex_unlock(&chain->filter_chain_lock); tcf_proto_put(tp, rtnl_held, NULL); tfilter_notify(net, skb, n, tp, block, q, parent, fh, RTM_DELTFILTER, false, rtnl_held, extack); err = 0; goto errout; } mutex_unlock(&chain->filter_chain_lock); fh = tp->ops->get(tp, t->tcm_handle); if (!fh) { NL_SET_ERR_MSG(extack, "Specified filter handle not found"); err = -ENOENT; } else { bool last; err = tfilter_del_notify(net, skb, n, tp, block, q, parent, fh, &last, rtnl_held, extack); if (err) goto errout; if (last) tcf_chain_tp_delete_empty(chain, tp, rtnl_held, extack); } errout: if (chain) { if (tp && !IS_ERR(tp)) tcf_proto_put(tp, rtnl_held, NULL); tcf_chain_put(chain); } tcf_block_release(q, block, rtnl_held); if (rtnl_held) rtnl_unlock(); return err; errout_locked: mutex_unlock(&chain->filter_chain_lock); goto errout; } static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_MAX + 1]; char name[IFNAMSIZ]; struct tcmsg *t; u32 protocol; u32 prio; u32 parent; u32 chain_index; struct Qdisc *q = NULL; struct tcf_chain_info chain_info; struct tcf_chain *chain = NULL; struct tcf_block *block = NULL; struct tcf_proto *tp = NULL; unsigned long cl = 0; void *fh = NULL; int err; bool rtnl_held = false; err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) return err; t = nlmsg_data(n); protocol = TC_H_MIN(t->tcm_info); prio = TC_H_MAJ(t->tcm_info); parent = t->tcm_parent; if (prio == 0) { NL_SET_ERR_MSG(extack, "Invalid filter command with priority of zero"); return -ENOENT; } /* Find head of filter chain. */ err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack); if (err) return err; if (tcf_proto_check_kind(tca[TCA_KIND], name)) { NL_SET_ERR_MSG(extack, "Specified TC filter name too long"); err = -EINVAL; goto errout; } /* Take rtnl mutex if block is shared (no qdisc found), qdisc is not * unlocked, classifier type is not specified, classifier is not * unlocked. */ if ((q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) || !tcf_proto_is_unlocked(name)) { rtnl_held = true; rtnl_lock(); } err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack); if (err) goto errout; block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index, extack); if (IS_ERR(block)) { err = PTR_ERR(block); goto errout; } chain_index = nla_get_u32_default(tca[TCA_CHAIN], 0); if (chain_index > TC_ACT_EXT_VAL_MASK) { NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit"); err = -EINVAL; goto errout; } chain = tcf_chain_get(block, chain_index, false); if (!chain) { NL_SET_ERR_MSG(extack, "Cannot find specified filter chain"); err = -EINVAL; goto errout; } mutex_lock(&chain->filter_chain_lock); tp = tcf_chain_tp_find(chain, &chain_info, protocol, prio, false, extack); mutex_unlock(&chain->filter_chain_lock); if (!tp) { err = -ENOENT; NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found"); goto errout; } else if (IS_ERR(tp)) { err = PTR_ERR(tp); goto errout; } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) { NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one"); err = -EINVAL; goto errout; } fh = tp->ops->get(tp, t->tcm_handle); if (!fh) { NL_SET_ERR_MSG(extack, "Specified filter handle not found"); err = -ENOENT; } else { err = tfilter_notify(net, skb, n, tp, block, q, parent, fh, RTM_NEWTFILTER, true, rtnl_held, NULL); if (err < 0) NL_SET_ERR_MSG(extack, "Failed to send filter notify message"); } tfilter_put(tp, fh); errout: if (chain) { if (tp && !IS_ERR(tp)) tcf_proto_put(tp, rtnl_held, NULL); tcf_chain_put(chain); } tcf_block_release(q, block, rtnl_held); if (rtnl_held) rtnl_unlock(); return err; } struct tcf_dump_args { struct tcf_walker w; struct sk_buff *skb; struct netlink_callback *cb; struct tcf_block *block; struct Qdisc *q; u32 parent; bool terse_dump; }; static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg) { struct tcf_dump_args *a = (void *)arg; struct net *net = sock_net(a->skb->sk); return tcf_fill_node(net, a->skb, tp, a->block, a->q, a->parent, n, NETLINK_CB(a->cb->skb).portid, a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER, a->terse_dump, true, NULL); } static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent, struct sk_buff *skb, struct netlink_callback *cb, long index_start, long *p_index, bool terse) { struct net *net = sock_net(skb->sk); struct tcf_block *block = chain->block; struct tcmsg *tcm = nlmsg_data(cb->nlh); struct tcf_proto *tp, *tp_prev; struct tcf_dump_args arg; for (tp = __tcf_get_next_proto(chain, NULL); tp; tp_prev = tp, tp = __tcf_get_next_proto(chain, tp), tcf_proto_put(tp_prev, true, NULL), (*p_index)++) { if (*p_index < index_start) continue; if (TC_H_MAJ(tcm->tcm_info) && TC_H_MAJ(tcm->tcm_info) != tp->prio) continue; if (TC_H_MIN(tcm->tcm_info) && TC_H_MIN(tcm->tcm_info) != tp->protocol) continue; if (*p_index > index_start) memset(&cb->args[1], 0, sizeof(cb->args) - sizeof(cb->args[0])); if (cb->args[1] == 0) { if (tcf_fill_node(net, skb, tp, block, q, parent, NULL, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER, false, true, NULL) <= 0) goto errout; cb->args[1] = 1; } if (!tp->ops->walk) continue; arg.w.fn = tcf_node_dump; arg.skb = skb; arg.cb = cb; arg.block = block; arg.q = q; arg.parent = parent; arg.w.stop = 0; arg.w.skip = cb->args[1] - 1; arg.w.count = 0; arg.w.cookie = cb->args[2]; arg.terse_dump = terse; tp->ops->walk(tp, &arg.w, true); cb->args[2] = arg.w.cookie; cb->args[1] = arg.w.count + 1; if (arg.w.stop) goto errout; } return true; errout: tcf_proto_put(tp, true, NULL); return false; } static const struct nla_policy tcf_tfilter_dump_policy[TCA_MAX + 1] = { [TCA_CHAIN] = { .type = NLA_U32 }, [TCA_DUMP_FLAGS] = NLA_POLICY_BITFIELD32(TCA_DUMP_FLAGS_TERSE), }; /* called with RTNL */ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) { struct tcf_chain *chain, *chain_prev; struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_MAX + 1]; struct Qdisc *q = NULL; struct tcf_block *block; struct tcmsg *tcm = nlmsg_data(cb->nlh); bool terse_dump = false; long index_start; long index; u32 parent; int err; if (nlmsg_len(cb->nlh) < sizeof(*tcm)) return skb->len; err = nlmsg_parse_deprecated(cb->nlh, sizeof(*tcm), tca, TCA_MAX, tcf_tfilter_dump_policy, cb->extack); if (err) return err; if (tca[TCA_DUMP_FLAGS]) { struct nla_bitfield32 flags = nla_get_bitfield32(tca[TCA_DUMP_FLAGS]); terse_dump = flags.value & TCA_DUMP_FLAGS_TERSE; } if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) { block = tcf_block_refcnt_get(net, tcm->tcm_block_index); if (!block) goto out; /* If we work with block index, q is NULL and parent value * will never be used in the following code. The check * in tcf_fill_node prevents it. However, compiler does not * see that far, so set parent to zero to silence the warning * about parent being uninitialized. */ parent = 0; } else { const struct Qdisc_class_ops *cops; struct net_device *dev; unsigned long cl = 0; dev = __dev_get_by_index(net, tcm->tcm_ifindex); if (!dev) return skb->len; parent = tcm->tcm_parent; if (!parent) q = rtnl_dereference(dev->qdisc); else q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent)); if (!q) goto out; cops = q->ops->cl_ops; if (!cops) goto out; if (!cops->tcf_block) goto out; if (TC_H_MIN(tcm->tcm_parent)) { cl = cops->find(q, tcm->tcm_parent); if (cl == 0) goto out; } block = cops->tcf_block(q, cl, NULL); if (!block) goto out; parent = block->classid; if (tcf_block_shared(block)) q = NULL; } index_start = cb->args[0]; index = 0; for (chain = __tcf_get_next_chain(block, NULL); chain; chain_prev = chain, chain = __tcf_get_next_chain(block, chain), tcf_chain_put(chain_prev)) { if (tca[TCA_CHAIN] && nla_get_u32(tca[TCA_CHAIN]) != chain->index) continue; if (!tcf_chain_dump(chain, q, parent, skb, cb, index_start, &index, terse_dump)) { tcf_chain_put(chain); err = -EMSGSIZE; break; } } if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) tcf_block_refcnt_put(block, true); cb->args[0] = index; out: /* If we did no progress, the error (EMSGSIZE) is real */ if (skb->len == 0 && err) return err; return skb->len; } static int tc_chain_fill_node(const struct tcf_proto_ops *tmplt_ops, void *tmplt_priv, u32 chain_index, struct net *net, struct sk_buff *skb, struct tcf_block *block, u32 portid, u32 seq, u16 flags, int event, struct netlink_ext_ack *extack) { unsigned char *b = skb_tail_pointer(skb); const struct tcf_proto_ops *ops; struct nlmsghdr *nlh; struct tcmsg *tcm; void *priv; ops = tmplt_ops; priv = tmplt_priv; nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); if (!nlh) goto out_nlmsg_trim; tcm = nlmsg_data(nlh); tcm->tcm_family = AF_UNSPEC; tcm->tcm__pad1 = 0; tcm->tcm__pad2 = 0; tcm->tcm_handle = 0; if (block->q) { tcm->tcm_ifindex = qdisc_dev(block->q)->ifindex; tcm->tcm_parent = block->q->handle; } else { tcm->tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK; tcm->tcm_block_index = block->index; } if (nla_put_u32(skb, TCA_CHAIN, chain_index)) goto nla_put_failure; if (ops) { if (nla_put_string(skb, TCA_KIND, ops->kind)) goto nla_put_failure; if (ops->tmplt_dump(skb, net, priv) < 0) goto nla_put_failure; } if (extack && extack->_msg && nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg)) goto out_nlmsg_trim; nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; out_nlmsg_trim: nla_put_failure: nlmsg_trim(skb, b); return -EMSGSIZE; } static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb, u32 seq, u16 flags, int event, bool unicast, struct netlink_ext_ack *extack) { u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; struct tcf_block *block = chain->block; struct net *net = block->net; struct sk_buff *skb; int err = 0; if (!unicast && !rtnl_notify_needed(net, flags, RTNLGRP_TC)) return 0; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; if (tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv, chain->index, net, skb, block, portid, seq, flags, event, extack) <= 0) { kfree_skb(skb); return -EINVAL; } if (unicast) err = rtnl_unicast(skb, net, portid); else err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO); return err; } static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops, void *tmplt_priv, u32 chain_index, struct tcf_block *block, struct sk_buff *oskb, u32 seq, u16 flags) { u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; struct net *net = block->net; struct sk_buff *skb; if (!rtnl_notify_needed(net, flags, RTNLGRP_TC)) return 0; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; if (tc_chain_fill_node(tmplt_ops, tmplt_priv, chain_index, net, skb, block, portid, seq, flags, RTM_DELCHAIN, NULL) <= 0) { kfree_skb(skb); return -EINVAL; } return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO); } static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net, struct nlattr **tca, struct netlink_ext_ack *extack) { const struct tcf_proto_ops *ops; char name[IFNAMSIZ]; void *tmplt_priv; /* If kind is not set, user did not specify template. */ if (!tca[TCA_KIND]) return 0; if (tcf_proto_check_kind(tca[TCA_KIND], name)) { NL_SET_ERR_MSG(extack, "Specified TC chain template name too long"); return -EINVAL; } ops = tcf_proto_lookup_ops(name, true, extack); if (IS_ERR(ops)) return PTR_ERR(ops); if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump || !ops->tmplt_reoffload) { NL_SET_ERR_MSG(extack, "Chain templates are not supported with specified classifier"); module_put(ops->owner); return -EOPNOTSUPP; } tmplt_priv = ops->tmplt_create(net, chain, tca, extack); if (IS_ERR(tmplt_priv)) { module_put(ops->owner); return PTR_ERR(tmplt_priv); } chain->tmplt_ops = ops; chain->tmplt_priv = tmplt_priv; return 0; } static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops, void *tmplt_priv) { /* If template ops are set, no work to do for us. */ if (!tmplt_ops) return; tmplt_ops->tmplt_destroy(tmplt_priv); module_put(tmplt_ops->owner); } /* Add/delete/get a chain */ static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_MAX + 1]; struct tcmsg *t; u32 parent; u32 chain_index; struct Qdisc *q; struct tcf_chain *chain; struct tcf_block *block; unsigned long cl; int err; replay: q = NULL; err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) return err; t = nlmsg_data(n); parent = t->tcm_parent; cl = 0; block = tcf_block_find(net, &q, &parent, &cl, t->tcm_ifindex, t->tcm_block_index, extack); if (IS_ERR(block)) return PTR_ERR(block); chain_index = nla_get_u32_default(tca[TCA_CHAIN], 0); if (chain_index > TC_ACT_EXT_VAL_MASK) { NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit"); err = -EINVAL; goto errout_block; } mutex_lock(&block->lock); chain = tcf_chain_lookup(block, chain_index); if (n->nlmsg_type == RTM_NEWCHAIN) { if (chain) { if (tcf_chain_held_by_acts_only(chain)) { /* The chain exists only because there is * some action referencing it. */ tcf_chain_hold(chain); } else { NL_SET_ERR_MSG(extack, "Filter chain already exists"); err = -EEXIST; goto errout_block_locked; } } else { if (!(n->nlmsg_flags & NLM_F_CREATE)) { NL_SET_ERR_MSG(extack, "Need both RTM_NEWCHAIN and NLM_F_CREATE to create a new chain"); err = -ENOENT; goto errout_block_locked; } chain = tcf_chain_create(block, chain_index); if (!chain) { NL_SET_ERR_MSG(extack, "Failed to create filter chain"); err = -ENOMEM; goto errout_block_locked; } } } else { if (!chain || tcf_chain_held_by_acts_only(chain)) { NL_SET_ERR_MSG(extack, "Cannot find specified filter chain"); err = -EINVAL; goto errout_block_locked; } tcf_chain_hold(chain); } if (n->nlmsg_type == RTM_NEWCHAIN) { /* Modifying chain requires holding parent block lock. In case * the chain was successfully added, take a reference to the * chain. This ensures that an empty chain does not disappear at * the end of this function. */ tcf_chain_hold(chain); chain->explicitly_created = true; } mutex_unlock(&block->lock); switch (n->nlmsg_type) { case RTM_NEWCHAIN: err = tc_chain_tmplt_add(chain, net, tca, extack); if (err) { tcf_chain_put_explicitly_created(chain); goto errout; } tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL, RTM_NEWCHAIN, false, extack); break; case RTM_DELCHAIN: tfilter_notify_chain(net, skb, block, q, parent, n, chain, RTM_DELTFILTER, extack); /* Flush the chain first as the user requested chain removal. */ tcf_chain_flush(chain, true); /* In case the chain was successfully deleted, put a reference * to the chain previously taken during addition. */ tcf_chain_put_explicitly_created(chain); break; case RTM_GETCHAIN: err = tc_chain_notify(chain, skb, n->nlmsg_seq, n->nlmsg_flags, n->nlmsg_type, true, extack); if (err < 0) NL_SET_ERR_MSG(extack, "Failed to send chain notify message"); break; default: err = -EOPNOTSUPP; NL_SET_ERR_MSG(extack, "Unsupported message type"); goto errout; } errout: tcf_chain_put(chain); errout_block: tcf_block_release(q, block, true); if (err == -EAGAIN) /* Replay the request. */ goto replay; return err; errout_block_locked: mutex_unlock(&block->lock); goto errout_block; } /* called with RTNL */ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_MAX + 1]; struct Qdisc *q = NULL; struct tcf_block *block; struct tcmsg *tcm = nlmsg_data(cb->nlh); struct tcf_chain *chain; long index_start; long index; int err; if (nlmsg_len(cb->nlh) < sizeof(*tcm)) return skb->len; err = nlmsg_parse_deprecated(cb->nlh, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, cb->extack); if (err) return err; if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) { block = tcf_block_refcnt_get(net, tcm->tcm_block_index); if (!block) goto out; } else { const struct Qdisc_class_ops *cops; struct net_device *dev; unsigned long cl = 0; dev = __dev_get_by_index(net, tcm->tcm_ifindex); if (!dev) return skb->len; if (!tcm->tcm_parent) q = rtnl_dereference(dev->qdisc); else q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent)); if (!q) goto out; cops = q->ops->cl_ops; if (!cops) goto out; if (!cops->tcf_block) goto out; if (TC_H_MIN(tcm->tcm_parent)) { cl = cops->find(q, tcm->tcm_parent); if (cl == 0) goto out; } block = cops->tcf_block(q, cl, NULL); if (!block) goto out; if (tcf_block_shared(block)) q = NULL; } index_start = cb->args[0]; index = 0; mutex_lock(&block->lock); list_for_each_entry(chain, &block->chain_list, list) { if ((tca[TCA_CHAIN] && nla_get_u32(tca[TCA_CHAIN]) != chain->index)) continue; if (index < index_start) { index++; continue; } if (tcf_chain_held_by_acts_only(chain)) continue; err = tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv, chain->index, net, skb, block, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWCHAIN, NULL); if (err <= 0) break; index++; } mutex_unlock(&block->lock); if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) tcf_block_refcnt_put(block, true); cb->args[0] = index; out: /* If we did no progress, the error (EMSGSIZE) is real */ if (skb->len == 0 && err) return err; return skb->len; } int tcf_exts_init_ex(struct tcf_exts *exts, struct net *net, int action, int police, struct tcf_proto *tp, u32 handle, bool use_action_miss) { int err = 0; #ifdef CONFIG_NET_CLS_ACT exts->type = 0; exts->nr_actions = 0; exts->miss_cookie_node = NULL; /* Note: we do not own yet a reference on net. * This reference might be taken later from tcf_exts_get_net(). */ exts->net = net; exts->actions = kcalloc(TCA_ACT_MAX_PRIO, sizeof(struct tc_action *), GFP_KERNEL); if (!exts->actions) return -ENOMEM; #endif exts->action = action; exts->police = police; if (!use_action_miss) return 0; err = tcf_exts_miss_cookie_base_alloc(exts, tp, handle); if (err) goto err_miss_alloc; return 0; err_miss_alloc: tcf_exts_destroy(exts); #ifdef CONFIG_NET_CLS_ACT exts->actions = NULL; #endif return err; } EXPORT_SYMBOL(tcf_exts_init_ex); void tcf_exts_destroy(struct tcf_exts *exts) { tcf_exts_miss_cookie_base_destroy(exts); #ifdef CONFIG_NET_CLS_ACT if (exts->actions) { tcf_action_destroy(exts->actions, TCA_ACT_UNBIND); kfree(exts->actions); } exts->nr_actions = 0; #endif } EXPORT_SYMBOL(tcf_exts_destroy); int tcf_exts_validate_ex(struct net *net, struct tcf_proto *tp, struct nlattr **tb, struct nlattr *rate_tlv, struct tcf_exts *exts, u32 flags, u32 fl_flags, struct netlink_ext_ack *extack) { #ifdef CONFIG_NET_CLS_ACT { int init_res[TCA_ACT_MAX_PRIO] = {}; struct tc_action *act; size_t attr_size = 0; if (exts->police && tb[exts->police]) { struct tc_action_ops *a_o; flags |= TCA_ACT_FLAGS_POLICE | TCA_ACT_FLAGS_BIND; a_o = tc_action_load_ops(tb[exts->police], flags, extack); if (IS_ERR(a_o)) return PTR_ERR(a_o); act = tcf_action_init_1(net, tp, tb[exts->police], rate_tlv, a_o, init_res, flags, extack); module_put(a_o->owner); if (IS_ERR(act)) return PTR_ERR(act); act->type = exts->type = TCA_OLD_COMPAT; exts->actions[0] = act; exts->nr_actions = 1; tcf_idr_insert_many(exts->actions, init_res); } else if (exts->action && tb[exts->action]) { int err; flags |= TCA_ACT_FLAGS_BIND; err = tcf_action_init(net, tp, tb[exts->action], rate_tlv, exts->actions, init_res, &attr_size, flags, fl_flags, extack); if (err < 0) return err; exts->nr_actions = err; } } #else if ((exts->action && tb[exts->action]) || (exts->police && tb[exts->police])) { NL_SET_ERR_MSG(extack, "Classifier actions are not supported per compile options (CONFIG_NET_CLS_ACT)"); return -EOPNOTSUPP; } #endif return 0; } EXPORT_SYMBOL(tcf_exts_validate_ex); int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, struct nlattr *rate_tlv, struct tcf_exts *exts, u32 flags, struct netlink_ext_ack *extack) { return tcf_exts_validate_ex(net, tp, tb, rate_tlv, exts, flags, 0, extack); } EXPORT_SYMBOL(tcf_exts_validate); void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src) { #ifdef CONFIG_NET_CLS_ACT struct tcf_exts old = *dst; *dst = *src; tcf_exts_destroy(&old); #endif } EXPORT_SYMBOL(tcf_exts_change); #ifdef CONFIG_NET_CLS_ACT static struct tc_action *tcf_exts_first_act(struct tcf_exts *exts) { if (exts->nr_actions == 0) return NULL; else return exts->actions[0]; } #endif int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts) { #ifdef CONFIG_NET_CLS_ACT struct nlattr *nest; if (exts->action && tcf_exts_has_actions(exts)) { /* * again for backward compatible mode - we want * to work with both old and new modes of entering * tc data even if iproute2 was newer - jhs */ if (exts->type != TCA_OLD_COMPAT) { nest = nla_nest_start_noflag(skb, exts->action); if (nest == NULL) goto nla_put_failure; if (tcf_action_dump(skb, exts->actions, 0, 0, false) < 0) goto nla_put_failure; nla_nest_end(skb, nest); } else if (exts->police) { struct tc_action *act = tcf_exts_first_act(exts); nest = nla_nest_start_noflag(skb, exts->police); if (nest == NULL || !act) goto nla_put_failure; if (tcf_action_dump_old(skb, act, 0, 0) < 0) goto nla_put_failure; nla_nest_end(skb, nest); } } return 0; nla_put_failure: nla_nest_cancel(skb, nest); return -1; #else return 0; #endif } EXPORT_SYMBOL(tcf_exts_dump); int tcf_exts_terse_dump(struct sk_buff *skb, struct tcf_exts *exts) { #ifdef CONFIG_NET_CLS_ACT struct nlattr *nest; if (!exts->action || !tcf_exts_has_actions(exts)) return 0; nest = nla_nest_start_noflag(skb, exts->action); if (!nest) goto nla_put_failure; if (tcf_action_dump(skb, exts->actions, 0, 0, true) < 0) goto nla_put_failure; nla_nest_end(skb, nest); return 0; nla_put_failure: nla_nest_cancel(skb, nest); return -1; #else return 0; #endif } EXPORT_SYMBOL(tcf_exts_terse_dump); int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts) { #ifdef CONFIG_NET_CLS_ACT struct tc_action *a = tcf_exts_first_act(exts); if (a != NULL && tcf_action_copy_stats(skb, a, 1) < 0) return -1; #endif return 0; } EXPORT_SYMBOL(tcf_exts_dump_stats); static void tcf_block_offload_inc(struct tcf_block *block, u32 *flags) { if (*flags & TCA_CLS_FLAGS_IN_HW) return; *flags |= TCA_CLS_FLAGS_IN_HW; atomic_inc(&block->offloadcnt); } static void tcf_block_offload_dec(struct tcf_block *block, u32 *flags) { if (!(*flags & TCA_CLS_FLAGS_IN_HW)) return; *flags &= ~TCA_CLS_FLAGS_IN_HW; atomic_dec(&block->offloadcnt); } static void tc_cls_offload_cnt_update(struct tcf_block *block, struct tcf_proto *tp, u32 *cnt, u32 *flags, u32 diff, bool add) { lockdep_assert_held(&block->cb_lock); spin_lock(&tp->lock); if (add) { if (!*cnt) tcf_block_offload_inc(block, flags); *cnt += diff; } else { *cnt -= diff; if (!*cnt) tcf_block_offload_dec(block, flags); } spin_unlock(&tp->lock); } static void tc_cls_offload_cnt_reset(struct tcf_block *block, struct tcf_proto *tp, u32 *cnt, u32 *flags) { lockdep_assert_held(&block->cb_lock); spin_lock(&tp->lock); tcf_block_offload_dec(block, flags); *cnt = 0; spin_unlock(&tp->lock); } static int __tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, void *type_data, bool err_stop) { struct flow_block_cb *block_cb; int ok_count = 0; int err; list_for_each_entry(block_cb, &block->flow_block.cb_list, list) { err = block_cb->cb(type, type_data, block_cb->cb_priv); if (err) { if (err_stop) return err; } else { ok_count++; } } return ok_count; } int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, void *type_data, bool err_stop, bool rtnl_held) { bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held; int ok_count; retry: if (take_rtnl) rtnl_lock(); down_read(&block->cb_lock); /* Need to obtain rtnl lock if block is bound to devs that require it. * In block bind code cb_lock is obtained while holding rtnl, so we must * obtain the locks in same order here. */ if (!rtnl_held && !take_rtnl && block->lockeddevcnt) { up_read(&block->cb_lock); take_rtnl = true; goto retry; } ok_count = __tc_setup_cb_call(block, type, type_data, err_stop); up_read(&block->cb_lock); if (take_rtnl) rtnl_unlock(); return ok_count; } EXPORT_SYMBOL(tc_setup_cb_call); /* Non-destructive filter add. If filter that wasn't already in hardware is * successfully offloaded, increment block offloads counter. On failure, * previously offloaded filter is considered to be intact and offloads counter * is not decremented. */ int tc_setup_cb_add(struct tcf_block *block, struct tcf_proto *tp, enum tc_setup_type type, void *type_data, bool err_stop, u32 *flags, unsigned int *in_hw_count, bool rtnl_held) { bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held; int ok_count; retry: if (take_rtnl) rtnl_lock(); down_read(&block->cb_lock); /* Need to obtain rtnl lock if block is bound to devs that require it. * In block bind code cb_lock is obtained while holding rtnl, so we must * obtain the locks in same order here. */ if (!rtnl_held && !take_rtnl && block->lockeddevcnt) { up_read(&block->cb_lock); take_rtnl = true; goto retry; } /* Make sure all netdevs sharing this block are offload-capable. */ if (block->nooffloaddevcnt && err_stop) { ok_count = -EOPNOTSUPP; goto err_unlock; } ok_count = __tc_setup_cb_call(block, type, type_data, err_stop); if (ok_count < 0) goto err_unlock; if (tp->ops->hw_add) tp->ops->hw_add(tp, type_data); if (ok_count > 0) tc_cls_offload_cnt_update(block, tp, in_hw_count, flags, ok_count, true); err_unlock: up_read(&block->cb_lock); if (take_rtnl) rtnl_unlock(); return min(ok_count, 0); } EXPORT_SYMBOL(tc_setup_cb_add); /* Destructive filter replace. If filter that wasn't already in hardware is * successfully offloaded, increment block offload counter. On failure, * previously offloaded filter is considered to be destroyed and offload counter * is decremented. */ int tc_setup_cb_replace(struct tcf_block *block, struct tcf_proto *tp, enum tc_setup_type type, void *type_data, bool err_stop, u32 *old_flags, unsigned int *old_in_hw_count, u32 *new_flags, unsigned int *new_in_hw_count, bool rtnl_held) { bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held; int ok_count; retry: if (take_rtnl) rtnl_lock(); down_read(&block->cb_lock); /* Need to obtain rtnl lock if block is bound to devs that require it. * In block bind code cb_lock is obtained while holding rtnl, so we must * obtain the locks in same order here. */ if (!rtnl_held && !take_rtnl && block->lockeddevcnt) { up_read(&block->cb_lock); take_rtnl = true; goto retry; } /* Make sure all netdevs sharing this block are offload-capable. */ if (block->nooffloaddevcnt && err_stop) { ok_count = -EOPNOTSUPP; goto err_unlock; } tc_cls_offload_cnt_reset(block, tp, old_in_hw_count, old_flags); if (tp->ops->hw_del) tp->ops->hw_del(tp, type_data); ok_count = __tc_setup_cb_call(block, type, type_data, err_stop); if (ok_count < 0) goto err_unlock; if (tp->ops->hw_add) tp->ops->hw_add(tp, type_data); if (ok_count > 0) tc_cls_offload_cnt_update(block, tp, new_in_hw_count, new_flags, ok_count, true); err_unlock: up_read(&block->cb_lock); if (take_rtnl) rtnl_unlock(); return min(ok_count, 0); } EXPORT_SYMBOL(tc_setup_cb_replace); /* Destroy filter and decrement block offload counter, if filter was previously * offloaded. */ int tc_setup_cb_destroy(struct tcf_block *block, struct tcf_proto *tp, enum tc_setup_type type, void *type_data, bool err_stop, u32 *flags, unsigned int *in_hw_count, bool rtnl_held) { bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held; int ok_count; retry: if (take_rtnl) rtnl_lock(); down_read(&block->cb_lock); /* Need to obtain rtnl lock if block is bound to devs that require it. * In block bind code cb_lock is obtained while holding rtnl, so we must * obtain the locks in same order here. */ if (!rtnl_held && !take_rtnl && block->lockeddevcnt) { up_read(&block->cb_lock); take_rtnl = true; goto retry; } ok_count = __tc_setup_cb_call(block, type, type_data, err_stop); tc_cls_offload_cnt_reset(block, tp, in_hw_count, flags); if (tp->ops->hw_del) tp->ops->hw_del(tp, type_data); up_read(&block->cb_lock); if (take_rtnl) rtnl_unlock(); return min(ok_count, 0); } EXPORT_SYMBOL(tc_setup_cb_destroy); int tc_setup_cb_reoffload(struct tcf_block *block, struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, enum tc_setup_type type, void *type_data, void *cb_priv, u32 *flags, unsigned int *in_hw_count) { int err = cb(type, type_data, cb_priv); if (err) { if (add && tc_skip_sw(*flags)) return err; } else { tc_cls_offload_cnt_update(block, tp, in_hw_count, flags, 1, add); } return 0; } EXPORT_SYMBOL(tc_setup_cb_reoffload); static int tcf_act_get_user_cookie(struct flow_action_entry *entry, const struct tc_action *act) { struct tc_cookie *user_cookie; int err = 0; rcu_read_lock(); user_cookie = rcu_dereference(act->user_cookie); if (user_cookie) { entry->user_cookie = flow_action_cookie_create(user_cookie->data, user_cookie->len, GFP_ATOMIC); if (!entry->user_cookie) err = -ENOMEM; } rcu_read_unlock(); return err; } static void tcf_act_put_user_cookie(struct flow_action_entry *entry) { flow_action_cookie_destroy(entry->user_cookie); } void tc_cleanup_offload_action(struct flow_action *flow_action) { struct flow_action_entry *entry; int i; flow_action_for_each(i, entry, flow_action) { tcf_act_put_user_cookie(entry); if (entry->destructor) entry->destructor(entry->destructor_priv); } } EXPORT_SYMBOL(tc_cleanup_offload_action); static int tc_setup_offload_act(struct tc_action *act, struct flow_action_entry *entry, u32 *index_inc, struct netlink_ext_ack *extack) { #ifdef CONFIG_NET_CLS_ACT if (act->ops->offload_act_setup) { return act->ops->offload_act_setup(act, entry, index_inc, true, extack); } else { NL_SET_ERR_MSG(extack, "Action does not support offload"); return -EOPNOTSUPP; } #else return 0; #endif } int tc_setup_action(struct flow_action *flow_action, struct tc_action *actions[], u32 miss_cookie_base, struct netlink_ext_ack *extack) { int i, j, k, index, err = 0; struct tc_action *act; BUILD_BUG_ON(TCA_ACT_HW_STATS_ANY != FLOW_ACTION_HW_STATS_ANY); BUILD_BUG_ON(TCA_ACT_HW_STATS_IMMEDIATE != FLOW_ACTION_HW_STATS_IMMEDIATE); BUILD_BUG_ON(TCA_ACT_HW_STATS_DELAYED != FLOW_ACTION_HW_STATS_DELAYED); if (!actions) return 0; j = 0; tcf_act_for_each_action(i, act, actions) { struct flow_action_entry *entry; entry = &flow_action->entries[j]; spin_lock_bh(&act->tcfa_lock); err = tcf_act_get_user_cookie(entry, act); if (err) goto err_out_locked; index = 0; err = tc_setup_offload_act(act, entry, &index, extack); if (err) goto err_out_locked; for (k = 0; k < index ; k++) { entry[k].hw_stats = tc_act_hw_stats(act->hw_stats); entry[k].hw_index = act->tcfa_index; entry[k].cookie = (unsigned long)act; entry[k].miss_cookie = tcf_exts_miss_cookie_get(miss_cookie_base, i); } j += index; spin_unlock_bh(&act->tcfa_lock); } err_out: if (err) tc_cleanup_offload_action(flow_action); return err; err_out_locked: spin_unlock_bh(&act->tcfa_lock); goto err_out; } int tc_setup_offload_action(struct flow_action *flow_action, const struct tcf_exts *exts, struct netlink_ext_ack *extack) { #ifdef CONFIG_NET_CLS_ACT u32 miss_cookie_base; if (!exts) return 0; miss_cookie_base = exts->miss_cookie_node ? exts->miss_cookie_node->miss_cookie_base : 0; return tc_setup_action(flow_action, exts->actions, miss_cookie_base, extack); #else return 0; #endif } EXPORT_SYMBOL(tc_setup_offload_action); unsigned int tcf_exts_num_actions(struct tcf_exts *exts) { unsigned int num_acts = 0; struct tc_action *act; int i; tcf_exts_for_each_action(i, act, exts) { if (is_tcf_pedit(act)) num_acts += tcf_pedit_nkeys(act); else num_acts++; } return num_acts; } EXPORT_SYMBOL(tcf_exts_num_actions); #ifdef CONFIG_NET_CLS_ACT static int tcf_qevent_parse_block_index(struct nlattr *block_index_attr, u32 *p_block_index, struct netlink_ext_ack *extack) { *p_block_index = nla_get_u32(block_index_attr); if (!*p_block_index) { NL_SET_ERR_MSG(extack, "Block number may not be zero"); return -EINVAL; } return 0; } int tcf_qevent_init(struct tcf_qevent *qe, struct Qdisc *sch, enum flow_block_binder_type binder_type, struct nlattr *block_index_attr, struct netlink_ext_ack *extack) { u32 block_index; int err; if (!block_index_attr) return 0; err = tcf_qevent_parse_block_index(block_index_attr, &block_index, extack); if (err) return err; qe->info.binder_type = binder_type; qe->info.chain_head_change = tcf_chain_head_change_dflt; qe->info.chain_head_change_priv = &qe->filter_chain; qe->info.block_index = block_index; return tcf_block_get_ext(&qe->block, sch, &qe->info, extack); } EXPORT_SYMBOL(tcf_qevent_init); void tcf_qevent_destroy(struct tcf_qevent *qe, struct Qdisc *sch) { if (qe->info.block_index) tcf_block_put_ext(qe->block, sch, &qe->info); } EXPORT_SYMBOL(tcf_qevent_destroy); int tcf_qevent_validate_change(struct tcf_qevent *qe, struct nlattr *block_index_attr, struct netlink_ext_ack *extack) { u32 block_index; int err; if (!block_index_attr) return 0; err = tcf_qevent_parse_block_index(block_index_attr, &block_index, extack); if (err) return err; /* Bounce newly-configured block or change in block. */ if (block_index != qe->info.block_index) { NL_SET_ERR_MSG(extack, "Change of blocks is not supported"); return -EINVAL; } return 0; } EXPORT_SYMBOL(tcf_qevent_validate_change); struct sk_buff *tcf_qevent_handle(struct tcf_qevent *qe, struct Qdisc *sch, struct sk_buff *skb, struct sk_buff **to_free, int *ret) { struct tcf_result cl_res; struct tcf_proto *fl; if (!qe->info.block_index) return skb; fl = rcu_dereference_bh(qe->filter_chain); switch (tcf_classify(skb, NULL, fl, &cl_res, false)) { case TC_ACT_SHOT: qdisc_qstats_drop(sch); __qdisc_drop(skb, to_free); *ret = __NET_XMIT_BYPASS; return NULL; case TC_ACT_STOLEN: case TC_ACT_QUEUED: case TC_ACT_TRAP: __qdisc_drop(skb, to_free); *ret = __NET_XMIT_STOLEN; return NULL; case TC_ACT_REDIRECT: skb_do_redirect(skb); *ret = __NET_XMIT_STOLEN; return NULL; } return skb; } EXPORT_SYMBOL(tcf_qevent_handle); int tcf_qevent_dump(struct sk_buff *skb, int attr_name, struct tcf_qevent *qe) { if (!qe->info.block_index) return 0; return nla_put_u32(skb, attr_name, qe->info.block_index); } EXPORT_SYMBOL(tcf_qevent_dump); #endif static __net_init int tcf_net_init(struct net *net) { struct tcf_net *tn = net_generic(net, tcf_net_id); spin_lock_init(&tn->idr_lock); idr_init(&tn->idr); return 0; } static void __net_exit tcf_net_exit(struct net *net) { struct tcf_net *tn = net_generic(net, tcf_net_id); idr_destroy(&tn->idr); } static struct pernet_operations tcf_net_ops = { .init = tcf_net_init, .exit = tcf_net_exit, .id = &tcf_net_id, .size = sizeof(struct tcf_net), }; static const struct rtnl_msg_handler tc_filter_rtnl_msg_handlers[] __initconst = { {.msgtype = RTM_NEWTFILTER, .doit = tc_new_tfilter, .flags = RTNL_FLAG_DOIT_UNLOCKED}, {.msgtype = RTM_DELTFILTER, .doit = tc_del_tfilter, .flags = RTNL_FLAG_DOIT_UNLOCKED}, {.msgtype = RTM_GETTFILTER, .doit = tc_get_tfilter, .dumpit = tc_dump_tfilter, .flags = RTNL_FLAG_DOIT_UNLOCKED}, {.msgtype = RTM_NEWCHAIN, .doit = tc_ctl_chain}, {.msgtype = RTM_DELCHAIN, .doit = tc_ctl_chain}, {.msgtype = RTM_GETCHAIN, .doit = tc_ctl_chain, .dumpit = tc_dump_chain}, }; static int __init tc_filter_init(void) { int err; tc_filter_wq = alloc_ordered_workqueue("tc_filter_workqueue", 0); if (!tc_filter_wq) return -ENOMEM; err = register_pernet_subsys(&tcf_net_ops); if (err) goto err_register_pernet_subsys; xa_init_flags(&tcf_exts_miss_cookies_xa, XA_FLAGS_ALLOC1); rtnl_register_many(tc_filter_rtnl_msg_handlers); return 0; err_register_pernet_subsys: destroy_workqueue(tc_filter_wq); return err; } subsys_initcall(tc_filter_init);
2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 // SPDX-License-Identifier: GPL-2.0-or-later /* * IPVS: Weighted Least-Connection Scheduling module * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * Peter Kese <peter.kese@ijs.si> * * Changes: * Wensong Zhang : changed the ip_vs_wlc_schedule to return dest * Wensong Zhang : changed to use the inactconns in scheduling * Wensong Zhang : changed some comestics things for debugging * Wensong Zhang : changed for the d-linked destination list * Wensong Zhang : added the ip_vs_wlc_update_svc * Wensong Zhang : added any dest with weight=0 is quiesced */ #define KMSG_COMPONENT "IPVS" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <linux/module.h> #include <linux/kernel.h> #include <net/ip_vs.h> /* * Weighted Least Connection scheduling */ static struct ip_vs_dest * ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, struct ip_vs_iphdr *iph) { struct ip_vs_dest *dest, *least; int loh, doh; IP_VS_DBG(6, "ip_vs_wlc_schedule(): Scheduling...\n"); /* * We calculate the load of each dest server as follows: * (dest overhead) / dest->weight * * Remember -- no floats in kernel mode!!! * The comparison of h1*w2 > h2*w1 is equivalent to that of * h1/w1 > h2/w2 * if every weight is larger than zero. * * The server with weight=0 is quiesced and will not receive any * new connections. */ list_for_each_entry_rcu(dest, &svc->destinations, n_list) { if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) && atomic_read(&dest->weight) > 0) { least = dest; loh = ip_vs_dest_conn_overhead(least); goto nextstage; } } ip_vs_scheduler_err(svc, "no destination available"); return NULL; /* * Find the destination with the least load. */ nextstage: list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) { if (dest->flags & IP_VS_DEST_F_OVERLOAD) continue; doh = ip_vs_dest_conn_overhead(dest); if ((__s64)loh * atomic_read(&dest->weight) > (__s64)doh * atomic_read(&least->weight)) { least = dest; loh = doh; } } IP_VS_DBG_BUF(6, "WLC: server %s:%u " "activeconns %d refcnt %d weight %d overhead %d\n", IP_VS_DBG_ADDR(least->af, &least->addr), ntohs(least->port), atomic_read(&least->activeconns), refcount_read(&least->refcnt), atomic_read(&least->weight), loh); return least; } static struct ip_vs_scheduler ip_vs_wlc_scheduler = { .name = "wlc", .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_wlc_scheduler.n_list), .schedule = ip_vs_wlc_schedule, }; static int __init ip_vs_wlc_init(void) { return register_ip_vs_scheduler(&ip_vs_wlc_scheduler); } static void __exit ip_vs_wlc_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_wlc_scheduler); synchronize_rcu(); } module_init(ip_vs_wlc_init); module_exit(ip_vs_wlc_cleanup); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("ipvs weighted least connection scheduler");
7 1 3 2 1 2 1 1 1 1 7 5 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 // SPDX-License-Identifier: GPL-2.0-only /* Kernel module to match L2TP header parameters. */ /* (C) 2013 James Chapman <jchapman@katalix.com> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/skbuff.h> #include <linux/if_ether.h> #include <net/ip.h> #include <linux/ipv6.h> #include <net/ipv6.h> #include <net/udp.h> #include <linux/l2tp.h> #include <linux/netfilter_ipv4.h> #include <linux/netfilter_ipv6.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_tcpudp.h> #include <linux/netfilter/xt_l2tp.h> /* L2TP header masks */ #define L2TP_HDR_T_BIT 0x8000 #define L2TP_HDR_L_BIT 0x4000 #define L2TP_HDR_VER 0x000f MODULE_LICENSE("GPL"); MODULE_AUTHOR("James Chapman <jchapman@katalix.com>"); MODULE_DESCRIPTION("Xtables: L2TP header match"); MODULE_ALIAS("ipt_l2tp"); MODULE_ALIAS("ip6t_l2tp"); /* The L2TP fields that can be matched */ struct l2tp_data { u32 tid; u32 sid; u8 type; u8 version; }; union l2tp_val { __be16 val16[2]; __be32 val32; }; static bool l2tp_match(const struct xt_l2tp_info *info, struct l2tp_data *data) { if ((info->flags & XT_L2TP_TYPE) && (info->type != data->type)) return false; if ((info->flags & XT_L2TP_VERSION) && (info->version != data->version)) return false; /* Check tid only for L2TPv3 control or any L2TPv2 packets */ if ((info->flags & XT_L2TP_TID) && ((data->type == XT_L2TP_TYPE_CONTROL) || (data->version == 2)) && (info->tid != data->tid)) return false; /* Check sid only for L2TP data packets */ if ((info->flags & XT_L2TP_SID) && (data->type == XT_L2TP_TYPE_DATA) && (info->sid != data->sid)) return false; return true; } /* Parse L2TP header fields when UDP encapsulation is used. Handles * L2TPv2 and L2TPv3. Note the L2TPv3 control and data packets have a * different format. See * RFC2661, Section 3.1, L2TPv2 Header Format * RFC3931, Section 3.2.1, L2TPv3 Control Message Header * RFC3931, Section 3.2.2, L2TPv3 Data Message Header * RFC3931, Section 4.1.2.1, L2TPv3 Session Header over UDP */ static bool l2tp_udp_mt(const struct sk_buff *skb, struct xt_action_param *par, u16 thoff) { const struct xt_l2tp_info *info = par->matchinfo; int uhlen = sizeof(struct udphdr); int offs = thoff + uhlen; union l2tp_val *lh; union l2tp_val lhbuf; u16 flags; struct l2tp_data data = { 0, }; if (par->fragoff != 0) return false; /* Extract L2TP header fields. The flags in the first 16 bits * tell us where the other fields are. */ lh = skb_header_pointer(skb, offs, 2, &lhbuf); if (lh == NULL) return false; flags = ntohs(lh->val16[0]); if (flags & L2TP_HDR_T_BIT) data.type = XT_L2TP_TYPE_CONTROL; else data.type = XT_L2TP_TYPE_DATA; data.version = (u8) flags & L2TP_HDR_VER; /* Now extract the L2TP tid/sid. These are in different places * for L2TPv2 (rfc2661) and L2TPv3 (rfc3931). For L2TPv2, we * must also check to see if the length field is present, * since this affects the offsets into the packet of the * tid/sid fields. */ if (data.version == 3) { lh = skb_header_pointer(skb, offs + 4, 4, &lhbuf); if (lh == NULL) return false; if (data.type == XT_L2TP_TYPE_CONTROL) data.tid = ntohl(lh->val32); else data.sid = ntohl(lh->val32); } else if (data.version == 2) { if (flags & L2TP_HDR_L_BIT) offs += 2; lh = skb_header_pointer(skb, offs + 2, 4, &lhbuf); if (lh == NULL) return false; data.tid = (u32) ntohs(lh->val16[0]); data.sid = (u32) ntohs(lh->val16[1]); } else return false; return l2tp_match(info, &data); } /* Parse L2TP header fields for IP encapsulation (no UDP header). * L2TPv3 data packets have a different form with IP encap. See * RC3931, Section 4.1.1.1, L2TPv3 Session Header over IP. * RC3931, Section 4.1.1.2, L2TPv3 Control and Data Traffic over IP. */ static bool l2tp_ip_mt(const struct sk_buff *skb, struct xt_action_param *par, u16 thoff) { const struct xt_l2tp_info *info = par->matchinfo; union l2tp_val *lh; union l2tp_val lhbuf; struct l2tp_data data = { 0, }; /* For IP encap, the L2TP sid is the first 32-bits. */ lh = skb_header_pointer(skb, thoff, sizeof(lhbuf), &lhbuf); if (lh == NULL) return false; if (lh->val32 == 0) { /* Must be a control packet. The L2TP tid is further * into the packet. */ data.type = XT_L2TP_TYPE_CONTROL; lh = skb_header_pointer(skb, thoff + 8, sizeof(lhbuf), &lhbuf); if (lh == NULL) return false; data.tid = ntohl(lh->val32); } else { data.sid = ntohl(lh->val32); data.type = XT_L2TP_TYPE_DATA; } data.version = 3; return l2tp_match(info, &data); } static bool l2tp_mt4(const struct sk_buff *skb, struct xt_action_param *par) { struct iphdr *iph = ip_hdr(skb); u8 ipproto = iph->protocol; /* l2tp_mt_check4 already restricts the transport protocol */ switch (ipproto) { case IPPROTO_UDP: return l2tp_udp_mt(skb, par, par->thoff); case IPPROTO_L2TP: return l2tp_ip_mt(skb, par, par->thoff); } return false; } #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) static bool l2tp_mt6(const struct sk_buff *skb, struct xt_action_param *par) { unsigned int thoff = 0; unsigned short fragoff = 0; int ipproto; ipproto = ipv6_find_hdr(skb, &thoff, -1, &fragoff, NULL); if (fragoff != 0) return false; /* l2tp_mt_check6 already restricts the transport protocol */ switch (ipproto) { case IPPROTO_UDP: return l2tp_udp_mt(skb, par, thoff); case IPPROTO_L2TP: return l2tp_ip_mt(skb, par, thoff); } return false; } #endif static int l2tp_mt_check(const struct xt_mtchk_param *par) { const struct xt_l2tp_info *info = par->matchinfo; /* Check for invalid flags */ if (info->flags & ~(XT_L2TP_TID | XT_L2TP_SID | XT_L2TP_VERSION | XT_L2TP_TYPE)) { pr_info_ratelimited("unknown flags: %x\n", info->flags); return -EINVAL; } /* At least one of tid, sid or type=control must be specified */ if ((!(info->flags & XT_L2TP_TID)) && (!(info->flags & XT_L2TP_SID)) && ((!(info->flags & XT_L2TP_TYPE)) || (info->type != XT_L2TP_TYPE_CONTROL))) { pr_info_ratelimited("invalid flags combination: %x\n", info->flags); return -EINVAL; } /* If version 2 is specified, check that incompatible params * are not supplied */ if (info->flags & XT_L2TP_VERSION) { if ((info->version < 2) || (info->version > 3)) { pr_info_ratelimited("wrong L2TP version: %u\n", info->version); return -EINVAL; } if (info->version == 2) { if ((info->flags & XT_L2TP_TID) && (info->tid > 0xffff)) { pr_info_ratelimited("v2 tid > 0xffff: %u\n", info->tid); return -EINVAL; } if ((info->flags & XT_L2TP_SID) && (info->sid > 0xffff)) { pr_info_ratelimited("v2 sid > 0xffff: %u\n", info->sid); return -EINVAL; } } } return 0; } static int l2tp_mt_check4(const struct xt_mtchk_param *par) { const struct xt_l2tp_info *info = par->matchinfo; const struct ipt_entry *e = par->entryinfo; const struct ipt_ip *ip = &e->ip; int ret; ret = l2tp_mt_check(par); if (ret != 0) return ret; if ((ip->proto != IPPROTO_UDP) && (ip->proto != IPPROTO_L2TP)) { pr_info_ratelimited("missing protocol rule (udp|l2tpip)\n"); return -EINVAL; } if ((ip->proto == IPPROTO_L2TP) && (info->version == 2)) { pr_info_ratelimited("v2 doesn't support IP mode\n"); return -EINVAL; } return 0; } #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) static int l2tp_mt_check6(const struct xt_mtchk_param *par) { const struct xt_l2tp_info *info = par->matchinfo; const struct ip6t_entry *e = par->entryinfo; const struct ip6t_ip6 *ip = &e->ipv6; int ret; ret = l2tp_mt_check(par); if (ret != 0) return ret; if ((ip->proto != IPPROTO_UDP) && (ip->proto != IPPROTO_L2TP)) { pr_info_ratelimited("missing protocol rule (udp|l2tpip)\n"); return -EINVAL; } if ((ip->proto == IPPROTO_L2TP) && (info->version == 2)) { pr_info_ratelimited("v2 doesn't support IP mode\n"); return -EINVAL; } return 0; } #endif static struct xt_match l2tp_mt_reg[] __read_mostly = { { .name = "l2tp", .revision = 0, .family = NFPROTO_IPV4, .match = l2tp_mt4, .matchsize = XT_ALIGN(sizeof(struct xt_l2tp_info)), .checkentry = l2tp_mt_check4, .hooks = ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD)), .me = THIS_MODULE, }, #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) { .name = "l2tp", .revision = 0, .family = NFPROTO_IPV6, .match = l2tp_mt6, .matchsize = XT_ALIGN(sizeof(struct xt_l2tp_info)), .checkentry = l2tp_mt_check6, .hooks = ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD)), .me = THIS_MODULE, }, #endif }; static int __init l2tp_mt_init(void) { return xt_register_matches(&l2tp_mt_reg[0], ARRAY_SIZE(l2tp_mt_reg)); } static void __exit l2tp_mt_exit(void) { xt_unregister_matches(&l2tp_mt_reg[0], ARRAY_SIZE(l2tp_mt_reg)); } module_init(l2tp_mt_init); module_exit(l2tp_mt_exit);
170 168 167 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 // SPDX-License-Identifier: GPL-2.0-or-later /* * x86 instruction attribute tables * * Written by Masami Hiramatsu <mhiramat@redhat.com> */ #include <asm/insn.h> /* __ignore_sync_check__ */ /* Attribute tables are generated from opcode map */ #include "inat-tables.c" /* Attribute search APIs */ insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode) { return inat_primary_table[opcode]; } int inat_get_last_prefix_id(insn_byte_t last_pfx) { insn_attr_t lpfx_attr; lpfx_attr = inat_get_opcode_attribute(last_pfx); return inat_last_prefix_id(lpfx_attr); } insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, int lpfx_id, insn_attr_t esc_attr) { const insn_attr_t *table; int n; n = inat_escape_id(esc_attr); table = inat_escape_tables[n][0]; if (!table) return 0; if (inat_has_variant(table[opcode]) && lpfx_id) { table = inat_escape_tables[n][lpfx_id]; if (!table) return 0; } return table[opcode]; } insn_attr_t inat_get_group_attribute(insn_byte_t modrm, int lpfx_id, insn_attr_t grp_attr) { const insn_attr_t *table; int n; n = inat_group_id(grp_attr); table = inat_group_tables[n][0]; if (!table) return inat_group_common_attribute(grp_attr); if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && lpfx_id) { table = inat_group_tables[n][lpfx_id]; if (!table) return inat_group_common_attribute(grp_attr); } return table[X86_MODRM_REG(modrm)] | inat_group_common_attribute(grp_attr); } insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m, insn_byte_t vex_p) { const insn_attr_t *table; if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX) return 0; /* At first, this checks the master table */ table = inat_avx_tables[vex_m][0]; if (!table) return 0; if (!inat_is_group(table[opcode]) && vex_p) { /* If this is not a group, get attribute directly */ table = inat_avx_tables[vex_m][vex_p]; if (!table) return 0; } return table[opcode]; } insn_attr_t inat_get_xop_attribute(insn_byte_t opcode, insn_byte_t map_select) { const insn_attr_t *table; if (map_select < X86_XOP_M_MIN || map_select > X86_XOP_M_MAX) return 0; map_select -= X86_XOP_M_MIN; /* At first, this checks the master table */ table = inat_xop_tables[map_select]; if (!table) return 0; return table[opcode]; }
2 2 2 2 2 2 2 2 2 2 2 2 10 1 3 3 3 3 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 /* * Davicom DM96xx USB 10/100Mbps ethernet devices * * Peter Korsgaard <jacmet@sunsite.dk> * * This file is licensed under the terms of the GNU General Public License * version 2. This program is licensed "as is" without any warranty of any * kind, whether express or implied. */ //#define DEBUG #include <linux/module.h> #include <linux/sched.h> #include <linux/stddef.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/ethtool.h> #include <linux/mii.h> #include <linux/usb.h> #include <linux/crc32.h> #include <linux/usb/usbnet.h> #include <linux/slab.h> /* datasheet: http://ptm2.cc.utu.fi/ftp/network/cards/DM9601/From_NET/DM9601-DS-P01-930914.pdf */ /* control requests */ #define DM_READ_REGS 0x00 #define DM_WRITE_REGS 0x01 #define DM_READ_MEMS 0x02 #define DM_WRITE_REG 0x03 #define DM_WRITE_MEMS 0x05 #define DM_WRITE_MEM 0x07 /* registers */ #define DM_NET_CTRL 0x00 #define DM_RX_CTRL 0x05 #define DM_SHARED_CTRL 0x0b #define DM_SHARED_ADDR 0x0c #define DM_SHARED_DATA 0x0d /* low + high */ #define DM_PHY_ADDR 0x10 /* 6 bytes */ #define DM_MCAST_ADDR 0x16 /* 8 bytes */ #define DM_GPR_CTRL 0x1e #define DM_GPR_DATA 0x1f #define DM_CHIP_ID 0x2c #define DM_MODE_CTRL 0x91 /* only on dm9620 */ /* chip id values */ #define ID_DM9601 0 #define ID_DM9620 1 #define DM_MAX_MCAST 64 #define DM_MCAST_SIZE 8 #define DM_EEPROM_LEN 256 #define DM_TX_OVERHEAD 2 /* 2 byte header */ #define DM_RX_OVERHEAD 7 /* 3 byte header + 4 byte crc tail */ #define DM_TIMEOUT 1000 static int dm_read(struct usbnet *dev, u8 reg, u16 length, void *data) { int err; err = usbnet_read_cmd(dev, DM_READ_REGS, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, reg, data, length); if(err != length && err >= 0) err = -EINVAL; return err; } static int dm_read_reg(struct usbnet *dev, u8 reg, u8 *value) { return dm_read(dev, reg, 1, value); } static int dm_write(struct usbnet *dev, u8 reg, u16 length, void *data) { int err; err = usbnet_write_cmd(dev, DM_WRITE_REGS, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, reg, data, length); if (err >= 0 && err < length) err = -EINVAL; return err; } static int dm_write_reg(struct usbnet *dev, u8 reg, u8 value) { return usbnet_write_cmd(dev, DM_WRITE_REG, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, reg, NULL, 0); } static void dm_write_async(struct usbnet *dev, u8 reg, u16 length, const void *data) { usbnet_write_cmd_async(dev, DM_WRITE_REGS, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, reg, data, length); } static void dm_write_reg_async(struct usbnet *dev, u8 reg, u8 value) { usbnet_write_cmd_async(dev, DM_WRITE_REG, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, reg, NULL, 0); } static int dm_read_shared_word(struct usbnet *dev, int phy, u8 reg, __le16 *value) { int ret, i; mutex_lock(&dev->phy_mutex); dm_write_reg(dev, DM_SHARED_ADDR, phy ? (reg | 0x40) : reg); dm_write_reg(dev, DM_SHARED_CTRL, phy ? 0xc : 0x4); for (i = 0; i < DM_TIMEOUT; i++) { u8 tmp = 0; udelay(1); ret = dm_read_reg(dev, DM_SHARED_CTRL, &tmp); if (ret < 0) goto out; /* ready */ if ((tmp & 1) == 0) break; } if (i == DM_TIMEOUT) { netdev_err(dev->net, "%s read timed out!\n", phy ? "phy" : "eeprom"); ret = -EIO; goto out; } dm_write_reg(dev, DM_SHARED_CTRL, 0x0); ret = dm_read(dev, DM_SHARED_DATA, 2, value); netdev_dbg(dev->net, "read shared %d 0x%02x returned 0x%04x, %d\n", phy, reg, *value, ret); out: mutex_unlock(&dev->phy_mutex); return ret; } static int dm_write_shared_word(struct usbnet *dev, int phy, u8 reg, __le16 value) { int ret, i; mutex_lock(&dev->phy_mutex); ret = dm_write(dev, DM_SHARED_DATA, 2, &value); if (ret < 0) goto out; dm_write_reg(dev, DM_SHARED_ADDR, phy ? (reg | 0x40) : reg); dm_write_reg(dev, DM_SHARED_CTRL, phy ? 0x1a : 0x12); for (i = 0; i < DM_TIMEOUT; i++) { u8 tmp = 0; udelay(1); ret = dm_read_reg(dev, DM_SHARED_CTRL, &tmp); if (ret < 0) goto out; /* ready */ if ((tmp & 1) == 0) break; } if (i == DM_TIMEOUT) { netdev_err(dev->net, "%s write timed out!\n", phy ? "phy" : "eeprom"); ret = -EIO; goto out; } dm_write_reg(dev, DM_SHARED_CTRL, 0x0); out: mutex_unlock(&dev->phy_mutex); return ret; } static int dm_read_eeprom_word(struct usbnet *dev, u8 offset, void *value) { return dm_read_shared_word(dev, 0, offset, value); } static int dm9601_get_eeprom_len(struct net_device *dev) { return DM_EEPROM_LEN; } static int dm9601_get_eeprom(struct net_device *net, struct ethtool_eeprom *eeprom, u8 * data) { struct usbnet *dev = netdev_priv(net); __le16 *ebuf = (__le16 *) data; int i; /* access is 16bit */ if ((eeprom->offset % 2) || (eeprom->len % 2)) return -EINVAL; for (i = 0; i < eeprom->len / 2; i++) { if (dm_read_eeprom_word(dev, eeprom->offset / 2 + i, &ebuf[i]) < 0) return -EINVAL; } return 0; } static int dm9601_mdio_read(struct net_device *netdev, int phy_id, int loc) { struct usbnet *dev = netdev_priv(netdev); __le16 res; int err; if (phy_id) { netdev_dbg(dev->net, "Only internal phy supported\n"); return 0; } err = dm_read_shared_word(dev, 1, loc, &res); if (err < 0) { netdev_err(dev->net, "MDIO read error: %d\n", err); return 0; } netdev_dbg(dev->net, "dm9601_mdio_read() phy_id=0x%02x, loc=0x%02x, returns=0x%04x\n", phy_id, loc, le16_to_cpu(res)); return le16_to_cpu(res); } static void dm9601_mdio_write(struct net_device *netdev, int phy_id, int loc, int val) { struct usbnet *dev = netdev_priv(netdev); __le16 res = cpu_to_le16(val); if (phy_id) { netdev_dbg(dev->net, "Only internal phy supported\n"); return; } netdev_dbg(dev->net, "dm9601_mdio_write() phy_id=0x%02x, loc=0x%02x, val=0x%04x\n", phy_id, loc, val); dm_write_shared_word(dev, 1, loc, res); } static void dm9601_get_drvinfo(struct net_device *net, struct ethtool_drvinfo *info) { /* Inherit standard device info */ usbnet_get_drvinfo(net, info); } static u32 dm9601_get_link(struct net_device *net) { struct usbnet *dev = netdev_priv(net); return mii_link_ok(&dev->mii); } static int dm9601_ioctl(struct net_device *net, struct ifreq *rq, int cmd) { struct usbnet *dev = netdev_priv(net); return generic_mii_ioctl(&dev->mii, if_mii(rq), cmd, NULL); } static const struct ethtool_ops dm9601_ethtool_ops = { .get_drvinfo = dm9601_get_drvinfo, .get_link = dm9601_get_link, .get_msglevel = usbnet_get_msglevel, .set_msglevel = usbnet_set_msglevel, .get_eeprom_len = dm9601_get_eeprom_len, .get_eeprom = dm9601_get_eeprom, .nway_reset = usbnet_nway_reset, .get_link_ksettings = usbnet_get_link_ksettings_mii, .set_link_ksettings = usbnet_set_link_ksettings_mii, }; static void dm9601_set_multicast(struct net_device *net) { struct usbnet *dev = netdev_priv(net); /* We use the 20 byte dev->data for our 8 byte filter buffer * to avoid allocating memory that is tricky to free later */ u8 *hashes = (u8 *) & dev->data; u8 rx_ctl = 0x31; memset(hashes, 0x00, DM_MCAST_SIZE); hashes[DM_MCAST_SIZE - 1] |= 0x80; /* broadcast address */ if (net->flags & IFF_PROMISC) { rx_ctl |= 0x02; } else if (net->flags & IFF_ALLMULTI || netdev_mc_count(net) > DM_MAX_MCAST) { rx_ctl |= 0x08; } else if (!netdev_mc_empty(net)) { struct netdev_hw_addr *ha; netdev_for_each_mc_addr(ha, net) { u32 crc = ether_crc(ETH_ALEN, ha->addr) >> 26; hashes[crc >> 3] |= 1 << (crc & 0x7); } } dm_write_async(dev, DM_MCAST_ADDR, DM_MCAST_SIZE, hashes); dm_write_reg_async(dev, DM_RX_CTRL, rx_ctl); } static void __dm9601_set_mac_address(struct usbnet *dev) { dm_write_async(dev, DM_PHY_ADDR, ETH_ALEN, dev->net->dev_addr); } static int dm9601_set_mac_address(struct net_device *net, void *p) { struct sockaddr *addr = p; struct usbnet *dev = netdev_priv(net); if (!is_valid_ether_addr(addr->sa_data)) { dev_err(&net->dev, "not setting invalid mac address %pM\n", addr->sa_data); return -EINVAL; } eth_hw_addr_set(net, addr->sa_data); __dm9601_set_mac_address(dev); return 0; } static const struct net_device_ops dm9601_netdev_ops = { .ndo_open = usbnet_open, .ndo_stop = usbnet_stop, .ndo_start_xmit = usbnet_start_xmit, .ndo_tx_timeout = usbnet_tx_timeout, .ndo_change_mtu = usbnet_change_mtu, .ndo_get_stats64 = dev_get_tstats64, .ndo_validate_addr = eth_validate_addr, .ndo_eth_ioctl = dm9601_ioctl, .ndo_set_rx_mode = dm9601_set_multicast, .ndo_set_mac_address = dm9601_set_mac_address, }; static int dm9601_bind(struct usbnet *dev, struct usb_interface *intf) { int ret; u8 mac[ETH_ALEN], id; ret = usbnet_get_endpoints(dev, intf); if (ret) goto out; dev->net->netdev_ops = &dm9601_netdev_ops; dev->net->ethtool_ops = &dm9601_ethtool_ops; dev->net->hard_header_len += DM_TX_OVERHEAD; dev->hard_mtu = dev->net->mtu + dev->net->hard_header_len; /* dm9620/21a require room for 4 byte padding, even in dm9601 * mode, so we need +1 to be able to receive full size * ethernet frames. */ dev->rx_urb_size = dev->net->mtu + ETH_HLEN + DM_RX_OVERHEAD + 1; dev->mii.dev = dev->net; dev->mii.mdio_read = dm9601_mdio_read; dev->mii.mdio_write = dm9601_mdio_write; dev->mii.phy_id_mask = 0x1f; dev->mii.reg_num_mask = 0x1f; /* reset */ dm_write_reg(dev, DM_NET_CTRL, 1); udelay(20); /* read MAC */ if (dm_read(dev, DM_PHY_ADDR, ETH_ALEN, mac) < 0) { printk(KERN_ERR "Error reading MAC address\n"); ret = -ENODEV; goto out; } /* * Overwrite the auto-generated address only with good ones. */ if (is_valid_ether_addr(mac)) eth_hw_addr_set(dev->net, mac); else { printk(KERN_WARNING "dm9601: No valid MAC address in EEPROM, using %pM\n", dev->net->dev_addr); __dm9601_set_mac_address(dev); } if (dm_read_reg(dev, DM_CHIP_ID, &id) < 0) { netdev_err(dev->net, "Error reading chip ID\n"); ret = -ENODEV; goto out; } /* put dm9620 devices in dm9601 mode */ if (id == ID_DM9620) { u8 mode; if (dm_read_reg(dev, DM_MODE_CTRL, &mode) < 0) { netdev_err(dev->net, "Error reading MODE_CTRL\n"); ret = -ENODEV; goto out; } dm_write_reg(dev, DM_MODE_CTRL, mode & 0x7f); } /* power up phy */ dm_write_reg(dev, DM_GPR_CTRL, 1); dm_write_reg(dev, DM_GPR_DATA, 0); /* receive broadcast packets */ dm9601_set_multicast(dev->net); dm9601_mdio_write(dev->net, dev->mii.phy_id, MII_BMCR, BMCR_RESET); dm9601_mdio_write(dev->net, dev->mii.phy_id, MII_ADVERTISE, ADVERTISE_ALL | ADVERTISE_CSMA | ADVERTISE_PAUSE_CAP); mii_nway_restart(&dev->mii); out: return ret; } static int dm9601_rx_fixup(struct usbnet *dev, struct sk_buff *skb) { u8 status; int len; /* format: b1: rx status b2: packet length (incl crc) low b3: packet length (incl crc) high b4..n-4: packet data bn-3..bn: ethernet crc */ if (unlikely(skb->len < DM_RX_OVERHEAD)) { dev_err(&dev->udev->dev, "unexpected tiny rx frame\n"); return 0; } status = skb->data[0]; len = (skb->data[1] | (skb->data[2] << 8)) - 4; if (unlikely(status & 0xbf)) { if (status & 0x01) dev->net->stats.rx_fifo_errors++; if (status & 0x02) dev->net->stats.rx_crc_errors++; if (status & 0x04) dev->net->stats.rx_frame_errors++; if (status & 0x20) dev->net->stats.rx_missed_errors++; if (status & 0x90) dev->net->stats.rx_length_errors++; return 0; } skb_pull(skb, 3); skb_trim(skb, len); return 1; } static struct sk_buff *dm9601_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags) { int len, pad; /* format: b1: packet length low b2: packet length high b3..n: packet data */ len = skb->len + DM_TX_OVERHEAD; /* workaround for dm962x errata with tx fifo getting out of * sync if a USB bulk transfer retry happens right after a * packet with odd / maxpacket length by adding up to 3 bytes * padding. */ while ((len & 1) || !(len % dev->maxpacket)) len++; len -= DM_TX_OVERHEAD; /* hw header doesn't count as part of length */ pad = len - skb->len; if (skb_headroom(skb) < DM_TX_OVERHEAD || skb_tailroom(skb) < pad) { struct sk_buff *skb2; skb2 = skb_copy_expand(skb, DM_TX_OVERHEAD, pad, flags); dev_kfree_skb_any(skb); skb = skb2; if (!skb) return NULL; } __skb_push(skb, DM_TX_OVERHEAD); if (pad) { memset(skb->data + skb->len, 0, pad); __skb_put(skb, pad); } skb->data[0] = len; skb->data[1] = len >> 8; return skb; } static void dm9601_status(struct usbnet *dev, struct urb *urb) { int link; u8 *buf; /* format: b0: net status b1: tx status 1 b2: tx status 2 b3: rx status b4: rx overflow b5: rx count b6: tx count b7: gpr */ if (urb->actual_length < 8) return; buf = urb->transfer_buffer; link = !!(buf[0] & 0x40); if (netif_carrier_ok(dev->net) != link) { usbnet_link_change(dev, link, 1); netdev_dbg(dev->net, "Link Status is: %d\n", link); } } static int dm9601_link_reset(struct usbnet *dev) { struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET }; mii_check_media(&dev->mii, 1, 1); mii_ethtool_gset(&dev->mii, &ecmd); netdev_dbg(dev->net, "link_reset() speed: %u duplex: %d\n", ethtool_cmd_speed(&ecmd), ecmd.duplex); return 0; } static const struct driver_info dm9601_info = { .description = "Davicom DM96xx USB 10/100 Ethernet", .flags = FLAG_ETHER | FLAG_LINK_INTR, .bind = dm9601_bind, .rx_fixup = dm9601_rx_fixup, .tx_fixup = dm9601_tx_fixup, .status = dm9601_status, .link_reset = dm9601_link_reset, .reset = dm9601_link_reset, }; static const struct usb_device_id products[] = { { USB_DEVICE(0x07aa, 0x9601), /* Corega FEther USB-TXC */ .driver_info = (unsigned long)&dm9601_info, }, { USB_DEVICE(0x0a46, 0x9601), /* Davicom USB-100 */ .driver_info = (unsigned long)&dm9601_info, }, { USB_DEVICE(0x0a46, 0x6688), /* ZT6688 USB NIC */ .driver_info = (unsigned long)&dm9601_info, }, { USB_DEVICE(0x0a46, 0x0268), /* ShanTou ST268 USB NIC */ .driver_info = (unsigned long)&dm9601_info, }, { USB_DEVICE(0x0a46, 0x8515), /* ADMtek ADM8515 USB NIC */ .driver_info = (unsigned long)&dm9601_info, }, { USB_DEVICE(0x0a47, 0x9601), /* Hirose USB-100 */ .driver_info = (unsigned long)&dm9601_info, }, { USB_DEVICE(0x0fe6, 0x8101), /* DM9601 USB to Fast Ethernet Adapter */ .driver_info = (unsigned long)&dm9601_info, }, { USB_DEVICE(0x0fe6, 0x9700), /* DM9601 USB to Fast Ethernet Adapter */ .driver_info = (unsigned long)&dm9601_info, }, { USB_DEVICE(0x0a46, 0x9000), /* DM9000E */ .driver_info = (unsigned long)&dm9601_info, }, { USB_DEVICE(0x0a46, 0x9620), /* DM9620 USB to Fast Ethernet Adapter */ .driver_info = (unsigned long)&dm9601_info, }, { USB_DEVICE(0x0a46, 0x9621), /* DM9621A USB to Fast Ethernet Adapter */ .driver_info = (unsigned long)&dm9601_info, }, { USB_DEVICE(0x0a46, 0x9622), /* DM9622 USB to Fast Ethernet Adapter */ .driver_info = (unsigned long)&dm9601_info, }, { USB_DEVICE(0x0a46, 0x0269), /* DM962OA USB to Fast Ethernet Adapter */ .driver_info = (unsigned long)&dm9601_info, }, { USB_DEVICE(0x0a46, 0x1269), /* DM9621A USB to Fast Ethernet Adapter */ .driver_info = (unsigned long)&dm9601_info, }, { USB_DEVICE(0x0586, 0x3427), /* ZyXEL Keenetic Plus DSL xDSL modem */ .driver_info = (unsigned long)&dm9601_info, }, {}, // END }; MODULE_DEVICE_TABLE(usb, products); static struct usb_driver dm9601_driver = { .name = "dm9601", .id_table = products, .probe = usbnet_probe, .disconnect = usbnet_disconnect, .suspend = usbnet_suspend, .resume = usbnet_resume, .disable_hub_initiated_lpm = 1, }; module_usb_driver(dm9601_driver); MODULE_AUTHOR("Peter Korsgaard <jacmet@sunsite.dk>"); MODULE_DESCRIPTION("Davicom DM96xx USB 10/100 ethernet devices"); MODULE_LICENSE("GPL");
1 1 1 1 1 5 1 1 4 1 1 1 1 1 1 7 7 1 3 4 1 3 1 3 6 4 2 1 1 1 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 // SPDX-License-Identifier: GPL-2.0-or-later /* * dir.c - Operations for configfs directories. * * Based on sysfs: * sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel * * configfs Copyright (C) 2005 Oracle. All rights reserved. */ #undef DEBUG #include <linux/fs.h> #include <linux/fsnotify.h> #include <linux/mount.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/err.h> #include <linux/configfs.h> #include "configfs_internal.h" /* * Protects mutations of configfs_dirent linkage together with proper i_mutex * Also protects mutations of symlinks linkage to target configfs_dirent * Mutators of configfs_dirent linkage must *both* have the proper inode locked * and configfs_dirent_lock locked, in that order. * This allows one to safely traverse configfs_dirent trees and symlinks without * having to lock inodes. * * Protects setting of CONFIGFS_USET_DROPPING: checking the flag * unlocked is not reliable unless in detach_groups() called from * rmdir()/unregister() and from configfs_attach_group() */ DEFINE_SPINLOCK(configfs_dirent_lock); /* * All of link_obj/unlink_obj/link_group/unlink_group require that * subsys->su_mutex is held. * But parent configfs_subsystem is NULL when config_item is root. * Use this mutex when config_item is root. */ static DEFINE_MUTEX(configfs_subsystem_mutex); static void configfs_d_iput(struct dentry * dentry, struct inode * inode) { struct configfs_dirent *sd = dentry->d_fsdata; if (sd) { /* Coordinate with configfs_readdir */ spin_lock(&configfs_dirent_lock); /* * Set sd->s_dentry to null only when this dentry is the one * that is going to be killed. Otherwise configfs_d_iput may * run just after configfs_lookup and set sd->s_dentry to * NULL even it's still in use. */ if (sd->s_dentry == dentry) sd->s_dentry = NULL; spin_unlock(&configfs_dirent_lock); configfs_put(sd); } iput(inode); } const struct dentry_operations configfs_dentry_ops = { .d_iput = configfs_d_iput, }; #ifdef CONFIG_LOCKDEP /* * Helpers to make lockdep happy with our recursive locking of default groups' * inodes (see configfs_attach_group() and configfs_detach_group()). * We put default groups i_mutexes in separate classes according to their depth * from the youngest non-default group ancestor. * * For a non-default group A having default groups A/B, A/C, and A/C/D, default * groups A/B and A/C will have their inode's mutex in class * default_group_class[0], and default group A/C/D will be in * default_group_class[1]. * * The lock classes are declared and assigned in inode.c, according to the * s_depth value. * The s_depth value is initialized to -1, adjusted to >= 0 when attaching * default groups, and reset to -1 when all default groups are attached. During * attachment, if configfs_create() sees s_depth > 0, the lock class of the new * inode's mutex is set to default_group_class[s_depth - 1]. */ static void configfs_init_dirent_depth(struct configfs_dirent *sd) { sd->s_depth = -1; } static void configfs_set_dir_dirent_depth(struct configfs_dirent *parent_sd, struct configfs_dirent *sd) { int parent_depth = parent_sd->s_depth; if (parent_depth >= 0) sd->s_depth = parent_depth + 1; } static void configfs_adjust_dir_dirent_depth_before_populate(struct configfs_dirent *sd) { /* * item's i_mutex class is already setup, so s_depth is now only * used to set new sub-directories s_depth, which is always done * with item's i_mutex locked. */ /* * sd->s_depth == -1 iff we are a non default group. * else (we are a default group) sd->s_depth > 0 (see * create_dir()). */ if (sd->s_depth == -1) /* * We are a non default group and we are going to create * default groups. */ sd->s_depth = 0; } static void configfs_adjust_dir_dirent_depth_after_populate(struct configfs_dirent *sd) { /* We will not create default groups anymore. */ sd->s_depth = -1; } #else /* CONFIG_LOCKDEP */ static void configfs_init_dirent_depth(struct configfs_dirent *sd) { } static void configfs_set_dir_dirent_depth(struct configfs_dirent *parent_sd, struct configfs_dirent *sd) { } static void configfs_adjust_dir_dirent_depth_before_populate(struct configfs_dirent *sd) { } static void configfs_adjust_dir_dirent_depth_after_populate(struct configfs_dirent *sd) { } #endif /* CONFIG_LOCKDEP */ static struct configfs_fragment *new_fragment(void) { struct configfs_fragment *p; p = kmalloc(sizeof(struct configfs_fragment), GFP_KERNEL); if (p) { atomic_set(&p->frag_count, 1); init_rwsem(&p->frag_sem); p->frag_dead = false; } return p; } void put_fragment(struct configfs_fragment *frag) { if (frag && atomic_dec_and_test(&frag->frag_count)) kfree(frag); } struct configfs_fragment *get_fragment(struct configfs_fragment *frag) { if (likely(frag)) atomic_inc(&frag->frag_count); return frag; } /* * Allocates a new configfs_dirent and links it to the parent configfs_dirent */ static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent *parent_sd, void *element, int type, struct configfs_fragment *frag) { struct configfs_dirent * sd; sd = kmem_cache_zalloc(configfs_dir_cachep, GFP_KERNEL); if (!sd) return ERR_PTR(-ENOMEM); atomic_set(&sd->s_count, 1); INIT_LIST_HEAD(&sd->s_children); sd->s_element = element; sd->s_type = type; configfs_init_dirent_depth(sd); spin_lock(&configfs_dirent_lock); if (parent_sd->s_type & CONFIGFS_USET_DROPPING) { spin_unlock(&configfs_dirent_lock); kmem_cache_free(configfs_dir_cachep, sd); return ERR_PTR(-ENOENT); } sd->s_frag = get_fragment(frag); /* * configfs_lookup scans only for unpinned items. s_children is * partitioned so that configfs_lookup can bail out early. * CONFIGFS_PINNED and CONFIGFS_NOT_PINNED are not symmetrical. readdir * cursors still need to be inserted at the front of the list. */ if (sd->s_type & CONFIGFS_PINNED) list_add_tail(&sd->s_sibling, &parent_sd->s_children); else list_add(&sd->s_sibling, &parent_sd->s_children); spin_unlock(&configfs_dirent_lock); return sd; } /* * * Return -EEXIST if there is already a configfs element with the same * name for the same parent. * * called with parent inode's i_mutex held */ static int configfs_dirent_exists(struct dentry *dentry) { struct configfs_dirent *parent_sd = dentry->d_parent->d_fsdata; const unsigned char *new = dentry->d_name.name; struct configfs_dirent *sd; list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { if (sd->s_element) { const unsigned char *existing = configfs_get_name(sd); if (strcmp(existing, new)) continue; else return -EEXIST; } } return 0; } int configfs_make_dirent(struct configfs_dirent * parent_sd, struct dentry * dentry, void * element, umode_t mode, int type, struct configfs_fragment *frag) { struct configfs_dirent * sd; sd = configfs_new_dirent(parent_sd, element, type, frag); if (IS_ERR(sd)) return PTR_ERR(sd); sd->s_mode = mode; sd->s_dentry = dentry; if (dentry) dentry->d_fsdata = configfs_get(sd); return 0; } static void configfs_remove_dirent(struct dentry *dentry) { struct configfs_dirent *sd = dentry->d_fsdata; if (!sd) return; spin_lock(&configfs_dirent_lock); list_del_init(&sd->s_sibling); spin_unlock(&configfs_dirent_lock); configfs_put(sd); } /** * configfs_create_dir - create a directory for an config_item. * @item: config_itemwe're creating directory for. * @dentry: config_item's dentry. * @frag: config_item's fragment. * * Note: user-created entries won't be allowed under this new directory * until it is validated by configfs_dir_set_ready() */ static int configfs_create_dir(struct config_item *item, struct dentry *dentry, struct configfs_fragment *frag) { int error; umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO; struct dentry *p = dentry->d_parent; struct inode *inode; BUG_ON(!item); error = configfs_make_dirent(p->d_fsdata, dentry, item, mode, CONFIGFS_DIR | CONFIGFS_USET_CREATING, frag); if (unlikely(error)) return error; configfs_set_dir_dirent_depth(p->d_fsdata, dentry->d_fsdata); inode = configfs_create(dentry, mode); if (IS_ERR(inode)) goto out_remove; inode->i_op = &configfs_dir_inode_operations; inode->i_fop = &configfs_dir_operations; /* directory inodes start off with i_nlink == 2 (for "." entry) */ inc_nlink(inode); d_instantiate(dentry, inode); /* already hashed */ dget(dentry); /* pin directory dentries in core */ inc_nlink(d_inode(p)); item->ci_dentry = dentry; return 0; out_remove: configfs_put(dentry->d_fsdata); configfs_remove_dirent(dentry); return PTR_ERR(inode); } /* * Allow userspace to create new entries under a new directory created with * configfs_create_dir(), and under all of its chidlren directories recursively. * @sd configfs_dirent of the new directory to validate * * Caller must hold configfs_dirent_lock. */ static void configfs_dir_set_ready(struct configfs_dirent *sd) { struct configfs_dirent *child_sd; sd->s_type &= ~CONFIGFS_USET_CREATING; list_for_each_entry(child_sd, &sd->s_children, s_sibling) if (child_sd->s_type & CONFIGFS_USET_CREATING) configfs_dir_set_ready(child_sd); } /* * Check that a directory does not belong to a directory hierarchy being * attached and not validated yet. * @sd configfs_dirent of the directory to check * * @return non-zero iff the directory was validated * * Note: takes configfs_dirent_lock, so the result may change from false to true * in two consecutive calls, but never from true to false. */ int configfs_dirent_is_ready(struct configfs_dirent *sd) { int ret; spin_lock(&configfs_dirent_lock); ret = !(sd->s_type & CONFIGFS_USET_CREATING); spin_unlock(&configfs_dirent_lock); return ret; } int configfs_create_link(struct configfs_dirent *target, struct dentry *parent, struct dentry *dentry, char *body) { int err = 0; umode_t mode = S_IFLNK | S_IRWXUGO; struct configfs_dirent *p = parent->d_fsdata; struct inode *inode; err = configfs_make_dirent(p, dentry, target, mode, CONFIGFS_ITEM_LINK, p->s_frag); if (err) return err; inode = configfs_create(dentry, mode); if (IS_ERR(inode)) goto out_remove; inode->i_link = body; inode->i_op = &configfs_symlink_inode_operations; d_instantiate(dentry, inode); dget(dentry); /* pin link dentries in core */ return 0; out_remove: configfs_put(dentry->d_fsdata); configfs_remove_dirent(dentry); return PTR_ERR(inode); } static void remove_dir(struct dentry * d) { struct dentry * parent = dget(d->d_parent); configfs_remove_dirent(d); if (d_really_is_positive(d)) simple_rmdir(d_inode(parent),d); pr_debug(" o %pd removing done (%d)\n", d, d_count(d)); dput(parent); } /** * configfs_remove_dir - remove an config_item's directory. * @item: config_item we're removing. * * The only thing special about this is that we remove any files in * the directory before we remove the directory, and we've inlined * what used to be configfs_rmdir() below, instead of calling separately. * * Caller holds the mutex of the item's inode */ static void configfs_remove_dir(struct config_item * item) { struct dentry * dentry = dget(item->ci_dentry); if (!dentry) return; remove_dir(dentry); /** * Drop reference from dget() on entrance. */ dput(dentry); } static struct dentry * configfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct configfs_dirent * parent_sd = dentry->d_parent->d_fsdata; struct configfs_dirent * sd; struct inode *inode = NULL; if (dentry->d_name.len > NAME_MAX) return ERR_PTR(-ENAMETOOLONG); /* * Fake invisibility if dir belongs to a group/default groups hierarchy * being attached * * This forbids userspace to read/write attributes of items which may * not complete their initialization, since the dentries of the * attributes won't be instantiated. */ if (!configfs_dirent_is_ready(parent_sd)) return ERR_PTR(-ENOENT); spin_lock(&configfs_dirent_lock); list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { /* * s_children is partitioned, see configfs_new_dirent. The first * pinned item indicates we can stop scanning. */ if (sd->s_type & CONFIGFS_PINNED) break; /* * Note: CONFIGFS_PINNED and CONFIGFS_NOT_PINNED are asymmetric. * there may be a readdir cursor in this list */ if ((sd->s_type & CONFIGFS_NOT_PINNED) && !strcmp(configfs_get_name(sd), dentry->d_name.name)) { struct configfs_attribute *attr = sd->s_element; umode_t mode = (attr->ca_mode & S_IALLUGO) | S_IFREG; dentry->d_fsdata = configfs_get(sd); sd->s_dentry = dentry; spin_unlock(&configfs_dirent_lock); inode = configfs_create(dentry, mode); if (IS_ERR(inode)) { configfs_put(sd); return ERR_CAST(inode); } if (sd->s_type & CONFIGFS_ITEM_BIN_ATTR) { inode->i_size = 0; inode->i_fop = &configfs_bin_file_operations; } else { inode->i_size = PAGE_SIZE; inode->i_fop = &configfs_file_operations; } goto done; } } spin_unlock(&configfs_dirent_lock); done: d_add(dentry, inode); return NULL; } /* * Only subdirectories count here. Files (CONFIGFS_NOT_PINNED) are * attributes and are removed by rmdir(). We recurse, setting * CONFIGFS_USET_DROPPING on all children that are candidates for * default detach. * If there is an error, the caller will reset the flags via * configfs_detach_rollback(). */ static int configfs_detach_prep(struct dentry *dentry, struct dentry **wait) { struct configfs_dirent *parent_sd = dentry->d_fsdata; struct configfs_dirent *sd; int ret; /* Mark that we're trying to drop the group */ parent_sd->s_type |= CONFIGFS_USET_DROPPING; ret = -EBUSY; if (parent_sd->s_links) goto out; ret = 0; list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { if (!sd->s_element || (sd->s_type & CONFIGFS_NOT_PINNED)) continue; if (sd->s_type & CONFIGFS_USET_DEFAULT) { /* Abort if racing with mkdir() */ if (sd->s_type & CONFIGFS_USET_IN_MKDIR) { if (wait) *wait= dget(sd->s_dentry); return -EAGAIN; } /* * Yup, recursive. If there's a problem, blame * deep nesting of default_groups */ ret = configfs_detach_prep(sd->s_dentry, wait); if (!ret) continue; } else ret = -ENOTEMPTY; break; } out: return ret; } /* * Walk the tree, resetting CONFIGFS_USET_DROPPING wherever it was * set. */ static void configfs_detach_rollback(struct dentry *dentry) { struct configfs_dirent *parent_sd = dentry->d_fsdata; struct configfs_dirent *sd; parent_sd->s_type &= ~CONFIGFS_USET_DROPPING; list_for_each_entry(sd, &parent_sd->s_children, s_sibling) if (sd->s_type & CONFIGFS_USET_DEFAULT) configfs_detach_rollback(sd->s_dentry); } static void detach_attrs(struct config_item * item) { struct dentry * dentry = dget(item->ci_dentry); struct configfs_dirent * parent_sd; struct configfs_dirent * sd, * tmp; if (!dentry) return; pr_debug("configfs %s: dropping attrs for dir\n", dentry->d_name.name); parent_sd = dentry->d_fsdata; list_for_each_entry_safe(sd, tmp, &parent_sd->s_children, s_sibling) { if (!sd->s_element || !(sd->s_type & CONFIGFS_NOT_PINNED)) continue; spin_lock(&configfs_dirent_lock); list_del_init(&sd->s_sibling); spin_unlock(&configfs_dirent_lock); configfs_drop_dentry(sd, dentry); configfs_put(sd); } /** * Drop reference from dget() on entrance. */ dput(dentry); } static int populate_attrs(struct config_item *item) { const struct config_item_type *t = item->ci_type; struct configfs_group_operations *ops; struct configfs_attribute *attr; struct configfs_bin_attribute *bin_attr; int error = 0; int i; if (!t) return -EINVAL; ops = t->ct_group_ops; if (t->ct_attrs) { for (i = 0; (attr = t->ct_attrs[i]) != NULL; i++) { if (ops && ops->is_visible && !ops->is_visible(item, attr, i)) continue; if ((error = configfs_create_file(item, attr))) break; } } if (!error && t->ct_bin_attrs) { for (i = 0; (bin_attr = t->ct_bin_attrs[i]) != NULL; i++) { if (ops && ops->is_bin_visible && !ops->is_bin_visible(item, bin_attr, i)) continue; error = configfs_create_bin_file(item, bin_attr); if (error) break; } } if (error) detach_attrs(item); return error; } static int configfs_attach_group(struct config_item *parent_item, struct config_item *item, struct dentry *dentry, struct configfs_fragment *frag); static void configfs_detach_group(struct config_item *item); static void detach_groups(struct config_group *group) { struct dentry * dentry = dget(group->cg_item.ci_dentry); struct dentry *child; struct configfs_dirent *parent_sd; struct configfs_dirent *sd, *tmp; if (!dentry) return; parent_sd = dentry->d_fsdata; list_for_each_entry_safe(sd, tmp, &parent_sd->s_children, s_sibling) { if (!sd->s_element || !(sd->s_type & CONFIGFS_USET_DEFAULT)) continue; child = sd->s_dentry; inode_lock(d_inode(child)); configfs_detach_group(sd->s_element); d_inode(child)->i_flags |= S_DEAD; dont_mount(child); inode_unlock(d_inode(child)); d_delete(child); dput(child); } /** * Drop reference from dget() on entrance. */ dput(dentry); } /* * This fakes mkdir(2) on a default_groups[] entry. It * creates a dentry, attachs it, and then does fixup * on the sd->s_type. * * We could, perhaps, tweak our parent's ->mkdir for a minute and * try using vfs_mkdir. Just a thought. */ static int create_default_group(struct config_group *parent_group, struct config_group *group, struct configfs_fragment *frag) { int ret; struct configfs_dirent *sd; /* We trust the caller holds a reference to parent */ struct dentry *child, *parent = parent_group->cg_item.ci_dentry; if (!group->cg_item.ci_name) group->cg_item.ci_name = group->cg_item.ci_namebuf; ret = -ENOMEM; child = d_alloc_name(parent, group->cg_item.ci_name); if (child) { d_add(child, NULL); ret = configfs_attach_group(&parent_group->cg_item, &group->cg_item, child, frag); if (!ret) { sd = child->d_fsdata; sd->s_type |= CONFIGFS_USET_DEFAULT; } else { BUG_ON(d_inode(child)); d_drop(child); dput(child); } } return ret; } static int populate_groups(struct config_group *group, struct configfs_fragment *frag) { struct config_group *new_group; int ret = 0; list_for_each_entry(new_group, &group->default_groups, group_entry) { ret = create_default_group(group, new_group, frag); if (ret) { detach_groups(group); break; } } return ret; } void configfs_remove_default_groups(struct config_group *group) { struct config_group *g, *n; list_for_each_entry_safe(g, n, &group->default_groups, group_entry) { list_del(&g->group_entry); config_item_put(&g->cg_item); } } EXPORT_SYMBOL(configfs_remove_default_groups); /* * All of link_obj/unlink_obj/link_group/unlink_group require that * subsys->su_mutex is held. */ static void unlink_obj(struct config_item *item) { struct config_group *group; group = item->ci_group; if (group) { list_del_init(&item->ci_entry); item->ci_group = NULL; item->ci_parent = NULL; /* Drop the reference for ci_entry */ config_item_put(item); /* Drop the reference for ci_parent */ config_group_put(group); } } static void link_obj(struct config_item *parent_item, struct config_item *item) { /* * Parent seems redundant with group, but it makes certain * traversals much nicer. */ item->ci_parent = parent_item; /* * We hold a reference on the parent for the child's ci_parent * link. */ item->ci_group = config_group_get(to_config_group(parent_item)); list_add_tail(&item->ci_entry, &item->ci_group->cg_children); /* * We hold a reference on the child for ci_entry on the parent's * cg_children */ config_item_get(item); } static void unlink_group(struct config_group *group) { struct config_group *new_group; list_for_each_entry(new_group, &group->default_groups, group_entry) unlink_group(new_group); group->cg_subsys = NULL; unlink_obj(&group->cg_item); } static void link_group(struct config_group *parent_group, struct config_group *group) { struct config_group *new_group; struct configfs_subsystem *subsys = NULL; /* gcc is a turd */ link_obj(&parent_group->cg_item, &group->cg_item); if (parent_group->cg_subsys) subsys = parent_group->cg_subsys; else if (configfs_is_root(&parent_group->cg_item)) subsys = to_configfs_subsystem(group); else BUG(); group->cg_subsys = subsys; list_for_each_entry(new_group, &group->default_groups, group_entry) link_group(group, new_group); } /* * The goal is that configfs_attach_item() (and * configfs_attach_group()) can be called from either the VFS or this * module. That is, they assume that the items have been created, * the dentry allocated, and the dcache is all ready to go. * * If they fail, they must clean up after themselves as if they * had never been called. The caller (VFS or local function) will * handle cleaning up the dcache bits. * * configfs_detach_group() and configfs_detach_item() behave similarly on * the way out. They assume that the proper semaphores are held, they * clean up the configfs items, and they expect their callers will * handle the dcache bits. */ static int configfs_attach_item(struct config_item *parent_item, struct config_item *item, struct dentry *dentry, struct configfs_fragment *frag) { int ret; ret = configfs_create_dir(item, dentry, frag); if (!ret) { ret = populate_attrs(item); if (ret) { /* * We are going to remove an inode and its dentry but * the VFS may already have hit and used them. Thus, * we must lock them as rmdir() would. */ inode_lock(d_inode(dentry)); configfs_remove_dir(item); d_inode(dentry)->i_flags |= S_DEAD; dont_mount(dentry); inode_unlock(d_inode(dentry)); d_delete(dentry); } } return ret; } /* Caller holds the mutex of the item's inode */ static void configfs_detach_item(struct config_item *item) { detach_attrs(item); configfs_remove_dir(item); } static int configfs_attach_group(struct config_item *parent_item, struct config_item *item, struct dentry *dentry, struct configfs_fragment *frag) { int ret; struct configfs_dirent *sd; ret = configfs_attach_item(parent_item, item, dentry, frag); if (!ret) { sd = dentry->d_fsdata; sd->s_type |= CONFIGFS_USET_DIR; /* * FYI, we're faking mkdir in populate_groups() * We must lock the group's inode to avoid races with the VFS * which can already hit the inode and try to add/remove entries * under it. * * We must also lock the inode to remove it safely in case of * error, as rmdir() would. */ inode_lock_nested(d_inode(dentry), I_MUTEX_CHILD); configfs_adjust_dir_dirent_depth_before_populate(sd); ret = populate_groups(to_config_group(item), frag); if (ret) { configfs_detach_item(item); d_inode(dentry)->i_flags |= S_DEAD; dont_mount(dentry); } configfs_adjust_dir_dirent_depth_after_populate(sd); inode_unlock(d_inode(dentry)); if (ret) d_delete(dentry); } return ret; } /* Caller holds the mutex of the group's inode */ static void configfs_detach_group(struct config_item *item) { detach_groups(to_config_group(item)); configfs_detach_item(item); } /* * After the item has been detached from the filesystem view, we are * ready to tear it out of the hierarchy. Notify the client before * we do that so they can perform any cleanup that requires * navigating the hierarchy. A client does not need to provide this * callback. The subsystem semaphore MUST be held by the caller, and * references must be valid for both items. It also assumes the * caller has validated ci_type. */ static void client_disconnect_notify(struct config_item *parent_item, struct config_item *item) { const struct config_item_type *type; type = parent_item->ci_type; BUG_ON(!type); if (type->ct_group_ops && type->ct_group_ops->disconnect_notify) type->ct_group_ops->disconnect_notify(to_config_group(parent_item), item); } /* * Drop the initial reference from make_item()/make_group() * This function assumes that reference is held on item * and that item holds a valid reference to the parent. Also, it * assumes the caller has validated ci_type. */ static void client_drop_item(struct config_item *parent_item, struct config_item *item) { const struct config_item_type *type; type = parent_item->ci_type; BUG_ON(!type); /* * If ->drop_item() exists, it is responsible for the * config_item_put(). */ if (type->ct_group_ops && type->ct_group_ops->drop_item) type->ct_group_ops->drop_item(to_config_group(parent_item), item); else config_item_put(item); } #ifdef DEBUG static void configfs_dump_one(struct configfs_dirent *sd, int level) { pr_info("%*s\"%s\":\n", level, " ", configfs_get_name(sd)); #define type_print(_type) if (sd->s_type & _type) pr_info("%*s %s\n", level, " ", #_type) type_print(CONFIGFS_ROOT); type_print(CONFIGFS_DIR); type_print(CONFIGFS_ITEM_ATTR); type_print(CONFIGFS_ITEM_LINK); type_print(CONFIGFS_USET_DIR); type_print(CONFIGFS_USET_DEFAULT); type_print(CONFIGFS_USET_DROPPING); #undef type_print } static int configfs_dump(struct configfs_dirent *sd, int level) { struct configfs_dirent *child_sd; int ret = 0; configfs_dump_one(sd, level); if (!(sd->s_type & (CONFIGFS_DIR|CONFIGFS_ROOT))) return 0; list_for_each_entry(child_sd, &sd->s_children, s_sibling) { ret = configfs_dump(child_sd, level + 2); if (ret) break; } return ret; } #endif /* * configfs_depend_item() and configfs_undepend_item() * * WARNING: Do not call these from a configfs callback! * * This describes these functions and their helpers. * * Allow another kernel system to depend on a config_item. If this * happens, the item cannot go away until the dependent can live without * it. The idea is to give client modules as simple an interface as * possible. When a system asks them to depend on an item, they just * call configfs_depend_item(). If the item is live and the client * driver is in good shape, we'll happily do the work for them. * * Why is the locking complex? Because configfs uses the VFS to handle * all locking, but this function is called outside the normal * VFS->configfs path. So it must take VFS locks to prevent the * VFS->configfs stuff (configfs_mkdir(), configfs_rmdir(), etc). This is * why you can't call these functions underneath configfs callbacks. * * Note, btw, that this can be called at *any* time, even when a configfs * subsystem isn't registered, or when configfs is loading or unloading. * Just like configfs_register_subsystem(). So we take the same * precautions. We pin the filesystem. We lock configfs_dirent_lock. * If we can find the target item in the * configfs tree, it must be part of the subsystem tree as well, so we * do not need the subsystem semaphore. Holding configfs_dirent_lock helps * locking out mkdir() and rmdir(), who might be racing us. */ /* * configfs_depend_prep() * * Only subdirectories count here. Files (CONFIGFS_NOT_PINNED) are * attributes. This is similar but not the same to configfs_detach_prep(). * Note that configfs_detach_prep() expects the parent to be locked when it * is called, but we lock the parent *inside* configfs_depend_prep(). We * do that so we can unlock it if we find nothing. * * Here we do a depth-first search of the dentry hierarchy looking for * our object. * We deliberately ignore items tagged as dropping since they are virtually * dead, as well as items in the middle of attachment since they virtually * do not exist yet. This completes the locking out of racing mkdir() and * rmdir(). * Note: subdirectories in the middle of attachment start with s_type = * CONFIGFS_DIR|CONFIGFS_USET_CREATING set by create_dir(). When * CONFIGFS_USET_CREATING is set, we ignore the item. The actual set of * s_type is in configfs_new_dirent(), which has configfs_dirent_lock. * * If the target is not found, -ENOENT is bubbled up. * * This adds a requirement that all config_items be unique! * * This is recursive. There isn't * much on the stack, though, so folks that need this function - be careful * about your stack! Patches will be accepted to make it iterative. */ static int configfs_depend_prep(struct dentry *origin, struct config_item *target) { struct configfs_dirent *child_sd, *sd; int ret = 0; BUG_ON(!origin || !origin->d_fsdata); sd = origin->d_fsdata; if (sd->s_element == target) /* Boo-yah */ goto out; list_for_each_entry(child_sd, &sd->s_children, s_sibling) { if ((child_sd->s_type & CONFIGFS_DIR) && !(child_sd->s_type & CONFIGFS_USET_DROPPING) && !(child_sd->s_type & CONFIGFS_USET_CREATING)) { ret = configfs_depend_prep(child_sd->s_dentry, target); if (!ret) goto out; /* Child path boo-yah */ } } /* We looped all our children and didn't find target */ ret = -ENOENT; out: return ret; } static int configfs_do_depend_item(struct dentry *subsys_dentry, struct config_item *target) { struct configfs_dirent *p; int ret; spin_lock(&configfs_dirent_lock); /* Scan the tree, return 0 if found */ ret = configfs_depend_prep(subsys_dentry, target); if (ret) goto out_unlock_dirent_lock; /* * We are sure that the item is not about to be removed by rmdir(), and * not in the middle of attachment by mkdir(). */ p = target->ci_dentry->d_fsdata; p->s_dependent_count += 1; out_unlock_dirent_lock: spin_unlock(&configfs_dirent_lock); return ret; } static inline struct configfs_dirent * configfs_find_subsys_dentry(struct configfs_dirent *root_sd, struct config_item *subsys_item) { struct configfs_dirent *p; struct configfs_dirent *ret = NULL; list_for_each_entry(p, &root_sd->s_children, s_sibling) { if (p->s_type & CONFIGFS_DIR && p->s_element == subsys_item) { ret = p; break; } } return ret; } int configfs_depend_item(struct configfs_subsystem *subsys, struct config_item *target) { int ret; struct configfs_dirent *subsys_sd; struct config_item *s_item = &subsys->su_group.cg_item; struct dentry *root; /* * Pin the configfs filesystem. This means we can safely access * the root of the configfs filesystem. */ root = configfs_pin_fs(); if (IS_ERR(root)) return PTR_ERR(root); /* * Next, lock the root directory. We're going to check that the * subsystem is really registered, and so we need to lock out * configfs_[un]register_subsystem(). */ inode_lock(d_inode(root)); subsys_sd = configfs_find_subsys_dentry(root->d_fsdata, s_item); if (!subsys_sd) { ret = -ENOENT; goto out_unlock_fs; } /* Ok, now we can trust subsys/s_item */ ret = configfs_do_depend_item(subsys_sd->s_dentry, target); out_unlock_fs: inode_unlock(d_inode(root)); /* * If we succeeded, the fs is pinned via other methods. If not, * we're done with it anyway. So release_fs() is always right. */ configfs_release_fs(); return ret; } EXPORT_SYMBOL(configfs_depend_item); /* * Release the dependent linkage. This is much simpler than * configfs_depend_item() because we know that the client driver is * pinned, thus the subsystem is pinned, and therefore configfs is pinned. */ void configfs_undepend_item(struct config_item *target) { struct configfs_dirent *sd; /* * Since we can trust everything is pinned, we just need * configfs_dirent_lock. */ spin_lock(&configfs_dirent_lock); sd = target->ci_dentry->d_fsdata; BUG_ON(sd->s_dependent_count < 1); sd->s_dependent_count -= 1; /* * After this unlock, we cannot trust the item to stay alive! * DO NOT REFERENCE item after this unlock. */ spin_unlock(&configfs_dirent_lock); } EXPORT_SYMBOL(configfs_undepend_item); /* * caller_subsys is a caller's subsystem not target's. This is used to * determine if we should lock root and check subsys or not. When we are * in the same subsystem as our target there is no need to do locking as * we know that subsys is valid and is not unregistered during this function * as we are called from callback of one of his children and VFS holds a lock * on some inode. Otherwise we have to lock our root to ensure that target's * subsystem it is not unregistered during this function. */ int configfs_depend_item_unlocked(struct configfs_subsystem *caller_subsys, struct config_item *target) { struct configfs_subsystem *target_subsys; struct config_group *root, *parent; struct configfs_dirent *subsys_sd; int ret = -ENOENT; /* Disallow this function for configfs root */ if (configfs_is_root(target)) return -EINVAL; parent = target->ci_group; /* * This may happen when someone is trying to depend root * directory of some subsystem */ if (configfs_is_root(&parent->cg_item)) { target_subsys = to_configfs_subsystem(to_config_group(target)); root = parent; } else { target_subsys = parent->cg_subsys; /* Find a cofnigfs root as we may need it for locking */ for (root = parent; !configfs_is_root(&root->cg_item); root = root->cg_item.ci_group) ; } if (target_subsys != caller_subsys) { /* * We are in other configfs subsystem, so we have to do * additional locking to prevent other subsystem from being * unregistered */ inode_lock(d_inode(root->cg_item.ci_dentry)); /* * As we are trying to depend item from other subsystem * we have to check if this subsystem is still registered */ subsys_sd = configfs_find_subsys_dentry( root->cg_item.ci_dentry->d_fsdata, &target_subsys->su_group.cg_item); if (!subsys_sd) goto out_root_unlock; } else { subsys_sd = target_subsys->su_group.cg_item.ci_dentry->d_fsdata; } /* Now we can execute core of depend item */ ret = configfs_do_depend_item(subsys_sd->s_dentry, target); if (target_subsys != caller_subsys) out_root_unlock: /* * We were called from subsystem other than our target so we * took some locks so now it's time to release them */ inode_unlock(d_inode(root->cg_item.ci_dentry)); return ret; } EXPORT_SYMBOL(configfs_depend_item_unlocked); static struct dentry *configfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { int ret = 0; int module_got = 0; struct config_group *group = NULL; struct config_item *item = NULL; struct config_item *parent_item; struct configfs_subsystem *subsys; struct configfs_dirent *sd; const struct config_item_type *type; struct module *subsys_owner = NULL, *new_item_owner = NULL; struct configfs_fragment *frag; char *name; sd = dentry->d_parent->d_fsdata; /* * Fake invisibility if dir belongs to a group/default groups hierarchy * being attached */ if (!configfs_dirent_is_ready(sd)) { ret = -ENOENT; goto out; } if (!(sd->s_type & CONFIGFS_USET_DIR)) { ret = -EPERM; goto out; } frag = new_fragment(); if (!frag) { ret = -ENOMEM; goto out; } /* Get a working ref for the duration of this function */ parent_item = configfs_get_config_item(dentry->d_parent); type = parent_item->ci_type; subsys = to_config_group(parent_item)->cg_subsys; BUG_ON(!subsys); if (!type || !type->ct_group_ops || (!type->ct_group_ops->make_group && !type->ct_group_ops->make_item)) { ret = -EPERM; /* Lack-of-mkdir returns -EPERM */ goto out_put; } /* * The subsystem may belong to a different module than the item * being created. We don't want to safely pin the new item but * fail to pin the subsystem it sits under. */ if (!subsys->su_group.cg_item.ci_type) { ret = -EINVAL; goto out_put; } subsys_owner = subsys->su_group.cg_item.ci_type->ct_owner; if (!try_module_get(subsys_owner)) { ret = -EINVAL; goto out_put; } name = kmalloc(dentry->d_name.len + 1, GFP_KERNEL); if (!name) { ret = -ENOMEM; goto out_subsys_put; } snprintf(name, dentry->d_name.len + 1, "%s", dentry->d_name.name); mutex_lock(&subsys->su_mutex); if (type->ct_group_ops->make_group) { group = type->ct_group_ops->make_group(to_config_group(parent_item), name); if (!group) group = ERR_PTR(-ENOMEM); if (!IS_ERR(group)) { link_group(to_config_group(parent_item), group); item = &group->cg_item; } else ret = PTR_ERR(group); } else { item = type->ct_group_ops->make_item(to_config_group(parent_item), name); if (!item) item = ERR_PTR(-ENOMEM); if (!IS_ERR(item)) link_obj(parent_item, item); else ret = PTR_ERR(item); } mutex_unlock(&subsys->su_mutex); kfree(name); if (ret) { /* * If ret != 0, then link_obj() was never called. * There are no extra references to clean up. */ goto out_subsys_put; } /* * link_obj() has been called (via link_group() for groups). * From here on out, errors must clean that up. */ type = item->ci_type; if (!type) { ret = -EINVAL; goto out_unlink; } new_item_owner = type->ct_owner; if (!try_module_get(new_item_owner)) { ret = -EINVAL; goto out_unlink; } /* * I hate doing it this way, but if there is * an error, module_put() probably should * happen after any cleanup. */ module_got = 1; /* * Make racing rmdir() fail if it did not tag parent with * CONFIGFS_USET_DROPPING * Note: if CONFIGFS_USET_DROPPING is already set, attach_group() will * fail and let rmdir() terminate correctly */ spin_lock(&configfs_dirent_lock); /* This will make configfs_detach_prep() fail */ sd->s_type |= CONFIGFS_USET_IN_MKDIR; spin_unlock(&configfs_dirent_lock); if (group) ret = configfs_attach_group(parent_item, item, dentry, frag); else ret = configfs_attach_item(parent_item, item, dentry, frag); spin_lock(&configfs_dirent_lock); sd->s_type &= ~CONFIGFS_USET_IN_MKDIR; if (!ret) configfs_dir_set_ready(dentry->d_fsdata); spin_unlock(&configfs_dirent_lock); out_unlink: if (ret) { /* Tear down everything we built up */ mutex_lock(&subsys->su_mutex); client_disconnect_notify(parent_item, item); if (group) unlink_group(group); else unlink_obj(item); client_drop_item(parent_item, item); mutex_unlock(&subsys->su_mutex); if (module_got) module_put(new_item_owner); } out_subsys_put: if (ret) module_put(subsys_owner); out_put: /* * link_obj()/link_group() took a reference from child->parent, * so the parent is safely pinned. We can drop our working * reference. */ config_item_put(parent_item); put_fragment(frag); out: return ERR_PTR(ret); } static int configfs_rmdir(struct inode *dir, struct dentry *dentry) { struct config_item *parent_item; struct config_item *item; struct configfs_subsystem *subsys; struct configfs_dirent *sd; struct configfs_fragment *frag; struct module *subsys_owner = NULL, *dead_item_owner = NULL; int ret; sd = dentry->d_fsdata; if (sd->s_type & CONFIGFS_USET_DEFAULT) return -EPERM; /* Get a working ref until we have the child */ parent_item = configfs_get_config_item(dentry->d_parent); subsys = to_config_group(parent_item)->cg_subsys; BUG_ON(!subsys); if (!parent_item->ci_type) { config_item_put(parent_item); return -EINVAL; } /* configfs_mkdir() shouldn't have allowed this */ BUG_ON(!subsys->su_group.cg_item.ci_type); subsys_owner = subsys->su_group.cg_item.ci_type->ct_owner; /* * Ensure that no racing symlink() will make detach_prep() fail while * the new link is temporarily attached */ do { struct dentry *wait; mutex_lock(&configfs_symlink_mutex); spin_lock(&configfs_dirent_lock); /* * Here's where we check for dependents. We're protected by * configfs_dirent_lock. * If no dependent, atomically tag the item as dropping. */ ret = sd->s_dependent_count ? -EBUSY : 0; if (!ret) { ret = configfs_detach_prep(dentry, &wait); if (ret) configfs_detach_rollback(dentry); } spin_unlock(&configfs_dirent_lock); mutex_unlock(&configfs_symlink_mutex); if (ret) { if (ret != -EAGAIN) { config_item_put(parent_item); return ret; } /* Wait until the racing operation terminates */ inode_lock(d_inode(wait)); inode_unlock(d_inode(wait)); dput(wait); } } while (ret == -EAGAIN); frag = sd->s_frag; if (down_write_killable(&frag->frag_sem)) { spin_lock(&configfs_dirent_lock); configfs_detach_rollback(dentry); spin_unlock(&configfs_dirent_lock); config_item_put(parent_item); return -EINTR; } frag->frag_dead = true; up_write(&frag->frag_sem); /* Get a working ref for the duration of this function */ item = configfs_get_config_item(dentry); /* Drop reference from above, item already holds one. */ config_item_put(parent_item); if (item->ci_type) dead_item_owner = item->ci_type->ct_owner; if (sd->s_type & CONFIGFS_USET_DIR) { configfs_detach_group(item); mutex_lock(&subsys->su_mutex); client_disconnect_notify(parent_item, item); unlink_group(to_config_group(item)); } else { configfs_detach_item(item); mutex_lock(&subsys->su_mutex); client_disconnect_notify(parent_item, item); unlink_obj(item); } client_drop_item(parent_item, item); mutex_unlock(&subsys->su_mutex); /* Drop our reference from above */ config_item_put(item); module_put(dead_item_owner); module_put(subsys_owner); return 0; } const struct inode_operations configfs_dir_inode_operations = { .mkdir = configfs_mkdir, .rmdir = configfs_rmdir, .symlink = configfs_symlink, .unlink = configfs_unlink, .lookup = configfs_lookup, .setattr = configfs_setattr, }; const struct inode_operations configfs_root_inode_operations = { .lookup = configfs_lookup, .setattr = configfs_setattr, }; static int configfs_dir_open(struct inode *inode, struct file *file) { struct dentry * dentry = file->f_path.dentry; struct configfs_dirent * parent_sd = dentry->d_fsdata; int err; inode_lock(d_inode(dentry)); /* * Fake invisibility if dir belongs to a group/default groups hierarchy * being attached */ err = -ENOENT; if (configfs_dirent_is_ready(parent_sd)) { file->private_data = configfs_new_dirent(parent_sd, NULL, 0, NULL); err = PTR_ERR_OR_ZERO(file->private_data); } inode_unlock(d_inode(dentry)); return err; } static int configfs_dir_close(struct inode *inode, struct file *file) { struct dentry * dentry = file->f_path.dentry; struct configfs_dirent * cursor = file->private_data; inode_lock(d_inode(dentry)); spin_lock(&configfs_dirent_lock); list_del_init(&cursor->s_sibling); spin_unlock(&configfs_dirent_lock); inode_unlock(d_inode(dentry)); release_configfs_dirent(cursor); return 0; } static int configfs_readdir(struct file *file, struct dir_context *ctx) { struct dentry *dentry = file->f_path.dentry; struct super_block *sb = dentry->d_sb; struct configfs_dirent * parent_sd = dentry->d_fsdata; struct configfs_dirent *cursor = file->private_data; struct list_head *p, *q = &cursor->s_sibling; ino_t ino = 0; if (!dir_emit_dots(file, ctx)) return 0; spin_lock(&configfs_dirent_lock); if (ctx->pos == 2) list_move(q, &parent_sd->s_children); for (p = q->next; p != &parent_sd->s_children; p = p->next) { struct configfs_dirent *next; const char *name; int len; struct inode *inode = NULL; next = list_entry(p, struct configfs_dirent, s_sibling); if (!next->s_element) continue; /* * We'll have a dentry and an inode for * PINNED items and for open attribute * files. We lock here to prevent a race * with configfs_d_iput() clearing * s_dentry before calling iput(). * * Why do we go to the trouble? If * someone has an attribute file open, * the inode number should match until * they close it. Beyond that, we don't * care. */ dentry = next->s_dentry; if (dentry) inode = d_inode(dentry); if (inode) ino = inode->i_ino; spin_unlock(&configfs_dirent_lock); if (!inode) ino = iunique(sb, 2); name = configfs_get_name(next); len = strlen(name); if (!dir_emit(ctx, name, len, ino, fs_umode_to_dtype(next->s_mode))) return 0; spin_lock(&configfs_dirent_lock); list_move(q, p); p = q; ctx->pos++; } spin_unlock(&configfs_dirent_lock); return 0; } static loff_t configfs_dir_lseek(struct file *file, loff_t offset, int whence) { struct dentry * dentry = file->f_path.dentry; switch (whence) { case 1: offset += file->f_pos; fallthrough; case 0: if (offset >= 0) break; fallthrough; default: return -EINVAL; } if (offset != file->f_pos) { file->f_pos = offset; if (file->f_pos >= 2) { struct configfs_dirent *sd = dentry->d_fsdata; struct configfs_dirent *cursor = file->private_data; struct list_head *p; loff_t n = file->f_pos - 2; spin_lock(&configfs_dirent_lock); list_del(&cursor->s_sibling); p = sd->s_children.next; while (n && p != &sd->s_children) { struct configfs_dirent *next; next = list_entry(p, struct configfs_dirent, s_sibling); if (next->s_element) n--; p = p->next; } list_add_tail(&cursor->s_sibling, p); spin_unlock(&configfs_dirent_lock); } } return offset; } const struct file_operations configfs_dir_operations = { .open = configfs_dir_open, .release = configfs_dir_close, .llseek = configfs_dir_lseek, .read = generic_read_dir, .iterate_shared = configfs_readdir, }; /** * configfs_register_group - creates a parent-child relation between two groups * @parent_group: parent group * @group: child group * * link groups, creates dentry for the child and attaches it to the * parent dentry. * * Return: 0 on success, negative errno code on error */ int configfs_register_group(struct config_group *parent_group, struct config_group *group) { struct configfs_subsystem *subsys = parent_group->cg_subsys; struct dentry *parent; struct configfs_fragment *frag; int ret; frag = new_fragment(); if (!frag) return -ENOMEM; mutex_lock(&subsys->su_mutex); link_group(parent_group, group); mutex_unlock(&subsys->su_mutex); parent = parent_group->cg_item.ci_dentry; inode_lock_nested(d_inode(parent), I_MUTEX_PARENT); ret = create_default_group(parent_group, group, frag); if (ret) goto err_out; spin_lock(&configfs_dirent_lock); configfs_dir_set_ready(group->cg_item.ci_dentry->d_fsdata); spin_unlock(&configfs_dirent_lock); inode_unlock(d_inode(parent)); put_fragment(frag); return 0; err_out: inode_unlock(d_inode(parent)); mutex_lock(&subsys->su_mutex); unlink_group(group); mutex_unlock(&subsys->su_mutex); put_fragment(frag); return ret; } EXPORT_SYMBOL(configfs_register_group); /** * configfs_unregister_group() - unregisters a child group from its parent * @group: parent group to be unregistered * * Undoes configfs_register_group() */ void configfs_unregister_group(struct config_group *group) { struct configfs_subsystem *subsys = group->cg_subsys; struct dentry *dentry = group->cg_item.ci_dentry; struct dentry *parent = group->cg_item.ci_parent->ci_dentry; struct configfs_dirent *sd = dentry->d_fsdata; struct configfs_fragment *frag = sd->s_frag; down_write(&frag->frag_sem); frag->frag_dead = true; up_write(&frag->frag_sem); inode_lock_nested(d_inode(parent), I_MUTEX_PARENT); spin_lock(&configfs_dirent_lock); configfs_detach_prep(dentry, NULL); spin_unlock(&configfs_dirent_lock); configfs_detach_group(&group->cg_item); d_inode(dentry)->i_flags |= S_DEAD; dont_mount(dentry); d_drop(dentry); fsnotify_rmdir(d_inode(parent), dentry); inode_unlock(d_inode(parent)); dput(dentry); mutex_lock(&subsys->su_mutex); unlink_group(group); mutex_unlock(&subsys->su_mutex); } EXPORT_SYMBOL(configfs_unregister_group); /** * configfs_register_default_group() - allocates and registers a child group * @parent_group: parent group * @name: child group name * @item_type: child item type description * * boilerplate to allocate and register a child group with its parent. We need * kzalloc'ed memory because child's default_group is initially empty. * * Return: allocated config group or ERR_PTR() on error */ struct config_group * configfs_register_default_group(struct config_group *parent_group, const char *name, const struct config_item_type *item_type) { int ret; struct config_group *group; group = kzalloc(sizeof(*group), GFP_KERNEL); if (!group) return ERR_PTR(-ENOMEM); config_group_init_type_name(group, name, item_type); ret = configfs_register_group(parent_group, group); if (ret) { kfree(group); return ERR_PTR(ret); } return group; } EXPORT_SYMBOL(configfs_register_default_group); /** * configfs_unregister_default_group() - unregisters and frees a child group * @group: the group to act on */ void configfs_unregister_default_group(struct config_group *group) { configfs_unregister_group(group); kfree(group); } EXPORT_SYMBOL(configfs_unregister_default_group); int configfs_register_subsystem(struct configfs_subsystem *subsys) { int err; struct config_group *group = &subsys->su_group; struct dentry *dentry; struct dentry *root; struct configfs_dirent *sd; struct configfs_fragment *frag; frag = new_fragment(); if (!frag) return -ENOMEM; root = configfs_pin_fs(); if (IS_ERR(root)) { put_fragment(frag); return PTR_ERR(root); } if (!group->cg_item.ci_name) group->cg_item.ci_name = group->cg_item.ci_namebuf; sd = root->d_fsdata; mutex_lock(&configfs_subsystem_mutex); link_group(to_config_group(sd->s_element), group); mutex_unlock(&configfs_subsystem_mutex); inode_lock_nested(d_inode(root), I_MUTEX_PARENT); err = -ENOMEM; dentry = d_alloc_name(root, group->cg_item.ci_name); if (dentry) { d_add(dentry, NULL); err = configfs_dirent_exists(dentry); if (!err) err = configfs_attach_group(sd->s_element, &group->cg_item, dentry, frag); if (err) { BUG_ON(d_inode(dentry)); d_drop(dentry); dput(dentry); } else { spin_lock(&configfs_dirent_lock); configfs_dir_set_ready(dentry->d_fsdata); spin_unlock(&configfs_dirent_lock); } } inode_unlock(d_inode(root)); if (err) { mutex_lock(&configfs_subsystem_mutex); unlink_group(group); mutex_unlock(&configfs_subsystem_mutex); configfs_release_fs(); } put_fragment(frag); return err; } void configfs_unregister_subsystem(struct configfs_subsystem *subsys) { struct config_group *group = &subsys->su_group; struct dentry *dentry = group->cg_item.ci_dentry; struct dentry *root = dentry->d_sb->s_root; struct configfs_dirent *sd = dentry->d_fsdata; struct configfs_fragment *frag = sd->s_frag; if (dentry->d_parent != root) { pr_err("Tried to unregister non-subsystem!\n"); return; } down_write(&frag->frag_sem); frag->frag_dead = true; up_write(&frag->frag_sem); inode_lock_nested(d_inode(root), I_MUTEX_PARENT); inode_lock_nested(d_inode(dentry), I_MUTEX_CHILD); mutex_lock(&configfs_symlink_mutex); spin_lock(&configfs_dirent_lock); if (configfs_detach_prep(dentry, NULL)) { pr_err("Tried to unregister non-empty subsystem!\n"); } spin_unlock(&configfs_dirent_lock); mutex_unlock(&configfs_symlink_mutex); configfs_detach_group(&group->cg_item); d_inode(dentry)->i_flags |= S_DEAD; dont_mount(dentry); inode_unlock(d_inode(dentry)); d_drop(dentry); fsnotify_rmdir(d_inode(root), dentry); inode_unlock(d_inode(root)); dput(dentry); mutex_lock(&configfs_subsystem_mutex); unlink_group(group); mutex_unlock(&configfs_subsystem_mutex); configfs_release_fs(); } EXPORT_SYMBOL(configfs_register_subsystem); EXPORT_SYMBOL(configfs_unregister_subsystem);
1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 // SPDX-License-Identifier: GPL-2.0+ /* * fl512.c * Anders Gnistrup <ex18@kalman.iau.dtu.dk> * * COMEDI - Linux Control and Measurement Device Interface * Copyright (C) 2000 David A. Schleef <ds@schleef.org> */ /* * Driver: fl512 * Description: unknown * Author: Anders Gnistrup <ex18@kalman.iau.dtu.dk> * Devices: [unknown] FL512 (fl512) * Status: unknown * * Digital I/O is not supported. * * Configuration options: * [0] - I/O port base address */ #include <linux/module.h> #include <linux/comedi/comedidev.h> #include <linux/delay.h> /* * Register I/O map */ #define FL512_AI_LSB_REG 0x02 #define FL512_AI_MSB_REG 0x03 #define FL512_AI_MUX_REG 0x02 #define FL512_AI_START_CONV_REG 0x03 #define FL512_AO_DATA_REG(x) (0x04 + ((x) * 2)) #define FL512_AO_TRIG_REG(x) (0x04 + ((x) * 2)) static const struct comedi_lrange range_fl512 = { 4, { BIP_RANGE(0.5), BIP_RANGE(1), BIP_RANGE(5), BIP_RANGE(10), UNI_RANGE(1), UNI_RANGE(5), UNI_RANGE(10) } }; static int fl512_ai_insn_read(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { unsigned int chan = CR_CHAN(insn->chanspec); unsigned int val; int i; outb(chan, dev->iobase + FL512_AI_MUX_REG); for (i = 0; i < insn->n; i++) { outb(0, dev->iobase + FL512_AI_START_CONV_REG); /* XXX should test "done" flag instead of delay */ usleep_range(30, 100); val = inb(dev->iobase + FL512_AI_LSB_REG); val |= (inb(dev->iobase + FL512_AI_MSB_REG) << 8); val &= s->maxdata; data[i] = val; } return insn->n; } static int fl512_ao_insn_write(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { unsigned int chan = CR_CHAN(insn->chanspec); unsigned int val = s->readback[chan]; int i; for (i = 0; i < insn->n; i++) { val = data[i]; /* write LSB, MSB then trigger conversion */ outb(val & 0x0ff, dev->iobase + FL512_AO_DATA_REG(chan)); outb((val >> 8) & 0xf, dev->iobase + FL512_AO_DATA_REG(chan)); inb(dev->iobase + FL512_AO_TRIG_REG(chan)); } s->readback[chan] = val; return insn->n; } static int fl512_attach(struct comedi_device *dev, struct comedi_devconfig *it) { struct comedi_subdevice *s; int ret; ret = comedi_request_region(dev, it->options[0], 0x10); if (ret) return ret; ret = comedi_alloc_subdevices(dev, 2); if (ret) return ret; /* Analog Input subdevice */ s = &dev->subdevices[0]; s->type = COMEDI_SUBD_AI; s->subdev_flags = SDF_READABLE | SDF_GROUND; s->n_chan = 16; s->maxdata = 0x0fff; s->range_table = &range_fl512; s->insn_read = fl512_ai_insn_read; /* Analog Output subdevice */ s = &dev->subdevices[1]; s->type = COMEDI_SUBD_AO; s->subdev_flags = SDF_WRITABLE; s->n_chan = 2; s->maxdata = 0x0fff; s->range_table = &range_fl512; s->insn_write = fl512_ao_insn_write; return comedi_alloc_subdev_readback(s); } static struct comedi_driver fl512_driver = { .driver_name = "fl512", .module = THIS_MODULE, .attach = fl512_attach, .detach = comedi_legacy_detach, }; module_comedi_driver(fl512_driver); MODULE_AUTHOR("Comedi https://www.comedi.org"); MODULE_DESCRIPTION("Comedi low-level driver"); MODULE_LICENSE("GPL");
11 10 4 4 6 4 4 11 1 11 3 3 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 // SPDX-License-Identifier: GPL-2.0-only /* * vMTRR implementation * * Copyright (C) 2006 Qumranet, Inc. * Copyright 2010 Red Hat, Inc. and/or its affiliates. * Copyright(C) 2015 Intel Corporation. * * Authors: * Yaniv Kamay <yaniv@qumranet.com> * Avi Kivity <avi@qumranet.com> * Marcelo Tosatti <mtosatti@redhat.com> * Paolo Bonzini <pbonzini@redhat.com> * Xiao Guangrong <guangrong.xiao@linux.intel.com> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kvm_host.h> #include <asm/mtrr.h> #include "cpuid.h" #include "x86.h" static u64 *find_mtrr(struct kvm_vcpu *vcpu, unsigned int msr) { int index; switch (msr) { case MTRRphysBase_MSR(0) ... MTRRphysMask_MSR(KVM_NR_VAR_MTRR - 1): index = msr - MTRRphysBase_MSR(0); return &vcpu->arch.mtrr_state.var[index]; case MSR_MTRRfix64K_00000: return &vcpu->arch.mtrr_state.fixed_64k; case MSR_MTRRfix16K_80000: case MSR_MTRRfix16K_A0000: index = msr - MSR_MTRRfix16K_80000; return &vcpu->arch.mtrr_state.fixed_16k[index]; case MSR_MTRRfix4K_C0000: case MSR_MTRRfix4K_C8000: case MSR_MTRRfix4K_D0000: case MSR_MTRRfix4K_D8000: case MSR_MTRRfix4K_E0000: case MSR_MTRRfix4K_E8000: case MSR_MTRRfix4K_F0000: case MSR_MTRRfix4K_F8000: index = msr - MSR_MTRRfix4K_C0000; return &vcpu->arch.mtrr_state.fixed_4k[index]; case MSR_MTRRdefType: return &vcpu->arch.mtrr_state.deftype; default: break; } return NULL; } static bool valid_mtrr_type(unsigned t) { return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */ } static bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data) { int i; u64 mask; if (msr == MSR_MTRRdefType) { if (data & ~0xcff) return false; return valid_mtrr_type(data & 0xff); } else if (msr >= MSR_MTRRfix64K_00000 && msr <= MSR_MTRRfix4K_F8000) { for (i = 0; i < 8 ; i++) if (!valid_mtrr_type((data >> (i * 8)) & 0xff)) return false; return true; } /* variable MTRRs */ if (WARN_ON_ONCE(!(msr >= MTRRphysBase_MSR(0) && msr <= MTRRphysMask_MSR(KVM_NR_VAR_MTRR - 1)))) return false; mask = kvm_vcpu_reserved_gpa_bits_raw(vcpu); if ((msr & 1) == 0) { /* MTRR base */ if (!valid_mtrr_type(data & 0xff)) return false; mask |= 0xf00; } else { /* MTRR mask */ mask |= 0x7ff; } return (data & mask) == 0; } int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data) { u64 *mtrr; mtrr = find_mtrr(vcpu, msr); if (!mtrr) return 1; if (!kvm_mtrr_valid(vcpu, msr, data)) return 1; *mtrr = data; return 0; } int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) { u64 *mtrr; /* MSR_MTRRcap is a readonly MSR. */ if (msr == MSR_MTRRcap) { /* * SMRR = 0 * WC = 1 * FIX = 1 * VCNT = KVM_NR_VAR_MTRR */ *pdata = 0x500 | KVM_NR_VAR_MTRR; return 0; } mtrr = find_mtrr(vcpu, msr); if (!mtrr) return 1; *pdata = *mtrr; return 0; }
1 1 1 18 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 // SPDX-License-Identifier: GPL-2.0-only /* * Generic netlink handshake service * * Author: Chuck Lever <chuck.lever@oracle.com> * * Copyright (c) 2023, Oracle and/or its affiliates. */ #include <linux/types.h> #include <linux/socket.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/mm.h> #include <net/sock.h> #include <net/genetlink.h> #include <net/netns/generic.h> #include <kunit/visibility.h> #include <uapi/linux/handshake.h> #include "handshake.h" #include "genl.h" #include <trace/events/handshake.h> /** * handshake_genl_notify - Notify handlers that a request is waiting * @net: target network namespace * @proto: handshake protocol * @flags: memory allocation control flags * * Returns zero on success or a negative errno if notification failed. */ int handshake_genl_notify(struct net *net, const struct handshake_proto *proto, gfp_t flags) { struct sk_buff *msg; void *hdr; /* Disable notifications during unit testing */ if (!test_bit(HANDSHAKE_F_PROTO_NOTIFY, &proto->hp_flags)) return 0; if (!genl_has_listeners(&handshake_nl_family, net, proto->hp_handler_class)) return -ESRCH; msg = genlmsg_new(GENLMSG_DEFAULT_SIZE, flags); if (!msg) return -ENOMEM; hdr = genlmsg_put(msg, 0, 0, &handshake_nl_family, 0, HANDSHAKE_CMD_READY); if (!hdr) goto out_free; if (nla_put_u32(msg, HANDSHAKE_A_ACCEPT_HANDLER_CLASS, proto->hp_handler_class) < 0) { genlmsg_cancel(msg, hdr); goto out_free; } genlmsg_end(msg, hdr); return genlmsg_multicast_netns(&handshake_nl_family, net, msg, 0, proto->hp_handler_class, flags); out_free: nlmsg_free(msg); return -EMSGSIZE; } /** * handshake_genl_put - Create a generic netlink message header * @msg: buffer in which to create the header * @info: generic netlink message context * * Returns a ready-to-use header, or NULL. */ struct nlmsghdr *handshake_genl_put(struct sk_buff *msg, struct genl_info *info) { return genlmsg_put(msg, info->snd_portid, info->snd_seq, &handshake_nl_family, 0, info->genlhdr->cmd); } EXPORT_SYMBOL(handshake_genl_put); int handshake_nl_accept_doit(struct sk_buff *skb, struct genl_info *info) { struct net *net = sock_net(skb->sk); struct handshake_net *hn = handshake_pernet(net); struct handshake_req *req = NULL; struct socket *sock; int class, fd, err; err = -EOPNOTSUPP; if (!hn) goto out_status; err = -EINVAL; if (GENL_REQ_ATTR_CHECK(info, HANDSHAKE_A_ACCEPT_HANDLER_CLASS)) goto out_status; class = nla_get_u32(info->attrs[HANDSHAKE_A_ACCEPT_HANDLER_CLASS]); err = -EAGAIN; req = handshake_req_next(hn, class); if (!req) goto out_status; sock = req->hr_sk->sk_socket; fd = get_unused_fd_flags(O_CLOEXEC); if (fd < 0) { err = fd; goto out_complete; } err = req->hr_proto->hp_accept(req, info, fd); if (err) { put_unused_fd(fd); goto out_complete; } fd_install(fd, get_file(sock->file)); trace_handshake_cmd_accept(net, req, req->hr_sk, fd); return 0; out_complete: handshake_complete(req, -EIO, NULL); out_status: trace_handshake_cmd_accept_err(net, req, NULL, err); return err; } int handshake_nl_done_doit(struct sk_buff *skb, struct genl_info *info) { struct net *net = sock_net(skb->sk); struct handshake_req *req; struct socket *sock; int fd, status, err; if (GENL_REQ_ATTR_CHECK(info, HANDSHAKE_A_DONE_SOCKFD)) return -EINVAL; fd = nla_get_s32(info->attrs[HANDSHAKE_A_DONE_SOCKFD]); sock = sockfd_lookup(fd, &err); if (!sock) return err; req = handshake_req_hash_lookup(sock->sk); if (!req) { err = -EBUSY; trace_handshake_cmd_done_err(net, req, sock->sk, err); sockfd_put(sock); return err; } trace_handshake_cmd_done(net, req, sock->sk, fd); status = -EIO; if (info->attrs[HANDSHAKE_A_DONE_STATUS]) status = nla_get_u32(info->attrs[HANDSHAKE_A_DONE_STATUS]); handshake_complete(req, status, info); sockfd_put(sock); return 0; } static unsigned int handshake_net_id; static int __net_init handshake_net_init(struct net *net) { struct handshake_net *hn = net_generic(net, handshake_net_id); unsigned long tmp; struct sysinfo si; /* * Arbitrary limit to prevent handshakes that do not make * progress from clogging up the system. The cap scales up * with the amount of physical memory on the system. */ si_meminfo(&si); tmp = si.totalram / (25 * si.mem_unit); hn->hn_pending_max = clamp(tmp, 3UL, 50UL); spin_lock_init(&hn->hn_lock); hn->hn_pending = 0; hn->hn_flags = 0; INIT_LIST_HEAD(&hn->hn_requests); return 0; } static void __net_exit handshake_net_exit(struct net *net) { struct handshake_net *hn = net_generic(net, handshake_net_id); struct handshake_req *req; LIST_HEAD(requests); /* * Drain the net's pending list. Requests that have been * accepted and are in progress will be destroyed when * the socket is closed. */ spin_lock(&hn->hn_lock); set_bit(HANDSHAKE_F_NET_DRAINING, &hn->hn_flags); list_splice_init(&requests, &hn->hn_requests); spin_unlock(&hn->hn_lock); while (!list_empty(&requests)) { req = list_first_entry(&requests, struct handshake_req, hr_list); list_del(&req->hr_list); /* * Requests on this list have not yet been * accepted, so they do not have an fd to put. */ handshake_complete(req, -ETIMEDOUT, NULL); } } static struct pernet_operations handshake_genl_net_ops = { .init = handshake_net_init, .exit = handshake_net_exit, .id = &handshake_net_id, .size = sizeof(struct handshake_net), }; /** * handshake_pernet - Get the handshake private per-net structure * @net: network namespace * * Returns a pointer to the net's private per-net structure for the * handshake module, or NULL if handshake_init() failed. */ struct handshake_net *handshake_pernet(struct net *net) { return handshake_net_id ? net_generic(net, handshake_net_id) : NULL; } EXPORT_SYMBOL_IF_KUNIT(handshake_pernet); static int __init handshake_init(void) { int ret; ret = handshake_req_hash_init(); if (ret) { pr_warn("handshake: hash initialization failed (%d)\n", ret); return ret; } ret = genl_register_family(&handshake_nl_family); if (ret) { pr_warn("handshake: netlink registration failed (%d)\n", ret); handshake_req_hash_destroy(); return ret; } /* * ORDER: register_pernet_subsys must be done last. * * If initialization does not make it past pernet_subsys * registration, then handshake_net_id will remain 0. That * shunts the handshake consumer API to return ENOTSUPP * to prevent it from dereferencing something that hasn't * been allocated. */ ret = register_pernet_subsys(&handshake_genl_net_ops); if (ret) { pr_warn("handshake: pernet registration failed (%d)\n", ret); genl_unregister_family(&handshake_nl_family); handshake_req_hash_destroy(); } return ret; } static void __exit handshake_exit(void) { unregister_pernet_subsys(&handshake_genl_net_ops); handshake_net_id = 0; handshake_req_hash_destroy(); genl_unregister_family(&handshake_nl_family); } module_init(handshake_init); module_exit(handshake_exit);
15 16 16 15 16 16 15 16 16 16 16 16 16 16 16 98 98 97 98 97 11 4 16 16 16 16 11 4 16 16 16 16 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 /* * Rusty Russell (C)2000 -- This code is GPL. * Patrick McHardy (c) 2006-2012 */ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/init.h> #include <linux/module.h> #include <linux/proc_fs.h> #include <linux/skbuff.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv4.h> #include <linux/netfilter_ipv6.h> #include <linux/netfilter_bridge.h> #include <linux/seq_file.h> #include <linux/rcupdate.h> #include <net/protocol.h> #include <net/netfilter/nf_queue.h> #include <net/dst.h> #include "nf_internals.h" static const struct nf_queue_handler __rcu *nf_queue_handler; /* * Hook for nfnetlink_queue to register its queue handler. * We do this so that most of the NFQUEUE code can be modular. * * Once the queue is registered it must reinject all packets it * receives, no matter what. */ void nf_register_queue_handler(const struct nf_queue_handler *qh) { /* should never happen, we only have one queueing backend in kernel */ WARN_ON(rcu_access_pointer(nf_queue_handler)); rcu_assign_pointer(nf_queue_handler, qh); } EXPORT_SYMBOL(nf_register_queue_handler); /* The caller must flush their queue before this */ void nf_unregister_queue_handler(void) { RCU_INIT_POINTER(nf_queue_handler, NULL); } EXPORT_SYMBOL(nf_unregister_queue_handler); static void nf_queue_sock_put(struct sock *sk) { #ifdef CONFIG_INET sock_gen_put(sk); #else sock_put(sk); #endif } static void nf_queue_entry_release_refs(struct nf_queue_entry *entry) { struct nf_hook_state *state = &entry->state; /* Release those devices we held, or Alexey will kill me. */ dev_put(state->in); dev_put(state->out); if (state->sk) nf_queue_sock_put(state->sk); #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) dev_put(entry->physin); dev_put(entry->physout); #endif } void nf_queue_entry_free(struct nf_queue_entry *entry) { nf_queue_entry_release_refs(entry); kfree(entry); } EXPORT_SYMBOL_GPL(nf_queue_entry_free); static void __nf_queue_entry_init_physdevs(struct nf_queue_entry *entry) { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) const struct sk_buff *skb = entry->skb; if (nf_bridge_info_exists(skb)) { entry->physin = nf_bridge_get_physindev(skb, entry->state.net); entry->physout = nf_bridge_get_physoutdev(skb); } else { entry->physin = NULL; entry->physout = NULL; } #endif } /* Bump dev refs so they don't vanish while packet is out */ bool nf_queue_entry_get_refs(struct nf_queue_entry *entry) { struct nf_hook_state *state = &entry->state; if (state->sk && !refcount_inc_not_zero(&state->sk->sk_refcnt)) return false; dev_hold(state->in); dev_hold(state->out); #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) dev_hold(entry->physin); dev_hold(entry->physout); #endif return true; } EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs); void nf_queue_nf_hook_drop(struct net *net) { const struct nf_queue_handler *qh; rcu_read_lock(); qh = rcu_dereference(nf_queue_handler); if (qh) qh->nf_hook_drop(net); rcu_read_unlock(); } EXPORT_SYMBOL_GPL(nf_queue_nf_hook_drop); static void nf_ip_saveroute(const struct sk_buff *skb, struct nf_queue_entry *entry) { struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry); if (entry->state.hook == NF_INET_LOCAL_OUT) { const struct iphdr *iph = ip_hdr(skb); rt_info->tos = iph->tos; rt_info->daddr = iph->daddr; rt_info->saddr = iph->saddr; rt_info->mark = skb->mark; } } static void nf_ip6_saveroute(const struct sk_buff *skb, struct nf_queue_entry *entry) { struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry); if (entry->state.hook == NF_INET_LOCAL_OUT) { const struct ipv6hdr *iph = ipv6_hdr(skb); rt_info->daddr = iph->daddr; rt_info->saddr = iph->saddr; rt_info->mark = skb->mark; } } static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, unsigned int index, unsigned int queuenum) { struct nf_queue_entry *entry = NULL; const struct nf_queue_handler *qh; unsigned int route_key_size; int status; /* QUEUE == DROP if no one is waiting, to be safe. */ qh = rcu_dereference(nf_queue_handler); if (!qh) return -ESRCH; switch (state->pf) { case AF_INET: route_key_size = sizeof(struct ip_rt_info); break; case AF_INET6: route_key_size = sizeof(struct ip6_rt_info); break; default: route_key_size = 0; break; } if (skb_sk_is_prefetched(skb)) { struct sock *sk = skb->sk; if (!sk_is_refcounted(sk)) { if (!refcount_inc_not_zero(&sk->sk_refcnt)) return -ENOTCONN; /* drop refcount on skb_orphan */ skb->destructor = sock_edemux; } } entry = kmalloc(sizeof(*entry) + route_key_size, GFP_ATOMIC); if (!entry) return -ENOMEM; if (skb_dst(skb) && !skb_dst_force(skb)) { kfree(entry); return -ENETDOWN; } *entry = (struct nf_queue_entry) { .skb = skb, .state = *state, .hook_index = index, .size = sizeof(*entry) + route_key_size, }; __nf_queue_entry_init_physdevs(entry); if (!nf_queue_entry_get_refs(entry)) { kfree(entry); return -ENOTCONN; } switch (entry->state.pf) { case AF_INET: nf_ip_saveroute(skb, entry); break; case AF_INET6: nf_ip6_saveroute(skb, entry); break; } status = qh->outfn(entry, queuenum); if (status < 0) { nf_queue_entry_free(entry); return status; } return 0; } /* Packets leaving via this function must come back through nf_reinject(). */ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, unsigned int index, unsigned int verdict) { int ret; ret = __nf_queue(skb, state, index, verdict >> NF_VERDICT_QBITS); if (ret < 0) { if (ret == -ESRCH && (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) return 1; kfree_skb(skb); } return 0; } EXPORT_SYMBOL_GPL(nf_queue);
1 1 1 8 4 3 1 20 17 10 2 10 3 4 4 4 2 2 1 2 6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2020 Facebook #include <linux/debugfs.h> #include <linux/random.h> #include <net/netdev_queues.h> #include "netdevsim.h" static void nsim_get_pause_stats(struct net_device *dev, struct ethtool_pause_stats *pause_stats) { struct netdevsim *ns = netdev_priv(dev); if (ns->ethtool.pauseparam.report_stats_rx) pause_stats->rx_pause_frames = 1; if (ns->ethtool.pauseparam.report_stats_tx) pause_stats->tx_pause_frames = 2; } static void nsim_get_pauseparam(struct net_device *dev, struct ethtool_pauseparam *pause) { struct netdevsim *ns = netdev_priv(dev); pause->autoneg = 0; /* We don't support ksettings, so can't pretend */ pause->rx_pause = ns->ethtool.pauseparam.rx; pause->tx_pause = ns->ethtool.pauseparam.tx; } static int nsim_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam *pause) { struct netdevsim *ns = netdev_priv(dev); if (pause->autoneg) return -EINVAL; ns->ethtool.pauseparam.rx = pause->rx_pause; ns->ethtool.pauseparam.tx = pause->tx_pause; return 0; } static int nsim_get_coalesce(struct net_device *dev, struct ethtool_coalesce *coal, struct kernel_ethtool_coalesce *kernel_coal, struct netlink_ext_ack *extack) { struct netdevsim *ns = netdev_priv(dev); memcpy(coal, &ns->ethtool.coalesce, sizeof(ns->ethtool.coalesce)); return 0; } static int nsim_set_coalesce(struct net_device *dev, struct ethtool_coalesce *coal, struct kernel_ethtool_coalesce *kernel_coal, struct netlink_ext_ack *extack) { struct netdevsim *ns = netdev_priv(dev); memcpy(&ns->ethtool.coalesce, coal, sizeof(ns->ethtool.coalesce)); return 0; } static void nsim_get_ringparam(struct net_device *dev, struct ethtool_ringparam *ring, struct kernel_ethtool_ringparam *kernel_ring, struct netlink_ext_ack *extack) { struct netdevsim *ns = netdev_priv(dev); memcpy(ring, &ns->ethtool.ring, sizeof(ns->ethtool.ring)); kernel_ring->hds_thresh_max = NSIM_HDS_THRESHOLD_MAX; if (dev->cfg->hds_config == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN) kernel_ring->tcp_data_split = ETHTOOL_TCP_DATA_SPLIT_ENABLED; } static int nsim_set_ringparam(struct net_device *dev, struct ethtool_ringparam *ring, struct kernel_ethtool_ringparam *kernel_ring, struct netlink_ext_ack *extack) { struct netdevsim *ns = netdev_priv(dev); ns->ethtool.ring.rx_pending = ring->rx_pending; ns->ethtool.ring.rx_jumbo_pending = ring->rx_jumbo_pending; ns->ethtool.ring.rx_mini_pending = ring->rx_mini_pending; ns->ethtool.ring.tx_pending = ring->tx_pending; return 0; } static void nsim_get_channels(struct net_device *dev, struct ethtool_channels *ch) { struct netdevsim *ns = netdev_priv(dev); ch->max_combined = ns->nsim_bus_dev->num_queues; ch->combined_count = ns->ethtool.channels; } static void nsim_wake_queues(struct net_device *dev) { struct netdevsim *ns = netdev_priv(dev); struct netdevsim *peer; synchronize_net(); netif_tx_wake_all_queues(dev); rcu_read_lock(); peer = rcu_dereference(ns->peer); if (peer) netif_tx_wake_all_queues(peer->netdev); rcu_read_unlock(); } static int nsim_set_channels(struct net_device *dev, struct ethtool_channels *ch) { struct netdevsim *ns = netdev_priv(dev); int err; err = netif_set_real_num_queues(dev, ch->combined_count, ch->combined_count); if (err) return err; ns->ethtool.channels = ch->combined_count; /* Only wake up queues if devices are linked */ if (rcu_access_pointer(ns->peer)) nsim_wake_queues(dev); return 0; } static int nsim_get_fecparam(struct net_device *dev, struct ethtool_fecparam *fecparam) { struct netdevsim *ns = netdev_priv(dev); if (ns->ethtool.get_err) return -ns->ethtool.get_err; memcpy(fecparam, &ns->ethtool.fec, sizeof(ns->ethtool.fec)); return 0; } static int nsim_set_fecparam(struct net_device *dev, struct ethtool_fecparam *fecparam) { struct netdevsim *ns = netdev_priv(dev); u32 fec; if (ns->ethtool.set_err) return -ns->ethtool.set_err; memcpy(&ns->ethtool.fec, fecparam, sizeof(ns->ethtool.fec)); fec = fecparam->fec; if (fec == ETHTOOL_FEC_AUTO) fec |= ETHTOOL_FEC_OFF; fec |= ETHTOOL_FEC_NONE; ns->ethtool.fec.active_fec = 1 << (fls(fec) - 1); return 0; } static void nsim_get_fec_stats(struct net_device *dev, struct ethtool_fec_stats *fec_stats) { fec_stats->corrected_blocks.total = 123; fec_stats->uncorrectable_blocks.total = 4; } static int nsim_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info) { struct netdevsim *ns = netdev_priv(dev); info->phc_index = mock_phc_index(ns->phc); return 0; } static const struct ethtool_ops nsim_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_ALL_PARAMS, .supported_ring_params = ETHTOOL_RING_USE_TCP_DATA_SPLIT | ETHTOOL_RING_USE_HDS_THRS, .get_pause_stats = nsim_get_pause_stats, .get_pauseparam = nsim_get_pauseparam, .set_pauseparam = nsim_set_pauseparam, .set_coalesce = nsim_set_coalesce, .get_coalesce = nsim_get_coalesce, .get_ringparam = nsim_get_ringparam, .set_ringparam = nsim_set_ringparam, .get_channels = nsim_get_channels, .set_channels = nsim_set_channels, .get_fecparam = nsim_get_fecparam, .set_fecparam = nsim_set_fecparam, .get_fec_stats = nsim_get_fec_stats, .get_ts_info = nsim_get_ts_info, }; static void nsim_ethtool_ring_init(struct netdevsim *ns) { ns->ethtool.ring.rx_pending = 512; ns->ethtool.ring.rx_max_pending = 4096; ns->ethtool.ring.rx_jumbo_max_pending = 4096; ns->ethtool.ring.rx_mini_max_pending = 4096; ns->ethtool.ring.tx_pending = 512; ns->ethtool.ring.tx_max_pending = 4096; } void nsim_ethtool_init(struct netdevsim *ns) { struct dentry *ethtool, *dir; ns->netdev->ethtool_ops = &nsim_ethtool_ops; nsim_ethtool_ring_init(ns); ns->ethtool.pauseparam.report_stats_rx = true; ns->ethtool.pauseparam.report_stats_tx = true; ns->ethtool.fec.fec = ETHTOOL_FEC_NONE; ns->ethtool.fec.active_fec = ETHTOOL_FEC_NONE; ns->ethtool.channels = ns->nsim_bus_dev->num_queues; ethtool = debugfs_create_dir("ethtool", ns->nsim_dev_port->ddir); debugfs_create_u32("get_err", 0600, ethtool, &ns->ethtool.get_err); debugfs_create_u32("set_err", 0600, ethtool, &ns->ethtool.set_err); dir = debugfs_create_dir("pause", ethtool); debugfs_create_bool("report_stats_rx", 0600, dir, &ns->ethtool.pauseparam.report_stats_rx); debugfs_create_bool("report_stats_tx", 0600, dir, &ns->ethtool.pauseparam.report_stats_tx); dir = debugfs_create_dir("ring", ethtool); debugfs_create_u32("rx_max_pending", 0600, dir, &ns->ethtool.ring.rx_max_pending); debugfs_create_u32("rx_jumbo_max_pending", 0600, dir, &ns->ethtool.ring.rx_jumbo_max_pending); debugfs_create_u32("rx_mini_max_pending", 0600, dir, &ns->ethtool.ring.rx_mini_max_pending); debugfs_create_u32("tx_max_pending", 0600, dir, &ns->ethtool.ring.tx_max_pending); }
1 1 8 5 1 1 1 6 1 2 1 1 2 1 1 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org> */ #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat_redirect.h> #include <net/netfilter/nf_tables.h> struct nft_redir { u8 sreg_proto_min; u8 sreg_proto_max; u16 flags; }; static const struct nla_policy nft_redir_policy[NFTA_REDIR_MAX + 1] = { [NFTA_REDIR_REG_PROTO_MIN] = { .type = NLA_U32 }, [NFTA_REDIR_REG_PROTO_MAX] = { .type = NLA_U32 }, [NFTA_REDIR_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NF_NAT_RANGE_MASK), }; static int nft_redir_validate(const struct nft_ctx *ctx, const struct nft_expr *expr) { int err; err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); if (err < 0) return err; return nft_chain_validate_hooks(ctx->chain, (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT)); } static int nft_redir_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_redir *priv = nft_expr_priv(expr); unsigned int plen; int err; plen = sizeof_field(struct nf_nat_range, min_proto.all); if (tb[NFTA_REDIR_REG_PROTO_MIN]) { err = nft_parse_register_load(ctx, tb[NFTA_REDIR_REG_PROTO_MIN], &priv->sreg_proto_min, plen); if (err < 0) return err; if (tb[NFTA_REDIR_REG_PROTO_MAX]) { err = nft_parse_register_load(ctx, tb[NFTA_REDIR_REG_PROTO_MAX], &priv->sreg_proto_max, plen); if (err < 0) return err; } else { priv->sreg_proto_max = priv->sreg_proto_min; } priv->flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } if (tb[NFTA_REDIR_FLAGS]) priv->flags = ntohl(nla_get_be32(tb[NFTA_REDIR_FLAGS])); return nf_ct_netns_get(ctx->net, ctx->family); } static int nft_redir_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { const struct nft_redir *priv = nft_expr_priv(expr); if (priv->sreg_proto_min) { if (nft_dump_register(skb, NFTA_REDIR_REG_PROTO_MIN, priv->sreg_proto_min)) goto nla_put_failure; if (nft_dump_register(skb, NFTA_REDIR_REG_PROTO_MAX, priv->sreg_proto_max)) goto nla_put_failure; } if (priv->flags != 0 && nla_put_be32(skb, NFTA_REDIR_FLAGS, htonl(priv->flags))) goto nla_put_failure; return 0; nla_put_failure: return -1; } static void nft_redir_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_redir *priv = nft_expr_priv(expr); struct nf_nat_range2 range; memset(&range, 0, sizeof(range)); range.flags = priv->flags; if (priv->sreg_proto_min) { range.min_proto.all = (__force __be16) nft_reg_load16(&regs->data[priv->sreg_proto_min]); range.max_proto.all = (__force __be16) nft_reg_load16(&regs->data[priv->sreg_proto_max]); } switch (nft_pf(pkt)) { case NFPROTO_IPV4: regs->verdict.code = nf_nat_redirect_ipv4(pkt->skb, &range, nft_hook(pkt)); break; #ifdef CONFIG_NF_TABLES_IPV6 case NFPROTO_IPV6: regs->verdict.code = nf_nat_redirect_ipv6(pkt->skb, &range, nft_hook(pkt)); break; #endif default: WARN_ON_ONCE(1); break; } } static void nft_redir_ipv4_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { nf_ct_netns_put(ctx->net, NFPROTO_IPV4); } static struct nft_expr_type nft_redir_ipv4_type; static const struct nft_expr_ops nft_redir_ipv4_ops = { .type = &nft_redir_ipv4_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)), .eval = nft_redir_eval, .init = nft_redir_init, .destroy = nft_redir_ipv4_destroy, .dump = nft_redir_dump, .validate = nft_redir_validate, .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_redir_ipv4_type __read_mostly = { .family = NFPROTO_IPV4, .name = "redir", .ops = &nft_redir_ipv4_ops, .policy = nft_redir_policy, .maxattr = NFTA_REDIR_MAX, .owner = THIS_MODULE, }; #ifdef CONFIG_NF_TABLES_IPV6 static void nft_redir_ipv6_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { nf_ct_netns_put(ctx->net, NFPROTO_IPV6); } static struct nft_expr_type nft_redir_ipv6_type; static const struct nft_expr_ops nft_redir_ipv6_ops = { .type = &nft_redir_ipv6_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)), .eval = nft_redir_eval, .init = nft_redir_init, .destroy = nft_redir_ipv6_destroy, .dump = nft_redir_dump, .validate = nft_redir_validate, .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_redir_ipv6_type __read_mostly = { .family = NFPROTO_IPV6, .name = "redir", .ops = &nft_redir_ipv6_ops, .policy = nft_redir_policy, .maxattr = NFTA_REDIR_MAX, .owner = THIS_MODULE, }; #endif #ifdef CONFIG_NF_TABLES_INET static void nft_redir_inet_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { nf_ct_netns_put(ctx->net, NFPROTO_INET); } static struct nft_expr_type nft_redir_inet_type; static const struct nft_expr_ops nft_redir_inet_ops = { .type = &nft_redir_inet_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)), .eval = nft_redir_eval, .init = nft_redir_init, .destroy = nft_redir_inet_destroy, .dump = nft_redir_dump, .validate = nft_redir_validate, .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_redir_inet_type __read_mostly = { .family = NFPROTO_INET, .name = "redir", .ops = &nft_redir_inet_ops, .policy = nft_redir_policy, .maxattr = NFTA_REDIR_MAX, .owner = THIS_MODULE, }; static int __init nft_redir_module_init_inet(void) { return nft_register_expr(&nft_redir_inet_type); } #else static inline int nft_redir_module_init_inet(void) { return 0; } #endif static int __init nft_redir_module_init(void) { int ret = nft_register_expr(&nft_redir_ipv4_type); if (ret) return ret; #ifdef CONFIG_NF_TABLES_IPV6 ret = nft_register_expr(&nft_redir_ipv6_type); if (ret) { nft_unregister_expr(&nft_redir_ipv4_type); return ret; } #endif ret = nft_redir_module_init_inet(); if (ret < 0) { nft_unregister_expr(&nft_redir_ipv4_type); #ifdef CONFIG_NF_TABLES_IPV6 nft_unregister_expr(&nft_redir_ipv6_type); #endif return ret; } return ret; } static void __exit nft_redir_module_exit(void) { nft_unregister_expr(&nft_redir_ipv4_type); #ifdef CONFIG_NF_TABLES_IPV6 nft_unregister_expr(&nft_redir_ipv6_type); #endif #ifdef CONFIG_NF_TABLES_INET nft_unregister_expr(&nft_redir_inet_type); #endif } module_init(nft_redir_module_init); module_exit(nft_redir_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>"); MODULE_ALIAS_NFT_EXPR("redir"); MODULE_DESCRIPTION("Netfilter nftables redirect support");
3 1 1 1 1 3 44 39 11 43 1 44 52 1 1 1 31 43 1 52 48 17 1 1 48 16 3 12 12 3 10 2 4 4 3 69 69 68 9 37 37 36 37 37 36 36 36 36 5 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 // SPDX-License-Identifier: GPL-2.0-or-later /* * ALSA sequencer Memory Manager * Copyright (c) 1998 by Frank van de Pol <fvdpol@coil.demon.nl> * Jaroslav Kysela <perex@perex.cz> * 2000 by Takashi Iwai <tiwai@suse.de> */ #include <linux/init.h> #include <linux/export.h> #include <linux/slab.h> #include <linux/sched/signal.h> #include <linux/mm.h> #include <sound/core.h> #include <sound/seq_kernel.h> #include "seq_memory.h" #include "seq_queue.h" #include "seq_info.h" #include "seq_lock.h" static inline int snd_seq_pool_available(struct snd_seq_pool *pool) { return pool->total_elements - atomic_read(&pool->counter); } static inline int snd_seq_output_ok(struct snd_seq_pool *pool) { return snd_seq_pool_available(pool) >= pool->room; } /* * Variable length event: * The event like sysex uses variable length type. * The external data may be stored in three different formats. * 1) kernel space * This is the normal case. * ext.data.len = length * ext.data.ptr = buffer pointer * 2) user space * When an event is generated via read(), the external data is * kept in user space until expanded. * ext.data.len = length | SNDRV_SEQ_EXT_USRPTR * ext.data.ptr = userspace pointer * 3) chained cells * When the variable length event is enqueued (in prioq or fifo), * the external data is decomposed to several cells. * ext.data.len = length | SNDRV_SEQ_EXT_CHAINED * ext.data.ptr = the additiona cell head * -> cell.next -> cell.next -> .. */ /* * exported: * call dump function to expand external data. */ static int get_var_len(const struct snd_seq_event *event) { if ((event->flags & SNDRV_SEQ_EVENT_LENGTH_MASK) != SNDRV_SEQ_EVENT_LENGTH_VARIABLE) return -EINVAL; return event->data.ext.len & ~SNDRV_SEQ_EXT_MASK; } static int dump_var_event(const struct snd_seq_event *event, snd_seq_dump_func_t func, void *private_data, int offset, int maxlen) { int len, err; struct snd_seq_event_cell *cell; len = get_var_len(event); if (len <= 0) return len; if (len <= offset) return 0; if (maxlen && len > offset + maxlen) len = offset + maxlen; if (event->data.ext.len & SNDRV_SEQ_EXT_USRPTR) { char buf[32]; char __user *curptr = (char __force __user *)event->data.ext.ptr; curptr += offset; len -= offset; while (len > 0) { int size = sizeof(buf); if (len < size) size = len; if (copy_from_user(buf, curptr, size)) return -EFAULT; err = func(private_data, buf, size); if (err < 0) return err; curptr += size; len -= size; } return 0; } if (!(event->data.ext.len & SNDRV_SEQ_EXT_CHAINED)) return func(private_data, event->data.ext.ptr + offset, len - offset); cell = (struct snd_seq_event_cell *)event->data.ext.ptr; for (; len > 0 && cell; cell = cell->next) { int size = sizeof(struct snd_seq_event); char *curptr = (char *)&cell->event; if (offset >= size) { offset -= size; len -= size; continue; } if (len < size) size = len; err = func(private_data, curptr + offset, size - offset); if (err < 0) return err; offset = 0; len -= size; } return 0; } int snd_seq_dump_var_event(const struct snd_seq_event *event, snd_seq_dump_func_t func, void *private_data) { return dump_var_event(event, func, private_data, 0, 0); } EXPORT_SYMBOL(snd_seq_dump_var_event); /* * exported: * expand the variable length event to linear buffer space. */ static int seq_copy_in_kernel(void *ptr, void *src, int size) { char **bufptr = ptr; memcpy(*bufptr, src, size); *bufptr += size; return 0; } static int seq_copy_in_user(void *ptr, void *src, int size) { char __user **bufptr = ptr; if (copy_to_user(*bufptr, src, size)) return -EFAULT; *bufptr += size; return 0; } static int expand_var_event(const struct snd_seq_event *event, int offset, int size, char *buf, bool in_kernel) { if (event->data.ext.len & SNDRV_SEQ_EXT_USRPTR) { if (! in_kernel) return -EINVAL; if (copy_from_user(buf, (char __force __user *)event->data.ext.ptr + offset, size)) return -EFAULT; return 0; } return dump_var_event(event, in_kernel ? seq_copy_in_kernel : seq_copy_in_user, &buf, offset, size); } int snd_seq_expand_var_event(const struct snd_seq_event *event, int count, char *buf, int in_kernel, int size_aligned) { int len, newlen, err; len = get_var_len(event); if (len < 0) return len; newlen = len; if (size_aligned > 0) newlen = roundup(len, size_aligned); if (count < newlen) return -EAGAIN; err = expand_var_event(event, 0, len, buf, in_kernel); if (err < 0) return err; if (len != newlen) { if (in_kernel) memset(buf + len, 0, newlen - len); else if (clear_user((__force void __user *)buf + len, newlen - len)) return -EFAULT; } return newlen; } EXPORT_SYMBOL(snd_seq_expand_var_event); int snd_seq_expand_var_event_at(const struct snd_seq_event *event, int count, char *buf, int offset) { int len, err; len = get_var_len(event); if (len < 0) return len; if (len <= offset) return 0; len -= offset; if (len > count) len = count; err = expand_var_event(event, offset, count, buf, true); if (err < 0) return err; return len; } EXPORT_SYMBOL_GPL(snd_seq_expand_var_event_at); /* * release this cell, free extended data if available */ static inline void free_cell(struct snd_seq_pool *pool, struct snd_seq_event_cell *cell) { cell->next = pool->free; pool->free = cell; atomic_dec(&pool->counter); } void snd_seq_cell_free(struct snd_seq_event_cell * cell) { struct snd_seq_pool *pool; if (snd_BUG_ON(!cell)) return; pool = cell->pool; if (snd_BUG_ON(!pool)) return; guard(spinlock_irqsave)(&pool->lock); free_cell(pool, cell); if (snd_seq_ev_is_variable(&cell->event)) { if (cell->event.data.ext.len & SNDRV_SEQ_EXT_CHAINED) { struct snd_seq_event_cell *curp, *nextptr; curp = cell->event.data.ext.ptr; for (; curp; curp = nextptr) { nextptr = curp->next; curp->next = pool->free; free_cell(pool, curp); } } } if (waitqueue_active(&pool->output_sleep)) { /* has enough space now? */ if (snd_seq_output_ok(pool)) wake_up(&pool->output_sleep); } } /* * allocate an event cell. */ static int snd_seq_cell_alloc(struct snd_seq_pool *pool, struct snd_seq_event_cell **cellp, int nonblock, struct file *file, struct mutex *mutexp) { struct snd_seq_event_cell *cell; unsigned long flags; int err = -EAGAIN; wait_queue_entry_t wait; if (pool == NULL) return -EINVAL; *cellp = NULL; init_waitqueue_entry(&wait, current); spin_lock_irqsave(&pool->lock, flags); if (pool->ptr == NULL) { /* not initialized */ pr_debug("ALSA: seq: pool is not initialized\n"); err = -EINVAL; goto __error; } while (pool->free == NULL && ! nonblock && ! pool->closing) { set_current_state(TASK_INTERRUPTIBLE); add_wait_queue(&pool->output_sleep, &wait); spin_unlock_irqrestore(&pool->lock, flags); if (mutexp) mutex_unlock(mutexp); schedule(); if (mutexp) mutex_lock(mutexp); spin_lock_irqsave(&pool->lock, flags); remove_wait_queue(&pool->output_sleep, &wait); /* interrupted? */ if (signal_pending(current)) { err = -ERESTARTSYS; goto __error; } } if (pool->closing) { /* closing.. */ err = -ENOMEM; goto __error; } cell = pool->free; if (cell) { int used; pool->free = cell->next; atomic_inc(&pool->counter); used = atomic_read(&pool->counter); if (pool->max_used < used) pool->max_used = used; pool->event_alloc_success++; /* clear cell pointers */ cell->next = NULL; err = 0; } else pool->event_alloc_failures++; *cellp = cell; __error: spin_unlock_irqrestore(&pool->lock, flags); return err; } /* * duplicate the event to a cell. * if the event has external data, the data is decomposed to additional * cells. */ int snd_seq_event_dup(struct snd_seq_pool *pool, struct snd_seq_event *event, struct snd_seq_event_cell **cellp, int nonblock, struct file *file, struct mutex *mutexp) { int ncells, err; unsigned int extlen; struct snd_seq_event_cell *cell; int size; *cellp = NULL; ncells = 0; extlen = 0; if (snd_seq_ev_is_variable(event)) { extlen = event->data.ext.len & ~SNDRV_SEQ_EXT_MASK; ncells = DIV_ROUND_UP(extlen, sizeof(struct snd_seq_event)); } if (ncells >= pool->total_elements) return -ENOMEM; err = snd_seq_cell_alloc(pool, &cell, nonblock, file, mutexp); if (err < 0) return err; /* copy the event */ size = snd_seq_event_packet_size(event); memcpy(&cell->ump, event, size); #if IS_ENABLED(CONFIG_SND_SEQ_UMP) if (size < sizeof(cell->event)) cell->ump.raw.extra = 0; #endif /* decompose */ if (snd_seq_ev_is_variable(event)) { int len = extlen; int is_chained = event->data.ext.len & SNDRV_SEQ_EXT_CHAINED; int is_usrptr = event->data.ext.len & SNDRV_SEQ_EXT_USRPTR; struct snd_seq_event_cell *src, *tmp, *tail; char *buf; cell->event.data.ext.len = extlen | SNDRV_SEQ_EXT_CHAINED; cell->event.data.ext.ptr = NULL; src = (struct snd_seq_event_cell *)event->data.ext.ptr; buf = (char *)event->data.ext.ptr; tail = NULL; while (ncells-- > 0) { size = sizeof(struct snd_seq_event); if (len < size) size = len; err = snd_seq_cell_alloc(pool, &tmp, nonblock, file, mutexp); if (err < 0) goto __error; if (cell->event.data.ext.ptr == NULL) cell->event.data.ext.ptr = tmp; if (tail) tail->next = tmp; tail = tmp; /* copy chunk */ if (is_chained && src) { tmp->event = src->event; src = src->next; } else if (is_usrptr) { if (copy_from_user(&tmp->event, (char __force __user *)buf, size)) { err = -EFAULT; goto __error; } } else { memcpy(&tmp->event, buf, size); } buf += size; len -= size; } } *cellp = cell; return 0; __error: snd_seq_cell_free(cell); return err; } /* poll wait */ int snd_seq_pool_poll_wait(struct snd_seq_pool *pool, struct file *file, poll_table *wait) { poll_wait(file, &pool->output_sleep, wait); guard(spinlock_irq)(&pool->lock); return snd_seq_output_ok(pool); } /* allocate room specified number of events */ int snd_seq_pool_init(struct snd_seq_pool *pool) { int cell; struct snd_seq_event_cell *cellptr; if (snd_BUG_ON(!pool)) return -EINVAL; cellptr = kvmalloc_array(pool->size, sizeof(struct snd_seq_event_cell), GFP_KERNEL); if (!cellptr) return -ENOMEM; /* add new cells to the free cell list */ guard(spinlock_irq)(&pool->lock); if (pool->ptr) { kvfree(cellptr); return 0; } pool->ptr = cellptr; pool->free = NULL; for (cell = 0; cell < pool->size; cell++) { cellptr = pool->ptr + cell; cellptr->pool = pool; cellptr->next = pool->free; pool->free = cellptr; } pool->room = (pool->size + 1) / 2; /* init statistics */ pool->max_used = 0; pool->total_elements = pool->size; return 0; } /* refuse the further insertion to the pool */ void snd_seq_pool_mark_closing(struct snd_seq_pool *pool) { if (snd_BUG_ON(!pool)) return; guard(spinlock_irqsave)(&pool->lock); pool->closing = 1; } /* remove events */ int snd_seq_pool_done(struct snd_seq_pool *pool) { struct snd_seq_event_cell *ptr; if (snd_BUG_ON(!pool)) return -EINVAL; /* wait for closing all threads */ if (waitqueue_active(&pool->output_sleep)) wake_up(&pool->output_sleep); while (atomic_read(&pool->counter) > 0) schedule_timeout_uninterruptible(1); /* release all resources */ scoped_guard(spinlock_irq, &pool->lock) { ptr = pool->ptr; pool->ptr = NULL; pool->free = NULL; pool->total_elements = 0; } kvfree(ptr); guard(spinlock_irq)(&pool->lock); pool->closing = 0; return 0; } /* init new memory pool */ struct snd_seq_pool *snd_seq_pool_new(int poolsize) { struct snd_seq_pool *pool; /* create pool block */ pool = kzalloc(sizeof(*pool), GFP_KERNEL); if (!pool) return NULL; spin_lock_init(&pool->lock); pool->ptr = NULL; pool->free = NULL; pool->total_elements = 0; atomic_set(&pool->counter, 0); pool->closing = 0; init_waitqueue_head(&pool->output_sleep); pool->size = poolsize; /* init statistics */ pool->max_used = 0; return pool; } /* remove memory pool */ int snd_seq_pool_delete(struct snd_seq_pool **ppool) { struct snd_seq_pool *pool = *ppool; *ppool = NULL; if (pool == NULL) return 0; snd_seq_pool_mark_closing(pool); snd_seq_pool_done(pool); kfree(pool); return 0; } /* exported to seq_clientmgr.c */ void snd_seq_info_pool(struct snd_info_buffer *buffer, struct snd_seq_pool *pool, char *space) { if (pool == NULL) return; snd_iprintf(buffer, "%sPool size : %d\n", space, pool->total_elements); snd_iprintf(buffer, "%sCells in use : %d\n", space, atomic_read(&pool->counter)); snd_iprintf(buffer, "%sPeak cells in use : %d\n", space, pool->max_used); snd_iprintf(buffer, "%sAlloc success : %d\n", space, pool->event_alloc_success); snd_iprintf(buffer, "%sAlloc failures : %d\n", space, pool->event_alloc_failures); }
3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_VXLAN_H #define __NET_VXLAN_H 1 #include <linux/if_vlan.h> #include <linux/rhashtable-types.h> #include <net/udp_tunnel.h> #include <net/dst_metadata.h> #include <net/rtnetlink.h> #include <net/switchdev.h> #include <net/nexthop.h> #define IANA_VXLAN_UDP_PORT 4789 #define IANA_VXLAN_GPE_UDP_PORT 4790 /* VXLAN protocol (RFC 7348) header: * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * |R|R|R|R|I|R|R|R| Reserved | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | VXLAN Network Identifier (VNI) | Reserved | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * * I = VXLAN Network Identifier (VNI) present. */ struct vxlanhdr { __be32 vx_flags; __be32 vx_vni; }; /* VXLAN header flags. */ #define VXLAN_HF_VNI cpu_to_be32(BIT(27)) #define VXLAN_N_VID (1u << 24) #define VXLAN_VID_MASK (VXLAN_N_VID - 1) #define VXLAN_VNI_MASK cpu_to_be32(VXLAN_VID_MASK << 8) #define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr)) #define VNI_HASH_BITS 10 #define VNI_HASH_SIZE (1<<VNI_HASH_BITS) #define FDB_HASH_BITS 8 #define FDB_HASH_SIZE (1<<FDB_HASH_BITS) /* Remote checksum offload for VXLAN (VXLAN_F_REMCSUM_[RT]X): * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * |R|R|R|R|I|R|R|R|R|R|C| Reserved | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | VXLAN Network Identifier (VNI) |O| Csum start | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * * C = Remote checksum offload bit. When set indicates that the * remote checksum offload data is present. * * O = Offset bit. Indicates the checksum offset relative to * checksum start. * * Csum start = Checksum start divided by two. * * http://tools.ietf.org/html/draft-herbert-vxlan-rco */ /* VXLAN-RCO header flags. */ #define VXLAN_HF_RCO cpu_to_be32(BIT(21)) /* Remote checksum offload header option */ #define VXLAN_RCO_MASK cpu_to_be32(0x7f) /* Last byte of vni field */ #define VXLAN_RCO_UDP cpu_to_be32(0x80) /* Indicate UDP RCO (TCP when not set *) */ #define VXLAN_RCO_SHIFT 1 /* Left shift of start */ #define VXLAN_RCO_SHIFT_MASK ((1 << VXLAN_RCO_SHIFT) - 1) #define VXLAN_MAX_REMCSUM_START (0x7f << VXLAN_RCO_SHIFT) /* * VXLAN Group Based Policy Extension (VXLAN_F_GBP): * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * |G|R|R|R|I|R|R|R|R|D|R|R|A|R|R|R| Group Policy ID | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | VXLAN Network Identifier (VNI) | Reserved | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * * G = Group Policy ID present. * * D = Don't Learn bit. When set, this bit indicates that the egress * VTEP MUST NOT learn the source address of the encapsulated frame. * * A = Indicates that the group policy has already been applied to * this packet. Policies MUST NOT be applied by devices when the * A bit is set. * * https://tools.ietf.org/html/draft-smith-vxlan-group-policy */ struct vxlanhdr_gbp { u8 vx_flags; #ifdef __LITTLE_ENDIAN_BITFIELD u8 reserved_flags1:3, policy_applied:1, reserved_flags2:2, dont_learn:1, reserved_flags3:1; #elif defined(__BIG_ENDIAN_BITFIELD) u8 reserved_flags1:1, dont_learn:1, reserved_flags2:2, policy_applied:1, reserved_flags3:3; #else #error "Please fix <asm/byteorder.h>" #endif __be16 policy_id; __be32 vx_vni; }; /* VXLAN-GBP header flags. */ #define VXLAN_HF_GBP cpu_to_be32(BIT(31)) #define VXLAN_GBP_USED_BITS (VXLAN_HF_GBP | cpu_to_be32(0xFFFFFF)) /* skb->mark mapping * * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * |R|R|R|R|R|R|R|R|R|D|R|R|A|R|R|R| Group Policy ID | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ */ #define VXLAN_GBP_DONT_LEARN (BIT(6) << 16) #define VXLAN_GBP_POLICY_APPLIED (BIT(3) << 16) #define VXLAN_GBP_ID_MASK (0xFFFF) #define VXLAN_GBP_MASK (VXLAN_GBP_DONT_LEARN | VXLAN_GBP_POLICY_APPLIED | \ VXLAN_GBP_ID_MASK) /* * VXLAN Generic Protocol Extension (VXLAN_F_GPE): * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * |R|R|Ver|I|P|R|O| Reserved |Next Protocol | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | VXLAN Network Identifier (VNI) | Reserved | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * * Ver = Version. Indicates VXLAN GPE protocol version. * * P = Next Protocol Bit. The P bit is set to indicate that the * Next Protocol field is present. * * O = OAM Flag Bit. The O bit is set to indicate that the packet * is an OAM packet. * * Next Protocol = This 8 bit field indicates the protocol header * immediately following the VXLAN GPE header. * * https://tools.ietf.org/html/draft-ietf-nvo3-vxlan-gpe-01 */ struct vxlanhdr_gpe { #if defined(__LITTLE_ENDIAN_BITFIELD) u8 oam_flag:1, reserved_flags1:1, np_applied:1, instance_applied:1, version:2, reserved_flags2:2; #elif defined(__BIG_ENDIAN_BITFIELD) u8 reserved_flags2:2, version:2, instance_applied:1, np_applied:1, reserved_flags1:1, oam_flag:1; #endif u8 reserved_flags3; u8 reserved_flags4; u8 next_protocol; __be32 vx_vni; }; /* VXLAN-GPE header flags. */ #define VXLAN_HF_VER cpu_to_be32(BIT(29) | BIT(28)) #define VXLAN_HF_NP cpu_to_be32(BIT(26)) #define VXLAN_HF_OAM cpu_to_be32(BIT(24)) #define VXLAN_GPE_USED_BITS (VXLAN_HF_VER | VXLAN_HF_NP | VXLAN_HF_OAM | \ cpu_to_be32(0xff)) struct vxlan_metadata { u32 gbp; }; /* per UDP socket information */ struct vxlan_sock { struct hlist_node hlist; struct socket *sock; struct hlist_head vni_list[VNI_HASH_SIZE]; refcount_t refcnt; u32 flags; }; union vxlan_addr { struct sockaddr_in sin; struct sockaddr_in6 sin6; struct sockaddr sa; }; struct vxlan_rdst { union vxlan_addr remote_ip; __be16 remote_port; u8 offloaded:1; __be32 remote_vni; u32 remote_ifindex; struct net_device *remote_dev; struct list_head list; struct rcu_head rcu; struct dst_cache dst_cache; }; struct vxlan_config { union vxlan_addr remote_ip; union vxlan_addr saddr; __be32 vni; int remote_ifindex; int mtu; __be16 dst_port; u16 port_min; u16 port_max; u8 tos; u8 ttl; __be32 label; enum ifla_vxlan_label_policy label_policy; u32 flags; unsigned long age_interval; unsigned int addrmax; bool no_share; enum ifla_vxlan_df df; struct vxlanhdr reserved_bits; }; enum { VXLAN_VNI_STATS_RX, VXLAN_VNI_STATS_RX_DROPS, VXLAN_VNI_STATS_RX_ERRORS, VXLAN_VNI_STATS_TX, VXLAN_VNI_STATS_TX_DROPS, VXLAN_VNI_STATS_TX_ERRORS, }; struct vxlan_vni_stats { u64 rx_packets; u64 rx_bytes; u64 rx_drops; u64 rx_errors; u64 tx_packets; u64 tx_bytes; u64 tx_drops; u64 tx_errors; }; struct vxlan_vni_stats_pcpu { struct vxlan_vni_stats stats; struct u64_stats_sync syncp; }; struct vxlan_dev_node { struct hlist_node hlist; struct vxlan_dev *vxlan; }; struct vxlan_vni_node { struct rhash_head vnode; struct vxlan_dev_node hlist4; /* vni hash table for IPv4 socket */ #if IS_ENABLED(CONFIG_IPV6) struct vxlan_dev_node hlist6; /* vni hash table for IPv6 socket */ #endif struct list_head vlist; __be32 vni; union vxlan_addr remote_ip; /* default remote ip for this vni */ struct vxlan_vni_stats_pcpu __percpu *stats; struct rcu_head rcu; }; struct vxlan_vni_group { struct rhashtable vni_hash; struct list_head vni_list; u32 num_vnis; }; /* Pseudo network device */ struct vxlan_dev { struct vxlan_dev_node hlist4; /* vni hash table for IPv4 socket */ #if IS_ENABLED(CONFIG_IPV6) struct vxlan_dev_node hlist6; /* vni hash table for IPv6 socket */ #endif struct list_head next; /* vxlan's per namespace list */ struct vxlan_sock __rcu *vn4_sock; /* listening socket for IPv4 */ #if IS_ENABLED(CONFIG_IPV6) struct vxlan_sock __rcu *vn6_sock; /* listening socket for IPv6 */ #endif struct net_device *dev; struct net *net; /* netns for packet i/o */ struct vxlan_rdst default_dst; /* default destination */ struct timer_list age_timer; spinlock_t hash_lock; unsigned int addrcnt; struct gro_cells gro_cells; struct vxlan_config cfg; struct vxlan_vni_group __rcu *vnigrp; struct rhashtable fdb_hash_tbl; struct rhashtable mdb_tbl; struct hlist_head fdb_list; struct hlist_head mdb_list; unsigned int mdb_seq; }; #define VXLAN_F_LEARN 0x01 #define VXLAN_F_PROXY 0x02 #define VXLAN_F_RSC 0x04 #define VXLAN_F_L2MISS 0x08 #define VXLAN_F_L3MISS 0x10 #define VXLAN_F_IPV6 0x20 #define VXLAN_F_UDP_ZERO_CSUM_TX 0x40 #define VXLAN_F_UDP_ZERO_CSUM6_TX 0x80 #define VXLAN_F_UDP_ZERO_CSUM6_RX 0x100 #define VXLAN_F_REMCSUM_TX 0x200 #define VXLAN_F_REMCSUM_RX 0x400 #define VXLAN_F_GBP 0x800 #define VXLAN_F_REMCSUM_NOPARTIAL 0x1000 #define VXLAN_F_COLLECT_METADATA 0x2000 #define VXLAN_F_GPE 0x4000 #define VXLAN_F_IPV6_LINKLOCAL 0x8000 #define VXLAN_F_TTL_INHERIT 0x10000 #define VXLAN_F_VNIFILTER 0x20000 #define VXLAN_F_MDB 0x40000 #define VXLAN_F_LOCALBYPASS 0x80000 #define VXLAN_F_MC_ROUTE 0x100000 /* Flags that are used in the receive path. These flags must match in * order for a socket to be shareable */ #define VXLAN_F_RCV_FLAGS (VXLAN_F_GBP | \ VXLAN_F_GPE | \ VXLAN_F_UDP_ZERO_CSUM6_RX | \ VXLAN_F_REMCSUM_RX | \ VXLAN_F_REMCSUM_NOPARTIAL | \ VXLAN_F_COLLECT_METADATA | \ VXLAN_F_VNIFILTER) /* Flags that can be set together with VXLAN_F_GPE. */ #define VXLAN_F_ALLOWED_GPE (VXLAN_F_GPE | \ VXLAN_F_IPV6 | \ VXLAN_F_IPV6_LINKLOCAL | \ VXLAN_F_UDP_ZERO_CSUM_TX | \ VXLAN_F_UDP_ZERO_CSUM6_TX | \ VXLAN_F_UDP_ZERO_CSUM6_RX | \ VXLAN_F_COLLECT_METADATA | \ VXLAN_F_VNIFILTER | \ VXLAN_F_LOCALBYPASS | \ VXLAN_F_MC_ROUTE | \ 0) struct net_device *vxlan_dev_create(struct net *net, const char *name, u8 name_assign_type, struct vxlan_config *conf); static inline netdev_features_t vxlan_features_check(struct sk_buff *skb, netdev_features_t features) { u8 l4_hdr = 0; if (!skb->encapsulation) return features; switch (vlan_get_protocol(skb)) { case htons(ETH_P_IP): l4_hdr = ip_hdr(skb)->protocol; break; case htons(ETH_P_IPV6): l4_hdr = ipv6_hdr(skb)->nexthdr; break; default: return features; } if ((l4_hdr == IPPROTO_UDP) && (skb->inner_protocol_type != ENCAP_TYPE_ETHER || skb->inner_protocol != htons(ETH_P_TEB) || (skb_inner_mac_header(skb) - skb_transport_header(skb) != sizeof(struct udphdr) + sizeof(struct vxlanhdr)) || (skb->ip_summed != CHECKSUM_NONE && !can_checksum_protocol(features, inner_eth_hdr(skb)->h_proto)))) return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); return features; } static inline int vxlan_headroom(u32 flags) { /* VXLAN: IP4/6 header + UDP + VXLAN + Ethernet header */ /* VXLAN-GPE: IP4/6 header + UDP + VXLAN */ return (flags & VXLAN_F_IPV6 ? sizeof(struct ipv6hdr) : sizeof(struct iphdr)) + sizeof(struct udphdr) + sizeof(struct vxlanhdr) + (flags & VXLAN_F_GPE ? 0 : ETH_HLEN); } static inline struct vxlanhdr *vxlan_hdr(struct sk_buff *skb) { return (struct vxlanhdr *)(udp_hdr(skb) + 1); } static inline __be32 vxlan_vni(__be32 vni_field) { #if defined(__BIG_ENDIAN) return (__force __be32)((__force u32)vni_field >> 8); #else return (__force __be32)((__force u32)(vni_field & VXLAN_VNI_MASK) << 8); #endif } static inline __be32 vxlan_vni_field(__be32 vni) { #if defined(__BIG_ENDIAN) return (__force __be32)((__force u32)vni << 8); #else return (__force __be32)((__force u32)vni >> 8); #endif } static inline size_t vxlan_rco_start(__be32 vni_field) { return be32_to_cpu(vni_field & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT; } static inline size_t vxlan_rco_offset(__be32 vni_field) { return (vni_field & VXLAN_RCO_UDP) ? offsetof(struct udphdr, check) : offsetof(struct tcphdr, check); } static inline __be32 vxlan_compute_rco(unsigned int start, unsigned int offset) { __be32 vni_field = cpu_to_be32(start >> VXLAN_RCO_SHIFT); if (offset == offsetof(struct udphdr, check)) vni_field |= VXLAN_RCO_UDP; return vni_field; } static inline unsigned short vxlan_get_sk_family(struct vxlan_sock *vs) { return vs->sock->sk->sk_family; } #if IS_ENABLED(CONFIG_IPV6) static inline bool vxlan_addr_any(const union vxlan_addr *ipa) { if (ipa->sa.sa_family == AF_INET6) return ipv6_addr_any(&ipa->sin6.sin6_addr); else return ipa->sin.sin_addr.s_addr == htonl(INADDR_ANY); } static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa) { if (ipa->sa.sa_family == AF_INET6) return ipv6_addr_is_multicast(&ipa->sin6.sin6_addr); else return ipv4_is_multicast(ipa->sin.sin_addr.s_addr); } #else /* !IS_ENABLED(CONFIG_IPV6) */ static inline bool vxlan_addr_any(const union vxlan_addr *ipa) { return ipa->sin.sin_addr.s_addr == htonl(INADDR_ANY); } static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa) { return ipv4_is_multicast(ipa->sin.sin_addr.s_addr); } #endif /* IS_ENABLED(CONFIG_IPV6) */ static inline bool netif_is_vxlan(const struct net_device *dev) { return dev->rtnl_link_ops && !strcmp(dev->rtnl_link_ops->kind, "vxlan"); } struct switchdev_notifier_vxlan_fdb_info { struct switchdev_notifier_info info; /* must be first */ union vxlan_addr remote_ip; __be16 remote_port; __be32 remote_vni; u32 remote_ifindex; u8 eth_addr[ETH_ALEN]; __be32 vni; bool offloaded; bool added_by_user; }; #if IS_ENABLED(CONFIG_VXLAN) int vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni, struct switchdev_notifier_vxlan_fdb_info *fdb_info); int vxlan_fdb_replay(const struct net_device *dev, __be32 vni, struct notifier_block *nb, struct netlink_ext_ack *extack); void vxlan_fdb_clear_offload(const struct net_device *dev, __be32 vni); #else static inline int vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni, struct switchdev_notifier_vxlan_fdb_info *fdb_info) { return -ENOENT; } static inline int vxlan_fdb_replay(const struct net_device *dev, __be32 vni, struct notifier_block *nb, struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } static inline void vxlan_fdb_clear_offload(const struct net_device *dev, __be32 vni) { } #endif static inline void vxlan_flag_attr_error(int attrtype, struct netlink_ext_ack *extack) { #define VXLAN_FLAG(flg) \ case IFLA_VXLAN_##flg: \ NL_SET_ERR_MSG_MOD(extack, \ "cannot change " #flg " flag"); \ break switch (attrtype) { VXLAN_FLAG(TTL_INHERIT); VXLAN_FLAG(LEARNING); VXLAN_FLAG(PROXY); VXLAN_FLAG(RSC); VXLAN_FLAG(L2MISS); VXLAN_FLAG(L3MISS); VXLAN_FLAG(COLLECT_METADATA); VXLAN_FLAG(UDP_ZERO_CSUM6_TX); VXLAN_FLAG(UDP_ZERO_CSUM6_RX); VXLAN_FLAG(REMCSUM_TX); VXLAN_FLAG(REMCSUM_RX); VXLAN_FLAG(GBP); VXLAN_FLAG(GPE); VXLAN_FLAG(REMCSUM_NOPARTIAL); default: NL_SET_ERR_MSG_MOD(extack, \ "cannot change flag"); break; } #undef VXLAN_FLAG } static inline bool vxlan_fdb_nh_path_select(struct nexthop *nh, u32 hash, struct vxlan_rdst *rdst) { struct fib_nh_common *nhc; nhc = nexthop_path_fdb_result(nh, hash >> 1); if (unlikely(!nhc)) return false; switch (nhc->nhc_gw_family) { case AF_INET: rdst->remote_ip.sin.sin_addr.s_addr = nhc->nhc_gw.ipv4; rdst->remote_ip.sa.sa_family = AF_INET; break; case AF_INET6: rdst->remote_ip.sin6.sin6_addr = nhc->nhc_gw.ipv6; rdst->remote_ip.sa.sa_family = AF_INET6; break; } return true; } static inline void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, const struct vxlan_metadata *md) { struct vxlanhdr_gbp *gbp; if (!md->gbp) return; gbp = (struct vxlanhdr_gbp *)vxh; vxh->vx_flags |= VXLAN_HF_GBP; if (md->gbp & VXLAN_GBP_DONT_LEARN) gbp->dont_learn = 1; if (md->gbp & VXLAN_GBP_POLICY_APPLIED) gbp->policy_applied = 1; gbp->policy_id = htons(md->gbp & VXLAN_GBP_ID_MASK); } #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Stubs for the Network PHY library */ #include <linux/rtnetlink.h> struct ethtool_eth_phy_stats; struct ethtool_link_ext_stats; struct ethtool_phy_stats; struct kernel_hwtstamp_config; struct netlink_ext_ack; struct phy_device; #if IS_ENABLED(CONFIG_PHYLIB) extern const struct phylib_stubs *phylib_stubs; struct phylib_stubs { int (*hwtstamp_get)(struct phy_device *phydev, struct kernel_hwtstamp_config *config); int (*hwtstamp_set)(struct phy_device *phydev, struct kernel_hwtstamp_config *config, struct netlink_ext_ack *extack); void (*get_phy_stats)(struct phy_device *phydev, struct ethtool_eth_phy_stats *phy_stats, struct ethtool_phy_stats *phydev_stats); void (*get_link_ext_stats)(struct phy_device *phydev, struct ethtool_link_ext_stats *link_stats); }; static inline int phy_hwtstamp_get(struct phy_device *phydev, struct kernel_hwtstamp_config *config) { /* phylib_register_stubs() and phylib_unregister_stubs() * also run under rtnl_lock(). */ ASSERT_RTNL(); if (!phylib_stubs) return -EOPNOTSUPP; return phylib_stubs->hwtstamp_get(phydev, config); } static inline int phy_hwtstamp_set(struct phy_device *phydev, struct kernel_hwtstamp_config *config, struct netlink_ext_ack *extack) { /* phylib_register_stubs() and phylib_unregister_stubs() * also run under rtnl_lock(). */ ASSERT_RTNL(); if (!phylib_stubs) return -EOPNOTSUPP; return phylib_stubs->hwtstamp_set(phydev, config, extack); } static inline void phy_ethtool_get_phy_stats(struct phy_device *phydev, struct ethtool_eth_phy_stats *phy_stats, struct ethtool_phy_stats *phydev_stats) { ASSERT_RTNL(); if (!phylib_stubs) return; phylib_stubs->get_phy_stats(phydev, phy_stats, phydev_stats); } static inline void phy_ethtool_get_link_ext_stats(struct phy_device *phydev, struct ethtool_link_ext_stats *link_stats) { ASSERT_RTNL(); if (!phylib_stubs) return; phylib_stubs->get_link_ext_stats(phydev, link_stats); } #else static inline int phy_hwtstamp_get(struct phy_device *phydev, struct kernel_hwtstamp_config *config) { return -EOPNOTSUPP; } static inline int phy_hwtstamp_set(struct phy_device *phydev, struct kernel_hwtstamp_config *config, struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } static inline void phy_ethtool_get_phy_stats(struct phy_device *phydev, struct ethtool_eth_phy_stats *phy_stats, struct ethtool_phy_stats *phydev_stats) { } static inline void phy_ethtool_get_link_ext_stats(struct phy_device *phydev, struct ethtool_link_ext_stats *link_stats) { } #endif
4 1 3 11 11 3 5 3 1 1 1 1 2 2 2 2 2 1 8 1 2 1 1 3 2 2 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 // SPDX-License-Identifier: GPL-2.0-only #include <linux/slab.h> #include <linux/stat.h> #include <linux/sched/xacct.h> #include <linux/fcntl.h> #include <linux/file.h> #include <linux/uio.h> #include <linux/fsnotify.h> #include <linux/security.h> #include <linux/export.h> #include <linux/syscalls.h> #include <linux/pagemap.h> #include <linux/splice.h> #include <linux/compat.h> #include <linux/mount.h> #include <linux/fs.h> #include <linux/dax.h> #include <linux/overflow.h> #include "internal.h" #include <linux/uaccess.h> #include <asm/unistd.h> /* * Performs necessary checks before doing a clone. * * Can adjust amount of bytes to clone via @req_count argument. * Returns appropriate error code that caller should return or * zero in case the clone should be allowed. */ static int generic_remap_checks(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t *req_count, unsigned int remap_flags) { struct inode *inode_in = file_in->f_mapping->host; struct inode *inode_out = file_out->f_mapping->host; uint64_t count = *req_count; uint64_t bcount; loff_t size_in, size_out; loff_t bs = inode_out->i_sb->s_blocksize; int ret; /* The start of both ranges must be aligned to an fs block. */ if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_out, bs)) return -EINVAL; /* Ensure offsets don't wrap. */ if (pos_in + count < pos_in || pos_out + count < pos_out) return -EINVAL; size_in = i_size_read(inode_in); size_out = i_size_read(inode_out); /* Dedupe requires both ranges to be within EOF. */ if ((remap_flags & REMAP_FILE_DEDUP) && (pos_in >= size_in || pos_in + count > size_in || pos_out >= size_out || pos_out + count > size_out)) return -EINVAL; /* Ensure the infile range is within the infile. */ if (pos_in >= size_in) return -EINVAL; count = min(count, size_in - (uint64_t)pos_in); ret = generic_write_check_limits(file_out, pos_out, &count); if (ret) return ret; /* * If the user wanted us to link to the infile's EOF, round up to the * next block boundary for this check. * * Otherwise, make sure the count is also block-aligned, having * already confirmed the starting offsets' block alignment. */ if (pos_in + count == size_in && (!(remap_flags & REMAP_FILE_DEDUP) || pos_out + count == size_out)) { bcount = ALIGN(size_in, bs) - pos_in; } else { if (!IS_ALIGNED(count, bs)) count = ALIGN_DOWN(count, bs); bcount = count; } /* Don't allow overlapped cloning within the same file. */ if (inode_in == inode_out && pos_out + bcount > pos_in && pos_out < pos_in + bcount) return -EINVAL; /* * We shortened the request but the caller can't deal with that, so * bounce the request back to userspace. */ if (*req_count != count && !(remap_flags & REMAP_FILE_CAN_SHORTEN)) return -EINVAL; *req_count = count; return 0; } int remap_verify_area(struct file *file, loff_t pos, loff_t len, bool write) { int mask = write ? MAY_WRITE : MAY_READ; loff_t tmp; int ret; if (unlikely(pos < 0 || len < 0)) return -EINVAL; if (unlikely(check_add_overflow(pos, len, &tmp))) return -EINVAL; ret = security_file_permission(file, mask); if (ret) return ret; return fsnotify_file_area_perm(file, mask, &pos, len); } EXPORT_SYMBOL_GPL(remap_verify_area); /* * Ensure that we don't remap a partial EOF block in the middle of something * else. Assume that the offsets have already been checked for block * alignment. * * For clone we only link a partial EOF block above or at the destination file's * EOF. For deduplication we accept a partial EOF block only if it ends at the * destination file's EOF (can not link it into the middle of a file). * * Shorten the request if possible. */ static int generic_remap_check_len(struct inode *inode_in, struct inode *inode_out, loff_t pos_out, loff_t *len, unsigned int remap_flags) { u64 blkmask = i_blocksize(inode_in) - 1; loff_t new_len = *len; if ((*len & blkmask) == 0) return 0; if (pos_out + *len < i_size_read(inode_out)) new_len &= ~blkmask; if (new_len == *len) return 0; if (remap_flags & REMAP_FILE_CAN_SHORTEN) { *len = new_len; return 0; } return (remap_flags & REMAP_FILE_DEDUP) ? -EBADE : -EINVAL; } /* Read a page's worth of file data into the page cache. */ static struct folio *vfs_dedupe_get_folio(struct file *file, loff_t pos) { return read_mapping_folio(file->f_mapping, pos >> PAGE_SHIFT, file); } /* * Lock two folios, ensuring that we lock in offset order if the folios * are from the same file. */ static void vfs_lock_two_folios(struct folio *folio1, struct folio *folio2) { /* Always lock in order of increasing index. */ if (folio1->index > folio2->index) swap(folio1, folio2); folio_lock(folio1); if (folio1 != folio2) folio_lock(folio2); } /* Unlock two folios, being careful not to unlock the same folio twice. */ static void vfs_unlock_two_folios(struct folio *folio1, struct folio *folio2) { folio_unlock(folio1); if (folio1 != folio2) folio_unlock(folio2); } /* * Compare extents of two files to see if they are the same. * Caller must have locked both inodes to prevent write races. */ static int vfs_dedupe_file_range_compare(struct file *src, loff_t srcoff, struct file *dest, loff_t dstoff, loff_t len, bool *is_same) { bool same = true; int error = -EINVAL; while (len) { struct folio *src_folio, *dst_folio; void *src_addr, *dst_addr; loff_t cmp_len = min(PAGE_SIZE - offset_in_page(srcoff), PAGE_SIZE - offset_in_page(dstoff)); cmp_len = min(cmp_len, len); if (cmp_len <= 0) goto out_error; src_folio = vfs_dedupe_get_folio(src, srcoff); if (IS_ERR(src_folio)) { error = PTR_ERR(src_folio); goto out_error; } dst_folio = vfs_dedupe_get_folio(dest, dstoff); if (IS_ERR(dst_folio)) { error = PTR_ERR(dst_folio); folio_put(src_folio); goto out_error; } vfs_lock_two_folios(src_folio, dst_folio); /* * Now that we've locked both folios, make sure they're still * mapped to the file data we're interested in. If not, * someone is invalidating pages on us and we lose. */ if (!folio_test_uptodate(src_folio) || !folio_test_uptodate(dst_folio) || src_folio->mapping != src->f_mapping || dst_folio->mapping != dest->f_mapping) { same = false; goto unlock; } src_addr = kmap_local_folio(src_folio, offset_in_folio(src_folio, srcoff)); dst_addr = kmap_local_folio(dst_folio, offset_in_folio(dst_folio, dstoff)); flush_dcache_folio(src_folio); flush_dcache_folio(dst_folio); if (memcmp(src_addr, dst_addr, cmp_len)) same = false; kunmap_local(dst_addr); kunmap_local(src_addr); unlock: vfs_unlock_two_folios(src_folio, dst_folio); folio_put(dst_folio); folio_put(src_folio); if (!same) break; srcoff += cmp_len; dstoff += cmp_len; len -= cmp_len; } *is_same = same; return 0; out_error: return error; } /* * Check that the two inodes are eligible for cloning, the ranges make * sense, and then flush all dirty data. Caller must ensure that the * inodes have been locked against any other modifications. * * If there's an error, then the usual negative error code is returned. * Otherwise returns 0 with *len set to the request length. */ int __generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t *len, unsigned int remap_flags, const struct iomap_ops *dax_read_ops) { struct inode *inode_in = file_inode(file_in); struct inode *inode_out = file_inode(file_out); bool same_inode = (inode_in == inode_out); int ret; /* Don't touch certain kinds of inodes */ if (IS_IMMUTABLE(inode_out)) return -EPERM; if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out)) return -ETXTBSY; /* Don't reflink dirs, pipes, sockets... */ if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) return -EISDIR; if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) return -EINVAL; /* Zero length dedupe exits immediately; reflink goes to EOF. */ if (*len == 0) { loff_t isize = i_size_read(inode_in); if ((remap_flags & REMAP_FILE_DEDUP) || pos_in == isize) return 0; if (pos_in > isize) return -EINVAL; *len = isize - pos_in; if (*len == 0) return 0; } /* Check that we don't violate system file offset limits. */ ret = generic_remap_checks(file_in, pos_in, file_out, pos_out, len, remap_flags); if (ret || *len == 0) return ret; /* Wait for the completion of any pending IOs on both files */ inode_dio_wait(inode_in); if (!same_inode) inode_dio_wait(inode_out); ret = filemap_write_and_wait_range(inode_in->i_mapping, pos_in, pos_in + *len - 1); if (ret) return ret; ret = filemap_write_and_wait_range(inode_out->i_mapping, pos_out, pos_out + *len - 1); if (ret) return ret; /* * Check that the extents are the same. */ if (remap_flags & REMAP_FILE_DEDUP) { bool is_same = false; if (!IS_DAX(inode_in)) ret = vfs_dedupe_file_range_compare(file_in, pos_in, file_out, pos_out, *len, &is_same); else if (dax_read_ops) ret = dax_dedupe_file_range_compare(inode_in, pos_in, inode_out, pos_out, *len, &is_same, dax_read_ops); else return -EINVAL; if (ret) return ret; if (!is_same) return -EBADE; } ret = generic_remap_check_len(inode_in, inode_out, pos_out, len, remap_flags); if (ret || *len == 0) return ret; /* If can't alter the file contents, we're done. */ if (!(remap_flags & REMAP_FILE_DEDUP)) ret = file_modified(file_out); return ret; } int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t *len, unsigned int remap_flags) { return __generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out, len, remap_flags, NULL); } EXPORT_SYMBOL(generic_remap_file_range_prep); loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t len, unsigned int remap_flags) { loff_t ret; WARN_ON_ONCE(remap_flags & REMAP_FILE_DEDUP); if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) return -EXDEV; ret = generic_file_rw_checks(file_in, file_out); if (ret < 0) return ret; if (!file_in->f_op->remap_file_range) return -EOPNOTSUPP; ret = remap_verify_area(file_in, pos_in, len, false); if (ret) return ret; ret = remap_verify_area(file_out, pos_out, len, true); if (ret) return ret; file_start_write(file_out); ret = file_in->f_op->remap_file_range(file_in, pos_in, file_out, pos_out, len, remap_flags); file_end_write(file_out); if (ret < 0) return ret; fsnotify_access(file_in); fsnotify_modify(file_out); return ret; } EXPORT_SYMBOL(vfs_clone_file_range); /* Check whether we are allowed to dedupe the destination file */ static bool may_dedupe_file(struct file *file) { struct mnt_idmap *idmap = file_mnt_idmap(file); struct inode *inode = file_inode(file); if (capable(CAP_SYS_ADMIN)) return true; if (file->f_mode & FMODE_WRITE) return true; if (vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, inode), current_fsuid())) return true; if (!inode_permission(idmap, inode, MAY_WRITE)) return true; return false; } loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, struct file *dst_file, loff_t dst_pos, loff_t len, unsigned int remap_flags) { loff_t ret; WARN_ON_ONCE(remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_CAN_SHORTEN)); /* * This is redundant if called from vfs_dedupe_file_range(), but other * callers need it and it's not performance sesitive... */ ret = remap_verify_area(src_file, src_pos, len, false); if (ret) return ret; ret = remap_verify_area(dst_file, dst_pos, len, true); if (ret) return ret; /* * This needs to be called after remap_verify_area() because of * sb_start_write() and before may_dedupe_file() because the mount's * MAY_WRITE need to be checked with mnt_get_write_access_file() held. */ ret = mnt_want_write_file(dst_file); if (ret) return ret; ret = -EPERM; if (!may_dedupe_file(dst_file)) goto out_drop_write; ret = -EXDEV; if (file_inode(src_file)->i_sb != file_inode(dst_file)->i_sb) goto out_drop_write; ret = -EISDIR; if (S_ISDIR(file_inode(dst_file)->i_mode)) goto out_drop_write; ret = -EINVAL; if (!dst_file->f_op->remap_file_range) goto out_drop_write; if (len == 0) { ret = 0; goto out_drop_write; } ret = dst_file->f_op->remap_file_range(src_file, src_pos, dst_file, dst_pos, len, remap_flags | REMAP_FILE_DEDUP); out_drop_write: mnt_drop_write_file(dst_file); return ret; } EXPORT_SYMBOL(vfs_dedupe_file_range_one); int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same) { struct file_dedupe_range_info *info; struct inode *src = file_inode(file); u64 off; u64 len; int i; int ret; u16 count = same->dest_count; loff_t deduped; if (!(file->f_mode & FMODE_READ)) return -EINVAL; if (same->reserved1 || same->reserved2) return -EINVAL; off = same->src_offset; len = same->src_length; if (S_ISDIR(src->i_mode)) return -EISDIR; if (!S_ISREG(src->i_mode)) return -EINVAL; if (!file->f_op->remap_file_range) return -EOPNOTSUPP; ret = remap_verify_area(file, off, len, false); if (ret < 0) return ret; ret = 0; if (off + len > i_size_read(src)) return -EINVAL; /* Arbitrary 1G limit on a single dedupe request, can be raised. */ len = min_t(u64, len, 1 << 30); /* pre-format output fields to sane values */ for (i = 0; i < count; i++) { same->info[i].bytes_deduped = 0ULL; same->info[i].status = FILE_DEDUPE_RANGE_SAME; } for (i = 0, info = same->info; i < count; i++, info++) { CLASS(fd, dst_fd)(info->dest_fd); if (fd_empty(dst_fd)) { info->status = -EBADF; goto next_loop; } if (info->reserved) { info->status = -EINVAL; goto next_loop; } deduped = vfs_dedupe_file_range_one(file, off, fd_file(dst_fd), info->dest_offset, len, REMAP_FILE_CAN_SHORTEN); if (deduped == -EBADE) info->status = FILE_DEDUPE_RANGE_DIFFERS; else if (deduped < 0) info->status = deduped; else info->bytes_deduped = len; next_loop: if (fatal_signal_pending(current)) break; } return ret; } EXPORT_SYMBOL(vfs_dedupe_file_range);
3 3 3 3 3 3 2 2 2 2 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 // SPDX-License-Identifier: GPL-2.0 /* * xfrm6_input.c: based on net/ipv4/xfrm4_input.c * * Authors: * Mitsuru KANDA @USAGI * Kazunori MIYAZAWA @USAGI * Kunihiro Ishiguro <kunihiro@ipinfusion.com> * YOSHIFUJI Hideaki @USAGI * IPv6 support */ #include <linux/module.h> #include <linux/string.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> #include <net/ipv6.h> #include <net/xfrm.h> #include <net/protocol.h> #include <net/gro.h> int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi, struct ip6_tnl *t) { XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t; XFRM_SPI_SKB_CB(skb)->family = AF_INET6; XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr); return xfrm_input(skb, nexthdr, spi, 0); } EXPORT_SYMBOL(xfrm6_rcv_spi); static int xfrm6_transport_finish2(struct net *net, struct sock *sk, struct sk_buff *skb) { if (xfrm_trans_queue(skb, ip6_rcv_finish)) { kfree_skb(skb); return NET_RX_DROP; } return 0; } int xfrm6_transport_finish(struct sk_buff *skb, int async) { struct xfrm_offload *xo = xfrm_offload(skb); int nhlen = -skb_network_offset(skb); skb_network_header(skb)[IP6CB(skb)->nhoff] = XFRM_MODE_SKB_CB(skb)->protocol; #ifndef CONFIG_NETFILTER if (!async) return 1; #endif __skb_push(skb, nhlen); ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); skb_postpush_rcsum(skb, skb_network_header(skb), nhlen); if (xo && (xo->flags & XFRM_GRO)) { /* The full l2 header needs to be preserved so that re-injecting the packet at l2 * works correctly in the presence of vlan tags. */ skb_mac_header_rebuild_full(skb, xo->orig_mac_len); skb_reset_network_header(skb); skb_reset_transport_header(skb); return 0; } NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, dev_net(skb->dev), NULL, skb, skb->dev, NULL, xfrm6_transport_finish2); return 0; } static int __xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull) { struct udp_sock *up = udp_sk(sk); struct udphdr *uh; struct ipv6hdr *ip6h; int len; int ip6hlen = sizeof(struct ipv6hdr); __u8 *udpdata; __be32 *udpdata32; u16 encap_type; encap_type = READ_ONCE(up->encap_type); /* if this is not encapsulated socket, then just return now */ if (!encap_type) return 1; /* If this is a paged skb, make sure we pull up * whatever data we need to look at. */ len = skb->len - sizeof(struct udphdr); if (!pskb_may_pull(skb, sizeof(struct udphdr) + min(len, 8))) return 1; /* Now we can get the pointers */ uh = udp_hdr(skb); udpdata = (__u8 *)uh + sizeof(struct udphdr); udpdata32 = (__be32 *)udpdata; switch (encap_type) { default: case UDP_ENCAP_ESPINUDP: /* Check if this is a keepalive packet. If so, eat it. */ if (len == 1 && udpdata[0] == 0xff) { return -EINVAL; } else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) { /* ESP Packet without Non-ESP header */ len = sizeof(struct udphdr); } else /* Must be an IKE packet.. pass it through */ return 1; break; } /* At this point we are sure that this is an ESPinUDP packet, * so we need to remove 'len' bytes from the packet (the UDP * header and optional ESP marker bytes) and then modify the * protocol to ESP, and then call into the transform receiver. */ if (skb_unclone(skb, GFP_ATOMIC)) return -EINVAL; /* Now we can update and verify the packet length... */ ip6h = ipv6_hdr(skb); ip6h->payload_len = htons(ntohs(ip6h->payload_len) - len); if (skb->len < ip6hlen + len) { /* packet is too small!?! */ return -EINVAL; } /* pull the data buffer up to the ESP header and set the * transport header to point to ESP. Keep UDP on the stack * for later. */ if (pull) { __skb_pull(skb, len); skb_reset_transport_header(skb); } else { skb_set_transport_header(skb, len); } /* process ESP */ return 0; } /* If it's a keepalive packet, then just eat it. * If it's an encapsulated packet, then pass it to the * IPsec xfrm input. * Returns 0 if skb passed to xfrm or was dropped. * Returns >0 if skb should be passed to UDP. * Returns <0 if skb should be resubmitted (-ret is protocol) */ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) { int ret; if (skb->protocol == htons(ETH_P_IP)) return xfrm4_udp_encap_rcv(sk, skb); ret = __xfrm6_udp_encap_rcv(sk, skb, true); if (!ret) return xfrm6_rcv_encap(skb, IPPROTO_ESP, 0, udp_sk(sk)->encap_type); if (ret < 0) { kfree_skb(skb); return 0; } return ret; } struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, struct sk_buff *skb) { int offset = skb_gro_offset(skb); const struct net_offload *ops; struct sk_buff *pp = NULL; int len, dlen; __u8 *udpdata; __be32 *udpdata32; if (skb->protocol == htons(ETH_P_IP)) return xfrm4_gro_udp_encap_rcv(sk, head, skb); len = skb->len - offset; dlen = offset + min(len, 8); udpdata = skb_gro_header(skb, dlen, offset); udpdata32 = (__be32 *)udpdata; if (unlikely(!udpdata)) return NULL; rcu_read_lock(); ops = rcu_dereference(inet6_offloads[IPPROTO_ESP]); if (!ops || !ops->callbacks.gro_receive) goto out; /* check if it is a keepalive or IKE packet */ if (len <= sizeof(struct ip_esp_hdr) || udpdata32[0] == 0) goto out; /* set the transport header to ESP */ skb_set_transport_header(skb, offset); NAPI_GRO_CB(skb)->proto = IPPROTO_UDP; pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); rcu_read_unlock(); return pp; out: rcu_read_unlock(); NAPI_GRO_CB(skb)->same_flow = 0; NAPI_GRO_CB(skb)->flush = 1; return NULL; } int xfrm6_rcv_tnl(struct sk_buff *skb, struct ip6_tnl *t) { return xfrm6_rcv_spi(skb, skb_network_header(skb)[IP6CB(skb)->nhoff], 0, t); } EXPORT_SYMBOL(xfrm6_rcv_tnl); int xfrm6_rcv(struct sk_buff *skb) { return xfrm6_rcv_tnl(skb, NULL); } EXPORT_SYMBOL(xfrm6_rcv); int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto) { struct net *net = dev_net(skb->dev); struct xfrm_state *x = NULL; struct sec_path *sp; int i = 0; sp = secpath_set(skb); if (!sp) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR); goto drop; } if (1 + sp->len == XFRM_MAX_DEPTH) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR); goto drop; } for (i = 0; i < 3; i++) { xfrm_address_t *dst, *src; switch (i) { case 0: dst = daddr; src = saddr; break; case 1: /* lookup state with wild-card source address */ dst = daddr; src = (xfrm_address_t *)&in6addr_any; break; default: /* lookup state with wild-card addresses */ dst = (xfrm_address_t *)&in6addr_any; src = (xfrm_address_t *)&in6addr_any; break; } x = xfrm_state_lookup_byaddr(net, skb->mark, dst, src, proto, AF_INET6); if (!x) continue; if (unlikely(x->dir && x->dir != XFRM_SA_DIR_IN)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEDIRERROR); xfrm_state_put(x); x = NULL; continue; } spin_lock(&x->lock); if ((!i || (x->props.flags & XFRM_STATE_WILDRECV)) && likely(x->km.state == XFRM_STATE_VALID) && !xfrm_state_check_expire(x)) { spin_unlock(&x->lock); if (x->type->input(x, skb) > 0) { /* found a valid state */ break; } } else spin_unlock(&x->lock); xfrm_state_put(x); x = NULL; } if (!x) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES); xfrm_audit_state_notfound_simple(skb, AF_INET6); goto drop; } sp->xvec[sp->len++] = x; spin_lock(&x->lock); x->curlft.bytes += skb->len; x->curlft.packets++; spin_unlock(&x->lock); return 1; drop: return -1; } EXPORT_SYMBOL(xfrm6_input_addr);
3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 // SPDX-License-Identifier: GPL-2.0 /* * * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved. * * This code builds two trees of free clusters extents. * Trees are sorted by start of extent and by length of extent. * NTFS_MAX_WND_EXTENTS defines the maximum number of elements in trees. * In extreme case code reads on-disk bitmap to find free clusters. * */ #include <linux/buffer_head.h> #include <linux/fs.h> #include <linux/kernel.h> #include "ntfs.h" #include "ntfs_fs.h" /* * Maximum number of extents in tree. */ #define NTFS_MAX_WND_EXTENTS (32u * 1024u) struct rb_node_key { struct rb_node node; size_t key; }; struct e_node { struct rb_node_key start; /* Tree sorted by start. */ struct rb_node_key count; /* Tree sorted by len. */ }; static int wnd_rescan(struct wnd_bitmap *wnd); static struct buffer_head *wnd_map(struct wnd_bitmap *wnd, size_t iw); static bool wnd_is_free_hlp(struct wnd_bitmap *wnd, size_t bit, size_t bits); static struct kmem_cache *ntfs_enode_cachep; int __init ntfs3_init_bitmap(void) { ntfs_enode_cachep = kmem_cache_create("ntfs3_enode_cache", sizeof(struct e_node), 0, SLAB_RECLAIM_ACCOUNT, NULL); return ntfs_enode_cachep ? 0 : -ENOMEM; } void ntfs3_exit_bitmap(void) { kmem_cache_destroy(ntfs_enode_cachep); } /* * wnd_scan * * b_pos + b_len - biggest fragment. * Scan range [wpos wbits) window @buf. * * Return: -1 if not found. */ static size_t wnd_scan(const void *buf, size_t wbit, u32 wpos, u32 wend, size_t to_alloc, size_t *prev_tail, size_t *b_pos, size_t *b_len) { while (wpos < wend) { size_t free_len; u32 free_bits, end; u32 used = find_next_zero_bit_le(buf, wend, wpos); if (used >= wend) { if (*b_len < *prev_tail) { *b_pos = wbit - *prev_tail; *b_len = *prev_tail; } *prev_tail = 0; return -1; } if (used > wpos) { wpos = used; if (*b_len < *prev_tail) { *b_pos = wbit - *prev_tail; *b_len = *prev_tail; } *prev_tail = 0; } /* * Now we have a fragment [wpos, wend) staring with 0. */ end = wpos + to_alloc - *prev_tail; free_bits = find_next_bit_le(buf, min(end, wend), wpos); free_len = *prev_tail + free_bits - wpos; if (*b_len < free_len) { *b_pos = wbit + wpos - *prev_tail; *b_len = free_len; } if (free_len >= to_alloc) return wbit + wpos - *prev_tail; if (free_bits >= wend) { *prev_tail += free_bits - wpos; return -1; } wpos = free_bits + 1; *prev_tail = 0; } return -1; } /* * wnd_close - Frees all resources. */ void wnd_close(struct wnd_bitmap *wnd) { struct rb_node *node, *next; kvfree(wnd->free_bits); wnd->free_bits = NULL; run_close(&wnd->run); node = rb_first(&wnd->start_tree); while (node) { next = rb_next(node); rb_erase(node, &wnd->start_tree); kmem_cache_free(ntfs_enode_cachep, rb_entry(node, struct e_node, start.node)); node = next; } } static struct rb_node *rb_lookup(struct rb_root *root, size_t v) { struct rb_node **p = &root->rb_node; struct rb_node *r = NULL; while (*p) { struct rb_node_key *k; k = rb_entry(*p, struct rb_node_key, node); if (v < k->key) { p = &(*p)->rb_left; } else if (v > k->key) { r = &k->node; p = &(*p)->rb_right; } else { return &k->node; } } return r; } /* * rb_insert_count - Helper function to insert special kind of 'count' tree. */ static inline bool rb_insert_count(struct rb_root *root, struct e_node *e) { struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; size_t e_ckey = e->count.key; size_t e_skey = e->start.key; while (*p) { struct e_node *k = rb_entry(parent = *p, struct e_node, count.node); if (e_ckey > k->count.key) { p = &(*p)->rb_left; } else if (e_ckey < k->count.key) { p = &(*p)->rb_right; } else if (e_skey < k->start.key) { p = &(*p)->rb_left; } else if (e_skey > k->start.key) { p = &(*p)->rb_right; } else { WARN_ON(1); return false; } } rb_link_node(&e->count.node, parent, p); rb_insert_color(&e->count.node, root); return true; } /* * rb_insert_start - Helper function to insert special kind of 'count' tree. */ static inline bool rb_insert_start(struct rb_root *root, struct e_node *e) { struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; size_t e_skey = e->start.key; while (*p) { struct e_node *k; parent = *p; k = rb_entry(parent, struct e_node, start.node); if (e_skey < k->start.key) { p = &(*p)->rb_left; } else if (e_skey > k->start.key) { p = &(*p)->rb_right; } else { WARN_ON(1); return false; } } rb_link_node(&e->start.node, parent, p); rb_insert_color(&e->start.node, root); return true; } /* * wnd_add_free_ext - Adds a new extent of free space. * @build: 1 when building tree. */ static void wnd_add_free_ext(struct wnd_bitmap *wnd, size_t bit, size_t len, bool build) { struct e_node *e, *e0 = NULL; size_t ib, end_in = bit + len; struct rb_node *n; if (build) { /* Use extent_min to filter too short extents. */ if (wnd->count >= NTFS_MAX_WND_EXTENTS && len <= wnd->extent_min) { wnd->uptodated = -1; return; } } else { /* Try to find extent before 'bit'. */ n = rb_lookup(&wnd->start_tree, bit); if (!n) { n = rb_first(&wnd->start_tree); } else { e = rb_entry(n, struct e_node, start.node); n = rb_next(n); if (e->start.key + e->count.key == bit) { /* Remove left. */ bit = e->start.key; len += e->count.key; rb_erase(&e->start.node, &wnd->start_tree); rb_erase(&e->count.node, &wnd->count_tree); wnd->count -= 1; e0 = e; } } while (n) { size_t next_end; e = rb_entry(n, struct e_node, start.node); next_end = e->start.key + e->count.key; if (e->start.key > end_in) break; /* Remove right. */ n = rb_next(n); len += next_end - end_in; end_in = next_end; rb_erase(&e->start.node, &wnd->start_tree); rb_erase(&e->count.node, &wnd->count_tree); wnd->count -= 1; if (!e0) e0 = e; else kmem_cache_free(ntfs_enode_cachep, e); } if (wnd->uptodated != 1) { /* Check bits before 'bit'. */ ib = wnd->zone_bit == wnd->zone_end || bit < wnd->zone_end ? 0 : wnd->zone_end; while (bit > ib && wnd_is_free_hlp(wnd, bit - 1, 1)) { bit -= 1; len += 1; } /* Check bits after 'end_in'. */ ib = wnd->zone_bit == wnd->zone_end || end_in > wnd->zone_bit ? wnd->nbits : wnd->zone_bit; while (end_in < ib && wnd_is_free_hlp(wnd, end_in, 1)) { end_in += 1; len += 1; } } } /* Insert new fragment. */ if (wnd->count >= NTFS_MAX_WND_EXTENTS) { if (e0) kmem_cache_free(ntfs_enode_cachep, e0); wnd->uptodated = -1; /* Compare with smallest fragment. */ n = rb_last(&wnd->count_tree); e = rb_entry(n, struct e_node, count.node); if (len <= e->count.key) goto out; /* Do not insert small fragments. */ if (build) { struct e_node *e2; n = rb_prev(n); e2 = rb_entry(n, struct e_node, count.node); /* Smallest fragment will be 'e2->count.key'. */ wnd->extent_min = e2->count.key; } /* Replace smallest fragment by new one. */ rb_erase(&e->start.node, &wnd->start_tree); rb_erase(&e->count.node, &wnd->count_tree); wnd->count -= 1; } else { e = e0 ? e0 : kmem_cache_alloc(ntfs_enode_cachep, GFP_ATOMIC); if (!e) { wnd->uptodated = -1; goto out; } if (build && len <= wnd->extent_min) wnd->extent_min = len; } e->start.key = bit; e->count.key = len; if (len > wnd->extent_max) wnd->extent_max = len; rb_insert_start(&wnd->start_tree, e); rb_insert_count(&wnd->count_tree, e); wnd->count += 1; out:; } /* * wnd_remove_free_ext - Remove a run from the cached free space. */ static void wnd_remove_free_ext(struct wnd_bitmap *wnd, size_t bit, size_t len) { struct rb_node *n, *n3; struct e_node *e, *e3; size_t end_in = bit + len; size_t end3, end, new_key, new_len, max_new_len; /* Try to find extent before 'bit'. */ n = rb_lookup(&wnd->start_tree, bit); if (!n) return; e = rb_entry(n, struct e_node, start.node); end = e->start.key + e->count.key; new_key = new_len = 0; len = e->count.key; /* Range [bit,end_in) must be inside 'e' or outside 'e' and 'n'. */ if (e->start.key > bit) ; else if (end_in <= end) { /* Range [bit,end_in) inside 'e'. */ new_key = end_in; new_len = end - end_in; len = bit - e->start.key; } else if (bit > end) { bool bmax = false; n3 = rb_next(n); while (n3) { e3 = rb_entry(n3, struct e_node, start.node); if (e3->start.key >= end_in) break; if (e3->count.key == wnd->extent_max) bmax = true; end3 = e3->start.key + e3->count.key; if (end3 > end_in) { e3->start.key = end_in; rb_erase(&e3->count.node, &wnd->count_tree); e3->count.key = end3 - end_in; rb_insert_count(&wnd->count_tree, e3); break; } n3 = rb_next(n3); rb_erase(&e3->start.node, &wnd->start_tree); rb_erase(&e3->count.node, &wnd->count_tree); wnd->count -= 1; kmem_cache_free(ntfs_enode_cachep, e3); } if (!bmax) return; n3 = rb_first(&wnd->count_tree); wnd->extent_max = n3 ? rb_entry(n3, struct e_node, count.node)->count.key : 0; return; } if (e->count.key != wnd->extent_max) { ; } else if (rb_prev(&e->count.node)) { ; } else { n3 = rb_next(&e->count.node); max_new_len = max(len, new_len); if (!n3) { wnd->extent_max = max_new_len; } else { e3 = rb_entry(n3, struct e_node, count.node); wnd->extent_max = max(e3->count.key, max_new_len); } } if (!len) { if (new_len) { e->start.key = new_key; rb_erase(&e->count.node, &wnd->count_tree); e->count.key = new_len; rb_insert_count(&wnd->count_tree, e); } else { rb_erase(&e->start.node, &wnd->start_tree); rb_erase(&e->count.node, &wnd->count_tree); wnd->count -= 1; kmem_cache_free(ntfs_enode_cachep, e); } goto out; } rb_erase(&e->count.node, &wnd->count_tree); e->count.key = len; rb_insert_count(&wnd->count_tree, e); if (!new_len) goto out; if (wnd->count >= NTFS_MAX_WND_EXTENTS) { wnd->uptodated = -1; /* Get minimal extent. */ e = rb_entry(rb_last(&wnd->count_tree), struct e_node, count.node); if (e->count.key > new_len) goto out; /* Replace minimum. */ rb_erase(&e->start.node, &wnd->start_tree); rb_erase(&e->count.node, &wnd->count_tree); wnd->count -= 1; } else { e = kmem_cache_alloc(ntfs_enode_cachep, GFP_ATOMIC); if (!e) wnd->uptodated = -1; } if (e) { e->start.key = new_key; e->count.key = new_len; rb_insert_start(&wnd->start_tree, e); rb_insert_count(&wnd->count_tree, e); wnd->count += 1; } out: if (!wnd->count && 1 != wnd->uptodated) wnd_rescan(wnd); } /* * wnd_rescan - Scan all bitmap. Used while initialization. */ static int wnd_rescan(struct wnd_bitmap *wnd) { int err = 0; size_t prev_tail = 0; struct super_block *sb = wnd->sb; struct ntfs_sb_info *sbi = sb->s_fs_info; u64 lbo, len = 0; u32 blocksize = sb->s_blocksize; u8 cluster_bits = sbi->cluster_bits; u32 wbits = 8 * sb->s_blocksize; u32 used, frb; size_t wpos, wbit, iw, vbo; struct buffer_head *bh = NULL; CLST lcn, clen; wnd->uptodated = 0; wnd->extent_max = 0; wnd->extent_min = MINUS_ONE_T; wnd->total_zeroes = 0; vbo = 0; for (iw = 0; iw < wnd->nwnd; iw++) { if (iw + 1 == wnd->nwnd) wbits = wnd->bits_last; if (wnd->inited) { if (!wnd->free_bits[iw]) { /* All ones. */ if (prev_tail) { wnd_add_free_ext(wnd, vbo * 8 - prev_tail, prev_tail, true); prev_tail = 0; } goto next_wnd; } if (wbits == wnd->free_bits[iw]) { /* All zeroes. */ prev_tail += wbits; wnd->total_zeroes += wbits; goto next_wnd; } } if (!len) { u32 off = vbo & sbi->cluster_mask; if (!run_lookup_entry(&wnd->run, vbo >> cluster_bits, &lcn, &clen, NULL)) { err = -ENOENT; goto out; } lbo = ((u64)lcn << cluster_bits) + off; len = ((u64)clen << cluster_bits) - off; } bh = ntfs_bread(sb, lbo >> sb->s_blocksize_bits); if (!bh) { err = -EIO; goto out; } used = ntfs_bitmap_weight_le(bh->b_data, wbits); if (used < wbits) { frb = wbits - used; wnd->free_bits[iw] = frb; wnd->total_zeroes += frb; } wpos = 0; wbit = vbo * 8; if (wbit + wbits > wnd->nbits) wbits = wnd->nbits - wbit; do { used = find_next_zero_bit_le(bh->b_data, wbits, wpos); if (used > wpos && prev_tail) { wnd_add_free_ext(wnd, wbit + wpos - prev_tail, prev_tail, true); prev_tail = 0; } wpos = used; if (wpos >= wbits) { /* No free blocks. */ prev_tail = 0; break; } frb = find_next_bit_le(bh->b_data, wbits, wpos); if (frb >= wbits) { /* Keep last free block. */ prev_tail += frb - wpos; break; } wnd_add_free_ext(wnd, wbit + wpos - prev_tail, frb + prev_tail - wpos, true); /* Skip free block and first '1'. */ wpos = frb + 1; /* Reset previous tail. */ prev_tail = 0; } while (wpos < wbits); next_wnd: if (bh) put_bh(bh); bh = NULL; vbo += blocksize; if (len) { len -= blocksize; lbo += blocksize; } } /* Add last block. */ if (prev_tail) wnd_add_free_ext(wnd, wnd->nbits - prev_tail, prev_tail, true); /* * Before init cycle wnd->uptodated was 0. * If any errors or limits occurs while initialization then * wnd->uptodated will be -1. * If 'uptodated' is still 0 then Tree is really updated. */ if (!wnd->uptodated) wnd->uptodated = 1; if (wnd->zone_bit != wnd->zone_end) { size_t zlen = wnd->zone_end - wnd->zone_bit; wnd->zone_end = wnd->zone_bit; wnd_zone_set(wnd, wnd->zone_bit, zlen); } out: return err; } int wnd_init(struct wnd_bitmap *wnd, struct super_block *sb, size_t nbits) { int err; u32 blocksize = sb->s_blocksize; u32 wbits = blocksize * 8; init_rwsem(&wnd->rw_lock); wnd->sb = sb; wnd->nbits = nbits; wnd->total_zeroes = nbits; wnd->extent_max = MINUS_ONE_T; wnd->zone_bit = wnd->zone_end = 0; wnd->nwnd = bytes_to_block(sb, ntfs3_bitmap_size(nbits)); wnd->bits_last = nbits & (wbits - 1); if (!wnd->bits_last) wnd->bits_last = wbits; wnd->free_bits = kvmalloc_array(wnd->nwnd, sizeof(u16), GFP_KERNEL | __GFP_ZERO); if (!wnd->free_bits) return -ENOMEM; err = wnd_rescan(wnd); if (err) return err; wnd->inited = true; return 0; } /* * wnd_map - Call sb_bread for requested window. */ static struct buffer_head *wnd_map(struct wnd_bitmap *wnd, size_t iw) { size_t vbo; CLST lcn, clen; struct super_block *sb = wnd->sb; struct ntfs_sb_info *sbi; struct buffer_head *bh; u64 lbo; sbi = sb->s_fs_info; vbo = (u64)iw << sb->s_blocksize_bits; if (!run_lookup_entry(&wnd->run, vbo >> sbi->cluster_bits, &lcn, &clen, NULL)) { return ERR_PTR(-ENOENT); } lbo = ((u64)lcn << sbi->cluster_bits) + (vbo & sbi->cluster_mask); bh = ntfs_bread(wnd->sb, lbo >> sb->s_blocksize_bits); if (!bh) return ERR_PTR(-EIO); return bh; } /* * wnd_set_free - Mark the bits range from bit to bit + bits as free. */ int wnd_set_free(struct wnd_bitmap *wnd, size_t bit, size_t bits) { int err = 0; struct super_block *sb = wnd->sb; u32 wbits = 8 * sb->s_blocksize; size_t iw = bit >> (sb->s_blocksize_bits + 3); u32 wbit = bit & (wbits - 1); struct buffer_head *bh; u32 op; for (; iw < wnd->nwnd && bits; iw++, bit += op, bits -= op, wbit = 0) { if (iw + 1 == wnd->nwnd) wbits = wnd->bits_last; op = min_t(u32, wbits - wbit, bits); bh = wnd_map(wnd, iw); if (IS_ERR(bh)) { err = PTR_ERR(bh); break; } lock_buffer(bh); ntfs_bitmap_clear_le(bh->b_data, wbit, op); wnd->free_bits[iw] += op; wnd->total_zeroes += op; set_buffer_uptodate(bh); mark_buffer_dirty(bh); unlock_buffer(bh); put_bh(bh); wnd_add_free_ext(wnd, bit, op, false); } return err; } /* * wnd_set_used - Mark the bits range from bit to bit + bits as used. */ int wnd_set_used(struct wnd_bitmap *wnd, size_t bit, size_t bits) { int err = 0; struct super_block *sb = wnd->sb; size_t iw = bit >> (sb->s_blocksize_bits + 3); u32 wbits = 8 * sb->s_blocksize; u32 wbit = bit & (wbits - 1); struct buffer_head *bh; u32 op; for (; iw < wnd->nwnd && bits; iw++, bit += op, bits -= op, wbit = 0) { if (unlikely(iw + 1 == wnd->nwnd)) wbits = wnd->bits_last; op = min_t(u32, wbits - wbit, bits); bh = wnd_map(wnd, iw); if (IS_ERR(bh)) { err = PTR_ERR(bh); break; } lock_buffer(bh); ntfs_bitmap_set_le(bh->b_data, wbit, op); wnd->free_bits[iw] -= op; wnd->total_zeroes -= op; set_buffer_uptodate(bh); mark_buffer_dirty(bh); unlock_buffer(bh); put_bh(bh); if (!RB_EMPTY_ROOT(&wnd->start_tree)) wnd_remove_free_ext(wnd, bit, op); } return err; } /* * wnd_set_used_safe - Mark the bits range from bit to bit + bits as used. * * Unlikely wnd_set_used/wnd_set_free this function is not full trusted. * It scans every bit in bitmap and marks free bit as used. * @done - how many bits were marked as used. * * NOTE: normally *done should be 0. */ int wnd_set_used_safe(struct wnd_bitmap *wnd, size_t bit, size_t bits, size_t *done) { size_t i, from = 0, len = 0; int err = 0; *done = 0; for (i = 0; i < bits; i++) { if (wnd_is_free(wnd, bit + i, 1)) { if (!len) from = bit + i; len += 1; } else if (len) { err = wnd_set_used(wnd, from, len); *done += len; len = 0; if (err) break; } } if (len) { /* last fragment. */ err = wnd_set_used(wnd, from, len); *done += len; } return err; } /* * wnd_is_free_hlp * * Return: True if all clusters [bit, bit+bits) are free (bitmap only). */ static bool wnd_is_free_hlp(struct wnd_bitmap *wnd, size_t bit, size_t bits) { struct super_block *sb = wnd->sb; size_t iw = bit >> (sb->s_blocksize_bits + 3); u32 wbits = 8 * sb->s_blocksize; u32 wbit = bit & (wbits - 1); u32 op; for (; iw < wnd->nwnd && bits; iw++, bits -= op, wbit = 0) { if (unlikely(iw + 1 == wnd->nwnd)) wbits = wnd->bits_last; op = min_t(u32, wbits - wbit, bits); if (wbits != wnd->free_bits[iw]) { bool ret; struct buffer_head *bh = wnd_map(wnd, iw); if (IS_ERR(bh)) return false; ret = are_bits_clear(bh->b_data, wbit, op); put_bh(bh); if (!ret) return false; } } return true; } /* * wnd_is_free * * Return: True if all clusters [bit, bit+bits) are free. */ bool wnd_is_free(struct wnd_bitmap *wnd, size_t bit, size_t bits) { bool ret; struct rb_node *n; size_t end; struct e_node *e; if (RB_EMPTY_ROOT(&wnd->start_tree)) goto use_wnd; n = rb_lookup(&wnd->start_tree, bit); if (!n) goto use_wnd; e = rb_entry(n, struct e_node, start.node); end = e->start.key + e->count.key; if (bit < end && bit + bits <= end) return true; use_wnd: ret = wnd_is_free_hlp(wnd, bit, bits); return ret; } /* * wnd_is_used * * Return: True if all clusters [bit, bit+bits) are used. */ bool wnd_is_used(struct wnd_bitmap *wnd, size_t bit, size_t bits) { bool ret = false; struct super_block *sb = wnd->sb; size_t iw = bit >> (sb->s_blocksize_bits + 3); u32 wbits = 8 * sb->s_blocksize; u32 wbit = bit & (wbits - 1); u32 op; size_t end; struct rb_node *n; struct e_node *e; if (RB_EMPTY_ROOT(&wnd->start_tree)) goto use_wnd; end = bit + bits; n = rb_lookup(&wnd->start_tree, end - 1); if (!n) goto use_wnd; e = rb_entry(n, struct e_node, start.node); if (e->start.key + e->count.key > bit) return false; use_wnd: for (; iw < wnd->nwnd && bits; iw++, bits -= op, wbit = 0) { if (unlikely(iw + 1 == wnd->nwnd)) wbits = wnd->bits_last; op = min_t(u32, wbits - wbit, bits); if (wnd->free_bits[iw]) { bool ret; struct buffer_head *bh = wnd_map(wnd, iw); if (IS_ERR(bh)) goto out; ret = are_bits_set(bh->b_data, wbit, op); put_bh(bh); if (!ret) goto out; } } ret = true; out: return ret; } /* * wnd_find - Look for free space. * * - flags - BITMAP_FIND_XXX flags * * Return: 0 if not found. */ size_t wnd_find(struct wnd_bitmap *wnd, size_t to_alloc, size_t hint, size_t flags, size_t *allocated) { struct super_block *sb; u32 wbits, wpos, wzbit, wzend; size_t fnd, max_alloc, b_len, b_pos; size_t iw, prev_tail, nwnd, wbit, ebit, zbit, zend; size_t to_alloc0 = to_alloc; const struct e_node *e; const struct rb_node *pr, *cr; u8 log2_bits; bool fbits_valid; struct buffer_head *bh; /* Fast checking for available free space. */ if (flags & BITMAP_FIND_FULL) { size_t zeroes = wnd_zeroes(wnd); zeroes -= wnd->zone_end - wnd->zone_bit; if (zeroes < to_alloc0) goto no_space; if (to_alloc0 > wnd->extent_max) goto no_space; } else { if (to_alloc > wnd->extent_max) to_alloc = wnd->extent_max; } if (wnd->zone_bit <= hint && hint < wnd->zone_end) hint = wnd->zone_end; max_alloc = wnd->nbits; b_len = b_pos = 0; if (hint >= max_alloc) hint = 0; if (RB_EMPTY_ROOT(&wnd->start_tree)) { if (wnd->uptodated == 1) { /* Extents tree is updated -> No free space. */ goto no_space; } goto scan_bitmap; } e = NULL; if (!hint) goto allocate_biggest; /* Use hint: Enumerate extents by start >= hint. */ pr = NULL; cr = wnd->start_tree.rb_node; for (;;) { e = rb_entry(cr, struct e_node, start.node); if (e->start.key == hint) break; if (e->start.key < hint) { pr = cr; cr = cr->rb_right; if (!cr) break; continue; } cr = cr->rb_left; if (!cr) { e = pr ? rb_entry(pr, struct e_node, start.node) : NULL; break; } } if (!e) goto allocate_biggest; if (e->start.key + e->count.key > hint) { /* We have found extension with 'hint' inside. */ size_t len = e->start.key + e->count.key - hint; if (len >= to_alloc && hint + to_alloc <= max_alloc) { fnd = hint; goto found; } if (!(flags & BITMAP_FIND_FULL)) { if (len > to_alloc) len = to_alloc; if (hint + len <= max_alloc) { fnd = hint; to_alloc = len; goto found; } } } allocate_biggest: /* Allocate from biggest free extent. */ e = rb_entry(rb_first(&wnd->count_tree), struct e_node, count.node); if (e->count.key != wnd->extent_max) wnd->extent_max = e->count.key; if (e->count.key < max_alloc) { if (e->count.key >= to_alloc) { ; } else if (flags & BITMAP_FIND_FULL) { if (e->count.key < to_alloc0) { /* Biggest free block is less then requested. */ goto no_space; } to_alloc = e->count.key; } else if (-1 != wnd->uptodated) { to_alloc = e->count.key; } else { /* Check if we can use more bits. */ size_t op, max_check; struct rb_root start_tree; memcpy(&start_tree, &wnd->start_tree, sizeof(struct rb_root)); memset(&wnd->start_tree, 0, sizeof(struct rb_root)); max_check = e->start.key + to_alloc; if (max_check > max_alloc) max_check = max_alloc; for (op = e->start.key + e->count.key; op < max_check; op++) { if (!wnd_is_free(wnd, op, 1)) break; } memcpy(&wnd->start_tree, &start_tree, sizeof(struct rb_root)); to_alloc = op - e->start.key; } /* Prepare to return. */ fnd = e->start.key; if (e->start.key + to_alloc > max_alloc) to_alloc = max_alloc - e->start.key; goto found; } if (wnd->uptodated == 1) { /* Extents tree is updated -> no free space. */ goto no_space; } b_len = e->count.key; b_pos = e->start.key; scan_bitmap: sb = wnd->sb; log2_bits = sb->s_blocksize_bits + 3; /* At most two ranges [hint, max_alloc) + [0, hint). */ Again: /* TODO: Optimize request for case nbits > wbits. */ iw = hint >> log2_bits; wbits = sb->s_blocksize * 8; wpos = hint & (wbits - 1); prev_tail = 0; fbits_valid = true; if (max_alloc == wnd->nbits) { nwnd = wnd->nwnd; } else { size_t t = max_alloc + wbits - 1; nwnd = likely(t > max_alloc) ? (t >> log2_bits) : wnd->nwnd; } /* Enumerate all windows. */ for (; iw < nwnd; iw++) { wbit = iw << log2_bits; if (!wnd->free_bits[iw]) { if (prev_tail > b_len) { b_pos = wbit - prev_tail; b_len = prev_tail; } /* Skip full used window. */ prev_tail = 0; wpos = 0; continue; } if (unlikely(iw + 1 == nwnd)) { if (max_alloc == wnd->nbits) { wbits = wnd->bits_last; } else { size_t t = max_alloc & (wbits - 1); if (t) { wbits = t; fbits_valid = false; } } } if (wnd->zone_end > wnd->zone_bit) { ebit = wbit + wbits; zbit = max(wnd->zone_bit, wbit); zend = min(wnd->zone_end, ebit); /* Here we have a window [wbit, ebit) and zone [zbit, zend). */ if (zend <= zbit) { /* Zone does not overlap window. */ } else { wzbit = zbit - wbit; wzend = zend - wbit; /* Zone overlaps window. */ if (wnd->free_bits[iw] == wzend - wzbit) { prev_tail = 0; wpos = 0; continue; } /* Scan two ranges window: [wbit, zbit) and [zend, ebit). */ bh = wnd_map(wnd, iw); if (IS_ERR(bh)) { /* TODO: Error */ prev_tail = 0; wpos = 0; continue; } /* Scan range [wbit, zbit). */ if (wpos < wzbit) { /* Scan range [wpos, zbit). */ fnd = wnd_scan(bh->b_data, wbit, wpos, wzbit, to_alloc, &prev_tail, &b_pos, &b_len); if (fnd != MINUS_ONE_T) { put_bh(bh); goto found; } } prev_tail = 0; /* Scan range [zend, ebit). */ if (wzend < wbits) { fnd = wnd_scan(bh->b_data, wbit, max(wzend, wpos), wbits, to_alloc, &prev_tail, &b_pos, &b_len); if (fnd != MINUS_ONE_T) { put_bh(bh); goto found; } } wpos = 0; put_bh(bh); continue; } } /* Current window does not overlap zone. */ if (!wpos && fbits_valid && wnd->free_bits[iw] == wbits) { /* Window is empty. */ if (prev_tail + wbits >= to_alloc) { fnd = wbit + wpos - prev_tail; goto found; } /* Increase 'prev_tail' and process next window. */ prev_tail += wbits; wpos = 0; continue; } /* Read window. */ bh = wnd_map(wnd, iw); if (IS_ERR(bh)) { // TODO: Error. prev_tail = 0; wpos = 0; continue; } /* Scan range [wpos, eBits). */ fnd = wnd_scan(bh->b_data, wbit, wpos, wbits, to_alloc, &prev_tail, &b_pos, &b_len); put_bh(bh); if (fnd != MINUS_ONE_T) goto found; } if (b_len < prev_tail) { /* The last fragment. */ b_len = prev_tail; b_pos = max_alloc - prev_tail; } if (hint) { /* * We have scanned range [hint max_alloc). * Prepare to scan range [0 hint + to_alloc). */ size_t nextmax = hint + to_alloc; if (likely(nextmax >= hint) && nextmax < max_alloc) max_alloc = nextmax; hint = 0; goto Again; } if (!b_len) goto no_space; wnd->extent_max = b_len; if (flags & BITMAP_FIND_FULL) goto no_space; fnd = b_pos; to_alloc = b_len; found: if (flags & BITMAP_FIND_MARK_AS_USED) { /* TODO: Optimize remove extent (pass 'e'?). */ if (wnd_set_used(wnd, fnd, to_alloc)) goto no_space; } else if (wnd->extent_max != MINUS_ONE_T && to_alloc > wnd->extent_max) { wnd->extent_max = to_alloc; } *allocated = fnd; return to_alloc; no_space: return 0; } /* * wnd_extend - Extend bitmap ($MFT bitmap). */ int wnd_extend(struct wnd_bitmap *wnd, size_t new_bits) { int err; struct super_block *sb = wnd->sb; struct ntfs_sb_info *sbi = sb->s_fs_info; u32 blocksize = sb->s_blocksize; u32 wbits = blocksize * 8; u32 b0, new_last; size_t bits, iw, new_wnd; size_t old_bits = wnd->nbits; u16 *new_free; if (new_bits <= old_bits) return -EINVAL; /* Align to 8 byte boundary. */ new_wnd = bytes_to_block(sb, ntfs3_bitmap_size(new_bits)); new_last = new_bits & (wbits - 1); if (!new_last) new_last = wbits; if (new_wnd != wnd->nwnd) { new_free = kmalloc_array(new_wnd, sizeof(u16), GFP_NOFS); if (!new_free) return -ENOMEM; memcpy(new_free, wnd->free_bits, wnd->nwnd * sizeof(short)); memset(new_free + wnd->nwnd, 0, (new_wnd - wnd->nwnd) * sizeof(short)); kvfree(wnd->free_bits); wnd->free_bits = new_free; } /* Zero bits [old_bits,new_bits). */ bits = new_bits - old_bits; b0 = old_bits & (wbits - 1); for (iw = old_bits >> (sb->s_blocksize_bits + 3); bits; iw += 1) { u32 op; size_t frb; u64 vbo, lbo, bytes; struct buffer_head *bh; if (iw + 1 == new_wnd) wbits = new_last; op = b0 + bits > wbits ? wbits - b0 : bits; vbo = (u64)iw * blocksize; err = ntfs_vbo_to_lbo(sbi, &wnd->run, vbo, &lbo, &bytes); if (err) return err; bh = ntfs_bread(sb, lbo >> sb->s_blocksize_bits); if (!bh) return -EIO; lock_buffer(bh); ntfs_bitmap_clear_le(bh->b_data, b0, blocksize * 8 - b0); frb = wbits - ntfs_bitmap_weight_le(bh->b_data, wbits); wnd->total_zeroes += frb - wnd->free_bits[iw]; wnd->free_bits[iw] = frb; set_buffer_uptodate(bh); mark_buffer_dirty(bh); unlock_buffer(bh); /* err = sync_dirty_buffer(bh); */ b0 = 0; bits -= op; } wnd->nbits = new_bits; wnd->nwnd = new_wnd; wnd->bits_last = new_last; wnd_add_free_ext(wnd, old_bits, new_bits - old_bits, false); return 0; } void wnd_zone_set(struct wnd_bitmap *wnd, size_t lcn, size_t len) { size_t zlen = wnd->zone_end - wnd->zone_bit; if (zlen) wnd_add_free_ext(wnd, wnd->zone_bit, zlen, false); if (!RB_EMPTY_ROOT(&wnd->start_tree) && len) wnd_remove_free_ext(wnd, lcn, len); wnd->zone_bit = lcn; wnd->zone_end = lcn + len; } int ntfs_trim_fs(struct ntfs_sb_info *sbi, struct fstrim_range *range) { int err = 0; struct super_block *sb = sbi->sb; struct wnd_bitmap *wnd = &sbi->used.bitmap; u32 wbits = 8 * sb->s_blocksize; CLST len = 0, lcn = 0, done = 0; CLST minlen = bytes_to_cluster(sbi, range->minlen); CLST lcn_from = bytes_to_cluster(sbi, range->start); size_t iw = lcn_from >> (sb->s_blocksize_bits + 3); u32 wbit = lcn_from & (wbits - 1); CLST lcn_to; if (!minlen) minlen = 1; if (range->len == (u64)-1) lcn_to = wnd->nbits; else lcn_to = bytes_to_cluster(sbi, range->start + range->len); down_read_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS); for (; iw < wnd->nwnd; iw++, wbit = 0) { CLST lcn_wnd = iw * wbits; struct buffer_head *bh; if (lcn_wnd > lcn_to) break; if (!wnd->free_bits[iw]) continue; if (iw + 1 == wnd->nwnd) wbits = wnd->bits_last; if (lcn_wnd + wbits > lcn_to) wbits = lcn_to - lcn_wnd; bh = wnd_map(wnd, iw); if (IS_ERR(bh)) { err = PTR_ERR(bh); break; } for (; wbit < wbits; wbit++) { if (!test_bit_le(wbit, bh->b_data)) { if (!len) lcn = lcn_wnd + wbit; len += 1; continue; } if (len >= minlen) { err = ntfs_discard(sbi, lcn, len); if (err) goto out; done += len; } len = 0; } put_bh(bh); } /* Process the last fragment. */ if (len >= minlen) { err = ntfs_discard(sbi, lcn, len); if (err) goto out; done += len; } out: range->len = (u64)done << sbi->cluster_bits; up_read(&wnd->rw_lock); return err; } #if BITS_PER_LONG == 64 typedef __le64 bitmap_ulong; #define cpu_to_ul(x) cpu_to_le64(x) #define ul_to_cpu(x) le64_to_cpu(x) #else typedef __le32 bitmap_ulong; #define cpu_to_ul(x) cpu_to_le32(x) #define ul_to_cpu(x) le32_to_cpu(x) #endif void ntfs_bitmap_set_le(void *map, unsigned int start, int len) { bitmap_ulong *p = (bitmap_ulong *)map + BIT_WORD(start); const unsigned int size = start + len; int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG); bitmap_ulong mask_to_set = cpu_to_ul(BITMAP_FIRST_WORD_MASK(start)); while (len - bits_to_set >= 0) { *p |= mask_to_set; len -= bits_to_set; bits_to_set = BITS_PER_LONG; mask_to_set = cpu_to_ul(~0UL); p++; } if (len) { mask_to_set &= cpu_to_ul(BITMAP_LAST_WORD_MASK(size)); *p |= mask_to_set; } } void ntfs_bitmap_clear_le(void *map, unsigned int start, int len) { bitmap_ulong *p = (bitmap_ulong *)map + BIT_WORD(start); const unsigned int size = start + len; int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG); bitmap_ulong mask_to_clear = cpu_to_ul(BITMAP_FIRST_WORD_MASK(start)); while (len - bits_to_clear >= 0) { *p &= ~mask_to_clear; len -= bits_to_clear; bits_to_clear = BITS_PER_LONG; mask_to_clear = cpu_to_ul(~0UL); p++; } if (len) { mask_to_clear &= cpu_to_ul(BITMAP_LAST_WORD_MASK(size)); *p &= ~mask_to_clear; } } unsigned int ntfs_bitmap_weight_le(const void *bitmap, int bits) { const ulong *bmp = bitmap; unsigned int k, lim = bits / BITS_PER_LONG; unsigned int w = 0; for (k = 0; k < lim; k++) w += hweight_long(bmp[k]); if (bits % BITS_PER_LONG) { w += hweight_long(ul_to_cpu(((bitmap_ulong *)bitmap)[k]) & BITMAP_LAST_WORD_MASK(bits)); } return w; }
424 419 418 420 425 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * cls_cgroup.h Control Group Classifier * * Authors: Thomas Graf <tgraf@suug.ch> */ #ifndef _NET_CLS_CGROUP_H #define _NET_CLS_CGROUP_H #include <linux/cgroup.h> #include <linux/hardirq.h> #include <linux/rcupdate.h> #include <net/sock.h> #include <net/inet_sock.h> #ifdef CONFIG_CGROUP_NET_CLASSID struct cgroup_cls_state { struct cgroup_subsys_state css; u32 classid; }; struct cgroup_cls_state *task_cls_state(struct task_struct *p); static inline u32 task_cls_classid(struct task_struct *p) { u32 classid; if (in_interrupt()) return 0; rcu_read_lock(); classid = container_of(task_css(p, net_cls_cgrp_id), struct cgroup_cls_state, css)->classid; rcu_read_unlock(); return classid; } static inline void sock_update_classid(struct sock_cgroup_data *skcd) { u32 classid; classid = task_cls_classid(current); sock_cgroup_set_classid(skcd, classid); } static inline u32 __task_get_classid(struct task_struct *task) { return task_cls_state(task)->classid; } static inline u32 task_get_classid(const struct sk_buff *skb) { u32 classid = __task_get_classid(current); /* Due to the nature of the classifier it is required to ignore all * packets originating from softirq context as accessing `current' * would lead to false results. * * This test assumes that all callers of dev_queue_xmit() explicitly * disable bh. Knowing this, it is possible to detect softirq based * calls by looking at the number of nested bh disable calls because * softirqs always disables bh. */ if (in_serving_softirq()) { struct sock *sk = skb_to_full_sk(skb); /* If there is an sock_cgroup_classid we'll use that. */ if (!sk || !sk_fullsock(sk)) return 0; classid = sock_cgroup_classid(&sk->sk_cgrp_data); } return classid; } #else /* !CONFIG_CGROUP_NET_CLASSID */ static inline void sock_update_classid(struct sock_cgroup_data *skcd) { } static inline u32 task_get_classid(const struct sk_buff *skb) { return 0; } #endif /* CONFIG_CGROUP_NET_CLASSID */ #endif /* _NET_CLS_CGROUP_H */
5754 5755 5514 5414 5723 5719 5721 3485 5725 4918 5728 5738 47 5734 4867 4868 4866 4867 1 5426 5 5445 4887 4864 3636 3648 5426 3302 3658 8 5421 5425 5447 26 5413 5447 5426 10 5419 5446 26 42 44 43 11 42 41 42 1 1 5419 5423 5414 5415 26 26 5476 5482 5479 5463 30 30 30 5482 6 45 47 47 47 6 44 44 45 45 1 44 44 5406 17 5412 5425 5425 97 5418 5446 5426 5448 5427 5422 3298 3634 5448 5421 5414 5415 5422 5447 110 108 5409 4 6 1 5470 6 6 5469 3510 3905 4 5509 5732 5754 5419 5479 5503 5473 5727 5722 5721 5758 3 4 5719 5721 6 2 2 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 // SPDX-License-Identifier: GPL-2.0 #include <kunit/visibility.h> #include <linux/kernel.h> #include <linux/irqflags.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/bug.h> #include "printk_ringbuffer.h" #include "internal.h" /** * DOC: printk_ringbuffer overview * * Data Structure * -------------- * The printk_ringbuffer is made up of 3 internal ringbuffers: * * desc_ring * A ring of descriptors and their meta data (such as sequence number, * timestamp, loglevel, etc.) as well as internal state information about * the record and logical positions specifying where in the other * ringbuffer the text strings are located. * * text_data_ring * A ring of data blocks. A data block consists of an unsigned long * integer (ID) that maps to a desc_ring index followed by the text * string of the record. * * The internal state information of a descriptor is the key element to allow * readers and writers to locklessly synchronize access to the data. * * Implementation * -------------- * * Descriptor Ring * ~~~~~~~~~~~~~~~ * The descriptor ring is an array of descriptors. A descriptor contains * essential meta data to track the data of a printk record using * blk_lpos structs pointing to associated text data blocks (see * "Data Rings" below). Each descriptor is assigned an ID that maps * directly to index values of the descriptor array and has a state. The ID * and the state are bitwise combined into a single descriptor field named * @state_var, allowing ID and state to be synchronously and atomically * updated. * * Descriptors have four states: * * reserved * A writer is modifying the record. * * committed * The record and all its data are written. A writer can reopen the * descriptor (transitioning it back to reserved), but in the committed * state the data is consistent. * * finalized * The record and all its data are complete and available for reading. A * writer cannot reopen the descriptor. * * reusable * The record exists, but its text and/or meta data may no longer be * available. * * Querying the @state_var of a record requires providing the ID of the * descriptor to query. This can yield a possible fifth (pseudo) state: * * miss * The descriptor being queried has an unexpected ID. * * The descriptor ring has a @tail_id that contains the ID of the oldest * descriptor and @head_id that contains the ID of the newest descriptor. * * When a new descriptor should be created (and the ring is full), the tail * descriptor is invalidated by first transitioning to the reusable state and * then invalidating all tail data blocks up to and including the data blocks * associated with the tail descriptor (for the text ring). Then * @tail_id is advanced, followed by advancing @head_id. And finally the * @state_var of the new descriptor is initialized to the new ID and reserved * state. * * The @tail_id can only be advanced if the new @tail_id would be in the * committed or reusable queried state. This makes it possible that a valid * sequence number of the tail is always available. * * Descriptor Finalization * ~~~~~~~~~~~~~~~~~~~~~~~ * When a writer calls the commit function prb_commit(), record data is * fully stored and is consistent within the ringbuffer. However, a writer can * reopen that record, claiming exclusive access (as with prb_reserve()), and * modify that record. When finished, the writer must again commit the record. * * In order for a record to be made available to readers (and also become * recyclable for writers), it must be finalized. A finalized record cannot be * reopened and can never become "unfinalized". Record finalization can occur * in three different scenarios: * * 1) A writer can simultaneously commit and finalize its record by calling * prb_final_commit() instead of prb_commit(). * * 2) When a new record is reserved and the previous record has been * committed via prb_commit(), that previous record is automatically * finalized. * * 3) When a record is committed via prb_commit() and a newer record * already exists, the record being committed is automatically finalized. * * Data Ring * ~~~~~~~~~ * The text data ring is a byte array composed of data blocks. Data blocks are * referenced by blk_lpos structs that point to the logical position of the * beginning of a data block and the beginning of the next adjacent data * block. Logical positions are mapped directly to index values of the byte * array ringbuffer. * * Each data block consists of an ID followed by the writer data. The ID is * the identifier of a descriptor that is associated with the data block. A * given data block is considered valid if all of the following conditions * are met: * * 1) The descriptor associated with the data block is in the committed * or finalized queried state. * * 2) The blk_lpos struct within the descriptor associated with the data * block references back to the same data block. * * 3) The data block is within the head/tail logical position range. * * If the writer data of a data block would extend beyond the end of the * byte array, only the ID of the data block is stored at the logical * position and the full data block (ID and writer data) is stored at the * beginning of the byte array. The referencing blk_lpos will point to the * ID before the wrap and the next data block will be at the logical * position adjacent the full data block after the wrap. * * Data rings have a @tail_lpos that points to the beginning of the oldest * data block and a @head_lpos that points to the logical position of the * next (not yet existing) data block. * * When a new data block should be created (and the ring is full), tail data * blocks will first be invalidated by putting their associated descriptors * into the reusable state and then pushing the @tail_lpos forward beyond * them. Then the @head_lpos is pushed forward and is associated with a new * descriptor. If a data block is not valid, the @tail_lpos cannot be * advanced beyond it. * * Info Array * ~~~~~~~~~~ * The general meta data of printk records are stored in printk_info structs, * stored in an array with the same number of elements as the descriptor ring. * Each info corresponds to the descriptor of the same index in the * descriptor ring. Info validity is confirmed by evaluating the corresponding * descriptor before and after loading the info. * * Usage * ----- * Here are some simple examples demonstrating writers and readers. For the * examples a global ringbuffer (test_rb) is available (which is not the * actual ringbuffer used by printk):: * * DEFINE_PRINTKRB(test_rb, 15, 5); * * This ringbuffer allows up to 32768 records (2 ^ 15) and has a size of * 1 MiB (2 ^ (15 + 5)) for text data. * * Sample writer code:: * * const char *textstr = "message text"; * struct prb_reserved_entry e; * struct printk_record r; * * // specify how much to allocate * prb_rec_init_wr(&r, strlen(textstr) + 1); * * if (prb_reserve(&e, &test_rb, &r)) { * snprintf(r.text_buf, r.text_buf_size, "%s", textstr); * * r.info->text_len = strlen(textstr); * r.info->ts_nsec = local_clock(); * r.info->caller_id = printk_caller_id(); * * // commit and finalize the record * prb_final_commit(&e); * } * * Note that additional writer functions are available to extend a record * after it has been committed but not yet finalized. This can be done as * long as no new records have been reserved and the caller is the same. * * Sample writer code (record extending):: * * // alternate rest of previous example * * r.info->text_len = strlen(textstr); * r.info->ts_nsec = local_clock(); * r.info->caller_id = printk_caller_id(); * * // commit the record (but do not finalize yet) * prb_commit(&e); * } * * ... * * // specify additional 5 bytes text space to extend * prb_rec_init_wr(&r, 5); * * // try to extend, but only if it does not exceed 32 bytes * if (prb_reserve_in_last(&e, &test_rb, &r, printk_caller_id(), 32)) { * snprintf(&r.text_buf[r.info->text_len], * r.text_buf_size - r.info->text_len, "hello"); * * r.info->text_len += 5; * * // commit and finalize the record * prb_final_commit(&e); * } * * Sample reader code:: * * struct printk_info info; * struct printk_record r; * char text_buf[32]; * u64 seq; * * prb_rec_init_rd(&r, &info, &text_buf[0], sizeof(text_buf)); * * prb_for_each_record(0, &test_rb, &seq, &r) { * if (info.seq != seq) * pr_warn("lost %llu records\n", info.seq - seq); * * if (info.text_len > r.text_buf_size) { * pr_warn("record %llu text truncated\n", info.seq); * text_buf[r.text_buf_size - 1] = 0; * } * * pr_info("%llu: %llu: %s\n", info.seq, info.ts_nsec, * &text_buf[0]); * } * * Note that additional less convenient reader functions are available to * allow complex record access. * * ABA Issues * ~~~~~~~~~~ * To help avoid ABA issues, descriptors are referenced by IDs (array index * values combined with tagged bits counting array wraps) and data blocks are * referenced by logical positions (array index values combined with tagged * bits counting array wraps). However, on 32-bit systems the number of * tagged bits is relatively small such that an ABA incident is (at least * theoretically) possible. For example, if 4 million maximally sized (1KiB) * printk messages were to occur in NMI context on a 32-bit system, the * interrupted context would not be able to recognize that the 32-bit integer * completely wrapped and thus represents a different data block than the one * the interrupted context expects. * * To help combat this possibility, additional state checking is performed * (such as using cmpxchg() even though set() would suffice). These extra * checks are commented as such and will hopefully catch any ABA issue that * a 32-bit system might experience. * * Memory Barriers * ~~~~~~~~~~~~~~~ * Multiple memory barriers are used. To simplify proving correctness and * generating litmus tests, lines of code related to memory barriers * (loads, stores, and the associated memory barriers) are labeled:: * * LMM(function:letter) * * Comments reference the labels using only the "function:letter" part. * * The memory barrier pairs and their ordering are: * * desc_reserve:D / desc_reserve:B * push descriptor tail (id), then push descriptor head (id) * * desc_reserve:D / data_push_tail:B * push data tail (lpos), then set new descriptor reserved (state) * * desc_reserve:D / desc_push_tail:C * push descriptor tail (id), then set new descriptor reserved (state) * * desc_reserve:D / prb_first_seq:C * push descriptor tail (id), then set new descriptor reserved (state) * * desc_reserve:F / desc_read:D * set new descriptor id and reserved (state), then allow writer changes * * data_alloc:A (or data_realloc:A) / desc_read:D * set old descriptor reusable (state), then modify new data block area * * data_alloc:A (or data_realloc:A) / data_push_tail:B * push data tail (lpos), then modify new data block area * * _prb_commit:B / desc_read:B * store writer changes, then set new descriptor committed (state) * * desc_reopen_last:A / _prb_commit:B * set descriptor reserved (state), then read descriptor data * * _prb_commit:B / desc_reserve:D * set new descriptor committed (state), then check descriptor head (id) * * data_push_tail:D / data_push_tail:A * set descriptor reusable (state), then push data tail (lpos) * * desc_push_tail:B / desc_reserve:D * set descriptor reusable (state), then push descriptor tail (id) * * desc_update_last_finalized:A / desc_last_finalized_seq:A * store finalized record, then set new highest finalized sequence number */ #define DATA_SIZE(data_ring) _DATA_SIZE((data_ring)->size_bits) #define DATA_SIZE_MASK(data_ring) (DATA_SIZE(data_ring) - 1) #define DESCS_COUNT(desc_ring) _DESCS_COUNT((desc_ring)->count_bits) #define DESCS_COUNT_MASK(desc_ring) (DESCS_COUNT(desc_ring) - 1) /* Determine the data array index from a logical position. */ #define DATA_INDEX(data_ring, lpos) ((lpos) & DATA_SIZE_MASK(data_ring)) /* Determine the desc array index from an ID or sequence number. */ #define DESC_INDEX(desc_ring, n) ((n) & DESCS_COUNT_MASK(desc_ring)) /* Determine how many times the data array has wrapped. */ #define DATA_WRAPS(data_ring, lpos) ((lpos) >> (data_ring)->size_bits) /* Determine if a logical position refers to a data-less block. */ #define LPOS_DATALESS(lpos) ((lpos) & 1UL) #define BLK_DATALESS(blk) (LPOS_DATALESS((blk)->begin) && \ LPOS_DATALESS((blk)->next)) /* Get the logical position at index 0 of the current wrap. */ #define DATA_THIS_WRAP_START_LPOS(data_ring, lpos) \ ((lpos) & ~DATA_SIZE_MASK(data_ring)) /* Get the ID for the same index of the previous wrap as the given ID. */ #define DESC_ID_PREV_WRAP(desc_ring, id) \ DESC_ID((id) - DESCS_COUNT(desc_ring)) /* * A data block: mapped directly to the beginning of the data block area * specified as a logical position within the data ring. * * @id: the ID of the associated descriptor * @data: the writer data * * Note that the size of a data block is only known by its associated * descriptor. */ struct prb_data_block { unsigned long id; char data[]; }; /* * Return the descriptor associated with @n. @n can be either a * descriptor ID or a sequence number. */ static struct prb_desc *to_desc(struct prb_desc_ring *desc_ring, u64 n) { return &desc_ring->descs[DESC_INDEX(desc_ring, n)]; } /* * Return the printk_info associated with @n. @n can be either a * descriptor ID or a sequence number. */ static struct printk_info *to_info(struct prb_desc_ring *desc_ring, u64 n) { return &desc_ring->infos[DESC_INDEX(desc_ring, n)]; } static struct prb_data_block *to_block(struct prb_data_ring *data_ring, unsigned long begin_lpos) { return (void *)&data_ring->data[DATA_INDEX(data_ring, begin_lpos)]; } /* * Increase the data size to account for data block meta data plus any * padding so that the adjacent data block is aligned on the ID size. */ static unsigned int to_blk_size(unsigned int size) { struct prb_data_block *db = NULL; size += sizeof(*db); size = ALIGN(size, sizeof(db->id)); return size; } /* * Sanity checker for reserve size. The ringbuffer code assumes that a data * block does not exceed the maximum possible size that could fit within the * ringbuffer. This function provides that basic size check so that the * assumption is safe. */ static bool data_check_size(struct prb_data_ring *data_ring, unsigned int size) { struct prb_data_block *db = NULL; if (size == 0) return true; /* * Ensure the alignment padded size could possibly fit in the data * array. The largest possible data block must still leave room for * at least the ID of the next block. */ size = to_blk_size(size); if (size > DATA_SIZE(data_ring) - sizeof(db->id)) return false; return true; } /* Query the state of a descriptor. */ static enum desc_state get_desc_state(unsigned long id, unsigned long state_val) { if (id != DESC_ID(state_val)) return desc_miss; return DESC_STATE(state_val); } /* * Get a copy of a specified descriptor and return its queried state. If the * descriptor is in an inconsistent state (miss or reserved), the caller can * only expect the descriptor's @state_var field to be valid. * * The sequence number and caller_id can be optionally retrieved. Like all * non-state_var data, they are only valid if the descriptor is in a * consistent state. */ static enum desc_state desc_read(struct prb_desc_ring *desc_ring, unsigned long id, struct prb_desc *desc_out, u64 *seq_out, u32 *caller_id_out) { struct printk_info *info = to_info(desc_ring, id); struct prb_desc *desc = to_desc(desc_ring, id); atomic_long_t *state_var = &desc->state_var; enum desc_state d_state; unsigned long state_val; /* Check the descriptor state. */ state_val = atomic_long_read(state_var); /* LMM(desc_read:A) */ d_state = get_desc_state(id, state_val); if (d_state == desc_miss || d_state == desc_reserved) { /* * The descriptor is in an inconsistent state. Set at least * @state_var so that the caller can see the details of * the inconsistent state. */ goto out; } /* * Guarantee the state is loaded before copying the descriptor * content. This avoids copying obsolete descriptor content that might * not apply to the descriptor state. This pairs with _prb_commit:B. * * Memory barrier involvement: * * If desc_read:A reads from _prb_commit:B, then desc_read:C reads * from _prb_commit:A. * * Relies on: * * WMB from _prb_commit:A to _prb_commit:B * matching * RMB from desc_read:A to desc_read:C */ smp_rmb(); /* LMM(desc_read:B) */ /* * Copy the descriptor data. The data is not valid until the * state has been re-checked. A memcpy() for all of @desc * cannot be used because of the atomic_t @state_var field. */ if (desc_out) { memcpy(&desc_out->text_blk_lpos, &desc->text_blk_lpos, sizeof(desc_out->text_blk_lpos)); /* LMM(desc_read:C) */ } if (seq_out) *seq_out = info->seq; /* also part of desc_read:C */ if (caller_id_out) *caller_id_out = info->caller_id; /* also part of desc_read:C */ /* * 1. Guarantee the descriptor content is loaded before re-checking * the state. This avoids reading an obsolete descriptor state * that may not apply to the copied content. This pairs with * desc_reserve:F. * * Memory barrier involvement: * * If desc_read:C reads from desc_reserve:G, then desc_read:E * reads from desc_reserve:F. * * Relies on: * * WMB from desc_reserve:F to desc_reserve:G * matching * RMB from desc_read:C to desc_read:E * * 2. Guarantee the record data is loaded before re-checking the * state. This avoids reading an obsolete descriptor state that may * not apply to the copied data. This pairs with data_alloc:A and * data_realloc:A. * * Memory barrier involvement: * * If copy_data:A reads from data_alloc:B, then desc_read:E * reads from desc_make_reusable:A. * * Relies on: * * MB from desc_make_reusable:A to data_alloc:B * matching * RMB from desc_read:C to desc_read:E * * Note: desc_make_reusable:A and data_alloc:B can be different * CPUs. However, the data_alloc:B CPU (which performs the * full memory barrier) must have previously seen * desc_make_reusable:A. */ smp_rmb(); /* LMM(desc_read:D) */ /* * The data has been copied. Return the current descriptor state, * which may have changed since the load above. */ state_val = atomic_long_read(state_var); /* LMM(desc_read:E) */ d_state = get_desc_state(id, state_val); out: if (desc_out) atomic_long_set(&desc_out->state_var, state_val); return d_state; } /* * Take a specified descriptor out of the finalized state by attempting * the transition from finalized to reusable. Either this context or some * other context will have been successful. */ static void desc_make_reusable(struct prb_desc_ring *desc_ring, unsigned long id) { unsigned long val_finalized = DESC_SV(id, desc_finalized); unsigned long val_reusable = DESC_SV(id, desc_reusable); struct prb_desc *desc = to_desc(desc_ring, id); atomic_long_t *state_var = &desc->state_var; atomic_long_cmpxchg_relaxed(state_var, val_finalized, val_reusable); /* LMM(desc_make_reusable:A) */ } /* * Given the text data ring, put the associated descriptor of each * data block from @lpos_begin until @lpos_end into the reusable state. * * If there is any problem making the associated descriptor reusable, either * the descriptor has not yet been finalized or another writer context has * already pushed the tail lpos past the problematic data block. Regardless, * on error the caller can re-load the tail lpos to determine the situation. */ static bool data_make_reusable(struct printk_ringbuffer *rb, unsigned long lpos_begin, unsigned long lpos_end, unsigned long *lpos_out) { struct prb_data_ring *data_ring = &rb->text_data_ring; struct prb_desc_ring *desc_ring = &rb->desc_ring; struct prb_data_block *blk; enum desc_state d_state; struct prb_desc desc; struct prb_data_blk_lpos *blk_lpos = &desc.text_blk_lpos; unsigned long id; /* Loop until @lpos_begin has advanced to or beyond @lpos_end. */ while ((lpos_end - lpos_begin) - 1 < DATA_SIZE(data_ring)) { blk = to_block(data_ring, lpos_begin); /* * Load the block ID from the data block. This is a data race * against a writer that may have newly reserved this data * area. If the loaded value matches a valid descriptor ID, * the blk_lpos of that descriptor will be checked to make * sure it points back to this data block. If the check fails, * the data area has been recycled by another writer. */ id = blk->id; /* LMM(data_make_reusable:A) */ d_state = desc_read(desc_ring, id, &desc, NULL, NULL); /* LMM(data_make_reusable:B) */ switch (d_state) { case desc_miss: case desc_reserved: case desc_committed: return false; case desc_finalized: /* * This data block is invalid if the descriptor * does not point back to it. */ if (blk_lpos->begin != lpos_begin) return false; desc_make_reusable(desc_ring, id); break; case desc_reusable: /* * This data block is invalid if the descriptor * does not point back to it. */ if (blk_lpos->begin != lpos_begin) return false; break; } /* Advance @lpos_begin to the next data block. */ lpos_begin = blk_lpos->next; } *lpos_out = lpos_begin; return true; } /* * Advance the data ring tail to at least @lpos. This function puts * descriptors into the reusable state if the tail is pushed beyond * their associated data block. */ static bool data_push_tail(struct printk_ringbuffer *rb, unsigned long lpos) { struct prb_data_ring *data_ring = &rb->text_data_ring; unsigned long tail_lpos_new; unsigned long tail_lpos; unsigned long next_lpos; /* If @lpos is from a data-less block, there is nothing to do. */ if (LPOS_DATALESS(lpos)) return true; /* * Any descriptor states that have transitioned to reusable due to the * data tail being pushed to this loaded value will be visible to this * CPU. This pairs with data_push_tail:D. * * Memory barrier involvement: * * If data_push_tail:A reads from data_push_tail:D, then this CPU can * see desc_make_reusable:A. * * Relies on: * * MB from desc_make_reusable:A to data_push_tail:D * matches * READFROM from data_push_tail:D to data_push_tail:A * thus * READFROM from desc_make_reusable:A to this CPU */ tail_lpos = atomic_long_read(&data_ring->tail_lpos); /* LMM(data_push_tail:A) */ /* * Loop until the tail lpos is at or beyond @lpos. This condition * may already be satisfied, resulting in no full memory barrier * from data_push_tail:D being performed. However, since this CPU * sees the new tail lpos, any descriptor states that transitioned to * the reusable state must already be visible. */ while ((lpos - tail_lpos) - 1 < DATA_SIZE(data_ring)) { /* * Make all descriptors reusable that are associated with * data blocks before @lpos. */ if (!data_make_reusable(rb, tail_lpos, lpos, &next_lpos)) { /* * 1. Guarantee the block ID loaded in * data_make_reusable() is performed before * reloading the tail lpos. The failed * data_make_reusable() may be due to a newly * recycled data area causing the tail lpos to * have been previously pushed. This pairs with * data_alloc:A and data_realloc:A. * * Memory barrier involvement: * * If data_make_reusable:A reads from data_alloc:B, * then data_push_tail:C reads from * data_push_tail:D. * * Relies on: * * MB from data_push_tail:D to data_alloc:B * matching * RMB from data_make_reusable:A to * data_push_tail:C * * Note: data_push_tail:D and data_alloc:B can be * different CPUs. However, the data_alloc:B * CPU (which performs the full memory * barrier) must have previously seen * data_push_tail:D. * * 2. Guarantee the descriptor state loaded in * data_make_reusable() is performed before * reloading the tail lpos. The failed * data_make_reusable() may be due to a newly * recycled descriptor causing the tail lpos to * have been previously pushed. This pairs with * desc_reserve:D. * * Memory barrier involvement: * * If data_make_reusable:B reads from * desc_reserve:F, then data_push_tail:C reads * from data_push_tail:D. * * Relies on: * * MB from data_push_tail:D to desc_reserve:F * matching * RMB from data_make_reusable:B to * data_push_tail:C * * Note: data_push_tail:D and desc_reserve:F can * be different CPUs. However, the * desc_reserve:F CPU (which performs the * full memory barrier) must have previously * seen data_push_tail:D. */ smp_rmb(); /* LMM(data_push_tail:B) */ tail_lpos_new = atomic_long_read(&data_ring->tail_lpos ); /* LMM(data_push_tail:C) */ if (tail_lpos_new == tail_lpos) return false; /* Another CPU pushed the tail. Try again. */ tail_lpos = tail_lpos_new; continue; } /* * Guarantee any descriptor states that have transitioned to * reusable are stored before pushing the tail lpos. A full * memory barrier is needed since other CPUs may have made * the descriptor states reusable. This pairs with * data_push_tail:A. */ if (atomic_long_try_cmpxchg(&data_ring->tail_lpos, &tail_lpos, next_lpos)) { /* LMM(data_push_tail:D) */ break; } } return true; } /* * Advance the desc ring tail. This function advances the tail by one * descriptor, thus invalidating the oldest descriptor. Before advancing * the tail, the tail descriptor is made reusable and all data blocks up to * and including the descriptor's data block are invalidated (i.e. the data * ring tail is pushed past the data block of the descriptor being made * reusable). */ static bool desc_push_tail(struct printk_ringbuffer *rb, unsigned long tail_id) { struct prb_desc_ring *desc_ring = &rb->desc_ring; enum desc_state d_state; struct prb_desc desc; d_state = desc_read(desc_ring, tail_id, &desc, NULL, NULL); switch (d_state) { case desc_miss: /* * If the ID is exactly 1 wrap behind the expected, it is * in the process of being reserved by another writer and * must be considered reserved. */ if (DESC_ID(atomic_long_read(&desc.state_var)) == DESC_ID_PREV_WRAP(desc_ring, tail_id)) { return false; } /* * The ID has changed. Another writer must have pushed the * tail and recycled the descriptor already. Success is * returned because the caller is only interested in the * specified tail being pushed, which it was. */ return true; case desc_reserved: case desc_committed: return false; case desc_finalized: desc_make_reusable(desc_ring, tail_id); break; case desc_reusable: break; } /* * Data blocks must be invalidated before their associated * descriptor can be made available for recycling. Invalidating * them later is not possible because there is no way to trust * data blocks once their associated descriptor is gone. */ if (!data_push_tail(rb, desc.text_blk_lpos.next)) return false; /* * Check the next descriptor after @tail_id before pushing the tail * to it because the tail must always be in a finalized or reusable * state. The implementation of prb_first_seq() relies on this. * * A successful read implies that the next descriptor is less than or * equal to @head_id so there is no risk of pushing the tail past the * head. */ d_state = desc_read(desc_ring, DESC_ID(tail_id + 1), &desc, NULL, NULL); /* LMM(desc_push_tail:A) */ if (d_state == desc_finalized || d_state == desc_reusable) { /* * Guarantee any descriptor states that have transitioned to * reusable are stored before pushing the tail ID. This allows * verifying the recycled descriptor state. A full memory * barrier is needed since other CPUs may have made the * descriptor states reusable. This pairs with desc_reserve:D. */ atomic_long_cmpxchg(&desc_ring->tail_id, tail_id, DESC_ID(tail_id + 1)); /* LMM(desc_push_tail:B) */ } else { /* * Guarantee the last state load from desc_read() is before * reloading @tail_id in order to see a new tail ID in the * case that the descriptor has been recycled. This pairs * with desc_reserve:D. * * Memory barrier involvement: * * If desc_push_tail:A reads from desc_reserve:F, then * desc_push_tail:D reads from desc_push_tail:B. * * Relies on: * * MB from desc_push_tail:B to desc_reserve:F * matching * RMB from desc_push_tail:A to desc_push_tail:D * * Note: desc_push_tail:B and desc_reserve:F can be different * CPUs. However, the desc_reserve:F CPU (which performs * the full memory barrier) must have previously seen * desc_push_tail:B. */ smp_rmb(); /* LMM(desc_push_tail:C) */ /* * Re-check the tail ID. The descriptor following @tail_id is * not in an allowed tail state. But if the tail has since * been moved by another CPU, then it does not matter. */ if (atomic_long_read(&desc_ring->tail_id) == tail_id) /* LMM(desc_push_tail:D) */ return false; } return true; } /* Reserve a new descriptor, invalidating the oldest if necessary. */ static bool desc_reserve(struct printk_ringbuffer *rb, unsigned long *id_out) { struct prb_desc_ring *desc_ring = &rb->desc_ring; unsigned long prev_state_val; unsigned long id_prev_wrap; struct prb_desc *desc; unsigned long head_id; unsigned long id; head_id = atomic_long_read(&desc_ring->head_id); /* LMM(desc_reserve:A) */ do { id = DESC_ID(head_id + 1); id_prev_wrap = DESC_ID_PREV_WRAP(desc_ring, id); /* * Guarantee the head ID is read before reading the tail ID. * Since the tail ID is updated before the head ID, this * guarantees that @id_prev_wrap is never ahead of the tail * ID. This pairs with desc_reserve:D. * * Memory barrier involvement: * * If desc_reserve:A reads from desc_reserve:D, then * desc_reserve:C reads from desc_push_tail:B. * * Relies on: * * MB from desc_push_tail:B to desc_reserve:D * matching * RMB from desc_reserve:A to desc_reserve:C * * Note: desc_push_tail:B and desc_reserve:D can be different * CPUs. However, the desc_reserve:D CPU (which performs * the full memory barrier) must have previously seen * desc_push_tail:B. */ smp_rmb(); /* LMM(desc_reserve:B) */ if (id_prev_wrap == atomic_long_read(&desc_ring->tail_id )) { /* LMM(desc_reserve:C) */ /* * Make space for the new descriptor by * advancing the tail. */ if (!desc_push_tail(rb, id_prev_wrap)) return false; } /* * 1. Guarantee the tail ID is read before validating the * recycled descriptor state. A read memory barrier is * sufficient for this. This pairs with desc_push_tail:B. * * Memory barrier involvement: * * If desc_reserve:C reads from desc_push_tail:B, then * desc_reserve:E reads from desc_make_reusable:A. * * Relies on: * * MB from desc_make_reusable:A to desc_push_tail:B * matching * RMB from desc_reserve:C to desc_reserve:E * * Note: desc_make_reusable:A and desc_push_tail:B can be * different CPUs. However, the desc_push_tail:B CPU * (which performs the full memory barrier) must have * previously seen desc_make_reusable:A. * * 2. Guarantee the tail ID is stored before storing the head * ID. This pairs with desc_reserve:B. * * 3. Guarantee any data ring tail changes are stored before * recycling the descriptor. Data ring tail changes can * happen via desc_push_tail()->data_push_tail(). A full * memory barrier is needed since another CPU may have * pushed the data ring tails. This pairs with * data_push_tail:B. * * 4. Guarantee a new tail ID is stored before recycling the * descriptor. A full memory barrier is needed since * another CPU may have pushed the tail ID. This pairs * with desc_push_tail:C and this also pairs with * prb_first_seq:C. * * 5. Guarantee the head ID is stored before trying to * finalize the previous descriptor. This pairs with * _prb_commit:B. */ } while (!atomic_long_try_cmpxchg(&desc_ring->head_id, &head_id, id)); /* LMM(desc_reserve:D) */ desc = to_desc(desc_ring, id); /* * If the descriptor has been recycled, verify the old state val. * See "ABA Issues" about why this verification is performed. */ prev_state_val = atomic_long_read(&desc->state_var); /* LMM(desc_reserve:E) */ if (prev_state_val && get_desc_state(id_prev_wrap, prev_state_val) != desc_reusable) { WARN_ON_ONCE(1); return false; } /* * Assign the descriptor a new ID and set its state to reserved. * See "ABA Issues" about why cmpxchg() instead of set() is used. * * Guarantee the new descriptor ID and state is stored before making * any other changes. A write memory barrier is sufficient for this. * This pairs with desc_read:D. */ if (!atomic_long_try_cmpxchg(&desc->state_var, &prev_state_val, DESC_SV(id, desc_reserved))) { /* LMM(desc_reserve:F) */ WARN_ON_ONCE(1); return false; } /* Now data in @desc can be modified: LMM(desc_reserve:G) */ *id_out = id; return true; } /* Determine the end of a data block. */ static unsigned long get_next_lpos(struct prb_data_ring *data_ring, unsigned long lpos, unsigned int size) { unsigned long begin_lpos; unsigned long next_lpos; begin_lpos = lpos; next_lpos = lpos + size; /* First check if the data block does not wrap. */ if (DATA_WRAPS(data_ring, begin_lpos) == DATA_WRAPS(data_ring, next_lpos)) return next_lpos; /* Wrapping data blocks store their data at the beginning. */ return (DATA_THIS_WRAP_START_LPOS(data_ring, next_lpos) + size); } /* * Allocate a new data block, invalidating the oldest data block(s) * if necessary. This function also associates the data block with * a specified descriptor. */ static char *data_alloc(struct printk_ringbuffer *rb, unsigned int size, struct prb_data_blk_lpos *blk_lpos, unsigned long id) { struct prb_data_ring *data_ring = &rb->text_data_ring; struct prb_data_block *blk; unsigned long begin_lpos; unsigned long next_lpos; if (size == 0) { /* * Data blocks are not created for empty lines. Instead, the * reader will recognize these special lpos values and handle * it appropriately. */ blk_lpos->begin = EMPTY_LINE_LPOS; blk_lpos->next = EMPTY_LINE_LPOS; return NULL; } size = to_blk_size(size); begin_lpos = atomic_long_read(&data_ring->head_lpos); do { next_lpos = get_next_lpos(data_ring, begin_lpos, size); if (!data_push_tail(rb, next_lpos - DATA_SIZE(data_ring))) { /* Failed to allocate, specify a data-less block. */ blk_lpos->begin = FAILED_LPOS; blk_lpos->next = FAILED_LPOS; return NULL; } /* * 1. Guarantee any descriptor states that have transitioned * to reusable are stored before modifying the newly * allocated data area. A full memory barrier is needed * since other CPUs may have made the descriptor states * reusable. See data_push_tail:A about why the reusable * states are visible. This pairs with desc_read:D. * * 2. Guarantee any updated tail lpos is stored before * modifying the newly allocated data area. Another CPU may * be in data_make_reusable() and is reading a block ID * from this area. data_make_reusable() can handle reading * a garbage block ID value, but then it must be able to * load a new tail lpos. A full memory barrier is needed * since other CPUs may have updated the tail lpos. This * pairs with data_push_tail:B. */ } while (!atomic_long_try_cmpxchg(&data_ring->head_lpos, &begin_lpos, next_lpos)); /* LMM(data_alloc:A) */ blk = to_block(data_ring, begin_lpos); blk->id = id; /* LMM(data_alloc:B) */ if (DATA_WRAPS(data_ring, begin_lpos) != DATA_WRAPS(data_ring, next_lpos)) { /* Wrapping data blocks store their data at the beginning. */ blk = to_block(data_ring, 0); /* * Store the ID on the wrapped block for consistency. * The printk_ringbuffer does not actually use it. */ blk->id = id; } blk_lpos->begin = begin_lpos; blk_lpos->next = next_lpos; return &blk->data[0]; } /* * Try to resize an existing data block associated with the descriptor * specified by @id. If the resized data block should become wrapped, it * copies the old data to the new data block. If @size yields a data block * with the same or less size, the data block is left as is. * * Fail if this is not the last allocated data block or if there is not * enough space or it is not possible make enough space. * * Return a pointer to the beginning of the entire data buffer or NULL on * failure. */ static char *data_realloc(struct printk_ringbuffer *rb, unsigned int size, struct prb_data_blk_lpos *blk_lpos, unsigned long id) { struct prb_data_ring *data_ring = &rb->text_data_ring; struct prb_data_block *blk; unsigned long head_lpos; unsigned long next_lpos; bool wrapped; /* Reallocation only works if @blk_lpos is the newest data block. */ head_lpos = atomic_long_read(&data_ring->head_lpos); if (head_lpos != blk_lpos->next) return NULL; /* Keep track if @blk_lpos was a wrapping data block. */ wrapped = (DATA_WRAPS(data_ring, blk_lpos->begin) != DATA_WRAPS(data_ring, blk_lpos->next)); size = to_blk_size(size); next_lpos = get_next_lpos(data_ring, blk_lpos->begin, size); /* If the data block does not increase, there is nothing to do. */ if (head_lpos - next_lpos < DATA_SIZE(data_ring)) { if (wrapped) blk = to_block(data_ring, 0); else blk = to_block(data_ring, blk_lpos->begin); return &blk->data[0]; } if (!data_push_tail(rb, next_lpos - DATA_SIZE(data_ring))) return NULL; /* The memory barrier involvement is the same as data_alloc:A. */ if (!atomic_long_try_cmpxchg(&data_ring->head_lpos, &head_lpos, next_lpos)) { /* LMM(data_realloc:A) */ return NULL; } blk = to_block(data_ring, blk_lpos->begin); if (DATA_WRAPS(data_ring, blk_lpos->begin) != DATA_WRAPS(data_ring, next_lpos)) { struct prb_data_block *old_blk = blk; /* Wrapping data blocks store their data at the beginning. */ blk = to_block(data_ring, 0); /* * Store the ID on the wrapped block for consistency. * The printk_ringbuffer does not actually use it. */ blk->id = id; if (!wrapped) { /* * Since the allocated space is now in the newly * created wrapping data block, copy the content * from the old data block. */ memcpy(&blk->data[0], &old_blk->data[0], (blk_lpos->next - blk_lpos->begin) - sizeof(blk->id)); } } blk_lpos->next = next_lpos; return &blk->data[0]; } /* Return the number of bytes used by a data block. */ static unsigned int space_used(struct prb_data_ring *data_ring, struct prb_data_blk_lpos *blk_lpos) { /* Data-less blocks take no space. */ if (BLK_DATALESS(blk_lpos)) return 0; if (DATA_WRAPS(data_ring, blk_lpos->begin) == DATA_WRAPS(data_ring, blk_lpos->next)) { /* Data block does not wrap. */ return (DATA_INDEX(data_ring, blk_lpos->next) - DATA_INDEX(data_ring, blk_lpos->begin)); } /* * For wrapping data blocks, the trailing (wasted) space is * also counted. */ return (DATA_INDEX(data_ring, blk_lpos->next) + DATA_SIZE(data_ring) - DATA_INDEX(data_ring, blk_lpos->begin)); } /* * Given @blk_lpos, return a pointer to the writer data from the data block * and calculate the size of the data part. A NULL pointer is returned if * @blk_lpos specifies values that could never be legal. * * This function (used by readers) performs strict validation on the lpos * values to possibly detect bugs in the writer code. A WARN_ON_ONCE() is * triggered if an internal error is detected. */ static const char *get_data(struct prb_data_ring *data_ring, struct prb_data_blk_lpos *blk_lpos, unsigned int *data_size) { struct prb_data_block *db; /* Data-less data block description. */ if (BLK_DATALESS(blk_lpos)) { /* * Records that are just empty lines are also valid, even * though they do not have a data block. For such records * explicitly return empty string data to signify success. */ if (blk_lpos->begin == EMPTY_LINE_LPOS && blk_lpos->next == EMPTY_LINE_LPOS) { *data_size = 0; return ""; } /* Data lost, invalid, or otherwise unavailable. */ return NULL; } /* Regular data block: @begin less than @next and in same wrap. */ if (DATA_WRAPS(data_ring, blk_lpos->begin) == DATA_WRAPS(data_ring, blk_lpos->next) && blk_lpos->begin < blk_lpos->next) { db = to_block(data_ring, blk_lpos->begin); *data_size = blk_lpos->next - blk_lpos->begin; /* Wrapping data block: @begin is one wrap behind @next. */ } else if (DATA_WRAPS(data_ring, blk_lpos->begin + DATA_SIZE(data_ring)) == DATA_WRAPS(data_ring, blk_lpos->next)) { db = to_block(data_ring, 0); *data_size = DATA_INDEX(data_ring, blk_lpos->next); /* Illegal block description. */ } else { WARN_ON_ONCE(1); return NULL; } /* A valid data block will always be aligned to the ID size. */ if (WARN_ON_ONCE(blk_lpos->begin != ALIGN(blk_lpos->begin, sizeof(db->id))) || WARN_ON_ONCE(blk_lpos->next != ALIGN(blk_lpos->next, sizeof(db->id)))) { return NULL; } /* A valid data block will always have at least an ID. */ if (WARN_ON_ONCE(*data_size < sizeof(db->id))) return NULL; /* Subtract block ID space from size to reflect data size. */ *data_size -= sizeof(db->id); return &db->data[0]; } /* * Attempt to transition the newest descriptor from committed back to reserved * so that the record can be modified by a writer again. This is only possible * if the descriptor is not yet finalized and the provided @caller_id matches. */ static struct prb_desc *desc_reopen_last(struct prb_desc_ring *desc_ring, u32 caller_id, unsigned long *id_out) { unsigned long prev_state_val; enum desc_state d_state; struct prb_desc desc; struct prb_desc *d; unsigned long id; u32 cid; id = atomic_long_read(&desc_ring->head_id); /* * To reduce unnecessarily reopening, first check if the descriptor * state and caller ID are correct. */ d_state = desc_read(desc_ring, id, &desc, NULL, &cid); if (d_state != desc_committed || cid != caller_id) return NULL; d = to_desc(desc_ring, id); prev_state_val = DESC_SV(id, desc_committed); /* * Guarantee the reserved state is stored before reading any * record data. A full memory barrier is needed because @state_var * modification is followed by reading. This pairs with _prb_commit:B. * * Memory barrier involvement: * * If desc_reopen_last:A reads from _prb_commit:B, then * prb_reserve_in_last:A reads from _prb_commit:A. * * Relies on: * * WMB from _prb_commit:A to _prb_commit:B * matching * MB If desc_reopen_last:A to prb_reserve_in_last:A */ if (!atomic_long_try_cmpxchg(&d->state_var, &prev_state_val, DESC_SV(id, desc_reserved))) { /* LMM(desc_reopen_last:A) */ return NULL; } *id_out = id; return d; } /** * prb_reserve_in_last() - Re-reserve and extend the space in the ringbuffer * used by the newest record. * * @e: The entry structure to setup. * @rb: The ringbuffer to re-reserve and extend data in. * @r: The record structure to allocate buffers for. * @caller_id: The caller ID of the caller (reserving writer). * @max_size: Fail if the extended size would be greater than this. * * This is the public function available to writers to re-reserve and extend * data. * * The writer specifies the text size to extend (not the new total size) by * setting the @text_buf_size field of @r. To ensure proper initialization * of @r, prb_rec_init_wr() should be used. * * This function will fail if @caller_id does not match the caller ID of the * newest record. In that case the caller must reserve new data using * prb_reserve(). * * Context: Any context. Disables local interrupts on success. * Return: true if text data could be extended, otherwise false. * * On success: * * - @r->text_buf points to the beginning of the entire text buffer. * * - @r->text_buf_size is set to the new total size of the buffer. * * - @r->info is not touched so that @r->info->text_len could be used * to append the text. * * - prb_record_text_space() can be used on @e to query the new * actually used space. * * Important: All @r->info fields will already be set with the current values * for the record. I.e. @r->info->text_len will be less than * @text_buf_size. Writers can use @r->info->text_len to know * where concatenation begins and writers should update * @r->info->text_len after concatenating. */ bool prb_reserve_in_last(struct prb_reserved_entry *e, struct printk_ringbuffer *rb, struct printk_record *r, u32 caller_id, unsigned int max_size) { struct prb_desc_ring *desc_ring = &rb->desc_ring; struct printk_info *info; unsigned int data_size; struct prb_desc *d; unsigned long id; local_irq_save(e->irqflags); /* Transition the newest descriptor back to the reserved state. */ d = desc_reopen_last(desc_ring, caller_id, &id); if (!d) { local_irq_restore(e->irqflags); goto fail_reopen; } /* Now the writer has exclusive access: LMM(prb_reserve_in_last:A) */ info = to_info(desc_ring, id); /* * Set the @e fields here so that prb_commit() can be used if * anything fails from now on. */ e->rb = rb; e->id = id; /* * desc_reopen_last() checked the caller_id, but there was no * exclusive access at that point. The descriptor may have * changed since then. */ if (caller_id != info->caller_id) goto fail; if (BLK_DATALESS(&d->text_blk_lpos)) { if (WARN_ON_ONCE(info->text_len != 0)) { pr_warn_once("wrong text_len value (%hu, expecting 0)\n", info->text_len); info->text_len = 0; } if (!data_check_size(&rb->text_data_ring, r->text_buf_size)) goto fail; if (r->text_buf_size > max_size) goto fail; r->text_buf = data_alloc(rb, r->text_buf_size, &d->text_blk_lpos, id); } else { if (!get_data(&rb->text_data_ring, &d->text_blk_lpos, &data_size)) goto fail; /* * Increase the buffer size to include the original size. If * the meta data (@text_len) is not sane, use the full data * block size. */ if (WARN_ON_ONCE(info->text_len > data_size)) { pr_warn_once("wrong text_len value (%hu, expecting <=%u)\n", info->text_len, data_size); info->text_len = data_size; } r->text_buf_size += info->text_len; if (!data_check_size(&rb->text_data_ring, r->text_buf_size)) goto fail; if (r->text_buf_size > max_size) goto fail; r->text_buf = data_realloc(rb, r->text_buf_size, &d->text_blk_lpos, id); } if (r->text_buf_size && !r->text_buf) goto fail; r->info = info; e->text_space = space_used(&rb->text_data_ring, &d->text_blk_lpos); return true; fail: prb_commit(e); /* prb_commit() re-enabled interrupts. */ fail_reopen: /* Make it clear to the caller that the re-reserve failed. */ memset(r, 0, sizeof(*r)); return false; } /* * @last_finalized_seq value guarantees that all records up to and including * this sequence number are finalized and can be read. The only exception are * too old records which have already been overwritten. * * It is also guaranteed that @last_finalized_seq only increases. * * Be aware that finalized records following non-finalized records are not * reported because they are not yet available to the reader. For example, * a new record stored via printk() will not be available to a printer if * it follows a record that has not been finalized yet. However, once that * non-finalized record becomes finalized, @last_finalized_seq will be * appropriately updated and the full set of finalized records will be * available to the printer. And since each printk() caller will either * directly print or trigger deferred printing of all available unprinted * records, all printk() messages will get printed. */ static u64 desc_last_finalized_seq(struct printk_ringbuffer *rb) { struct prb_desc_ring *desc_ring = &rb->desc_ring; unsigned long ulseq; /* * Guarantee the sequence number is loaded before loading the * associated record in order to guarantee that the record can be * seen by this CPU. This pairs with desc_update_last_finalized:A. */ ulseq = atomic_long_read_acquire(&desc_ring->last_finalized_seq ); /* LMM(desc_last_finalized_seq:A) */ return __ulseq_to_u64seq(rb, ulseq); } static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq, struct printk_record *r, unsigned int *line_count); /* * Check if there are records directly following @last_finalized_seq that are * finalized. If so, update @last_finalized_seq to the latest of these * records. It is not allowed to skip over records that are not yet finalized. */ static void desc_update_last_finalized(struct printk_ringbuffer *rb) { struct prb_desc_ring *desc_ring = &rb->desc_ring; u64 old_seq = desc_last_finalized_seq(rb); unsigned long oldval; unsigned long newval; u64 finalized_seq; u64 try_seq; try_again: finalized_seq = old_seq; try_seq = finalized_seq + 1; /* Try to find later finalized records. */ while (_prb_read_valid(rb, &try_seq, NULL, NULL)) { finalized_seq = try_seq; try_seq++; } /* No update needed if no later finalized record was found. */ if (finalized_seq == old_seq) return; oldval = __u64seq_to_ulseq(old_seq); newval = __u64seq_to_ulseq(finalized_seq); /* * Set the sequence number of a later finalized record that has been * seen. * * Guarantee the record data is visible to other CPUs before storing * its sequence number. This pairs with desc_last_finalized_seq:A. * * Memory barrier involvement: * * If desc_last_finalized_seq:A reads from * desc_update_last_finalized:A, then desc_read:A reads from * _prb_commit:B. * * Relies on: * * RELEASE from _prb_commit:B to desc_update_last_finalized:A * matching * ACQUIRE from desc_last_finalized_seq:A to desc_read:A * * Note: _prb_commit:B and desc_update_last_finalized:A can be * different CPUs. However, the desc_update_last_finalized:A * CPU (which performs the release) must have previously seen * _prb_commit:B. */ if (!atomic_long_try_cmpxchg_release(&desc_ring->last_finalized_seq, &oldval, newval)) { /* LMM(desc_update_last_finalized:A) */ old_seq = __ulseq_to_u64seq(rb, oldval); goto try_again; } } /* * Attempt to finalize a specified descriptor. If this fails, the descriptor * is either already final or it will finalize itself when the writer commits. */ static void desc_make_final(struct printk_ringbuffer *rb, unsigned long id) { struct prb_desc_ring *desc_ring = &rb->desc_ring; unsigned long prev_state_val = DESC_SV(id, desc_committed); struct prb_desc *d = to_desc(desc_ring, id); if (atomic_long_try_cmpxchg_relaxed(&d->state_var, &prev_state_val, DESC_SV(id, desc_finalized))) { /* LMM(desc_make_final:A) */ desc_update_last_finalized(rb); } } /** * prb_reserve() - Reserve space in the ringbuffer. * * @e: The entry structure to setup. * @rb: The ringbuffer to reserve data in. * @r: The record structure to allocate buffers for. * * This is the public function available to writers to reserve data. * * The writer specifies the text size to reserve by setting the * @text_buf_size field of @r. To ensure proper initialization of @r, * prb_rec_init_wr() should be used. * * Context: Any context. Disables local interrupts on success. * Return: true if at least text data could be allocated, otherwise false. * * On success, the fields @info and @text_buf of @r will be set by this * function and should be filled in by the writer before committing. Also * on success, prb_record_text_space() can be used on @e to query the actual * space used for the text data block. * * Important: @info->text_len needs to be set correctly by the writer in * order for data to be readable and/or extended. Its value * is initialized to 0. */ bool prb_reserve(struct prb_reserved_entry *e, struct printk_ringbuffer *rb, struct printk_record *r) { struct prb_desc_ring *desc_ring = &rb->desc_ring; struct printk_info *info; struct prb_desc *d; unsigned long id; u64 seq; if (!data_check_size(&rb->text_data_ring, r->text_buf_size)) goto fail; /* * Descriptors in the reserved state act as blockers to all further * reservations once the desc_ring has fully wrapped. Disable * interrupts during the reserve/commit window in order to minimize * the likelihood of this happening. */ local_irq_save(e->irqflags); if (!desc_reserve(rb, &id)) { /* Descriptor reservation failures are tracked. */ atomic_long_inc(&rb->fail); local_irq_restore(e->irqflags); goto fail; } d = to_desc(desc_ring, id); info = to_info(desc_ring, id); /* * All @info fields (except @seq) are cleared and must be filled in * by the writer. Save @seq before clearing because it is used to * determine the new sequence number. */ seq = info->seq; memset(info, 0, sizeof(*info)); /* * Set the @e fields here so that prb_commit() can be used if * text data allocation fails. */ e->rb = rb; e->id = id; /* * Initialize the sequence number if it has "never been set". * Otherwise just increment it by a full wrap. * * @seq is considered "never been set" if it has a value of 0, * _except_ for @infos[0], which was specially setup by the ringbuffer * initializer and therefore is always considered as set. * * See the "Bootstrap" comment block in printk_ringbuffer.h for * details about how the initializer bootstraps the descriptors. */ if (seq == 0 && DESC_INDEX(desc_ring, id) != 0) info->seq = DESC_INDEX(desc_ring, id); else info->seq = seq + DESCS_COUNT(desc_ring); /* * New data is about to be reserved. Once that happens, previous * descriptors are no longer able to be extended. Finalize the * previous descriptor now so that it can be made available to * readers. (For seq==0 there is no previous descriptor.) */ if (info->seq > 0) desc_make_final(rb, DESC_ID(id - 1)); r->text_buf = data_alloc(rb, r->text_buf_size, &d->text_blk_lpos, id); /* If text data allocation fails, a data-less record is committed. */ if (r->text_buf_size && !r->text_buf) { prb_commit(e); /* prb_commit() re-enabled interrupts. */ goto fail; } r->info = info; /* Record full text space used by record. */ e->text_space = space_used(&rb->text_data_ring, &d->text_blk_lpos); return true; fail: /* Make it clear to the caller that the reserve failed. */ memset(r, 0, sizeof(*r)); return false; } EXPORT_SYMBOL_IF_KUNIT(prb_reserve); /* Commit the data (possibly finalizing it) and restore interrupts. */ static void _prb_commit(struct prb_reserved_entry *e, unsigned long state_val) { struct prb_desc_ring *desc_ring = &e->rb->desc_ring; struct prb_desc *d = to_desc(desc_ring, e->id); unsigned long prev_state_val = DESC_SV(e->id, desc_reserved); /* Now the writer has finished all writing: LMM(_prb_commit:A) */ /* * Set the descriptor as committed. See "ABA Issues" about why * cmpxchg() instead of set() is used. * * 1 Guarantee all record data is stored before the descriptor state * is stored as committed. A write memory barrier is sufficient * for this. This pairs with desc_read:B and desc_reopen_last:A. * * 2. Guarantee the descriptor state is stored as committed before * re-checking the head ID in order to possibly finalize this * descriptor. This pairs with desc_reserve:D. * * Memory barrier involvement: * * If prb_commit:A reads from desc_reserve:D, then * desc_make_final:A reads from _prb_commit:B. * * Relies on: * * MB _prb_commit:B to prb_commit:A * matching * MB desc_reserve:D to desc_make_final:A */ if (!atomic_long_try_cmpxchg(&d->state_var, &prev_state_val, DESC_SV(e->id, state_val))) { /* LMM(_prb_commit:B) */ WARN_ON_ONCE(1); } /* Restore interrupts, the reserve/commit window is finished. */ local_irq_restore(e->irqflags); } /** * prb_commit() - Commit (previously reserved) data to the ringbuffer. * * @e: The entry containing the reserved data information. * * This is the public function available to writers to commit data. * * Note that the data is not yet available to readers until it is finalized. * Finalizing happens automatically when space for the next record is * reserved. * * See prb_final_commit() for a version of this function that finalizes * immediately. * * Context: Any context. Enables local interrupts. */ void prb_commit(struct prb_reserved_entry *e) { struct prb_desc_ring *desc_ring = &e->rb->desc_ring; unsigned long head_id; _prb_commit(e, desc_committed); /* * If this descriptor is no longer the head (i.e. a new record has * been allocated), extending the data for this record is no longer * allowed and therefore it must be finalized. */ head_id = atomic_long_read(&desc_ring->head_id); /* LMM(prb_commit:A) */ if (head_id != e->id) desc_make_final(e->rb, e->id); } EXPORT_SYMBOL_IF_KUNIT(prb_commit); /** * prb_final_commit() - Commit and finalize (previously reserved) data to * the ringbuffer. * * @e: The entry containing the reserved data information. * * This is the public function available to writers to commit+finalize data. * * By finalizing, the data is made immediately available to readers. * * This function should only be used if there are no intentions of extending * this data using prb_reserve_in_last(). * * Context: Any context. Enables local interrupts. */ void prb_final_commit(struct prb_reserved_entry *e) { _prb_commit(e, desc_finalized); desc_update_last_finalized(e->rb); } /* * Count the number of lines in provided text. All text has at least 1 line * (even if @text_size is 0). Each '\n' processed is counted as an additional * line. */ static unsigned int count_lines(const char *text, unsigned int text_size) { unsigned int next_size = text_size; unsigned int line_count = 1; const char *next = text; while (next_size) { next = memchr(next, '\n', next_size); if (!next) break; line_count++; next++; next_size = text_size - (next - text); } return line_count; } /* * Given @blk_lpos, copy an expected @len of data into the provided buffer. * If @line_count is provided, count the number of lines in the data. * * This function (used by readers) performs strict validation on the data * size to possibly detect bugs in the writer code. A WARN_ON_ONCE() is * triggered if an internal error is detected. */ static bool copy_data(struct prb_data_ring *data_ring, struct prb_data_blk_lpos *blk_lpos, u16 len, char *buf, unsigned int buf_size, unsigned int *line_count) { unsigned int data_size; const char *data; /* Caller might not want any data. */ if ((!buf || !buf_size) && !line_count) return true; data = get_data(data_ring, blk_lpos, &data_size); if (!data) return false; /* * Actual cannot be less than expected. It can be more than expected * because of the trailing alignment padding. * * Note that invalid @len values can occur because the caller loads * the value during an allowed data race. */ if (data_size < (unsigned int)len) return false; /* Caller interested in the line count? */ if (line_count) *line_count = count_lines(data, len); /* Caller interested in the data content? */ if (!buf || !buf_size) return true; data_size = min_t(unsigned int, buf_size, len); memcpy(&buf[0], data, data_size); /* LMM(copy_data:A) */ return true; } /* * This is an extended version of desc_read(). It gets a copy of a specified * descriptor. However, it also verifies that the record is finalized and has * the sequence number @seq. On success, 0 is returned. * * Error return values: * -EINVAL: A finalized record with sequence number @seq does not exist. * -ENOENT: A finalized record with sequence number @seq exists, but its data * is not available. This is a valid record, so readers should * continue with the next record. */ static int desc_read_finalized_seq(struct prb_desc_ring *desc_ring, unsigned long id, u64 seq, struct prb_desc *desc_out) { struct prb_data_blk_lpos *blk_lpos = &desc_out->text_blk_lpos; enum desc_state d_state; u64 s; d_state = desc_read(desc_ring, id, desc_out, &s, NULL); /* * An unexpected @id (desc_miss) or @seq mismatch means the record * does not exist. A descriptor in the reserved or committed state * means the record does not yet exist for the reader. */ if (d_state == desc_miss || d_state == desc_reserved || d_state == desc_committed || s != seq) { return -EINVAL; } /* * A descriptor in the reusable state may no longer have its data * available; report it as existing but with lost data. Or the record * may actually be a record with lost data. */ if (d_state == desc_reusable || (blk_lpos->begin == FAILED_LPOS && blk_lpos->next == FAILED_LPOS)) { return -ENOENT; } return 0; } /* * Copy the ringbuffer data from the record with @seq to the provided * @r buffer. On success, 0 is returned. * * See desc_read_finalized_seq() for error return values. */ static int prb_read(struct printk_ringbuffer *rb, u64 seq, struct printk_record *r, unsigned int *line_count) { struct prb_desc_ring *desc_ring = &rb->desc_ring; struct printk_info *info = to_info(desc_ring, seq); struct prb_desc *rdesc = to_desc(desc_ring, seq); atomic_long_t *state_var = &rdesc->state_var; struct prb_desc desc; unsigned long id; int err; /* Extract the ID, used to specify the descriptor to read. */ id = DESC_ID(atomic_long_read(state_var)); /* Get a local copy of the correct descriptor (if available). */ err = desc_read_finalized_seq(desc_ring, id, seq, &desc); /* * If @r is NULL, the caller is only interested in the availability * of the record. */ if (err || !r) return err; /* If requested, copy meta data. */ if (r->info) memcpy(r->info, info, sizeof(*(r->info))); /* Copy text data. If it fails, this is a data-less record. */ if (!copy_data(&rb->text_data_ring, &desc.text_blk_lpos, info->text_len, r->text_buf, r->text_buf_size, line_count)) { return -ENOENT; } /* Ensure the record is still finalized and has the same @seq. */ return desc_read_finalized_seq(desc_ring, id, seq, &desc); } /* Get the sequence number of the tail descriptor. */ u64 prb_first_seq(struct printk_ringbuffer *rb) { struct prb_desc_ring *desc_ring = &rb->desc_ring; enum desc_state d_state; struct prb_desc desc; unsigned long id; u64 seq; for (;;) { id = atomic_long_read(&rb->desc_ring.tail_id); /* LMM(prb_first_seq:A) */ d_state = desc_read(desc_ring, id, &desc, &seq, NULL); /* LMM(prb_first_seq:B) */ /* * This loop will not be infinite because the tail is * _always_ in the finalized or reusable state. */ if (d_state == desc_finalized || d_state == desc_reusable) break; /* * Guarantee the last state load from desc_read() is before * reloading @tail_id in order to see a new tail in the case * that the descriptor has been recycled. This pairs with * desc_reserve:D. * * Memory barrier involvement: * * If prb_first_seq:B reads from desc_reserve:F, then * prb_first_seq:A reads from desc_push_tail:B. * * Relies on: * * MB from desc_push_tail:B to desc_reserve:F * matching * RMB prb_first_seq:B to prb_first_seq:A */ smp_rmb(); /* LMM(prb_first_seq:C) */ } return seq; } /** * prb_next_reserve_seq() - Get the sequence number after the most recently * reserved record. * * @rb: The ringbuffer to get the sequence number from. * * This is the public function available to readers to see what sequence * number will be assigned to the next reserved record. * * Note that depending on the situation, this value can be equal to or * higher than the sequence number returned by prb_next_seq(). * * Context: Any context. * Return: The sequence number that will be assigned to the next record * reserved. */ u64 prb_next_reserve_seq(struct printk_ringbuffer *rb) { struct prb_desc_ring *desc_ring = &rb->desc_ring; unsigned long last_finalized_id; atomic_long_t *state_var; u64 last_finalized_seq; unsigned long head_id; struct prb_desc desc; unsigned long diff; struct prb_desc *d; int err; /* * It may not be possible to read a sequence number for @head_id. * So the ID of @last_finailzed_seq is used to calculate what the * sequence number of @head_id will be. */ try_again: last_finalized_seq = desc_last_finalized_seq(rb); /* * @head_id is loaded after @last_finalized_seq to ensure that * it points to the record with @last_finalized_seq or newer. * * Memory barrier involvement: * * If desc_last_finalized_seq:A reads from * desc_update_last_finalized:A, then * prb_next_reserve_seq:A reads from desc_reserve:D. * * Relies on: * * RELEASE from desc_reserve:D to desc_update_last_finalized:A * matching * ACQUIRE from desc_last_finalized_seq:A to prb_next_reserve_seq:A * * Note: desc_reserve:D and desc_update_last_finalized:A can be * different CPUs. However, the desc_update_last_finalized:A CPU * (which performs the release) must have previously seen * desc_read:C, which implies desc_reserve:D can be seen. */ head_id = atomic_long_read(&desc_ring->head_id); /* LMM(prb_next_reserve_seq:A) */ d = to_desc(desc_ring, last_finalized_seq); state_var = &d->state_var; /* Extract the ID, used to specify the descriptor to read. */ last_finalized_id = DESC_ID(atomic_long_read(state_var)); /* Ensure @last_finalized_id is correct. */ err = desc_read_finalized_seq(desc_ring, last_finalized_id, last_finalized_seq, &desc); if (err == -EINVAL) { if (last_finalized_seq == 0) { /* * No record has been finalized or even reserved yet. * * The @head_id is initialized such that the first * increment will yield the first record (seq=0). * Handle it separately to avoid a negative @diff * below. */ if (head_id == DESC0_ID(desc_ring->count_bits)) return 0; /* * One or more descriptors are already reserved. Use * the descriptor ID of the first one (@seq=0) for * the @diff below. */ last_finalized_id = DESC0_ID(desc_ring->count_bits) + 1; } else { /* Record must have been overwritten. Try again. */ goto try_again; } } /* Diff of known descriptor IDs to compute related sequence numbers. */ diff = head_id - last_finalized_id; /* * @head_id points to the most recently reserved record, but this * function returns the sequence number that will be assigned to the * next (not yet reserved) record. Thus +1 is needed. */ return (last_finalized_seq + diff + 1); } /* * Non-blocking read of a record. * * On success @seq is updated to the record that was read and (if provided) * @r and @line_count will contain the read/calculated data. * * On failure @seq is updated to a record that is not yet available to the * reader, but it will be the next record available to the reader. * * Note: When the current CPU is in panic, this function will skip over any * non-existent/non-finalized records in order to allow the panic CPU * to print any and all records that have been finalized. */ static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq, struct printk_record *r, unsigned int *line_count) { u64 tail_seq; int err; while ((err = prb_read(rb, *seq, r, line_count))) { tail_seq = prb_first_seq(rb); if (*seq < tail_seq) { /* * Behind the tail. Catch up and try again. This * can happen for -ENOENT and -EINVAL cases. */ *seq = tail_seq; } else if (err == -ENOENT) { /* Record exists, but the data was lost. Skip. */ (*seq)++; } else { /* * Non-existent/non-finalized record. Must stop. * * For panic situations it cannot be expected that * non-finalized records will become finalized. But * there may be other finalized records beyond that * need to be printed for a panic situation. If this * is the panic CPU, skip this * non-existent/non-finalized record unless non-panic * CPUs are still running and their debugging is * explicitly enabled. * * Note that new messages printed on panic CPU are * finalized when we are here. The only exception * might be the last message without trailing newline. * But it would have the sequence number returned * by "prb_next_reserve_seq() - 1". */ if (panic_on_this_cpu() && (!debug_non_panic_cpus || legacy_allow_panic_sync) && ((*seq + 1) < prb_next_reserve_seq(rb))) { (*seq)++; } else { return false; } } } return true; } /** * prb_read_valid() - Non-blocking read of a requested record or (if gone) * the next available record. * * @rb: The ringbuffer to read from. * @seq: The sequence number of the record to read. * @r: A record data buffer to store the read record to. * * This is the public function available to readers to read a record. * * The reader provides the @info and @text_buf buffers of @r to be * filled in. Any of the buffer pointers can be set to NULL if the reader * is not interested in that data. To ensure proper initialization of @r, * prb_rec_init_rd() should be used. * * Context: Any context. * Return: true if a record was read, otherwise false. * * On success, the reader must check r->info.seq to see which record was * actually read. This allows the reader to detect dropped records. * * Failure means @seq refers to a record not yet available to the reader. */ bool prb_read_valid(struct printk_ringbuffer *rb, u64 seq, struct printk_record *r) { return _prb_read_valid(rb, &seq, r, NULL); } EXPORT_SYMBOL_IF_KUNIT(prb_read_valid); /** * prb_read_valid_info() - Non-blocking read of meta data for a requested * record or (if gone) the next available record. * * @rb: The ringbuffer to read from. * @seq: The sequence number of the record to read. * @info: A buffer to store the read record meta data to. * @line_count: A buffer to store the number of lines in the record text. * * This is the public function available to readers to read only the * meta data of a record. * * The reader provides the @info, @line_count buffers to be filled in. * Either of the buffer pointers can be set to NULL if the reader is not * interested in that data. * * Context: Any context. * Return: true if a record's meta data was read, otherwise false. * * On success, the reader must check info->seq to see which record meta data * was actually read. This allows the reader to detect dropped records. * * Failure means @seq refers to a record not yet available to the reader. */ bool prb_read_valid_info(struct printk_ringbuffer *rb, u64 seq, struct printk_info *info, unsigned int *line_count) { struct printk_record r; prb_rec_init_rd(&r, info, NULL, 0); return _prb_read_valid(rb, &seq, &r, line_count); } /** * prb_first_valid_seq() - Get the sequence number of the oldest available * record. * * @rb: The ringbuffer to get the sequence number from. * * This is the public function available to readers to see what the * first/oldest valid sequence number is. * * This provides readers a starting point to begin iterating the ringbuffer. * * Context: Any context. * Return: The sequence number of the first/oldest record or, if the * ringbuffer is empty, 0 is returned. */ u64 prb_first_valid_seq(struct printk_ringbuffer *rb) { u64 seq = 0; if (!_prb_read_valid(rb, &seq, NULL, NULL)) return 0; return seq; } /** * prb_next_seq() - Get the sequence number after the last available record. * * @rb: The ringbuffer to get the sequence number from. * * This is the public function available to readers to see what the next * newest sequence number available to readers will be. * * This provides readers a sequence number to jump to if all currently * available records should be skipped. It is guaranteed that all records * previous to the returned value have been finalized and are (or were) * available to the reader. * * Context: Any context. * Return: The sequence number of the next newest (not yet available) record * for readers. */ u64 prb_next_seq(struct printk_ringbuffer *rb) { u64 seq; seq = desc_last_finalized_seq(rb); /* * Begin searching after the last finalized record. * * On 0, the search must begin at 0 because of hack#2 * of the bootstrapping phase it is not known if a * record at index 0 exists. */ if (seq != 0) seq++; /* * The information about the last finalized @seq might be inaccurate. * Search forward to find the current one. */ while (_prb_read_valid(rb, &seq, NULL, NULL)) seq++; return seq; } /** * prb_init() - Initialize a ringbuffer to use provided external buffers. * * @rb: The ringbuffer to initialize. * @text_buf: The data buffer for text data. * @textbits: The size of @text_buf as a power-of-2 value. * @descs: The descriptor buffer for ringbuffer records. * @descbits: The count of @descs items as a power-of-2 value. * @infos: The printk_info buffer for ringbuffer records. * * This is the public function available to writers to setup a ringbuffer * during runtime using provided buffers. * * This must match the initialization of DEFINE_PRINTKRB(). * * Context: Any context. */ void prb_init(struct printk_ringbuffer *rb, char *text_buf, unsigned int textbits, struct prb_desc *descs, unsigned int descbits, struct printk_info *infos) { memset(descs, 0, _DESCS_COUNT(descbits) * sizeof(descs[0])); memset(infos, 0, _DESCS_COUNT(descbits) * sizeof(infos[0])); rb->desc_ring.count_bits = descbits; rb->desc_ring.descs = descs; rb->desc_ring.infos = infos; atomic_long_set(&rb->desc_ring.head_id, DESC0_ID(descbits)); atomic_long_set(&rb->desc_ring.tail_id, DESC0_ID(descbits)); atomic_long_set(&rb->desc_ring.last_finalized_seq, 0); rb->text_data_ring.size_bits = textbits; rb->text_data_ring.data = text_buf; atomic_long_set(&rb->text_data_ring.head_lpos, BLK0_LPOS(textbits)); atomic_long_set(&rb->text_data_ring.tail_lpos, BLK0_LPOS(textbits)); atomic_long_set(&rb->fail, 0); atomic_long_set(&(descs[_DESCS_COUNT(descbits) - 1].state_var), DESC0_SV(descbits)); descs[_DESCS_COUNT(descbits) - 1].text_blk_lpos.begin = FAILED_LPOS; descs[_DESCS_COUNT(descbits) - 1].text_blk_lpos.next = FAILED_LPOS; infos[0].seq = -(u64)_DESCS_COUNT(descbits); infos[_DESCS_COUNT(descbits) - 1].seq = 0; } EXPORT_SYMBOL_IF_KUNIT(prb_init); /** * prb_record_text_space() - Query the full actual used ringbuffer space for * the text data of a reserved entry. * * @e: The successfully reserved entry to query. * * This is the public function available to writers to see how much actual * space is used in the ringbuffer to store the text data of the specified * entry. * * This function is only valid if @e has been successfully reserved using * prb_reserve(). * * Context: Any context. * Return: The size in bytes used by the text data of the associated record. */ unsigned int prb_record_text_space(struct prb_reserved_entry *e) { return e->text_space; }
1 4 4 137 81 56 71 28 34 318 318 158 147 75 115 71 580 579 22 295 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 #include <linux/init.h> #include <linux/kernel.h> #include <linux/netdevice.h> #include <net/net_namespace.h> #include <net/netfilter/nf_tables.h> #include <linux/netfilter_ipv4.h> #include <linux/netfilter_ipv6.h> #include <linux/netfilter_bridge.h> #include <linux/netfilter_arp.h> #include <net/netfilter/nf_tables_ipv4.h> #include <net/netfilter/nf_tables_ipv6.h> #ifdef CONFIG_NF_TABLES_IPV4 static unsigned int nft_do_chain_ipv4(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct nft_pktinfo pkt; nft_set_pktinfo(&pkt, skb, state); nft_set_pktinfo_ipv4(&pkt); return nft_do_chain(&pkt, priv); } static const struct nft_chain_type nft_chain_filter_ipv4 = { .name = "filter", .type = NFT_CHAIN_T_DEFAULT, .family = NFPROTO_IPV4, .hook_mask = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) | (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_POST_ROUTING), .hooks = { [NF_INET_LOCAL_IN] = nft_do_chain_ipv4, [NF_INET_LOCAL_OUT] = nft_do_chain_ipv4, [NF_INET_FORWARD] = nft_do_chain_ipv4, [NF_INET_PRE_ROUTING] = nft_do_chain_ipv4, [NF_INET_POST_ROUTING] = nft_do_chain_ipv4, }, }; static void nft_chain_filter_ipv4_init(void) { nft_register_chain_type(&nft_chain_filter_ipv4); } static void nft_chain_filter_ipv4_fini(void) { nft_unregister_chain_type(&nft_chain_filter_ipv4); } #else static inline void nft_chain_filter_ipv4_init(void) {} static inline void nft_chain_filter_ipv4_fini(void) {} #endif /* CONFIG_NF_TABLES_IPV4 */ #ifdef CONFIG_NF_TABLES_ARP static unsigned int nft_do_chain_arp(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct nft_pktinfo pkt; nft_set_pktinfo(&pkt, skb, state); nft_set_pktinfo_unspec(&pkt); return nft_do_chain(&pkt, priv); } static const struct nft_chain_type nft_chain_filter_arp = { .name = "filter", .type = NFT_CHAIN_T_DEFAULT, .family = NFPROTO_ARP, .owner = THIS_MODULE, .hook_mask = (1 << NF_ARP_IN) | (1 << NF_ARP_OUT), .hooks = { [NF_ARP_IN] = nft_do_chain_arp, [NF_ARP_OUT] = nft_do_chain_arp, }, }; static void nft_chain_filter_arp_init(void) { nft_register_chain_type(&nft_chain_filter_arp); } static void nft_chain_filter_arp_fini(void) { nft_unregister_chain_type(&nft_chain_filter_arp); } #else static inline void nft_chain_filter_arp_init(void) {} static inline void nft_chain_filter_arp_fini(void) {} #endif /* CONFIG_NF_TABLES_ARP */ #ifdef CONFIG_NF_TABLES_IPV6 static unsigned int nft_do_chain_ipv6(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct nft_pktinfo pkt; nft_set_pktinfo(&pkt, skb, state); nft_set_pktinfo_ipv6(&pkt); return nft_do_chain(&pkt, priv); } static const struct nft_chain_type nft_chain_filter_ipv6 = { .name = "filter", .type = NFT_CHAIN_T_DEFAULT, .family = NFPROTO_IPV6, .hook_mask = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) | (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_POST_ROUTING), .hooks = { [NF_INET_LOCAL_IN] = nft_do_chain_ipv6, [NF_INET_LOCAL_OUT] = nft_do_chain_ipv6, [NF_INET_FORWARD] = nft_do_chain_ipv6, [NF_INET_PRE_ROUTING] = nft_do_chain_ipv6, [NF_INET_POST_ROUTING] = nft_do_chain_ipv6, }, }; static void nft_chain_filter_ipv6_init(void) { nft_register_chain_type(&nft_chain_filter_ipv6); } static void nft_chain_filter_ipv6_fini(void) { nft_unregister_chain_type(&nft_chain_filter_ipv6); } #else static inline void nft_chain_filter_ipv6_init(void) {} static inline void nft_chain_filter_ipv6_fini(void) {} #endif /* CONFIG_NF_TABLES_IPV6 */ #ifdef CONFIG_NF_TABLES_INET static unsigned int nft_do_chain_inet(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct nft_pktinfo pkt; nft_set_pktinfo(&pkt, skb, state); switch (state->pf) { case NFPROTO_IPV4: nft_set_pktinfo_ipv4(&pkt); break; case NFPROTO_IPV6: nft_set_pktinfo_ipv6(&pkt); break; default: break; } return nft_do_chain(&pkt, priv); } static unsigned int nft_do_chain_inet_ingress(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct nf_hook_state ingress_state = *state; struct nft_pktinfo pkt; switch (skb->protocol) { case htons(ETH_P_IP): /* Original hook is NFPROTO_NETDEV and NF_NETDEV_INGRESS. */ ingress_state.pf = NFPROTO_IPV4; ingress_state.hook = NF_INET_INGRESS; nft_set_pktinfo(&pkt, skb, &ingress_state); if (nft_set_pktinfo_ipv4_ingress(&pkt) < 0) return NF_DROP; break; case htons(ETH_P_IPV6): ingress_state.pf = NFPROTO_IPV6; ingress_state.hook = NF_INET_INGRESS; nft_set_pktinfo(&pkt, skb, &ingress_state); if (nft_set_pktinfo_ipv6_ingress(&pkt) < 0) return NF_DROP; break; default: return NF_ACCEPT; } return nft_do_chain(&pkt, priv); } static const struct nft_chain_type nft_chain_filter_inet = { .name = "filter", .type = NFT_CHAIN_T_DEFAULT, .family = NFPROTO_INET, .hook_mask = (1 << NF_INET_INGRESS) | (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) | (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_POST_ROUTING), .hooks = { [NF_INET_INGRESS] = nft_do_chain_inet_ingress, [NF_INET_LOCAL_IN] = nft_do_chain_inet, [NF_INET_LOCAL_OUT] = nft_do_chain_inet, [NF_INET_FORWARD] = nft_do_chain_inet, [NF_INET_PRE_ROUTING] = nft_do_chain_inet, [NF_INET_POST_ROUTING] = nft_do_chain_inet, }, }; static void nft_chain_filter_inet_init(void) { nft_register_chain_type(&nft_chain_filter_inet); } static void nft_chain_filter_inet_fini(void) { nft_unregister_chain_type(&nft_chain_filter_inet); } #else static inline void nft_chain_filter_inet_init(void) {} static inline void nft_chain_filter_inet_fini(void) {} #endif /* CONFIG_NF_TABLES_IPV6 */ #if IS_ENABLED(CONFIG_NF_TABLES_BRIDGE) static unsigned int nft_do_chain_bridge(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct nft_pktinfo pkt; nft_set_pktinfo(&pkt, skb, state); switch (eth_hdr(skb)->h_proto) { case htons(ETH_P_IP): nft_set_pktinfo_ipv4_validate(&pkt); break; case htons(ETH_P_IPV6): nft_set_pktinfo_ipv6_validate(&pkt); break; default: nft_set_pktinfo_unspec(&pkt); break; } return nft_do_chain(&pkt, priv); } static const struct nft_chain_type nft_chain_filter_bridge = { .name = "filter", .type = NFT_CHAIN_T_DEFAULT, .family = NFPROTO_BRIDGE, .hook_mask = (1 << NF_BR_PRE_ROUTING) | (1 << NF_BR_LOCAL_IN) | (1 << NF_BR_FORWARD) | (1 << NF_BR_LOCAL_OUT) | (1 << NF_BR_POST_ROUTING), .hooks = { [NF_BR_PRE_ROUTING] = nft_do_chain_bridge, [NF_BR_LOCAL_IN] = nft_do_chain_bridge, [NF_BR_FORWARD] = nft_do_chain_bridge, [NF_BR_LOCAL_OUT] = nft_do_chain_bridge, [NF_BR_POST_ROUTING] = nft_do_chain_bridge, }, }; static void nft_chain_filter_bridge_init(void) { nft_register_chain_type(&nft_chain_filter_bridge); } static void nft_chain_filter_bridge_fini(void) { nft_unregister_chain_type(&nft_chain_filter_bridge); } #else static inline void nft_chain_filter_bridge_init(void) {} static inline void nft_chain_filter_bridge_fini(void) {} #endif /* CONFIG_NF_TABLES_BRIDGE */ #ifdef CONFIG_NF_TABLES_NETDEV static unsigned int nft_do_chain_netdev(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct nft_pktinfo pkt; nft_set_pktinfo(&pkt, skb, state); switch (skb->protocol) { case htons(ETH_P_IP): nft_set_pktinfo_ipv4_validate(&pkt); break; case htons(ETH_P_IPV6): nft_set_pktinfo_ipv6_validate(&pkt); break; default: nft_set_pktinfo_unspec(&pkt); break; } return nft_do_chain(&pkt, priv); } static const struct nft_chain_type nft_chain_filter_netdev = { .name = "filter", .type = NFT_CHAIN_T_DEFAULT, .family = NFPROTO_NETDEV, .hook_mask = (1 << NF_NETDEV_INGRESS) | (1 << NF_NETDEV_EGRESS), .hooks = { [NF_NETDEV_INGRESS] = nft_do_chain_netdev, [NF_NETDEV_EGRESS] = nft_do_chain_netdev, }, }; static int nft_netdev_event(unsigned long event, struct net_device *dev, struct nft_base_chain *basechain, bool changename) { struct nft_table *table = basechain->chain.table; struct nf_hook_ops *ops; struct nft_hook *hook; bool match; list_for_each_entry(hook, &basechain->hook_list, list) { ops = nft_hook_find_ops(hook, dev); match = !strncmp(hook->ifname, dev->name, hook->ifnamelen); switch (event) { case NETDEV_UNREGISTER: /* NOP if not found or new name still matching */ if (!ops || (changename && match)) continue; if (!(table->flags & NFT_TABLE_F_DORMANT)) nf_unregister_net_hook(dev_net(dev), ops); list_del_rcu(&ops->list); kfree_rcu(ops, rcu); break; case NETDEV_REGISTER: /* NOP if not matching or already registered */ if (!match || (changename && ops)) continue; ops = kmemdup(&basechain->ops, sizeof(struct nf_hook_ops), GFP_KERNEL_ACCOUNT); if (!ops) return 1; ops->dev = dev; if (!(table->flags & NFT_TABLE_F_DORMANT) && nf_register_net_hook(dev_net(dev), ops)) { kfree(ops); return 1; } list_add_tail_rcu(&ops->list, &hook->ops_list); break; } break; } return 0; } static int __nf_tables_netdev_event(unsigned long event, struct net_device *dev, bool changename) { struct nft_base_chain *basechain; struct nftables_pernet *nft_net; struct nft_chain *chain; struct nft_table *table; nft_net = nft_pernet(dev_net(dev)); list_for_each_entry(table, &nft_net->tables, list) { if (table->family != NFPROTO_NETDEV && table->family != NFPROTO_INET) continue; list_for_each_entry(chain, &table->chains, list) { if (!nft_is_base_chain(chain)) continue; basechain = nft_base_chain(chain); if (table->family == NFPROTO_INET && basechain->ops.hooknum != NF_INET_INGRESS) continue; if (nft_netdev_event(event, dev, basechain, changename)) return 1; } } return 0; } static int nf_tables_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct nftables_pernet *nft_net; int ret = NOTIFY_DONE; if (event != NETDEV_REGISTER && event != NETDEV_UNREGISTER && event != NETDEV_CHANGENAME) return NOTIFY_DONE; nft_net = nft_pernet(dev_net(dev)); mutex_lock(&nft_net->commit_mutex); if (event == NETDEV_CHANGENAME) { if (__nf_tables_netdev_event(NETDEV_REGISTER, dev, true)) { ret = NOTIFY_BAD; goto out_unlock; } __nf_tables_netdev_event(NETDEV_UNREGISTER, dev, true); } else if (__nf_tables_netdev_event(event, dev, false)) { ret = NOTIFY_BAD; } out_unlock: mutex_unlock(&nft_net->commit_mutex); return ret; } static struct notifier_block nf_tables_netdev_notifier = { .notifier_call = nf_tables_netdev_event, }; static int nft_chain_filter_netdev_init(void) { int err; nft_register_chain_type(&nft_chain_filter_netdev); err = register_netdevice_notifier(&nf_tables_netdev_notifier); if (err) goto err_register_netdevice_notifier; return 0; err_register_netdevice_notifier: nft_unregister_chain_type(&nft_chain_filter_netdev); return err; } static void nft_chain_filter_netdev_fini(void) { nft_unregister_chain_type(&nft_chain_filter_netdev); unregister_netdevice_notifier(&nf_tables_netdev_notifier); } #else static inline int nft_chain_filter_netdev_init(void) { return 0; } static inline void nft_chain_filter_netdev_fini(void) {} #endif /* CONFIG_NF_TABLES_NETDEV */ int __init nft_chain_filter_init(void) { int err; err = nft_chain_filter_netdev_init(); if (err < 0) return err; nft_chain_filter_ipv4_init(); nft_chain_filter_ipv6_init(); nft_chain_filter_arp_init(); nft_chain_filter_inet_init(); nft_chain_filter_bridge_init(); return 0; } void nft_chain_filter_fini(void) { nft_chain_filter_bridge_fini(); nft_chain_filter_inet_fini(); nft_chain_filter_arp_fini(); nft_chain_filter_ipv6_fini(); nft_chain_filter_ipv4_fini(); nft_chain_filter_netdev_fini(); }
1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 // SPDX-License-Identifier: GPL-2.0 #include <linux/cpufreq.h> #include <linux/fs.h> #include <linux/init.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> extern const struct seq_operations cpuinfo_op; static int cpuinfo_open(struct inode *inode, struct file *file) { return seq_open(file, &cpuinfo_op); } static const struct proc_ops cpuinfo_proc_ops = { .proc_flags = PROC_ENTRY_PERMANENT, .proc_open = cpuinfo_open, .proc_read_iter = seq_read_iter, .proc_lseek = seq_lseek, .proc_release = seq_release, }; static int __init proc_cpuinfo_init(void) { proc_create("cpuinfo", 0, NULL, &cpuinfo_proc_ops); return 0; } fs_initcall(proc_cpuinfo_init);
13 13 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 // SPDX-License-Identifier: GPL-2.0-or-later /* AFS vlserver list management. * * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #include <linux/kernel.h> #include <linux/slab.h> #include "internal.h" struct afs_vlserver *afs_alloc_vlserver(const char *name, size_t name_len, unsigned short port) { struct afs_vlserver *vlserver; static atomic_t debug_ids; vlserver = kzalloc(struct_size(vlserver, name, name_len + 1), GFP_KERNEL); if (vlserver) { refcount_set(&vlserver->ref, 1); rwlock_init(&vlserver->lock); init_waitqueue_head(&vlserver->probe_wq); spin_lock_init(&vlserver->probe_lock); vlserver->debug_id = atomic_inc_return(&debug_ids); vlserver->rtt = UINT_MAX; vlserver->name_len = name_len; vlserver->service_id = VL_SERVICE; vlserver->port = port; memcpy(vlserver->name, name, name_len); } return vlserver; } static void afs_vlserver_rcu(struct rcu_head *rcu) { struct afs_vlserver *vlserver = container_of(rcu, struct afs_vlserver, rcu); afs_put_addrlist(rcu_access_pointer(vlserver->addresses), afs_alist_trace_put_vlserver); kfree_rcu(vlserver, rcu); } void afs_put_vlserver(struct afs_net *net, struct afs_vlserver *vlserver) { if (vlserver && refcount_dec_and_test(&vlserver->ref)) call_rcu(&vlserver->rcu, afs_vlserver_rcu); } struct afs_vlserver_list *afs_alloc_vlserver_list(unsigned int nr_servers) { struct afs_vlserver_list *vllist; vllist = kzalloc(struct_size(vllist, servers, nr_servers), GFP_KERNEL); if (vllist) { refcount_set(&vllist->ref, 1); rwlock_init(&vllist->lock); } return vllist; } void afs_put_vlserverlist(struct afs_net *net, struct afs_vlserver_list *vllist) { if (vllist) { if (refcount_dec_and_test(&vllist->ref)) { int i; for (i = 0; i < vllist->nr_servers; i++) { afs_put_vlserver(net, vllist->servers[i].server); } kfree_rcu(vllist, rcu); } } } static u16 afs_extract_le16(const u8 **_b) { u16 val; val = (u16)*(*_b)++ << 0; val |= (u16)*(*_b)++ << 8; return val; } /* * Build a VL server address list from a DNS queried server list. */ static struct afs_addr_list *afs_extract_vl_addrs(struct afs_net *net, const u8 **_b, const u8 *end, u8 nr_addrs, u16 port) { struct afs_addr_list *alist; const u8 *b = *_b; int ret = -EINVAL; alist = afs_alloc_addrlist(nr_addrs); if (!alist) return ERR_PTR(-ENOMEM); if (nr_addrs == 0) return alist; for (; nr_addrs > 0 && end - b >= nr_addrs; nr_addrs--) { struct dns_server_list_v1_address hdr; __be32 x[4]; hdr.address_type = *b++; switch (hdr.address_type) { case DNS_ADDRESS_IS_IPV4: if (end - b < 4) { _leave(" = -EINVAL [short inet]"); goto error; } memcpy(x, b, 4); ret = afs_merge_fs_addr4(net, alist, x[0], port); if (ret < 0) goto error; b += 4; break; case DNS_ADDRESS_IS_IPV6: if (end - b < 16) { _leave(" = -EINVAL [short inet6]"); goto error; } memcpy(x, b, 16); ret = afs_merge_fs_addr6(net, alist, x, port); if (ret < 0) goto error; b += 16; break; default: _leave(" = -EADDRNOTAVAIL [unknown af %u]", hdr.address_type); ret = -EADDRNOTAVAIL; goto error; } } /* Start with IPv6 if available. */ if (alist->nr_ipv4 < alist->nr_addrs) alist->preferred = alist->nr_ipv4; *_b = b; return alist; error: *_b = b; afs_put_addrlist(alist, afs_alist_trace_put_parse_error); return ERR_PTR(ret); } /* * Build a VL server list from a DNS queried server list. */ struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *cell, const void *buffer, size_t buffer_size) { const struct dns_server_list_v1_header *hdr = buffer; struct dns_server_list_v1_server bs; struct afs_vlserver_list *vllist, *previous; struct afs_addr_list *addrs; struct afs_vlserver *server; const u8 *b = buffer, *end = buffer + buffer_size; int ret = -ENOMEM, nr_servers, i, j; _enter(""); /* Check that it's a server list, v1 */ if (end - b < sizeof(*hdr) || hdr->hdr.content != DNS_PAYLOAD_IS_SERVER_LIST || hdr->hdr.version != 1) { pr_notice("kAFS: Got DNS record [%u,%u] len %zu\n", hdr->hdr.content, hdr->hdr.version, end - b); ret = -EDESTADDRREQ; goto dump; } nr_servers = hdr->nr_servers; vllist = afs_alloc_vlserver_list(nr_servers); if (!vllist) return ERR_PTR(-ENOMEM); vllist->source = (hdr->source < NR__dns_record_source) ? hdr->source : NR__dns_record_source; vllist->status = (hdr->status < NR__dns_lookup_status) ? hdr->status : NR__dns_lookup_status; read_lock(&cell->vl_servers_lock); previous = afs_get_vlserverlist( rcu_dereference_protected(cell->vl_servers, lockdep_is_held(&cell->vl_servers_lock))); read_unlock(&cell->vl_servers_lock); b += sizeof(*hdr); while (end - b >= sizeof(bs)) { bs.name_len = afs_extract_le16(&b); bs.priority = afs_extract_le16(&b); bs.weight = afs_extract_le16(&b); bs.port = afs_extract_le16(&b); bs.source = *b++; bs.status = *b++; bs.protocol = *b++; bs.nr_addrs = *b++; _debug("extract %u %u %u %u %u %u %*.*s", bs.name_len, bs.priority, bs.weight, bs.port, bs.protocol, bs.nr_addrs, bs.name_len, bs.name_len, b); if (end - b < bs.name_len) break; ret = -EPROTONOSUPPORT; if (bs.protocol == DNS_SERVER_PROTOCOL_UNSPECIFIED) { bs.protocol = DNS_SERVER_PROTOCOL_UDP; } else if (bs.protocol != DNS_SERVER_PROTOCOL_UDP) { _leave(" = [proto %u]", bs.protocol); goto error; } if (bs.port == 0) bs.port = AFS_VL_PORT; if (bs.source > NR__dns_record_source) bs.source = NR__dns_record_source; if (bs.status > NR__dns_lookup_status) bs.status = NR__dns_lookup_status; /* See if we can update an old server record */ server = NULL; for (i = 0; i < previous->nr_servers; i++) { struct afs_vlserver *p = previous->servers[i].server; if (p->name_len == bs.name_len && p->port == bs.port && strncasecmp(b, p->name, bs.name_len) == 0) { server = afs_get_vlserver(p); break; } } if (!server) { ret = -ENOMEM; server = afs_alloc_vlserver(b, bs.name_len, bs.port); if (!server) goto error; } b += bs.name_len; /* Extract the addresses - note that we can't skip this as we * have to advance the payload pointer. */ addrs = afs_extract_vl_addrs(cell->net, &b, end, bs.nr_addrs, bs.port); if (IS_ERR(addrs)) { ret = PTR_ERR(addrs); goto error_2; } if (vllist->nr_servers >= nr_servers) { _debug("skip %u >= %u", vllist->nr_servers, nr_servers); afs_put_addrlist(addrs, afs_alist_trace_put_parse_empty); afs_put_vlserver(cell->net, server); continue; } addrs->source = bs.source; addrs->status = bs.status; if (addrs->nr_addrs == 0) { afs_put_addrlist(addrs, afs_alist_trace_put_parse_empty); if (!rcu_access_pointer(server->addresses)) { afs_put_vlserver(cell->net, server); continue; } } else { struct afs_addr_list *old = addrs; write_lock(&server->lock); old = rcu_replace_pointer(server->addresses, old, lockdep_is_held(&server->lock)); write_unlock(&server->lock); afs_put_addrlist(old, afs_alist_trace_put_vlserver_old); } /* TODO: Might want to check for duplicates */ /* Insertion-sort by priority and weight */ for (j = 0; j < vllist->nr_servers; j++) { if (bs.priority < vllist->servers[j].priority) break; /* Lower preferable */ if (bs.priority == vllist->servers[j].priority && bs.weight > vllist->servers[j].weight) break; /* Higher preferable */ } if (j < vllist->nr_servers) { memmove(vllist->servers + j + 1, vllist->servers + j, (vllist->nr_servers - j) * sizeof(struct afs_vlserver_entry)); } clear_bit(AFS_VLSERVER_FL_PROBED, &server->flags); vllist->servers[j].priority = bs.priority; vllist->servers[j].weight = bs.weight; vllist->servers[j].server = server; vllist->nr_servers++; } if (b != end) { _debug("parse error %zd", b - end); goto error; } afs_put_vlserverlist(cell->net, previous); _leave(" = ok [%u]", vllist->nr_servers); return vllist; error_2: afs_put_vlserver(cell->net, server); error: afs_put_vlserverlist(cell->net, vllist); afs_put_vlserverlist(cell->net, previous); dump: if (ret != -ENOMEM) { printk(KERN_DEBUG "DNS: at %zu\n", (const void *)b - buffer); print_hex_dump_bytes("DNS: ", DUMP_PREFIX_NONE, buffer, buffer_size); } return ERR_PTR(ret); }
1 1 1 1 1 4 9 3 1 2 1 1 6 1 3 1 1 2 1 1 1 11 3 5 1 2 4 3 6 6 5 1 1 6 6 1 1 6 6 1 1 6 3 5 1 1 3 1 2 30 27 10 10 32 2 30 30 25 3 20 10 30 33 33 58 58 56 6 58 58 57 58 58 58 56 58 57 58 160 160 160 153 39 39 96 63 33 33 33 33 98 98 96 1 17 81 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 // SPDX-License-Identifier: GPL-2.0-only /* * Media device * * Copyright (C) 2010 Nokia Corporation * * Contacts: Laurent Pinchart <laurent.pinchart@ideasonboard.com> * Sakari Ailus <sakari.ailus@iki.fi> */ #include <linux/compat.h> #include <linux/export.h> #include <linux/idr.h> #include <linux/ioctl.h> #include <linux/media.h> #include <linux/slab.h> #include <linux/types.h> #include <linux/pci.h> #include <linux/usb.h> #include <linux/version.h> #include <media/media-device.h> #include <media/media-devnode.h> #include <media/media-entity.h> #include <media/media-request.h> #ifdef CONFIG_MEDIA_CONTROLLER /* * Legacy defines from linux/media.h. This is the only place we need this * so we just define it here. The media.h header doesn't expose it to the * kernel to prevent it from being used by drivers, but here (and only here!) * we need it to handle the legacy behavior. */ #define MEDIA_ENT_SUBTYPE_MASK 0x0000ffff #define MEDIA_ENT_T_DEVNODE_UNKNOWN (MEDIA_ENT_F_OLD_BASE | \ MEDIA_ENT_SUBTYPE_MASK) /* ----------------------------------------------------------------------------- * Userspace API */ static inline void __user *media_get_uptr(__u64 arg) { return (void __user *)(uintptr_t)arg; } static int media_device_open(struct file *filp) { return 0; } static int media_device_close(struct file *filp) { return 0; } static long media_device_get_info(struct media_device *dev, void *arg) { struct media_device_info *info = arg; memset(info, 0, sizeof(*info)); if (dev->driver_name[0]) strscpy(info->driver, dev->driver_name, sizeof(info->driver)); else strscpy(info->driver, dev->dev->driver->name, sizeof(info->driver)); strscpy(info->model, dev->model, sizeof(info->model)); strscpy(info->serial, dev->serial, sizeof(info->serial)); strscpy(info->bus_info, dev->bus_info, sizeof(info->bus_info)); info->media_version = LINUX_VERSION_CODE; info->driver_version = info->media_version; info->hw_revision = dev->hw_revision; return 0; } static struct media_entity *find_entity(struct media_device *mdev, u32 id) { struct media_entity *entity; int next = id & MEDIA_ENT_ID_FLAG_NEXT; id &= ~MEDIA_ENT_ID_FLAG_NEXT; media_device_for_each_entity(entity, mdev) { if (((media_entity_id(entity) == id) && !next) || ((media_entity_id(entity) > id) && next)) { return entity; } } return NULL; } static long media_device_enum_entities(struct media_device *mdev, void *arg) { struct media_entity_desc *entd = arg; struct media_entity *ent; ent = find_entity(mdev, entd->id); if (ent == NULL) return -EINVAL; memset(entd, 0, sizeof(*entd)); entd->id = media_entity_id(ent); if (ent->name) strscpy(entd->name, ent->name, sizeof(entd->name)); entd->type = ent->function; entd->revision = 0; /* Unused */ entd->flags = ent->flags; entd->group_id = 0; /* Unused */ entd->pads = ent->num_pads; entd->links = ent->num_links - ent->num_backlinks; /* * Workaround for a bug at media-ctl <= v1.10 that makes it to * do the wrong thing if the entity function doesn't belong to * either MEDIA_ENT_F_OLD_BASE or MEDIA_ENT_F_OLD_SUBDEV_BASE * Ranges. * * Non-subdevices are expected to be at the MEDIA_ENT_F_OLD_BASE, * or, otherwise, will be silently ignored by media-ctl when * printing the graphviz diagram. So, map them into the devnode * old range. */ if (ent->function < MEDIA_ENT_F_OLD_BASE || ent->function > MEDIA_ENT_F_TUNER) { if (is_media_entity_v4l2_subdev(ent)) entd->type = MEDIA_ENT_F_V4L2_SUBDEV_UNKNOWN; else if (ent->function != MEDIA_ENT_F_IO_V4L) entd->type = MEDIA_ENT_T_DEVNODE_UNKNOWN; } memcpy(&entd->raw, &ent->info, sizeof(ent->info)); return 0; } static void media_device_kpad_to_upad(const struct media_pad *kpad, struct media_pad_desc *upad) { upad->entity = media_entity_id(kpad->entity); upad->index = kpad->index; upad->flags = kpad->flags; } static long media_device_enum_links(struct media_device *mdev, void *arg) { struct media_links_enum *links = arg; struct media_entity *entity; entity = find_entity(mdev, links->entity); if (entity == NULL) return -EINVAL; if (links->pads) { unsigned int p; for (p = 0; p < entity->num_pads; p++) { struct media_pad_desc pad; memset(&pad, 0, sizeof(pad)); media_device_kpad_to_upad(&entity->pads[p], &pad); if (copy_to_user(&links->pads[p], &pad, sizeof(pad))) return -EFAULT; } } if (links->links) { struct media_link *link; struct media_link_desc __user *ulink_desc = links->links; list_for_each_entry(link, &entity->links, list) { struct media_link_desc klink_desc; /* Ignore backlinks. */ if (link->source->entity != entity) continue; memset(&klink_desc, 0, sizeof(klink_desc)); media_device_kpad_to_upad(link->source, &klink_desc.source); media_device_kpad_to_upad(link->sink, &klink_desc.sink); klink_desc.flags = link->flags; if (copy_to_user(ulink_desc, &klink_desc, sizeof(*ulink_desc))) return -EFAULT; ulink_desc++; } } memset(links->reserved, 0, sizeof(links->reserved)); return 0; } static long media_device_setup_link(struct media_device *mdev, void *arg) { struct media_link_desc *linkd = arg; struct media_link *link = NULL; struct media_entity *source; struct media_entity *sink; /* Find the source and sink entities and link. */ source = find_entity(mdev, linkd->source.entity); sink = find_entity(mdev, linkd->sink.entity); if (source == NULL || sink == NULL) return -EINVAL; if (linkd->source.index >= source->num_pads || linkd->sink.index >= sink->num_pads) return -EINVAL; link = media_entity_find_link(&source->pads[linkd->source.index], &sink->pads[linkd->sink.index]); if (link == NULL) return -EINVAL; memset(linkd->reserved, 0, sizeof(linkd->reserved)); /* Setup the link on both entities. */ return __media_entity_setup_link(link, linkd->flags); } static long media_device_get_topology(struct media_device *mdev, void *arg) { struct media_v2_topology *topo = arg; struct media_entity *entity; struct media_interface *intf; struct media_pad *pad; struct media_link *link; struct media_v2_entity kentity, __user *uentity; struct media_v2_interface kintf, __user *uintf; struct media_v2_pad kpad, __user *upad; struct media_v2_link klink, __user *ulink; unsigned int i; int ret = 0; topo->topology_version = mdev->topology_version; /* Get entities and number of entities */ i = 0; uentity = media_get_uptr(topo->ptr_entities); media_device_for_each_entity(entity, mdev) { i++; if (ret || !uentity) continue; if (i > topo->num_entities) { ret = -ENOSPC; continue; } /* Copy fields to userspace struct if not error */ memset(&kentity, 0, sizeof(kentity)); kentity.id = entity->graph_obj.id; kentity.function = entity->function; kentity.flags = entity->flags; strscpy(kentity.name, entity->name, sizeof(kentity.name)); if (copy_to_user(uentity, &kentity, sizeof(kentity))) ret = -EFAULT; uentity++; } topo->num_entities = i; topo->reserved1 = 0; /* Get interfaces and number of interfaces */ i = 0; uintf = media_get_uptr(topo->ptr_interfaces); media_device_for_each_intf(intf, mdev) { i++; if (ret || !uintf) continue; if (i > topo->num_interfaces) { ret = -ENOSPC; continue; } memset(&kintf, 0, sizeof(kintf)); /* Copy intf fields to userspace struct */ kintf.id = intf->graph_obj.id; kintf.intf_type = intf->type; kintf.flags = intf->flags; if (media_type(&intf->graph_obj) == MEDIA_GRAPH_INTF_DEVNODE) { struct media_intf_devnode *devnode; devnode = intf_to_devnode(intf); kintf.devnode.major = devnode->major; kintf.devnode.minor = devnode->minor; } if (copy_to_user(uintf, &kintf, sizeof(kintf))) ret = -EFAULT; uintf++; } topo->num_interfaces = i; topo->reserved2 = 0; /* Get pads and number of pads */ i = 0; upad = media_get_uptr(topo->ptr_pads); media_device_for_each_pad(pad, mdev) { i++; if (ret || !upad) continue; if (i > topo->num_pads) { ret = -ENOSPC; continue; } memset(&kpad, 0, sizeof(kpad)); /* Copy pad fields to userspace struct */ kpad.id = pad->graph_obj.id; kpad.entity_id = pad->entity->graph_obj.id; kpad.flags = pad->flags; kpad.index = pad->index; if (copy_to_user(upad, &kpad, sizeof(kpad))) ret = -EFAULT; upad++; } topo->num_pads = i; topo->reserved3 = 0; /* Get links and number of links */ i = 0; ulink = media_get_uptr(topo->ptr_links); media_device_for_each_link(link, mdev) { if (link->is_backlink) continue; i++; if (ret || !ulink) continue; if (i > topo->num_links) { ret = -ENOSPC; continue; } memset(&klink, 0, sizeof(klink)); /* Copy link fields to userspace struct */ klink.id = link->graph_obj.id; klink.source_id = link->gobj0->id; klink.sink_id = link->gobj1->id; klink.flags = link->flags; if (copy_to_user(ulink, &klink, sizeof(klink))) ret = -EFAULT; ulink++; } topo->num_links = i; topo->reserved4 = 0; return ret; } static long media_device_request_alloc(struct media_device *mdev, void *arg) { int *alloc_fd = arg; if (!mdev->ops || !mdev->ops->req_validate || !mdev->ops->req_queue) return -ENOTTY; return media_request_alloc(mdev, alloc_fd); } static long copy_arg_from_user(void *karg, void __user *uarg, unsigned int cmd) { if ((_IOC_DIR(cmd) & _IOC_WRITE) && copy_from_user(karg, uarg, _IOC_SIZE(cmd))) return -EFAULT; return 0; } static long copy_arg_to_user(void __user *uarg, void *karg, unsigned int cmd) { if ((_IOC_DIR(cmd) & _IOC_READ) && copy_to_user(uarg, karg, _IOC_SIZE(cmd))) return -EFAULT; return 0; } /* Do acquire the graph mutex */ #define MEDIA_IOC_FL_GRAPH_MUTEX BIT(0) #define MEDIA_IOC_ARG(__cmd, func, fl, from_user, to_user) \ [_IOC_NR(MEDIA_IOC_##__cmd)] = { \ .cmd = MEDIA_IOC_##__cmd, \ .fn = func, \ .flags = fl, \ .arg_from_user = from_user, \ .arg_to_user = to_user, \ } #define MEDIA_IOC(__cmd, func, fl) \ MEDIA_IOC_ARG(__cmd, func, fl, copy_arg_from_user, copy_arg_to_user) /* the table is indexed by _IOC_NR(cmd) */ struct media_ioctl_info { unsigned int cmd; unsigned short flags; long (*fn)(struct media_device *dev, void *arg); long (*arg_from_user)(void *karg, void __user *uarg, unsigned int cmd); long (*arg_to_user)(void __user *uarg, void *karg, unsigned int cmd); }; static const struct media_ioctl_info ioctl_info[] = { MEDIA_IOC(DEVICE_INFO, media_device_get_info, MEDIA_IOC_FL_GRAPH_MUTEX), MEDIA_IOC(ENUM_ENTITIES, media_device_enum_entities, MEDIA_IOC_FL_GRAPH_MUTEX), MEDIA_IOC(ENUM_LINKS, media_device_enum_links, MEDIA_IOC_FL_GRAPH_MUTEX), MEDIA_IOC(SETUP_LINK, media_device_setup_link, MEDIA_IOC_FL_GRAPH_MUTEX), MEDIA_IOC(G_TOPOLOGY, media_device_get_topology, MEDIA_IOC_FL_GRAPH_MUTEX), MEDIA_IOC(REQUEST_ALLOC, media_device_request_alloc, 0), }; static long media_device_ioctl(struct file *filp, unsigned int cmd, unsigned long __arg) { struct media_devnode *devnode = media_devnode_data(filp); struct media_device *dev = devnode->media_dev; const struct media_ioctl_info *info; void __user *arg = (void __user *)__arg; char __karg[256], *karg = __karg; long ret; if (_IOC_NR(cmd) >= ARRAY_SIZE(ioctl_info) || ioctl_info[_IOC_NR(cmd)].cmd != cmd) return -ENOIOCTLCMD; info = &ioctl_info[_IOC_NR(cmd)]; if (_IOC_SIZE(info->cmd) > sizeof(__karg)) { karg = kmalloc(_IOC_SIZE(info->cmd), GFP_KERNEL); if (!karg) return -ENOMEM; } if (info->arg_from_user) { ret = info->arg_from_user(karg, arg, cmd); if (ret) goto out_free; } if (info->flags & MEDIA_IOC_FL_GRAPH_MUTEX) mutex_lock(&dev->graph_mutex); ret = info->fn(dev, karg); if (info->flags & MEDIA_IOC_FL_GRAPH_MUTEX) mutex_unlock(&dev->graph_mutex); if (!ret && info->arg_to_user) ret = info->arg_to_user(arg, karg, cmd); out_free: if (karg != __karg) kfree(karg); return ret; } #ifdef CONFIG_COMPAT struct media_links_enum32 { __u32 entity; compat_uptr_t pads; /* struct media_pad_desc * */ compat_uptr_t links; /* struct media_link_desc * */ __u32 reserved[4]; }; static long media_device_enum_links32(struct media_device *mdev, struct media_links_enum32 __user *ulinks) { struct media_links_enum links; compat_uptr_t pads_ptr, links_ptr; int ret; memset(&links, 0, sizeof(links)); if (get_user(links.entity, &ulinks->entity) || get_user(pads_ptr, &ulinks->pads) || get_user(links_ptr, &ulinks->links)) return -EFAULT; links.pads = compat_ptr(pads_ptr); links.links = compat_ptr(links_ptr); ret = media_device_enum_links(mdev, &links); if (ret) return ret; if (copy_to_user(ulinks->reserved, links.reserved, sizeof(ulinks->reserved))) return -EFAULT; return 0; } #define MEDIA_IOC_ENUM_LINKS32 _IOWR('|', 0x02, struct media_links_enum32) static long media_device_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct media_devnode *devnode = media_devnode_data(filp); struct media_device *dev = devnode->media_dev; long ret; switch (cmd) { case MEDIA_IOC_ENUM_LINKS32: mutex_lock(&dev->graph_mutex); ret = media_device_enum_links32(dev, (struct media_links_enum32 __user *)arg); mutex_unlock(&dev->graph_mutex); break; default: return media_device_ioctl(filp, cmd, arg); } return ret; } #endif /* CONFIG_COMPAT */ static const struct media_file_operations media_device_fops = { .owner = THIS_MODULE, .open = media_device_open, .ioctl = media_device_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = media_device_compat_ioctl, #endif /* CONFIG_COMPAT */ .release = media_device_close, }; /* ----------------------------------------------------------------------------- * sysfs */ static ssize_t model_show(struct device *cd, struct device_attribute *attr, char *buf) { struct media_devnode *devnode = to_media_devnode(cd); struct media_device *mdev = devnode->media_dev; return sprintf(buf, "%.*s\n", (int)sizeof(mdev->model), mdev->model); } static DEVICE_ATTR_RO(model); /* ----------------------------------------------------------------------------- * Registration/unregistration */ static void media_device_release(struct media_devnode *devnode) { dev_dbg(devnode->parent, "Media device released\n"); } static void __media_device_unregister_entity(struct media_entity *entity) { struct media_device *mdev = entity->graph_obj.mdev; struct media_link *link, *tmp; struct media_interface *intf; struct media_pad *iter; ida_free(&mdev->entity_internal_idx, entity->internal_idx); /* Remove all interface links pointing to this entity */ list_for_each_entry(intf, &mdev->interfaces, graph_obj.list) { list_for_each_entry_safe(link, tmp, &intf->links, list) { if (link->entity == entity) __media_remove_intf_link(link); } } /* Remove all data links that belong to this entity */ __media_entity_remove_links(entity); /* Remove all pads that belong to this entity */ media_entity_for_each_pad(entity, iter) media_gobj_destroy(&iter->graph_obj); /* Remove the entity */ media_gobj_destroy(&entity->graph_obj); /* invoke entity_notify callbacks to handle entity removal?? */ } int __must_check media_device_register_entity(struct media_device *mdev, struct media_entity *entity) { struct media_entity_notify *notify, *next; struct media_pad *iter; int ret; if (entity->function == MEDIA_ENT_F_V4L2_SUBDEV_UNKNOWN || entity->function == MEDIA_ENT_F_UNKNOWN) dev_warn(mdev->dev, "Entity type for entity %s was not initialized!\n", entity->name); /* Warn if we apparently re-register an entity */ WARN_ON(entity->graph_obj.mdev != NULL); entity->graph_obj.mdev = mdev; INIT_LIST_HEAD(&entity->links); entity->num_links = 0; entity->num_backlinks = 0; ret = ida_alloc_min(&mdev->entity_internal_idx, 1, GFP_KERNEL); if (ret < 0) return ret; entity->internal_idx = ret; mutex_lock(&mdev->graph_mutex); mdev->entity_internal_idx_max = max(mdev->entity_internal_idx_max, entity->internal_idx); /* Initialize media_gobj embedded at the entity */ media_gobj_create(mdev, MEDIA_GRAPH_ENTITY, &entity->graph_obj); /* Initialize objects at the pads */ media_entity_for_each_pad(entity, iter) media_gobj_create(mdev, MEDIA_GRAPH_PAD, &iter->graph_obj); /* invoke entity_notify callbacks */ list_for_each_entry_safe(notify, next, &mdev->entity_notify, list) notify->notify(entity, notify->notify_data); if (mdev->entity_internal_idx_max >= mdev->pm_count_walk.ent_enum.idx_max) { struct media_graph new = { .top = 0 }; /* * Initialise the new graph walk before cleaning up * the old one in order not to spoil the graph walk * object of the media device if graph walk init fails. */ ret = media_graph_walk_init(&new, mdev); if (ret) { __media_device_unregister_entity(entity); mutex_unlock(&mdev->graph_mutex); return ret; } media_graph_walk_cleanup(&mdev->pm_count_walk); mdev->pm_count_walk = new; } mutex_unlock(&mdev->graph_mutex); return 0; } EXPORT_SYMBOL_GPL(media_device_register_entity); void media_device_unregister_entity(struct media_entity *entity) { struct media_device *mdev = entity->graph_obj.mdev; if (mdev == NULL) return; mutex_lock(&mdev->graph_mutex); __media_device_unregister_entity(entity); mutex_unlock(&mdev->graph_mutex); } EXPORT_SYMBOL_GPL(media_device_unregister_entity); void media_device_init(struct media_device *mdev) { INIT_LIST_HEAD(&mdev->entities); INIT_LIST_HEAD(&mdev->interfaces); INIT_LIST_HEAD(&mdev->pads); INIT_LIST_HEAD(&mdev->links); INIT_LIST_HEAD(&mdev->entity_notify); mutex_init(&mdev->req_queue_mutex); mutex_init(&mdev->graph_mutex); ida_init(&mdev->entity_internal_idx); atomic_set(&mdev->request_id, 0); if (!*mdev->bus_info) media_set_bus_info(mdev->bus_info, sizeof(mdev->bus_info), mdev->dev); dev_dbg(mdev->dev, "Media device initialized\n"); } EXPORT_SYMBOL_GPL(media_device_init); void media_device_cleanup(struct media_device *mdev) { ida_destroy(&mdev->entity_internal_idx); mdev->entity_internal_idx_max = 0; media_graph_walk_cleanup(&mdev->pm_count_walk); mutex_destroy(&mdev->graph_mutex); mutex_destroy(&mdev->req_queue_mutex); } EXPORT_SYMBOL_GPL(media_device_cleanup); int __must_check __media_device_register(struct media_device *mdev, struct module *owner) { struct media_devnode *devnode; int ret; devnode = kzalloc(sizeof(*devnode), GFP_KERNEL); if (!devnode) return -ENOMEM; /* Register the device node. */ mdev->devnode = devnode; devnode->fops = &media_device_fops; devnode->parent = mdev->dev; devnode->release = media_device_release; /* Set version 0 to indicate user-space that the graph is static */ mdev->topology_version = 0; ret = media_devnode_register(mdev, devnode, owner); if (ret < 0) { /* devnode free is handled in media_devnode_*() */ mdev->devnode = NULL; return ret; } ret = device_create_file(&devnode->dev, &dev_attr_model); if (ret < 0) { /* devnode free is handled in media_devnode_*() */ mdev->devnode = NULL; media_devnode_unregister_prepare(devnode); media_devnode_unregister(devnode); return ret; } dev_dbg(mdev->dev, "Media device registered\n"); return 0; } EXPORT_SYMBOL_GPL(__media_device_register); void media_device_register_entity_notify(struct media_device *mdev, struct media_entity_notify *nptr) { mutex_lock(&mdev->graph_mutex); list_add_tail(&nptr->list, &mdev->entity_notify); mutex_unlock(&mdev->graph_mutex); } EXPORT_SYMBOL_GPL(media_device_register_entity_notify); /* * Note: Should be called with mdev->lock held. */ static void __media_device_unregister_entity_notify(struct media_device *mdev, struct media_entity_notify *nptr) { list_del(&nptr->list); } void media_device_unregister_entity_notify(struct media_device *mdev, struct media_entity_notify *nptr) { mutex_lock(&mdev->graph_mutex); __media_device_unregister_entity_notify(mdev, nptr); mutex_unlock(&mdev->graph_mutex); } EXPORT_SYMBOL_GPL(media_device_unregister_entity_notify); void media_device_unregister(struct media_device *mdev) { struct media_entity *entity; struct media_entity *next; struct media_interface *intf, *tmp_intf; struct media_entity_notify *notify, *nextp; if (mdev == NULL) return; mutex_lock(&mdev->graph_mutex); /* Check if mdev was ever registered at all */ if (!media_devnode_is_registered(mdev->devnode)) { mutex_unlock(&mdev->graph_mutex); return; } /* Clear the devnode register bit to avoid races with media dev open */ media_devnode_unregister_prepare(mdev->devnode); /* Remove all entities from the media device */ list_for_each_entry_safe(entity, next, &mdev->entities, graph_obj.list) __media_device_unregister_entity(entity); /* Remove all entity_notify callbacks from the media device */ list_for_each_entry_safe(notify, nextp, &mdev->entity_notify, list) __media_device_unregister_entity_notify(mdev, notify); /* Remove all interfaces from the media device */ list_for_each_entry_safe(intf, tmp_intf, &mdev->interfaces, graph_obj.list) { /* * Unlink the interface, but don't free it here; the * module which created it is responsible for freeing * it */ __media_remove_intf_links(intf); media_gobj_destroy(&intf->graph_obj); } mutex_unlock(&mdev->graph_mutex); dev_dbg(mdev->dev, "Media device unregistered\n"); device_remove_file(&mdev->devnode->dev, &dev_attr_model); media_devnode_unregister(mdev->devnode); /* devnode free is handled in media_devnode_*() */ mdev->devnode = NULL; } EXPORT_SYMBOL_GPL(media_device_unregister); #if IS_ENABLED(CONFIG_PCI) void media_device_pci_init(struct media_device *mdev, struct pci_dev *pci_dev, const char *name) { mdev->dev = &pci_dev->dev; if (name) strscpy(mdev->model, name, sizeof(mdev->model)); else strscpy(mdev->model, pci_name(pci_dev), sizeof(mdev->model)); sprintf(mdev->bus_info, "PCI:%s", pci_name(pci_dev)); mdev->hw_revision = (pci_dev->subsystem_vendor << 16) | pci_dev->subsystem_device; media_device_init(mdev); } EXPORT_SYMBOL_GPL(media_device_pci_init); #endif #if IS_ENABLED(CONFIG_USB) void __media_device_usb_init(struct media_device *mdev, struct usb_device *udev, const char *board_name, const char *driver_name) { mdev->dev = &udev->dev; if (driver_name) strscpy(mdev->driver_name, driver_name, sizeof(mdev->driver_name)); if (board_name) strscpy(mdev->model, board_name, sizeof(mdev->model)); else if (udev->product) strscpy(mdev->model, udev->product, sizeof(mdev->model)); else strscpy(mdev->model, "unknown model", sizeof(mdev->model)); if (udev->serial) strscpy(mdev->serial, udev->serial, sizeof(mdev->serial)); usb_make_path(udev, mdev->bus_info, sizeof(mdev->bus_info)); mdev->hw_revision = le16_to_cpu(udev->descriptor.bcdDevice); media_device_init(mdev); } EXPORT_SYMBOL_GPL(__media_device_usb_init); #endif #endif /* CONFIG_MEDIA_CONTROLLER */
1028 1033 1 1017 15 1 1 2 2 2245 2246 2246 2246 2244 378 2241 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 // SPDX-License-Identifier: GPL-2.0 #include <linux/irq_work.h> #include <linux/spinlock.h> #include <linux/task_work.h> #include <linux/resume_user_mode.h> static struct callback_head work_exited; /* all we need is ->next == NULL */ #ifdef CONFIG_IRQ_WORK static void task_work_set_notify_irq(struct irq_work *entry) { test_and_set_tsk_thread_flag(current, TIF_NOTIFY_RESUME); } static DEFINE_PER_CPU(struct irq_work, irq_work_NMI_resume) = IRQ_WORK_INIT_HARD(task_work_set_notify_irq); #endif /** * task_work_add - ask the @task to execute @work->func() * @task: the task which should run the callback * @work: the callback to run * @notify: how to notify the targeted task * * Queue @work for task_work_run() below and notify the @task if @notify * is @TWA_RESUME, @TWA_SIGNAL, @TWA_SIGNAL_NO_IPI or @TWA_NMI_CURRENT. * * @TWA_SIGNAL works like signals, in that the it will interrupt the targeted * task and run the task_work, regardless of whether the task is currently * running in the kernel or userspace. * @TWA_SIGNAL_NO_IPI works like @TWA_SIGNAL, except it doesn't send a * reschedule IPI to force the targeted task to reschedule and run task_work. * This can be advantageous if there's no strict requirement that the * task_work be run as soon as possible, just whenever the task enters the * kernel anyway. * @TWA_RESUME work is run only when the task exits the kernel and returns to * user mode, or before entering guest mode. * @TWA_NMI_CURRENT works like @TWA_RESUME, except it can only be used for the * current @task and if the current context is NMI. * * Fails if the @task is exiting/exited and thus it can't process this @work. * Otherwise @work->func() will be called when the @task goes through one of * the aforementioned transitions, or exits. * * If the targeted task is exiting, then an error is returned and the work item * is not queued. It's up to the caller to arrange for an alternative mechanism * in that case. * * Note: there is no ordering guarantee on works queued here. The task_work * list is LIFO. * * RETURNS: * 0 if succeeds or -ESRCH. */ int task_work_add(struct task_struct *task, struct callback_head *work, enum task_work_notify_mode notify) { struct callback_head *head; if (notify == TWA_NMI_CURRENT) { if (WARN_ON_ONCE(task != current)) return -EINVAL; if (!IS_ENABLED(CONFIG_IRQ_WORK)) return -EINVAL; } else { kasan_record_aux_stack(work); } head = READ_ONCE(task->task_works); do { if (unlikely(head == &work_exited)) return -ESRCH; work->next = head; } while (!try_cmpxchg(&task->task_works, &head, work)); switch (notify) { case TWA_NONE: break; case TWA_RESUME: set_notify_resume(task); break; case TWA_SIGNAL: set_notify_signal(task); break; case TWA_SIGNAL_NO_IPI: __set_notify_signal(task); break; #ifdef CONFIG_IRQ_WORK case TWA_NMI_CURRENT: irq_work_queue(this_cpu_ptr(&irq_work_NMI_resume)); break; #endif default: WARN_ON_ONCE(1); break; } return 0; } /** * task_work_cancel_match - cancel a pending work added by task_work_add() * @task: the task which should execute the work * @match: match function to call * @data: data to be passed in to match function * * RETURNS: * The found work or NULL if not found. */ struct callback_head * task_work_cancel_match(struct task_struct *task, bool (*match)(struct callback_head *, void *data), void *data) { struct callback_head **pprev = &task->task_works; struct callback_head *work; unsigned long flags; if (likely(!task_work_pending(task))) return NULL; /* * If cmpxchg() fails we continue without updating pprev. * Either we raced with task_work_add() which added the * new entry before this work, we will find it again. Or * we raced with task_work_run(), *pprev == NULL/exited. */ raw_spin_lock_irqsave(&task->pi_lock, flags); work = READ_ONCE(*pprev); while (work) { if (!match(work, data)) { pprev = &work->next; work = READ_ONCE(*pprev); } else if (try_cmpxchg(pprev, &work, work->next)) break; } raw_spin_unlock_irqrestore(&task->pi_lock, flags); return work; } static bool task_work_func_match(struct callback_head *cb, void *data) { return cb->func == data; } /** * task_work_cancel_func - cancel a pending work matching a function added by task_work_add() * @task: the task which should execute the func's work * @func: identifies the func to match with a work to remove * * Find the last queued pending work with ->func == @func and remove * it from queue. * * RETURNS: * The found work or NULL if not found. */ struct callback_head * task_work_cancel_func(struct task_struct *task, task_work_func_t func) { return task_work_cancel_match(task, task_work_func_match, func); } static bool task_work_match(struct callback_head *cb, void *data) { return cb == data; } /** * task_work_cancel - cancel a pending work added by task_work_add() * @task: the task which should execute the work * @cb: the callback to remove if queued * * Remove a callback from a task's queue if queued. * * RETURNS: * True if the callback was queued and got cancelled, false otherwise. */ bool task_work_cancel(struct task_struct *task, struct callback_head *cb) { struct callback_head *ret; ret = task_work_cancel_match(task, task_work_match, cb); return ret == cb; } /** * task_work_run - execute the works added by task_work_add() * * Flush the pending works. Should be used by the core kernel code. * Called before the task returns to the user-mode or stops, or when * it exits. In the latter case task_work_add() can no longer add the * new work after task_work_run() returns. */ void task_work_run(void) { struct task_struct *task = current; struct callback_head *work, *head, *next; for (;;) { /* * work->func() can do task_work_add(), do not set * work_exited unless the list is empty. */ work = READ_ONCE(task->task_works); do { head = NULL; if (!work) { if (task->flags & PF_EXITING) head = &work_exited; else break; } } while (!try_cmpxchg(&task->task_works, &work, head)); if (!work) break; /* * Synchronize with task_work_cancel_match(). It can not remove * the first entry == work, cmpxchg(task_works) must fail. * But it can remove another entry from the ->next list. */ raw_spin_lock_irq(&task->pi_lock); raw_spin_unlock_irq(&task->pi_lock); do { next = work->next; work->func(work); work = next; cond_resched(); } while (work); } }
3 4 2 1 1 4 2 1 1 18 18 2 17 3 1 3 1 2 1 1 1 1 1 1 1 1 2 1 9 1 2 2 6 1 7 7 2 4 6 3 2 1 2 1 1 3 8 2 1 1 3 1 1 1 6 11 1 10 9 1 4 1 1 1 1 1 4 38 20 18 2 2 2 2 2 16 2 18 18 39 38 39 39 39 39 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 // SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/nfs/fs_context.c * * Copyright (C) 1992 Rick Sladkey * Conversion to new mount api Copyright (C) David Howells * * NFS mount handling. * * Split from fs/nfs/super.c by David Howells <dhowells@redhat.com> */ #include <linux/compat.h> #include <linux/module.h> #include <linux/fs.h> #include <linux/fs_context.h> #include <linux/fs_parser.h> #include <linux/nfs_fs.h> #include <linux/nfs_mount.h> #include <linux/nfs4_mount.h> #include <net/handshake.h> #include "nfs.h" #include "internal.h" #include "nfstrace.h" #define NFSDBG_FACILITY NFSDBG_MOUNT #if IS_ENABLED(CONFIG_NFS_V3) #define NFS_DEFAULT_VERSION 3 #else #define NFS_DEFAULT_VERSION 2 #endif #define NFS_MAX_CONNECTIONS 16 enum nfs_param { Opt_ac, Opt_acdirmax, Opt_acdirmin, Opt_acl, Opt_acregmax, Opt_acregmin, Opt_actimeo, Opt_addr, Opt_bg, Opt_bsize, Opt_clientaddr, Opt_cto, Opt_alignwrite, Opt_fatal_neterrors, Opt_fg, Opt_fscache, Opt_fscache_flag, Opt_hard, Opt_intr, Opt_local_lock, Opt_lock, Opt_lookupcache, Opt_migration, Opt_minorversion, Opt_mountaddr, Opt_mounthost, Opt_mountport, Opt_mountproto, Opt_mountvers, Opt_namelen, Opt_nconnect, Opt_max_connect, Opt_port, Opt_posix, Opt_proto, Opt_rdirplus, Opt_rdirplus_none, Opt_rdirplus_force, Opt_rdma, Opt_resvport, Opt_retrans, Opt_retry, Opt_rsize, Opt_sec, Opt_sharecache, Opt_sloppy, Opt_soft, Opt_softerr, Opt_softreval, Opt_source, Opt_tcp, Opt_timeo, Opt_trunkdiscovery, Opt_udp, Opt_v, Opt_vers, Opt_wsize, Opt_write, Opt_xprtsec, Opt_cert_serial, Opt_privkey_serial, }; enum { Opt_fatal_neterrors_default, Opt_fatal_neterrors_enetunreach, Opt_fatal_neterrors_none, }; static const struct constant_table nfs_param_enums_fatal_neterrors[] = { { "default", Opt_fatal_neterrors_default }, { "ENETDOWN:ENETUNREACH", Opt_fatal_neterrors_enetunreach }, { "ENETUNREACH:ENETDOWN", Opt_fatal_neterrors_enetunreach }, { "none", Opt_fatal_neterrors_none }, {} }; enum { Opt_local_lock_all, Opt_local_lock_flock, Opt_local_lock_none, Opt_local_lock_posix, }; static const struct constant_table nfs_param_enums_local_lock[] = { { "all", Opt_local_lock_all }, { "flock", Opt_local_lock_flock }, { "posix", Opt_local_lock_posix }, { "none", Opt_local_lock_none }, {} }; enum { Opt_lookupcache_all, Opt_lookupcache_none, Opt_lookupcache_positive, }; static const struct constant_table nfs_param_enums_lookupcache[] = { { "all", Opt_lookupcache_all }, { "none", Opt_lookupcache_none }, { "pos", Opt_lookupcache_positive }, { "positive", Opt_lookupcache_positive }, {} }; enum { Opt_write_lazy, Opt_write_eager, Opt_write_wait, }; static const struct constant_table nfs_param_enums_write[] = { { "lazy", Opt_write_lazy }, { "eager", Opt_write_eager }, { "wait", Opt_write_wait }, {} }; static const struct fs_parameter_spec nfs_fs_parameters[] = { fsparam_flag_no("ac", Opt_ac), fsparam_u32 ("acdirmax", Opt_acdirmax), fsparam_u32 ("acdirmin", Opt_acdirmin), fsparam_flag_no("acl", Opt_acl), fsparam_u32 ("acregmax", Opt_acregmax), fsparam_u32 ("acregmin", Opt_acregmin), fsparam_u32 ("actimeo", Opt_actimeo), fsparam_string("addr", Opt_addr), fsparam_flag ("bg", Opt_bg), fsparam_u32 ("bsize", Opt_bsize), fsparam_string("clientaddr", Opt_clientaddr), fsparam_flag_no("cto", Opt_cto), fsparam_flag_no("alignwrite", Opt_alignwrite), fsparam_enum("fatal_neterrors", Opt_fatal_neterrors, nfs_param_enums_fatal_neterrors), fsparam_flag ("fg", Opt_fg), fsparam_flag_no("fsc", Opt_fscache_flag), fsparam_string("fsc", Opt_fscache), fsparam_flag ("hard", Opt_hard), __fsparam(NULL, "intr", Opt_intr, fs_param_neg_with_no|fs_param_deprecated, NULL), fsparam_enum ("local_lock", Opt_local_lock, nfs_param_enums_local_lock), fsparam_flag_no("lock", Opt_lock), fsparam_enum ("lookupcache", Opt_lookupcache, nfs_param_enums_lookupcache), fsparam_flag_no("migration", Opt_migration), fsparam_u32 ("minorversion", Opt_minorversion), fsparam_string("mountaddr", Opt_mountaddr), fsparam_string("mounthost", Opt_mounthost), fsparam_u32 ("mountport", Opt_mountport), fsparam_string("mountproto", Opt_mountproto), fsparam_u32 ("mountvers", Opt_mountvers), fsparam_u32 ("namlen", Opt_namelen), fsparam_u32 ("nconnect", Opt_nconnect), fsparam_u32 ("max_connect", Opt_max_connect), fsparam_string("nfsvers", Opt_vers), fsparam_u32 ("port", Opt_port), fsparam_flag_no("posix", Opt_posix), fsparam_string("proto", Opt_proto), fsparam_flag_no("rdirplus", Opt_rdirplus), // rdirplus|nordirplus fsparam_string("rdirplus", Opt_rdirplus), // rdirplus=... fsparam_flag ("rdma", Opt_rdma), fsparam_flag_no("resvport", Opt_resvport), fsparam_u32 ("retrans", Opt_retrans), fsparam_string("retry", Opt_retry), fsparam_u32 ("rsize", Opt_rsize), fsparam_string("sec", Opt_sec), fsparam_flag_no("sharecache", Opt_sharecache), fsparam_flag ("sloppy", Opt_sloppy), fsparam_flag ("soft", Opt_soft), fsparam_flag ("softerr", Opt_softerr), fsparam_flag ("softreval", Opt_softreval), fsparam_string("source", Opt_source), fsparam_flag ("tcp", Opt_tcp), fsparam_u32 ("timeo", Opt_timeo), fsparam_flag_no("trunkdiscovery", Opt_trunkdiscovery), fsparam_flag ("udp", Opt_udp), fsparam_flag ("v2", Opt_v), fsparam_flag ("v3", Opt_v), fsparam_flag ("v4", Opt_v), fsparam_flag ("v4.0", Opt_v), fsparam_flag ("v4.1", Opt_v), fsparam_flag ("v4.2", Opt_v), fsparam_string("vers", Opt_vers), fsparam_enum ("write", Opt_write, nfs_param_enums_write), fsparam_u32 ("wsize", Opt_wsize), fsparam_string("xprtsec", Opt_xprtsec), fsparam_s32("cert_serial", Opt_cert_serial), fsparam_s32("privkey_serial", Opt_privkey_serial), {} }; enum { Opt_vers_2, Opt_vers_3, Opt_vers_4, Opt_vers_4_0, Opt_vers_4_1, Opt_vers_4_2, }; static const struct constant_table nfs_vers_tokens[] = { { "2", Opt_vers_2 }, { "3", Opt_vers_3 }, { "4", Opt_vers_4 }, { "4.0", Opt_vers_4_0 }, { "4.1", Opt_vers_4_1 }, { "4.2", Opt_vers_4_2 }, {} }; enum { Opt_xprt_rdma, Opt_xprt_rdma6, Opt_xprt_tcp, Opt_xprt_tcp6, Opt_xprt_udp, Opt_xprt_udp6, nr__Opt_xprt }; static const struct constant_table nfs_xprt_protocol_tokens[] = { { "rdma", Opt_xprt_rdma }, { "rdma6", Opt_xprt_rdma6 }, { "tcp", Opt_xprt_tcp }, { "tcp6", Opt_xprt_tcp6 }, { "udp", Opt_xprt_udp }, { "udp6", Opt_xprt_udp6 }, {} }; enum { Opt_sec_krb5, Opt_sec_krb5i, Opt_sec_krb5p, Opt_sec_lkey, Opt_sec_lkeyi, Opt_sec_lkeyp, Opt_sec_none, Opt_sec_spkm, Opt_sec_spkmi, Opt_sec_spkmp, Opt_sec_sys, nr__Opt_sec }; static const struct constant_table nfs_secflavor_tokens[] = { { "krb5", Opt_sec_krb5 }, { "krb5i", Opt_sec_krb5i }, { "krb5p", Opt_sec_krb5p }, { "lkey", Opt_sec_lkey }, { "lkeyi", Opt_sec_lkeyi }, { "lkeyp", Opt_sec_lkeyp }, { "none", Opt_sec_none }, { "null", Opt_sec_none }, { "spkm3", Opt_sec_spkm }, { "spkm3i", Opt_sec_spkmi }, { "spkm3p", Opt_sec_spkmp }, { "sys", Opt_sec_sys }, {} }; enum { Opt_xprtsec_none, Opt_xprtsec_tls, Opt_xprtsec_mtls, nr__Opt_xprtsec }; static const struct constant_table nfs_xprtsec_policies[] = { { "none", Opt_xprtsec_none }, { "tls", Opt_xprtsec_tls }, { "mtls", Opt_xprtsec_mtls }, {} }; static const struct constant_table nfs_rdirplus_tokens[] = { { "none", Opt_rdirplus_none }, { "force", Opt_rdirplus_force }, {} }; /* * Sanity-check a server address provided by the mount command. * * Address family must be initialized, and address must not be * the ANY address for that family. */ static int nfs_verify_server_address(struct sockaddr_storage *addr) { switch (addr->ss_family) { case AF_INET: { struct sockaddr_in *sa = (struct sockaddr_in *)addr; return sa->sin_addr.s_addr != htonl(INADDR_ANY); } case AF_INET6: { struct in6_addr *sa = &((struct sockaddr_in6 *)addr)->sin6_addr; return !ipv6_addr_any(sa); } } return 0; } #ifdef CONFIG_NFS_DISABLE_UDP_SUPPORT static bool nfs_server_transport_udp_invalid(const struct nfs_fs_context *ctx) { return true; } #else static bool nfs_server_transport_udp_invalid(const struct nfs_fs_context *ctx) { if (ctx->version == 4) return true; return false; } #endif /* * Sanity check the NFS transport protocol. */ static int nfs_validate_transport_protocol(struct fs_context *fc, struct nfs_fs_context *ctx) { switch (ctx->nfs_server.protocol) { case XPRT_TRANSPORT_UDP: if (nfs_server_transport_udp_invalid(ctx)) goto out_invalid_transport_udp; break; case XPRT_TRANSPORT_TCP: case XPRT_TRANSPORT_RDMA: break; default: ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP; } if (ctx->xprtsec.policy != RPC_XPRTSEC_NONE) switch (ctx->nfs_server.protocol) { case XPRT_TRANSPORT_TCP: ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP_TLS; break; default: goto out_invalid_xprtsec_policy; } return 0; out_invalid_transport_udp: return nfs_invalf(fc, "NFS: Unsupported transport protocol udp"); out_invalid_xprtsec_policy: return nfs_invalf(fc, "NFS: Transport does not support xprtsec"); } /* * For text based NFSv2/v3 mounts, the mount protocol transport default * settings should depend upon the specified NFS transport. */ static void nfs_set_mount_transport_protocol(struct nfs_fs_context *ctx) { if (ctx->mount_server.protocol == XPRT_TRANSPORT_UDP || ctx->mount_server.protocol == XPRT_TRANSPORT_TCP) return; switch (ctx->nfs_server.protocol) { case XPRT_TRANSPORT_UDP: ctx->mount_server.protocol = XPRT_TRANSPORT_UDP; break; case XPRT_TRANSPORT_TCP: case XPRT_TRANSPORT_RDMA: ctx->mount_server.protocol = XPRT_TRANSPORT_TCP; } } /* * Add 'flavor' to 'auth_info' if not already present. * Returns true if 'flavor' ends up in the list, false otherwise */ static int nfs_auth_info_add(struct fs_context *fc, struct nfs_auth_info *auth_info, rpc_authflavor_t flavor) { unsigned int i; unsigned int max_flavor_len = ARRAY_SIZE(auth_info->flavors); /* make sure this flavor isn't already in the list */ for (i = 0; i < auth_info->flavor_len; i++) { if (flavor == auth_info->flavors[i]) return 0; } if (auth_info->flavor_len + 1 >= max_flavor_len) return nfs_invalf(fc, "NFS: too many sec= flavors"); auth_info->flavors[auth_info->flavor_len++] = flavor; return 0; } /* * Parse the value of the 'sec=' option. */ static int nfs_parse_security_flavors(struct fs_context *fc, struct fs_parameter *param) { struct nfs_fs_context *ctx = nfs_fc2context(fc); rpc_authflavor_t pseudoflavor; char *string = param->string, *p; int ret; trace_nfs_mount_assign(param->key, string); while ((p = strsep(&string, ":")) != NULL) { if (!*p) continue; switch (lookup_constant(nfs_secflavor_tokens, p, -1)) { case Opt_sec_none: pseudoflavor = RPC_AUTH_NULL; break; case Opt_sec_sys: pseudoflavor = RPC_AUTH_UNIX; break; case Opt_sec_krb5: pseudoflavor = RPC_AUTH_GSS_KRB5; break; case Opt_sec_krb5i: pseudoflavor = RPC_AUTH_GSS_KRB5I; break; case Opt_sec_krb5p: pseudoflavor = RPC_AUTH_GSS_KRB5P; break; case Opt_sec_lkey: pseudoflavor = RPC_AUTH_GSS_LKEY; break; case Opt_sec_lkeyi: pseudoflavor = RPC_AUTH_GSS_LKEYI; break; case Opt_sec_lkeyp: pseudoflavor = RPC_AUTH_GSS_LKEYP; break; case Opt_sec_spkm: pseudoflavor = RPC_AUTH_GSS_SPKM; break; case Opt_sec_spkmi: pseudoflavor = RPC_AUTH_GSS_SPKMI; break; case Opt_sec_spkmp: pseudoflavor = RPC_AUTH_GSS_SPKMP; break; default: return nfs_invalf(fc, "NFS: sec=%s option not recognized", p); } ret = nfs_auth_info_add(fc, &ctx->auth_info, pseudoflavor); if (ret < 0) return ret; } return 0; } static int nfs_parse_xprtsec_policy(struct fs_context *fc, struct fs_parameter *param) { struct nfs_fs_context *ctx = nfs_fc2context(fc); trace_nfs_mount_assign(param->key, param->string); switch (lookup_constant(nfs_xprtsec_policies, param->string, -1)) { case Opt_xprtsec_none: ctx->xprtsec.policy = RPC_XPRTSEC_NONE; break; case Opt_xprtsec_tls: ctx->xprtsec.policy = RPC_XPRTSEC_TLS_ANON; break; case Opt_xprtsec_mtls: ctx->xprtsec.policy = RPC_XPRTSEC_TLS_X509; break; default: return nfs_invalf(fc, "NFS: Unrecognized transport security policy"); } return 0; } static int nfs_parse_version_string(struct fs_context *fc, const char *string) { struct nfs_fs_context *ctx = nfs_fc2context(fc); ctx->flags &= ~NFS_MOUNT_VER3; switch (lookup_constant(nfs_vers_tokens, string, -1)) { case Opt_vers_2: ctx->version = 2; break; case Opt_vers_3: ctx->flags |= NFS_MOUNT_VER3; ctx->version = 3; break; case Opt_vers_4: /* Backward compatibility option. In future, * the mount program should always supply * a NFSv4 minor version number. */ ctx->version = 4; break; case Opt_vers_4_0: ctx->version = 4; ctx->minorversion = 0; break; case Opt_vers_4_1: ctx->version = 4; ctx->minorversion = 1; break; case Opt_vers_4_2: ctx->version = 4; ctx->minorversion = 2; break; default: return nfs_invalf(fc, "NFS: Unsupported NFS version"); } return 0; } #ifdef CONFIG_KEYS static int nfs_tls_key_verify(key_serial_t key_id) { struct key *key = key_lookup(key_id); int error = 0; if (IS_ERR(key)) { pr_err("key id %08x not found\n", key_id); return PTR_ERR(key); } if (test_bit(KEY_FLAG_REVOKED, &key->flags) || test_bit(KEY_FLAG_INVALIDATED, &key->flags)) { pr_err("key id %08x revoked\n", key_id); error = -EKEYREVOKED; } key_put(key); return error; } #else static inline int nfs_tls_key_verify(key_serial_t key_id) { return -ENOENT; } #endif /* CONFIG_KEYS */ /* * Parse a single mount parameter. */ static int nfs_fs_context_parse_param(struct fs_context *fc, struct fs_parameter *param) { struct fs_parse_result result; struct nfs_fs_context *ctx = nfs_fc2context(fc); unsigned short protofamily, mountfamily; unsigned int len; int ret, opt; trace_nfs_mount_option(param); opt = fs_parse(fc, nfs_fs_parameters, param, &result); if (opt < 0) return (opt == -ENOPARAM && ctx->sloppy) ? 1 : opt; if (fc->security) ctx->has_sec_mnt_opts = 1; switch (opt) { case Opt_source: if (fc->source) return nfs_invalf(fc, "NFS: Multiple sources not supported"); fc->source = param->string; param->string = NULL; break; /* * boolean options: foo/nofoo */ case Opt_soft: ctx->flags |= NFS_MOUNT_SOFT; ctx->flags &= ~NFS_MOUNT_SOFTERR; break; case Opt_softerr: ctx->flags |= NFS_MOUNT_SOFTERR | NFS_MOUNT_SOFTREVAL; ctx->flags &= ~NFS_MOUNT_SOFT; break; case Opt_hard: ctx->flags &= ~(NFS_MOUNT_SOFT | NFS_MOUNT_SOFTERR | NFS_MOUNT_SOFTREVAL); break; case Opt_softreval: if (result.negated) ctx->flags &= ~NFS_MOUNT_SOFTREVAL; else ctx->flags |= NFS_MOUNT_SOFTREVAL; break; case Opt_posix: if (result.negated) ctx->flags &= ~NFS_MOUNT_POSIX; else ctx->flags |= NFS_MOUNT_POSIX; break; case Opt_cto: if (result.negated) ctx->flags |= NFS_MOUNT_NOCTO; else ctx->flags &= ~NFS_MOUNT_NOCTO; break; case Opt_trunkdiscovery: if (result.negated) ctx->flags &= ~NFS_MOUNT_TRUNK_DISCOVERY; else ctx->flags |= NFS_MOUNT_TRUNK_DISCOVERY; break; case Opt_alignwrite: if (result.negated) ctx->flags |= NFS_MOUNT_NO_ALIGNWRITE; else ctx->flags &= ~NFS_MOUNT_NO_ALIGNWRITE; break; case Opt_ac: if (result.negated) ctx->flags |= NFS_MOUNT_NOAC; else ctx->flags &= ~NFS_MOUNT_NOAC; break; case Opt_lock: if (result.negated) { ctx->lock_status = NFS_LOCK_NOLOCK; ctx->flags |= NFS_MOUNT_NONLM; ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL); } else { ctx->lock_status = NFS_LOCK_LOCK; ctx->flags &= ~NFS_MOUNT_NONLM; ctx->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL); } break; case Opt_udp: ctx->flags &= ~NFS_MOUNT_TCP; ctx->nfs_server.protocol = XPRT_TRANSPORT_UDP; break; case Opt_tcp: case Opt_rdma: ctx->flags |= NFS_MOUNT_TCP; /* for side protocols */ ret = xprt_find_transport_ident(param->key); if (ret < 0) goto out_bad_transport; ctx->nfs_server.protocol = ret; break; case Opt_acl: if (result.negated) ctx->flags |= NFS_MOUNT_NOACL; else ctx->flags &= ~NFS_MOUNT_NOACL; break; case Opt_rdirplus: if (result.negated) { ctx->flags &= ~NFS_MOUNT_FORCE_RDIRPLUS; ctx->flags |= NFS_MOUNT_NORDIRPLUS; } else if (!param->string) { ctx->flags &= ~(NFS_MOUNT_NORDIRPLUS | NFS_MOUNT_FORCE_RDIRPLUS); } else { switch (lookup_constant(nfs_rdirplus_tokens, param->string, -1)) { case Opt_rdirplus_none: ctx->flags &= ~NFS_MOUNT_FORCE_RDIRPLUS; ctx->flags |= NFS_MOUNT_NORDIRPLUS; break; case Opt_rdirplus_force: ctx->flags &= ~NFS_MOUNT_NORDIRPLUS; ctx->flags |= NFS_MOUNT_FORCE_RDIRPLUS; break; default: goto out_invalid_value; } } break; case Opt_sharecache: if (result.negated) ctx->flags |= NFS_MOUNT_UNSHARED; else ctx->flags &= ~NFS_MOUNT_UNSHARED; break; case Opt_resvport: if (result.negated) ctx->flags |= NFS_MOUNT_NORESVPORT; else ctx->flags &= ~NFS_MOUNT_NORESVPORT; break; case Opt_fscache_flag: if (result.negated) ctx->options &= ~NFS_OPTION_FSCACHE; else ctx->options |= NFS_OPTION_FSCACHE; kfree(ctx->fscache_uniq); ctx->fscache_uniq = NULL; break; case Opt_fscache: trace_nfs_mount_assign(param->key, param->string); ctx->options |= NFS_OPTION_FSCACHE; kfree(ctx->fscache_uniq); ctx->fscache_uniq = param->string; param->string = NULL; break; case Opt_migration: if (result.negated) ctx->options &= ~NFS_OPTION_MIGRATION; else ctx->options |= NFS_OPTION_MIGRATION; break; /* * options that take numeric values */ case Opt_port: if (result.uint_32 > USHRT_MAX) goto out_of_bounds; ctx->nfs_server.port = result.uint_32; break; case Opt_rsize: ctx->rsize = result.uint_32; break; case Opt_wsize: ctx->wsize = result.uint_32; break; case Opt_bsize: ctx->bsize = result.uint_32; break; case Opt_timeo: if (result.uint_32 < 1 || result.uint_32 > INT_MAX) goto out_of_bounds; ctx->timeo = result.uint_32; break; case Opt_retrans: if (result.uint_32 > INT_MAX) goto out_of_bounds; ctx->retrans = result.uint_32; break; case Opt_acregmin: ctx->acregmin = result.uint_32; break; case Opt_acregmax: ctx->acregmax = result.uint_32; break; case Opt_acdirmin: ctx->acdirmin = result.uint_32; break; case Opt_acdirmax: ctx->acdirmax = result.uint_32; break; case Opt_actimeo: ctx->acregmin = result.uint_32; ctx->acregmax = result.uint_32; ctx->acdirmin = result.uint_32; ctx->acdirmax = result.uint_32; break; case Opt_namelen: ctx->namlen = result.uint_32; break; case Opt_mountport: if (result.uint_32 > USHRT_MAX) goto out_of_bounds; ctx->mount_server.port = result.uint_32; break; case Opt_mountvers: if (result.uint_32 < NFS_MNT_VERSION || result.uint_32 > NFS_MNT3_VERSION) goto out_of_bounds; ctx->mount_server.version = result.uint_32; break; case Opt_minorversion: if (result.uint_32 > NFS4_MAX_MINOR_VERSION) goto out_of_bounds; ctx->minorversion = result.uint_32; break; /* * options that take text values */ case Opt_v: ret = nfs_parse_version_string(fc, param->key + 1); if (ret < 0) return ret; break; case Opt_vers: if (!param->string) goto out_invalid_value; trace_nfs_mount_assign(param->key, param->string); ret = nfs_parse_version_string(fc, param->string); if (ret < 0) return ret; break; case Opt_sec: ret = nfs_parse_security_flavors(fc, param); if (ret < 0) return ret; break; case Opt_xprtsec: ret = nfs_parse_xprtsec_policy(fc, param); if (ret < 0) return ret; break; case Opt_cert_serial: ret = nfs_tls_key_verify(result.int_32); if (ret < 0) return ret; ctx->xprtsec.cert_serial = result.int_32; break; case Opt_privkey_serial: ret = nfs_tls_key_verify(result.int_32); if (ret < 0) return ret; ctx->xprtsec.privkey_serial = result.int_32; break; case Opt_proto: if (!param->string) goto out_invalid_value; trace_nfs_mount_assign(param->key, param->string); protofamily = AF_INET; switch (lookup_constant(nfs_xprt_protocol_tokens, param->string, -1)) { case Opt_xprt_udp6: protofamily = AF_INET6; fallthrough; case Opt_xprt_udp: ctx->flags &= ~NFS_MOUNT_TCP; ctx->nfs_server.protocol = XPRT_TRANSPORT_UDP; break; case Opt_xprt_tcp6: protofamily = AF_INET6; fallthrough; case Opt_xprt_tcp: ctx->flags |= NFS_MOUNT_TCP; ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP; break; case Opt_xprt_rdma6: protofamily = AF_INET6; fallthrough; case Opt_xprt_rdma: /* vector side protocols to TCP */ ctx->flags |= NFS_MOUNT_TCP; ret = xprt_find_transport_ident(param->string); if (ret < 0) goto out_bad_transport; ctx->nfs_server.protocol = ret; break; default: goto out_bad_transport; } ctx->protofamily = protofamily; break; case Opt_mountproto: if (!param->string) goto out_invalid_value; trace_nfs_mount_assign(param->key, param->string); mountfamily = AF_INET; switch (lookup_constant(nfs_xprt_protocol_tokens, param->string, -1)) { case Opt_xprt_udp6: mountfamily = AF_INET6; fallthrough; case Opt_xprt_udp: ctx->mount_server.protocol = XPRT_TRANSPORT_UDP; break; case Opt_xprt_tcp6: mountfamily = AF_INET6; fallthrough; case Opt_xprt_tcp: ctx->mount_server.protocol = XPRT_TRANSPORT_TCP; break; case Opt_xprt_rdma: /* not used for side protocols */ default: goto out_bad_transport; } ctx->mountfamily = mountfamily; break; case Opt_addr: trace_nfs_mount_assign(param->key, param->string); len = rpc_pton(fc->net_ns, param->string, param->size, &ctx->nfs_server.address, sizeof(ctx->nfs_server._address)); if (len == 0) goto out_invalid_address; ctx->nfs_server.addrlen = len; break; case Opt_clientaddr: trace_nfs_mount_assign(param->key, param->string); kfree(ctx->client_address); ctx->client_address = param->string; param->string = NULL; break; case Opt_mounthost: trace_nfs_mount_assign(param->key, param->string); kfree(ctx->mount_server.hostname); ctx->mount_server.hostname = param->string; param->string = NULL; break; case Opt_mountaddr: trace_nfs_mount_assign(param->key, param->string); len = rpc_pton(fc->net_ns, param->string, param->size, &ctx->mount_server.address, sizeof(ctx->mount_server._address)); if (len == 0) goto out_invalid_address; ctx->mount_server.addrlen = len; break; case Opt_nconnect: trace_nfs_mount_assign(param->key, param->string); if (result.uint_32 < 1 || result.uint_32 > NFS_MAX_CONNECTIONS) goto out_of_bounds; ctx->nfs_server.nconnect = result.uint_32; break; case Opt_max_connect: trace_nfs_mount_assign(param->key, param->string); if (result.uint_32 < 1 || result.uint_32 > NFS_MAX_TRANSPORTS) goto out_of_bounds; ctx->nfs_server.max_connect = result.uint_32; break; case Opt_fatal_neterrors: trace_nfs_mount_assign(param->key, param->string); switch (result.uint_32) { case Opt_fatal_neterrors_default: if (fc->net_ns != &init_net) ctx->flags |= NFS_MOUNT_NETUNREACH_FATAL; else ctx->flags &= ~NFS_MOUNT_NETUNREACH_FATAL; break; case Opt_fatal_neterrors_enetunreach: ctx->flags |= NFS_MOUNT_NETUNREACH_FATAL; break; case Opt_fatal_neterrors_none: ctx->flags &= ~NFS_MOUNT_NETUNREACH_FATAL; break; default: goto out_invalid_value; } break; case Opt_lookupcache: trace_nfs_mount_assign(param->key, param->string); switch (result.uint_32) { case Opt_lookupcache_all: ctx->flags &= ~(NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE); break; case Opt_lookupcache_positive: ctx->flags &= ~NFS_MOUNT_LOOKUP_CACHE_NONE; ctx->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG; break; case Opt_lookupcache_none: ctx->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE; break; default: goto out_invalid_value; } break; case Opt_local_lock: trace_nfs_mount_assign(param->key, param->string); switch (result.uint_32) { case Opt_local_lock_all: ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL); break; case Opt_local_lock_flock: ctx->flags |= NFS_MOUNT_LOCAL_FLOCK; break; case Opt_local_lock_posix: ctx->flags |= NFS_MOUNT_LOCAL_FCNTL; break; case Opt_local_lock_none: ctx->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL); break; default: goto out_invalid_value; } break; case Opt_write: trace_nfs_mount_assign(param->key, param->string); switch (result.uint_32) { case Opt_write_lazy: ctx->flags &= ~(NFS_MOUNT_WRITE_EAGER | NFS_MOUNT_WRITE_WAIT); break; case Opt_write_eager: ctx->flags |= NFS_MOUNT_WRITE_EAGER; ctx->flags &= ~NFS_MOUNT_WRITE_WAIT; break; case Opt_write_wait: ctx->flags |= NFS_MOUNT_WRITE_EAGER | NFS_MOUNT_WRITE_WAIT; break; default: goto out_invalid_value; } break; /* * Special options */ case Opt_sloppy: ctx->sloppy = true; break; } return 0; out_invalid_value: return nfs_invalf(fc, "NFS: Bad mount option value specified"); out_invalid_address: return nfs_invalf(fc, "NFS: Bad IP address specified"); out_of_bounds: return nfs_invalf(fc, "NFS: Value for '%s' out of range", param->key); out_bad_transport: return nfs_invalf(fc, "NFS: Unrecognized transport protocol"); } /* * Split fc->source into "hostname:export_path". * * The leftmost colon demarks the split between the server's hostname * and the export path. If the hostname starts with a left square * bracket, then it may contain colons. * * Note: caller frees hostname and export path, even on error. */ static int nfs_parse_source(struct fs_context *fc, size_t maxnamlen, size_t maxpathlen) { struct nfs_fs_context *ctx = nfs_fc2context(fc); const char *dev_name = fc->source; size_t len; const char *end; if (unlikely(!dev_name || !*dev_name)) return -EINVAL; /* Is the host name protected with square brakcets? */ if (*dev_name == '[') { end = strchr(++dev_name, ']'); if (end == NULL || end[1] != ':') goto out_bad_devname; len = end - dev_name; end++; } else { const char *comma; end = strchr(dev_name, ':'); if (end == NULL) goto out_bad_devname; len = end - dev_name; /* kill possible hostname list: not supported */ comma = memchr(dev_name, ',', len); if (comma) len = comma - dev_name; } if (len > maxnamlen) goto out_hostname; kfree(ctx->nfs_server.hostname); /* N.B. caller will free nfs_server.hostname in all cases */ ctx->nfs_server.hostname = kmemdup_nul(dev_name, len, GFP_KERNEL); if (!ctx->nfs_server.hostname) goto out_nomem; len = strlen(++end); if (len > maxpathlen) goto out_path; ctx->nfs_server.export_path = kmemdup_nul(end, len, GFP_KERNEL); if (!ctx->nfs_server.export_path) goto out_nomem; trace_nfs_mount_path(ctx->nfs_server.export_path); return 0; out_bad_devname: return nfs_invalf(fc, "NFS: device name not in host:path format"); out_nomem: nfs_errorf(fc, "NFS: not enough memory to parse device name"); return -ENOMEM; out_hostname: nfs_errorf(fc, "NFS: server hostname too long"); return -ENAMETOOLONG; out_path: nfs_errorf(fc, "NFS: export pathname too long"); return -ENAMETOOLONG; } static inline bool is_remount_fc(struct fs_context *fc) { return fc->root != NULL; } /* * Parse monolithic NFS2/NFS3 mount data * - fills in the mount root filehandle * * For option strings, user space handles the following behaviors: * * + DNS: mapping server host name to IP address ("addr=" option) * * + failure mode: how to behave if a mount request can't be handled * immediately ("fg/bg" option) * * + retry: how often to retry a mount request ("retry=" option) * * + breaking back: trying proto=udp after proto=tcp, v2 after v3, * mountproto=tcp after mountproto=udp, and so on */ static int nfs23_parse_monolithic(struct fs_context *fc, struct nfs_mount_data *data) { struct nfs_fs_context *ctx = nfs_fc2context(fc); struct nfs_fh *mntfh = ctx->mntfh; struct sockaddr_storage *sap = &ctx->nfs_server._address; int extra_flags = NFS_MOUNT_LEGACY_INTERFACE; int ret; if (data == NULL) goto out_no_data; ctx->version = NFS_DEFAULT_VERSION; switch (data->version) { case 1: data->namlen = 0; fallthrough; case 2: data->bsize = 0; fallthrough; case 3: if (data->flags & NFS_MOUNT_VER3) goto out_no_v3; data->root.size = NFS2_FHSIZE; memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); /* Turn off security negotiation */ extra_flags |= NFS_MOUNT_SECFLAVOUR; fallthrough; case 4: if (data->flags & NFS_MOUNT_SECFLAVOUR) goto out_no_sec; fallthrough; case 5: memset(data->context, 0, sizeof(data->context)); fallthrough; case 6: if (data->flags & NFS_MOUNT_VER3) { if (data->root.size > NFS3_FHSIZE || data->root.size == 0) goto out_invalid_fh; mntfh->size = data->root.size; ctx->version = 3; } else { mntfh->size = NFS2_FHSIZE; ctx->version = 2; } memcpy(mntfh->data, data->root.data, mntfh->size); if (mntfh->size < sizeof(mntfh->data)) memset(mntfh->data + mntfh->size, 0, sizeof(mntfh->data) - mntfh->size); /* * for proto == XPRT_TRANSPORT_UDP, which is what uses * to_exponential, implying shift: limit the shift value * to BITS_PER_LONG (majortimeo is unsigned long) */ if (!(data->flags & NFS_MOUNT_TCP)) /* this will be UDP */ if (data->retrans >= 64) /* shift value is too large */ goto out_invalid_data; /* * Translate to nfs_fs_context, which nfs_fill_super * can deal with. */ ctx->flags = data->flags & NFS_MOUNT_FLAGMASK; ctx->flags |= extra_flags; ctx->rsize = data->rsize; ctx->wsize = data->wsize; ctx->timeo = data->timeo; ctx->retrans = data->retrans; ctx->acregmin = data->acregmin; ctx->acregmax = data->acregmax; ctx->acdirmin = data->acdirmin; ctx->acdirmax = data->acdirmax; ctx->need_mount = false; if (!is_remount_fc(fc)) { memcpy(sap, &data->addr, sizeof(data->addr)); ctx->nfs_server.addrlen = sizeof(data->addr); ctx->nfs_server.port = ntohs(data->addr.sin_port); } if (sap->ss_family != AF_INET || !nfs_verify_server_address(sap)) goto out_no_address; if (!(data->flags & NFS_MOUNT_TCP)) ctx->nfs_server.protocol = XPRT_TRANSPORT_UDP; /* N.B. caller will free nfs_server.hostname in all cases */ ctx->nfs_server.hostname = kstrdup(data->hostname, GFP_KERNEL); if (!ctx->nfs_server.hostname) goto out_nomem; ctx->namlen = data->namlen; ctx->bsize = data->bsize; if (data->flags & NFS_MOUNT_SECFLAVOUR) ctx->selected_flavor = data->pseudoflavor; else ctx->selected_flavor = RPC_AUTH_UNIX; if (!(data->flags & NFS_MOUNT_NONLM)) ctx->flags &= ~(NFS_MOUNT_LOCAL_FLOCK| NFS_MOUNT_LOCAL_FCNTL); else ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK| NFS_MOUNT_LOCAL_FCNTL); /* * The legacy version 6 binary mount data from userspace has a * field used only to transport selinux information into the * kernel. To continue to support that functionality we * have a touch of selinux knowledge here in the NFS code. The * userspace code converted context=blah to just blah so we are * converting back to the full string selinux understands. */ if (data->context[0]){ #ifdef CONFIG_SECURITY_SELINUX int ret; data->context[NFS_MAX_CONTEXT_LEN] = '\0'; ret = vfs_parse_fs_string(fc, "context", data->context); if (ret < 0) return ret; #else return -EINVAL; #endif } break; default: goto generic; } ret = nfs_validate_transport_protocol(fc, ctx); if (ret) return ret; ctx->skip_reconfig_option_check = true; return 0; generic: return generic_parse_monolithic(fc, data); out_no_data: if (is_remount_fc(fc)) { ctx->skip_reconfig_option_check = true; return 0; } return nfs_invalf(fc, "NFS: mount program didn't pass any mount data"); out_no_v3: return nfs_invalf(fc, "NFS: nfs_mount_data version does not support v3"); out_no_sec: return nfs_invalf(fc, "NFS: nfs_mount_data version supports only AUTH_SYS"); out_nomem: return -ENOMEM; out_no_address: return nfs_invalf(fc, "NFS: mount program didn't pass remote address"); out_invalid_fh: return nfs_invalf(fc, "NFS: invalid root filehandle"); out_invalid_data: return nfs_invalf(fc, "NFS: invalid binary mount data"); } #if IS_ENABLED(CONFIG_NFS_V4) struct compat_nfs_string { compat_uint_t len; compat_uptr_t data; }; static inline void compat_nfs_string(struct nfs_string *dst, struct compat_nfs_string *src) { dst->data = compat_ptr(src->data); dst->len = src->len; } struct compat_nfs4_mount_data_v1 { compat_int_t version; compat_int_t flags; compat_int_t rsize; compat_int_t wsize; compat_int_t timeo; compat_int_t retrans; compat_int_t acregmin; compat_int_t acregmax; compat_int_t acdirmin; compat_int_t acdirmax; struct compat_nfs_string client_addr; struct compat_nfs_string mnt_path; struct compat_nfs_string hostname; compat_uint_t host_addrlen; compat_uptr_t host_addr; compat_int_t proto; compat_int_t auth_flavourlen; compat_uptr_t auth_flavours; }; static void nfs4_compat_mount_data_conv(struct nfs4_mount_data *data) { struct compat_nfs4_mount_data_v1 *compat = (struct compat_nfs4_mount_data_v1 *)data; /* copy the fields backwards */ data->auth_flavours = compat_ptr(compat->auth_flavours); data->auth_flavourlen = compat->auth_flavourlen; data->proto = compat->proto; data->host_addr = compat_ptr(compat->host_addr); data->host_addrlen = compat->host_addrlen; compat_nfs_string(&data->hostname, &compat->hostname); compat_nfs_string(&data->mnt_path, &compat->mnt_path); compat_nfs_string(&data->client_addr, &compat->client_addr); data->acdirmax = compat->acdirmax; data->acdirmin = compat->acdirmin; data->acregmax = compat->acregmax; data->acregmin = compat->acregmin; data->retrans = compat->retrans; data->timeo = compat->timeo; data->wsize = compat->wsize; data->rsize = compat->rsize; data->flags = compat->flags; data->version = compat->version; } /* * Validate NFSv4 mount options */ static int nfs4_parse_monolithic(struct fs_context *fc, struct nfs4_mount_data *data) { struct nfs_fs_context *ctx = nfs_fc2context(fc); struct sockaddr_storage *sap = &ctx->nfs_server._address; int ret; char *c; if (!data) { if (is_remount_fc(fc)) goto done; return nfs_invalf(fc, "NFS4: mount program didn't pass any mount data"); } ctx->version = 4; if (data->version != 1) return generic_parse_monolithic(fc, data); if (in_compat_syscall()) nfs4_compat_mount_data_conv(data); if (data->host_addrlen > sizeof(ctx->nfs_server.address)) goto out_no_address; if (data->host_addrlen == 0) goto out_no_address; ctx->nfs_server.addrlen = data->host_addrlen; if (copy_from_user(sap, data->host_addr, data->host_addrlen)) return -EFAULT; if (!nfs_verify_server_address(sap)) goto out_no_address; ctx->nfs_server.port = ntohs(((struct sockaddr_in *)sap)->sin_port); if (data->auth_flavourlen) { rpc_authflavor_t pseudoflavor; if (data->auth_flavourlen > 1) goto out_inval_auth; if (copy_from_user(&pseudoflavor, data->auth_flavours, sizeof(pseudoflavor))) return -EFAULT; ctx->selected_flavor = pseudoflavor; } else { ctx->selected_flavor = RPC_AUTH_UNIX; } c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN); if (IS_ERR(c)) return PTR_ERR(c); ctx->nfs_server.hostname = c; c = strndup_user(data->mnt_path.data, NFS4_MAXPATHLEN); if (IS_ERR(c)) return PTR_ERR(c); ctx->nfs_server.export_path = c; trace_nfs_mount_path(c); c = strndup_user(data->client_addr.data, 16); if (IS_ERR(c)) return PTR_ERR(c); ctx->client_address = c; /* * Translate to nfs_fs_context, which nfs_fill_super * can deal with. */ ctx->flags = data->flags & NFS4_MOUNT_FLAGMASK; ctx->rsize = data->rsize; ctx->wsize = data->wsize; ctx->timeo = data->timeo; ctx->retrans = data->retrans; ctx->acregmin = data->acregmin; ctx->acregmax = data->acregmax; ctx->acdirmin = data->acdirmin; ctx->acdirmax = data->acdirmax; ctx->nfs_server.protocol = data->proto; ret = nfs_validate_transport_protocol(fc, ctx); if (ret) return ret; done: ctx->skip_reconfig_option_check = true; return 0; out_inval_auth: return nfs_invalf(fc, "NFS4: Invalid number of RPC auth flavours %d", data->auth_flavourlen); out_no_address: return nfs_invalf(fc, "NFS4: mount program didn't pass remote address"); } #endif /* * Parse a monolithic block of data from sys_mount(). */ static int nfs_fs_context_parse_monolithic(struct fs_context *fc, void *data) { if (fc->fs_type == &nfs_fs_type) return nfs23_parse_monolithic(fc, data); #if IS_ENABLED(CONFIG_NFS_V4) if (fc->fs_type == &nfs4_fs_type) return nfs4_parse_monolithic(fc, data); #endif return nfs_invalf(fc, "NFS: Unsupported monolithic data version"); } /* * Validate the preparsed information in the config. */ static int nfs_fs_context_validate(struct fs_context *fc) { struct nfs_fs_context *ctx = nfs_fc2context(fc); struct nfs_subversion *nfs_mod; struct sockaddr_storage *sap = &ctx->nfs_server._address; int max_namelen = PAGE_SIZE; int max_pathlen = NFS_MAXPATHLEN; int port = 0; int ret; if (!fc->source) goto out_no_device_name; /* Check for sanity first. */ if (ctx->minorversion && ctx->version != 4) goto out_minorversion_mismatch; if (ctx->options & NFS_OPTION_MIGRATION && (ctx->version != 4 || ctx->minorversion != 0)) goto out_migration_misuse; /* Verify that any proto=/mountproto= options match the address * families in the addr=/mountaddr= options. */ if (ctx->protofamily != AF_UNSPEC && ctx->protofamily != ctx->nfs_server.address.sa_family) goto out_proto_mismatch; if (ctx->mountfamily != AF_UNSPEC) { if (ctx->mount_server.addrlen) { if (ctx->mountfamily != ctx->mount_server.address.sa_family) goto out_mountproto_mismatch; } else { if (ctx->mountfamily != ctx->nfs_server.address.sa_family) goto out_mountproto_mismatch; } } if (!nfs_verify_server_address(sap)) goto out_no_address; ret = nfs_validate_transport_protocol(fc, ctx); if (ret) return ret; if (ctx->version == 4) { if (IS_ENABLED(CONFIG_NFS_V4)) { if (ctx->nfs_server.protocol == XPRT_TRANSPORT_RDMA) port = NFS_RDMA_PORT; else port = NFS_PORT; max_namelen = NFS4_MAXNAMLEN; max_pathlen = NFS4_MAXPATHLEN; ctx->flags &= ~(NFS_MOUNT_NONLM | NFS_MOUNT_NOACL | NFS_MOUNT_VER3 | NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL); } else { goto out_v4_not_compiled; } } else { nfs_set_mount_transport_protocol(ctx); if (ctx->nfs_server.protocol == XPRT_TRANSPORT_RDMA) port = NFS_RDMA_PORT; } nfs_set_port(sap, &ctx->nfs_server.port, port); ret = nfs_parse_source(fc, max_namelen, max_pathlen); if (ret < 0) return ret; /* Load the NFS protocol module if we haven't done so yet */ if (!ctx->nfs_mod) { nfs_mod = find_nfs_version(ctx->version); if (IS_ERR(nfs_mod)) { ret = PTR_ERR(nfs_mod); goto out_version_unavailable; } ctx->nfs_mod = nfs_mod; } /* Ensure the filesystem context has the correct fs_type */ if (fc->fs_type != ctx->nfs_mod->nfs_fs) { module_put(fc->fs_type->owner); __module_get(ctx->nfs_mod->nfs_fs->owner); fc->fs_type = ctx->nfs_mod->nfs_fs; } return 0; out_no_device_name: return nfs_invalf(fc, "NFS: Device name not specified"); out_v4_not_compiled: nfs_errorf(fc, "NFS: NFSv4 is not compiled into kernel"); return -EPROTONOSUPPORT; out_no_address: return nfs_invalf(fc, "NFS: mount program didn't pass remote address"); out_mountproto_mismatch: return nfs_invalf(fc, "NFS: Mount server address does not match mountproto= option"); out_proto_mismatch: return nfs_invalf(fc, "NFS: Server address does not match proto= option"); out_minorversion_mismatch: return nfs_invalf(fc, "NFS: Mount option vers=%u does not support minorversion=%u", ctx->version, ctx->minorversion); out_migration_misuse: return nfs_invalf(fc, "NFS: 'Migration' not supported for this NFS version"); out_version_unavailable: nfs_errorf(fc, "NFS: Version unavailable"); return ret; } /* * Create an NFS superblock by the appropriate method. */ static int nfs_get_tree(struct fs_context *fc) { struct nfs_fs_context *ctx = nfs_fc2context(fc); int err = nfs_fs_context_validate(fc); if (err) return err; if (!ctx->internal) return ctx->nfs_mod->rpc_ops->try_get_tree(fc); else return nfs_get_tree_common(fc); } /* * Handle duplication of a configuration. The caller copied *src into *sc, but * it can't deal with resource pointers in the filesystem context, so we have * to do that. We need to clear pointers, copy data or get extra refs as * appropriate. */ static int nfs_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc) { struct nfs_fs_context *src = nfs_fc2context(src_fc), *ctx; ctx = kmemdup(src, sizeof(struct nfs_fs_context), GFP_KERNEL); if (!ctx) return -ENOMEM; ctx->mntfh = nfs_alloc_fhandle(); if (!ctx->mntfh) { kfree(ctx); return -ENOMEM; } nfs_copy_fh(ctx->mntfh, src->mntfh); get_nfs_version(ctx->nfs_mod); ctx->client_address = NULL; ctx->mount_server.hostname = NULL; ctx->nfs_server.export_path = NULL; ctx->nfs_server.hostname = NULL; ctx->fscache_uniq = NULL; ctx->clone_data.fattr = NULL; fc->fs_private = ctx; return 0; } static void nfs_fs_context_free(struct fs_context *fc) { struct nfs_fs_context *ctx = nfs_fc2context(fc); if (ctx) { if (ctx->server) nfs_free_server(ctx->server); if (ctx->nfs_mod) put_nfs_version(ctx->nfs_mod); kfree(ctx->client_address); kfree(ctx->mount_server.hostname); kfree(ctx->nfs_server.export_path); kfree(ctx->nfs_server.hostname); kfree(ctx->fscache_uniq); nfs_free_fhandle(ctx->mntfh); nfs_free_fattr(ctx->clone_data.fattr); kfree(ctx); } } static const struct fs_context_operations nfs_fs_context_ops = { .free = nfs_fs_context_free, .dup = nfs_fs_context_dup, .parse_param = nfs_fs_context_parse_param, .parse_monolithic = nfs_fs_context_parse_monolithic, .get_tree = nfs_get_tree, .reconfigure = nfs_reconfigure, }; /* * Prepare superblock configuration. We use the namespaces attached to the * context. This may be the current process's namespaces, or it may be a * container's namespaces. */ static int nfs_init_fs_context(struct fs_context *fc) { struct nfs_fs_context *ctx; ctx = kzalloc(sizeof(struct nfs_fs_context), GFP_KERNEL); if (unlikely(!ctx)) return -ENOMEM; ctx->mntfh = nfs_alloc_fhandle(); if (unlikely(!ctx->mntfh)) { kfree(ctx); return -ENOMEM; } ctx->protofamily = AF_UNSPEC; ctx->mountfamily = AF_UNSPEC; ctx->mount_server.port = NFS_UNSPEC_PORT; if (fc->root) { /* reconfigure, start with the current config */ struct nfs_server *nfss = fc->root->d_sb->s_fs_info; struct net *net = nfss->nfs_client->cl_net; ctx->flags = nfss->flags; ctx->rsize = nfss->rsize; ctx->wsize = nfss->wsize; ctx->retrans = nfss->client->cl_timeout->to_retries; ctx->selected_flavor = nfss->client->cl_auth->au_flavor; ctx->acregmin = nfss->acregmin / HZ; ctx->acregmax = nfss->acregmax / HZ; ctx->acdirmin = nfss->acdirmin / HZ; ctx->acdirmax = nfss->acdirmax / HZ; ctx->timeo = 10U * nfss->client->cl_timeout->to_initval / HZ; ctx->nfs_server.port = nfss->port; ctx->nfs_server.addrlen = nfss->nfs_client->cl_addrlen; ctx->version = nfss->nfs_client->rpc_ops->version; ctx->minorversion = nfss->nfs_client->cl_minorversion; memcpy(&ctx->nfs_server._address, &nfss->nfs_client->cl_addr, ctx->nfs_server.addrlen); if (fc->net_ns != net) { put_net(fc->net_ns); fc->net_ns = get_net(net); } ctx->nfs_mod = nfss->nfs_client->cl_nfs_mod; get_nfs_version(ctx->nfs_mod); } else { /* defaults */ ctx->timeo = NFS_UNSPEC_TIMEO; ctx->retrans = NFS_UNSPEC_RETRANS; ctx->acregmin = NFS_DEF_ACREGMIN; ctx->acregmax = NFS_DEF_ACREGMAX; ctx->acdirmin = NFS_DEF_ACDIRMIN; ctx->acdirmax = NFS_DEF_ACDIRMAX; ctx->nfs_server.port = NFS_UNSPEC_PORT; ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP; ctx->selected_flavor = RPC_AUTH_MAXFLAVOR; ctx->minorversion = 0; ctx->need_mount = true; ctx->xprtsec.policy = RPC_XPRTSEC_NONE; ctx->xprtsec.cert_serial = TLS_NO_CERT; ctx->xprtsec.privkey_serial = TLS_NO_PRIVKEY; if (fc->net_ns != &init_net) ctx->flags |= NFS_MOUNT_NETUNREACH_FATAL; fc->s_iflags |= SB_I_STABLE_WRITES; } fc->fs_private = ctx; fc->ops = &nfs_fs_context_ops; return 0; } struct file_system_type nfs_fs_type = { .owner = THIS_MODULE, .name = "nfs", .init_fs_context = nfs_init_fs_context, .parameters = nfs_fs_parameters, .kill_sb = nfs_kill_super, .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, }; MODULE_ALIAS_FS("nfs"); EXPORT_SYMBOL_GPL(nfs_fs_type); #if IS_ENABLED(CONFIG_NFS_V4) struct file_system_type nfs4_fs_type = { .owner = THIS_MODULE, .name = "nfs4", .init_fs_context = nfs_init_fs_context, .parameters = nfs_fs_parameters, .kill_sb = nfs_kill_super, .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, }; MODULE_ALIAS_FS("nfs4"); MODULE_ALIAS("nfs4"); EXPORT_SYMBOL_GPL(nfs4_fs_type); #endif /* CONFIG_NFS_V4 */
194 194 123 123 308 130 4095 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_MMU_CONTEXT_H #define _ASM_X86_MMU_CONTEXT_H #include <linux/atomic.h> #include <linux/mm_types.h> #include <linux/pkeys.h> #include <trace/events/tlb.h> #include <asm/tlbflush.h> #include <asm/paravirt.h> #include <asm/debugreg.h> #include <asm/gsseg.h> #include <asm/desc.h> extern atomic64_t last_mm_ctx_id; #ifdef CONFIG_PERF_EVENTS DECLARE_STATIC_KEY_FALSE(rdpmc_never_available_key); DECLARE_STATIC_KEY_FALSE(rdpmc_always_available_key); void cr4_update_pce(void *ignored); #endif #ifdef CONFIG_MODIFY_LDT_SYSCALL /* * ldt_structs can be allocated, used, and freed, but they are never * modified while live. */ struct ldt_struct { /* * Xen requires page-aligned LDTs with special permissions. This is * needed to prevent us from installing evil descriptors such as * call gates. On native, we could merge the ldt_struct and LDT * allocations, but it's not worth trying to optimize. */ struct desc_struct *entries; unsigned int nr_entries; /* * If PTI is in use, then the entries array is not mapped while we're * in user mode. The whole array will be aliased at the addressed * given by ldt_slot_va(slot). We use two slots so that we can allocate * and map, and enable a new LDT without invalidating the mapping * of an older, still-in-use LDT. * * slot will be -1 if this LDT doesn't have an alias mapping. */ int slot; }; /* * Used for LDT copy/destruction. */ static inline void init_new_context_ldt(struct mm_struct *mm) { mm->context.ldt = NULL; init_rwsem(&mm->context.ldt_usr_sem); } int ldt_dup_context(struct mm_struct *oldmm, struct mm_struct *mm); void destroy_context_ldt(struct mm_struct *mm); void ldt_arch_exit_mmap(struct mm_struct *mm); #else /* CONFIG_MODIFY_LDT_SYSCALL */ static inline void init_new_context_ldt(struct mm_struct *mm) { } static inline int ldt_dup_context(struct mm_struct *oldmm, struct mm_struct *mm) { return 0; } static inline void destroy_context_ldt(struct mm_struct *mm) { } static inline void ldt_arch_exit_mmap(struct mm_struct *mm) { } #endif #ifdef CONFIG_MODIFY_LDT_SYSCALL extern void load_mm_ldt(struct mm_struct *mm); extern void switch_ldt(struct mm_struct *prev, struct mm_struct *next); #else static inline void load_mm_ldt(struct mm_struct *mm) { clear_LDT(); } static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next) { DEBUG_LOCKS_WARN_ON(preemptible()); } #endif #ifdef CONFIG_ADDRESS_MASKING static inline unsigned long mm_lam_cr3_mask(struct mm_struct *mm) { /* * When switch_mm_irqs_off() is called for a kthread, it may race with * LAM enablement. switch_mm_irqs_off() uses the LAM mask to do two * things: populate CR3 and populate 'cpu_tlbstate.lam'. Make sure it * reads a single value for both. */ return READ_ONCE(mm->context.lam_cr3_mask); } static inline void dup_lam(struct mm_struct *oldmm, struct mm_struct *mm) { mm->context.lam_cr3_mask = oldmm->context.lam_cr3_mask; mm->context.untag_mask = oldmm->context.untag_mask; } #define mm_untag_mask mm_untag_mask static inline unsigned long mm_untag_mask(struct mm_struct *mm) { return mm->context.untag_mask; } static inline void mm_reset_untag_mask(struct mm_struct *mm) { mm->context.untag_mask = -1UL; } #define arch_pgtable_dma_compat arch_pgtable_dma_compat static inline bool arch_pgtable_dma_compat(struct mm_struct *mm) { return !mm_lam_cr3_mask(mm) || test_bit(MM_CONTEXT_FORCE_TAGGED_SVA, &mm->context.flags); } #else static inline unsigned long mm_lam_cr3_mask(struct mm_struct *mm) { return 0; } static inline void dup_lam(struct mm_struct *oldmm, struct mm_struct *mm) { } static inline void mm_reset_untag_mask(struct mm_struct *mm) { } #endif #define enter_lazy_tlb enter_lazy_tlb extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk); #define mm_init_global_asid mm_init_global_asid extern void mm_init_global_asid(struct mm_struct *mm); extern void mm_free_global_asid(struct mm_struct *mm); /* * Init a new mm. Used on mm copies, like at fork() * and on mm's that are brand-new, like at execve(). */ #define init_new_context init_new_context static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { mutex_init(&mm->context.lock); mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id); atomic64_set(&mm->context.tlb_gen, 0); mm->context.next_trim_cpumask = jiffies + HZ; #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS if (cpu_feature_enabled(X86_FEATURE_OSPKE)) { /* pkey 0 is the default and allocated implicitly */ mm->context.pkey_allocation_map = 0x1; /* -1 means unallocated or invalid */ mm->context.execute_only_pkey = -1; } #endif mm_init_global_asid(mm); mm_reset_untag_mask(mm); init_new_context_ldt(mm); return 0; } #define destroy_context destroy_context static inline void destroy_context(struct mm_struct *mm) { destroy_context_ldt(mm); mm_free_global_asid(mm); } extern void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk); extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk); #define switch_mm_irqs_off switch_mm_irqs_off #define activate_mm(prev, next) \ do { \ paravirt_enter_mmap(next); \ switch_mm_irqs_off((prev), (next), NULL); \ } while (0); #ifdef CONFIG_X86_32 #define deactivate_mm(tsk, mm) \ do { \ loadsegment(gs, 0); \ } while (0) #else #define deactivate_mm(tsk, mm) \ do { \ shstk_free(tsk); \ load_gs_index(0); \ loadsegment(fs, 0); \ } while (0) #endif static inline void arch_dup_pkeys(struct mm_struct *oldmm, struct mm_struct *mm) { #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) return; /* Duplicate the oldmm pkey state in mm: */ mm->context.pkey_allocation_map = oldmm->context.pkey_allocation_map; mm->context.execute_only_pkey = oldmm->context.execute_only_pkey; #endif } static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) { arch_dup_pkeys(oldmm, mm); paravirt_enter_mmap(mm); dup_lam(oldmm, mm); return ldt_dup_context(oldmm, mm); } static inline void arch_exit_mmap(struct mm_struct *mm) { paravirt_arch_exit_mmap(mm); ldt_arch_exit_mmap(mm); } #ifdef CONFIG_X86_64 static inline bool is_64bit_mm(struct mm_struct *mm) { return !IS_ENABLED(CONFIG_IA32_EMULATION) || !test_bit(MM_CONTEXT_UPROBE_IA32, &mm->context.flags); } #else static inline bool is_64bit_mm(struct mm_struct *mm) { return false; } #endif static inline bool is_notrack_mm(struct mm_struct *mm) { return test_bit(MM_CONTEXT_NOTRACK, &mm->context.flags); } static inline void set_notrack_mm(struct mm_struct *mm) { set_bit(MM_CONTEXT_NOTRACK, &mm->context.flags); } /* * We only want to enforce protection keys on the current process * because we effectively have no access to PKRU for other * processes or any way to tell *which * PKRU in a threaded * process we could use. * * So do not enforce things if the VMA is not from the current * mm, or if we are in a kernel thread. */ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write, bool execute, bool foreign) { /* pkeys never affect instruction fetches */ if (execute) return true; /* allow access if the VMA is not one from this process */ if (foreign || vma_is_foreign(vma)) return true; return __pkru_allows_pkey(vma_pkey(vma), write); } unsigned long __get_current_cr3_fast(void); #include <asm-generic/mmu_context.h> extern struct mm_struct *use_temporary_mm(struct mm_struct *temp_mm); extern void unuse_temporary_mm(struct mm_struct *prev_mm); #endif /* _ASM_X86_MMU_CONTEXT_H */
35 35 35 35 35 27 27 17 32 32 32 17 4 27 30 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 // SPDX-License-Identifier: GPL-2.0-only /* Copyright (C) 2005 Marc Kleine-Budde, Pengutronix * Copyright (C) 2006 Andrey Volkov, Varma Electronics * Copyright (C) 2008-2009 Wolfgang Grandegger <wg@grandegger.com> */ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/workqueue.h> #include <linux/can.h> #include <linux/can/can-ml.h> #include <linux/can/dev.h> #include <linux/can/skb.h> #include <linux/gpio/consumer.h> #include <linux/of.h> static void can_update_state_error_stats(struct net_device *dev, enum can_state new_state) { struct can_priv *priv = netdev_priv(dev); if (new_state <= priv->state) return; switch (new_state) { case CAN_STATE_ERROR_WARNING: priv->can_stats.error_warning++; break; case CAN_STATE_ERROR_PASSIVE: priv->can_stats.error_passive++; break; case CAN_STATE_BUS_OFF: priv->can_stats.bus_off++; break; default: break; } } static int can_tx_state_to_frame(struct net_device *dev, enum can_state state) { switch (state) { case CAN_STATE_ERROR_ACTIVE: return CAN_ERR_CRTL_ACTIVE; case CAN_STATE_ERROR_WARNING: return CAN_ERR_CRTL_TX_WARNING; case CAN_STATE_ERROR_PASSIVE: return CAN_ERR_CRTL_TX_PASSIVE; default: return 0; } } static int can_rx_state_to_frame(struct net_device *dev, enum can_state state) { switch (state) { case CAN_STATE_ERROR_ACTIVE: return CAN_ERR_CRTL_ACTIVE; case CAN_STATE_ERROR_WARNING: return CAN_ERR_CRTL_RX_WARNING; case CAN_STATE_ERROR_PASSIVE: return CAN_ERR_CRTL_RX_PASSIVE; default: return 0; } } const char *can_get_state_str(const enum can_state state) { switch (state) { case CAN_STATE_ERROR_ACTIVE: return "Error Active"; case CAN_STATE_ERROR_WARNING: return "Error Warning"; case CAN_STATE_ERROR_PASSIVE: return "Error Passive"; case CAN_STATE_BUS_OFF: return "Bus Off"; case CAN_STATE_STOPPED: return "Stopped"; case CAN_STATE_SLEEPING: return "Sleeping"; default: return "<unknown>"; } } EXPORT_SYMBOL_GPL(can_get_state_str); static enum can_state can_state_err_to_state(u16 err) { if (err < CAN_ERROR_WARNING_THRESHOLD) return CAN_STATE_ERROR_ACTIVE; if (err < CAN_ERROR_PASSIVE_THRESHOLD) return CAN_STATE_ERROR_WARNING; if (err < CAN_BUS_OFF_THRESHOLD) return CAN_STATE_ERROR_PASSIVE; return CAN_STATE_BUS_OFF; } void can_state_get_by_berr_counter(const struct net_device *dev, const struct can_berr_counter *bec, enum can_state *tx_state, enum can_state *rx_state) { *tx_state = can_state_err_to_state(bec->txerr); *rx_state = can_state_err_to_state(bec->rxerr); } EXPORT_SYMBOL_GPL(can_state_get_by_berr_counter); void can_change_state(struct net_device *dev, struct can_frame *cf, enum can_state tx_state, enum can_state rx_state) { struct can_priv *priv = netdev_priv(dev); enum can_state new_state = max(tx_state, rx_state); if (unlikely(new_state == priv->state)) { netdev_warn(dev, "%s: oops, state did not change", __func__); return; } netdev_dbg(dev, "Controller changed from %s State (%d) into %s State (%d).\n", can_get_state_str(priv->state), priv->state, can_get_state_str(new_state), new_state); can_update_state_error_stats(dev, new_state); priv->state = new_state; if (!cf) return; if (unlikely(new_state == CAN_STATE_BUS_OFF)) { cf->can_id |= CAN_ERR_BUSOFF; return; } cf->can_id |= CAN_ERR_CRTL; cf->data[1] |= tx_state >= rx_state ? can_tx_state_to_frame(dev, tx_state) : 0; cf->data[1] |= tx_state <= rx_state ? can_rx_state_to_frame(dev, rx_state) : 0; } EXPORT_SYMBOL_GPL(can_change_state); /* CAN device restart for bus-off recovery */ static int can_restart(struct net_device *dev) { struct can_priv *priv = netdev_priv(dev); struct sk_buff *skb; struct can_frame *cf; int err; if (!priv->do_set_mode) return -EOPNOTSUPP; if (netif_carrier_ok(dev)) netdev_err(dev, "Attempt to restart for bus-off recovery, but carrier is OK?\n"); /* No synchronization needed because the device is bus-off and * no messages can come in or go out. */ can_flush_echo_skb(dev); /* send restart message upstream */ skb = alloc_can_err_skb(dev, &cf); if (skb) { cf->can_id |= CAN_ERR_RESTARTED; netif_rx(skb); } /* Now restart the device */ netif_carrier_on(dev); err = priv->do_set_mode(dev, CAN_MODE_START); if (err) { netdev_err(dev, "Restart failed, error %pe\n", ERR_PTR(err)); netif_carrier_off(dev); return err; } else { netdev_dbg(dev, "Restarted\n"); priv->can_stats.restarts++; } return 0; } static void can_restart_work(struct work_struct *work) { struct delayed_work *dwork = to_delayed_work(work); struct can_priv *priv = container_of(dwork, struct can_priv, restart_work); can_restart(priv->dev); } int can_restart_now(struct net_device *dev) { struct can_priv *priv = netdev_priv(dev); /* A manual restart is only permitted if automatic restart is * disabled and the device is in the bus-off state */ if (priv->restart_ms) return -EINVAL; if (priv->state != CAN_STATE_BUS_OFF) return -EBUSY; cancel_delayed_work_sync(&priv->restart_work); return can_restart(dev); } /* CAN bus-off * * This functions should be called when the device goes bus-off to * tell the netif layer that no more packets can be sent or received. * If enabled, a timer is started to trigger bus-off recovery. */ void can_bus_off(struct net_device *dev) { struct can_priv *priv = netdev_priv(dev); if (priv->restart_ms) netdev_info(dev, "bus-off, scheduling restart in %d ms\n", priv->restart_ms); else netdev_info(dev, "bus-off\n"); netif_carrier_off(dev); if (priv->restart_ms) schedule_delayed_work(&priv->restart_work, msecs_to_jiffies(priv->restart_ms)); } EXPORT_SYMBOL_GPL(can_bus_off); void can_setup(struct net_device *dev) { dev->type = ARPHRD_CAN; dev->mtu = CAN_MTU; dev->hard_header_len = 0; dev->addr_len = 0; dev->tx_queue_len = 10; /* New-style flags. */ dev->flags = IFF_NOARP; dev->features = NETIF_F_HW_CSUM; } /* Allocate and setup space for the CAN network device */ struct net_device *alloc_candev_mqs(int sizeof_priv, unsigned int echo_skb_max, unsigned int txqs, unsigned int rxqs) { struct can_ml_priv *can_ml; struct net_device *dev; struct can_priv *priv; int size; /* We put the driver's priv, the CAN mid layer priv and the * echo skb into the netdevice's priv. The memory layout for * the netdev_priv is like this: * * +-------------------------+ * | driver's priv | * +-------------------------+ * | struct can_ml_priv | * +-------------------------+ * | array of struct sk_buff | * +-------------------------+ */ size = ALIGN(sizeof_priv, NETDEV_ALIGN) + sizeof(struct can_ml_priv); if (echo_skb_max) size = ALIGN(size, sizeof(struct sk_buff *)) + echo_skb_max * sizeof(struct sk_buff *); dev = alloc_netdev_mqs(size, "can%d", NET_NAME_UNKNOWN, can_setup, txqs, rxqs); if (!dev) return NULL; priv = netdev_priv(dev); priv->dev = dev; can_ml = (void *)priv + ALIGN(sizeof_priv, NETDEV_ALIGN); can_set_ml_priv(dev, can_ml); if (echo_skb_max) { priv->echo_skb_max = echo_skb_max; priv->echo_skb = (void *)priv + (size - echo_skb_max * sizeof(struct sk_buff *)); } priv->state = CAN_STATE_STOPPED; INIT_DELAYED_WORK(&priv->restart_work, can_restart_work); return dev; } EXPORT_SYMBOL_GPL(alloc_candev_mqs); /* Free space of the CAN network device */ void free_candev(struct net_device *dev) { free_netdev(dev); } EXPORT_SYMBOL_GPL(free_candev); /* changing MTU and control mode for CAN/CANFD devices */ int can_change_mtu(struct net_device *dev, int new_mtu) { struct can_priv *priv = netdev_priv(dev); u32 ctrlmode_static = can_get_static_ctrlmode(priv); /* Do not allow changing the MTU while running */ if (dev->flags & IFF_UP) return -EBUSY; /* allow change of MTU according to the CANFD ability of the device */ switch (new_mtu) { case CAN_MTU: /* 'CANFD-only' controllers can not switch to CAN_MTU */ if (ctrlmode_static & CAN_CTRLMODE_FD) return -EINVAL; priv->ctrlmode &= ~CAN_CTRLMODE_FD; break; case CANFD_MTU: /* check for potential CANFD ability */ if (!(priv->ctrlmode_supported & CAN_CTRLMODE_FD) && !(ctrlmode_static & CAN_CTRLMODE_FD)) return -EINVAL; priv->ctrlmode |= CAN_CTRLMODE_FD; break; default: return -EINVAL; } WRITE_ONCE(dev->mtu, new_mtu); return 0; } EXPORT_SYMBOL_GPL(can_change_mtu); /* generic implementation of netdev_ops::ndo_eth_ioctl for CAN devices * supporting hardware timestamps */ int can_eth_ioctl_hwts(struct net_device *netdev, struct ifreq *ifr, int cmd) { struct hwtstamp_config hwts_cfg = { 0 }; switch (cmd) { case SIOCSHWTSTAMP: /* set */ if (copy_from_user(&hwts_cfg, ifr->ifr_data, sizeof(hwts_cfg))) return -EFAULT; if (hwts_cfg.tx_type == HWTSTAMP_TX_ON && hwts_cfg.rx_filter == HWTSTAMP_FILTER_ALL) return 0; return -ERANGE; case SIOCGHWTSTAMP: /* get */ hwts_cfg.tx_type = HWTSTAMP_TX_ON; hwts_cfg.rx_filter = HWTSTAMP_FILTER_ALL; if (copy_to_user(ifr->ifr_data, &hwts_cfg, sizeof(hwts_cfg))) return -EFAULT; return 0; default: return -EOPNOTSUPP; } } EXPORT_SYMBOL(can_eth_ioctl_hwts); /* generic implementation of ethtool_ops::get_ts_info for CAN devices * supporting hardware timestamps */ int can_ethtool_op_get_ts_info_hwts(struct net_device *dev, struct kernel_ethtool_ts_info *info) { info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE; info->tx_types = BIT(HWTSTAMP_TX_ON); info->rx_filters = BIT(HWTSTAMP_FILTER_ALL); return 0; } EXPORT_SYMBOL(can_ethtool_op_get_ts_info_hwts); /* Common open function when the device gets opened. * * This function should be called in the open function of the device * driver. */ int open_candev(struct net_device *dev) { struct can_priv *priv = netdev_priv(dev); if (!priv->bittiming.bitrate) { netdev_err(dev, "bit-timing not yet defined\n"); return -EINVAL; } /* For CAN FD the data bitrate has to be >= the arbitration bitrate */ if ((priv->ctrlmode & CAN_CTRLMODE_FD) && (!priv->fd.data_bittiming.bitrate || priv->fd.data_bittiming.bitrate < priv->bittiming.bitrate)) { netdev_err(dev, "incorrect/missing data bit-timing\n"); return -EINVAL; } /* Switch carrier on if device was stopped while in bus-off state */ if (!netif_carrier_ok(dev)) netif_carrier_on(dev); return 0; } EXPORT_SYMBOL_GPL(open_candev); #ifdef CONFIG_OF /* Common function that can be used to understand the limitation of * a transceiver when it provides no means to determine these limitations * at runtime. */ void of_can_transceiver(struct net_device *dev) { struct device_node *dn; struct can_priv *priv = netdev_priv(dev); struct device_node *np = dev->dev.parent->of_node; int ret; dn = of_get_child_by_name(np, "can-transceiver"); if (!dn) return; ret = of_property_read_u32(dn, "max-bitrate", &priv->bitrate_max); of_node_put(dn); if ((ret && ret != -EINVAL) || (!ret && !priv->bitrate_max)) netdev_warn(dev, "Invalid value for transceiver max bitrate. Ignoring bitrate limit.\n"); } EXPORT_SYMBOL_GPL(of_can_transceiver); #endif /* Common close function for cleanup before the device gets closed. * * This function should be called in the close function of the device * driver. */ void close_candev(struct net_device *dev) { struct can_priv *priv = netdev_priv(dev); cancel_delayed_work_sync(&priv->restart_work); can_flush_echo_skb(dev); } EXPORT_SYMBOL_GPL(close_candev); static int can_set_termination(struct net_device *ndev, u16 term) { struct can_priv *priv = netdev_priv(ndev); int set; if (term == priv->termination_gpio_ohms[CAN_TERMINATION_GPIO_ENABLED]) set = 1; else set = 0; gpiod_set_value_cansleep(priv->termination_gpio, set); return 0; } static int can_get_termination(struct net_device *ndev) { struct can_priv *priv = netdev_priv(ndev); struct device *dev = ndev->dev.parent; struct gpio_desc *gpio; u32 term; int ret; /* Disabling termination by default is the safe choice: Else if many * bus participants enable it, no communication is possible at all. */ gpio = devm_gpiod_get_optional(dev, "termination", GPIOD_OUT_LOW); if (IS_ERR(gpio)) return dev_err_probe(dev, PTR_ERR(gpio), "Cannot get termination-gpios\n"); if (!gpio) return 0; ret = device_property_read_u32(dev, "termination-ohms", &term); if (ret) { netdev_err(ndev, "Cannot get termination-ohms: %pe\n", ERR_PTR(ret)); return ret; } if (term > U16_MAX) { netdev_err(ndev, "Invalid termination-ohms value (%u > %u)\n", term, U16_MAX); return -EINVAL; } priv->termination_const_cnt = ARRAY_SIZE(priv->termination_gpio_ohms); priv->termination_const = priv->termination_gpio_ohms; priv->termination_gpio = gpio; priv->termination_gpio_ohms[CAN_TERMINATION_GPIO_DISABLED] = CAN_TERMINATION_DISABLED; priv->termination_gpio_ohms[CAN_TERMINATION_GPIO_ENABLED] = term; priv->do_set_termination = can_set_termination; return 0; } static bool can_bittiming_const_valid(const struct can_bittiming_const *btc) { if (!btc) return true; if (!btc->sjw_max) return false; return true; } /* Register the CAN network device */ int register_candev(struct net_device *dev) { struct can_priv *priv = netdev_priv(dev); int err; /* Ensure termination_const, termination_const_cnt and * do_set_termination consistency. All must be either set or * unset. */ if ((!priv->termination_const != !priv->termination_const_cnt) || (!priv->termination_const != !priv->do_set_termination)) return -EINVAL; if (!priv->bitrate_const != !priv->bitrate_const_cnt) return -EINVAL; if (!priv->fd.data_bitrate_const != !priv->fd.data_bitrate_const_cnt) return -EINVAL; /* We only support either fixed bit rates or bit timing const. */ if ((priv->bitrate_const || priv->fd.data_bitrate_const) && (priv->bittiming_const || priv->fd.data_bittiming_const)) return -EINVAL; if (!can_bittiming_const_valid(priv->bittiming_const) || !can_bittiming_const_valid(priv->fd.data_bittiming_const)) return -EINVAL; if (!priv->termination_const) { err = can_get_termination(dev); if (err) return err; } dev->rtnl_link_ops = &can_link_ops; netif_carrier_off(dev); return register_netdev(dev); } EXPORT_SYMBOL_GPL(register_candev); /* Unregister the CAN network device */ void unregister_candev(struct net_device *dev) { unregister_netdev(dev); } EXPORT_SYMBOL_GPL(unregister_candev); /* Test if a network device is a candev based device * and return the can_priv* if so. */ struct can_priv *safe_candev_priv(struct net_device *dev) { if (dev->type != ARPHRD_CAN || dev->rtnl_link_ops != &can_link_ops) return NULL; return netdev_priv(dev); } EXPORT_SYMBOL_GPL(safe_candev_priv); static __init int can_dev_init(void) { int err; err = can_netlink_register(); if (!err) pr_info("CAN device driver interface\n"); return err; } module_init(can_dev_init); static __exit void can_dev_exit(void) { can_netlink_unregister(); } module_exit(can_dev_exit); MODULE_ALIAS_RTNL_LINK("can");
4 2 4 1 39 3 24 5 82 83 82 55 22 18 82 39 40 40 1 1 1 1 1 2 1 1 56 71 1 5 37 3 38 38 38 1 37 4 29 36 1 1 21 14 36 3 11 27 166 167 4 3 2 5 2 4 4 30 17 1 12 13 26 29 98 55 78 7 98 12 15 9 8 1 9 34 1 34 1 25 8 24 23 152 1 7 124 59 115 123 17 101 11 82 2 85 9 92 124 125 1 125 125 1 1 9 3 6 3 2 3 2 166 4 37 1 18 1 12 14 1 25 25 25 2 9 10 18 18 1 16 9 7 28 2 26 12 16 27 1 14 5 10 9 134 40 120 18 18 18 17 1 18 18 18 10 9 18 10 9 8 12 5 19 18 24 24 16 11 24 24 20 20 16 9 9 20 11 24 24 8 12 11 1 39 1 40 40 41 14 4 28 41 2 1 41 6 6 1 38 3 2 1 5 25 5 17 1 16 50 1 4 4 4 139 141 141 1 1 136 1 1 12 1 11 12 5 1 3 1 3 1 1 1 1 1 46 46 1 1 4 1 3 3 3 1 3 1 1 1 1 1 1 1 1 1 2 1 1 2 1 1 452 238 693 690 661 1 4 1 47 1 27 1 1 1 1 2 1 1 3 5 2 12 5 435 9 41 155 247 1 195 29 29 149 124 26 137 5 130 131 139 139 139 137 131 131 41 41 1 141 131 141 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 1991, 1992 Linus Torvalds */ /* * 'tty_io.c' gives an orthogonal feeling to tty's, be they consoles * or rs-channels. It also implements echoing, cooked mode etc. * * Kill-line thanks to John T Kohl, who also corrected VMIN = VTIME = 0. * * Modified by Theodore Ts'o, 9/14/92, to dynamically allocate the * tty_struct and tty_queue structures. Previously there was an array * of 256 tty_struct's which was statically allocated, and the * tty_queue structures were allocated at boot time. Both are now * dynamically allocated only when the tty is open. * * Also restructured routines so that there is more of a separation * between the high-level tty routines (tty_io.c and tty_ioctl.c) and * the low-level tty routines (serial.c, pty.c, console.c). This * makes for cleaner and more compact code. -TYT, 9/17/92 * * Modified by Fred N. van Kempen, 01/29/93, to add line disciplines * which can be dynamically activated and de-activated by the line * discipline handling modules (like SLIP). * * NOTE: pay no attention to the line discipline code (yet); its * interface is still subject to change in this version... * -- TYT, 1/31/92 * * Added functionality to the OPOST tty handling. No delays, but all * other bits should be there. * -- Nick Holloway <alfie@dcs.warwick.ac.uk>, 27th May 1993. * * Rewrote canonical mode and added more termios flags. * -- julian@uhunix.uhcc.hawaii.edu (J. Cowley), 13Jan94 * * Reorganized FASYNC support so mouse code can share it. * -- ctm@ardi.com, 9Sep95 * * New TIOCLINUX variants added. * -- mj@k332.feld.cvut.cz, 19-Nov-95 * * Restrict vt switching via ioctl() * -- grif@cs.ucr.edu, 5-Dec-95 * * Move console and virtual terminal code to more appropriate files, * implement CONFIG_VT and generalize console device interface. * -- Marko Kohtala <Marko.Kohtala@hut.fi>, March 97 * * Rewrote tty_init_dev and tty_release_dev to eliminate races. * -- Bill Hawes <whawes@star.net>, June 97 * * Added devfs support. * -- C. Scott Ananian <cananian@alumni.princeton.edu>, 13-Jan-1998 * * Added support for a Unix98-style ptmx device. * -- C. Scott Ananian <cananian@alumni.princeton.edu>, 14-Jan-1998 * * Reduced memory usage for older ARM systems * -- Russell King <rmk@arm.linux.org.uk> * * Move do_SAK() into process context. Less stack use in devfs functions. * alloc_tty_struct() always uses kmalloc() * -- Andrew Morton <andrewm@uow.edu.eu> 17Mar01 */ #include <linux/types.h> #include <linux/major.h> #include <linux/errno.h> #include <linux/signal.h> #include <linux/fcntl.h> #include <linux/sched/signal.h> #include <linux/sched/task.h> #include <linux/interrupt.h> #include <linux/tty.h> #include <linux/tty_driver.h> #include <linux/tty_flip.h> #include <linux/devpts_fs.h> #include <linux/file.h> #include <linux/fdtable.h> #include <linux/console.h> #include <linux/timer.h> #include <linux/ctype.h> #include <linux/kd.h> #include <linux/mm.h> #include <linux/string.h> #include <linux/slab.h> #include <linux/poll.h> #include <linux/ppp-ioctl.h> #include <linux/proc_fs.h> #include <linux/init.h> #include <linux/module.h> #include <linux/device.h> #include <linux/wait.h> #include <linux/bitops.h> #include <linux/delay.h> #include <linux/seq_file.h> #include <linux/serial.h> #include <linux/ratelimit.h> #include <linux/compat.h> #include <linux/uaccess.h> #include <linux/termios_internal.h> #include <linux/fs.h> #include <linux/kbd_kern.h> #include <linux/vt_kern.h> #include <linux/selection.h> #include <linux/kmod.h> #include <linux/nsproxy.h> #include "tty.h" #undef TTY_DEBUG_HANGUP #ifdef TTY_DEBUG_HANGUP # define tty_debug_hangup(tty, f, args...) tty_debug(tty, f, ##args) #else # define tty_debug_hangup(tty, f, args...) do { } while (0) #endif #define TTY_PARANOIA_CHECK 1 #define CHECK_TTY_COUNT 1 struct ktermios tty_std_termios = { /* for the benefit of tty drivers */ .c_iflag = ICRNL | IXON, .c_oflag = OPOST | ONLCR, .c_cflag = B38400 | CS8 | CREAD | HUPCL, .c_lflag = ISIG | ICANON | ECHO | ECHOE | ECHOK | ECHOCTL | ECHOKE | IEXTEN, .c_cc = INIT_C_CC, .c_ispeed = 38400, .c_ospeed = 38400, /* .c_line = N_TTY, */ }; EXPORT_SYMBOL(tty_std_termios); /* This list gets poked at by procfs and various bits of boot up code. This * could do with some rationalisation such as pulling the tty proc function * into this file. */ LIST_HEAD(tty_drivers); /* linked list of tty drivers */ /* Mutex to protect creating and releasing a tty */ DEFINE_MUTEX(tty_mutex); static ssize_t tty_read(struct kiocb *, struct iov_iter *); static ssize_t tty_write(struct kiocb *, struct iov_iter *); static __poll_t tty_poll(struct file *, poll_table *); static int tty_open(struct inode *, struct file *); #ifdef CONFIG_COMPAT static long tty_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg); #else #define tty_compat_ioctl NULL #endif static int __tty_fasync(int fd, struct file *filp, int on); static int tty_fasync(int fd, struct file *filp, int on); static void release_tty(struct tty_struct *tty, int idx); /** * free_tty_struct - free a disused tty * @tty: tty struct to free * * Free the write buffers, tty queue and tty memory itself. * * Locking: none. Must be called after tty is definitely unused */ static void free_tty_struct(struct tty_struct *tty) { tty_ldisc_deinit(tty); put_device(tty->dev); kvfree(tty->write_buf); kfree(tty); } static inline struct tty_struct *file_tty(struct file *file) { return ((struct tty_file_private *)file->private_data)->tty; } int tty_alloc_file(struct file *file) { struct tty_file_private *priv; priv = kmalloc(sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; file->private_data = priv; return 0; } /* Associate a new file with the tty structure */ void tty_add_file(struct tty_struct *tty, struct file *file) { struct tty_file_private *priv = file->private_data; priv->tty = tty; priv->file = file; spin_lock(&tty->files_lock); list_add(&priv->list, &tty->tty_files); spin_unlock(&tty->files_lock); } /** * tty_free_file - free file->private_data * @file: to free private_data of * * This shall be used only for fail path handling when tty_add_file was not * called yet. */ void tty_free_file(struct file *file) { struct tty_file_private *priv = file->private_data; file->private_data = NULL; kfree(priv); } /* Delete file from its tty */ static void tty_del_file(struct file *file) { struct tty_file_private *priv = file->private_data; struct tty_struct *tty = priv->tty; spin_lock(&tty->files_lock); list_del(&priv->list); spin_unlock(&tty->files_lock); tty_free_file(file); } /** * tty_name - return tty naming * @tty: tty structure * * Convert a tty structure into a name. The name reflects the kernel naming * policy and if udev is in use may not reflect user space * * Locking: none */ const char *tty_name(const struct tty_struct *tty) { if (!tty) /* Hmm. NULL pointer. That's fun. */ return "NULL tty"; return tty->name; } EXPORT_SYMBOL(tty_name); const char *tty_driver_name(const struct tty_struct *tty) { if (!tty || !tty->driver) return ""; return tty->driver->name; } static int tty_paranoia_check(struct tty_struct *tty, struct inode *inode, const char *routine) { #ifdef TTY_PARANOIA_CHECK if (!tty) { pr_warn("(%d:%d): %s: NULL tty\n", imajor(inode), iminor(inode), routine); return 1; } #endif return 0; } /* Caller must hold tty_lock */ static void check_tty_count(struct tty_struct *tty, const char *routine) { #ifdef CHECK_TTY_COUNT struct list_head *p; int count = 0, kopen_count = 0; scoped_guard(spinlock, &tty->files_lock) list_for_each(p, &tty->tty_files) count++; if (tty->driver->type == TTY_DRIVER_TYPE_PTY && tty->driver->subtype == PTY_TYPE_SLAVE && tty->link && tty->link->count) count++; if (tty_port_kopened(tty->port)) kopen_count++; if (tty->count != (count + kopen_count)) { tty_warn(tty, "%s: tty->count(%d) != (#fd's(%d) + #kopen's(%d))\n", routine, tty->count, count, kopen_count); } #endif } /** * get_tty_driver - find device of a tty * @device: device identifier * @index: returns the index of the tty * * This routine returns a tty driver structure, given a device number and also * passes back the index number. * * Locking: caller must hold tty_mutex */ static struct tty_driver *get_tty_driver(dev_t device, int *index) { struct tty_driver *p; list_for_each_entry(p, &tty_drivers, tty_drivers) { dev_t base = MKDEV(p->major, p->minor_start); if (device < base || device >= base + p->num) continue; *index = device - base; return tty_driver_kref_get(p); } return NULL; } /** * tty_dev_name_to_number - return dev_t for device name * @name: user space name of device under /dev * @number: pointer to dev_t that this function will populate * * This function converts device names like ttyS0 or ttyUSB1 into dev_t like * (4, 64) or (188, 1). If no corresponding driver is registered then the * function returns -%ENODEV. * * Locking: this acquires tty_mutex to protect the tty_drivers list from * being modified while we are traversing it, and makes sure to * release it before exiting. */ int tty_dev_name_to_number(const char *name, dev_t *number) { struct tty_driver *p; int ret; int index, prefix_length = 0; const char *str; for (str = name; *str && !isdigit(*str); str++) ; if (!*str) return -EINVAL; ret = kstrtoint(str, 10, &index); if (ret) return ret; prefix_length = str - name; guard(mutex)(&tty_mutex); list_for_each_entry(p, &tty_drivers, tty_drivers) if (prefix_length == strlen(p->name) && strncmp(name, p->name, prefix_length) == 0) { if (index < p->num) { *number = MKDEV(p->major, p->minor_start + index); return 0; } } return -ENODEV; } EXPORT_SYMBOL_GPL(tty_dev_name_to_number); #ifdef CONFIG_CONSOLE_POLL /** * tty_find_polling_driver - find device of a polled tty * @name: name string to match * @line: pointer to resulting tty line nr * * This routine returns a tty driver structure, given a name and the condition * that the tty driver is capable of polled operation. */ struct tty_driver *tty_find_polling_driver(char *name, int *line) { struct tty_driver *p; int tty_line = 0; int len; char *str, *stp; for (str = name; *str; str++) if ((*str >= '0' && *str <= '9') || *str == ',') break; if (!*str) return NULL; len = str - name; tty_line = simple_strtoul(str, &str, 10); guard(mutex)(&tty_mutex); /* Search through the tty devices to look for a match */ list_for_each_entry(p, &tty_drivers, tty_drivers) { if (!len || strncmp(name, p->name, len) != 0) continue; stp = str; if (*stp == ',') stp++; if (*stp == '\0') stp = NULL; if (tty_line >= 0 && tty_line < p->num && p->ops && p->ops->poll_init && !p->ops->poll_init(p, tty_line, stp)) { *line = tty_line; return tty_driver_kref_get(p); } } return NULL; } EXPORT_SYMBOL_GPL(tty_find_polling_driver); #endif static ssize_t hung_up_tty_read(struct kiocb *iocb, struct iov_iter *to) { return 0; } static ssize_t hung_up_tty_write(struct kiocb *iocb, struct iov_iter *from) { return -EIO; } /* No kernel lock held - none needed ;) */ static __poll_t hung_up_tty_poll(struct file *filp, poll_table *wait) { return EPOLLIN | EPOLLOUT | EPOLLERR | EPOLLHUP | EPOLLRDNORM | EPOLLWRNORM; } static long hung_up_tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { return cmd == TIOCSPGRP ? -ENOTTY : -EIO; } static long hung_up_tty_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { return cmd == TIOCSPGRP ? -ENOTTY : -EIO; } static int hung_up_tty_fasync(int fd, struct file *file, int on) { return -ENOTTY; } static void tty_show_fdinfo(struct seq_file *m, struct file *file) { struct tty_struct *tty = file_tty(file); if (tty && tty->ops && tty->ops->show_fdinfo) tty->ops->show_fdinfo(tty, m); } static const struct file_operations tty_fops = { .read_iter = tty_read, .write_iter = tty_write, .splice_read = copy_splice_read, .splice_write = iter_file_splice_write, .poll = tty_poll, .unlocked_ioctl = tty_ioctl, .compat_ioctl = tty_compat_ioctl, .open = tty_open, .release = tty_release, .fasync = tty_fasync, .show_fdinfo = tty_show_fdinfo, }; static const struct file_operations console_fops = { .read_iter = tty_read, .write_iter = redirected_tty_write, .splice_read = copy_splice_read, .splice_write = iter_file_splice_write, .poll = tty_poll, .unlocked_ioctl = tty_ioctl, .compat_ioctl = tty_compat_ioctl, .open = tty_open, .release = tty_release, .fasync = tty_fasync, }; static const struct file_operations hung_up_tty_fops = { .read_iter = hung_up_tty_read, .write_iter = hung_up_tty_write, .poll = hung_up_tty_poll, .unlocked_ioctl = hung_up_tty_ioctl, .compat_ioctl = hung_up_tty_compat_ioctl, .release = tty_release, .fasync = hung_up_tty_fasync, }; static DEFINE_SPINLOCK(redirect_lock); static struct file *redirect; /** * tty_wakeup - request more data * @tty: terminal * * Internal and external helper for wakeups of tty. This function informs the * line discipline if present that the driver is ready to receive more output * data. */ void tty_wakeup(struct tty_struct *tty) { struct tty_ldisc *ld; if (test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) { ld = tty_ldisc_ref(tty); if (ld) { if (ld->ops->write_wakeup) ld->ops->write_wakeup(tty); tty_ldisc_deref(ld); } } wake_up_interruptible_poll(&tty->write_wait, EPOLLOUT); } EXPORT_SYMBOL_GPL(tty_wakeup); /** * tty_release_redirect - Release a redirect on a pty if present * @tty: tty device * * This is available to the pty code so if the master closes, if the slave is a * redirect it can release the redirect. */ static struct file *tty_release_redirect(struct tty_struct *tty) { guard(spinlock)(&redirect_lock); if (redirect && file_tty(redirect) == tty) { struct file *f = redirect; redirect = NULL; return f; } return NULL; } /** * __tty_hangup - actual handler for hangup events * @tty: tty device * @exit_session: if non-zero, signal all foreground group processes * * This can be called by a "kworker" kernel thread. That is process synchronous * but doesn't hold any locks, so we need to make sure we have the appropriate * locks for what we're doing. * * The hangup event clears any pending redirections onto the hung up device. It * ensures future writes will error and it does the needed line discipline * hangup and signal delivery. The tty object itself remains intact. * * Locking: * * BTM * * * redirect lock for undoing redirection * * file list lock for manipulating list of ttys * * tty_ldiscs_lock from called functions * * termios_rwsem resetting termios data * * tasklist_lock to walk task list for hangup event * * * ->siglock to protect ->signal/->sighand * */ static void __tty_hangup(struct tty_struct *tty, int exit_session) { struct file *cons_filp = NULL; struct file *filp, *f; struct tty_file_private *priv; int closecount = 0, n; int refs; if (!tty) return; f = tty_release_redirect(tty); tty_lock(tty); if (test_bit(TTY_HUPPED, &tty->flags)) { tty_unlock(tty); return; } /* * Some console devices aren't actually hung up for technical and * historical reasons, which can lead to indefinite interruptible * sleep in n_tty_read(). The following explicitly tells * n_tty_read() to abort readers. */ set_bit(TTY_HUPPING, &tty->flags); /* inuse_filps is protected by the single tty lock, * this really needs to change if we want to flush the * workqueue with the lock held. */ check_tty_count(tty, "tty_hangup"); spin_lock(&tty->files_lock); /* This breaks for file handles being sent over AF_UNIX sockets ? */ list_for_each_entry(priv, &tty->tty_files, list) { filp = priv->file; if (filp->f_op->write_iter == redirected_tty_write) cons_filp = filp; if (filp->f_op->write_iter != tty_write) continue; closecount++; __tty_fasync(-1, filp, 0); /* can't block */ filp->f_op = &hung_up_tty_fops; } spin_unlock(&tty->files_lock); refs = tty_signal_session_leader(tty, exit_session); /* Account for the p->signal references we killed */ while (refs--) tty_kref_put(tty); tty_ldisc_hangup(tty, cons_filp != NULL); spin_lock_irq(&tty->ctrl.lock); clear_bit(TTY_THROTTLED, &tty->flags); clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); put_pid(tty->ctrl.session); put_pid(tty->ctrl.pgrp); tty->ctrl.session = NULL; tty->ctrl.pgrp = NULL; tty->ctrl.pktstatus = 0; spin_unlock_irq(&tty->ctrl.lock); /* * If one of the devices matches a console pointer, we * cannot just call hangup() because that will cause * tty->count and state->count to go out of sync. * So we just call close() the right number of times. */ if (cons_filp) { if (tty->ops->close) for (n = 0; n < closecount; n++) tty->ops->close(tty, cons_filp); } else if (tty->ops->hangup) tty->ops->hangup(tty); /* * We don't want to have driver/ldisc interactions beyond the ones * we did here. The driver layer expects no calls after ->hangup() * from the ldisc side, which is now guaranteed. */ set_bit(TTY_HUPPED, &tty->flags); clear_bit(TTY_HUPPING, &tty->flags); tty_unlock(tty); if (f) fput(f); } static void do_tty_hangup(struct work_struct *work) { struct tty_struct *tty = container_of(work, struct tty_struct, hangup_work); __tty_hangup(tty, 0); } /** * tty_hangup - trigger a hangup event * @tty: tty to hangup * * A carrier loss (virtual or otherwise) has occurred on @tty. Schedule a * hangup sequence to run after this event. */ void tty_hangup(struct tty_struct *tty) { tty_debug_hangup(tty, "hangup\n"); schedule_work(&tty->hangup_work); } EXPORT_SYMBOL(tty_hangup); /** * tty_vhangup - process vhangup * @tty: tty to hangup * * The user has asked via system call for the terminal to be hung up. We do * this synchronously so that when the syscall returns the process is complete. * That guarantee is necessary for security reasons. */ void tty_vhangup(struct tty_struct *tty) { tty_debug_hangup(tty, "vhangup\n"); __tty_hangup(tty, 0); } EXPORT_SYMBOL(tty_vhangup); /** * tty_vhangup_self - process vhangup for own ctty * * Perform a vhangup on the current controlling tty */ void tty_vhangup_self(void) { struct tty_struct *tty; tty = get_current_tty(); if (tty) { tty_vhangup(tty); tty_kref_put(tty); } } /** * tty_vhangup_session - hangup session leader exit * @tty: tty to hangup * * The session leader is exiting and hanging up its controlling terminal. * Every process in the foreground process group is signalled %SIGHUP. * * We do this synchronously so that when the syscall returns the process is * complete. That guarantee is necessary for security reasons. */ void tty_vhangup_session(struct tty_struct *tty) { tty_debug_hangup(tty, "session hangup\n"); __tty_hangup(tty, 1); } /** * tty_hung_up_p - was tty hung up * @filp: file pointer of tty * * Return: true if the tty has been subject to a vhangup or a carrier loss */ int tty_hung_up_p(struct file *filp) { return (filp && filp->f_op == &hung_up_tty_fops); } EXPORT_SYMBOL(tty_hung_up_p); void __stop_tty(struct tty_struct *tty) { if (tty->flow.stopped) return; tty->flow.stopped = true; if (tty->ops->stop) tty->ops->stop(tty); } /** * stop_tty - propagate flow control * @tty: tty to stop * * Perform flow control to the driver. May be called on an already stopped * device and will not re-call the &tty_driver->stop() method. * * This functionality is used by both the line disciplines for halting incoming * flow and by the driver. It may therefore be called from any context, may be * under the tty %atomic_write_lock but not always. * * Locking: * flow.lock */ void stop_tty(struct tty_struct *tty) { guard(spinlock_irqsave)(&tty->flow.lock); __stop_tty(tty); } EXPORT_SYMBOL(stop_tty); void __start_tty(struct tty_struct *tty) { if (!tty->flow.stopped || tty->flow.tco_stopped) return; tty->flow.stopped = false; if (tty->ops->start) tty->ops->start(tty); tty_wakeup(tty); } /** * start_tty - propagate flow control * @tty: tty to start * * Start a tty that has been stopped if at all possible. If @tty was previously * stopped and is now being started, the &tty_driver->start() method is invoked * and the line discipline woken. * * Locking: * flow.lock */ void start_tty(struct tty_struct *tty) { guard(spinlock_irqsave)(&tty->flow.lock); __start_tty(tty); } EXPORT_SYMBOL(start_tty); static void tty_update_time(struct tty_struct *tty, bool mtime) { time64_t sec = ktime_get_real_seconds(); struct tty_file_private *priv; guard(spinlock)(&tty->files_lock); list_for_each_entry(priv, &tty->tty_files, list) { struct inode *inode = file_inode(priv->file); struct timespec64 time = mtime ? inode_get_mtime(inode) : inode_get_atime(inode); /* * We only care if the two values differ in anything other than the * lower three bits (i.e every 8 seconds). If so, then we can update * the time of the tty device, otherwise it could be construded as a * security leak to let userspace know the exact timing of the tty. */ if ((sec ^ time.tv_sec) & ~7) { if (mtime) inode_set_mtime(inode, sec, 0); else inode_set_atime(inode, sec, 0); } } } /* * Iterate on the ldisc ->read() function until we've gotten all * the data the ldisc has for us. * * The "cookie" is something that the ldisc read function can fill * in to let us know that there is more data to be had. * * We promise to continue to call the ldisc until it stops returning * data or clears the cookie. The cookie may be something that the * ldisc maintains state for and needs to free. */ static ssize_t iterate_tty_read(struct tty_ldisc *ld, struct tty_struct *tty, struct file *file, struct iov_iter *to) { void *cookie = NULL; unsigned long offset = 0; ssize_t retval = 0; size_t copied, count = iov_iter_count(to); u8 kernel_buf[64]; do { ssize_t size = min(count, sizeof(kernel_buf)); size = ld->ops->read(tty, file, kernel_buf, size, &cookie, offset); if (!size) break; if (size < 0) { /* Did we have an earlier error (ie -EFAULT)? */ if (retval) break; retval = size; /* * -EOVERFLOW means we didn't have enough space * for a whole packet, and we shouldn't return * a partial result. */ if (retval == -EOVERFLOW) offset = 0; break; } copied = copy_to_iter(kernel_buf, size, to); offset += copied; count -= copied; /* * If the user copy failed, we still need to do another ->read() * call if we had a cookie to let the ldisc clear up. * * But make sure size is zeroed. */ if (unlikely(copied != size)) { count = 0; retval = -EFAULT; } } while (cookie); /* We always clear tty buffer in case they contained passwords */ memzero_explicit(kernel_buf, sizeof(kernel_buf)); return offset ? offset : retval; } /** * tty_read - read method for tty device files * @iocb: kernel I/O control block * @to: destination for the data read * * Perform the read system call function on this terminal device. Checks * for hung up devices before calling the line discipline method. * * Locking: * Locks the line discipline internally while needed. Multiple read calls * may be outstanding in parallel. */ static ssize_t tty_read(struct kiocb *iocb, struct iov_iter *to) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); struct tty_struct *tty = file_tty(file); struct tty_ldisc *ld; ssize_t ret; if (tty_paranoia_check(tty, inode, "tty_read")) return -EIO; if (!tty || tty_io_error(tty)) return -EIO; /* We want to wait for the line discipline to sort out in this * situation. */ ld = tty_ldisc_ref_wait(tty); if (!ld) return hung_up_tty_read(iocb, to); ret = -EIO; if (ld->ops->read) ret = iterate_tty_read(ld, tty, file, to); tty_ldisc_deref(ld); if (ret > 0) tty_update_time(tty, false); return ret; } void tty_write_unlock(struct tty_struct *tty) { mutex_unlock(&tty->atomic_write_lock); wake_up_interruptible_poll(&tty->write_wait, EPOLLOUT); } int tty_write_lock(struct tty_struct *tty, bool ndelay) { if (!mutex_trylock(&tty->atomic_write_lock)) { if (ndelay) return -EAGAIN; if (mutex_lock_interruptible(&tty->atomic_write_lock)) return -ERESTARTSYS; } return 0; } /* * Split writes up in sane blocksizes to avoid * denial-of-service type attacks */ static ssize_t iterate_tty_write(struct tty_ldisc *ld, struct tty_struct *tty, struct file *file, struct iov_iter *from) { size_t chunk, count = iov_iter_count(from); ssize_t ret, written = 0; ret = tty_write_lock(tty, file->f_flags & O_NDELAY); if (ret < 0) return ret; /* * We chunk up writes into a temporary buffer. This * simplifies low-level drivers immensely, since they * don't have locking issues and user mode accesses. * * But if TTY_NO_WRITE_SPLIT is set, we should use a * big chunk-size.. * * The default chunk-size is 2kB, because the NTTY * layer has problems with bigger chunks. It will * claim to be able to handle more characters than * it actually does. */ chunk = 2048; if (test_bit(TTY_NO_WRITE_SPLIT, &tty->flags)) chunk = 65536; if (count < chunk) chunk = count; /* write_buf/write_cnt is protected by the atomic_write_lock mutex */ if (tty->write_cnt < chunk) { u8 *buf_chunk; if (chunk < 1024) chunk = 1024; buf_chunk = kvmalloc(chunk, GFP_KERNEL | __GFP_RETRY_MAYFAIL); if (!buf_chunk) { ret = -ENOMEM; goto out; } kvfree(tty->write_buf); tty->write_cnt = chunk; tty->write_buf = buf_chunk; } /* Do the write .. */ for (;;) { size_t size = min(chunk, count); ret = -EFAULT; if (copy_from_iter(tty->write_buf, size, from) != size) break; ret = ld->ops->write(tty, file, tty->write_buf, size); if (ret <= 0) break; written += ret; if (ret > size) break; /* FIXME! Have Al check this! */ if (ret != size) iov_iter_revert(from, size-ret); count -= ret; if (!count) break; ret = -ERESTARTSYS; if (signal_pending(current)) break; cond_resched(); } if (written) { tty_update_time(tty, true); ret = written; } out: tty_write_unlock(tty); return ret; } #ifdef CONFIG_PRINT_QUOTA_WARNING /** * tty_write_message - write a message to a certain tty, not just the console. * @tty: the destination tty_struct * @msg: the message to write * * This is used for messages that need to be redirected to a specific tty. We * don't put it into the syslog queue right now maybe in the future if really * needed. * * We must still hold the BTM and test the CLOSING flag for the moment. * * This function is DEPRECATED, do not use in new code. */ void tty_write_message(struct tty_struct *tty, char *msg) { if (tty) { mutex_lock(&tty->atomic_write_lock); tty_lock(tty); if (tty->ops->write && tty->count > 0) tty->ops->write(tty, msg, strlen(msg)); tty_unlock(tty); tty_write_unlock(tty); } } #endif static ssize_t file_tty_write(struct file *file, struct kiocb *iocb, struct iov_iter *from) { struct tty_struct *tty = file_tty(file); struct tty_ldisc *ld; ssize_t ret; if (tty_paranoia_check(tty, file_inode(file), "tty_write")) return -EIO; if (!tty || !tty->ops->write || tty_io_error(tty)) return -EIO; /* Short term debug to catch buggy drivers */ if (tty->ops->write_room == NULL) tty_err(tty, "missing write_room method\n"); ld = tty_ldisc_ref_wait(tty); if (!ld) return hung_up_tty_write(iocb, from); if (!ld->ops->write) ret = -EIO; else ret = iterate_tty_write(ld, tty, file, from); tty_ldisc_deref(ld); return ret; } /** * tty_write - write method for tty device file * @iocb: kernel I/O control block * @from: iov_iter with data to write * * Write data to a tty device via the line discipline. * * Locking: * Locks the line discipline as required * Writes to the tty driver are serialized by the atomic_write_lock * and are then processed in chunks to the device. The line * discipline write method will not be invoked in parallel for * each device. */ static ssize_t tty_write(struct kiocb *iocb, struct iov_iter *from) { return file_tty_write(iocb->ki_filp, iocb, from); } ssize_t redirected_tty_write(struct kiocb *iocb, struct iov_iter *iter) { struct file *p = NULL; spin_lock(&redirect_lock); if (redirect) p = get_file(redirect); spin_unlock(&redirect_lock); /* * We know the redirected tty is just another tty, we can * call file_tty_write() directly with that file pointer. */ if (p) { ssize_t res; res = file_tty_write(p, iocb, iter); fput(p); return res; } return tty_write(iocb, iter); } /** * tty_send_xchar - send priority character * @tty: the tty to send to * @ch: xchar to send * * Send a high priority character to the tty even if stopped. * * Locking: none for xchar method, write ordering for write method. */ int tty_send_xchar(struct tty_struct *tty, u8 ch) { bool was_stopped = tty->flow.stopped; if (tty->ops->send_xchar) { down_read(&tty->termios_rwsem); tty->ops->send_xchar(tty, ch); up_read(&tty->termios_rwsem); return 0; } if (tty_write_lock(tty, false) < 0) return -ERESTARTSYS; down_read(&tty->termios_rwsem); if (was_stopped) start_tty(tty); tty->ops->write(tty, &ch, 1); if (was_stopped) stop_tty(tty); up_read(&tty->termios_rwsem); tty_write_unlock(tty); return 0; } /** * pty_line_name - generate name for a pty * @driver: the tty driver in use * @index: the minor number * @p: output buffer of at least 6 bytes * * Generate a name from a @driver reference and write it to the output buffer * @p. * * Locking: None */ static void pty_line_name(struct tty_driver *driver, int index, char *p) { static const char ptychar[] = "pqrstuvwxyzabcde"; int i = index + driver->name_base; /* ->name is initialized to "ttyp", but "tty" is expected */ sprintf(p, "%s%c%x", driver->subtype == PTY_TYPE_SLAVE ? "tty" : driver->name, ptychar[i >> 4 & 0xf], i & 0xf); } /** * tty_line_name - generate name for a tty * @driver: the tty driver in use * @index: the minor number * @p: output buffer of at least 7 bytes * * Generate a name from a @driver reference and write it to the output buffer * @p. * * Locking: None */ static ssize_t tty_line_name(struct tty_driver *driver, int index, char *p) { if (driver->flags & TTY_DRIVER_UNNUMBERED_NODE) return sprintf(p, "%s", driver->name); else return sprintf(p, "%s%d", driver->name, index + driver->name_base); } /** * tty_driver_lookup_tty() - find an existing tty, if any * @driver: the driver for the tty * @file: file object * @idx: the minor number * * Return: the tty, if found. If not found, return %NULL or ERR_PTR() if the * driver lookup() method returns an error. * * Locking: tty_mutex must be held. If the tty is found, bump the tty kref. */ static struct tty_struct *tty_driver_lookup_tty(struct tty_driver *driver, struct file *file, int idx) { struct tty_struct *tty; if (driver->ops->lookup) { if (!file) tty = ERR_PTR(-EIO); else tty = driver->ops->lookup(driver, file, idx); } else { if (idx >= driver->num) return ERR_PTR(-EINVAL); tty = driver->ttys[idx]; } if (!IS_ERR(tty)) tty_kref_get(tty); return tty; } /** * tty_init_termios - helper for termios setup * @tty: the tty to set up * * Initialise the termios structure for this tty. This runs under the * %tty_mutex currently so we can be relaxed about ordering. */ void tty_init_termios(struct tty_struct *tty) { struct ktermios *tp; int idx = tty->index; if (tty->driver->flags & TTY_DRIVER_RESET_TERMIOS) tty->termios = tty->driver->init_termios; else { /* Check for lazy saved data */ tp = tty->driver->termios[idx]; if (tp != NULL) { tty->termios = *tp; tty->termios.c_line = tty->driver->init_termios.c_line; } else tty->termios = tty->driver->init_termios; } /* Compatibility until drivers always set this */ tty->termios.c_ispeed = tty_termios_input_baud_rate(&tty->termios); tty->termios.c_ospeed = tty_termios_baud_rate(&tty->termios); } EXPORT_SYMBOL_GPL(tty_init_termios); /** * tty_standard_install - usual tty->ops->install * @driver: the driver for the tty * @tty: the tty * * If the @driver overrides @tty->ops->install, it still can call this function * to perform the standard install operations. */ int tty_standard_install(struct tty_driver *driver, struct tty_struct *tty) { tty_init_termios(tty); tty_driver_kref_get(driver); tty->count++; driver->ttys[tty->index] = tty; return 0; } EXPORT_SYMBOL_GPL(tty_standard_install); /** * tty_driver_install_tty() - install a tty entry in the driver * @driver: the driver for the tty * @tty: the tty * * Install a tty object into the driver tables. The @tty->index field will be * set by the time this is called. This method is responsible for ensuring any * need additional structures are allocated and configured. * * Locking: tty_mutex for now */ static int tty_driver_install_tty(struct tty_driver *driver, struct tty_struct *tty) { return driver->ops->install ? driver->ops->install(driver, tty) : tty_standard_install(driver, tty); } /** * tty_driver_remove_tty() - remove a tty from the driver tables * @driver: the driver for the tty * @tty: tty to remove * * Remove a tty object from the driver tables. The tty->index field will be set * by the time this is called. * * Locking: tty_mutex for now */ static void tty_driver_remove_tty(struct tty_driver *driver, struct tty_struct *tty) { if (driver->ops->remove) driver->ops->remove(driver, tty); else driver->ttys[tty->index] = NULL; } /** * tty_reopen() - fast re-open of an open tty * @tty: the tty to open * * Re-opens on master ptys are not allowed and return -%EIO. * * Locking: Caller must hold tty_lock * Return: 0 on success, -errno on error. */ static int tty_reopen(struct tty_struct *tty) { struct tty_driver *driver = tty->driver; struct tty_ldisc *ld; int retval = 0; if (driver->type == TTY_DRIVER_TYPE_PTY && driver->subtype == PTY_TYPE_MASTER) return -EIO; if (!tty->count) return -EAGAIN; if (test_bit(TTY_EXCLUSIVE, &tty->flags) && !capable(CAP_SYS_ADMIN)) return -EBUSY; ld = tty_ldisc_ref_wait(tty); if (ld) { tty_ldisc_deref(ld); } else { retval = tty_ldisc_lock(tty, 5 * HZ); if (retval) return retval; if (!tty->ldisc) retval = tty_ldisc_reinit(tty, tty->termios.c_line); tty_ldisc_unlock(tty); } if (retval == 0) tty->count++; return retval; } /** * tty_init_dev - initialise a tty device * @driver: tty driver we are opening a device on * @idx: device index * * Prepare a tty device. This may not be a "new" clean device but could also be * an active device. The pty drivers require special handling because of this. * * Locking: * The function is called under the tty_mutex, which protects us from the * tty struct or driver itself going away. * * On exit the tty device has the line discipline attached and a reference * count of 1. If a pair was created for pty/tty use and the other was a pty * master then it too has a reference count of 1. * * WSH 06/09/97: Rewritten to remove races and properly clean up after a failed * open. The new code protects the open with a mutex, so it's really quite * straightforward. The mutex locking can probably be relaxed for the (most * common) case of reopening a tty. * * Return: new tty structure */ struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx) { struct tty_struct *tty; int retval; /* * First time open is complex, especially for PTY devices. * This code guarantees that either everything succeeds and the * TTY is ready for operation, or else the table slots are vacated * and the allocated memory released. (Except that the termios * may be retained.) */ if (!try_module_get(driver->owner)) return ERR_PTR(-ENODEV); tty = alloc_tty_struct(driver, idx); if (!tty) { retval = -ENOMEM; goto err_module_put; } tty_lock(tty); retval = tty_driver_install_tty(driver, tty); if (retval < 0) goto err_free_tty; if (!tty->port) tty->port = driver->ports[idx]; if (WARN_RATELIMIT(!tty->port, "%s: %s driver does not set tty->port. This would crash the kernel. Fix the driver!\n", __func__, tty->driver->name)) { retval = -EINVAL; goto err_release_lock; } retval = tty_ldisc_lock(tty, 5 * HZ); if (retval) goto err_release_lock; tty->port->itty = tty; /* * Structures all installed ... call the ldisc open routines. * If we fail here just call release_tty to clean up. No need * to decrement the use counts, as release_tty doesn't care. */ retval = tty_ldisc_setup(tty, tty->link); if (retval) goto err_release_tty; tty_ldisc_unlock(tty); /* Return the tty locked so that it cannot vanish under the caller */ return tty; err_free_tty: tty_unlock(tty); free_tty_struct(tty); err_module_put: module_put(driver->owner); return ERR_PTR(retval); /* call the tty release_tty routine to clean out this slot */ err_release_tty: tty_ldisc_unlock(tty); tty_info_ratelimited(tty, "ldisc open failed (%d), clearing slot %d\n", retval, idx); err_release_lock: tty_unlock(tty); release_tty(tty, idx); return ERR_PTR(retval); } /** * tty_save_termios() - save tty termios data in driver table * @tty: tty whose termios data to save * * Locking: Caller guarantees serialisation with tty_init_termios(). */ void tty_save_termios(struct tty_struct *tty) { struct ktermios *tp; int idx = tty->index; /* If the port is going to reset then it has no termios to save */ if (tty->driver->flags & TTY_DRIVER_RESET_TERMIOS) return; /* Stash the termios data */ tp = tty->driver->termios[idx]; if (tp == NULL) { tp = kmalloc(sizeof(*tp), GFP_KERNEL); if (tp == NULL) return; tty->driver->termios[idx] = tp; } *tp = tty->termios; } EXPORT_SYMBOL_GPL(tty_save_termios); /** * tty_flush_works - flush all works of a tty/pty pair * @tty: tty device to flush works for (or either end of a pty pair) * * Sync flush all works belonging to @tty (and the 'other' tty). */ static void tty_flush_works(struct tty_struct *tty) { flush_work(&tty->SAK_work); flush_work(&tty->hangup_work); if (tty->link) { flush_work(&tty->link->SAK_work); flush_work(&tty->link->hangup_work); } } /** * release_one_tty - release tty structure memory * @work: work of tty we are obliterating * * Releases memory associated with a tty structure, and clears out the * driver table slots. This function is called when a device is no longer * in use. It also gets called when setup of a device fails. * * Locking: * takes the file list lock internally when working on the list of ttys * that the driver keeps. * * This method gets called from a work queue so that the driver private * cleanup ops can sleep (needed for USB at least) */ static void release_one_tty(struct work_struct *work) { struct tty_struct *tty = container_of(work, struct tty_struct, hangup_work); struct tty_driver *driver = tty->driver; struct module *owner = driver->owner; if (tty->ops->cleanup) tty->ops->cleanup(tty); tty_driver_kref_put(driver); module_put(owner); spin_lock(&tty->files_lock); list_del_init(&tty->tty_files); spin_unlock(&tty->files_lock); put_pid(tty->ctrl.pgrp); put_pid(tty->ctrl.session); free_tty_struct(tty); } static void queue_release_one_tty(struct kref *kref) { struct tty_struct *tty = container_of(kref, struct tty_struct, kref); /* The hangup queue is now free so we can reuse it rather than * waste a chunk of memory for each port. */ INIT_WORK(&tty->hangup_work, release_one_tty); schedule_work(&tty->hangup_work); } /** * tty_kref_put - release a tty kref * @tty: tty device * * Release a reference to the @tty device and if need be let the kref layer * destruct the object for us. */ void tty_kref_put(struct tty_struct *tty) { if (tty) kref_put(&tty->kref, queue_release_one_tty); } EXPORT_SYMBOL(tty_kref_put); /** * release_tty - release tty structure memory * @tty: tty device release * @idx: index of the tty device release * * Release both @tty and a possible linked partner (think pty pair), * and decrement the refcount of the backing module. * * Locking: * tty_mutex * takes the file list lock internally when working on the list of ttys * that the driver keeps. */ static void release_tty(struct tty_struct *tty, int idx) { /* This should always be true but check for the moment */ WARN_ON(tty->index != idx); WARN_ON(!mutex_is_locked(&tty_mutex)); if (tty->ops->shutdown) tty->ops->shutdown(tty); tty_save_termios(tty); tty_driver_remove_tty(tty->driver, tty); if (tty->port) tty->port->itty = NULL; if (tty->link) tty->link->port->itty = NULL; if (tty->port) tty_buffer_cancel_work(tty->port); if (tty->link) tty_buffer_cancel_work(tty->link->port); tty_kref_put(tty->link); tty_kref_put(tty); } /** * tty_release_checks - check a tty before real release * @tty: tty to check * @idx: index of the tty * * Performs some paranoid checking before true release of the @tty. This is a * no-op unless %TTY_PARANOIA_CHECK is defined. */ static int tty_release_checks(struct tty_struct *tty, int idx) { #ifdef TTY_PARANOIA_CHECK if (idx < 0 || idx >= tty->driver->num) { tty_debug(tty, "bad idx %d\n", idx); return -1; } /* not much to check for devpts */ if (tty->driver->flags & TTY_DRIVER_DEVPTS_MEM) return 0; if (tty != tty->driver->ttys[idx]) { tty_debug(tty, "bad driver table[%d] = %p\n", idx, tty->driver->ttys[idx]); return -1; } if (tty->driver->other) { struct tty_struct *o_tty = tty->link; if (o_tty != tty->driver->other->ttys[idx]) { tty_debug(tty, "bad other table[%d] = %p\n", idx, tty->driver->other->ttys[idx]); return -1; } if (o_tty->link != tty) { tty_debug(tty, "bad link = %p\n", o_tty->link); return -1; } } #endif return 0; } /** * tty_kclose - closes tty opened by tty_kopen * @tty: tty device * * Performs the final steps to release and free a tty device. It is the same as * tty_release_struct() except that it also resets %TTY_PORT_KOPENED flag on * @tty->port. */ void tty_kclose(struct tty_struct *tty) { /* * Ask the line discipline code to release its structures */ tty_ldisc_release(tty); /* Wait for pending work before tty destruction commences */ tty_flush_works(tty); tty_debug_hangup(tty, "freeing structure\n"); /* * The release_tty function takes care of the details of clearing * the slots and preserving the termios structure. */ mutex_lock(&tty_mutex); tty_port_set_kopened(tty->port, 0); release_tty(tty, tty->index); mutex_unlock(&tty_mutex); } EXPORT_SYMBOL_GPL(tty_kclose); /** * tty_release_struct - release a tty struct * @tty: tty device * @idx: index of the tty * * Performs the final steps to release and free a tty device. It is roughly the * reverse of tty_init_dev(). */ void tty_release_struct(struct tty_struct *tty, int idx) { /* * Ask the line discipline code to release its structures */ tty_ldisc_release(tty); /* Wait for pending work before tty destruction commmences */ tty_flush_works(tty); tty_debug_hangup(tty, "freeing structure\n"); /* * The release_tty function takes care of the details of clearing * the slots and preserving the termios structure. */ mutex_lock(&tty_mutex); release_tty(tty, idx); mutex_unlock(&tty_mutex); } EXPORT_SYMBOL_GPL(tty_release_struct); /** * tty_release - vfs callback for close * @inode: inode of tty * @filp: file pointer for handle to tty * * Called the last time each file handle is closed that references this tty. * There may however be several such references. * * Locking: * Takes BKL. See tty_release_dev(). * * Even releasing the tty structures is a tricky business. We have to be very * careful that the structures are all released at the same time, as interrupts * might otherwise get the wrong pointers. * * WSH 09/09/97: rewritten to avoid some nasty race conditions that could * lead to double frees or releasing memory still in use. */ int tty_release(struct inode *inode, struct file *filp) { struct tty_struct *tty = file_tty(filp); struct tty_struct *o_tty = NULL; int do_sleep, final; int idx; long timeout = 0; int once = 1; if (tty_paranoia_check(tty, inode, __func__)) return 0; tty_lock(tty); check_tty_count(tty, __func__); __tty_fasync(-1, filp, 0); idx = tty->index; if (tty->driver->type == TTY_DRIVER_TYPE_PTY && tty->driver->subtype == PTY_TYPE_MASTER) o_tty = tty->link; if (tty_release_checks(tty, idx)) { tty_unlock(tty); return 0; } tty_debug_hangup(tty, "releasing (count=%d)\n", tty->count); if (tty->ops->close) tty->ops->close(tty, filp); /* If tty is pty master, lock the slave pty (stable lock order) */ tty_lock_slave(o_tty); /* * Sanity check: if tty->count is going to zero, there shouldn't be * any waiters on tty->read_wait or tty->write_wait. We test the * wait queues and kick everyone out _before_ actually starting to * close. This ensures that we won't block while releasing the tty * structure. * * The test for the o_tty closing is necessary, since the master and * slave sides may close in any order. If the slave side closes out * first, its count will be one, since the master side holds an open. * Thus this test wouldn't be triggered at the time the slave closed, * so we do it now. */ while (1) { do_sleep = 0; if (tty->count <= 1) { if (waitqueue_active(&tty->read_wait)) { wake_up_poll(&tty->read_wait, EPOLLIN); do_sleep++; } if (waitqueue_active(&tty->write_wait)) { wake_up_poll(&tty->write_wait, EPOLLOUT); do_sleep++; } } if (o_tty && o_tty->count <= 1) { if (waitqueue_active(&o_tty->read_wait)) { wake_up_poll(&o_tty->read_wait, EPOLLIN); do_sleep++; } if (waitqueue_active(&o_tty->write_wait)) { wake_up_poll(&o_tty->write_wait, EPOLLOUT); do_sleep++; } } if (!do_sleep) break; if (once) { once = 0; tty_warn(tty, "read/write wait queue active!\n"); } schedule_timeout_killable(timeout); if (timeout < 120 * HZ) timeout = 2 * timeout + 1; else timeout = MAX_SCHEDULE_TIMEOUT; } if (o_tty) { if (--o_tty->count < 0) { tty_warn(tty, "bad slave count (%d)\n", o_tty->count); o_tty->count = 0; } } if (--tty->count < 0) { tty_warn(tty, "bad tty->count (%d)\n", tty->count); tty->count = 0; } /* * We've decremented tty->count, so we need to remove this file * descriptor off the tty->tty_files list; this serves two * purposes: * - check_tty_count sees the correct number of file descriptors * associated with this tty. * - do_tty_hangup no longer sees this file descriptor as * something that needs to be handled for hangups. */ tty_del_file(filp); /* * Perform some housekeeping before deciding whether to return. * * If _either_ side is closing, make sure there aren't any * processes that still think tty or o_tty is their controlling * tty. */ if (!tty->count) { read_lock(&tasklist_lock); session_clear_tty(tty->ctrl.session); if (o_tty) session_clear_tty(o_tty->ctrl.session); read_unlock(&tasklist_lock); } /* check whether both sides are closing ... */ final = !tty->count && !(o_tty && o_tty->count); tty_unlock_slave(o_tty); tty_unlock(tty); /* At this point, the tty->count == 0 should ensure a dead tty * cannot be re-opened by a racing opener. */ if (!final) return 0; tty_debug_hangup(tty, "final close\n"); tty_release_struct(tty, idx); return 0; } /** * tty_open_current_tty - get locked tty of current task * @device: device number * @filp: file pointer to tty * @return: locked tty of the current task iff @device is /dev/tty * * Performs a re-open of the current task's controlling tty. * * We cannot return driver and index like for the other nodes because devpts * will not work then. It expects inodes to be from devpts FS. */ static struct tty_struct *tty_open_current_tty(dev_t device, struct file *filp) { struct tty_struct *tty; int retval; if (device != MKDEV(TTYAUX_MAJOR, 0)) return NULL; tty = get_current_tty(); if (!tty) return ERR_PTR(-ENXIO); filp->f_flags |= O_NONBLOCK; /* Don't let /dev/tty block */ /* noctty = 1; */ tty_lock(tty); tty_kref_put(tty); /* safe to drop the kref now */ retval = tty_reopen(tty); if (retval < 0) { tty_unlock(tty); tty = ERR_PTR(retval); } return tty; } /** * tty_lookup_driver - lookup a tty driver for a given device file * @device: device number * @filp: file pointer to tty * @index: index for the device in the @return driver * * If returned value is not erroneous, the caller is responsible to decrement * the refcount by tty_driver_kref_put(). * * Locking: %tty_mutex protects get_tty_driver() * * Return: driver for this inode (with increased refcount) */ static struct tty_driver *tty_lookup_driver(dev_t device, struct file *filp, int *index) { struct tty_driver *driver = NULL; switch (device) { #ifdef CONFIG_VT case MKDEV(TTY_MAJOR, 0): { extern struct tty_driver *console_driver; driver = tty_driver_kref_get(console_driver); *index = fg_console; break; } #endif case MKDEV(TTYAUX_MAJOR, 1): { struct tty_driver *console_driver = console_device(index); if (console_driver) { driver = tty_driver_kref_get(console_driver); if (driver && filp) { /* Don't let /dev/console block */ filp->f_flags |= O_NONBLOCK; break; } } if (driver) tty_driver_kref_put(driver); return ERR_PTR(-ENODEV); } default: driver = get_tty_driver(device, index); if (!driver) return ERR_PTR(-ENODEV); break; } return driver; } static struct tty_struct *tty_kopen(dev_t device, int shared) { struct tty_struct *tty; struct tty_driver *driver; int index = -1; mutex_lock(&tty_mutex); driver = tty_lookup_driver(device, NULL, &index); if (IS_ERR(driver)) { mutex_unlock(&tty_mutex); return ERR_CAST(driver); } /* check whether we're reopening an existing tty */ tty = tty_driver_lookup_tty(driver, NULL, index); if (IS_ERR(tty) || shared) goto out; if (tty) { /* drop kref from tty_driver_lookup_tty() */ tty_kref_put(tty); tty = ERR_PTR(-EBUSY); } else { /* tty_init_dev returns tty with the tty_lock held */ tty = tty_init_dev(driver, index); if (IS_ERR(tty)) goto out; tty_port_set_kopened(tty->port, 1); } out: mutex_unlock(&tty_mutex); tty_driver_kref_put(driver); return tty; } /** * tty_kopen_exclusive - open a tty device for kernel * @device: dev_t of device to open * * Opens tty exclusively for kernel. Performs the driver lookup, makes sure * it's not already opened and performs the first-time tty initialization. * * Claims the global %tty_mutex to serialize: * * concurrent first-time tty initialization * * concurrent tty driver removal w/ lookup * * concurrent tty removal from driver table * * Return: the locked initialized &tty_struct */ struct tty_struct *tty_kopen_exclusive(dev_t device) { return tty_kopen(device, 0); } EXPORT_SYMBOL_GPL(tty_kopen_exclusive); /** * tty_kopen_shared - open a tty device for shared in-kernel use * @device: dev_t of device to open * * Opens an already existing tty for in-kernel use. Compared to * tty_kopen_exclusive() above it doesn't ensure to be the only user. * * Locking: identical to tty_kopen() above. */ struct tty_struct *tty_kopen_shared(dev_t device) { return tty_kopen(device, 1); } EXPORT_SYMBOL_GPL(tty_kopen_shared); /** * tty_open_by_driver - open a tty device * @device: dev_t of device to open * @filp: file pointer to tty * * Performs the driver lookup, checks for a reopen, or otherwise performs the * first-time tty initialization. * * * Claims the global tty_mutex to serialize: * * concurrent first-time tty initialization * * concurrent tty driver removal w/ lookup * * concurrent tty removal from driver table * * Return: the locked initialized or re-opened &tty_struct */ static struct tty_struct *tty_open_by_driver(dev_t device, struct file *filp) { struct tty_struct *tty; struct tty_driver *driver = NULL; int index = -1; int retval; mutex_lock(&tty_mutex); driver = tty_lookup_driver(device, filp, &index); if (IS_ERR(driver)) { mutex_unlock(&tty_mutex); return ERR_CAST(driver); } /* check whether we're reopening an existing tty */ tty = tty_driver_lookup_tty(driver, filp, index); if (IS_ERR(tty)) { mutex_unlock(&tty_mutex); goto out; } if (tty) { if (tty_port_kopened(tty->port)) { tty_kref_put(tty); mutex_unlock(&tty_mutex); tty = ERR_PTR(-EBUSY); goto out; } mutex_unlock(&tty_mutex); retval = tty_lock_interruptible(tty); tty_kref_put(tty); /* drop kref from tty_driver_lookup_tty() */ if (retval) { if (retval == -EINTR) retval = -ERESTARTSYS; tty = ERR_PTR(retval); goto out; } retval = tty_reopen(tty); if (retval < 0) { tty_unlock(tty); tty = ERR_PTR(retval); } } else { /* Returns with the tty_lock held for now */ tty = tty_init_dev(driver, index); mutex_unlock(&tty_mutex); } out: tty_driver_kref_put(driver); return tty; } /** * tty_open - open a tty device * @inode: inode of device file * @filp: file pointer to tty * * tty_open() and tty_release() keep up the tty count that contains the number * of opens done on a tty. We cannot use the inode-count, as different inodes * might point to the same tty. * * Open-counting is needed for pty masters, as well as for keeping track of * serial lines: DTR is dropped when the last close happens. * (This is not done solely through tty->count, now. - Ted 1/27/92) * * The termios state of a pty is reset on the first open so that settings don't * persist across reuse. * * Locking: * * %tty_mutex protects tty, tty_lookup_driver() and tty_init_dev(). * * @tty->count should protect the rest. * * ->siglock protects ->signal/->sighand * * Note: the tty_unlock/lock cases without a ref are only safe due to %tty_mutex */ static int tty_open(struct inode *inode, struct file *filp) { struct tty_struct *tty; int noctty, retval; dev_t device = inode->i_rdev; unsigned saved_flags = filp->f_flags; nonseekable_open(inode, filp); retry_open: retval = tty_alloc_file(filp); if (retval) return -ENOMEM; tty = tty_open_current_tty(device, filp); if (!tty) tty = tty_open_by_driver(device, filp); if (IS_ERR(tty)) { tty_free_file(filp); retval = PTR_ERR(tty); if (retval != -EAGAIN || signal_pending(current)) return retval; schedule(); goto retry_open; } tty_add_file(tty, filp); check_tty_count(tty, __func__); tty_debug_hangup(tty, "opening (count=%d)\n", tty->count); if (tty->ops->open) retval = tty->ops->open(tty, filp); else retval = -ENODEV; filp->f_flags = saved_flags; if (retval) { tty_debug_hangup(tty, "open error %d, releasing\n", retval); tty_unlock(tty); /* need to call tty_release without BTM */ tty_release(inode, filp); if (retval != -ERESTARTSYS) return retval; if (signal_pending(current)) return retval; schedule(); /* * Need to reset f_op in case a hangup happened. */ if (tty_hung_up_p(filp)) filp->f_op = &tty_fops; goto retry_open; } clear_bit(TTY_HUPPED, &tty->flags); noctty = (filp->f_flags & O_NOCTTY) || (IS_ENABLED(CONFIG_VT) && device == MKDEV(TTY_MAJOR, 0)) || device == MKDEV(TTYAUX_MAJOR, 1) || (tty->driver->type == TTY_DRIVER_TYPE_PTY && tty->driver->subtype == PTY_TYPE_MASTER); if (!noctty) tty_open_proc_set_tty(filp, tty); tty_unlock(tty); return 0; } /** * tty_poll - check tty status * @filp: file being polled * @wait: poll wait structures to update * * Call the line discipline polling method to obtain the poll status of the * device. * * Locking: locks called line discipline but ldisc poll method may be * re-entered freely by other callers. */ static __poll_t tty_poll(struct file *filp, poll_table *wait) { struct tty_struct *tty = file_tty(filp); struct tty_ldisc *ld; __poll_t ret = 0; if (tty_paranoia_check(tty, file_inode(filp), "tty_poll")) return 0; ld = tty_ldisc_ref_wait(tty); if (!ld) return hung_up_tty_poll(filp, wait); if (ld->ops->poll) ret = ld->ops->poll(tty, filp, wait); tty_ldisc_deref(ld); return ret; } static int __tty_fasync(int fd, struct file *filp, int on) { struct tty_struct *tty = file_tty(filp); unsigned long flags; int retval = 0; if (tty_paranoia_check(tty, file_inode(filp), "tty_fasync")) goto out; if (on) { retval = file_f_owner_allocate(filp); if (retval) goto out; } retval = fasync_helper(fd, filp, on, &tty->fasync); if (retval <= 0) goto out; if (on) { enum pid_type type; struct pid *pid; spin_lock_irqsave(&tty->ctrl.lock, flags); if (tty->ctrl.pgrp) { pid = tty->ctrl.pgrp; type = PIDTYPE_PGID; } else { pid = task_pid(current); type = PIDTYPE_TGID; } get_pid(pid); spin_unlock_irqrestore(&tty->ctrl.lock, flags); __f_setown(filp, pid, type, 0); put_pid(pid); retval = 0; } out: return retval; } static int tty_fasync(int fd, struct file *filp, int on) { struct tty_struct *tty = file_tty(filp); int retval = -ENOTTY; tty_lock(tty); if (!tty_hung_up_p(filp)) retval = __tty_fasync(fd, filp, on); tty_unlock(tty); return retval; } static bool tty_legacy_tiocsti __read_mostly = IS_ENABLED(CONFIG_LEGACY_TIOCSTI); /** * tiocsti - fake input character * @tty: tty to fake input into * @p: pointer to character * * Fake input to a tty device. Does the necessary locking and input management. * * FIXME: does not honour flow control ?? * * Locking: * * Called functions take tty_ldiscs_lock * * current->signal->tty check is safe without locks */ static int tiocsti(struct tty_struct *tty, u8 __user *p) { struct tty_ldisc *ld; u8 ch; if (!tty_legacy_tiocsti && !capable(CAP_SYS_ADMIN)) return -EIO; if ((current->signal->tty != tty) && !capable(CAP_SYS_ADMIN)) return -EPERM; if (get_user(ch, p)) return -EFAULT; tty_audit_tiocsti(tty, ch); ld = tty_ldisc_ref_wait(tty); if (!ld) return -EIO; tty_buffer_lock_exclusive(tty->port); if (ld->ops->receive_buf) ld->ops->receive_buf(tty, &ch, NULL, 1); tty_buffer_unlock_exclusive(tty->port); tty_ldisc_deref(ld); return 0; } /** * tiocgwinsz - implement window query ioctl * @tty: tty * @arg: user buffer for result * * Copies the kernel idea of the window size into the user buffer. * * Locking: @tty->winsize_mutex is taken to ensure the winsize data is * consistent. */ static int tiocgwinsz(struct tty_struct *tty, struct winsize __user *arg) { guard(mutex)(&tty->winsize_mutex); if (copy_to_user(arg, &tty->winsize, sizeof(*arg))) return -EFAULT; return 0; } /** * tty_do_resize - resize event * @tty: tty being resized * @ws: new dimensions * * Update the termios variables and send the necessary signals to peform a * terminal resize correctly. */ int tty_do_resize(struct tty_struct *tty, struct winsize *ws) { struct pid *pgrp; guard(mutex)(&tty->winsize_mutex); if (!memcmp(ws, &tty->winsize, sizeof(*ws))) return 0; /* Signal the foreground process group */ pgrp = tty_get_pgrp(tty); if (pgrp) kill_pgrp(pgrp, SIGWINCH, 1); put_pid(pgrp); tty->winsize = *ws; return 0; } EXPORT_SYMBOL(tty_do_resize); /** * tiocswinsz - implement window size set ioctl * @tty: tty side of tty * @arg: user buffer for result * * Copies the user idea of the window size to the kernel. Traditionally this is * just advisory information but for the Linux console it actually has driver * level meaning and triggers a VC resize. * * Locking: * Driver dependent. The default do_resize method takes the tty termios * mutex and ctrl.lock. The console takes its own lock then calls into the * default method. */ static int tiocswinsz(struct tty_struct *tty, struct winsize __user *arg) { struct winsize tmp_ws; if (copy_from_user(&tmp_ws, arg, sizeof(*arg))) return -EFAULT; if (tty->ops->resize) return tty->ops->resize(tty, &tmp_ws); else return tty_do_resize(tty, &tmp_ws); } /** * tioccons - allow admin to move logical console * @file: the file to become console * * Allow the administrator to move the redirected console device. * * Locking: uses redirect_lock to guard the redirect information */ static int tioccons(struct file *file) { if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (file->f_op->write_iter == redirected_tty_write) { struct file *f; spin_lock(&redirect_lock); f = redirect; redirect = NULL; spin_unlock(&redirect_lock); if (f) fput(f); return 0; } if (file->f_op->write_iter != tty_write) return -ENOTTY; if (!(file->f_mode & FMODE_WRITE)) return -EBADF; if (!(file->f_mode & FMODE_CAN_WRITE)) return -EINVAL; guard(spinlock)(&redirect_lock); if (redirect) return -EBUSY; redirect = get_file(file); return 0; } /** * tiocsetd - set line discipline * @tty: tty device * @p: pointer to user data * * Set the line discipline according to user request. * * Locking: see tty_set_ldisc(), this function is just a helper */ static int tiocsetd(struct tty_struct *tty, int __user *p) { int disc; int ret; if (get_user(disc, p)) return -EFAULT; ret = tty_set_ldisc(tty, disc); return ret; } /** * tiocgetd - get line discipline * @tty: tty device * @p: pointer to user data * * Retrieves the line discipline id directly from the ldisc. * * Locking: waits for ldisc reference (in case the line discipline is changing * or the @tty is being hungup) */ static int tiocgetd(struct tty_struct *tty, int __user *p) { struct tty_ldisc *ld; int ret; ld = tty_ldisc_ref_wait(tty); if (!ld) return -EIO; ret = put_user(ld->ops->num, p); tty_ldisc_deref(ld); return ret; } /** * send_break - performed time break * @tty: device to break on * @duration: timeout in mS * * Perform a timed break on hardware that lacks its own driver level timed * break functionality. * * Locking: * @tty->atomic_write_lock serializes */ static int send_break(struct tty_struct *tty, unsigned int duration) { int retval; if (tty->ops->break_ctl == NULL) return 0; if (tty->driver->flags & TTY_DRIVER_HARDWARE_BREAK) return tty->ops->break_ctl(tty, duration); /* Do the work ourselves */ if (tty_write_lock(tty, false) < 0) return -EINTR; retval = tty->ops->break_ctl(tty, -1); if (!retval) { msleep_interruptible(duration); retval = tty->ops->break_ctl(tty, 0); } else if (retval == -EOPNOTSUPP) { /* some drivers can tell only dynamically */ retval = 0; } tty_write_unlock(tty); if (signal_pending(current)) retval = -EINTR; return retval; } /** * tty_get_tiocm - get tiocm status register * @tty: tty device * * Obtain the modem status bits from the tty driver if the feature * is supported. */ int tty_get_tiocm(struct tty_struct *tty) { int retval = -ENOTTY; if (tty->ops->tiocmget) retval = tty->ops->tiocmget(tty); return retval; } EXPORT_SYMBOL_GPL(tty_get_tiocm); /** * tty_tiocmget - get modem status * @tty: tty device * @p: pointer to result * * Obtain the modem status bits from the tty driver if the feature is * supported. Return -%ENOTTY if it is not available. * * Locking: none (up to the driver) */ static int tty_tiocmget(struct tty_struct *tty, int __user *p) { int retval; retval = tty_get_tiocm(tty); if (retval >= 0) retval = put_user(retval, p); return retval; } /** * tty_tiocmset - set modem status * @tty: tty device * @cmd: command - clear bits, set bits or set all * @p: pointer to desired bits * * Set the modem status bits from the tty driver if the feature * is supported. Return -%ENOTTY if it is not available. * * Locking: none (up to the driver) */ static int tty_tiocmset(struct tty_struct *tty, unsigned int cmd, unsigned __user *p) { int retval; unsigned int set, clear, val; if (tty->ops->tiocmset == NULL) return -ENOTTY; retval = get_user(val, p); if (retval) return retval; set = clear = 0; switch (cmd) { case TIOCMBIS: set = val; break; case TIOCMBIC: clear = val; break; case TIOCMSET: set = val; clear = ~val; break; } set &= TIOCM_DTR|TIOCM_RTS|TIOCM_OUT1|TIOCM_OUT2|TIOCM_LOOP; clear &= TIOCM_DTR|TIOCM_RTS|TIOCM_OUT1|TIOCM_OUT2|TIOCM_LOOP; return tty->ops->tiocmset(tty, set, clear); } /** * tty_get_icount - get tty statistics * @tty: tty device * @icount: output parameter * * Gets a copy of the @tty's icount statistics. * * Locking: none (up to the driver) */ int tty_get_icount(struct tty_struct *tty, struct serial_icounter_struct *icount) { memset(icount, 0, sizeof(*icount)); if (tty->ops->get_icount) return tty->ops->get_icount(tty, icount); else return -ENOTTY; } EXPORT_SYMBOL_GPL(tty_get_icount); static int tty_tiocgicount(struct tty_struct *tty, void __user *arg) { struct serial_icounter_struct icount; int retval; retval = tty_get_icount(tty, &icount); if (retval != 0) return retval; if (copy_to_user(arg, &icount, sizeof(icount))) return -EFAULT; return 0; } static int tty_set_serial(struct tty_struct *tty, struct serial_struct *ss) { int flags; flags = ss->flags & ASYNC_DEPRECATED; if (flags) pr_warn_ratelimited("%s: '%s' is using deprecated serial flags (with no effect): %.8x\n", __func__, current->comm, flags); if (!tty->ops->set_serial) return -ENOTTY; return tty->ops->set_serial(tty, ss); } static int tty_tiocsserial(struct tty_struct *tty, struct serial_struct __user *ss) { struct serial_struct v; if (copy_from_user(&v, ss, sizeof(*ss))) return -EFAULT; return tty_set_serial(tty, &v); } static int tty_tiocgserial(struct tty_struct *tty, struct serial_struct __user *ss) { struct serial_struct v; int err; memset(&v, 0, sizeof(v)); if (!tty->ops->get_serial) return -ENOTTY; err = tty->ops->get_serial(tty, &v); if (!err && copy_to_user(ss, &v, sizeof(v))) err = -EFAULT; return err; } /* * if pty, return the slave side (real_tty) * otherwise, return self */ static struct tty_struct *tty_pair_get_tty(struct tty_struct *tty) { if (tty->driver->type == TTY_DRIVER_TYPE_PTY && tty->driver->subtype == PTY_TYPE_MASTER) tty = tty->link; return tty; } /* * Split this up, as gcc can choke on it otherwise.. */ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct tty_struct *tty = file_tty(file); struct tty_struct *real_tty; void __user *p = (void __user *)arg; int retval; struct tty_ldisc *ld; if (tty_paranoia_check(tty, file_inode(file), "tty_ioctl")) return -EINVAL; real_tty = tty_pair_get_tty(tty); /* * Factor out some common prep work */ switch (cmd) { case TIOCSETD: case TIOCSBRK: case TIOCCBRK: case TCSBRK: case TCSBRKP: retval = tty_check_change(tty); if (retval) return retval; if (cmd != TIOCCBRK) { tty_wait_until_sent(tty, 0); if (signal_pending(current)) return -EINTR; } break; } /* * Now do the stuff. */ switch (cmd) { case TIOCSTI: return tiocsti(tty, p); case TIOCGWINSZ: return tiocgwinsz(real_tty, p); case TIOCSWINSZ: return tiocswinsz(real_tty, p); case TIOCCONS: return real_tty != tty ? -EINVAL : tioccons(file); case TIOCEXCL: set_bit(TTY_EXCLUSIVE, &tty->flags); return 0; case TIOCNXCL: clear_bit(TTY_EXCLUSIVE, &tty->flags); return 0; case TIOCGEXCL: { int excl = test_bit(TTY_EXCLUSIVE, &tty->flags); return put_user(excl, (int __user *)p); } case TIOCGETD: return tiocgetd(tty, p); case TIOCSETD: return tiocsetd(tty, p); case TIOCVHANGUP: if (!capable(CAP_SYS_ADMIN)) return -EPERM; tty_vhangup(tty); return 0; case TIOCGDEV: { unsigned int ret = new_encode_dev(tty_devnum(real_tty)); return put_user(ret, (unsigned int __user *)p); } /* * Break handling */ case TIOCSBRK: /* Turn break on, unconditionally */ if (tty->ops->break_ctl) return tty->ops->break_ctl(tty, -1); return 0; case TIOCCBRK: /* Turn break off, unconditionally */ if (tty->ops->break_ctl) return tty->ops->break_ctl(tty, 0); return 0; case TCSBRK: /* SVID version: non-zero arg --> no break */ /* non-zero arg means wait for all output data * to be sent (performed above) but don't send break. * This is used by the tcdrain() termios function. */ if (!arg) return send_break(tty, 250); return 0; case TCSBRKP: /* support for POSIX tcsendbreak() */ return send_break(tty, arg ? arg*100 : 250); case TIOCMGET: return tty_tiocmget(tty, p); case TIOCMSET: case TIOCMBIC: case TIOCMBIS: return tty_tiocmset(tty, cmd, p); case TIOCGICOUNT: return tty_tiocgicount(tty, p); case TCFLSH: switch (arg) { case TCIFLUSH: case TCIOFLUSH: /* flush tty buffer and allow ldisc to process ioctl */ tty_buffer_flush(tty, NULL); break; } break; case TIOCSSERIAL: return tty_tiocsserial(tty, p); case TIOCGSERIAL: return tty_tiocgserial(tty, p); case TIOCGPTPEER: /* Special because the struct file is needed */ return ptm_open_peer(file, tty, (int)arg); default: retval = tty_jobctrl_ioctl(tty, real_tty, file, cmd, arg); if (retval != -ENOIOCTLCMD) return retval; } if (tty->ops->ioctl) { retval = tty->ops->ioctl(tty, cmd, arg); if (retval != -ENOIOCTLCMD) return retval; } ld = tty_ldisc_ref_wait(tty); if (!ld) return hung_up_tty_ioctl(file, cmd, arg); retval = -EINVAL; if (ld->ops->ioctl) { retval = ld->ops->ioctl(tty, cmd, arg); if (retval == -ENOIOCTLCMD) retval = -ENOTTY; } tty_ldisc_deref(ld); return retval; } #ifdef CONFIG_COMPAT struct serial_struct32 { compat_int_t type; compat_int_t line; compat_uint_t port; compat_int_t irq; compat_int_t flags; compat_int_t xmit_fifo_size; compat_int_t custom_divisor; compat_int_t baud_base; unsigned short close_delay; char io_type; char reserved_char; compat_int_t hub6; unsigned short closing_wait; /* time to wait before closing */ unsigned short closing_wait2; /* no longer used... */ compat_uint_t iomem_base; unsigned short iomem_reg_shift; unsigned int port_high; /* compat_ulong_t iomap_base FIXME */ compat_int_t reserved; }; static int compat_tty_tiocsserial(struct tty_struct *tty, struct serial_struct32 __user *ss) { struct serial_struct32 v32; struct serial_struct v; if (copy_from_user(&v32, ss, sizeof(*ss))) return -EFAULT; memcpy(&v, &v32, offsetof(struct serial_struct32, iomem_base)); v.iomem_base = compat_ptr(v32.iomem_base); v.iomem_reg_shift = v32.iomem_reg_shift; v.port_high = v32.port_high; v.iomap_base = 0; return tty_set_serial(tty, &v); } static int compat_tty_tiocgserial(struct tty_struct *tty, struct serial_struct32 __user *ss) { struct serial_struct32 v32; struct serial_struct v; int err; memset(&v, 0, sizeof(v)); memset(&v32, 0, sizeof(v32)); if (!tty->ops->get_serial) return -ENOTTY; err = tty->ops->get_serial(tty, &v); if (!err) { memcpy(&v32, &v, offsetof(struct serial_struct32, iomem_base)); v32.iomem_base = (unsigned long)v.iomem_base >> 32 ? 0xfffffff : ptr_to_compat(v.iomem_base); v32.iomem_reg_shift = v.iomem_reg_shift; v32.port_high = v.port_high; if (copy_to_user(ss, &v32, sizeof(v32))) err = -EFAULT; } return err; } static long tty_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct tty_struct *tty = file_tty(file); struct tty_ldisc *ld; int retval = -ENOIOCTLCMD; switch (cmd) { case TIOCOUTQ: case TIOCSTI: case TIOCGWINSZ: case TIOCSWINSZ: case TIOCGEXCL: case TIOCGETD: case TIOCSETD: case TIOCGDEV: case TIOCMGET: case TIOCMSET: case TIOCMBIC: case TIOCMBIS: case TIOCGICOUNT: case TIOCGPGRP: case TIOCSPGRP: case TIOCGSID: case TIOCSERGETLSR: case TIOCGRS485: case TIOCSRS485: #ifdef TIOCGETP case TIOCGETP: case TIOCSETP: case TIOCSETN: #endif #ifdef TIOCGETC case TIOCGETC: case TIOCSETC: #endif #ifdef TIOCGLTC case TIOCGLTC: case TIOCSLTC: #endif case TCSETSF: case TCSETSW: case TCSETS: case TCGETS: #ifdef TCGETS2 case TCGETS2: case TCSETSF2: case TCSETSW2: case TCSETS2: #endif case TCGETA: case TCSETAF: case TCSETAW: case TCSETA: case TIOCGLCKTRMIOS: case TIOCSLCKTRMIOS: #ifdef TCGETX case TCGETX: case TCSETX: case TCSETXW: case TCSETXF: #endif case TIOCGSOFTCAR: case TIOCSSOFTCAR: case PPPIOCGCHAN: case PPPIOCGUNIT: return tty_ioctl(file, cmd, (unsigned long)compat_ptr(arg)); case TIOCCONS: case TIOCEXCL: case TIOCNXCL: case TIOCVHANGUP: case TIOCSBRK: case TIOCCBRK: case TCSBRK: case TCSBRKP: case TCFLSH: case TIOCGPTPEER: case TIOCNOTTY: case TIOCSCTTY: case TCXONC: case TIOCMIWAIT: case TIOCSERCONFIG: return tty_ioctl(file, cmd, arg); } if (tty_paranoia_check(tty, file_inode(file), "tty_ioctl")) return -EINVAL; switch (cmd) { case TIOCSSERIAL: return compat_tty_tiocsserial(tty, compat_ptr(arg)); case TIOCGSERIAL: return compat_tty_tiocgserial(tty, compat_ptr(arg)); } if (tty->ops->compat_ioctl) { retval = tty->ops->compat_ioctl(tty, cmd, arg); if (retval != -ENOIOCTLCMD) return retval; } ld = tty_ldisc_ref_wait(tty); if (!ld) return hung_up_tty_compat_ioctl(file, cmd, arg); if (ld->ops->compat_ioctl) retval = ld->ops->compat_ioctl(tty, cmd, arg); if (retval == -ENOIOCTLCMD && ld->ops->ioctl) retval = ld->ops->ioctl(tty, (unsigned long)compat_ptr(cmd), arg); tty_ldisc_deref(ld); return retval; } #endif static int this_tty(const void *t, struct file *file, unsigned fd) { if (likely(file->f_op->read_iter != tty_read)) return 0; return file_tty(file) != t ? 0 : fd + 1; } /* * This implements the "Secure Attention Key" --- the idea is to * prevent trojan horses by killing all processes associated with this * tty when the user hits the "Secure Attention Key". Required for * super-paranoid applications --- see the Orange Book for more details. * * This code could be nicer; ideally it should send a HUP, wait a few * seconds, then send a INT, and then a KILL signal. But you then * have to coordinate with the init process, since all processes associated * with the current tty must be dead before the new getty is allowed * to spawn. * * Now, if it would be correct ;-/ The current code has a nasty hole - * it doesn't catch files in flight. We may send the descriptor to ourselves * via AF_UNIX socket, close it and later fetch from socket. FIXME. * * Nasty bug: do_SAK is being called in interrupt context. This can * deadlock. We punt it up to process context. AKPM - 16Mar2001 */ void __do_SAK(struct tty_struct *tty) { struct task_struct *g, *p; struct pid *session; int i; scoped_guard(spinlock_irqsave, &tty->ctrl.lock) session = get_pid(tty->ctrl.session); tty_ldisc_flush(tty); tty_driver_flush_buffer(tty); read_lock(&tasklist_lock); /* Kill the entire session */ do_each_pid_task(session, PIDTYPE_SID, p) { tty_notice(tty, "SAK: killed process %d (%s): by session\n", task_pid_nr(p), p->comm); group_send_sig_info(SIGKILL, SEND_SIG_PRIV, p, PIDTYPE_SID); } while_each_pid_task(session, PIDTYPE_SID, p); /* Now kill any processes that happen to have the tty open */ for_each_process_thread(g, p) { if (p->signal->tty == tty) { tty_notice(tty, "SAK: killed process %d (%s): by controlling tty\n", task_pid_nr(p), p->comm); group_send_sig_info(SIGKILL, SEND_SIG_PRIV, p, PIDTYPE_SID); continue; } guard(task_lock)(p); i = iterate_fd(p->files, 0, this_tty, tty); if (i != 0) { tty_notice(tty, "SAK: killed process %d (%s): by fd#%d\n", task_pid_nr(p), p->comm, i - 1); group_send_sig_info(SIGKILL, SEND_SIG_PRIV, p, PIDTYPE_SID); } } read_unlock(&tasklist_lock); put_pid(session); } static void do_SAK_work(struct work_struct *work) { struct tty_struct *tty = container_of(work, struct tty_struct, SAK_work); __do_SAK(tty); } /* * The tq handling here is a little racy - tty->SAK_work may already be queued. * Fortunately we don't need to worry, because if ->SAK_work is already queued, * the values which we write to it will be identical to the values which it * already has. --akpm */ void do_SAK(struct tty_struct *tty) { if (!tty) return; schedule_work(&tty->SAK_work); } EXPORT_SYMBOL(do_SAK); /* Must put_device() after it's unused! */ static struct device *tty_get_device(struct tty_struct *tty) { dev_t devt = tty_devnum(tty); return class_find_device_by_devt(&tty_class, devt); } /** * alloc_tty_struct - allocate a new tty * @driver: driver which will handle the returned tty * @idx: minor of the tty * * This subroutine allocates and initializes a tty structure. * * Locking: none - @tty in question is not exposed at this point */ struct tty_struct *alloc_tty_struct(struct tty_driver *driver, int idx) { struct tty_struct *tty; tty = kzalloc(sizeof(*tty), GFP_KERNEL_ACCOUNT); if (!tty) return NULL; kref_init(&tty->kref); if (tty_ldisc_init(tty)) { kfree(tty); return NULL; } tty->ctrl.session = NULL; tty->ctrl.pgrp = NULL; mutex_init(&tty->legacy_mutex); mutex_init(&tty->throttle_mutex); init_rwsem(&tty->termios_rwsem); mutex_init(&tty->winsize_mutex); init_ldsem(&tty->ldisc_sem); init_waitqueue_head(&tty->write_wait); init_waitqueue_head(&tty->read_wait); INIT_WORK(&tty->hangup_work, do_tty_hangup); mutex_init(&tty->atomic_write_lock); spin_lock_init(&tty->ctrl.lock); spin_lock_init(&tty->flow.lock); spin_lock_init(&tty->files_lock); INIT_LIST_HEAD(&tty->tty_files); INIT_WORK(&tty->SAK_work, do_SAK_work); tty->driver = driver; tty->ops = driver->ops; tty->index = idx; tty_line_name(driver, idx, tty->name); tty->dev = tty_get_device(tty); return tty; } /** * tty_put_char - write one character to a tty * @tty: tty * @ch: character to write * * Write one byte to the @tty using the provided @tty->ops->put_char() method * if present. * * Note: the specific put_char operation in the driver layer may go * away soon. Don't call it directly, use this method * * Return: the number of characters successfully output. */ int tty_put_char(struct tty_struct *tty, u8 ch) { if (tty->ops->put_char) return tty->ops->put_char(tty, ch); return tty->ops->write(tty, &ch, 1); } EXPORT_SYMBOL_GPL(tty_put_char); static int tty_cdev_add(struct tty_driver *driver, dev_t dev, unsigned int index, unsigned int count) { int err; /* init here, since reused cdevs cause crashes */ driver->cdevs[index] = cdev_alloc(); if (!driver->cdevs[index]) return -ENOMEM; driver->cdevs[index]->ops = &tty_fops; driver->cdevs[index]->owner = driver->owner; err = cdev_add(driver->cdevs[index], dev, count); if (err) kobject_put(&driver->cdevs[index]->kobj); return err; } /** * tty_register_device - register a tty device * @driver: the tty driver that describes the tty device * @index: the index in the tty driver for this tty device * @device: a struct device that is associated with this tty device. * This field is optional, if there is no known struct device * for this tty device it can be set to NULL safely. * * This call is required to be made to register an individual tty device * if the tty driver's flags have the %TTY_DRIVER_DYNAMIC_DEV bit set. If * that bit is not set, this function should not be called by a tty * driver. * * Locking: ?? * * Return: A pointer to the struct device for this tty device (or * ERR_PTR(-EFOO) on error). */ struct device *tty_register_device(struct tty_driver *driver, unsigned index, struct device *device) { return tty_register_device_attr(driver, index, device, NULL, NULL); } EXPORT_SYMBOL(tty_register_device); static void tty_device_create_release(struct device *dev) { dev_dbg(dev, "releasing...\n"); kfree(dev); } /** * tty_register_device_attr - register a tty device * @driver: the tty driver that describes the tty device * @index: the index in the tty driver for this tty device * @device: a struct device that is associated with this tty device. * This field is optional, if there is no known struct device * for this tty device it can be set to %NULL safely. * @drvdata: Driver data to be set to device. * @attr_grp: Attribute group to be set on device. * * This call is required to be made to register an individual tty device if the * tty driver's flags have the %TTY_DRIVER_DYNAMIC_DEV bit set. If that bit is * not set, this function should not be called by a tty driver. * * Locking: ?? * * Return: A pointer to the struct device for this tty device (or * ERR_PTR(-EFOO) on error). */ struct device *tty_register_device_attr(struct tty_driver *driver, unsigned index, struct device *device, void *drvdata, const struct attribute_group **attr_grp) { char name[64]; dev_t devt = MKDEV(driver->major, driver->minor_start) + index; struct ktermios *tp; struct device *dev; int retval; if (index >= driver->num) { pr_err("%s: Attempt to register invalid tty line number (%d)\n", driver->name, index); return ERR_PTR(-EINVAL); } if (driver->type == TTY_DRIVER_TYPE_PTY) pty_line_name(driver, index, name); else tty_line_name(driver, index, name); dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return ERR_PTR(-ENOMEM); dev->devt = devt; dev->class = &tty_class; dev->parent = device; dev->release = tty_device_create_release; dev_set_name(dev, "%s", name); dev->groups = attr_grp; dev_set_drvdata(dev, drvdata); dev_set_uevent_suppress(dev, 1); retval = device_register(dev); if (retval) goto err_put; if (!(driver->flags & TTY_DRIVER_DYNAMIC_ALLOC)) { /* * Free any saved termios data so that the termios state is * reset when reusing a minor number. */ tp = driver->termios[index]; if (tp) { driver->termios[index] = NULL; kfree(tp); } retval = tty_cdev_add(driver, devt, index, 1); if (retval) goto err_del; } dev_set_uevent_suppress(dev, 0); kobject_uevent(&dev->kobj, KOBJ_ADD); return dev; err_del: device_del(dev); err_put: put_device(dev); return ERR_PTR(retval); } EXPORT_SYMBOL_GPL(tty_register_device_attr); /** * tty_unregister_device - unregister a tty device * @driver: the tty driver that describes the tty device * @index: the index in the tty driver for this tty device * * If a tty device is registered with a call to tty_register_device() then * this function must be called when the tty device is gone. * * Locking: ?? */ void tty_unregister_device(struct tty_driver *driver, unsigned index) { device_destroy(&tty_class, MKDEV(driver->major, driver->minor_start) + index); if (!(driver->flags & TTY_DRIVER_DYNAMIC_ALLOC)) { cdev_del(driver->cdevs[index]); driver->cdevs[index] = NULL; } } EXPORT_SYMBOL(tty_unregister_device); /** * __tty_alloc_driver - allocate tty driver * @lines: count of lines this driver can handle at most * @owner: module which is responsible for this driver * @flags: some of enum tty_driver_flag, will be set in driver->flags * * This should not be called directly, tty_alloc_driver() should be used * instead. * * Returns: struct tty_driver or a PTR-encoded error (use IS_ERR() and friends). */ struct tty_driver *__tty_alloc_driver(unsigned int lines, struct module *owner, unsigned long flags) { struct tty_driver *driver; unsigned int cdevs = 1; int err; if (!lines || (flags & TTY_DRIVER_UNNUMBERED_NODE && lines > 1)) return ERR_PTR(-EINVAL); driver = kzalloc(sizeof(*driver), GFP_KERNEL); if (!driver) return ERR_PTR(-ENOMEM); kref_init(&driver->kref); driver->num = lines; driver->owner = owner; driver->flags = flags; if (!(flags & TTY_DRIVER_DEVPTS_MEM)) { driver->ttys = kcalloc(lines, sizeof(*driver->ttys), GFP_KERNEL); driver->termios = kcalloc(lines, sizeof(*driver->termios), GFP_KERNEL); if (!driver->ttys || !driver->termios) { err = -ENOMEM; goto err_free_all; } } if (!(flags & TTY_DRIVER_DYNAMIC_ALLOC)) { driver->ports = kcalloc(lines, sizeof(*driver->ports), GFP_KERNEL); if (!driver->ports) { err = -ENOMEM; goto err_free_all; } cdevs = lines; } driver->cdevs = kcalloc(cdevs, sizeof(*driver->cdevs), GFP_KERNEL); if (!driver->cdevs) { err = -ENOMEM; goto err_free_all; } return driver; err_free_all: kfree(driver->ports); kfree(driver->ttys); kfree(driver->termios); kfree(driver->cdevs); kfree(driver); return ERR_PTR(err); } EXPORT_SYMBOL(__tty_alloc_driver); static void destruct_tty_driver(struct kref *kref) { struct tty_driver *driver = container_of(kref, struct tty_driver, kref); int i; struct ktermios *tp; if (driver->flags & TTY_DRIVER_INSTALLED) { for (i = 0; i < driver->num; i++) { tp = driver->termios[i]; if (tp) { driver->termios[i] = NULL; kfree(tp); } if (!(driver->flags & TTY_DRIVER_DYNAMIC_DEV)) tty_unregister_device(driver, i); } proc_tty_unregister_driver(driver); if (driver->flags & TTY_DRIVER_DYNAMIC_ALLOC) cdev_del(driver->cdevs[0]); } kfree(driver->cdevs); kfree(driver->ports); kfree(driver->termios); kfree(driver->ttys); kfree(driver); } /** * tty_driver_kref_put - drop a reference to a tty driver * @driver: driver of which to drop the reference * * The final put will destroy and free up the driver. */ void tty_driver_kref_put(struct tty_driver *driver) { kref_put(&driver->kref, destruct_tty_driver); } EXPORT_SYMBOL(tty_driver_kref_put); /** * tty_register_driver - register a tty driver * @driver: driver to register * * Called by a tty driver to register itself. */ int tty_register_driver(struct tty_driver *driver) { int error; int i; dev_t dev; struct device *d; if (!driver->major) { error = alloc_chrdev_region(&dev, driver->minor_start, driver->num, driver->name); if (!error) { driver->major = MAJOR(dev); driver->minor_start = MINOR(dev); } } else { dev = MKDEV(driver->major, driver->minor_start); error = register_chrdev_region(dev, driver->num, driver->name); } if (error < 0) goto err; if (driver->flags & TTY_DRIVER_DYNAMIC_ALLOC) { error = tty_cdev_add(driver, dev, 0, driver->num); if (error) goto err_unreg_char; } scoped_guard(mutex, &tty_mutex) list_add(&driver->tty_drivers, &tty_drivers); if (!(driver->flags & TTY_DRIVER_DYNAMIC_DEV)) { for (i = 0; i < driver->num; i++) { d = tty_register_device(driver, i, NULL); if (IS_ERR(d)) { error = PTR_ERR(d); goto err_unreg_devs; } } } proc_tty_register_driver(driver); driver->flags |= TTY_DRIVER_INSTALLED; return 0; err_unreg_devs: for (i--; i >= 0; i--) tty_unregister_device(driver, i); scoped_guard(mutex, &tty_mutex) list_del(&driver->tty_drivers); err_unreg_char: unregister_chrdev_region(dev, driver->num); err: return error; } EXPORT_SYMBOL(tty_register_driver); /** * tty_unregister_driver - unregister a tty driver * @driver: driver to unregister * * Called by a tty driver to unregister itself. */ void tty_unregister_driver(struct tty_driver *driver) { unregister_chrdev_region(MKDEV(driver->major, driver->minor_start), driver->num); scoped_guard(mutex, &tty_mutex) list_del(&driver->tty_drivers); } EXPORT_SYMBOL(tty_unregister_driver); dev_t tty_devnum(struct tty_struct *tty) { return MKDEV(tty->driver->major, tty->driver->minor_start) + tty->index; } EXPORT_SYMBOL(tty_devnum); void tty_default_fops(struct file_operations *fops) { *fops = tty_fops; } static char *tty_devnode(const struct device *dev, umode_t *mode) { if (!mode) return NULL; if (dev->devt == MKDEV(TTYAUX_MAJOR, 0) || dev->devt == MKDEV(TTYAUX_MAJOR, 2)) *mode = 0666; return NULL; } const struct class tty_class = { .name = "tty", .devnode = tty_devnode, }; static int __init tty_class_init(void) { return class_register(&tty_class); } postcore_initcall(tty_class_init); /* 3/2004 jmc: why do these devices exist? */ static struct cdev tty_cdev, console_cdev; static ssize_t show_cons_active(struct device *dev, struct device_attribute *attr, char *buf) { struct console *cs[16]; int i = 0; struct console *c; ssize_t count = 0; /* * Hold the console_list_lock to guarantee that no consoles are * unregistered until all console processing is complete. * This also allows safe traversal of the console list and * race-free reading of @flags. */ console_list_lock(); for_each_console(c) { if (!c->device) continue; if (!(c->flags & CON_NBCON) && !c->write) continue; if ((c->flags & CON_ENABLED) == 0) continue; cs[i++] = c; if (i >= ARRAY_SIZE(cs)) break; } /* * Take console_lock to serialize device() callback with * other console operations. For example, fg_console is * modified under console_lock when switching vt. */ console_lock(); while (i--) { int index = cs[i]->index; struct tty_driver *drv = cs[i]->device(cs[i], &index); /* don't resolve tty0 as some programs depend on it */ if (drv && (cs[i]->index > 0 || drv->major != TTY_MAJOR)) count += tty_line_name(drv, index, buf + count); else count += sprintf(buf + count, "%s%d", cs[i]->name, cs[i]->index); count += sprintf(buf + count, "%c", i ? ' ':'\n'); } console_unlock(); console_list_unlock(); return count; } static DEVICE_ATTR(active, S_IRUGO, show_cons_active, NULL); static struct attribute *cons_dev_attrs[] = { &dev_attr_active.attr, NULL }; ATTRIBUTE_GROUPS(cons_dev); static struct device *consdev; void console_sysfs_notify(void) { if (consdev) sysfs_notify(&consdev->kobj, NULL, "active"); } static const struct ctl_table tty_table[] = { { .procname = "legacy_tiocsti", .data = &tty_legacy_tiocsti, .maxlen = sizeof(tty_legacy_tiocsti), .mode = 0644, .proc_handler = proc_dobool, }, { .procname = "ldisc_autoload", .data = &tty_ldisc_autoload, .maxlen = sizeof(tty_ldisc_autoload), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, }; /* * Ok, now we can initialize the rest of the tty devices and can count * on memory allocations, interrupts etc.. */ int __init tty_init(void) { register_sysctl_init("dev/tty", tty_table); cdev_init(&tty_cdev, &tty_fops); if (cdev_add(&tty_cdev, MKDEV(TTYAUX_MAJOR, 0), 1) || register_chrdev_region(MKDEV(TTYAUX_MAJOR, 0), 1, "/dev/tty") < 0) panic("Couldn't register /dev/tty driver\n"); device_create(&tty_class, NULL, MKDEV(TTYAUX_MAJOR, 0), NULL, "tty"); cdev_init(&console_cdev, &console_fops); if (cdev_add(&console_cdev, MKDEV(TTYAUX_MAJOR, 1), 1) || register_chrdev_region(MKDEV(TTYAUX_MAJOR, 1), 1, "/dev/console") < 0) panic("Couldn't register /dev/console driver\n"); consdev = device_create_with_groups(&tty_class, NULL, MKDEV(TTYAUX_MAJOR, 1), NULL, cons_dev_groups, "console"); if (IS_ERR(consdev)) consdev = NULL; #ifdef CONFIG_VT vty_init(&console_fops); #endif return 0; }
2 2 2 2 2 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright 2020 Xillybus Ltd, http://xillybus.com * * Driver for the XillyUSB FPGA/host framework. * * This driver interfaces with a special IP core in an FPGA, setting up * a pipe between a hardware FIFO in the programmable logic and a device * file in the host. The number of such pipes and their attributes are * set up on the logic. This driver detects these automatically and * creates the device files accordingly. */ #include <linux/types.h> #include <linux/slab.h> #include <linux/list.h> #include <linux/device.h> #include <linux/module.h> #include <asm/byteorder.h> #include <linux/io.h> #include <linux/interrupt.h> #include <linux/sched.h> #include <linux/fs.h> #include <linux/spinlock.h> #include <linux/mutex.h> #include <linux/workqueue.h> #include <linux/crc32.h> #include <linux/poll.h> #include <linux/delay.h> #include <linux/usb.h> #include "xillybus_class.h" MODULE_DESCRIPTION("Driver for XillyUSB FPGA IP Core"); MODULE_AUTHOR("Eli Billauer, Xillybus Ltd."); MODULE_ALIAS("xillyusb"); MODULE_LICENSE("GPL v2"); #define XILLY_RX_TIMEOUT (10 * HZ / 1000) #define XILLY_RESPONSE_TIMEOUT (500 * HZ / 1000) #define BUF_SIZE_ORDER 4 #define BUFNUM 8 #define LOG2_IDT_FIFO_SIZE 16 #define LOG2_INITIAL_FIFO_BUF_SIZE 16 #define MSG_EP_NUM 1 #define IN_EP_NUM 1 static const char xillyname[] = "xillyusb"; static unsigned int fifo_buf_order; static struct workqueue_struct *wakeup_wq; #define USB_VENDOR_ID_XILINX 0x03fd #define USB_VENDOR_ID_ALTERA 0x09fb #define USB_PRODUCT_ID_XILLYUSB 0xebbe static const struct usb_device_id xillyusb_table[] = { { USB_DEVICE(USB_VENDOR_ID_XILINX, USB_PRODUCT_ID_XILLYUSB) }, { USB_DEVICE(USB_VENDOR_ID_ALTERA, USB_PRODUCT_ID_XILLYUSB) }, { } }; MODULE_DEVICE_TABLE(usb, xillyusb_table); struct xillyusb_dev; struct xillyfifo { unsigned int bufsize; /* In bytes, always a power of 2 */ unsigned int bufnum; unsigned int size; /* Lazy: Equals bufsize * bufnum */ unsigned int buf_order; int fill; /* Number of bytes in the FIFO */ spinlock_t lock; wait_queue_head_t waitq; unsigned int readpos; unsigned int readbuf; unsigned int writepos; unsigned int writebuf; char **mem; }; struct xillyusb_channel; struct xillyusb_endpoint { struct xillyusb_dev *xdev; struct mutex ep_mutex; /* serialize operations on endpoint */ struct list_head buffers; struct list_head filled_buffers; spinlock_t buffers_lock; /* protect these two lists */ unsigned int order; unsigned int buffer_size; unsigned int fill_mask; int outstanding_urbs; struct usb_anchor anchor; struct xillyfifo fifo; struct work_struct workitem; bool shutting_down; bool drained; bool wake_on_drain; u8 ep_num; }; struct xillyusb_channel { struct xillyusb_dev *xdev; struct xillyfifo *in_fifo; struct xillyusb_endpoint *out_ep; struct mutex lock; /* protect @out_ep, @in_fifo, bit fields below */ struct mutex in_mutex; /* serialize fops on FPGA to host stream */ struct mutex out_mutex; /* serialize fops on host to FPGA stream */ wait_queue_head_t flushq; int chan_idx; u32 in_consumed_bytes; u32 in_current_checkpoint; u32 out_bytes; unsigned int in_log2_element_size; unsigned int out_log2_element_size; unsigned int in_log2_fifo_size; unsigned int out_log2_fifo_size; unsigned int read_data_ok; /* EOF not arrived (yet) */ unsigned int poll_used; unsigned int flushing; unsigned int flushed; unsigned int canceled; /* Bit fields protected by @lock except for initialization */ unsigned readable:1; unsigned writable:1; unsigned open_for_read:1; unsigned open_for_write:1; unsigned in_synchronous:1; unsigned out_synchronous:1; unsigned in_seekable:1; unsigned out_seekable:1; }; struct xillybuffer { struct list_head entry; struct xillyusb_endpoint *ep; void *buf; unsigned int len; }; struct xillyusb_dev { struct xillyusb_channel *channels; struct usb_device *udev; struct device *dev; /* For dev_err() and such */ struct kref kref; struct workqueue_struct *workq; int error; spinlock_t error_lock; /* protect @error */ struct work_struct wakeup_workitem; int num_channels; struct xillyusb_endpoint *msg_ep; struct xillyusb_endpoint *in_ep; struct mutex msg_mutex; /* serialize opcode transmission */ int in_bytes_left; int leftover_chan_num; unsigned int in_counter; struct mutex process_in_mutex; /* synchronize wakeup_all() */ }; /* * kref_mutex is used in xillyusb_open() to prevent the xillyusb_dev * struct from being freed during the gap between being found by * xillybus_find_inode() and having its reference count incremented. */ static DEFINE_MUTEX(kref_mutex); /* FPGA to host opcodes */ enum { OPCODE_DATA = 0, OPCODE_QUIESCE_ACK = 1, OPCODE_EOF = 2, OPCODE_REACHED_CHECKPOINT = 3, OPCODE_CANCELED_CHECKPOINT = 4, }; /* Host to FPGA opcodes */ enum { OPCODE_QUIESCE = 0, OPCODE_REQ_IDT = 1, OPCODE_SET_CHECKPOINT = 2, OPCODE_CLOSE = 3, OPCODE_SET_PUSH = 4, OPCODE_UPDATE_PUSH = 5, OPCODE_CANCEL_CHECKPOINT = 6, OPCODE_SET_ADDR = 7, }; /* * fifo_write() and fifo_read() are NOT reentrant (i.e. concurrent multiple * calls to each on the same FIFO is not allowed) however it's OK to have * threads calling each of the two functions once on the same FIFO, and * at the same time. */ static int fifo_write(struct xillyfifo *fifo, const void *data, unsigned int len, int (*copier)(void *, const void *, int)) { unsigned int done = 0; unsigned int todo = len; unsigned int nmax; unsigned int writepos = fifo->writepos; unsigned int writebuf = fifo->writebuf; unsigned long flags; int rc; nmax = fifo->size - READ_ONCE(fifo->fill); while (1) { unsigned int nrail = fifo->bufsize - writepos; unsigned int n = min(todo, nmax); if (n == 0) { spin_lock_irqsave(&fifo->lock, flags); fifo->fill += done; spin_unlock_irqrestore(&fifo->lock, flags); fifo->writepos = writepos; fifo->writebuf = writebuf; return done; } if (n > nrail) n = nrail; rc = (*copier)(fifo->mem[writebuf] + writepos, data + done, n); if (rc) return rc; done += n; todo -= n; writepos += n; nmax -= n; if (writepos == fifo->bufsize) { writepos = 0; writebuf++; if (writebuf == fifo->bufnum) writebuf = 0; } } } static int fifo_read(struct xillyfifo *fifo, void *data, unsigned int len, int (*copier)(void *, const void *, int)) { unsigned int done = 0; unsigned int todo = len; unsigned int fill; unsigned int readpos = fifo->readpos; unsigned int readbuf = fifo->readbuf; unsigned long flags; int rc; /* * The spinlock here is necessary, because otherwise fifo->fill * could have been increased by fifo_write() after writing data * to the buffer, but this data would potentially not have been * visible on this thread at the time the updated fifo->fill was. * That could lead to reading invalid data. */ spin_lock_irqsave(&fifo->lock, flags); fill = fifo->fill; spin_unlock_irqrestore(&fifo->lock, flags); while (1) { unsigned int nrail = fifo->bufsize - readpos; unsigned int n = min(todo, fill); if (n == 0) { spin_lock_irqsave(&fifo->lock, flags); fifo->fill -= done; spin_unlock_irqrestore(&fifo->lock, flags); fifo->readpos = readpos; fifo->readbuf = readbuf; return done; } if (n > nrail) n = nrail; rc = (*copier)(data + done, fifo->mem[readbuf] + readpos, n); if (rc) return rc; done += n; todo -= n; readpos += n; fill -= n; if (readpos == fifo->bufsize) { readpos = 0; readbuf++; if (readbuf == fifo->bufnum) readbuf = 0; } } } /* * These three wrapper functions are used as the @copier argument to * fifo_write() and fifo_read(), so that they can work directly with * user memory as well. */ static int xilly_copy_from_user(void *dst, const void *src, int n) { if (copy_from_user(dst, (const void __user *)src, n)) return -EFAULT; return 0; } static int xilly_copy_to_user(void *dst, const void *src, int n) { if (copy_to_user((void __user *)dst, src, n)) return -EFAULT; return 0; } static int xilly_memcpy(void *dst, const void *src, int n) { memcpy(dst, src, n); return 0; } static int fifo_init(struct xillyfifo *fifo, unsigned int log2_size) { unsigned int log2_bufnum; unsigned int buf_order; int i; unsigned int log2_fifo_buf_size; retry: log2_fifo_buf_size = fifo_buf_order + PAGE_SHIFT; if (log2_size > log2_fifo_buf_size) { log2_bufnum = log2_size - log2_fifo_buf_size; buf_order = fifo_buf_order; fifo->bufsize = 1 << log2_fifo_buf_size; } else { log2_bufnum = 0; buf_order = (log2_size > PAGE_SHIFT) ? log2_size - PAGE_SHIFT : 0; fifo->bufsize = 1 << log2_size; } fifo->bufnum = 1 << log2_bufnum; fifo->size = fifo->bufnum * fifo->bufsize; fifo->buf_order = buf_order; fifo->mem = kmalloc_array(fifo->bufnum, sizeof(void *), GFP_KERNEL); if (!fifo->mem) return -ENOMEM; for (i = 0; i < fifo->bufnum; i++) { fifo->mem[i] = (void *) __get_free_pages(GFP_KERNEL, buf_order); if (!fifo->mem[i]) goto memfail; } fifo->fill = 0; fifo->readpos = 0; fifo->readbuf = 0; fifo->writepos = 0; fifo->writebuf = 0; spin_lock_init(&fifo->lock); init_waitqueue_head(&fifo->waitq); return 0; memfail: for (i--; i >= 0; i--) free_pages((unsigned long)fifo->mem[i], buf_order); kfree(fifo->mem); fifo->mem = NULL; if (fifo_buf_order) { fifo_buf_order--; goto retry; } else { return -ENOMEM; } } static void fifo_mem_release(struct xillyfifo *fifo) { int i; if (!fifo->mem) return; for (i = 0; i < fifo->bufnum; i++) free_pages((unsigned long)fifo->mem[i], fifo->buf_order); kfree(fifo->mem); } /* * When endpoint_quiesce() returns, the endpoint has no URBs submitted, * won't accept any new URB submissions, and its related work item doesn't * and won't run anymore. */ static void endpoint_quiesce(struct xillyusb_endpoint *ep) { mutex_lock(&ep->ep_mutex); ep->shutting_down = true; mutex_unlock(&ep->ep_mutex); usb_kill_anchored_urbs(&ep->anchor); cancel_work_sync(&ep->workitem); } /* * Note that endpoint_dealloc() also frees fifo memory (if allocated), even * though endpoint_alloc doesn't allocate that memory. */ static void endpoint_dealloc(struct xillyusb_endpoint *ep) { struct list_head *this, *next; fifo_mem_release(&ep->fifo); /* Join @filled_buffers with @buffers to free these entries too */ list_splice(&ep->filled_buffers, &ep->buffers); list_for_each_safe(this, next, &ep->buffers) { struct xillybuffer *xb = list_entry(this, struct xillybuffer, entry); free_pages((unsigned long)xb->buf, ep->order); kfree(xb); } kfree(ep); } static struct xillyusb_endpoint *endpoint_alloc(struct xillyusb_dev *xdev, u8 ep_num, void (*work)(struct work_struct *), unsigned int order, int bufnum) { int i; struct xillyusb_endpoint *ep; ep = kzalloc(sizeof(*ep), GFP_KERNEL); if (!ep) return NULL; INIT_LIST_HEAD(&ep->buffers); INIT_LIST_HEAD(&ep->filled_buffers); spin_lock_init(&ep->buffers_lock); mutex_init(&ep->ep_mutex); init_usb_anchor(&ep->anchor); INIT_WORK(&ep->workitem, work); ep->order = order; ep->buffer_size = 1 << (PAGE_SHIFT + order); ep->outstanding_urbs = 0; ep->drained = true; ep->wake_on_drain = false; ep->xdev = xdev; ep->ep_num = ep_num; ep->shutting_down = false; for (i = 0; i < bufnum; i++) { struct xillybuffer *xb; unsigned long addr; xb = kzalloc(sizeof(*xb), GFP_KERNEL); if (!xb) { endpoint_dealloc(ep); return NULL; } addr = __get_free_pages(GFP_KERNEL, order); if (!addr) { kfree(xb); endpoint_dealloc(ep); return NULL; } xb->buf = (void *)addr; xb->ep = ep; list_add_tail(&xb->entry, &ep->buffers); } return ep; } static void cleanup_dev(struct kref *kref) { struct xillyusb_dev *xdev = container_of(kref, struct xillyusb_dev, kref); if (xdev->in_ep) endpoint_dealloc(xdev->in_ep); if (xdev->msg_ep) endpoint_dealloc(xdev->msg_ep); if (xdev->workq) destroy_workqueue(xdev->workq); usb_put_dev(xdev->udev); kfree(xdev->channels); /* Argument may be NULL, and that's fine */ kfree(xdev); } /* * @process_in_mutex is taken to ensure that bulk_in_work() won't call * process_bulk_in() after wakeup_all()'s execution: The latter zeroes all * @read_data_ok entries, which will make process_bulk_in() report false * errors if executed. The mechanism relies on that xdev->error is assigned * a non-zero value by report_io_error() prior to queueing wakeup_all(), * which prevents bulk_in_work() from calling process_bulk_in(). */ static void wakeup_all(struct work_struct *work) { int i; struct xillyusb_dev *xdev = container_of(work, struct xillyusb_dev, wakeup_workitem); mutex_lock(&xdev->process_in_mutex); for (i = 0; i < xdev->num_channels; i++) { struct xillyusb_channel *chan = &xdev->channels[i]; mutex_lock(&chan->lock); if (chan->in_fifo) { /* * Fake an EOF: Even if such arrives, it won't be * processed. */ chan->read_data_ok = 0; wake_up_interruptible(&chan->in_fifo->waitq); } if (chan->out_ep) wake_up_interruptible(&chan->out_ep->fifo.waitq); mutex_unlock(&chan->lock); wake_up_interruptible(&chan->flushq); } mutex_unlock(&xdev->process_in_mutex); wake_up_interruptible(&xdev->msg_ep->fifo.waitq); kref_put(&xdev->kref, cleanup_dev); } static void report_io_error(struct xillyusb_dev *xdev, int errcode) { unsigned long flags; bool do_once = false; spin_lock_irqsave(&xdev->error_lock, flags); if (!xdev->error) { xdev->error = errcode; do_once = true; } spin_unlock_irqrestore(&xdev->error_lock, flags); if (do_once) { kref_get(&xdev->kref); /* xdev is used by work item */ queue_work(wakeup_wq, &xdev->wakeup_workitem); } } /* * safely_assign_in_fifo() changes the value of chan->in_fifo and ensures * the previous pointer is never used after its return. */ static void safely_assign_in_fifo(struct xillyusb_channel *chan, struct xillyfifo *fifo) { mutex_lock(&chan->lock); chan->in_fifo = fifo; mutex_unlock(&chan->lock); flush_work(&chan->xdev->in_ep->workitem); } static void bulk_in_completer(struct urb *urb) { struct xillybuffer *xb = urb->context; struct xillyusb_endpoint *ep = xb->ep; unsigned long flags; if (urb->status) { if (!(urb->status == -ENOENT || urb->status == -ECONNRESET || urb->status == -ESHUTDOWN)) report_io_error(ep->xdev, -EIO); spin_lock_irqsave(&ep->buffers_lock, flags); list_add_tail(&xb->entry, &ep->buffers); ep->outstanding_urbs--; spin_unlock_irqrestore(&ep->buffers_lock, flags); return; } xb->len = urb->actual_length; spin_lock_irqsave(&ep->buffers_lock, flags); list_add_tail(&xb->entry, &ep->filled_buffers); spin_unlock_irqrestore(&ep->buffers_lock, flags); if (!ep->shutting_down) queue_work(ep->xdev->workq, &ep->workitem); } static void bulk_out_completer(struct urb *urb) { struct xillybuffer *xb = urb->context; struct xillyusb_endpoint *ep = xb->ep; unsigned long flags; if (urb->status && (!(urb->status == -ENOENT || urb->status == -ECONNRESET || urb->status == -ESHUTDOWN))) report_io_error(ep->xdev, -EIO); spin_lock_irqsave(&ep->buffers_lock, flags); list_add_tail(&xb->entry, &ep->buffers); ep->outstanding_urbs--; spin_unlock_irqrestore(&ep->buffers_lock, flags); if (!ep->shutting_down) queue_work(ep->xdev->workq, &ep->workitem); } static void try_queue_bulk_in(struct xillyusb_endpoint *ep) { struct xillyusb_dev *xdev = ep->xdev; struct xillybuffer *xb; struct urb *urb; int rc; unsigned long flags; unsigned int bufsize = ep->buffer_size; mutex_lock(&ep->ep_mutex); if (ep->shutting_down || xdev->error) goto done; while (1) { spin_lock_irqsave(&ep->buffers_lock, flags); if (list_empty(&ep->buffers)) { spin_unlock_irqrestore(&ep->buffers_lock, flags); goto done; } xb = list_first_entry(&ep->buffers, struct xillybuffer, entry); list_del(&xb->entry); ep->outstanding_urbs++; spin_unlock_irqrestore(&ep->buffers_lock, flags); urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) { report_io_error(xdev, -ENOMEM); goto relist; } usb_fill_bulk_urb(urb, xdev->udev, usb_rcvbulkpipe(xdev->udev, ep->ep_num), xb->buf, bufsize, bulk_in_completer, xb); usb_anchor_urb(urb, &ep->anchor); rc = usb_submit_urb(urb, GFP_KERNEL); if (rc) { report_io_error(xdev, (rc == -ENOMEM) ? -ENOMEM : -EIO); goto unanchor; } usb_free_urb(urb); /* This just decrements reference count */ } unanchor: usb_unanchor_urb(urb); usb_free_urb(urb); relist: spin_lock_irqsave(&ep->buffers_lock, flags); list_add_tail(&xb->entry, &ep->buffers); ep->outstanding_urbs--; spin_unlock_irqrestore(&ep->buffers_lock, flags); done: mutex_unlock(&ep->ep_mutex); } static void try_queue_bulk_out(struct xillyusb_endpoint *ep) { struct xillyfifo *fifo = &ep->fifo; struct xillyusb_dev *xdev = ep->xdev; struct xillybuffer *xb; struct urb *urb; int rc; unsigned int fill; unsigned long flags; bool do_wake = false; mutex_lock(&ep->ep_mutex); if (ep->shutting_down || xdev->error) goto done; fill = READ_ONCE(fifo->fill) & ep->fill_mask; while (1) { int count; unsigned int max_read; spin_lock_irqsave(&ep->buffers_lock, flags); /* * Race conditions might have the FIFO filled while the * endpoint is marked as drained here. That doesn't matter, * because the sole purpose of @drained is to ensure that * certain data has been sent on the USB channel before * shutting it down. Hence knowing that the FIFO appears * to be empty with no outstanding URBs at some moment * is good enough. */ if (!fill) { ep->drained = !ep->outstanding_urbs; if (ep->drained && ep->wake_on_drain) do_wake = true; spin_unlock_irqrestore(&ep->buffers_lock, flags); goto done; } ep->drained = false; if ((fill < ep->buffer_size && ep->outstanding_urbs) || list_empty(&ep->buffers)) { spin_unlock_irqrestore(&ep->buffers_lock, flags); goto done; } xb = list_first_entry(&ep->buffers, struct xillybuffer, entry); list_del(&xb->entry); ep->outstanding_urbs++; spin_unlock_irqrestore(&ep->buffers_lock, flags); max_read = min(fill, ep->buffer_size); count = fifo_read(&ep->fifo, xb->buf, max_read, xilly_memcpy); /* * xilly_memcpy always returns 0 => fifo_read can't fail => * count > 0 */ urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) { report_io_error(xdev, -ENOMEM); goto relist; } usb_fill_bulk_urb(urb, xdev->udev, usb_sndbulkpipe(xdev->udev, ep->ep_num), xb->buf, count, bulk_out_completer, xb); usb_anchor_urb(urb, &ep->anchor); rc = usb_submit_urb(urb, GFP_KERNEL); if (rc) { report_io_error(xdev, (rc == -ENOMEM) ? -ENOMEM : -EIO); goto unanchor; } usb_free_urb(urb); /* This just decrements reference count */ fill -= count; do_wake = true; } unanchor: usb_unanchor_urb(urb); usb_free_urb(urb); relist: spin_lock_irqsave(&ep->buffers_lock, flags); list_add_tail(&xb->entry, &ep->buffers); ep->outstanding_urbs--; spin_unlock_irqrestore(&ep->buffers_lock, flags); done: mutex_unlock(&ep->ep_mutex); if (do_wake) wake_up_interruptible(&fifo->waitq); } static void bulk_out_work(struct work_struct *work) { struct xillyusb_endpoint *ep = container_of(work, struct xillyusb_endpoint, workitem); try_queue_bulk_out(ep); } static int process_in_opcode(struct xillyusb_dev *xdev, int opcode, int chan_num) { struct xillyusb_channel *chan; struct device *dev = xdev->dev; int chan_idx = chan_num >> 1; if (chan_idx >= xdev->num_channels) { dev_err(dev, "Received illegal channel ID %d from FPGA\n", chan_num); return -EIO; } chan = &xdev->channels[chan_idx]; switch (opcode) { case OPCODE_EOF: if (!chan->read_data_ok) { dev_err(dev, "Received unexpected EOF for channel %d\n", chan_num); return -EIO; } /* * A write memory barrier ensures that the FIFO's fill level * is visible before read_data_ok turns zero, so the data in * the FIFO isn't missed by the consumer. */ smp_wmb(); WRITE_ONCE(chan->read_data_ok, 0); wake_up_interruptible(&chan->in_fifo->waitq); break; case OPCODE_REACHED_CHECKPOINT: chan->flushing = 0; wake_up_interruptible(&chan->flushq); break; case OPCODE_CANCELED_CHECKPOINT: chan->canceled = 1; wake_up_interruptible(&chan->flushq); break; default: dev_err(dev, "Received illegal opcode %d from FPGA\n", opcode); return -EIO; } return 0; } static int process_bulk_in(struct xillybuffer *xb) { struct xillyusb_endpoint *ep = xb->ep; struct xillyusb_dev *xdev = ep->xdev; struct device *dev = xdev->dev; int dws = xb->len >> 2; __le32 *p = xb->buf; u32 ctrlword; struct xillyusb_channel *chan; struct xillyfifo *fifo; int chan_num = 0, opcode; int chan_idx; int bytes, count, dwconsume; int in_bytes_left = 0; int rc; if ((dws << 2) != xb->len) { dev_err(dev, "Received BULK IN transfer with %d bytes, not a multiple of 4\n", xb->len); return -EIO; } if (xdev->in_bytes_left) { bytes = min(xdev->in_bytes_left, dws << 2); in_bytes_left = xdev->in_bytes_left - bytes; chan_num = xdev->leftover_chan_num; goto resume_leftovers; } while (dws) { ctrlword = le32_to_cpu(*p++); dws--; chan_num = ctrlword & 0xfff; count = (ctrlword >> 12) & 0x3ff; opcode = (ctrlword >> 24) & 0xf; if (opcode != OPCODE_DATA) { unsigned int in_counter = xdev->in_counter++ & 0x3ff; if (count != in_counter) { dev_err(dev, "Expected opcode counter %d, got %d\n", in_counter, count); return -EIO; } rc = process_in_opcode(xdev, opcode, chan_num); if (rc) return rc; continue; } bytes = min(count + 1, dws << 2); in_bytes_left = count + 1 - bytes; resume_leftovers: chan_idx = chan_num >> 1; if (!(chan_num & 1) || chan_idx >= xdev->num_channels || !xdev->channels[chan_idx].read_data_ok) { dev_err(dev, "Received illegal channel ID %d from FPGA\n", chan_num); return -EIO; } chan = &xdev->channels[chan_idx]; fifo = chan->in_fifo; if (unlikely(!fifo)) return -EIO; /* We got really unexpected data */ if (bytes != fifo_write(fifo, p, bytes, xilly_memcpy)) { dev_err(dev, "Misbehaving FPGA overflowed an upstream FIFO!\n"); return -EIO; } wake_up_interruptible(&fifo->waitq); dwconsume = (bytes + 3) >> 2; dws -= dwconsume; p += dwconsume; } xdev->in_bytes_left = in_bytes_left; xdev->leftover_chan_num = chan_num; return 0; } static void bulk_in_work(struct work_struct *work) { struct xillyusb_endpoint *ep = container_of(work, struct xillyusb_endpoint, workitem); struct xillyusb_dev *xdev = ep->xdev; unsigned long flags; struct xillybuffer *xb; bool consumed = false; int rc = 0; mutex_lock(&xdev->process_in_mutex); spin_lock_irqsave(&ep->buffers_lock, flags); while (1) { if (rc || list_empty(&ep->filled_buffers)) { spin_unlock_irqrestore(&ep->buffers_lock, flags); mutex_unlock(&xdev->process_in_mutex); if (rc) report_io_error(xdev, rc); else if (consumed) try_queue_bulk_in(ep); return; } xb = list_first_entry(&ep->filled_buffers, struct xillybuffer, entry); list_del(&xb->entry); spin_unlock_irqrestore(&ep->buffers_lock, flags); consumed = true; if (!xdev->error) rc = process_bulk_in(xb); spin_lock_irqsave(&ep->buffers_lock, flags); list_add_tail(&xb->entry, &ep->buffers); ep->outstanding_urbs--; } } static int xillyusb_send_opcode(struct xillyusb_dev *xdev, int chan_num, char opcode, u32 data) { struct xillyusb_endpoint *ep = xdev->msg_ep; struct xillyfifo *fifo = &ep->fifo; __le32 msg[2]; int rc = 0; msg[0] = cpu_to_le32((chan_num & 0xfff) | ((opcode & 0xf) << 24)); msg[1] = cpu_to_le32(data); mutex_lock(&xdev->msg_mutex); /* * The wait queue is woken with the interruptible variant, so the * wait function matches, however returning because of an interrupt * will mess things up considerably, in particular when the caller is * the release method. And the xdev->error part prevents being stuck * forever in the event of a bizarre hardware bug: Pull the USB plug. */ while (wait_event_interruptible(fifo->waitq, fifo->fill <= (fifo->size - 8) || xdev->error)) ; /* Empty loop */ if (xdev->error) { rc = xdev->error; goto unlock_done; } fifo_write(fifo, (void *)msg, 8, xilly_memcpy); try_queue_bulk_out(ep); unlock_done: mutex_unlock(&xdev->msg_mutex); return rc; } /* * Note that flush_downstream() merely waits for the data to arrive to * the application logic at the FPGA -- unlike PCIe Xillybus' counterpart, * it does nothing to make it happen (and neither is it necessary). * * This function is not reentrant for the same @chan, but this is covered * by the fact that for any given @chan, it's called either by the open, * write, llseek and flush fops methods, which can't run in parallel (and the * write + flush and llseek method handlers are protected with out_mutex). * * chan->flushed is there to avoid multiple flushes at the same position, * in particular as a result of programs that close the file descriptor * e.g. after a dup2() for redirection. */ static int flush_downstream(struct xillyusb_channel *chan, long timeout, bool interruptible) { struct xillyusb_dev *xdev = chan->xdev; int chan_num = chan->chan_idx << 1; long deadline, left_to_sleep; int rc; if (chan->flushed) return 0; deadline = jiffies + 1 + timeout; if (chan->flushing) { long cancel_deadline = jiffies + 1 + XILLY_RESPONSE_TIMEOUT; chan->canceled = 0; rc = xillyusb_send_opcode(xdev, chan_num, OPCODE_CANCEL_CHECKPOINT, 0); if (rc) return rc; /* Only real error, never -EINTR */ /* Ignoring interrupts. Cancellation must be handled */ while (!chan->canceled) { left_to_sleep = cancel_deadline - ((long)jiffies); if (left_to_sleep <= 0) { report_io_error(xdev, -EIO); return -EIO; } rc = wait_event_interruptible_timeout(chan->flushq, chan->canceled || xdev->error, left_to_sleep); if (xdev->error) return xdev->error; } } chan->flushing = 1; /* * The checkpoint is given in terms of data elements, not bytes. As * a result, if less than an element's worth of data is stored in the * FIFO, it's not flushed, including the flush before closing, which * means that such data is lost. This is consistent with PCIe Xillybus. */ rc = xillyusb_send_opcode(xdev, chan_num, OPCODE_SET_CHECKPOINT, chan->out_bytes >> chan->out_log2_element_size); if (rc) return rc; /* Only real error, never -EINTR */ if (!timeout) { while (chan->flushing) { rc = wait_event_interruptible(chan->flushq, !chan->flushing || xdev->error); if (xdev->error) return xdev->error; if (interruptible && rc) return -EINTR; } goto done; } while (chan->flushing) { left_to_sleep = deadline - ((long)jiffies); if (left_to_sleep <= 0) return -ETIMEDOUT; rc = wait_event_interruptible_timeout(chan->flushq, !chan->flushing || xdev->error, left_to_sleep); if (xdev->error) return xdev->error; if (interruptible && rc < 0) return -EINTR; } done: chan->flushed = 1; return 0; } /* request_read_anything(): Ask the FPGA for any little amount of data */ static int request_read_anything(struct xillyusb_channel *chan, char opcode) { struct xillyusb_dev *xdev = chan->xdev; unsigned int sh = chan->in_log2_element_size; int chan_num = (chan->chan_idx << 1) | 1; u32 mercy = chan->in_consumed_bytes + (2 << sh) - 1; return xillyusb_send_opcode(xdev, chan_num, opcode, mercy >> sh); } static int xillyusb_open(struct inode *inode, struct file *filp) { struct xillyusb_dev *xdev; struct xillyusb_channel *chan; struct xillyfifo *in_fifo = NULL; struct xillyusb_endpoint *out_ep = NULL; int rc; int index; mutex_lock(&kref_mutex); rc = xillybus_find_inode(inode, (void **)&xdev, &index); if (rc) { mutex_unlock(&kref_mutex); return rc; } kref_get(&xdev->kref); mutex_unlock(&kref_mutex); chan = &xdev->channels[index]; filp->private_data = chan; mutex_lock(&chan->lock); rc = -ENODEV; if (xdev->error) goto unmutex_fail; if (((filp->f_mode & FMODE_READ) && !chan->readable) || ((filp->f_mode & FMODE_WRITE) && !chan->writable)) goto unmutex_fail; if ((filp->f_flags & O_NONBLOCK) && (filp->f_mode & FMODE_READ) && chan->in_synchronous) { dev_err(xdev->dev, "open() failed: O_NONBLOCK not allowed for read on this device\n"); goto unmutex_fail; } if ((filp->f_flags & O_NONBLOCK) && (filp->f_mode & FMODE_WRITE) && chan->out_synchronous) { dev_err(xdev->dev, "open() failed: O_NONBLOCK not allowed for write on this device\n"); goto unmutex_fail; } rc = -EBUSY; if (((filp->f_mode & FMODE_READ) && chan->open_for_read) || ((filp->f_mode & FMODE_WRITE) && chan->open_for_write)) goto unmutex_fail; if (filp->f_mode & FMODE_READ) chan->open_for_read = 1; if (filp->f_mode & FMODE_WRITE) chan->open_for_write = 1; mutex_unlock(&chan->lock); if (filp->f_mode & FMODE_WRITE) { out_ep = endpoint_alloc(xdev, (chan->chan_idx + 2) | USB_DIR_OUT, bulk_out_work, BUF_SIZE_ORDER, BUFNUM); if (!out_ep) { rc = -ENOMEM; goto unopen; } rc = fifo_init(&out_ep->fifo, chan->out_log2_fifo_size); if (rc) goto late_unopen; out_ep->fill_mask = -(1 << chan->out_log2_element_size); chan->out_bytes = 0; chan->flushed = 0; /* * Sending a flush request to a previously closed stream * effectively opens it, and also waits until the command is * confirmed by the FPGA. The latter is necessary because the * data is sent through a separate BULK OUT endpoint, and the * xHCI controller is free to reorder transmissions. * * This can't go wrong unless there's a serious hardware error * (or the computer is stuck for 500 ms?) */ rc = flush_downstream(chan, XILLY_RESPONSE_TIMEOUT, false); if (rc == -ETIMEDOUT) { rc = -EIO; report_io_error(xdev, rc); } if (rc) goto late_unopen; } if (filp->f_mode & FMODE_READ) { in_fifo = kzalloc(sizeof(*in_fifo), GFP_KERNEL); if (!in_fifo) { rc = -ENOMEM; goto late_unopen; } rc = fifo_init(in_fifo, chan->in_log2_fifo_size); if (rc) { kfree(in_fifo); goto late_unopen; } } mutex_lock(&chan->lock); if (in_fifo) { chan->in_fifo = in_fifo; chan->read_data_ok = 1; } if (out_ep) chan->out_ep = out_ep; mutex_unlock(&chan->lock); if (in_fifo) { u32 in_checkpoint = 0; if (!chan->in_synchronous) in_checkpoint = in_fifo->size >> chan->in_log2_element_size; chan->in_consumed_bytes = 0; chan->poll_used = 0; chan->in_current_checkpoint = in_checkpoint; rc = xillyusb_send_opcode(xdev, (chan->chan_idx << 1) | 1, OPCODE_SET_CHECKPOINT, in_checkpoint); if (rc) /* Failure guarantees that opcode wasn't sent */ goto unfifo; /* * In non-blocking mode, request the FPGA to send any data it * has right away. Otherwise, the first read() will always * return -EAGAIN, which is OK strictly speaking, but ugly. * Checking and unrolling if this fails isn't worth the * effort -- the error is propagated to the first read() * anyhow. */ if (filp->f_flags & O_NONBLOCK) request_read_anything(chan, OPCODE_SET_PUSH); } return 0; unfifo: chan->read_data_ok = 0; safely_assign_in_fifo(chan, NULL); fifo_mem_release(in_fifo); kfree(in_fifo); if (out_ep) { mutex_lock(&chan->lock); chan->out_ep = NULL; mutex_unlock(&chan->lock); } late_unopen: if (out_ep) endpoint_dealloc(out_ep); unopen: mutex_lock(&chan->lock); if (filp->f_mode & FMODE_READ) chan->open_for_read = 0; if (filp->f_mode & FMODE_WRITE) chan->open_for_write = 0; mutex_unlock(&chan->lock); kref_put(&xdev->kref, cleanup_dev); return rc; unmutex_fail: kref_put(&xdev->kref, cleanup_dev); mutex_unlock(&chan->lock); return rc; } static ssize_t xillyusb_read(struct file *filp, char __user *userbuf, size_t count, loff_t *f_pos) { struct xillyusb_channel *chan = filp->private_data; struct xillyusb_dev *xdev = chan->xdev; struct xillyfifo *fifo = chan->in_fifo; int chan_num = (chan->chan_idx << 1) | 1; long deadline, left_to_sleep; int bytes_done = 0; bool sent_set_push = false; int rc; deadline = jiffies + 1 + XILLY_RX_TIMEOUT; rc = mutex_lock_interruptible(&chan->in_mutex); if (rc) return rc; while (1) { u32 fifo_checkpoint_bytes, complete_checkpoint_bytes; u32 complete_checkpoint, fifo_checkpoint; u32 checkpoint; s32 diff, leap; unsigned int sh = chan->in_log2_element_size; bool checkpoint_for_complete; rc = fifo_read(fifo, (__force void *)userbuf + bytes_done, count - bytes_done, xilly_copy_to_user); if (rc < 0) break; bytes_done += rc; chan->in_consumed_bytes += rc; left_to_sleep = deadline - ((long)jiffies); /* * Some 32-bit arithmetic that may wrap. Note that * complete_checkpoint is rounded up to the closest element * boundary, because the read() can't be completed otherwise. * fifo_checkpoint_bytes is rounded down, because it protects * in_fifo from overflowing. */ fifo_checkpoint_bytes = chan->in_consumed_bytes + fifo->size; complete_checkpoint_bytes = chan->in_consumed_bytes + count - bytes_done; fifo_checkpoint = fifo_checkpoint_bytes >> sh; complete_checkpoint = (complete_checkpoint_bytes + (1 << sh) - 1) >> sh; diff = (fifo_checkpoint - complete_checkpoint) << sh; if (chan->in_synchronous && diff >= 0) { checkpoint = complete_checkpoint; checkpoint_for_complete = true; } else { checkpoint = fifo_checkpoint; checkpoint_for_complete = false; } leap = (checkpoint - chan->in_current_checkpoint) << sh; /* * To prevent flooding of OPCODE_SET_CHECKPOINT commands as * data is consumed, it's issued only if it moves the * checkpoint by at least an 8th of the FIFO's size, or if * it's necessary to complete the number of bytes requested by * the read() call. * * chan->read_data_ok is checked to spare an unnecessary * submission after receiving EOF, however it's harmless if * such slips away. */ if (chan->read_data_ok && (leap > (fifo->size >> 3) || (checkpoint_for_complete && leap > 0))) { chan->in_current_checkpoint = checkpoint; rc = xillyusb_send_opcode(xdev, chan_num, OPCODE_SET_CHECKPOINT, checkpoint); if (rc) break; } if (bytes_done == count || (left_to_sleep <= 0 && bytes_done)) break; /* * Reaching here means that the FIFO was empty when * fifo_read() returned, but not necessarily right now. Error * and EOF are checked and reported only now, so that no data * that managed its way to the FIFO is lost. */ if (!READ_ONCE(chan->read_data_ok)) { /* FPGA has sent EOF */ /* Has data slipped into the FIFO since fifo_read()? */ smp_rmb(); if (READ_ONCE(fifo->fill)) continue; rc = 0; break; } if (xdev->error) { rc = xdev->error; break; } if (filp->f_flags & O_NONBLOCK) { rc = -EAGAIN; break; } if (!sent_set_push) { rc = xillyusb_send_opcode(xdev, chan_num, OPCODE_SET_PUSH, complete_checkpoint); if (rc) break; sent_set_push = true; } if (left_to_sleep > 0) { /* * Note that when xdev->error is set (e.g. when the * device is unplugged), read_data_ok turns zero and * fifo->waitq is awaken. * Therefore no special attention to xdev->error. */ rc = wait_event_interruptible_timeout (fifo->waitq, fifo->fill || !chan->read_data_ok, left_to_sleep); } else { /* bytes_done == 0 */ /* Tell FPGA to send anything it has */ rc = request_read_anything(chan, OPCODE_UPDATE_PUSH); if (rc) break; rc = wait_event_interruptible (fifo->waitq, fifo->fill || !chan->read_data_ok); } if (rc < 0) { rc = -EINTR; break; } } if (((filp->f_flags & O_NONBLOCK) || chan->poll_used) && !READ_ONCE(fifo->fill)) request_read_anything(chan, OPCODE_SET_PUSH); mutex_unlock(&chan->in_mutex); if (bytes_done) return bytes_done; return rc; } static int xillyusb_flush(struct file *filp, fl_owner_t id) { struct xillyusb_channel *chan = filp->private_data; int rc; if (!(filp->f_mode & FMODE_WRITE)) return 0; rc = mutex_lock_interruptible(&chan->out_mutex); if (rc) return rc; /* * One second's timeout on flushing. Interrupts are ignored, because if * the user pressed CTRL-C, that interrupt will still be in flight by * the time we reach here, and the opportunity to flush is lost. */ rc = flush_downstream(chan, HZ, false); mutex_unlock(&chan->out_mutex); if (rc == -ETIMEDOUT) { /* The things you do to use dev_warn() and not pr_warn() */ struct xillyusb_dev *xdev = chan->xdev; mutex_lock(&chan->lock); if (!xdev->error) dev_warn(xdev->dev, "Timed out while flushing. Output data may be lost.\n"); mutex_unlock(&chan->lock); } return rc; } static ssize_t xillyusb_write(struct file *filp, const char __user *userbuf, size_t count, loff_t *f_pos) { struct xillyusb_channel *chan = filp->private_data; struct xillyusb_dev *xdev = chan->xdev; struct xillyfifo *fifo = &chan->out_ep->fifo; int rc; rc = mutex_lock_interruptible(&chan->out_mutex); if (rc) return rc; while (1) { if (xdev->error) { rc = xdev->error; break; } if (count == 0) break; rc = fifo_write(fifo, (__force void *)userbuf, count, xilly_copy_from_user); if (rc != 0) break; if (filp->f_flags & O_NONBLOCK) { rc = -EAGAIN; break; } if (wait_event_interruptible (fifo->waitq, fifo->fill != fifo->size || xdev->error)) { rc = -EINTR; break; } } if (rc < 0) goto done; chan->out_bytes += rc; if (rc) { try_queue_bulk_out(chan->out_ep); chan->flushed = 0; } if (chan->out_synchronous) { int flush_rc = flush_downstream(chan, 0, true); if (flush_rc && !rc) rc = flush_rc; } done: mutex_unlock(&chan->out_mutex); return rc; } static int xillyusb_release(struct inode *inode, struct file *filp) { struct xillyusb_channel *chan = filp->private_data; struct xillyusb_dev *xdev = chan->xdev; int rc_read = 0, rc_write = 0; if (filp->f_mode & FMODE_READ) { struct xillyfifo *in_fifo = chan->in_fifo; rc_read = xillyusb_send_opcode(xdev, (chan->chan_idx << 1) | 1, OPCODE_CLOSE, 0); /* * If rc_read is nonzero, xdev->error indicates a global * device error. The error is reported later, so that * resources are freed. * * Looping on wait_event_interruptible() kinda breaks the idea * of being interruptible, and this should have been * wait_event(). Only it's being waken with * wake_up_interruptible() for the sake of other uses. If * there's a global device error, chan->read_data_ok is * deasserted and the wait queue is awaken, so this is covered. */ while (wait_event_interruptible(in_fifo->waitq, !chan->read_data_ok)) ; /* Empty loop */ safely_assign_in_fifo(chan, NULL); fifo_mem_release(in_fifo); kfree(in_fifo); mutex_lock(&chan->lock); chan->open_for_read = 0; mutex_unlock(&chan->lock); } if (filp->f_mode & FMODE_WRITE) { struct xillyusb_endpoint *ep = chan->out_ep; /* * chan->flushing isn't zeroed. If the pre-release flush timed * out, a cancel request will be sent before the next * OPCODE_SET_CHECKPOINT (i.e. when the file is opened again). * This is despite that the FPGA forgets about the checkpoint * request as the file closes. Still, in an exceptional race * condition, the FPGA could send an OPCODE_REACHED_CHECKPOINT * just before closing that would reach the host after the * file has re-opened. */ mutex_lock(&chan->lock); chan->out_ep = NULL; mutex_unlock(&chan->lock); endpoint_quiesce(ep); endpoint_dealloc(ep); /* See comments on rc_read above */ rc_write = xillyusb_send_opcode(xdev, chan->chan_idx << 1, OPCODE_CLOSE, 0); mutex_lock(&chan->lock); chan->open_for_write = 0; mutex_unlock(&chan->lock); } kref_put(&xdev->kref, cleanup_dev); return rc_read ? rc_read : rc_write; } /* * Xillybus' API allows device nodes to be seekable, giving the user * application access to a RAM array on the FPGA (or logic emulating it). */ static loff_t xillyusb_llseek(struct file *filp, loff_t offset, int whence) { struct xillyusb_channel *chan = filp->private_data; struct xillyusb_dev *xdev = chan->xdev; loff_t pos = filp->f_pos; int rc = 0; unsigned int log2_element_size = chan->readable ? chan->in_log2_element_size : chan->out_log2_element_size; /* * Take both mutexes not allowing interrupts, since it seems like * common applications don't expect an -EINTR here. Besides, multiple * access to a single file descriptor on seekable devices is a mess * anyhow. */ mutex_lock(&chan->out_mutex); mutex_lock(&chan->in_mutex); switch (whence) { case SEEK_SET: pos = offset; break; case SEEK_CUR: pos += offset; break; case SEEK_END: pos = offset; /* Going to the end => to the beginning */ break; default: rc = -EINVAL; goto end; } /* In any case, we must finish on an element boundary */ if (pos & ((1 << log2_element_size) - 1)) { rc = -EINVAL; goto end; } rc = xillyusb_send_opcode(xdev, chan->chan_idx << 1, OPCODE_SET_ADDR, pos >> log2_element_size); if (rc) goto end; if (chan->writable) { chan->flushed = 0; rc = flush_downstream(chan, HZ, false); } end: mutex_unlock(&chan->out_mutex); mutex_unlock(&chan->in_mutex); if (rc) /* Return error after releasing mutexes */ return rc; filp->f_pos = pos; return pos; } static __poll_t xillyusb_poll(struct file *filp, poll_table *wait) { struct xillyusb_channel *chan = filp->private_data; __poll_t mask = 0; if (chan->in_fifo) poll_wait(filp, &chan->in_fifo->waitq, wait); if (chan->out_ep) poll_wait(filp, &chan->out_ep->fifo.waitq, wait); /* * If this is the first time poll() is called, and the file is * readable, set the relevant flag. Also tell the FPGA to send all it * has, to kickstart the mechanism that ensures there's always some * data in in_fifo unless the stream is dry end-to-end. Note that the * first poll() may not return a EPOLLIN, even if there's data on the * FPGA. Rather, the data will arrive soon, and trigger the relevant * wait queue. */ if (!chan->poll_used && chan->in_fifo) { chan->poll_used = 1; request_read_anything(chan, OPCODE_SET_PUSH); } /* * poll() won't play ball regarding read() channels which * are synchronous. Allowing that will create situations where data has * been delivered at the FPGA, and users expecting select() to wake up, * which it may not. So make it never work. */ if (chan->in_fifo && !chan->in_synchronous && (READ_ONCE(chan->in_fifo->fill) || !chan->read_data_ok)) mask |= EPOLLIN | EPOLLRDNORM; if (chan->out_ep && (READ_ONCE(chan->out_ep->fifo.fill) != chan->out_ep->fifo.size)) mask |= EPOLLOUT | EPOLLWRNORM; if (chan->xdev->error) mask |= EPOLLERR; return mask; } static const struct file_operations xillyusb_fops = { .owner = THIS_MODULE, .read = xillyusb_read, .write = xillyusb_write, .open = xillyusb_open, .flush = xillyusb_flush, .release = xillyusb_release, .llseek = xillyusb_llseek, .poll = xillyusb_poll, }; static int xillyusb_setup_base_eps(struct xillyusb_dev *xdev) { struct usb_device *udev = xdev->udev; /* Verify that device has the two fundamental bulk in/out endpoints */ if (usb_pipe_type_check(udev, usb_sndbulkpipe(udev, MSG_EP_NUM)) || usb_pipe_type_check(udev, usb_rcvbulkpipe(udev, IN_EP_NUM))) return -ENODEV; xdev->msg_ep = endpoint_alloc(xdev, MSG_EP_NUM | USB_DIR_OUT, bulk_out_work, 1, 2); if (!xdev->msg_ep) return -ENOMEM; if (fifo_init(&xdev->msg_ep->fifo, 13)) /* 8 kiB */ goto dealloc; xdev->msg_ep->fill_mask = -8; /* 8 bytes granularity */ xdev->in_ep = endpoint_alloc(xdev, IN_EP_NUM | USB_DIR_IN, bulk_in_work, BUF_SIZE_ORDER, BUFNUM); if (!xdev->in_ep) goto dealloc; try_queue_bulk_in(xdev->in_ep); return 0; dealloc: endpoint_dealloc(xdev->msg_ep); /* Also frees FIFO mem if allocated */ xdev->msg_ep = NULL; return -ENOMEM; } static int setup_channels(struct xillyusb_dev *xdev, __le16 *chandesc, int num_channels) { struct usb_device *udev = xdev->udev; struct xillyusb_channel *chan, *new_channels; int i; chan = kcalloc(num_channels, sizeof(*chan), GFP_KERNEL); if (!chan) return -ENOMEM; new_channels = chan; for (i = 0; i < num_channels; i++, chan++) { unsigned int in_desc = le16_to_cpu(*chandesc++); unsigned int out_desc = le16_to_cpu(*chandesc++); chan->xdev = xdev; mutex_init(&chan->in_mutex); mutex_init(&chan->out_mutex); mutex_init(&chan->lock); init_waitqueue_head(&chan->flushq); chan->chan_idx = i; if (in_desc & 0x80) { /* Entry is valid */ chan->readable = 1; chan->in_synchronous = !!(in_desc & 0x40); chan->in_seekable = !!(in_desc & 0x20); chan->in_log2_element_size = in_desc & 0x0f; chan->in_log2_fifo_size = ((in_desc >> 8) & 0x1f) + 16; } /* * A downstream channel should never exist above index 13, * as it would request a nonexistent BULK endpoint > 15. * In the peculiar case that it does, it's ignored silently. */ if ((out_desc & 0x80) && i < 14) { /* Entry is valid */ if (usb_pipe_type_check(udev, usb_sndbulkpipe(udev, i + 2))) { dev_err(xdev->dev, "Missing BULK OUT endpoint %d\n", i + 2); kfree(new_channels); return -ENODEV; } chan->writable = 1; chan->out_synchronous = !!(out_desc & 0x40); chan->out_seekable = !!(out_desc & 0x20); chan->out_log2_element_size = out_desc & 0x0f; chan->out_log2_fifo_size = ((out_desc >> 8) & 0x1f) + 16; } } xdev->channels = new_channels; return 0; } static int xillyusb_discovery(struct usb_interface *interface) { int rc; struct xillyusb_dev *xdev = usb_get_intfdata(interface); __le16 bogus_chandesc[2]; struct xillyfifo idt_fifo; struct xillyusb_channel *chan; unsigned int idt_len, names_offset; unsigned char *idt; int num_channels; rc = xillyusb_send_opcode(xdev, ~0, OPCODE_QUIESCE, 0); if (rc) { dev_err(&interface->dev, "Failed to send quiesce request. Aborting.\n"); return rc; } /* Phase I: Set up one fake upstream channel and obtain IDT */ /* Set up a fake IDT with one async IN stream */ bogus_chandesc[0] = cpu_to_le16(0x80); bogus_chandesc[1] = cpu_to_le16(0); rc = setup_channels(xdev, bogus_chandesc, 1); if (rc) return rc; rc = fifo_init(&idt_fifo, LOG2_IDT_FIFO_SIZE); if (rc) return rc; chan = xdev->channels; chan->in_fifo = &idt_fifo; chan->read_data_ok = 1; xdev->num_channels = 1; rc = xillyusb_send_opcode(xdev, ~0, OPCODE_REQ_IDT, 0); if (rc) { dev_err(&interface->dev, "Failed to send IDT request. Aborting.\n"); goto unfifo; } rc = wait_event_interruptible_timeout(idt_fifo.waitq, !chan->read_data_ok, XILLY_RESPONSE_TIMEOUT); if (xdev->error) { rc = xdev->error; goto unfifo; } if (rc < 0) { rc = -EINTR; /* Interrupt on probe method? Interesting. */ goto unfifo; } if (chan->read_data_ok) { rc = -ETIMEDOUT; dev_err(&interface->dev, "No response from FPGA. Aborting.\n"); goto unfifo; } idt_len = READ_ONCE(idt_fifo.fill); idt = kmalloc(idt_len, GFP_KERNEL); if (!idt) { rc = -ENOMEM; goto unfifo; } fifo_read(&idt_fifo, idt, idt_len, xilly_memcpy); if (crc32_le(~0, idt, idt_len) != 0) { dev_err(&interface->dev, "IDT failed CRC check. Aborting.\n"); rc = -ENODEV; goto unidt; } if (*idt > 0x90) { dev_err(&interface->dev, "No support for IDT version 0x%02x. Maybe the xillyusb driver needs an upgrade. Aborting.\n", (int)*idt); rc = -ENODEV; goto unidt; } /* Phase II: Set up the streams as defined in IDT */ num_channels = le16_to_cpu(*((__le16 *)(idt + 1))); names_offset = 3 + num_channels * 4; idt_len -= 4; /* Exclude CRC */ if (idt_len < names_offset) { dev_err(&interface->dev, "IDT too short. This is exceptionally weird, because its CRC is OK\n"); rc = -ENODEV; goto unidt; } rc = setup_channels(xdev, (void *)idt + 3, num_channels); if (rc) goto unidt; /* * Except for wildly misbehaving hardware, or if it was disconnected * just after responding with the IDT, there is no reason for any * work item to be running now. To be sure that xdev->channels * is updated on anything that might run in parallel, flush the * device's workqueue and the wakeup work item. This rarely * does anything. */ flush_workqueue(xdev->workq); flush_work(&xdev->wakeup_workitem); xdev->num_channels = num_channels; fifo_mem_release(&idt_fifo); kfree(chan); rc = xillybus_init_chrdev(&interface->dev, &xillyusb_fops, THIS_MODULE, xdev, idt + names_offset, idt_len - names_offset, num_channels, xillyname, true); kfree(idt); return rc; unidt: kfree(idt); unfifo: safely_assign_in_fifo(chan, NULL); fifo_mem_release(&idt_fifo); return rc; } static int xillyusb_probe(struct usb_interface *interface, const struct usb_device_id *id) { struct xillyusb_dev *xdev; int rc; xdev = kzalloc(sizeof(*xdev), GFP_KERNEL); if (!xdev) return -ENOMEM; kref_init(&xdev->kref); mutex_init(&xdev->process_in_mutex); mutex_init(&xdev->msg_mutex); xdev->udev = usb_get_dev(interface_to_usbdev(interface)); xdev->dev = &interface->dev; xdev->error = 0; spin_lock_init(&xdev->error_lock); xdev->in_counter = 0; xdev->in_bytes_left = 0; xdev->workq = alloc_workqueue(xillyname, WQ_HIGHPRI, 0); if (!xdev->workq) { dev_err(&interface->dev, "Failed to allocate work queue\n"); rc = -ENOMEM; goto fail; } INIT_WORK(&xdev->wakeup_workitem, wakeup_all); usb_set_intfdata(interface, xdev); rc = xillyusb_setup_base_eps(xdev); if (rc) goto fail; rc = xillyusb_discovery(interface); if (rc) goto latefail; return 0; latefail: endpoint_quiesce(xdev->in_ep); endpoint_quiesce(xdev->msg_ep); fail: usb_set_intfdata(interface, NULL); kref_put(&xdev->kref, cleanup_dev); return rc; } static void xillyusb_disconnect(struct usb_interface *interface) { struct xillyusb_dev *xdev = usb_get_intfdata(interface); struct xillyusb_endpoint *msg_ep = xdev->msg_ep; struct xillyfifo *fifo = &msg_ep->fifo; int rc; int i; xillybus_cleanup_chrdev(xdev, &interface->dev); /* * Try to send OPCODE_QUIESCE, which will fail silently if the device * was disconnected, but makes sense on module unload. */ msg_ep->wake_on_drain = true; xillyusb_send_opcode(xdev, ~0, OPCODE_QUIESCE, 0); /* * If the device has been disconnected, sending the opcode causes * a global device error with xdev->error, if such error didn't * occur earlier. Hence timing out means that the USB link is fine, * but somehow the message wasn't sent. Should never happen. */ rc = wait_event_interruptible_timeout(fifo->waitq, msg_ep->drained || xdev->error, XILLY_RESPONSE_TIMEOUT); if (!rc) dev_err(&interface->dev, "Weird timeout condition on sending quiesce request.\n"); report_io_error(xdev, -ENODEV); /* Discourage further activity */ /* * This device driver is declared with soft_unbind set, or else * sending OPCODE_QUIESCE above would always fail. The price is * that the USB framework didn't kill outstanding URBs, so it has * to be done explicitly before returning from this call. */ for (i = 0; i < xdev->num_channels; i++) { struct xillyusb_channel *chan = &xdev->channels[i]; /* * Lock taken to prevent chan->out_ep from changing. It also * ensures xillyusb_open() and xillyusb_flush() don't access * xdev->dev after being nullified below. */ mutex_lock(&chan->lock); if (chan->out_ep) endpoint_quiesce(chan->out_ep); mutex_unlock(&chan->lock); } endpoint_quiesce(xdev->in_ep); endpoint_quiesce(xdev->msg_ep); usb_set_intfdata(interface, NULL); xdev->dev = NULL; mutex_lock(&kref_mutex); kref_put(&xdev->kref, cleanup_dev); mutex_unlock(&kref_mutex); } static struct usb_driver xillyusb_driver = { .name = xillyname, .id_table = xillyusb_table, .probe = xillyusb_probe, .disconnect = xillyusb_disconnect, .soft_unbind = 1, }; static int __init xillyusb_init(void) { int rc = 0; wakeup_wq = alloc_workqueue(xillyname, 0, 0); if (!wakeup_wq) return -ENOMEM; if (LOG2_INITIAL_FIFO_BUF_SIZE > PAGE_SHIFT) fifo_buf_order = LOG2_INITIAL_FIFO_BUF_SIZE - PAGE_SHIFT; else fifo_buf_order = 0; rc = usb_register(&xillyusb_driver); if (rc) destroy_workqueue(wakeup_wq); return rc; } static void __exit xillyusb_exit(void) { usb_deregister(&xillyusb_driver); destroy_workqueue(wakeup_wq); } module_init(xillyusb_init); module_exit(xillyusb_exit);
231 1526 66 5114 115 15 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 /* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (c) 2021, Google LLC. * Pasha Tatashin <pasha.tatashin@soleen.com> */ #ifndef __LINUX_PAGE_TABLE_CHECK_H #define __LINUX_PAGE_TABLE_CHECK_H #ifdef CONFIG_PAGE_TABLE_CHECK #include <linux/jump_label.h> extern struct static_key_true page_table_check_disabled; extern struct page_ext_operations page_table_check_ops; void __page_table_check_zero(struct page *page, unsigned int order); void __page_table_check_pte_clear(struct mm_struct *mm, pte_t pte); void __page_table_check_pmd_clear(struct mm_struct *mm, pmd_t pmd); void __page_table_check_pud_clear(struct mm_struct *mm, pud_t pud); void __page_table_check_ptes_set(struct mm_struct *mm, pte_t *ptep, pte_t pte, unsigned int nr); void __page_table_check_pmds_set(struct mm_struct *mm, pmd_t *pmdp, pmd_t pmd, unsigned int nr); void __page_table_check_puds_set(struct mm_struct *mm, pud_t *pudp, pud_t pud, unsigned int nr); void __page_table_check_pte_clear_range(struct mm_struct *mm, unsigned long addr, pmd_t pmd); static inline void page_table_check_alloc(struct page *page, unsigned int order) { if (static_branch_likely(&page_table_check_disabled)) return; __page_table_check_zero(page, order); } static inline void page_table_check_free(struct page *page, unsigned int order) { if (static_branch_likely(&page_table_check_disabled)) return; __page_table_check_zero(page, order); } static inline void page_table_check_pte_clear(struct mm_struct *mm, pte_t pte) { if (static_branch_likely(&page_table_check_disabled)) return; __page_table_check_pte_clear(mm, pte); } static inline void page_table_check_pmd_clear(struct mm_struct *mm, pmd_t pmd) { if (static_branch_likely(&page_table_check_disabled)) return; __page_table_check_pmd_clear(mm, pmd); } static inline void page_table_check_pud_clear(struct mm_struct *mm, pud_t pud) { if (static_branch_likely(&page_table_check_disabled)) return; __page_table_check_pud_clear(mm, pud); } static inline void page_table_check_ptes_set(struct mm_struct *mm, pte_t *ptep, pte_t pte, unsigned int nr) { if (static_branch_likely(&page_table_check_disabled)) return; __page_table_check_ptes_set(mm, ptep, pte, nr); } static inline void page_table_check_pmds_set(struct mm_struct *mm, pmd_t *pmdp, pmd_t pmd, unsigned int nr) { if (static_branch_likely(&page_table_check_disabled)) return; __page_table_check_pmds_set(mm, pmdp, pmd, nr); } static inline void page_table_check_puds_set(struct mm_struct *mm, pud_t *pudp, pud_t pud, unsigned int nr) { if (static_branch_likely(&page_table_check_disabled)) return; __page_table_check_puds_set(mm, pudp, pud, nr); } static inline void page_table_check_pte_clear_range(struct mm_struct *mm, unsigned long addr, pmd_t pmd) { if (static_branch_likely(&page_table_check_disabled)) return; __page_table_check_pte_clear_range(mm, addr, pmd); } #else static inline void page_table_check_alloc(struct page *page, unsigned int order) { } static inline void page_table_check_free(struct page *page, unsigned int order) { } static inline void page_table_check_pte_clear(struct mm_struct *mm, pte_t pte) { } static inline void page_table_check_pmd_clear(struct mm_struct *mm, pmd_t pmd) { } static inline void page_table_check_pud_clear(struct mm_struct *mm, pud_t pud) { } static inline void page_table_check_ptes_set(struct mm_struct *mm, pte_t *ptep, pte_t pte, unsigned int nr) { } static inline void page_table_check_pmds_set(struct mm_struct *mm, pmd_t *pmdp, pmd_t pmd, unsigned int nr) { } static inline void page_table_check_puds_set(struct mm_struct *mm, pud_t *pudp, pud_t pud, unsigned int nr) { } static inline void page_table_check_pte_clear_range(struct mm_struct *mm, unsigned long addr, pmd_t pmd) { } #endif /* CONFIG_PAGE_TABLE_CHECK */ #define page_table_check_pmd_set(mm, pmdp, pmd) page_table_check_pmds_set(mm, pmdp, pmd, 1) #define page_table_check_pud_set(mm, pudp, pud) page_table_check_puds_set(mm, pudp, pud, 1) #endif /* __LINUX_PAGE_TABLE_CHECK_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 /* SPDX-License-Identifier: GPL-2.0 */ /* * Routines to manage notifier chains for passing status changes to any * interested routines. We need this instead of hard coded call lists so * that modules can poke their nose into the innards. The network devices * needed them so here they are for the rest of you. * * Alan Cox <Alan.Cox@linux.org> */ #ifndef _LINUX_NOTIFIER_H #define _LINUX_NOTIFIER_H #include <linux/errno.h> #include <linux/mutex.h> #include <linux/rwsem.h> #include <linux/srcu.h> /* * Notifier chains are of four types: * * Atomic notifier chains: Chain callbacks run in interrupt/atomic * context. Callouts are not allowed to block. * Blocking notifier chains: Chain callbacks run in process context. * Callouts are allowed to block. * Raw notifier chains: There are no restrictions on callbacks, * registration, or unregistration. All locking and protection * must be provided by the caller. * SRCU notifier chains: A variant of blocking notifier chains, with * the same restrictions. * * atomic_notifier_chain_register() may be called from an atomic context, * but blocking_notifier_chain_register() and srcu_notifier_chain_register() * must be called from a process context. Ditto for the corresponding * _unregister() routines. * * atomic_notifier_chain_unregister(), blocking_notifier_chain_unregister(), * and srcu_notifier_chain_unregister() _must not_ be called from within * the call chain. * * SRCU notifier chains are an alternative form of blocking notifier chains. * They use SRCU (Sleepable Read-Copy Update) instead of rw-semaphores for * protection of the chain links. This means there is _very_ low overhead * in srcu_notifier_call_chain(): no cache bounces and no memory barriers. * As compensation, srcu_notifier_chain_unregister() is rather expensive. * SRCU notifier chains should be used when the chain will be called very * often but notifier_blocks will seldom be removed. */ struct notifier_block; typedef int (*notifier_fn_t)(struct notifier_block *nb, unsigned long action, void *data); struct notifier_block { notifier_fn_t notifier_call; struct notifier_block __rcu *next; int priority; }; struct atomic_notifier_head { spinlock_t lock; struct notifier_block __rcu *head; }; struct blocking_notifier_head { struct rw_semaphore rwsem; struct notifier_block __rcu *head; }; struct raw_notifier_head { struct notifier_block __rcu *head; }; struct srcu_notifier_head { struct mutex mutex; struct srcu_usage srcuu; struct srcu_struct srcu; struct notifier_block __rcu *head; }; #define ATOMIC_INIT_NOTIFIER_HEAD(name) do { \ spin_lock_init(&(name)->lock); \ (name)->head = NULL; \ } while (0) #define BLOCKING_INIT_NOTIFIER_HEAD(name) do { \ init_rwsem(&(name)->rwsem); \ (name)->head = NULL; \ } while (0) #define RAW_INIT_NOTIFIER_HEAD(name) do { \ (name)->head = NULL; \ } while (0) /* srcu_notifier_heads must be cleaned up dynamically */ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh); #define srcu_cleanup_notifier_head(name) \ cleanup_srcu_struct(&(name)->srcu); #define ATOMIC_NOTIFIER_INIT(name) { \ .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ .head = NULL } #define BLOCKING_NOTIFIER_INIT(name) { \ .rwsem = __RWSEM_INITIALIZER((name).rwsem), \ .head = NULL } #define RAW_NOTIFIER_INIT(name) { \ .head = NULL } #define SRCU_NOTIFIER_INIT(name, pcpu) \ { \ .mutex = __MUTEX_INITIALIZER(name.mutex), \ .head = NULL, \ .srcuu = __SRCU_USAGE_INIT(name.srcuu), \ .srcu = __SRCU_STRUCT_INIT(name.srcu, name.srcuu, pcpu), \ } #define ATOMIC_NOTIFIER_HEAD(name) \ struct atomic_notifier_head name = \ ATOMIC_NOTIFIER_INIT(name) #define BLOCKING_NOTIFIER_HEAD(name) \ struct blocking_notifier_head name = \ BLOCKING_NOTIFIER_INIT(name) #define RAW_NOTIFIER_HEAD(name) \ struct raw_notifier_head name = \ RAW_NOTIFIER_INIT(name) #ifdef CONFIG_TREE_SRCU #define _SRCU_NOTIFIER_HEAD(name, mod) \ static DEFINE_PER_CPU(struct srcu_data, name##_head_srcu_data); \ mod struct srcu_notifier_head name = \ SRCU_NOTIFIER_INIT(name, name##_head_srcu_data) #else #define _SRCU_NOTIFIER_HEAD(name, mod) \ mod struct srcu_notifier_head name = \ SRCU_NOTIFIER_INIT(name, name) #endif #define SRCU_NOTIFIER_HEAD(name) \ _SRCU_NOTIFIER_HEAD(name, /* not static */) #define SRCU_NOTIFIER_HEAD_STATIC(name) \ _SRCU_NOTIFIER_HEAD(name, static) #ifdef __KERNEL__ extern int atomic_notifier_chain_register(struct atomic_notifier_head *nh, struct notifier_block *nb); extern int blocking_notifier_chain_register(struct blocking_notifier_head *nh, struct notifier_block *nb); extern int raw_notifier_chain_register(struct raw_notifier_head *nh, struct notifier_block *nb); extern int srcu_notifier_chain_register(struct srcu_notifier_head *nh, struct notifier_block *nb); extern int atomic_notifier_chain_register_unique_prio( struct atomic_notifier_head *nh, struct notifier_block *nb); extern int blocking_notifier_chain_register_unique_prio( struct blocking_notifier_head *nh, struct notifier_block *nb); extern int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh, struct notifier_block *nb); extern int blocking_notifier_chain_unregister(struct blocking_notifier_head *nh, struct notifier_block *nb); extern int raw_notifier_chain_unregister(struct raw_notifier_head *nh, struct notifier_block *nb); extern int srcu_notifier_chain_unregister(struct srcu_notifier_head *nh, struct notifier_block *nb); extern int atomic_notifier_call_chain(struct atomic_notifier_head *nh, unsigned long val, void *v); extern int blocking_notifier_call_chain(struct blocking_notifier_head *nh, unsigned long val, void *v); extern int raw_notifier_call_chain(struct raw_notifier_head *nh, unsigned long val, void *v); extern int srcu_notifier_call_chain(struct srcu_notifier_head *nh, unsigned long val, void *v); extern int blocking_notifier_call_chain_robust(struct blocking_notifier_head *nh, unsigned long val_up, unsigned long val_down, void *v); extern int raw_notifier_call_chain_robust(struct raw_notifier_head *nh, unsigned long val_up, unsigned long val_down, void *v); extern bool atomic_notifier_call_chain_is_empty(struct atomic_notifier_head *nh); #define NOTIFY_DONE 0x0000 /* Don't care */ #define NOTIFY_OK 0x0001 /* Suits me */ #define NOTIFY_STOP_MASK 0x8000 /* Don't call further */ #define NOTIFY_BAD (NOTIFY_STOP_MASK|0x0002) /* Bad/Veto action */ /* * Clean way to return from the notifier and stop further calls. */ #define NOTIFY_STOP (NOTIFY_OK|NOTIFY_STOP_MASK) /* Encapsulate (negative) errno value (in particular, NOTIFY_BAD <=> EPERM). */ static inline int notifier_from_errno(int err) { if (err) return NOTIFY_STOP_MASK | (NOTIFY_OK - err); return NOTIFY_OK; } /* Restore (negative) errno value from notify return value. */ static inline int notifier_to_errno(int ret) { ret &= ~NOTIFY_STOP_MASK; return ret > NOTIFY_OK ? NOTIFY_OK - ret : 0; } /* * Declared notifiers so far. I can imagine quite a few more chains * over time (eg laptop power reset chains, reboot chain (to clean * device units up), device [un]mount chain, module load/unload chain, * low memory chain, screenblank chain (for plug in modular screenblankers) * VC switch chains (for loadable kernel svgalib VC switch helpers) etc... */ /* CPU notfiers are defined in include/linux/cpu.h. */ /* netdevice notifiers are defined in include/linux/netdevice.h */ /* reboot notifiers are defined in include/linux/reboot.h. */ /* Hibernation and suspend events are defined in include/linux/suspend.h. */ /* Virtual Terminal events are defined in include/linux/vt.h. */ #define NETLINK_URELEASE 0x0001 /* Unicast netlink socket released */ /* Console keyboard events. * Note: KBD_KEYCODE is always sent before KBD_UNBOUND_KEYCODE, KBD_UNICODE and * KBD_KEYSYM. */ #define KBD_KEYCODE 0x0001 /* Keyboard keycode, called before any other */ #define KBD_UNBOUND_KEYCODE 0x0002 /* Keyboard keycode which is not bound to any other */ #define KBD_UNICODE 0x0003 /* Keyboard unicode */ #define KBD_KEYSYM 0x0004 /* Keyboard keysym */ #define KBD_POST_KEYSYM 0x0005 /* Called after keyboard keysym interpretation */ #endif /* __KERNEL__ */ #endif /* _LINUX_NOTIFIER_H */
118 117 116 3 3 1 1 115 3 3 1 54 4 1 53 113 112 118 6 108 5 2 112 113 106 5 124 123 124 123 124 123 124 122 122 6 6 6 6 6 6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2005,2006,2007,2008 IBM Corporation * * Authors: * Mimi Zohar <zohar@us.ibm.com> * Kylene Hall <kjhall@us.ibm.com> * * File: ima_crypto.c * Calculates md5/sha1 file hash, template hash, boot-aggreate hash */ #include <linux/kernel.h> #include <linux/moduleparam.h> #include <linux/ratelimit.h> #include <linux/file.h> #include <linux/crypto.h> #include <linux/scatterlist.h> #include <linux/err.h> #include <linux/slab.h> #include <crypto/hash.h> #include "ima.h" /* minimum file size for ahash use */ static unsigned long ima_ahash_minsize; module_param_named(ahash_minsize, ima_ahash_minsize, ulong, 0644); MODULE_PARM_DESC(ahash_minsize, "Minimum file size for ahash use"); /* default is 0 - 1 page. */ static int ima_maxorder; static unsigned int ima_bufsize = PAGE_SIZE; static int param_set_bufsize(const char *val, const struct kernel_param *kp) { unsigned long long size; int order; size = memparse(val, NULL); order = get_order(size); if (order > MAX_PAGE_ORDER) return -EINVAL; ima_maxorder = order; ima_bufsize = PAGE_SIZE << order; return 0; } static const struct kernel_param_ops param_ops_bufsize = { .set = param_set_bufsize, .get = param_get_uint, }; #define param_check_bufsize(name, p) __param_check(name, p, unsigned int) module_param_named(ahash_bufsize, ima_bufsize, bufsize, 0644); MODULE_PARM_DESC(ahash_bufsize, "Maximum ahash buffer size"); static struct crypto_shash *ima_shash_tfm; static struct crypto_ahash *ima_ahash_tfm; int ima_sha1_idx __ro_after_init; int ima_hash_algo_idx __ro_after_init; /* * Additional number of slots reserved, as needed, for SHA1 * and IMA default algo. */ int ima_extra_slots __ro_after_init; struct ima_algo_desc *ima_algo_array __ro_after_init; static int __init ima_init_ima_crypto(void) { long rc; ima_shash_tfm = crypto_alloc_shash(hash_algo_name[ima_hash_algo], 0, 0); if (IS_ERR(ima_shash_tfm)) { rc = PTR_ERR(ima_shash_tfm); pr_err("Can not allocate %s (reason: %ld)\n", hash_algo_name[ima_hash_algo], rc); return rc; } pr_info("Allocated hash algorithm: %s\n", hash_algo_name[ima_hash_algo]); return 0; } static struct crypto_shash *ima_alloc_tfm(enum hash_algo algo) { struct crypto_shash *tfm = ima_shash_tfm; int rc, i; if (algo < 0 || algo >= HASH_ALGO__LAST) algo = ima_hash_algo; if (algo == ima_hash_algo) return tfm; for (i = 0; i < NR_BANKS(ima_tpm_chip) + ima_extra_slots; i++) if (ima_algo_array[i].tfm && ima_algo_array[i].algo == algo) return ima_algo_array[i].tfm; tfm = crypto_alloc_shash(hash_algo_name[algo], 0, 0); if (IS_ERR(tfm)) { rc = PTR_ERR(tfm); pr_err("Can not allocate %s (reason: %d)\n", hash_algo_name[algo], rc); } return tfm; } int __init ima_init_crypto(void) { enum hash_algo algo; long rc; int i; rc = ima_init_ima_crypto(); if (rc) return rc; ima_sha1_idx = -1; ima_hash_algo_idx = -1; for (i = 0; i < NR_BANKS(ima_tpm_chip); i++) { algo = ima_tpm_chip->allocated_banks[i].crypto_id; if (algo == HASH_ALGO_SHA1) ima_sha1_idx = i; if (algo == ima_hash_algo) ima_hash_algo_idx = i; } if (ima_sha1_idx < 0) { ima_sha1_idx = NR_BANKS(ima_tpm_chip) + ima_extra_slots++; if (ima_hash_algo == HASH_ALGO_SHA1) ima_hash_algo_idx = ima_sha1_idx; } if (ima_hash_algo_idx < 0) ima_hash_algo_idx = NR_BANKS(ima_tpm_chip) + ima_extra_slots++; ima_algo_array = kcalloc(NR_BANKS(ima_tpm_chip) + ima_extra_slots, sizeof(*ima_algo_array), GFP_KERNEL); if (!ima_algo_array) { rc = -ENOMEM; goto out; } for (i = 0; i < NR_BANKS(ima_tpm_chip); i++) { algo = ima_tpm_chip->allocated_banks[i].crypto_id; ima_algo_array[i].algo = algo; /* unknown TPM algorithm */ if (algo == HASH_ALGO__LAST) continue; if (algo == ima_hash_algo) { ima_algo_array[i].tfm = ima_shash_tfm; continue; } ima_algo_array[i].tfm = ima_alloc_tfm(algo); if (IS_ERR(ima_algo_array[i].tfm)) { if (algo == HASH_ALGO_SHA1) { rc = PTR_ERR(ima_algo_array[i].tfm); ima_algo_array[i].tfm = NULL; goto out_array; } ima_algo_array[i].tfm = NULL; } } if (ima_sha1_idx >= NR_BANKS(ima_tpm_chip)) { if (ima_hash_algo == HASH_ALGO_SHA1) { ima_algo_array[ima_sha1_idx].tfm = ima_shash_tfm; } else { ima_algo_array[ima_sha1_idx].tfm = ima_alloc_tfm(HASH_ALGO_SHA1); if (IS_ERR(ima_algo_array[ima_sha1_idx].tfm)) { rc = PTR_ERR(ima_algo_array[ima_sha1_idx].tfm); goto out_array; } } ima_algo_array[ima_sha1_idx].algo = HASH_ALGO_SHA1; } if (ima_hash_algo_idx >= NR_BANKS(ima_tpm_chip) && ima_hash_algo_idx != ima_sha1_idx) { ima_algo_array[ima_hash_algo_idx].tfm = ima_shash_tfm; ima_algo_array[ima_hash_algo_idx].algo = ima_hash_algo; } return 0; out_array: for (i = 0; i < NR_BANKS(ima_tpm_chip) + ima_extra_slots; i++) { if (!ima_algo_array[i].tfm || ima_algo_array[i].tfm == ima_shash_tfm) continue; crypto_free_shash(ima_algo_array[i].tfm); } kfree(ima_algo_array); out: crypto_free_shash(ima_shash_tfm); return rc; } static void ima_free_tfm(struct crypto_shash *tfm) { int i; if (tfm == ima_shash_tfm) return; for (i = 0; i < NR_BANKS(ima_tpm_chip) + ima_extra_slots; i++) if (ima_algo_array[i].tfm == tfm) return; crypto_free_shash(tfm); } /** * ima_alloc_pages() - Allocate contiguous pages. * @max_size: Maximum amount of memory to allocate. * @allocated_size: Returned size of actual allocation. * @last_warn: Should the min_size allocation warn or not. * * Tries to do opportunistic allocation for memory first trying to allocate * max_size amount of memory and then splitting that until zero order is * reached. Allocation is tried without generating allocation warnings unless * last_warn is set. Last_warn set affects only last allocation of zero order. * * By default, ima_maxorder is 0 and it is equivalent to kmalloc(GFP_KERNEL) * * Return pointer to allocated memory, or NULL on failure. */ static void *ima_alloc_pages(loff_t max_size, size_t *allocated_size, int last_warn) { void *ptr; int order = ima_maxorder; gfp_t gfp_mask = __GFP_RECLAIM | __GFP_NOWARN | __GFP_NORETRY; if (order) order = min(get_order(max_size), order); for (; order; order--) { ptr = (void *)__get_free_pages(gfp_mask, order); if (ptr) { *allocated_size = PAGE_SIZE << order; return ptr; } } /* order is zero - one page */ gfp_mask = GFP_KERNEL; if (!last_warn) gfp_mask |= __GFP_NOWARN; ptr = (void *)__get_free_pages(gfp_mask, 0); if (ptr) { *allocated_size = PAGE_SIZE; return ptr; } *allocated_size = 0; return NULL; } /** * ima_free_pages() - Free pages allocated by ima_alloc_pages(). * @ptr: Pointer to allocated pages. * @size: Size of allocated buffer. */ static void ima_free_pages(void *ptr, size_t size) { if (!ptr) return; free_pages((unsigned long)ptr, get_order(size)); } static struct crypto_ahash *ima_alloc_atfm(enum hash_algo algo) { struct crypto_ahash *tfm = ima_ahash_tfm; int rc; if (algo < 0 || algo >= HASH_ALGO__LAST) algo = ima_hash_algo; if (algo != ima_hash_algo || !tfm) { tfm = crypto_alloc_ahash(hash_algo_name[algo], 0, 0); if (!IS_ERR(tfm)) { if (algo == ima_hash_algo) ima_ahash_tfm = tfm; } else { rc = PTR_ERR(tfm); pr_err("Can not allocate %s (reason: %d)\n", hash_algo_name[algo], rc); } } return tfm; } static void ima_free_atfm(struct crypto_ahash *tfm) { if (tfm != ima_ahash_tfm) crypto_free_ahash(tfm); } static inline int ahash_wait(int err, struct crypto_wait *wait) { err = crypto_wait_req(err, wait); if (err) pr_crit_ratelimited("ahash calculation failed: err: %d\n", err); return err; } static int ima_calc_file_hash_atfm(struct file *file, struct ima_digest_data *hash, struct crypto_ahash *tfm) { loff_t i_size, offset; char *rbuf[2] = { NULL, }; int rc, rbuf_len, active = 0, ahash_rc = 0; struct ahash_request *req; struct scatterlist sg[1]; struct crypto_wait wait; size_t rbuf_size[2]; hash->length = crypto_ahash_digestsize(tfm); req = ahash_request_alloc(tfm, GFP_KERNEL); if (!req) return -ENOMEM; crypto_init_wait(&wait); ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, crypto_req_done, &wait); rc = ahash_wait(crypto_ahash_init(req), &wait); if (rc) goto out1; i_size = i_size_read(file_inode(file)); if (i_size == 0) goto out2; /* * Try to allocate maximum size of memory. * Fail if even a single page cannot be allocated. */ rbuf[0] = ima_alloc_pages(i_size, &rbuf_size[0], 1); if (!rbuf[0]) { rc = -ENOMEM; goto out1; } /* Only allocate one buffer if that is enough. */ if (i_size > rbuf_size[0]) { /* * Try to allocate secondary buffer. If that fails fallback to * using single buffering. Use previous memory allocation size * as baseline for possible allocation size. */ rbuf[1] = ima_alloc_pages(i_size - rbuf_size[0], &rbuf_size[1], 0); } for (offset = 0; offset < i_size; offset += rbuf_len) { if (!rbuf[1] && offset) { /* Not using two buffers, and it is not the first * read/request, wait for the completion of the * previous ahash_update() request. */ rc = ahash_wait(ahash_rc, &wait); if (rc) goto out3; } /* read buffer */ rbuf_len = min_t(loff_t, i_size - offset, rbuf_size[active]); rc = integrity_kernel_read(file, offset, rbuf[active], rbuf_len); if (rc != rbuf_len) { if (rc >= 0) rc = -EINVAL; /* * Forward current rc, do not overwrite with return value * from ahash_wait() */ ahash_wait(ahash_rc, &wait); goto out3; } if (rbuf[1] && offset) { /* Using two buffers, and it is not the first * read/request, wait for the completion of the * previous ahash_update() request. */ rc = ahash_wait(ahash_rc, &wait); if (rc) goto out3; } sg_init_one(&sg[0], rbuf[active], rbuf_len); ahash_request_set_crypt(req, sg, NULL, rbuf_len); ahash_rc = crypto_ahash_update(req); if (rbuf[1]) active = !active; /* swap buffers, if we use two */ } /* wait for the last update request to complete */ rc = ahash_wait(ahash_rc, &wait); out3: ima_free_pages(rbuf[0], rbuf_size[0]); ima_free_pages(rbuf[1], rbuf_size[1]); out2: if (!rc) { ahash_request_set_crypt(req, NULL, hash->digest, 0); rc = ahash_wait(crypto_ahash_final(req), &wait); } out1: ahash_request_free(req); return rc; } static int ima_calc_file_ahash(struct file *file, struct ima_digest_data *hash) { struct crypto_ahash *tfm; int rc; tfm = ima_alloc_atfm(hash->algo); if (IS_ERR(tfm)) return PTR_ERR(tfm); rc = ima_calc_file_hash_atfm(file, hash, tfm); ima_free_atfm(tfm); return rc; } static int ima_calc_file_hash_tfm(struct file *file, struct ima_digest_data *hash, struct crypto_shash *tfm) { loff_t i_size, offset = 0; char *rbuf; int rc; SHASH_DESC_ON_STACK(shash, tfm); shash->tfm = tfm; hash->length = crypto_shash_digestsize(tfm); rc = crypto_shash_init(shash); if (rc != 0) return rc; i_size = i_size_read(file_inode(file)); if (i_size == 0) goto out; rbuf = kzalloc(PAGE_SIZE, GFP_KERNEL); if (!rbuf) return -ENOMEM; while (offset < i_size) { int rbuf_len; rbuf_len = integrity_kernel_read(file, offset, rbuf, PAGE_SIZE); if (rbuf_len < 0) { rc = rbuf_len; break; } if (rbuf_len == 0) { /* unexpected EOF */ rc = -EINVAL; break; } offset += rbuf_len; rc = crypto_shash_update(shash, rbuf, rbuf_len); if (rc) break; } kfree(rbuf); out: if (!rc) rc = crypto_shash_final(shash, hash->digest); return rc; } static int ima_calc_file_shash(struct file *file, struct ima_digest_data *hash) { struct crypto_shash *tfm; int rc; tfm = ima_alloc_tfm(hash->algo); if (IS_ERR(tfm)) return PTR_ERR(tfm); rc = ima_calc_file_hash_tfm(file, hash, tfm); ima_free_tfm(tfm); return rc; } /* * ima_calc_file_hash - calculate file hash * * Asynchronous hash (ahash) allows using HW acceleration for calculating * a hash. ahash performance varies for different data sizes on different * crypto accelerators. shash performance might be better for smaller files. * The 'ima.ahash_minsize' module parameter allows specifying the best * minimum file size for using ahash on the system. * * If the ima.ahash_minsize parameter is not specified, this function uses * shash for the hash calculation. If ahash fails, it falls back to using * shash. */ int ima_calc_file_hash(struct file *file, struct ima_digest_data *hash) { loff_t i_size; int rc; struct file *f = file; bool new_file_instance = false; /* * For consistency, fail file's opened with the O_DIRECT flag on * filesystems mounted with/without DAX option. */ if (file->f_flags & O_DIRECT) { hash->length = hash_digest_size[ima_hash_algo]; hash->algo = ima_hash_algo; return -EINVAL; } /* Open a new file instance in O_RDONLY if we cannot read */ if (!(file->f_mode & FMODE_READ)) { int flags = file->f_flags & ~(O_WRONLY | O_APPEND | O_TRUNC | O_CREAT | O_NOCTTY | O_EXCL); flags |= O_RDONLY; f = dentry_open(&file->f_path, flags, file->f_cred); if (IS_ERR(f)) return PTR_ERR(f); new_file_instance = true; } i_size = i_size_read(file_inode(f)); if (ima_ahash_minsize && i_size >= ima_ahash_minsize) { rc = ima_calc_file_ahash(f, hash); if (!rc) goto out; } rc = ima_calc_file_shash(f, hash); out: if (new_file_instance) fput(f); return rc; } /* * Calculate the hash of template data */ static int ima_calc_field_array_hash_tfm(struct ima_field_data *field_data, struct ima_template_entry *entry, int tfm_idx) { SHASH_DESC_ON_STACK(shash, ima_algo_array[tfm_idx].tfm); struct ima_template_desc *td = entry->template_desc; int num_fields = entry->template_desc->num_fields; int rc, i; shash->tfm = ima_algo_array[tfm_idx].tfm; rc = crypto_shash_init(shash); if (rc != 0) return rc; for (i = 0; i < num_fields; i++) { u8 buffer[IMA_EVENT_NAME_LEN_MAX + 1] = { 0 }; u8 *data_to_hash = field_data[i].data; u32 datalen = field_data[i].len; u32 datalen_to_hash = !ima_canonical_fmt ? datalen : (__force u32)cpu_to_le32(datalen); if (strcmp(td->name, IMA_TEMPLATE_IMA_NAME) != 0) { rc = crypto_shash_update(shash, (const u8 *) &datalen_to_hash, sizeof(datalen_to_hash)); if (rc) break; } else if (strcmp(td->fields[i]->field_id, "n") == 0) { memcpy(buffer, data_to_hash, datalen); data_to_hash = buffer; datalen = IMA_EVENT_NAME_LEN_MAX + 1; } rc = crypto_shash_update(shash, data_to_hash, datalen); if (rc) break; } if (!rc) rc = crypto_shash_final(shash, entry->digests[tfm_idx].digest); return rc; } int ima_calc_field_array_hash(struct ima_field_data *field_data, struct ima_template_entry *entry) { u16 alg_id; int rc, i; rc = ima_calc_field_array_hash_tfm(field_data, entry, ima_sha1_idx); if (rc) return rc; entry->digests[ima_sha1_idx].alg_id = TPM_ALG_SHA1; for (i = 0; i < NR_BANKS(ima_tpm_chip) + ima_extra_slots; i++) { if (i == ima_sha1_idx) continue; if (i < NR_BANKS(ima_tpm_chip)) { alg_id = ima_tpm_chip->allocated_banks[i].alg_id; entry->digests[i].alg_id = alg_id; } /* for unmapped TPM algorithms digest is still a padded SHA1 */ if (!ima_algo_array[i].tfm) { memcpy(entry->digests[i].digest, entry->digests[ima_sha1_idx].digest, TPM_DIGEST_SIZE); continue; } rc = ima_calc_field_array_hash_tfm(field_data, entry, i); if (rc) return rc; } return rc; } static int calc_buffer_ahash_atfm(const void *buf, loff_t len, struct ima_digest_data *hash, struct crypto_ahash *tfm) { struct ahash_request *req; struct scatterlist sg; struct crypto_wait wait; int rc, ahash_rc = 0; hash->length = crypto_ahash_digestsize(tfm); req = ahash_request_alloc(tfm, GFP_KERNEL); if (!req) return -ENOMEM; crypto_init_wait(&wait); ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, crypto_req_done, &wait); rc = ahash_wait(crypto_ahash_init(req), &wait); if (rc) goto out; sg_init_one(&sg, buf, len); ahash_request_set_crypt(req, &sg, NULL, len); ahash_rc = crypto_ahash_update(req); /* wait for the update request to complete */ rc = ahash_wait(ahash_rc, &wait); if (!rc) { ahash_request_set_crypt(req, NULL, hash->digest, 0); rc = ahash_wait(crypto_ahash_final(req), &wait); } out: ahash_request_free(req); return rc; } static int calc_buffer_ahash(const void *buf, loff_t len, struct ima_digest_data *hash) { struct crypto_ahash *tfm; int rc; tfm = ima_alloc_atfm(hash->algo); if (IS_ERR(tfm)) return PTR_ERR(tfm); rc = calc_buffer_ahash_atfm(buf, len, hash, tfm); ima_free_atfm(tfm); return rc; } static int calc_buffer_shash_tfm(const void *buf, loff_t size, struct ima_digest_data *hash, struct crypto_shash *tfm) { SHASH_DESC_ON_STACK(shash, tfm); unsigned int len; int rc; shash->tfm = tfm; hash->length = crypto_shash_digestsize(tfm); rc = crypto_shash_init(shash); if (rc != 0) return rc; while (size) { len = size < PAGE_SIZE ? size : PAGE_SIZE; rc = crypto_shash_update(shash, buf, len); if (rc) break; buf += len; size -= len; } if (!rc) rc = crypto_shash_final(shash, hash->digest); return rc; } static int calc_buffer_shash(const void *buf, loff_t len, struct ima_digest_data *hash) { struct crypto_shash *tfm; int rc; tfm = ima_alloc_tfm(hash->algo); if (IS_ERR(tfm)) return PTR_ERR(tfm); rc = calc_buffer_shash_tfm(buf, len, hash, tfm); ima_free_tfm(tfm); return rc; } int ima_calc_buffer_hash(const void *buf, loff_t len, struct ima_digest_data *hash) { int rc; if (ima_ahash_minsize && len >= ima_ahash_minsize) { rc = calc_buffer_ahash(buf, len, hash); if (!rc) return 0; } return calc_buffer_shash(buf, len, hash); } static void ima_pcrread(u32 idx, struct tpm_digest *d) { if (!ima_tpm_chip) return; if (tpm_pcr_read(ima_tpm_chip, idx, d) != 0) pr_err("Error Communicating to TPM chip\n"); } /* * The boot_aggregate is a cumulative hash over TPM registers 0 - 7. With * TPM 1.2 the boot_aggregate was based on reading the SHA1 PCRs, but with * TPM 2.0 hash agility, TPM chips could support multiple TPM PCR banks, * allowing firmware to configure and enable different banks. * * Knowing which TPM bank is read to calculate the boot_aggregate digest * needs to be conveyed to a verifier. For this reason, use the same * hash algorithm for reading the TPM PCRs as for calculating the boot * aggregate digest as stored in the measurement list. */ static int ima_calc_boot_aggregate_tfm(char *digest, u16 alg_id, struct crypto_shash *tfm) { struct tpm_digest d = { .alg_id = alg_id, .digest = {0} }; int rc; u32 i; SHASH_DESC_ON_STACK(shash, tfm); shash->tfm = tfm; pr_devel("calculating the boot-aggregate based on TPM bank: %04x\n", d.alg_id); rc = crypto_shash_init(shash); if (rc != 0) return rc; /* cumulative digest over TPM registers 0-7 */ for (i = TPM_PCR0; i < TPM_PCR8; i++) { ima_pcrread(i, &d); /* now accumulate with current aggregate */ rc = crypto_shash_update(shash, d.digest, crypto_shash_digestsize(tfm)); if (rc != 0) return rc; } /* * Extend cumulative digest over TPM registers 8-9, which contain * measurement for the kernel command line (reg. 8) and image (reg. 9) * in a typical PCR allocation. Registers 8-9 are only included in * non-SHA1 boot_aggregate digests to avoid ambiguity. */ if (alg_id != TPM_ALG_SHA1) { for (i = TPM_PCR8; i < TPM_PCR10; i++) { ima_pcrread(i, &d); rc = crypto_shash_update(shash, d.digest, crypto_shash_digestsize(tfm)); } } if (!rc) crypto_shash_final(shash, digest); return rc; } int ima_calc_boot_aggregate(struct ima_digest_data *hash) { struct crypto_shash *tfm; u16 crypto_id, alg_id; int rc, i, bank_idx = -1; for (i = 0; i < ima_tpm_chip->nr_allocated_banks; i++) { crypto_id = ima_tpm_chip->allocated_banks[i].crypto_id; if (crypto_id == hash->algo) { bank_idx = i; break; } if (crypto_id == HASH_ALGO_SHA256) bank_idx = i; if (bank_idx == -1 && crypto_id == HASH_ALGO_SHA1) bank_idx = i; } if (bank_idx == -1) { pr_err("No suitable TPM algorithm for boot aggregate\n"); return 0; } hash->algo = ima_tpm_chip->allocated_banks[bank_idx].crypto_id; tfm = ima_alloc_tfm(hash->algo); if (IS_ERR(tfm)) return PTR_ERR(tfm); hash->length = crypto_shash_digestsize(tfm); alg_id = ima_tpm_chip->allocated_banks[bank_idx].alg_id; rc = ima_calc_boot_aggregate_tfm(hash->digest, alg_id, tfm); ima_free_tfm(tfm); return rc; }
3 3 1 1 1 1 3 1 1 1 6 2 2 2 2 2 2 1 2 2 1 1 1 1 1 1 1 1 3 1 2 1 1 1 1 1 1 1 6 6 7 1 6 2 4 2 6 1 5 4 2 6 1 6 1 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 // SPDX-License-Identifier: GPL-2.0 /* * Native support for the I/O-Warrior USB devices * * Copyright (c) 2003-2005, 2020 Code Mercenaries GmbH * written by Christian Lucht <lucht@codemercs.com> and * Christoph Jung <jung@codemercs.com> * * based on * usb-skeleton.c by Greg Kroah-Hartman <greg@kroah.com> * brlvger.c by Stephane Dalton <sdalton@videotron.ca> * and Stephane Doyon <s.doyon@videotron.ca> * * Released under the GPLv2. */ #include <linux/module.h> #include <linux/usb.h> #include <linux/slab.h> #include <linux/sched.h> #include <linux/mutex.h> #include <linux/poll.h> #include <linux/usb/iowarrior.h> #define DRIVER_AUTHOR "Christian Lucht <lucht@codemercs.com>" #define DRIVER_DESC "USB IO-Warrior driver" #define USB_VENDOR_ID_CODEMERCS 1984 /* low speed iowarrior */ #define USB_DEVICE_ID_CODEMERCS_IOW40 0x1500 #define USB_DEVICE_ID_CODEMERCS_IOW24 0x1501 #define USB_DEVICE_ID_CODEMERCS_IOWPV1 0x1511 #define USB_DEVICE_ID_CODEMERCS_IOWPV2 0x1512 /* full speed iowarrior */ #define USB_DEVICE_ID_CODEMERCS_IOW56 0x1503 /* fuller speed iowarrior */ #define USB_DEVICE_ID_CODEMERCS_IOW28 0x1504 #define USB_DEVICE_ID_CODEMERCS_IOW28L 0x1505 #define USB_DEVICE_ID_CODEMERCS_IOW100 0x1506 /* OEMed devices */ #define USB_DEVICE_ID_CODEMERCS_IOW24SAG 0x158a #define USB_DEVICE_ID_CODEMERCS_IOW56AM 0x158b /* Get a minor range for your devices from the usb maintainer */ #ifdef CONFIG_USB_DYNAMIC_MINORS #define IOWARRIOR_MINOR_BASE 0 #else #define IOWARRIOR_MINOR_BASE 208 // SKELETON_MINOR_BASE 192 + 16, not official yet #endif /* interrupt input queue size */ #define MAX_INTERRUPT_BUFFER 16 /* maximum number of urbs that are submitted for writes at the same time, this applies to the IOWarrior56 only! IOWarrior24 and IOWarrior40 use synchronous usb_control_msg calls. */ #define MAX_WRITES_IN_FLIGHT 4 MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL"); static struct usb_driver iowarrior_driver; /*--------------*/ /* data */ /*--------------*/ /* Structure to hold all of our device specific stuff */ struct iowarrior { struct mutex mutex; /* locks this structure */ struct usb_device *udev; /* save off the usb device pointer */ struct usb_interface *interface; /* the interface for this device */ unsigned char minor; /* the starting minor number for this device */ struct usb_endpoint_descriptor *int_out_endpoint; /* endpoint for reading (needed for IOW56 only) */ struct usb_endpoint_descriptor *int_in_endpoint; /* endpoint for reading */ struct urb *int_in_urb; /* the urb for reading data */ unsigned char *int_in_buffer; /* buffer for data to be read */ unsigned char serial_number; /* to detect lost packages */ unsigned char *read_queue; /* size is MAX_INTERRUPT_BUFFER * packet size */ wait_queue_head_t read_wait; wait_queue_head_t write_wait; /* wait-queue for writing to the device */ atomic_t write_busy; /* number of write-urbs submitted */ atomic_t read_idx; atomic_t intr_idx; atomic_t overflow_flag; /* signals an index 'rollover' */ int present; /* this is 1 as long as the device is connected */ int opened; /* this is 1 if the device is currently open */ char chip_serial[9]; /* the serial number string of the chip connected */ int report_size; /* number of bytes in a report */ u16 product_id; struct usb_anchor submitted; }; /*--------------*/ /* globals */ /*--------------*/ #define USB_REQ_GET_REPORT 0x01 //#if 0 static int usb_get_report(struct usb_device *dev, struct usb_host_interface *inter, unsigned char type, unsigned char id, void *buf, int size) { return usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), USB_REQ_GET_REPORT, USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE, (type << 8) + id, inter->desc.bInterfaceNumber, buf, size, USB_CTRL_GET_TIMEOUT); } //#endif #define USB_REQ_SET_REPORT 0x09 static int usb_set_report(struct usb_interface *intf, unsigned char type, unsigned char id, void *buf, int size) { return usb_control_msg(interface_to_usbdev(intf), usb_sndctrlpipe(interface_to_usbdev(intf), 0), USB_REQ_SET_REPORT, USB_TYPE_CLASS | USB_RECIP_INTERFACE, (type << 8) + id, intf->cur_altsetting->desc.bInterfaceNumber, buf, size, 1000); } /*---------------------*/ /* driver registration */ /*---------------------*/ /* table of devices that work with this driver */ static const struct usb_device_id iowarrior_ids[] = { {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW40)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW24)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOWPV1)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOWPV2)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW56)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW24SAG)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW56AM)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW28)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW28L)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW100)}, {} /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, iowarrior_ids); /* * USB callback handler for reading data */ static void iowarrior_callback(struct urb *urb) { struct iowarrior *dev = urb->context; int intr_idx; int read_idx; int aux_idx; int offset; int status = urb->status; int retval; switch (status) { case 0: /* success */ break; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: return; default: goto exit; } intr_idx = atomic_read(&dev->intr_idx); /* aux_idx become previous intr_idx */ aux_idx = (intr_idx == 0) ? (MAX_INTERRUPT_BUFFER - 1) : (intr_idx - 1); read_idx = atomic_read(&dev->read_idx); /* queue is not empty and it's interface 0 */ if ((intr_idx != read_idx) && (dev->interface->cur_altsetting->desc.bInterfaceNumber == 0)) { /* + 1 for serial number */ offset = aux_idx * (dev->report_size + 1); if (!memcmp (dev->read_queue + offset, urb->transfer_buffer, dev->report_size)) { /* equal values on interface 0 will be ignored */ goto exit; } } /* aux_idx become next intr_idx */ aux_idx = (intr_idx == (MAX_INTERRUPT_BUFFER - 1)) ? 0 : (intr_idx + 1); if (read_idx == aux_idx) { /* queue full, dropping oldest input */ read_idx = (++read_idx == MAX_INTERRUPT_BUFFER) ? 0 : read_idx; atomic_set(&dev->read_idx, read_idx); atomic_set(&dev->overflow_flag, 1); } /* +1 for serial number */ offset = intr_idx * (dev->report_size + 1); memcpy(dev->read_queue + offset, urb->transfer_buffer, dev->report_size); *(dev->read_queue + offset + (dev->report_size)) = dev->serial_number++; atomic_set(&dev->intr_idx, aux_idx); /* tell the blocking read about the new data */ wake_up_interruptible(&dev->read_wait); exit: retval = usb_submit_urb(urb, GFP_ATOMIC); if (retval) dev_err(&dev->interface->dev, "%s - usb_submit_urb failed with result %d\n", __func__, retval); } /* * USB Callback handler for write-ops */ static void iowarrior_write_callback(struct urb *urb) { struct iowarrior *dev; int status = urb->status; dev = urb->context; /* sync/async unlink faults aren't errors */ if (status && !(status == -ENOENT || status == -ECONNRESET || status == -ESHUTDOWN)) { dev_dbg(&dev->interface->dev, "nonzero write bulk status received: %d\n", status); } /* free up our allocated buffer */ usb_free_coherent(urb->dev, urb->transfer_buffer_length, urb->transfer_buffer, urb->transfer_dma); /* tell a waiting writer the interrupt-out-pipe is available again */ atomic_dec(&dev->write_busy); wake_up_interruptible(&dev->write_wait); } /* * iowarrior_delete */ static inline void iowarrior_delete(struct iowarrior *dev) { dev_dbg(&dev->interface->dev, "minor %d\n", dev->minor); kfree(dev->int_in_buffer); usb_free_urb(dev->int_in_urb); kfree(dev->read_queue); usb_put_intf(dev->interface); kfree(dev); } /*---------------------*/ /* fops implementation */ /*---------------------*/ static int read_index(struct iowarrior *dev) { int intr_idx, read_idx; read_idx = atomic_read(&dev->read_idx); intr_idx = atomic_read(&dev->intr_idx); return (read_idx == intr_idx ? -1 : read_idx); } /* * iowarrior_read */ static ssize_t iowarrior_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) { struct iowarrior *dev; int read_idx; int offset; int retval; dev = file->private_data; if (file->f_flags & O_NONBLOCK) { retval = mutex_trylock(&dev->mutex); if (!retval) return -EAGAIN; } else { retval = mutex_lock_interruptible(&dev->mutex); if (retval) return -ERESTARTSYS; } /* verify that the device wasn't unplugged */ if (!dev->present) { retval = -ENODEV; goto exit; } dev_dbg(&dev->interface->dev, "minor %d, count = %zd\n", dev->minor, count); /* read count must be packet size (+ time stamp) */ if ((count != dev->report_size) && (count != (dev->report_size + 1))) { retval = -EINVAL; goto exit; } /* repeat until no buffer overrun in callback handler occur */ do { atomic_set(&dev->overflow_flag, 0); if ((read_idx = read_index(dev)) == -1) { /* queue empty */ if (file->f_flags & O_NONBLOCK) { retval = -EAGAIN; goto exit; } else { //next line will return when there is either new data, or the device is unplugged int r = wait_event_interruptible(dev->read_wait, (!dev->present || (read_idx = read_index (dev)) != -1)); if (r) { //we were interrupted by a signal retval = -ERESTART; goto exit; } if (!dev->present) { //The device was unplugged retval = -ENODEV; goto exit; } if (read_idx == -1) { // Can this happen ??? retval = 0; goto exit; } } } offset = read_idx * (dev->report_size + 1); if (copy_to_user(buffer, dev->read_queue + offset, count)) { retval = -EFAULT; goto exit; } } while (atomic_read(&dev->overflow_flag)); read_idx = ++read_idx == MAX_INTERRUPT_BUFFER ? 0 : read_idx; atomic_set(&dev->read_idx, read_idx); mutex_unlock(&dev->mutex); return count; exit: mutex_unlock(&dev->mutex); return retval; } /* * iowarrior_write */ static ssize_t iowarrior_write(struct file *file, const char __user *user_buffer, size_t count, loff_t *ppos) { struct iowarrior *dev; int retval = 0; char *buf = NULL; /* for IOW24 and IOW56 we need a buffer */ struct urb *int_out_urb = NULL; dev = file->private_data; mutex_lock(&dev->mutex); /* verify that the device wasn't unplugged */ if (!dev->present) { retval = -ENODEV; goto exit; } dev_dbg(&dev->interface->dev, "minor %d, count = %zd\n", dev->minor, count); /* if count is 0 we're already done */ if (count == 0) { retval = 0; goto exit; } /* We only accept full reports */ if (count != dev->report_size) { retval = -EINVAL; goto exit; } switch (dev->product_id) { case USB_DEVICE_ID_CODEMERCS_IOW24: case USB_DEVICE_ID_CODEMERCS_IOW24SAG: case USB_DEVICE_ID_CODEMERCS_IOWPV1: case USB_DEVICE_ID_CODEMERCS_IOWPV2: case USB_DEVICE_ID_CODEMERCS_IOW40: /* IOW24 and IOW40 use a synchronous call */ buf = memdup_user(user_buffer, count); if (IS_ERR(buf)) { retval = PTR_ERR(buf); goto exit; } retval = usb_set_report(dev->interface, 2, 0, buf, count); kfree(buf); goto exit; case USB_DEVICE_ID_CODEMERCS_IOW56: case USB_DEVICE_ID_CODEMERCS_IOW56AM: case USB_DEVICE_ID_CODEMERCS_IOW28: case USB_DEVICE_ID_CODEMERCS_IOW28L: case USB_DEVICE_ID_CODEMERCS_IOW100: /* The IOW56 uses asynchronous IO and more urbs */ if (atomic_read(&dev->write_busy) == MAX_WRITES_IN_FLIGHT) { /* Wait until we are below the limit for submitted urbs */ if (file->f_flags & O_NONBLOCK) { retval = -EAGAIN; goto exit; } else { retval = wait_event_interruptible(dev->write_wait, (!dev->present || (atomic_read (&dev-> write_busy) < MAX_WRITES_IN_FLIGHT))); if (retval) { /* we were interrupted by a signal */ retval = -ERESTART; goto exit; } if (!dev->present) { /* The device was unplugged */ retval = -ENODEV; goto exit; } if (!dev->opened) { /* We were closed while waiting for an URB */ retval = -ENODEV; goto exit; } } } atomic_inc(&dev->write_busy); int_out_urb = usb_alloc_urb(0, GFP_KERNEL); if (!int_out_urb) { retval = -ENOMEM; goto error_no_urb; } buf = usb_alloc_coherent(dev->udev, dev->report_size, GFP_KERNEL, &int_out_urb->transfer_dma); if (!buf) { retval = -ENOMEM; dev_dbg(&dev->interface->dev, "Unable to allocate buffer\n"); goto error_no_buffer; } usb_fill_int_urb(int_out_urb, dev->udev, usb_sndintpipe(dev->udev, dev->int_out_endpoint->bEndpointAddress), buf, dev->report_size, iowarrior_write_callback, dev, dev->int_out_endpoint->bInterval); int_out_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; if (copy_from_user(buf, user_buffer, count)) { retval = -EFAULT; goto error; } usb_anchor_urb(int_out_urb, &dev->submitted); retval = usb_submit_urb(int_out_urb, GFP_KERNEL); if (retval) { dev_dbg(&dev->interface->dev, "submit error %d for urb nr.%d\n", retval, atomic_read(&dev->write_busy)); usb_unanchor_urb(int_out_urb); goto error; } /* submit was ok */ retval = count; usb_free_urb(int_out_urb); goto exit; default: /* what do we have here ? An unsupported Product-ID ? */ dev_err(&dev->interface->dev, "%s - not supported for product=0x%x\n", __func__, dev->product_id); retval = -EFAULT; goto exit; } error: usb_free_coherent(dev->udev, dev->report_size, buf, int_out_urb->transfer_dma); error_no_buffer: usb_free_urb(int_out_urb); error_no_urb: atomic_dec(&dev->write_busy); wake_up_interruptible(&dev->write_wait); exit: mutex_unlock(&dev->mutex); return retval; } /* * iowarrior_ioctl */ static long iowarrior_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct iowarrior *dev = NULL; __u8 *buffer; __u8 __user *user_buffer; int retval; int io_res; /* checks for bytes read/written and copy_to/from_user results */ dev = file->private_data; if (!dev) return -ENODEV; buffer = kzalloc(dev->report_size, GFP_KERNEL); if (!buffer) return -ENOMEM; mutex_lock(&dev->mutex); /* verify that the device wasn't unplugged */ if (!dev->present) { retval = -ENODEV; goto error_out; } dev_dbg(&dev->interface->dev, "minor %d, cmd 0x%.4x, arg %ld\n", dev->minor, cmd, arg); retval = 0; switch (cmd) { case IOW_WRITE: if (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW24 || dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW24SAG || dev->product_id == USB_DEVICE_ID_CODEMERCS_IOWPV1 || dev->product_id == USB_DEVICE_ID_CODEMERCS_IOWPV2 || dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW40) { user_buffer = (__u8 __user *)arg; io_res = copy_from_user(buffer, user_buffer, dev->report_size); if (io_res) { retval = -EFAULT; } else { io_res = usb_set_report(dev->interface, 2, 0, buffer, dev->report_size); if (io_res < 0) retval = io_res; } } else { retval = -EINVAL; dev_err(&dev->interface->dev, "ioctl 'IOW_WRITE' is not supported for product=0x%x.\n", dev->product_id); } break; case IOW_READ: user_buffer = (__u8 __user *)arg; io_res = usb_get_report(dev->udev, dev->interface->cur_altsetting, 1, 0, buffer, dev->report_size); if (io_res < 0) retval = io_res; else { io_res = copy_to_user(user_buffer, buffer, dev->report_size); if (io_res) retval = -EFAULT; } break; case IOW_GETINFO: { /* Report available information for the device */ struct iowarrior_info info; /* needed for power consumption */ struct usb_config_descriptor *cfg_descriptor = &dev->udev->actconfig->desc; memset(&info, 0, sizeof(info)); /* directly from the descriptor */ info.vendor = le16_to_cpu(dev->udev->descriptor.idVendor); info.product = dev->product_id; info.revision = le16_to_cpu(dev->udev->descriptor.bcdDevice); /* 0==UNKNOWN, 1==LOW(usb1.1) ,2=FULL(usb1.1), 3=HIGH(usb2.0) */ info.speed = dev->udev->speed; info.if_num = dev->interface->cur_altsetting->desc.bInterfaceNumber; info.report_size = dev->report_size; /* serial number string has been read earlier 8 chars or empty string */ memcpy(info.serial, dev->chip_serial, sizeof(dev->chip_serial)); if (cfg_descriptor == NULL) { info.power = -1; /* no information available */ } else { /* the MaxPower is stored in units of 2mA to make it fit into a byte-value */ info.power = cfg_descriptor->bMaxPower * 2; } io_res = copy_to_user((struct iowarrior_info __user *)arg, &info, sizeof(struct iowarrior_info)); if (io_res) retval = -EFAULT; break; } default: /* return that we did not understand this ioctl call */ retval = -ENOTTY; break; } error_out: /* unlock the device */ mutex_unlock(&dev->mutex); kfree(buffer); return retval; } /* * iowarrior_open */ static int iowarrior_open(struct inode *inode, struct file *file) { struct iowarrior *dev = NULL; struct usb_interface *interface; int subminor; int retval = 0; subminor = iminor(inode); interface = usb_find_interface(&iowarrior_driver, subminor); if (!interface) { pr_err("%s - error, can't find device for minor %d\n", __func__, subminor); return -ENODEV; } dev = usb_get_intfdata(interface); if (!dev) return -ENODEV; mutex_lock(&dev->mutex); /* Only one process can open each device, no sharing. */ if (dev->opened) { retval = -EBUSY; goto out; } /* setup interrupt handler for receiving values */ if ((retval = usb_submit_urb(dev->int_in_urb, GFP_KERNEL)) < 0) { dev_err(&interface->dev, "Error %d while submitting URB\n", retval); retval = -EFAULT; goto out; } /* increment our usage count for the driver */ ++dev->opened; /* save our object in the file's private structure */ file->private_data = dev; retval = 0; out: mutex_unlock(&dev->mutex); return retval; } /* * iowarrior_release */ static int iowarrior_release(struct inode *inode, struct file *file) { struct iowarrior *dev; int retval = 0; dev = file->private_data; if (!dev) return -ENODEV; dev_dbg(&dev->interface->dev, "minor %d\n", dev->minor); /* lock our device */ mutex_lock(&dev->mutex); if (dev->opened <= 0) { retval = -ENODEV; /* close called more than once */ mutex_unlock(&dev->mutex); } else { dev->opened = 0; /* we're closing now */ retval = 0; if (dev->present) { /* The device is still connected so we only shutdown pending read-/write-ops. */ usb_kill_urb(dev->int_in_urb); wake_up_interruptible(&dev->read_wait); wake_up_interruptible(&dev->write_wait); mutex_unlock(&dev->mutex); } else { /* The device was unplugged, cleanup resources */ mutex_unlock(&dev->mutex); iowarrior_delete(dev); } } return retval; } static __poll_t iowarrior_poll(struct file *file, poll_table * wait) { struct iowarrior *dev = file->private_data; __poll_t mask = 0; if (!dev->present) return EPOLLERR | EPOLLHUP; poll_wait(file, &dev->read_wait, wait); poll_wait(file, &dev->write_wait, wait); if (!dev->present) return EPOLLERR | EPOLLHUP; if (read_index(dev) != -1) mask |= EPOLLIN | EPOLLRDNORM; if (atomic_read(&dev->write_busy) < MAX_WRITES_IN_FLIGHT) mask |= EPOLLOUT | EPOLLWRNORM; return mask; } /* * File operations needed when we register this driver. * This assumes that this driver NEEDS file operations, * of course, which means that the driver is expected * to have a node in the /dev directory. If the USB * device were for a network interface then the driver * would use "struct net_driver" instead, and a serial * device would use "struct tty_driver". */ static const struct file_operations iowarrior_fops = { .owner = THIS_MODULE, .write = iowarrior_write, .read = iowarrior_read, .unlocked_ioctl = iowarrior_ioctl, .open = iowarrior_open, .release = iowarrior_release, .poll = iowarrior_poll, .llseek = noop_llseek, }; static char *iowarrior_devnode(const struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "usb/%s", dev_name(dev)); } /* * usb class driver info in order to get a minor number from the usb core, * and to have the device registered with devfs and the driver core */ static struct usb_class_driver iowarrior_class = { .name = "iowarrior%d", .devnode = iowarrior_devnode, .fops = &iowarrior_fops, .minor_base = IOWARRIOR_MINOR_BASE, }; /*---------------------------------*/ /* probe and disconnect functions */ /*---------------------------------*/ /* * iowarrior_probe * * Called by the usb core when a new device is connected that it thinks * this driver might be interested in. */ static int iowarrior_probe(struct usb_interface *interface, const struct usb_device_id *id) { struct usb_device *udev = interface_to_usbdev(interface); struct iowarrior *dev = NULL; struct usb_host_interface *iface_desc; int retval = -ENOMEM; int res; /* allocate memory for our device state and initialize it */ dev = kzalloc(sizeof(struct iowarrior), GFP_KERNEL); if (!dev) return retval; mutex_init(&dev->mutex); atomic_set(&dev->intr_idx, 0); atomic_set(&dev->read_idx, 0); atomic_set(&dev->overflow_flag, 0); init_waitqueue_head(&dev->read_wait); atomic_set(&dev->write_busy, 0); init_waitqueue_head(&dev->write_wait); dev->udev = udev; dev->interface = usb_get_intf(interface); iface_desc = interface->cur_altsetting; dev->product_id = le16_to_cpu(udev->descriptor.idProduct); init_usb_anchor(&dev->submitted); res = usb_find_last_int_in_endpoint(iface_desc, &dev->int_in_endpoint); if (res) { dev_err(&interface->dev, "no interrupt-in endpoint found\n"); retval = res; goto error; } if ((dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56) || (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56AM) || (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28) || (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28L) || (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW100)) { res = usb_find_last_int_out_endpoint(iface_desc, &dev->int_out_endpoint); if (res) { dev_err(&interface->dev, "no interrupt-out endpoint found\n"); retval = res; goto error; } } /* we have to check the report_size often, so remember it in the endianness suitable for our machine */ dev->report_size = usb_endpoint_maxp(dev->int_in_endpoint); /* * Some devices need the report size to be different than the * endpoint size. */ if (dev->interface->cur_altsetting->desc.bInterfaceNumber == 0) { switch (dev->product_id) { case USB_DEVICE_ID_CODEMERCS_IOW56: case USB_DEVICE_ID_CODEMERCS_IOW56AM: dev->report_size = 7; break; case USB_DEVICE_ID_CODEMERCS_IOW28: case USB_DEVICE_ID_CODEMERCS_IOW28L: dev->report_size = 4; break; case USB_DEVICE_ID_CODEMERCS_IOW100: dev->report_size = 12; break; } } /* create the urb and buffer for reading */ dev->int_in_urb = usb_alloc_urb(0, GFP_KERNEL); if (!dev->int_in_urb) goto error; dev->int_in_buffer = kmalloc(dev->report_size, GFP_KERNEL); if (!dev->int_in_buffer) goto error; usb_fill_int_urb(dev->int_in_urb, dev->udev, usb_rcvintpipe(dev->udev, dev->int_in_endpoint->bEndpointAddress), dev->int_in_buffer, dev->report_size, iowarrior_callback, dev, dev->int_in_endpoint->bInterval); /* create an internal buffer for interrupt data from the device */ dev->read_queue = kmalloc_array(dev->report_size + 1, MAX_INTERRUPT_BUFFER, GFP_KERNEL); if (!dev->read_queue) goto error; /* Get the serial-number of the chip */ memset(dev->chip_serial, 0x00, sizeof(dev->chip_serial)); usb_string(udev, udev->descriptor.iSerialNumber, dev->chip_serial, sizeof(dev->chip_serial)); if (strlen(dev->chip_serial) != 8) memset(dev->chip_serial, 0x00, sizeof(dev->chip_serial)); /* Set the idle timeout to 0, if this is interface 0 */ if (dev->interface->cur_altsetting->desc.bInterfaceNumber == 0) { usb_control_msg(udev, usb_sndctrlpipe(udev, 0), 0x0A, USB_TYPE_CLASS | USB_RECIP_INTERFACE, 0, 0, NULL, 0, USB_CTRL_SET_TIMEOUT); } /* allow device read and ioctl */ dev->present = 1; /* we can register the device now, as it is ready */ usb_set_intfdata(interface, dev); retval = usb_register_dev(interface, &iowarrior_class); if (retval) { /* something prevented us from registering this driver */ dev_err(&interface->dev, "Not able to get a minor for this device.\n"); goto error; } dev->minor = interface->minor; /* let the user know what node this device is now attached to */ dev_info(&interface->dev, "IOWarrior product=0x%x, serial=%s interface=%d " "now attached to iowarrior%d\n", dev->product_id, dev->chip_serial, iface_desc->desc.bInterfaceNumber, dev->minor - IOWARRIOR_MINOR_BASE); return retval; error: iowarrior_delete(dev); return retval; } /* * iowarrior_disconnect * * Called by the usb core when the device is removed from the system. */ static void iowarrior_disconnect(struct usb_interface *interface) { struct iowarrior *dev = usb_get_intfdata(interface); usb_deregister_dev(interface, &iowarrior_class); mutex_lock(&dev->mutex); /* prevent device read, write and ioctl */ dev->present = 0; if (dev->opened) { /* There is a process that holds a filedescriptor to the device , so we only shutdown read-/write-ops going on. Deleting the device is postponed until close() was called. */ usb_kill_urb(dev->int_in_urb); usb_kill_anchored_urbs(&dev->submitted); wake_up_interruptible(&dev->read_wait); wake_up_interruptible(&dev->write_wait); mutex_unlock(&dev->mutex); } else { /* no process is using the device, cleanup now */ mutex_unlock(&dev->mutex); iowarrior_delete(dev); } } /* usb specific object needed to register this driver with the usb subsystem */ static struct usb_driver iowarrior_driver = { .name = "iowarrior", .probe = iowarrior_probe, .disconnect = iowarrior_disconnect, .id_table = iowarrior_ids, }; module_usb_driver(iowarrior_driver);
23 24 23 4 8 9 3 2 5 2 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 // SPDX-License-Identifier: GPL-2.0-only /* * AppArmor security module * * This file contains basic common functions used in AppArmor * * Copyright (C) 1998-2008 Novell/SUSE * Copyright 2009-2010 Canonical Ltd. */ #include <linux/ctype.h> #include <linux/mm.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/vmalloc.h> #include "include/audit.h" #include "include/apparmor.h" #include "include/lib.h" #include "include/perms.h" #include "include/policy.h" struct aa_perms nullperms; struct aa_perms allperms = { .allow = ALL_PERMS_MASK, .quiet = ALL_PERMS_MASK, .hide = ALL_PERMS_MASK }; struct val_table_ent { const char *str; int value; }; static struct val_table_ent debug_values_table[] = { { "N", DEBUG_NONE }, { "none", DEBUG_NONE }, { "n", DEBUG_NONE }, { "0", DEBUG_NONE }, { "all", DEBUG_ALL }, { "Y", DEBUG_ALL }, { "y", DEBUG_ALL }, { "1", DEBUG_ALL }, { "abs_root", DEBUG_LABEL_ABS_ROOT }, { "label", DEBUG_LABEL }, { "domain", DEBUG_DOMAIN }, { "policy", DEBUG_POLICY }, { "interface", DEBUG_INTERFACE }, { NULL, 0 } }; static struct val_table_ent *val_table_find_ent(struct val_table_ent *table, const char *name, size_t len) { struct val_table_ent *entry; for (entry = table; entry->str != NULL; entry++) { if (strncmp(entry->str, name, len) == 0 && strlen(entry->str) == len) return entry; } return NULL; } int aa_parse_debug_params(const char *str) { struct val_table_ent *ent; const char *next; int val = 0; do { size_t n = strcspn(str, "\r\n,"); next = str + n; ent = val_table_find_ent(debug_values_table, str, next - str); if (ent) val |= ent->value; else AA_DEBUG(DEBUG_INTERFACE, "unknown debug type '%.*s'", (int)(next - str), str); str = next + 1; } while (*next != 0); return val; } /** * val_mask_to_str - convert a perm mask to its short string * @str: character buffer to store string in (at least 10 characters) * @size: size of the @str buffer * @table: NUL-terminated character buffer of permission characters (NOT NULL) * @mask: permission mask to convert */ static int val_mask_to_str(char *str, size_t size, const struct val_table_ent *table, u32 mask) { const struct val_table_ent *ent; int total = 0; for (ent = table; ent->str; ent++) { if (ent->value && (ent->value & mask) == ent->value) { int len = scnprintf(str, size, "%s%s", total ? "," : "", ent->str); size -= len; str += len; total += len; mask &= ~ent->value; } } return total; } int aa_print_debug_params(char *buffer) { if (!aa_g_debug) return sprintf(buffer, "N"); return val_mask_to_str(buffer, PAGE_SIZE, debug_values_table, aa_g_debug); } bool aa_resize_str_table(struct aa_str_table *t, int newsize, gfp_t gfp) { char **n; int i; if (t->size == newsize) return true; n = kcalloc(newsize, sizeof(*n), gfp); if (!n) return false; for (i = 0; i < min(t->size, newsize); i++) n[i] = t->table[i]; for (; i < t->size; i++) kfree_sensitive(t->table[i]); if (newsize > t->size) memset(&n[t->size], 0, (newsize-t->size)*sizeof(*n)); kfree_sensitive(t->table); t->table = n; t->size = newsize; return true; } /** * aa_free_str_table - free entries str table * @t: the string table to free (MAYBE NULL) */ void aa_free_str_table(struct aa_str_table *t) { int i; if (t) { if (!t->table) return; for (i = 0; i < t->size; i++) kfree_sensitive(t->table[i]); kfree_sensitive(t->table); t->table = NULL; t->size = 0; } } /** * skipn_spaces - Removes leading whitespace from @str. * @str: The string to be stripped. * @n: length of str to parse, will stop at \0 if encountered before n * * Returns a pointer to the first non-whitespace character in @str. * if all whitespace will return NULL */ const char *skipn_spaces(const char *str, size_t n) { for (; n && isspace(*str); --n) ++str; if (n) return (char *)str; return NULL; } const char *aa_splitn_fqname(const char *fqname, size_t n, const char **ns_name, size_t *ns_len) { const char *end = fqname + n; const char *name = skipn_spaces(fqname, n); *ns_name = NULL; *ns_len = 0; if (!name) return NULL; if (name[0] == ':') { char *split = strnchr(&name[1], end - &name[1], ':'); *ns_name = skipn_spaces(&name[1], end - &name[1]); if (!*ns_name) return NULL; if (split) { *ns_len = split - *ns_name; if (*ns_len == 0) *ns_name = NULL; split++; if (end - split > 1 && strncmp(split, "//", 2) == 0) split += 2; name = skipn_spaces(split, end - split); } else { /* a ns name without a following profile is allowed */ name = NULL; *ns_len = end - *ns_name; } } if (name && *name == 0) name = NULL; return name; } /** * aa_info_message - log a none profile related status message * @str: message to log */ void aa_info_message(const char *str) { if (audit_enabled) { DEFINE_AUDIT_DATA(ad, LSM_AUDIT_DATA_NONE, AA_CLASS_NONE, NULL); ad.info = str; aa_audit_msg(AUDIT_APPARMOR_STATUS, &ad, NULL); } printk(KERN_INFO "AppArmor: %s\n", str); } __counted char *aa_str_alloc(int size, gfp_t gfp) { struct counted_str *str; str = kmalloc(struct_size(str, name, size), gfp); if (!str) return NULL; kref_init(&str->count); return str->name; } void aa_str_kref(struct kref *kref) { kfree(container_of(kref, struct counted_str, count)); } const char aa_file_perm_chrs[] = "xwracd km l "; const char *aa_file_perm_names[] = { "exec", "write", "read", "append", "create", "delete", "open", "rename", "setattr", "getattr", "setcred", "getcred", "chmod", "chown", "chgrp", "lock", "mmap", "mprot", "link", "snapshot", "unknown", "unknown", "unknown", "unknown", "unknown", "unknown", "unknown", "unknown", "stack", "change_onexec", "change_profile", "change_hat", }; /** * aa_perm_mask_to_str - convert a perm mask to its short string * @str: character buffer to store string in (at least 10 characters) * @str_size: size of the @str buffer * @chrs: NUL-terminated character buffer of permission characters * @mask: permission mask to convert */ void aa_perm_mask_to_str(char *str, size_t str_size, const char *chrs, u32 mask) { unsigned int i, perm = 1; size_t num_chrs = strlen(chrs); for (i = 0; i < num_chrs; perm <<= 1, i++) { if (mask & perm) { /* Ensure that one byte is left for NUL-termination */ if (WARN_ON_ONCE(str_size <= 1)) break; *str++ = chrs[i]; str_size--; } } *str = '\0'; } void aa_audit_perm_names(struct audit_buffer *ab, const char * const *names, u32 mask) { const char *fmt = "%s"; unsigned int i, perm = 1; bool prev = false; for (i = 0; i < 32; perm <<= 1, i++) { if (mask & perm) { audit_log_format(ab, fmt, names[i]); if (!prev) { prev = true; fmt = " %s"; } } } } void aa_audit_perm_mask(struct audit_buffer *ab, u32 mask, const char *chrs, u32 chrsmask, const char * const *names, u32 namesmask) { char str[33]; audit_log_format(ab, "\""); if ((mask & chrsmask) && chrs) { aa_perm_mask_to_str(str, sizeof(str), chrs, mask & chrsmask); mask &= ~chrsmask; audit_log_format(ab, "%s", str); if (mask & namesmask) audit_log_format(ab, " "); } if ((mask & namesmask) && names) aa_audit_perm_names(ab, names, mask & namesmask); audit_log_format(ab, "\""); } /** * aa_apply_modes_to_perms - apply namespace and profile flags to perms * @profile: that perms where computed from * @perms: perms to apply mode modifiers to * * TODO: split into profile and ns based flags for when accumulating perms */ void aa_apply_modes_to_perms(struct aa_profile *profile, struct aa_perms *perms) { switch (AUDIT_MODE(profile)) { case AUDIT_ALL: perms->audit = ALL_PERMS_MASK; fallthrough; case AUDIT_NOQUIET: perms->quiet = 0; break; case AUDIT_QUIET: perms->audit = 0; fallthrough; case AUDIT_QUIET_DENIED: perms->quiet = ALL_PERMS_MASK; break; } if (KILL_MODE(profile)) perms->kill = ALL_PERMS_MASK; else if (COMPLAIN_MODE(profile)) perms->complain = ALL_PERMS_MASK; else if (USER_MODE(profile)) perms->prompt = ALL_PERMS_MASK; } void aa_profile_match_label(struct aa_profile *profile, struct aa_ruleset *rules, struct aa_label *label, int type, u32 request, struct aa_perms *perms) { /* TODO: doesn't yet handle extended types */ aa_state_t state; state = aa_dfa_next(rules->policy->dfa, rules->policy->start[AA_CLASS_LABEL], type); aa_label_match(profile, rules, label, state, false, request, perms); } /** * aa_check_perms - do audit mode selection based on perms set * @profile: profile being checked * @perms: perms computed for the request * @request: requested perms * @ad: initialized audit structure (MAY BE NULL if not auditing) * @cb: callback fn for type specific fields (MAY BE NULL) * * Returns: 0 if permission else error code * * Note: profile audit modes need to be set before calling by setting the * perm masks appropriately. * * If not auditing then complain mode is not enabled and the * error code will indicate whether there was an explicit deny * with a positive value. */ int aa_check_perms(struct aa_profile *profile, struct aa_perms *perms, u32 request, struct apparmor_audit_data *ad, void (*cb)(struct audit_buffer *, void *)) { int type, error; u32 denied = request & (~perms->allow | perms->deny); if (likely(!denied)) { /* mask off perms that are not being force audited */ request &= perms->audit; if (!request || !ad) return 0; type = AUDIT_APPARMOR_AUDIT; error = 0; } else { error = -EACCES; if (denied & perms->kill) type = AUDIT_APPARMOR_KILL; else if (denied == (denied & perms->complain)) type = AUDIT_APPARMOR_ALLOWED; else type = AUDIT_APPARMOR_DENIED; if (denied == (denied & perms->hide)) error = -ENOENT; denied &= ~perms->quiet; if (!ad || !denied) return error; } if (ad) { ad->subj_label = &profile->label; ad->request = request; ad->denied = denied; ad->error = error; aa_audit_msg(type, ad, cb); } if (type == AUDIT_APPARMOR_ALLOWED) error = 0; return error; } /** * aa_policy_init - initialize a policy structure * @policy: policy to initialize (NOT NULL) * @prefix: prefix name if any is required. (MAYBE NULL) * @name: name of the policy, init will make a copy of it (NOT NULL) * @gfp: allocation mode * * Note: this fn creates a copy of strings passed in * * Returns: true if policy init successful */ bool aa_policy_init(struct aa_policy *policy, const char *prefix, const char *name, gfp_t gfp) { char *hname; /* freed by policy_free */ if (prefix) { hname = aa_str_alloc(strlen(prefix) + strlen(name) + 3, gfp); if (hname) sprintf(hname, "%s//%s", prefix, name); } else { hname = aa_str_alloc(strlen(name) + 1, gfp); if (hname) strcpy(hname, name); } if (!hname) return false; policy->hname = hname; /* base.name is a substring of fqname */ policy->name = basename(policy->hname); INIT_LIST_HEAD(&policy->list); INIT_LIST_HEAD(&policy->profiles); return true; } /** * aa_policy_destroy - free the elements referenced by @policy * @policy: policy that is to have its elements freed (NOT NULL) */ void aa_policy_destroy(struct aa_policy *policy) { AA_BUG(on_list_rcu(&policy->profiles)); AA_BUG(on_list_rcu(&policy->list)); /* don't free name as its a subset of hname */ aa_put_str(policy->hname); }
2 1 1 1 2 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 // SPDX-License-Identifier: GPL-2.0-only /* * HID driver for some ITE "special" devices * Copyright (c) 2017 Hans de Goede <hdegoede@redhat.com> */ #include <linux/device.h> #include <linux/input.h> #include <linux/hid.h> #include <linux/module.h> #include "hid-ids.h" #define QUIRK_TOUCHPAD_ON_OFF_REPORT BIT(0) static const __u8 *ite_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { unsigned long quirks = (unsigned long)hid_get_drvdata(hdev); if (quirks & QUIRK_TOUCHPAD_ON_OFF_REPORT) { /* For Acer Aspire Switch 10 SW5-012 keyboard-dock */ if (*rsize == 188 && rdesc[162] == 0x81 && rdesc[163] == 0x02) { hid_info(hdev, "Fixing up Acer Sw5-012 ITE keyboard report descriptor\n"); rdesc[163] = HID_MAIN_ITEM_RELATIVE; } /* For Acer One S1002/S1003 keyboard-dock */ if (*rsize == 188 && rdesc[185] == 0x81 && rdesc[186] == 0x02) { hid_info(hdev, "Fixing up Acer S1002/S1003 ITE keyboard report descriptor\n"); rdesc[186] = HID_MAIN_ITEM_RELATIVE; } /* For Acer Aspire Switch 10E (SW3-016) keyboard-dock */ if (*rsize == 210 && rdesc[184] == 0x81 && rdesc[185] == 0x02) { hid_info(hdev, "Fixing up Acer Aspire Switch 10E (SW3-016) ITE keyboard report descriptor\n"); rdesc[185] = HID_MAIN_ITEM_RELATIVE; } } return rdesc; } static int ite_input_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { unsigned long quirks = (unsigned long)hid_get_drvdata(hdev); if ((quirks & QUIRK_TOUCHPAD_ON_OFF_REPORT) && (usage->hid & HID_USAGE_PAGE) == 0x00880000) { if (usage->hid == 0x00880078) { /* Touchpad on, userspace expects F22 for this */ hid_map_usage_clear(hi, usage, bit, max, EV_KEY, KEY_F22); return 1; } if (usage->hid == 0x00880079) { /* Touchpad off, userspace expects F23 for this */ hid_map_usage_clear(hi, usage, bit, max, EV_KEY, KEY_F23); return 1; } return -1; } return 0; } static int ite_event(struct hid_device *hdev, struct hid_field *field, struct hid_usage *usage, __s32 value) { struct input_dev *input; if (!(hdev->claimed & HID_CLAIMED_INPUT) || !field->hidinput) return 0; input = field->hidinput->input; /* * The ITE8595 always reports 0 as value for the rfkill button. Luckily * it is the only button in its report, and it sends a report on * release only, so receiving a report means the button was pressed. */ if (usage->hid == HID_GD_RFKILL_BTN) { input_event(input, EV_KEY, KEY_RFKILL, 1); input_sync(input); input_event(input, EV_KEY, KEY_RFKILL, 0); input_sync(input); return 1; } return 0; } static int ite_probe(struct hid_device *hdev, const struct hid_device_id *id) { int ret; hid_set_drvdata(hdev, (void *)id->driver_data); ret = hid_open_report(hdev); if (ret) return ret; return hid_hw_start(hdev, HID_CONNECT_DEFAULT); } static const struct hid_device_id ite_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ITE, USB_DEVICE_ID_ITE8595) }, { HID_USB_DEVICE(USB_VENDOR_ID_258A, USB_DEVICE_ID_258A_6A88) }, /* ITE8595 USB kbd ctlr, with Synaptics touchpad connected to it. */ { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, USB_VENDOR_ID_SYNAPTICS, USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012), .driver_data = QUIRK_TOUCHPAD_ON_OFF_REPORT }, /* ITE8910 USB kbd ctlr, with Synaptics touchpad connected to it. */ { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, USB_VENDOR_ID_SYNAPTICS, USB_DEVICE_ID_SYNAPTICS_ACER_ONE_S1002), .driver_data = QUIRK_TOUCHPAD_ON_OFF_REPORT }, /* ITE8910 USB kbd ctlr, with Synaptics touchpad connected to it. */ { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, USB_VENDOR_ID_SYNAPTICS, USB_DEVICE_ID_SYNAPTICS_ACER_ONE_S1003), .driver_data = QUIRK_TOUCHPAD_ON_OFF_REPORT }, /* ITE8910 USB kbd ctlr, with Synaptics touchpad connected to it. */ { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, USB_VENDOR_ID_SYNAPTICS, USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_017), .driver_data = QUIRK_TOUCHPAD_ON_OFF_REPORT }, { } }; MODULE_DEVICE_TABLE(hid, ite_devices); static struct hid_driver ite_driver = { .name = "itetech", .id_table = ite_devices, .probe = ite_probe, .report_fixup = ite_report_fixup, .input_mapping = ite_input_mapping, .event = ite_event, }; module_hid_driver(ite_driver); MODULE_AUTHOR("Hans de Goede <hdegoede@redhat.com>"); MODULE_DESCRIPTION("HID driver for some ITE \"special\" devices"); MODULE_LICENSE("GPL");
3 3 3 3 3 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 // SPDX-License-Identifier: GPL-2.0-only /* * * Copyright (C) 2005 Mike Isely <isely@pobox.com> */ #include "pvrusb2-std.h" #include "pvrusb2-debug.h" #include <asm/string.h> #include <linux/slab.h> struct std_name { const char *name; v4l2_std_id id; }; #define CSTD_PAL \ (V4L2_STD_PAL_B| \ V4L2_STD_PAL_B1| \ V4L2_STD_PAL_G| \ V4L2_STD_PAL_H| \ V4L2_STD_PAL_I| \ V4L2_STD_PAL_D| \ V4L2_STD_PAL_D1| \ V4L2_STD_PAL_K| \ V4L2_STD_PAL_M| \ V4L2_STD_PAL_N| \ V4L2_STD_PAL_Nc| \ V4L2_STD_PAL_60) #define CSTD_NTSC \ (V4L2_STD_NTSC_M| \ V4L2_STD_NTSC_M_JP| \ V4L2_STD_NTSC_M_KR| \ V4L2_STD_NTSC_443) #define CSTD_ATSC \ (V4L2_STD_ATSC_8_VSB| \ V4L2_STD_ATSC_16_VSB) #define CSTD_SECAM \ (V4L2_STD_SECAM_B| \ V4L2_STD_SECAM_D| \ V4L2_STD_SECAM_G| \ V4L2_STD_SECAM_H| \ V4L2_STD_SECAM_K| \ V4L2_STD_SECAM_K1| \ V4L2_STD_SECAM_L| \ V4L2_STD_SECAM_LC) #define TSTD_B (V4L2_STD_PAL_B|V4L2_STD_SECAM_B) #define TSTD_B1 (V4L2_STD_PAL_B1) #define TSTD_D (V4L2_STD_PAL_D|V4L2_STD_SECAM_D) #define TSTD_D1 (V4L2_STD_PAL_D1) #define TSTD_G (V4L2_STD_PAL_G|V4L2_STD_SECAM_G) #define TSTD_H (V4L2_STD_PAL_H|V4L2_STD_SECAM_H) #define TSTD_I (V4L2_STD_PAL_I) #define TSTD_K (V4L2_STD_PAL_K|V4L2_STD_SECAM_K) #define TSTD_K1 (V4L2_STD_SECAM_K1) #define TSTD_L (V4L2_STD_SECAM_L) #define TSTD_M (V4L2_STD_PAL_M|V4L2_STD_NTSC_M) #define TSTD_N (V4L2_STD_PAL_N) #define TSTD_Nc (V4L2_STD_PAL_Nc) #define TSTD_60 (V4L2_STD_PAL_60) #define CSTD_ALL (CSTD_PAL|CSTD_NTSC|CSTD_ATSC|CSTD_SECAM) /* Mapping of standard bits to color system */ static const struct std_name std_groups[] = { {"PAL",CSTD_PAL}, {"NTSC",CSTD_NTSC}, {"SECAM",CSTD_SECAM}, {"ATSC",CSTD_ATSC}, }; /* Mapping of standard bits to modulation system */ static const struct std_name std_items[] = { {"B",TSTD_B}, {"B1",TSTD_B1}, {"D",TSTD_D}, {"D1",TSTD_D1}, {"G",TSTD_G}, {"H",TSTD_H}, {"I",TSTD_I}, {"K",TSTD_K}, {"K1",TSTD_K1}, {"L",TSTD_L}, {"LC",V4L2_STD_SECAM_LC}, {"M",TSTD_M}, {"Mj",V4L2_STD_NTSC_M_JP}, {"443",V4L2_STD_NTSC_443}, {"Mk",V4L2_STD_NTSC_M_KR}, {"N",TSTD_N}, {"Nc",TSTD_Nc}, {"60",TSTD_60}, {"8VSB",V4L2_STD_ATSC_8_VSB}, {"16VSB",V4L2_STD_ATSC_16_VSB}, }; // Search an array of std_name structures and return a pointer to the // element with the matching name. static const struct std_name *find_std_name(const struct std_name *arrPtr, unsigned int arrSize, const char *bufPtr, unsigned int bufSize) { unsigned int idx; const struct std_name *p; for (idx = 0; idx < arrSize; idx++) { p = arrPtr + idx; if (strlen(p->name) != bufSize) continue; if (!memcmp(bufPtr,p->name,bufSize)) return p; } return NULL; } int pvr2_std_str_to_id(v4l2_std_id *idPtr,const char *bufPtr, unsigned int bufSize) { v4l2_std_id id = 0; v4l2_std_id cmsk = 0; v4l2_std_id t; int mMode = 0; unsigned int cnt; char ch; const struct std_name *sp; while (bufSize) { if (!mMode) { cnt = 0; while ((cnt < bufSize) && (bufPtr[cnt] != '-')) cnt++; if (cnt >= bufSize) return 0; // No more characters sp = find_std_name(std_groups, ARRAY_SIZE(std_groups), bufPtr,cnt); if (!sp) return 0; // Illegal color system name cnt++; bufPtr += cnt; bufSize -= cnt; mMode = !0; cmsk = sp->id; continue; } cnt = 0; while (cnt < bufSize) { ch = bufPtr[cnt]; if (ch == ';') { mMode = 0; break; } if (ch == '/') break; cnt++; } sp = find_std_name(std_items, ARRAY_SIZE(std_items), bufPtr,cnt); if (!sp) return 0; // Illegal modulation system ID t = sp->id & cmsk; if (!t) return 0; // Specific color + modulation system illegal id |= t; if (cnt < bufSize) cnt++; bufPtr += cnt; bufSize -= cnt; } if (idPtr) *idPtr = id; return !0; } unsigned int pvr2_std_id_to_str(char *bufPtr, unsigned int bufSize, v4l2_std_id id) { unsigned int idx1,idx2; const struct std_name *ip,*gp; int gfl,cfl; unsigned int c1,c2; cfl = 0; c1 = 0; for (idx1 = 0; idx1 < ARRAY_SIZE(std_groups); idx1++) { gp = std_groups + idx1; gfl = 0; for (idx2 = 0; idx2 < ARRAY_SIZE(std_items); idx2++) { ip = std_items + idx2; if (!(gp->id & ip->id & id)) continue; if (!gfl) { if (cfl) { c2 = scnprintf(bufPtr,bufSize,";"); c1 += c2; bufSize -= c2; bufPtr += c2; } cfl = !0; c2 = scnprintf(bufPtr,bufSize, "%s-",gp->name); gfl = !0; } else { c2 = scnprintf(bufPtr,bufSize,"/"); } c1 += c2; bufSize -= c2; bufPtr += c2; c2 = scnprintf(bufPtr,bufSize, ip->name); c1 += c2; bufSize -= c2; bufPtr += c2; } } return c1; } v4l2_std_id pvr2_std_get_usable(void) { return CSTD_ALL; }
4 4 4 1 1 1 4 1 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 // SPDX-License-Identifier: GPL-2.0-or-later /* * USB HID driver for Glorious PC Gaming Race * Glorious Model O, O- and D mice. * * Copyright (c) 2020 Samuel Čavoj <sammko@sammserver.com> */ /* */ #include <linux/hid.h> #include <linux/module.h> #include "hid-ids.h" MODULE_AUTHOR("Samuel Čavoj <sammko@sammserver.com>"); MODULE_DESCRIPTION("HID driver for Glorious PC Gaming Race mice"); /* * Glorious Model O and O- specify the const flag in the consumer input * report descriptor, which leads to inputs being ignored. Fix this * by patching the descriptor. * * Glorious Model I incorrectly specifes the Usage Minimum for its * keyboard HID report, causing keycodes to be misinterpreted. * Fix this by setting Usage Minimum to 0 in that report. */ static const __u8 *glorious_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { if (*rsize == 213 && rdesc[84] == 129 && rdesc[112] == 129 && rdesc[140] == 129 && rdesc[85] == 3 && rdesc[113] == 3 && rdesc[141] == 3) { hid_info(hdev, "patching Glorious Model O consumer control report descriptor\n"); rdesc[85] = rdesc[113] = rdesc[141] = \ HID_MAIN_ITEM_VARIABLE | HID_MAIN_ITEM_RELATIVE; } if (*rsize == 156 && rdesc[41] == 1) { hid_info(hdev, "patching Glorious Model I keyboard report descriptor\n"); rdesc[41] = 0; } return rdesc; } static void glorious_update_name(struct hid_device *hdev) { const char *model = "Device"; switch (hdev->product) { case USB_DEVICE_ID_GLORIOUS_MODEL_O: model = "Model O"; break; case USB_DEVICE_ID_GLORIOUS_MODEL_D: model = "Model D"; break; case USB_DEVICE_ID_GLORIOUS_MODEL_I: model = "Model I"; break; } snprintf(hdev->name, sizeof(hdev->name), "%s %s", "Glorious", model); } static int glorious_probe(struct hid_device *hdev, const struct hid_device_id *id) { int ret; hdev->quirks |= HID_QUIRK_INPUT_PER_APP; ret = hid_parse(hdev); if (ret) return ret; glorious_update_name(hdev); return hid_hw_start(hdev, HID_CONNECT_DEFAULT); } static const struct hid_device_id glorious_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_SINOWEALTH, USB_DEVICE_ID_GLORIOUS_MODEL_O) }, { HID_USB_DEVICE(USB_VENDOR_ID_SINOWEALTH, USB_DEVICE_ID_GLORIOUS_MODEL_D) }, { HID_USB_DEVICE(USB_VENDOR_ID_LAVIEW, USB_DEVICE_ID_GLORIOUS_MODEL_I) }, { } }; MODULE_DEVICE_TABLE(hid, glorious_devices); static struct hid_driver glorious_driver = { .name = "glorious", .id_table = glorious_devices, .probe = glorious_probe, .report_fixup = glorious_report_fixup }; module_hid_driver(glorious_driver); MODULE_LICENSE("GPL");
2 2 7 5 4 1 1 1 6 2 3 1 2 2 2 1 1 3 2 1 1 2 2 1 1 3 1 1 1 1 1 1 1 7 1 1 6 2 5 1 6 6 1 1 4 4 1 12 12 1 1 4 5 3 5 3 3 5 8 8 7 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 // SPDX-License-Identifier: GPL-2.0+ /* * Generic USB driver for report based interrupt in/out devices * like LD Didactic's USB devices. LD Didactic's USB devices are * HID devices which do not use HID report definitons (they use * raw interrupt in and our reports only for communication). * * This driver uses a ring buffer for time critical reading of * interrupt in reports and provides read and write methods for * raw interrupt reports (similar to the Windows HID driver). * Devices based on the book USB COMPLETE by Jan Axelson may need * such a compatibility to the Windows HID driver. * * Copyright (C) 2005 Michael Hund <mhund@ld-didactic.de> * * Derived from Lego USB Tower driver * Copyright (C) 2003 David Glance <advidgsf@sourceforge.net> * 2001-2004 Juergen Stuber <starblue@users.sourceforge.net> */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/mutex.h> #include <linux/uaccess.h> #include <linux/input.h> #include <linux/usb.h> #include <linux/poll.h> /* Define these values to match your devices */ #define USB_VENDOR_ID_LD 0x0f11 /* USB Vendor ID of LD Didactic GmbH */ #define USB_DEVICE_ID_LD_CASSY 0x1000 /* USB Product ID of CASSY-S modules with 8 bytes endpoint size */ #define USB_DEVICE_ID_LD_CASSY2 0x1001 /* USB Product ID of CASSY-S modules with 64 bytes endpoint size */ #define USB_DEVICE_ID_LD_POCKETCASSY 0x1010 /* USB Product ID of Pocket-CASSY */ #define USB_DEVICE_ID_LD_POCKETCASSY2 0x1011 /* USB Product ID of Pocket-CASSY 2 (reserved) */ #define USB_DEVICE_ID_LD_MOBILECASSY 0x1020 /* USB Product ID of Mobile-CASSY */ #define USB_DEVICE_ID_LD_MOBILECASSY2 0x1021 /* USB Product ID of Mobile-CASSY 2 (reserved) */ #define USB_DEVICE_ID_LD_MICROCASSYVOLTAGE 0x1031 /* USB Product ID of Micro-CASSY Voltage */ #define USB_DEVICE_ID_LD_MICROCASSYCURRENT 0x1032 /* USB Product ID of Micro-CASSY Current */ #define USB_DEVICE_ID_LD_MICROCASSYTIME 0x1033 /* USB Product ID of Micro-CASSY Time (reserved) */ #define USB_DEVICE_ID_LD_MICROCASSYTEMPERATURE 0x1035 /* USB Product ID of Micro-CASSY Temperature */ #define USB_DEVICE_ID_LD_MICROCASSYPH 0x1038 /* USB Product ID of Micro-CASSY pH */ #define USB_DEVICE_ID_LD_POWERANALYSERCASSY 0x1040 /* USB Product ID of Power Analyser CASSY */ #define USB_DEVICE_ID_LD_CONVERTERCONTROLLERCASSY 0x1042 /* USB Product ID of Converter Controller CASSY */ #define USB_DEVICE_ID_LD_MACHINETESTCASSY 0x1043 /* USB Product ID of Machine Test CASSY */ #define USB_DEVICE_ID_LD_JWM 0x1080 /* USB Product ID of Joule and Wattmeter */ #define USB_DEVICE_ID_LD_DMMP 0x1081 /* USB Product ID of Digital Multimeter P (reserved) */ #define USB_DEVICE_ID_LD_UMIP 0x1090 /* USB Product ID of UMI P */ #define USB_DEVICE_ID_LD_UMIC 0x10A0 /* USB Product ID of UMI C */ #define USB_DEVICE_ID_LD_UMIB 0x10B0 /* USB Product ID of UMI B */ #define USB_DEVICE_ID_LD_XRAY 0x1100 /* USB Product ID of X-Ray Apparatus 55481 */ #define USB_DEVICE_ID_LD_XRAY2 0x1101 /* USB Product ID of X-Ray Apparatus 554800 */ #define USB_DEVICE_ID_LD_XRAYCT 0x1110 /* USB Product ID of X-Ray Apparatus CT 554821*/ #define USB_DEVICE_ID_LD_VIDEOCOM 0x1200 /* USB Product ID of VideoCom */ #define USB_DEVICE_ID_LD_MOTOR 0x1210 /* USB Product ID of Motor (reserved) */ #define USB_DEVICE_ID_LD_COM3LAB 0x2000 /* USB Product ID of COM3LAB */ #define USB_DEVICE_ID_LD_TELEPORT 0x2010 /* USB Product ID of Terminal Adapter */ #define USB_DEVICE_ID_LD_NETWORKANALYSER 0x2020 /* USB Product ID of Network Analyser */ #define USB_DEVICE_ID_LD_POWERCONTROL 0x2030 /* USB Product ID of Converter Control Unit */ #define USB_DEVICE_ID_LD_MACHINETEST 0x2040 /* USB Product ID of Machine Test System */ #define USB_DEVICE_ID_LD_MOSTANALYSER 0x2050 /* USB Product ID of MOST Protocol Analyser */ #define USB_DEVICE_ID_LD_MOSTANALYSER2 0x2051 /* USB Product ID of MOST Protocol Analyser 2 */ #define USB_DEVICE_ID_LD_ABSESP 0x2060 /* USB Product ID of ABS ESP */ #define USB_DEVICE_ID_LD_AUTODATABUS 0x2070 /* USB Product ID of Automotive Data Buses */ #define USB_DEVICE_ID_LD_MCT 0x2080 /* USB Product ID of Microcontroller technique */ #define USB_DEVICE_ID_LD_HYBRID 0x2090 /* USB Product ID of Automotive Hybrid */ #define USB_DEVICE_ID_LD_HEATCONTROL 0x20A0 /* USB Product ID of Heat control */ #ifdef CONFIG_USB_DYNAMIC_MINORS #define USB_LD_MINOR_BASE 0 #else #define USB_LD_MINOR_BASE 176 #endif /* table of devices that work with this driver */ static const struct usb_device_id ld_usb_table[] = { { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_CASSY) }, { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_CASSY2) }, { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_POCKETCASSY) }, { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_POCKETCASSY2) }, { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MOBILECASSY) }, { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MOBILECASSY2) }, { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MICROCASSYVOLTAGE) }, { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MICROCASSYCURRENT) }, { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MICROCASSYTIME) }, { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MICROCASSYTEMPERATURE) }, { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MICROCASSYPH) }, { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_POWERANALYSERCASSY) }, { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_CONVERTERCONTROLLERCASSY) }, { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MACHINETESTCASSY) }, { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_JWM) }, { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_DMMP) }, { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_UMIP) }, { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_UMIC) }, { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_UMIB) }, { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_XRAY) }, { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_XRAY2) }, { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_VIDEOCOM) }, { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MOTOR) }, { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_COM3LAB) }, { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_TELEPORT) }, { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_NETWORKANALYSER) }, { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_POWERCONTROL) }, { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MACHINETEST) }, { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MOSTANALYSER) }, { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MOSTANALYSER2) }, { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_ABSESP) }, { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_AUTODATABUS) }, { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MCT) }, { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_HYBRID) }, { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_HEATCONTROL) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, ld_usb_table); MODULE_AUTHOR("Michael Hund <mhund@ld-didactic.de>"); MODULE_DESCRIPTION("LD USB Driver"); MODULE_LICENSE("GPL"); /* All interrupt in transfers are collected in a ring buffer to * avoid racing conditions and get better performance of the driver. */ static int ring_buffer_size = 128; module_param(ring_buffer_size, int, 0000); MODULE_PARM_DESC(ring_buffer_size, "Read ring buffer size in reports"); /* The write_buffer can contain more than one interrupt out transfer. */ static int write_buffer_size = 10; module_param(write_buffer_size, int, 0000); MODULE_PARM_DESC(write_buffer_size, "Write buffer size in reports"); /* As of kernel version 2.6.4 ehci-hcd uses an * "only one interrupt transfer per frame" shortcut * to simplify the scheduling of periodic transfers. * This conflicts with our standard 1ms intervals for in and out URBs. * We use default intervals of 2ms for in and 2ms for out transfers, * which should be fast enough. * Increase the interval to allow more devices that do interrupt transfers, * or set to 1 to use the standard interval from the endpoint descriptors. */ static int min_interrupt_in_interval = 2; module_param(min_interrupt_in_interval, int, 0000); MODULE_PARM_DESC(min_interrupt_in_interval, "Minimum interrupt in interval in ms"); static int min_interrupt_out_interval = 2; module_param(min_interrupt_out_interval, int, 0000); MODULE_PARM_DESC(min_interrupt_out_interval, "Minimum interrupt out interval in ms"); /* Structure to hold all of our device specific stuff */ struct ld_usb { struct mutex mutex; /* locks this structure */ struct usb_interface *intf; /* save off the usb interface pointer */ unsigned long disconnected:1; int open_count; /* number of times this port has been opened */ char *ring_buffer; unsigned int ring_head; unsigned int ring_tail; wait_queue_head_t read_wait; wait_queue_head_t write_wait; char *interrupt_in_buffer; struct usb_endpoint_descriptor *interrupt_in_endpoint; struct urb *interrupt_in_urb; int interrupt_in_interval; size_t interrupt_in_endpoint_size; int interrupt_in_running; int interrupt_in_done; int buffer_overflow; spinlock_t rbsl; char *interrupt_out_buffer; struct usb_endpoint_descriptor *interrupt_out_endpoint; struct urb *interrupt_out_urb; int interrupt_out_interval; size_t interrupt_out_endpoint_size; int interrupt_out_busy; }; static struct usb_driver ld_usb_driver; /* * ld_usb_abort_transfers * aborts transfers and frees associated data structures */ static void ld_usb_abort_transfers(struct ld_usb *dev) { /* shutdown transfer */ if (dev->interrupt_in_running) { dev->interrupt_in_running = 0; usb_kill_urb(dev->interrupt_in_urb); } if (dev->interrupt_out_busy) usb_kill_urb(dev->interrupt_out_urb); } /* * ld_usb_delete */ static void ld_usb_delete(struct ld_usb *dev) { /* free data structures */ usb_free_urb(dev->interrupt_in_urb); usb_free_urb(dev->interrupt_out_urb); kfree(dev->ring_buffer); kfree(dev->interrupt_in_buffer); kfree(dev->interrupt_out_buffer); kfree(dev); } /* * ld_usb_interrupt_in_callback */ static void ld_usb_interrupt_in_callback(struct urb *urb) { struct ld_usb *dev = urb->context; size_t *actual_buffer; unsigned int next_ring_head; int status = urb->status; unsigned long flags; int retval; if (status) { if (status == -ENOENT || status == -ECONNRESET || status == -ESHUTDOWN) { goto exit; } else { dev_dbg(&dev->intf->dev, "%s: nonzero status received: %d\n", __func__, status); spin_lock_irqsave(&dev->rbsl, flags); goto resubmit; /* maybe we can recover */ } } spin_lock_irqsave(&dev->rbsl, flags); if (urb->actual_length > 0) { next_ring_head = (dev->ring_head+1) % ring_buffer_size; if (next_ring_head != dev->ring_tail) { actual_buffer = (size_t *)(dev->ring_buffer + dev->ring_head * (sizeof(size_t)+dev->interrupt_in_endpoint_size)); /* actual_buffer gets urb->actual_length + interrupt_in_buffer */ *actual_buffer = urb->actual_length; memcpy(actual_buffer+1, dev->interrupt_in_buffer, urb->actual_length); dev->ring_head = next_ring_head; dev_dbg(&dev->intf->dev, "%s: received %d bytes\n", __func__, urb->actual_length); } else { dev_warn(&dev->intf->dev, "Ring buffer overflow, %d bytes dropped\n", urb->actual_length); dev->buffer_overflow = 1; } } resubmit: /* resubmit if we're still running */ if (dev->interrupt_in_running && !dev->buffer_overflow) { retval = usb_submit_urb(dev->interrupt_in_urb, GFP_ATOMIC); if (retval) { dev_err(&dev->intf->dev, "usb_submit_urb failed (%d)\n", retval); dev->buffer_overflow = 1; } } spin_unlock_irqrestore(&dev->rbsl, flags); exit: dev->interrupt_in_done = 1; wake_up_interruptible(&dev->read_wait); } /* * ld_usb_interrupt_out_callback */ static void ld_usb_interrupt_out_callback(struct urb *urb) { struct ld_usb *dev = urb->context; int status = urb->status; /* sync/async unlink faults aren't errors */ if (status && !(status == -ENOENT || status == -ECONNRESET || status == -ESHUTDOWN)) dev_dbg(&dev->intf->dev, "%s - nonzero write interrupt status received: %d\n", __func__, status); dev->interrupt_out_busy = 0; wake_up_interruptible(&dev->write_wait); } /* * ld_usb_open */ static int ld_usb_open(struct inode *inode, struct file *file) { struct ld_usb *dev; int subminor; int retval; struct usb_interface *interface; stream_open(inode, file); subminor = iminor(inode); interface = usb_find_interface(&ld_usb_driver, subminor); if (!interface) { printk(KERN_ERR "%s - error, can't find device for minor %d\n", __func__, subminor); return -ENODEV; } dev = usb_get_intfdata(interface); if (!dev) return -ENODEV; /* lock this device */ if (mutex_lock_interruptible(&dev->mutex)) return -ERESTARTSYS; /* allow opening only once */ if (dev->open_count) { retval = -EBUSY; goto unlock_exit; } dev->open_count = 1; /* initialize in direction */ dev->ring_head = 0; dev->ring_tail = 0; dev->buffer_overflow = 0; usb_fill_int_urb(dev->interrupt_in_urb, interface_to_usbdev(interface), usb_rcvintpipe(interface_to_usbdev(interface), dev->interrupt_in_endpoint->bEndpointAddress), dev->interrupt_in_buffer, dev->interrupt_in_endpoint_size, ld_usb_interrupt_in_callback, dev, dev->interrupt_in_interval); dev->interrupt_in_running = 1; dev->interrupt_in_done = 0; retval = usb_submit_urb(dev->interrupt_in_urb, GFP_KERNEL); if (retval) { dev_err(&interface->dev, "Couldn't submit interrupt_in_urb %d\n", retval); dev->interrupt_in_running = 0; dev->open_count = 0; goto unlock_exit; } /* save device in the file's private structure */ file->private_data = dev; unlock_exit: mutex_unlock(&dev->mutex); return retval; } /* * ld_usb_release */ static int ld_usb_release(struct inode *inode, struct file *file) { struct ld_usb *dev; int retval = 0; dev = file->private_data; if (dev == NULL) { retval = -ENODEV; goto exit; } mutex_lock(&dev->mutex); if (dev->open_count != 1) { retval = -ENODEV; goto unlock_exit; } if (dev->disconnected) { /* the device was unplugged before the file was released */ mutex_unlock(&dev->mutex); /* unlock here as ld_usb_delete frees dev */ ld_usb_delete(dev); goto exit; } /* wait until write transfer is finished */ if (dev->interrupt_out_busy) wait_event_interruptible_timeout(dev->write_wait, !dev->interrupt_out_busy, 2 * HZ); ld_usb_abort_transfers(dev); dev->open_count = 0; unlock_exit: mutex_unlock(&dev->mutex); exit: return retval; } /* * ld_usb_poll */ static __poll_t ld_usb_poll(struct file *file, poll_table *wait) { struct ld_usb *dev; __poll_t mask = 0; dev = file->private_data; if (dev->disconnected) return EPOLLERR | EPOLLHUP; poll_wait(file, &dev->read_wait, wait); poll_wait(file, &dev->write_wait, wait); if (dev->ring_head != dev->ring_tail) mask |= EPOLLIN | EPOLLRDNORM; if (!dev->interrupt_out_busy) mask |= EPOLLOUT | EPOLLWRNORM; return mask; } /* * ld_usb_read */ static ssize_t ld_usb_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) { struct ld_usb *dev; size_t *actual_buffer; size_t bytes_to_read; int retval = 0; int rv; dev = file->private_data; /* verify that we actually have some data to read */ if (count == 0) goto exit; /* lock this object */ if (mutex_lock_interruptible(&dev->mutex)) { retval = -ERESTARTSYS; goto exit; } /* verify that the device wasn't unplugged */ if (dev->disconnected) { retval = -ENODEV; printk(KERN_ERR "ldusb: No device or device unplugged %d\n", retval); goto unlock_exit; } /* wait for data */ spin_lock_irq(&dev->rbsl); while (dev->ring_head == dev->ring_tail) { dev->interrupt_in_done = 0; spin_unlock_irq(&dev->rbsl); if (file->f_flags & O_NONBLOCK) { retval = -EAGAIN; goto unlock_exit; } retval = wait_event_interruptible(dev->read_wait, dev->interrupt_in_done); if (retval < 0) goto unlock_exit; spin_lock_irq(&dev->rbsl); } spin_unlock_irq(&dev->rbsl); /* actual_buffer contains actual_length + interrupt_in_buffer */ actual_buffer = (size_t *)(dev->ring_buffer + dev->ring_tail * (sizeof(size_t)+dev->interrupt_in_endpoint_size)); if (*actual_buffer > dev->interrupt_in_endpoint_size) { retval = -EIO; goto unlock_exit; } bytes_to_read = min(count, *actual_buffer); if (bytes_to_read < *actual_buffer) dev_warn(&dev->intf->dev, "Read buffer overflow, %zu bytes dropped\n", *actual_buffer-bytes_to_read); /* copy one interrupt_in_buffer from ring_buffer into userspace */ if (copy_to_user(buffer, actual_buffer+1, bytes_to_read)) { retval = -EFAULT; goto unlock_exit; } retval = bytes_to_read; spin_lock_irq(&dev->rbsl); dev->ring_tail = (dev->ring_tail + 1) % ring_buffer_size; if (dev->buffer_overflow) { dev->buffer_overflow = 0; spin_unlock_irq(&dev->rbsl); rv = usb_submit_urb(dev->interrupt_in_urb, GFP_KERNEL); if (rv < 0) dev->buffer_overflow = 1; } else { spin_unlock_irq(&dev->rbsl); } unlock_exit: /* unlock the device */ mutex_unlock(&dev->mutex); exit: return retval; } /* * ld_usb_write */ static ssize_t ld_usb_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { struct ld_usb *dev; size_t bytes_to_write; int retval = 0; dev = file->private_data; /* verify that we actually have some data to write */ if (count == 0) goto exit; /* lock this object */ if (mutex_lock_interruptible(&dev->mutex)) { retval = -ERESTARTSYS; goto exit; } /* verify that the device wasn't unplugged */ if (dev->disconnected) { retval = -ENODEV; printk(KERN_ERR "ldusb: No device or device unplugged %d\n", retval); goto unlock_exit; } /* wait until previous transfer is finished */ if (dev->interrupt_out_busy) { if (file->f_flags & O_NONBLOCK) { retval = -EAGAIN; goto unlock_exit; } retval = wait_event_interruptible(dev->write_wait, !dev->interrupt_out_busy); if (retval < 0) { goto unlock_exit; } } /* write the data into interrupt_out_buffer from userspace */ bytes_to_write = min(count, write_buffer_size*dev->interrupt_out_endpoint_size); if (bytes_to_write < count) dev_warn(&dev->intf->dev, "Write buffer overflow, %zu bytes dropped\n", count - bytes_to_write); dev_dbg(&dev->intf->dev, "%s: count = %zu, bytes_to_write = %zu\n", __func__, count, bytes_to_write); if (copy_from_user(dev->interrupt_out_buffer, buffer, bytes_to_write)) { retval = -EFAULT; goto unlock_exit; } if (dev->interrupt_out_endpoint == NULL) { /* try HID_REQ_SET_REPORT=9 on control_endpoint instead of interrupt_out_endpoint */ retval = usb_control_msg(interface_to_usbdev(dev->intf), usb_sndctrlpipe(interface_to_usbdev(dev->intf), 0), 9, USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_OUT, 1 << 8, 0, dev->interrupt_out_buffer, bytes_to_write, USB_CTRL_SET_TIMEOUT); if (retval < 0) dev_err(&dev->intf->dev, "Couldn't submit HID_REQ_SET_REPORT %d\n", retval); goto unlock_exit; } /* send off the urb */ usb_fill_int_urb(dev->interrupt_out_urb, interface_to_usbdev(dev->intf), usb_sndintpipe(interface_to_usbdev(dev->intf), dev->interrupt_out_endpoint->bEndpointAddress), dev->interrupt_out_buffer, bytes_to_write, ld_usb_interrupt_out_callback, dev, dev->interrupt_out_interval); dev->interrupt_out_busy = 1; wmb(); retval = usb_submit_urb(dev->interrupt_out_urb, GFP_KERNEL); if (retval) { dev->interrupt_out_busy = 0; dev_err(&dev->intf->dev, "Couldn't submit interrupt_out_urb %d\n", retval); goto unlock_exit; } retval = bytes_to_write; unlock_exit: /* unlock the device */ mutex_unlock(&dev->mutex); exit: return retval; } /* file operations needed when we register this driver */ static const struct file_operations ld_usb_fops = { .owner = THIS_MODULE, .read = ld_usb_read, .write = ld_usb_write, .open = ld_usb_open, .release = ld_usb_release, .poll = ld_usb_poll, }; /* * usb class driver info in order to get a minor number from the usb core, * and to have the device registered with the driver core */ static struct usb_class_driver ld_usb_class = { .name = "ldusb%d", .fops = &ld_usb_fops, .minor_base = USB_LD_MINOR_BASE, }; /* * ld_usb_probe * * Called by the usb core when a new device is connected that it thinks * this driver might be interested in. */ static int ld_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *udev = interface_to_usbdev(intf); struct ld_usb *dev = NULL; struct usb_host_interface *iface_desc; char *buffer; int retval = -ENOMEM; int res; /* allocate memory for our device state and initialize it */ dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) goto exit; mutex_init(&dev->mutex); spin_lock_init(&dev->rbsl); dev->intf = intf; init_waitqueue_head(&dev->read_wait); init_waitqueue_head(&dev->write_wait); /* workaround for early firmware versions on fast computers */ if ((le16_to_cpu(udev->descriptor.idVendor) == USB_VENDOR_ID_LD) && ((le16_to_cpu(udev->descriptor.idProduct) == USB_DEVICE_ID_LD_CASSY) || (le16_to_cpu(udev->descriptor.idProduct) == USB_DEVICE_ID_LD_COM3LAB)) && (le16_to_cpu(udev->descriptor.bcdDevice) <= 0x103)) { buffer = kmalloc(256, GFP_KERNEL); if (!buffer) goto error; /* usb_string makes SETUP+STALL to leave always ControlReadLoop */ usb_string(udev, 255, buffer, 256); kfree(buffer); } iface_desc = intf->cur_altsetting; res = usb_find_last_int_in_endpoint(iface_desc, &dev->interrupt_in_endpoint); if (res) { dev_err(&intf->dev, "Interrupt in endpoint not found\n"); retval = res; goto error; } res = usb_find_last_int_out_endpoint(iface_desc, &dev->interrupt_out_endpoint); if (res) dev_warn(&intf->dev, "Interrupt out endpoint not found (using control endpoint instead)\n"); dev->interrupt_in_endpoint_size = usb_endpoint_maxp(dev->interrupt_in_endpoint); dev->ring_buffer = kcalloc(ring_buffer_size, sizeof(size_t) + dev->interrupt_in_endpoint_size, GFP_KERNEL); if (!dev->ring_buffer) goto error; dev->interrupt_in_buffer = kmalloc(dev->interrupt_in_endpoint_size, GFP_KERNEL); if (!dev->interrupt_in_buffer) goto error; dev->interrupt_in_urb = usb_alloc_urb(0, GFP_KERNEL); if (!dev->interrupt_in_urb) goto error; dev->interrupt_out_endpoint_size = dev->interrupt_out_endpoint ? usb_endpoint_maxp(dev->interrupt_out_endpoint) : udev->descriptor.bMaxPacketSize0; dev->interrupt_out_buffer = kmalloc_array(write_buffer_size, dev->interrupt_out_endpoint_size, GFP_KERNEL); if (!dev->interrupt_out_buffer) goto error; dev->interrupt_out_urb = usb_alloc_urb(0, GFP_KERNEL); if (!dev->interrupt_out_urb) goto error; dev->interrupt_in_interval = max_t(int, min_interrupt_in_interval, dev->interrupt_in_endpoint->bInterval); if (dev->interrupt_out_endpoint) dev->interrupt_out_interval = max_t(int, min_interrupt_out_interval, dev->interrupt_out_endpoint->bInterval); /* we can register the device now, as it is ready */ usb_set_intfdata(intf, dev); retval = usb_register_dev(intf, &ld_usb_class); if (retval) { /* something prevented us from registering this driver */ dev_err(&intf->dev, "Not able to get a minor for this device.\n"); usb_set_intfdata(intf, NULL); goto error; } /* let the user know what node this device is now attached to */ dev_info(&intf->dev, "LD USB Device #%d now attached to major %d minor %d\n", (intf->minor - USB_LD_MINOR_BASE), USB_MAJOR, intf->minor); exit: return retval; error: ld_usb_delete(dev); return retval; } /* * ld_usb_disconnect * * Called by the usb core when the device is removed from the system. */ static void ld_usb_disconnect(struct usb_interface *intf) { struct ld_usb *dev; int minor; dev = usb_get_intfdata(intf); usb_set_intfdata(intf, NULL); minor = intf->minor; /* give back our minor */ usb_deregister_dev(intf, &ld_usb_class); usb_poison_urb(dev->interrupt_in_urb); usb_poison_urb(dev->interrupt_out_urb); mutex_lock(&dev->mutex); /* if the device is not opened, then we clean up right now */ if (!dev->open_count) { mutex_unlock(&dev->mutex); ld_usb_delete(dev); } else { dev->disconnected = 1; /* wake up pollers */ wake_up_interruptible_all(&dev->read_wait); wake_up_interruptible_all(&dev->write_wait); mutex_unlock(&dev->mutex); } dev_info(&intf->dev, "LD USB Device #%d now disconnected\n", (minor - USB_LD_MINOR_BASE)); } /* usb specific object needed to register this driver with the usb subsystem */ static struct usb_driver ld_usb_driver = { .name = "ldusb", .probe = ld_usb_probe, .disconnect = ld_usb_disconnect, .id_table = ld_usb_table, }; module_usb_driver(ld_usb_driver);
1 1 1 1 1 1 2 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 // SPDX-License-Identifier: GPL-2.0+ /* * hwmon driver for Gigabyte AORUS Waterforce AIO CPU coolers: X240, X280 and X360. * * Copyright 2023 Aleksa Savic <savicaleksa83@gmail.com> */ #include <linux/debugfs.h> #include <linux/hid.h> #include <linux/hwmon.h> #include <linux/jiffies.h> #include <linux/module.h> #include <linux/spinlock.h> #include <linux/unaligned.h> #define DRIVER_NAME "gigabyte_waterforce" #define USB_VENDOR_ID_GIGABYTE 0x1044 #define USB_PRODUCT_ID_WATERFORCE 0x7a4d /* Gigabyte AORUS WATERFORCE X240, X280 and X360 */ #define STATUS_VALIDITY (2 * 1000) /* ms */ #define MAX_REPORT_LENGTH 6144 #define WATERFORCE_TEMP_SENSOR 0xD #define WATERFORCE_FAN_SPEED 0x02 #define WATERFORCE_PUMP_SPEED 0x05 #define WATERFORCE_FAN_DUTY 0x08 #define WATERFORCE_PUMP_DUTY 0x09 /* Control commands, inner offsets and lengths */ static const u8 get_status_cmd[] = { 0x99, 0xDA }; #define FIRMWARE_VER_START_OFFSET_1 2 #define FIRMWARE_VER_START_OFFSET_2 3 static const u8 get_firmware_ver_cmd[] = { 0x99, 0xD6 }; /* Command lengths */ #define GET_STATUS_CMD_LENGTH 2 #define GET_FIRMWARE_VER_CMD_LENGTH 2 static const char *const waterforce_temp_label[] = { "Coolant temp" }; static const char *const waterforce_speed_label[] = { "Fan speed", "Pump speed" }; struct waterforce_data { struct hid_device *hdev; struct device *hwmon_dev; struct dentry *debugfs; /* For locking access to buffer */ struct mutex buffer_lock; /* For queueing multiple readers */ struct mutex status_report_request_mutex; /* For reinitializing the completion below */ spinlock_t status_report_request_lock; struct completion status_report_received; struct completion fw_version_processed; /* Sensor data */ s32 temp_input[1]; u16 speed_input[2]; /* Fan and pump speed in RPM */ u8 duty_input[2]; /* Fan and pump duty in 0-100% */ u8 *buffer; int firmware_version; unsigned long updated; /* jiffies */ }; static umode_t waterforce_is_visible(const void *data, enum hwmon_sensor_types type, u32 attr, int channel) { switch (type) { case hwmon_temp: switch (attr) { case hwmon_temp_label: case hwmon_temp_input: return 0444; default: break; } break; case hwmon_fan: switch (attr) { case hwmon_fan_label: case hwmon_fan_input: return 0444; default: break; } break; case hwmon_pwm: switch (attr) { case hwmon_pwm_input: return 0444; default: break; } break; default: break; } return 0; } /* Writes the command to the device with the rest of the report filled with zeroes */ static int waterforce_write_expanded(struct waterforce_data *priv, const u8 *cmd, int cmd_length) { int ret; mutex_lock(&priv->buffer_lock); memcpy_and_pad(priv->buffer, MAX_REPORT_LENGTH, cmd, cmd_length, 0x00); ret = hid_hw_output_report(priv->hdev, priv->buffer, MAX_REPORT_LENGTH); mutex_unlock(&priv->buffer_lock); return ret; } static int waterforce_get_status(struct waterforce_data *priv) { int ret = mutex_lock_interruptible(&priv->status_report_request_mutex); if (ret < 0) return ret; if (!time_after(jiffies, priv->updated + msecs_to_jiffies(STATUS_VALIDITY))) { /* Data is up to date */ goto unlock_and_return; } /* * Disable raw event parsing for a moment to safely reinitialize the * completion. Reinit is done because hidraw could have triggered * the raw event parsing and marked the priv->status_report_received * completion as done. */ spin_lock_bh(&priv->status_report_request_lock); reinit_completion(&priv->status_report_received); spin_unlock_bh(&priv->status_report_request_lock); /* Send command for getting status */ ret = waterforce_write_expanded(priv, get_status_cmd, GET_STATUS_CMD_LENGTH); if (ret < 0) goto unlock_and_return; ret = wait_for_completion_interruptible_timeout(&priv->status_report_received, msecs_to_jiffies(STATUS_VALIDITY)); if (ret == 0) ret = -ETIMEDOUT; unlock_and_return: mutex_unlock(&priv->status_report_request_mutex); if (ret < 0) return ret; return 0; } static int waterforce_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel, long *val) { struct waterforce_data *priv = dev_get_drvdata(dev); int ret = waterforce_get_status(priv); if (ret < 0) return ret; switch (type) { case hwmon_temp: *val = priv->temp_input[channel]; break; case hwmon_fan: *val = priv->speed_input[channel]; break; case hwmon_pwm: switch (attr) { case hwmon_pwm_input: *val = DIV_ROUND_CLOSEST(priv->duty_input[channel] * 255, 100); break; default: return -EOPNOTSUPP; } break; default: return -EOPNOTSUPP; /* unreachable */ } return 0; } static int waterforce_read_string(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel, const char **str) { switch (type) { case hwmon_temp: *str = waterforce_temp_label[channel]; break; case hwmon_fan: *str = waterforce_speed_label[channel]; break; default: return -EOPNOTSUPP; /* unreachable */ } return 0; } static int waterforce_get_fw_ver(struct hid_device *hdev) { struct waterforce_data *priv = hid_get_drvdata(hdev); int ret; ret = waterforce_write_expanded(priv, get_firmware_ver_cmd, GET_FIRMWARE_VER_CMD_LENGTH); if (ret < 0) return ret; ret = wait_for_completion_interruptible_timeout(&priv->fw_version_processed, msecs_to_jiffies(STATUS_VALIDITY)); if (ret == 0) return -ETIMEDOUT; else if (ret < 0) return ret; return 0; } static const struct hwmon_ops waterforce_hwmon_ops = { .is_visible = waterforce_is_visible, .read = waterforce_read, .read_string = waterforce_read_string }; static const struct hwmon_channel_info *waterforce_info[] = { HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT | HWMON_T_LABEL), HWMON_CHANNEL_INFO(fan, HWMON_F_INPUT | HWMON_F_LABEL, HWMON_F_INPUT | HWMON_F_LABEL), HWMON_CHANNEL_INFO(pwm, HWMON_PWM_INPUT, HWMON_PWM_INPUT), NULL }; static const struct hwmon_chip_info waterforce_chip_info = { .ops = &waterforce_hwmon_ops, .info = waterforce_info, }; static int waterforce_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data, int size) { struct waterforce_data *priv = hid_get_drvdata(hdev); if (data[0] == get_firmware_ver_cmd[0] && data[1] == get_firmware_ver_cmd[1]) { /* Received a firmware version report */ priv->firmware_version = data[FIRMWARE_VER_START_OFFSET_1] * 10 + data[FIRMWARE_VER_START_OFFSET_2]; if (!completion_done(&priv->fw_version_processed)) complete_all(&priv->fw_version_processed); return 0; } if (data[0] != get_status_cmd[0] || data[1] != get_status_cmd[1]) return 0; priv->temp_input[0] = data[WATERFORCE_TEMP_SENSOR] * 1000; priv->speed_input[0] = get_unaligned_le16(data + WATERFORCE_FAN_SPEED); priv->speed_input[1] = get_unaligned_le16(data + WATERFORCE_PUMP_SPEED); priv->duty_input[0] = data[WATERFORCE_FAN_DUTY]; priv->duty_input[1] = data[WATERFORCE_PUMP_DUTY]; spin_lock(&priv->status_report_request_lock); if (!completion_done(&priv->status_report_received)) complete_all(&priv->status_report_received); spin_unlock(&priv->status_report_request_lock); priv->updated = jiffies; return 0; } static int firmware_version_show(struct seq_file *seqf, void *unused) { struct waterforce_data *priv = seqf->private; seq_printf(seqf, "%u\n", priv->firmware_version); return 0; } DEFINE_SHOW_ATTRIBUTE(firmware_version); static void waterforce_debugfs_init(struct waterforce_data *priv) { char name[64]; if (!priv->firmware_version) return; /* There's nothing to show in debugfs */ scnprintf(name, sizeof(name), "%s-%s", DRIVER_NAME, dev_name(&priv->hdev->dev)); priv->debugfs = debugfs_create_dir(name, NULL); debugfs_create_file("firmware_version", 0444, priv->debugfs, priv, &firmware_version_fops); } static int waterforce_probe(struct hid_device *hdev, const struct hid_device_id *id) { struct waterforce_data *priv; int ret; priv = devm_kzalloc(&hdev->dev, sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; priv->hdev = hdev; hid_set_drvdata(hdev, priv); /* * Initialize priv->updated to STATUS_VALIDITY seconds in the past, making * the initial empty data invalid for waterforce_read() without the need for * a special case there. */ priv->updated = jiffies - msecs_to_jiffies(STATUS_VALIDITY); ret = hid_parse(hdev); if (ret) { hid_err(hdev, "hid parse failed with %d\n", ret); return ret; } /* * Enable hidraw so existing user-space tools can continue to work. */ ret = hid_hw_start(hdev, HID_CONNECT_HIDRAW); if (ret) { hid_err(hdev, "hid hw start failed with %d\n", ret); return ret; } ret = hid_hw_open(hdev); if (ret) { hid_err(hdev, "hid hw open failed with %d\n", ret); goto fail_and_stop; } priv->buffer = devm_kzalloc(&hdev->dev, MAX_REPORT_LENGTH, GFP_KERNEL); if (!priv->buffer) { ret = -ENOMEM; goto fail_and_close; } mutex_init(&priv->status_report_request_mutex); mutex_init(&priv->buffer_lock); spin_lock_init(&priv->status_report_request_lock); init_completion(&priv->status_report_received); init_completion(&priv->fw_version_processed); hid_device_io_start(hdev); ret = waterforce_get_fw_ver(hdev); if (ret < 0) hid_warn(hdev, "fw version request failed with %d\n", ret); priv->hwmon_dev = hwmon_device_register_with_info(&hdev->dev, "waterforce", priv, &waterforce_chip_info, NULL); if (IS_ERR(priv->hwmon_dev)) { ret = PTR_ERR(priv->hwmon_dev); hid_err(hdev, "hwmon registration failed with %d\n", ret); goto fail_and_close; } waterforce_debugfs_init(priv); return 0; fail_and_close: hid_hw_close(hdev); fail_and_stop: hid_hw_stop(hdev); return ret; } static void waterforce_remove(struct hid_device *hdev) { struct waterforce_data *priv = hid_get_drvdata(hdev); debugfs_remove_recursive(priv->debugfs); hwmon_device_unregister(priv->hwmon_dev); hid_hw_close(hdev); hid_hw_stop(hdev); } static const struct hid_device_id waterforce_table[] = { { HID_USB_DEVICE(USB_VENDOR_ID_GIGABYTE, USB_PRODUCT_ID_WATERFORCE) }, { } }; MODULE_DEVICE_TABLE(hid, waterforce_table); static struct hid_driver waterforce_driver = { .name = "waterforce", .id_table = waterforce_table, .probe = waterforce_probe, .remove = waterforce_remove, .raw_event = waterforce_raw_event, }; static int __init waterforce_init(void) { return hid_register_driver(&waterforce_driver); } static void __exit waterforce_exit(void) { hid_unregister_driver(&waterforce_driver); } /* When compiled into the kernel, initialize after the HID bus */ late_initcall(waterforce_init); module_exit(waterforce_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Aleksa Savic <savicaleksa83@gmail.com>"); MODULE_DESCRIPTION("Hwmon driver for Gigabyte AORUS Waterforce AIO coolers");
2 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 // SPDX-License-Identifier: GPL-2.0 /* * Generic USB GNSS receiver driver * * Copyright (C) 2021 Johan Hovold <johan@kernel.org> */ #include <linux/errno.h> #include <linux/gnss.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/usb.h> #define GNSS_USB_READ_BUF_LEN 512 #define GNSS_USB_WRITE_TIMEOUT 1000 static const struct usb_device_id gnss_usb_id_table[] = { { USB_DEVICE(0x1199, 0xb000) }, /* Sierra Wireless XM1210 */ { } }; MODULE_DEVICE_TABLE(usb, gnss_usb_id_table); struct gnss_usb { struct usb_device *udev; struct usb_interface *intf; struct gnss_device *gdev; struct urb *read_urb; unsigned int write_pipe; }; static void gnss_usb_rx_complete(struct urb *urb) { struct gnss_usb *gusb = urb->context; struct gnss_device *gdev = gusb->gdev; int status = urb->status; int len; int ret; switch (status) { case 0: break; case -ENOENT: case -ECONNRESET: case -ESHUTDOWN: dev_dbg(&gdev->dev, "urb stopped: %d\n", status); return; case -EPIPE: dev_err(&gdev->dev, "urb stopped: %d\n", status); return; default: dev_dbg(&gdev->dev, "nonzero urb status: %d\n", status); goto resubmit; } len = urb->actual_length; if (len == 0) goto resubmit; ret = gnss_insert_raw(gdev, urb->transfer_buffer, len); if (ret < len) dev_dbg(&gdev->dev, "dropped %d bytes\n", len - ret); resubmit: ret = usb_submit_urb(urb, GFP_ATOMIC); if (ret && ret != -EPERM && ret != -ENODEV) dev_err(&gdev->dev, "failed to resubmit urb: %d\n", ret); } static int gnss_usb_open(struct gnss_device *gdev) { struct gnss_usb *gusb = gnss_get_drvdata(gdev); int ret; ret = usb_submit_urb(gusb->read_urb, GFP_KERNEL); if (ret) { if (ret != -EPERM && ret != -ENODEV) dev_err(&gdev->dev, "failed to submit urb: %d\n", ret); return ret; } return 0; } static void gnss_usb_close(struct gnss_device *gdev) { struct gnss_usb *gusb = gnss_get_drvdata(gdev); usb_kill_urb(gusb->read_urb); } static int gnss_usb_write_raw(struct gnss_device *gdev, const unsigned char *buf, size_t count) { struct gnss_usb *gusb = gnss_get_drvdata(gdev); void *tbuf; int ret; tbuf = kmemdup(buf, count, GFP_KERNEL); if (!tbuf) return -ENOMEM; ret = usb_bulk_msg(gusb->udev, gusb->write_pipe, tbuf, count, NULL, GNSS_USB_WRITE_TIMEOUT); kfree(tbuf); if (ret) return ret; return count; } static const struct gnss_operations gnss_usb_gnss_ops = { .open = gnss_usb_open, .close = gnss_usb_close, .write_raw = gnss_usb_write_raw, }; static int gnss_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *udev = interface_to_usbdev(intf); struct usb_endpoint_descriptor *in, *out; struct gnss_device *gdev; struct gnss_usb *gusb; struct urb *urb; size_t buf_len; void *buf; int ret; ret = usb_find_common_endpoints(intf->cur_altsetting, &in, &out, NULL, NULL); if (ret) return ret; gusb = kzalloc(sizeof(*gusb), GFP_KERNEL); if (!gusb) return -ENOMEM; gdev = gnss_allocate_device(&intf->dev); if (!gdev) { ret = -ENOMEM; goto err_free_gusb; } gdev->ops = &gnss_usb_gnss_ops; gdev->type = GNSS_TYPE_NMEA; gnss_set_drvdata(gdev, gusb); urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) { ret = -ENOMEM; goto err_put_gdev; } buf_len = max(usb_endpoint_maxp(in), GNSS_USB_READ_BUF_LEN); buf = kzalloc(buf_len, GFP_KERNEL); if (!buf) { ret = -ENOMEM; goto err_free_urb; } usb_fill_bulk_urb(urb, udev, usb_rcvbulkpipe(udev, usb_endpoint_num(in)), buf, buf_len, gnss_usb_rx_complete, gusb); gusb->intf = intf; gusb->udev = udev; gusb->gdev = gdev; gusb->read_urb = urb; gusb->write_pipe = usb_sndbulkpipe(udev, usb_endpoint_num(out)); ret = gnss_register_device(gdev); if (ret) goto err_free_buf; usb_set_intfdata(intf, gusb); return 0; err_free_buf: kfree(buf); err_free_urb: usb_free_urb(urb); err_put_gdev: gnss_put_device(gdev); err_free_gusb: kfree(gusb); return ret; } static void gnss_usb_disconnect(struct usb_interface *intf) { struct gnss_usb *gusb = usb_get_intfdata(intf); gnss_deregister_device(gusb->gdev); kfree(gusb->read_urb->transfer_buffer); usb_free_urb(gusb->read_urb); gnss_put_device(gusb->gdev); kfree(gusb); } static struct usb_driver gnss_usb_driver = { .name = "gnss-usb", .probe = gnss_usb_probe, .disconnect = gnss_usb_disconnect, .id_table = gnss_usb_id_table, }; module_usb_driver(gnss_usb_driver); MODULE_AUTHOR("Johan Hovold <johan@kernel.org>"); MODULE_DESCRIPTION("Generic USB GNSS receiver driver"); MODULE_LICENSE("GPL v2");
113 113 18 18 18 120 11 24 17 11 24 17 11 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 // SPDX-License-Identifier: GPL-2.0 #include <linux/tty.h> #include <linux/module.h> #include <linux/kallsyms.h> #include <linux/semaphore.h> #include <linux/sched.h> #include "tty.h" /* Legacy tty mutex glue */ /* * Getting the big tty mutex. */ void tty_lock(struct tty_struct *tty) { tty_kref_get(tty); mutex_lock(&tty->legacy_mutex); } EXPORT_SYMBOL(tty_lock); int tty_lock_interruptible(struct tty_struct *tty) { int ret; tty_kref_get(tty); ret = mutex_lock_interruptible(&tty->legacy_mutex); if (ret) tty_kref_put(tty); return ret; } void tty_unlock(struct tty_struct *tty) { mutex_unlock(&tty->legacy_mutex); tty_kref_put(tty); } EXPORT_SYMBOL(tty_unlock); void tty_lock_slave(struct tty_struct *tty) { if (tty && tty != tty->link) tty_lock(tty); } void tty_unlock_slave(struct tty_struct *tty) { if (tty && tty != tty->link) tty_unlock(tty); } void tty_set_lock_subclass(struct tty_struct *tty) { lockdep_set_subclass(&tty->legacy_mutex, TTY_LOCK_SLAVE); }
117 17 117 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NF_CONNTRACK_TIMEOUT_H #define _NF_CONNTRACK_TIMEOUT_H #include <net/net_namespace.h> #include <linux/netfilter/nf_conntrack_common.h> #include <linux/netfilter/nf_conntrack_tuple_common.h> #include <linux/refcount.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_extend.h> #define CTNL_TIMEOUT_NAME_MAX 32 struct nf_ct_timeout { __u16 l3num; const struct nf_conntrack_l4proto *l4proto; char data[]; }; struct nf_conn_timeout { struct nf_ct_timeout __rcu *timeout; }; static inline unsigned int * nf_ct_timeout_data(const struct nf_conn_timeout *t) { #ifdef CONFIG_NF_CONNTRACK_TIMEOUT struct nf_ct_timeout *timeout; timeout = rcu_dereference(t->timeout); if (timeout == NULL) return NULL; return (unsigned int *)timeout->data; #else return NULL; #endif } static inline struct nf_conn_timeout *nf_ct_timeout_find(const struct nf_conn *ct) { #ifdef CONFIG_NF_CONNTRACK_TIMEOUT return nf_ct_ext_find(ct, NF_CT_EXT_TIMEOUT); #else return NULL; #endif } static inline struct nf_conn_timeout *nf_ct_timeout_ext_add(struct nf_conn *ct, struct nf_ct_timeout *timeout, gfp_t gfp) { #ifdef CONFIG_NF_CONNTRACK_TIMEOUT struct nf_conn_timeout *timeout_ext; timeout_ext = nf_ct_ext_add(ct, NF_CT_EXT_TIMEOUT, gfp); if (timeout_ext == NULL) return NULL; rcu_assign_pointer(timeout_ext->timeout, timeout); return timeout_ext; #else return NULL; #endif }; static inline unsigned int *nf_ct_timeout_lookup(const struct nf_conn *ct) { unsigned int *timeouts = NULL; #ifdef CONFIG_NF_CONNTRACK_TIMEOUT struct nf_conn_timeout *timeout_ext; timeout_ext = nf_ct_timeout_find(ct); if (timeout_ext) timeouts = nf_ct_timeout_data(timeout_ext); #endif return timeouts; } #ifdef CONFIG_NF_CONNTRACK_TIMEOUT void nf_ct_untimeout(struct net *net, struct nf_ct_timeout *timeout); int nf_ct_set_timeout(struct net *net, struct nf_conn *ct, u8 l3num, u8 l4num, const char *timeout_name); void nf_ct_destroy_timeout(struct nf_conn *ct); #else static inline int nf_ct_set_timeout(struct net *net, struct nf_conn *ct, u8 l3num, u8 l4num, const char *timeout_name) { return -EOPNOTSUPP; } static inline void nf_ct_destroy_timeout(struct nf_conn *ct) { return; } #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ #ifdef CONFIG_NF_CONNTRACK_TIMEOUT struct nf_ct_timeout_hooks { struct nf_ct_timeout *(*timeout_find_get)(struct net *net, const char *name); void (*timeout_put)(struct nf_ct_timeout *timeout); }; extern const struct nf_ct_timeout_hooks __rcu *nf_ct_timeout_hook; #endif #endif /* _NF_CONNTRACK_TIMEOUT_H */
17 2 1 12 2 17 4 13 2 1 1 1 1 4 1 1 2 2 1 1 3 1 2 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 // SPDX-License-Identifier: GPL-2.0-or-later /* Large capacity key type * * Copyright (C) 2017-2020 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #define pr_fmt(fmt) "big_key: "fmt #include <linux/init.h> #include <linux/seq_file.h> #include <linux/file.h> #include <linux/shmem_fs.h> #include <linux/err.h> #include <linux/random.h> #include <keys/user-type.h> #include <keys/big_key-type.h> #include <crypto/chacha20poly1305.h> /* * Layout of key payload words. */ struct big_key_payload { u8 *data; struct path path; size_t length; }; #define to_big_key_payload(payload) \ (struct big_key_payload *)((payload).data) /* * If the data is under this limit, there's no point creating a shm file to * hold it as the permanently resident metadata for the shmem fs will be at * least as large as the data. */ #define BIG_KEY_FILE_THRESHOLD (sizeof(struct inode) + sizeof(struct dentry)) /* * big_key defined keys take an arbitrary string as the description and an * arbitrary blob of data as the payload */ struct key_type key_type_big_key = { .name = "big_key", .preparse = big_key_preparse, .free_preparse = big_key_free_preparse, .instantiate = generic_key_instantiate, .revoke = big_key_revoke, .destroy = big_key_destroy, .describe = big_key_describe, .read = big_key_read, .update = big_key_update, }; /* * Preparse a big key */ int big_key_preparse(struct key_preparsed_payload *prep) { struct big_key_payload *payload = to_big_key_payload(prep->payload); struct file *file; u8 *buf, *enckey; ssize_t written; size_t datalen = prep->datalen; size_t enclen = datalen + CHACHA20POLY1305_AUTHTAG_SIZE; int ret; BUILD_BUG_ON(sizeof(*payload) != sizeof(prep->payload.data)); if (datalen <= 0 || datalen > 1024 * 1024 || !prep->data) return -EINVAL; /* Set an arbitrary quota */ prep->quotalen = 16; payload->length = datalen; if (datalen > BIG_KEY_FILE_THRESHOLD) { /* Create a shmem file to store the data in. This will permit the data * to be swapped out if needed. * * File content is stored encrypted with randomly generated key. * Since the key is random for each file, we can set the nonce * to zero, provided we never define a ->update() call. */ loff_t pos = 0; buf = kvmalloc(enclen, GFP_KERNEL); if (!buf) return -ENOMEM; /* generate random key */ enckey = kmalloc(CHACHA20POLY1305_KEY_SIZE, GFP_KERNEL); if (!enckey) { ret = -ENOMEM; goto error; } ret = get_random_bytes_wait(enckey, CHACHA20POLY1305_KEY_SIZE); if (unlikely(ret)) goto err_enckey; /* encrypt data */ chacha20poly1305_encrypt(buf, prep->data, datalen, NULL, 0, 0, enckey); /* save aligned data to file */ file = shmem_kernel_file_setup("", enclen, 0); if (IS_ERR(file)) { ret = PTR_ERR(file); goto err_enckey; } written = kernel_write(file, buf, enclen, &pos); if (written != enclen) { ret = written; if (written >= 0) ret = -EIO; goto err_fput; } /* Pin the mount and dentry to the key so that we can open it again * later */ payload->data = enckey; payload->path = file->f_path; path_get(&payload->path); fput(file); kvfree_sensitive(buf, enclen); } else { /* Just store the data in a buffer */ void *data = kmalloc(datalen, GFP_KERNEL); if (!data) return -ENOMEM; payload->data = data; memcpy(data, prep->data, prep->datalen); } return 0; err_fput: fput(file); err_enckey: kfree_sensitive(enckey); error: kvfree_sensitive(buf, enclen); return ret; } /* * Clear preparsement. */ void big_key_free_preparse(struct key_preparsed_payload *prep) { struct big_key_payload *payload = to_big_key_payload(prep->payload); if (prep->datalen > BIG_KEY_FILE_THRESHOLD) path_put(&payload->path); kfree_sensitive(payload->data); } /* * dispose of the links from a revoked keyring * - called with the key sem write-locked */ void big_key_revoke(struct key *key) { struct big_key_payload *payload = to_big_key_payload(key->payload); /* clear the quota */ key_payload_reserve(key, 0); if (key_is_positive(key) && payload->length > BIG_KEY_FILE_THRESHOLD) vfs_truncate(&payload->path, 0); } /* * dispose of the data dangling from the corpse of a big_key key */ void big_key_destroy(struct key *key) { struct big_key_payload *payload = to_big_key_payload(key->payload); if (payload->length > BIG_KEY_FILE_THRESHOLD) { path_put(&payload->path); payload->path.mnt = NULL; payload->path.dentry = NULL; } kfree_sensitive(payload->data); payload->data = NULL; } /* * Update a big key */ int big_key_update(struct key *key, struct key_preparsed_payload *prep) { int ret; ret = key_payload_reserve(key, prep->datalen); if (ret < 0) return ret; if (key_is_positive(key)) big_key_destroy(key); return generic_key_instantiate(key, prep); } /* * describe the big_key key */ void big_key_describe(const struct key *key, struct seq_file *m) { struct big_key_payload *payload = to_big_key_payload(key->payload); seq_puts(m, key->description); if (key_is_positive(key)) seq_printf(m, ": %zu [%s]", payload->length, payload->length > BIG_KEY_FILE_THRESHOLD ? "file" : "buff"); } /* * read the key data * - the key's semaphore is read-locked */ long big_key_read(const struct key *key, char *buffer, size_t buflen) { struct big_key_payload *payload = to_big_key_payload(key->payload); size_t datalen = payload->length; long ret; if (!buffer || buflen < datalen) return datalen; if (datalen > BIG_KEY_FILE_THRESHOLD) { struct file *file; u8 *buf, *enckey = payload->data; size_t enclen = datalen + CHACHA20POLY1305_AUTHTAG_SIZE; loff_t pos = 0; buf = kvmalloc(enclen, GFP_KERNEL); if (!buf) return -ENOMEM; file = dentry_open(&payload->path, O_RDONLY, current_cred()); if (IS_ERR(file)) { ret = PTR_ERR(file); goto error; } /* read file to kernel and decrypt */ ret = kernel_read(file, buf, enclen, &pos); if (ret != enclen) { if (ret >= 0) ret = -EIO; goto err_fput; } ret = chacha20poly1305_decrypt(buf, buf, enclen, NULL, 0, 0, enckey) ? 0 : -EBADMSG; if (unlikely(ret)) goto err_fput; ret = datalen; /* copy out decrypted data */ memcpy(buffer, buf, datalen); err_fput: fput(file); error: kvfree_sensitive(buf, enclen); } else { ret = datalen; memcpy(buffer, payload->data, datalen); } return ret; } /* * Register key type */ static int __init big_key_init(void) { return register_key_type(&key_type_big_key); } late_initcall(big_key_init);
7 7 7 7 7 4 5 1 1 1 1 1 1 1 1 6 6 6 6 11 2 7 1 3 2 1 5 1 6 1 5 5 5 1 4 1 7 7 7 6 6 7 7 1 6 6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 // SPDX-License-Identifier: GPL-2.0 /* Copyright 2011-2014 Autronica Fire and Security AS * * Author(s): * 2011-2014 Arvid Brodin, arvid.brodin@alten.se * * The HSR spec says never to forward the same frame twice on the same * interface. A frame is identified by its source MAC address and its HSR * sequence number. This code keeps track of senders and their sequence numbers * to allow filtering of duplicate frames, and to detect HSR ring errors. * Same code handles filtering of duplicates for PRP as well. */ #include <linux/if_ether.h> #include <linux/etherdevice.h> #include <linux/slab.h> #include <linux/rculist.h> #include "hsr_main.h" #include "hsr_framereg.h" #include "hsr_netlink.h" /* seq_nr_after(a, b) - return true if a is after (higher in sequence than) b, * false otherwise. */ static bool seq_nr_after(u16 a, u16 b) { /* Remove inconsistency where * seq_nr_after(a, b) == seq_nr_before(a, b) */ if ((int)b - a == 32768) return false; return (((s16)(b - a)) < 0); } #define seq_nr_before(a, b) seq_nr_after((b), (a)) #define seq_nr_before_or_eq(a, b) (!seq_nr_after((a), (b))) #define PRP_DROP_WINDOW_LEN 32768 bool hsr_addr_is_redbox(struct hsr_priv *hsr, unsigned char *addr) { if (!hsr->redbox || !is_valid_ether_addr(hsr->macaddress_redbox)) return false; return ether_addr_equal(addr, hsr->macaddress_redbox); } bool hsr_addr_is_self(struct hsr_priv *hsr, unsigned char *addr) { struct hsr_self_node *sn; bool ret = false; rcu_read_lock(); sn = rcu_dereference(hsr->self_node); if (!sn) { WARN_ONCE(1, "HSR: No self node\n"); goto out; } if (ether_addr_equal(addr, sn->macaddress_A) || ether_addr_equal(addr, sn->macaddress_B)) ret = true; out: rcu_read_unlock(); return ret; } /* Search for mac entry. Caller must hold rcu read lock. */ static struct hsr_node *find_node_by_addr_A(struct list_head *node_db, const unsigned char addr[ETH_ALEN]) { struct hsr_node *node; list_for_each_entry_rcu(node, node_db, mac_list) { if (ether_addr_equal(node->macaddress_A, addr)) return node; } return NULL; } /* Check if node for a given MAC address is already present in data base */ bool hsr_is_node_in_db(struct list_head *node_db, const unsigned char addr[ETH_ALEN]) { return !!find_node_by_addr_A(node_db, addr); } /* Helper for device init; the self_node is used in hsr_rcv() to recognize * frames from self that's been looped over the HSR ring. */ int hsr_create_self_node(struct hsr_priv *hsr, const unsigned char addr_a[ETH_ALEN], const unsigned char addr_b[ETH_ALEN]) { struct hsr_self_node *sn, *old; sn = kmalloc(sizeof(*sn), GFP_KERNEL); if (!sn) return -ENOMEM; ether_addr_copy(sn->macaddress_A, addr_a); ether_addr_copy(sn->macaddress_B, addr_b); spin_lock_bh(&hsr->list_lock); old = rcu_replace_pointer(hsr->self_node, sn, lockdep_is_held(&hsr->list_lock)); spin_unlock_bh(&hsr->list_lock); if (old) kfree_rcu(old, rcu_head); return 0; } void hsr_del_self_node(struct hsr_priv *hsr) { struct hsr_self_node *old; spin_lock_bh(&hsr->list_lock); old = rcu_replace_pointer(hsr->self_node, NULL, lockdep_is_held(&hsr->list_lock)); spin_unlock_bh(&hsr->list_lock); if (old) kfree_rcu(old, rcu_head); } void hsr_del_nodes(struct list_head *node_db) { struct hsr_node *node; struct hsr_node *tmp; list_for_each_entry_safe(node, tmp, node_db, mac_list) kfree(node); } void prp_handle_san_frame(bool san, enum hsr_port_type port, struct hsr_node *node) { /* Mark if the SAN node is over LAN_A or LAN_B */ if (port == HSR_PT_SLAVE_A) { node->san_a = true; return; } if (port == HSR_PT_SLAVE_B) node->san_b = true; } /* Allocate an hsr_node and add it to node_db. 'addr' is the node's address_A; * seq_out is used to initialize filtering of outgoing duplicate frames * originating from the newly added node. */ static struct hsr_node *hsr_add_node(struct hsr_priv *hsr, struct list_head *node_db, unsigned char addr[], u16 seq_out, bool san, enum hsr_port_type rx_port) { struct hsr_node *new_node, *node; unsigned long now; int i; new_node = kzalloc(sizeof(*new_node), GFP_ATOMIC); if (!new_node) return NULL; ether_addr_copy(new_node->macaddress_A, addr); spin_lock_init(&new_node->seq_out_lock); /* We are only interested in time diffs here, so use current jiffies * as initialization. (0 could trigger an spurious ring error warning). */ now = jiffies; for (i = 0; i < HSR_PT_PORTS; i++) { new_node->time_in[i] = now; new_node->time_out[i] = now; } for (i = 0; i < HSR_PT_PORTS; i++) { new_node->seq_out[i] = seq_out; new_node->seq_expected[i] = seq_out + 1; new_node->seq_start[i] = seq_out + 1; } if (san && hsr->proto_ops->handle_san_frame) hsr->proto_ops->handle_san_frame(san, rx_port, new_node); spin_lock_bh(&hsr->list_lock); list_for_each_entry_rcu(node, node_db, mac_list, lockdep_is_held(&hsr->list_lock)) { if (ether_addr_equal(node->macaddress_A, addr)) goto out; if (ether_addr_equal(node->macaddress_B, addr)) goto out; } list_add_tail_rcu(&new_node->mac_list, node_db); spin_unlock_bh(&hsr->list_lock); return new_node; out: spin_unlock_bh(&hsr->list_lock); kfree(new_node); return node; } void prp_update_san_info(struct hsr_node *node, bool is_sup) { if (!is_sup) return; node->san_a = false; node->san_b = false; } /* Get the hsr_node from which 'skb' was sent. */ struct hsr_node *hsr_get_node(struct hsr_port *port, struct list_head *node_db, struct sk_buff *skb, bool is_sup, enum hsr_port_type rx_port) { struct hsr_priv *hsr = port->hsr; struct hsr_node *node; struct ethhdr *ethhdr; struct prp_rct *rct; bool san = false; u16 seq_out; if (!skb_mac_header_was_set(skb)) return NULL; ethhdr = (struct ethhdr *)skb_mac_header(skb); list_for_each_entry_rcu(node, node_db, mac_list) { if (ether_addr_equal(node->macaddress_A, ethhdr->h_source)) { if (hsr->proto_ops->update_san_info) hsr->proto_ops->update_san_info(node, is_sup); return node; } if (ether_addr_equal(node->macaddress_B, ethhdr->h_source)) { if (hsr->proto_ops->update_san_info) hsr->proto_ops->update_san_info(node, is_sup); return node; } } /* Check if required node is not in proxy nodes table */ list_for_each_entry_rcu(node, &hsr->proxy_node_db, mac_list) { if (ether_addr_equal(node->macaddress_A, ethhdr->h_source)) { if (hsr->proto_ops->update_san_info) hsr->proto_ops->update_san_info(node, is_sup); return node; } } /* Everyone may create a node entry, connected node to a HSR/PRP * device. */ if (ethhdr->h_proto == htons(ETH_P_PRP) || ethhdr->h_proto == htons(ETH_P_HSR)) { /* Check if skb contains hsr_ethhdr */ if (skb->mac_len < sizeof(struct hsr_ethhdr)) return NULL; /* Use the existing sequence_nr from the tag as starting point * for filtering duplicate frames. */ seq_out = hsr_get_skb_sequence_nr(skb) - 1; } else { rct = skb_get_PRP_rct(skb); if (rct && prp_check_lsdu_size(skb, rct, is_sup)) { seq_out = prp_get_skb_sequence_nr(rct); } else { if (rx_port != HSR_PT_MASTER) san = true; seq_out = HSR_SEQNR_START; } } return hsr_add_node(hsr, node_db, ethhdr->h_source, seq_out, san, rx_port); } /* Use the Supervision frame's info about an eventual macaddress_B for merging * nodes that has previously had their macaddress_B registered as a separate * node. */ void hsr_handle_sup_frame(struct hsr_frame_info *frame) { struct hsr_node *node_curr = frame->node_src; struct hsr_port *port_rcv = frame->port_rcv; struct hsr_priv *hsr = port_rcv->hsr; struct hsr_sup_payload *hsr_sp; struct hsr_sup_tlv *hsr_sup_tlv; struct hsr_node *node_real; struct sk_buff *skb = NULL; struct list_head *node_db; struct ethhdr *ethhdr; int i; unsigned int pull_size = 0; unsigned int total_pull_size = 0; /* Here either frame->skb_hsr or frame->skb_prp should be * valid as supervision frame always will have protocol * header info. */ if (frame->skb_hsr) skb = frame->skb_hsr; else if (frame->skb_prp) skb = frame->skb_prp; else if (frame->skb_std) skb = frame->skb_std; if (!skb) return; /* Leave the ethernet header. */ pull_size = sizeof(struct ethhdr); skb_pull(skb, pull_size); total_pull_size += pull_size; ethhdr = (struct ethhdr *)skb_mac_header(skb); /* And leave the HSR tag. */ if (ethhdr->h_proto == htons(ETH_P_HSR)) { pull_size = sizeof(struct hsr_tag); skb_pull(skb, pull_size); total_pull_size += pull_size; } /* And leave the HSR sup tag. */ pull_size = sizeof(struct hsr_sup_tag); skb_pull(skb, pull_size); total_pull_size += pull_size; /* get HSR sup payload */ hsr_sp = (struct hsr_sup_payload *)skb->data; /* Merge node_curr (registered on macaddress_B) into node_real */ node_db = &port_rcv->hsr->node_db; node_real = find_node_by_addr_A(node_db, hsr_sp->macaddress_A); if (!node_real) /* No frame received from AddrA of this node yet */ node_real = hsr_add_node(hsr, node_db, hsr_sp->macaddress_A, HSR_SEQNR_START - 1, true, port_rcv->type); if (!node_real) goto done; /* No mem */ if (node_real == node_curr) /* Node has already been merged */ goto done; /* Leave the first HSR sup payload. */ pull_size = sizeof(struct hsr_sup_payload); skb_pull(skb, pull_size); total_pull_size += pull_size; /* Get second supervision tlv */ hsr_sup_tlv = (struct hsr_sup_tlv *)skb->data; /* And check if it is a redbox mac TLV */ if (hsr_sup_tlv->HSR_TLV_type == PRP_TLV_REDBOX_MAC) { /* We could stop here after pushing hsr_sup_payload, * or proceed and allow macaddress_B and for redboxes. */ /* Sanity check length */ if (hsr_sup_tlv->HSR_TLV_length != 6) goto done; /* Leave the second HSR sup tlv. */ pull_size = sizeof(struct hsr_sup_tlv); skb_pull(skb, pull_size); total_pull_size += pull_size; /* Get redbox mac address. */ hsr_sp = (struct hsr_sup_payload *)skb->data; /* Check if redbox mac and node mac are equal. */ if (!ether_addr_equal(node_real->macaddress_A, hsr_sp->macaddress_A)) { /* This is a redbox supervision frame for a VDAN! */ goto done; } } ether_addr_copy(node_real->macaddress_B, ethhdr->h_source); spin_lock_bh(&node_real->seq_out_lock); for (i = 0; i < HSR_PT_PORTS; i++) { if (!node_curr->time_in_stale[i] && time_after(node_curr->time_in[i], node_real->time_in[i])) { node_real->time_in[i] = node_curr->time_in[i]; node_real->time_in_stale[i] = node_curr->time_in_stale[i]; } if (seq_nr_after(node_curr->seq_out[i], node_real->seq_out[i])) node_real->seq_out[i] = node_curr->seq_out[i]; } spin_unlock_bh(&node_real->seq_out_lock); node_real->addr_B_port = port_rcv->type; spin_lock_bh(&hsr->list_lock); if (!node_curr->removed) { list_del_rcu(&node_curr->mac_list); node_curr->removed = true; kfree_rcu(node_curr, rcu_head); } spin_unlock_bh(&hsr->list_lock); done: /* Push back here */ skb_push(skb, total_pull_size); } /* 'skb' is a frame meant for this host, that is to be passed to upper layers. * * If the frame was sent by a node's B interface, replace the source * address with that node's "official" address (macaddress_A) so that upper * layers recognize where it came from. */ void hsr_addr_subst_source(struct hsr_node *node, struct sk_buff *skb) { if (!skb_mac_header_was_set(skb)) { WARN_ONCE(1, "%s: Mac header not set\n", __func__); return; } memcpy(&eth_hdr(skb)->h_source, node->macaddress_A, ETH_ALEN); } /* 'skb' is a frame meant for another host. * 'port' is the outgoing interface * * Substitute the target (dest) MAC address if necessary, so the it matches the * recipient interface MAC address, regardless of whether that is the * recipient's A or B interface. * This is needed to keep the packets flowing through switches that learn on * which "side" the different interfaces are. */ void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb, struct hsr_port *port) { struct hsr_node *node_dst; if (!skb_mac_header_was_set(skb)) { WARN_ONCE(1, "%s: Mac header not set\n", __func__); return; } if (!is_unicast_ether_addr(eth_hdr(skb)->h_dest)) return; node_dst = find_node_by_addr_A(&port->hsr->node_db, eth_hdr(skb)->h_dest); if (!node_dst && port->hsr->redbox) node_dst = find_node_by_addr_A(&port->hsr->proxy_node_db, eth_hdr(skb)->h_dest); if (!node_dst) { if (port->hsr->prot_version != PRP_V1 && net_ratelimit()) netdev_err(skb->dev, "%s: Unknown node\n", __func__); return; } if (port->type != node_dst->addr_B_port) return; if (is_valid_ether_addr(node_dst->macaddress_B)) ether_addr_copy(eth_hdr(skb)->h_dest, node_dst->macaddress_B); } void hsr_register_frame_in(struct hsr_node *node, struct hsr_port *port, u16 sequence_nr) { /* Don't register incoming frames without a valid sequence number. This * ensures entries of restarted nodes gets pruned so that they can * re-register and resume communications. */ if (!(port->dev->features & NETIF_F_HW_HSR_TAG_RM) && seq_nr_before(sequence_nr, node->seq_out[port->type])) return; node->time_in[port->type] = jiffies; node->time_in_stale[port->type] = false; } /* 'skb' is a HSR Ethernet frame (with a HSR tag inserted), with a valid * ethhdr->h_source address and skb->mac_header set. * * Return: * 1 if frame can be shown to have been sent recently on this interface, * 0 otherwise, or * negative error code on error */ int hsr_register_frame_out(struct hsr_port *port, struct hsr_frame_info *frame) { struct hsr_node *node = frame->node_src; u16 sequence_nr = frame->sequence_nr; spin_lock_bh(&node->seq_out_lock); if (seq_nr_before_or_eq(sequence_nr, node->seq_out[port->type]) && time_is_after_jiffies(node->time_out[port->type] + msecs_to_jiffies(HSR_ENTRY_FORGET_TIME))) { spin_unlock_bh(&node->seq_out_lock); return 1; } node->time_out[port->type] = jiffies; node->seq_out[port->type] = sequence_nr; spin_unlock_bh(&node->seq_out_lock); return 0; } /* Adaptation of the PRP duplicate discard algorithm described in wireshark * wiki (https://wiki.wireshark.org/PRP) * * A drop window is maintained for both LANs with start sequence set to the * first sequence accepted on the LAN that has not been seen on the other LAN, * and expected sequence set to the latest received sequence number plus one. * * When a frame is received on either LAN it is compared against the received * frames on the other LAN. If it is outside the drop window of the other LAN * the frame is accepted and the drop window is updated. * The drop window for the other LAN is reset. * * 'port' is the outgoing interface * 'frame' is the frame to be sent * * Return: * 1 if frame can be shown to have been sent recently on this interface, * 0 otherwise */ int prp_register_frame_out(struct hsr_port *port, struct hsr_frame_info *frame) { enum hsr_port_type other_port; enum hsr_port_type rcv_port; struct hsr_node *node; u16 sequence_diff; u16 sequence_exp; u16 sequence_nr; /* out-going frames are always in order * and can be checked the same way as for HSR */ if (frame->port_rcv->type == HSR_PT_MASTER) return hsr_register_frame_out(port, frame); /* for PRP we should only forward frames from the slave ports * to the master port */ if (port->type != HSR_PT_MASTER) return 1; node = frame->node_src; sequence_nr = frame->sequence_nr; sequence_exp = sequence_nr + 1; rcv_port = frame->port_rcv->type; other_port = rcv_port == HSR_PT_SLAVE_A ? HSR_PT_SLAVE_B : HSR_PT_SLAVE_A; spin_lock_bh(&node->seq_out_lock); if (time_is_before_jiffies(node->time_out[port->type] + msecs_to_jiffies(HSR_ENTRY_FORGET_TIME)) || (node->seq_start[rcv_port] == node->seq_expected[rcv_port] && node->seq_start[other_port] == node->seq_expected[other_port])) { /* the node hasn't been sending for a while * or both drop windows are empty, forward the frame */ node->seq_start[rcv_port] = sequence_nr; } else if (seq_nr_before(sequence_nr, node->seq_expected[other_port]) && seq_nr_before_or_eq(node->seq_start[other_port], sequence_nr)) { /* drop the frame, update the drop window for the other port * and reset our drop window */ node->seq_start[other_port] = sequence_exp; node->seq_expected[rcv_port] = sequence_exp; node->seq_start[rcv_port] = node->seq_expected[rcv_port]; spin_unlock_bh(&node->seq_out_lock); return 1; } /* update the drop window for the port where this frame was received * and clear the drop window for the other port */ node->seq_start[other_port] = node->seq_expected[other_port]; node->seq_expected[rcv_port] = sequence_exp; sequence_diff = sequence_exp - node->seq_start[rcv_port]; if (sequence_diff > PRP_DROP_WINDOW_LEN) node->seq_start[rcv_port] = sequence_exp - PRP_DROP_WINDOW_LEN; node->time_out[port->type] = jiffies; node->seq_out[port->type] = sequence_nr; spin_unlock_bh(&node->seq_out_lock); return 0; } #if IS_MODULE(CONFIG_PRP_DUP_DISCARD_KUNIT_TEST) EXPORT_SYMBOL(prp_register_frame_out); #endif static struct hsr_port *get_late_port(struct hsr_priv *hsr, struct hsr_node *node) { if (node->time_in_stale[HSR_PT_SLAVE_A]) return hsr_port_get_hsr(hsr, HSR_PT_SLAVE_A); if (node->time_in_stale[HSR_PT_SLAVE_B]) return hsr_port_get_hsr(hsr, HSR_PT_SLAVE_B); if (time_after(node->time_in[HSR_PT_SLAVE_B], node->time_in[HSR_PT_SLAVE_A] + msecs_to_jiffies(MAX_SLAVE_DIFF))) return hsr_port_get_hsr(hsr, HSR_PT_SLAVE_A); if (time_after(node->time_in[HSR_PT_SLAVE_A], node->time_in[HSR_PT_SLAVE_B] + msecs_to_jiffies(MAX_SLAVE_DIFF))) return hsr_port_get_hsr(hsr, HSR_PT_SLAVE_B); return NULL; } /* Remove stale sequence_nr records. Called by timer every * HSR_LIFE_CHECK_INTERVAL (two seconds or so). */ void hsr_prune_nodes(struct timer_list *t) { struct hsr_priv *hsr = timer_container_of(hsr, t, prune_timer); struct hsr_node *node; struct hsr_node *tmp; struct hsr_port *port; unsigned long timestamp; unsigned long time_a, time_b; spin_lock_bh(&hsr->list_lock); list_for_each_entry_safe(node, tmp, &hsr->node_db, mac_list) { /* Don't prune own node. Neither time_in[HSR_PT_SLAVE_A] * nor time_in[HSR_PT_SLAVE_B], will ever be updated for * the master port. Thus the master node will be repeatedly * pruned leading to packet loss. */ if (hsr_addr_is_self(hsr, node->macaddress_A)) continue; /* Shorthand */ time_a = node->time_in[HSR_PT_SLAVE_A]; time_b = node->time_in[HSR_PT_SLAVE_B]; /* Check for timestamps old enough to risk wrap-around */ if (time_after(jiffies, time_a + MAX_JIFFY_OFFSET / 2)) node->time_in_stale[HSR_PT_SLAVE_A] = true; if (time_after(jiffies, time_b + MAX_JIFFY_OFFSET / 2)) node->time_in_stale[HSR_PT_SLAVE_B] = true; /* Get age of newest frame from node. * At least one time_in is OK here; nodes get pruned long * before both time_ins can get stale */ timestamp = time_a; if (node->time_in_stale[HSR_PT_SLAVE_A] || (!node->time_in_stale[HSR_PT_SLAVE_B] && time_after(time_b, time_a))) timestamp = time_b; /* Warn of ring error only as long as we get frames at all */ if (time_is_after_jiffies(timestamp + msecs_to_jiffies(1.5 * MAX_SLAVE_DIFF))) { rcu_read_lock(); port = get_late_port(hsr, node); if (port) hsr_nl_ringerror(hsr, node->macaddress_A, port); rcu_read_unlock(); } /* Prune old entries */ if (time_is_before_jiffies(timestamp + msecs_to_jiffies(HSR_NODE_FORGET_TIME))) { hsr_nl_nodedown(hsr, node->macaddress_A); if (!node->removed) { list_del_rcu(&node->mac_list); node->removed = true; /* Note that we need to free this entry later: */ kfree_rcu(node, rcu_head); } } } spin_unlock_bh(&hsr->list_lock); /* Restart timer */ mod_timer(&hsr->prune_timer, jiffies + msecs_to_jiffies(PRUNE_PERIOD)); } void hsr_prune_proxy_nodes(struct timer_list *t) { struct hsr_priv *hsr = timer_container_of(hsr, t, prune_proxy_timer); unsigned long timestamp; struct hsr_node *node; struct hsr_node *tmp; spin_lock_bh(&hsr->list_lock); list_for_each_entry_safe(node, tmp, &hsr->proxy_node_db, mac_list) { /* Don't prune RedBox node. */ if (hsr_addr_is_redbox(hsr, node->macaddress_A)) continue; timestamp = node->time_in[HSR_PT_INTERLINK]; /* Prune old entries */ if (time_is_before_jiffies(timestamp + msecs_to_jiffies(HSR_PROXY_NODE_FORGET_TIME))) { hsr_nl_nodedown(hsr, node->macaddress_A); if (!node->removed) { list_del_rcu(&node->mac_list); node->removed = true; /* Note that we need to free this entry later: */ kfree_rcu(node, rcu_head); } } } spin_unlock_bh(&hsr->list_lock); /* Restart timer */ mod_timer(&hsr->prune_proxy_timer, jiffies + msecs_to_jiffies(PRUNE_PROXY_PERIOD)); } void *hsr_get_next_node(struct hsr_priv *hsr, void *_pos, unsigned char addr[ETH_ALEN]) { struct hsr_node *node; if (!_pos) { node = list_first_or_null_rcu(&hsr->node_db, struct hsr_node, mac_list); if (node) ether_addr_copy(addr, node->macaddress_A); return node; } node = _pos; list_for_each_entry_continue_rcu(node, &hsr->node_db, mac_list) { ether_addr_copy(addr, node->macaddress_A); return node; } return NULL; } int hsr_get_node_data(struct hsr_priv *hsr, const unsigned char *addr, unsigned char addr_b[ETH_ALEN], unsigned int *addr_b_ifindex, int *if1_age, u16 *if1_seq, int *if2_age, u16 *if2_seq) { struct hsr_node *node; struct hsr_port *port; unsigned long tdiff; node = find_node_by_addr_A(&hsr->node_db, addr); if (!node) return -ENOENT; ether_addr_copy(addr_b, node->macaddress_B); tdiff = jiffies - node->time_in[HSR_PT_SLAVE_A]; if (node->time_in_stale[HSR_PT_SLAVE_A]) *if1_age = INT_MAX; #if HZ <= MSEC_PER_SEC else if (tdiff > msecs_to_jiffies(INT_MAX)) *if1_age = INT_MAX; #endif else *if1_age = jiffies_to_msecs(tdiff); tdiff = jiffies - node->time_in[HSR_PT_SLAVE_B]; if (node->time_in_stale[HSR_PT_SLAVE_B]) *if2_age = INT_MAX; #if HZ <= MSEC_PER_SEC else if (tdiff > msecs_to_jiffies(INT_MAX)) *if2_age = INT_MAX; #endif else *if2_age = jiffies_to_msecs(tdiff); /* Present sequence numbers as if they were incoming on interface */ *if1_seq = node->seq_out[HSR_PT_SLAVE_B]; *if2_seq = node->seq_out[HSR_PT_SLAVE_A]; if (node->addr_B_port != HSR_PT_NONE) { port = hsr_port_get_hsr(hsr, node->addr_B_port); *addr_b_ifindex = port->dev->ifindex; } else { *addr_b_ifindex = -1; } return 0; }
1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 // SPDX-License-Identifier: GPL-2.0-or-later #include <linux/device.h> #include <linux/of_mdio.h> #include <linux/phy.h> #include <linux/stddef.h> struct mdiobus_devres { struct mii_bus *mii; }; static void devm_mdiobus_free(struct device *dev, void *this) { struct mdiobus_devres *dr = this; mdiobus_free(dr->mii); } /** * devm_mdiobus_alloc_size - Resource-managed mdiobus_alloc_size() * @dev: Device to allocate mii_bus for * @sizeof_priv: Space to allocate for private structure * * Managed mdiobus_alloc_size. mii_bus allocated with this function is * automatically freed on driver detach. * * RETURNS: * Pointer to allocated mii_bus on success, NULL on out-of-memory error. */ struct mii_bus *devm_mdiobus_alloc_size(struct device *dev, int sizeof_priv) { struct mdiobus_devres *dr; dr = devres_alloc(devm_mdiobus_free, sizeof(*dr), GFP_KERNEL); if (!dr) return NULL; dr->mii = mdiobus_alloc_size(sizeof_priv); if (!dr->mii) { devres_free(dr); return NULL; } devres_add(dev, dr); return dr->mii; } EXPORT_SYMBOL(devm_mdiobus_alloc_size); static void devm_mdiobus_unregister(struct device *dev, void *this) { struct mdiobus_devres *dr = this; mdiobus_unregister(dr->mii); } static int mdiobus_devres_match(struct device *dev, void *this, void *match_data) { struct mdiobus_devres *res = this; struct mii_bus *mii = match_data; return mii == res->mii; } /** * __devm_mdiobus_register - Resource-managed variant of mdiobus_register() * @dev: Device to register mii_bus for * @bus: MII bus structure to register * @owner: Owning module * * Returns 0 on success, negative error number on failure. */ int __devm_mdiobus_register(struct device *dev, struct mii_bus *bus, struct module *owner) { struct mdiobus_devres *dr; int ret; if (WARN_ON(!devres_find(dev, devm_mdiobus_free, mdiobus_devres_match, bus))) return -EINVAL; dr = devres_alloc(devm_mdiobus_unregister, sizeof(*dr), GFP_KERNEL); if (!dr) return -ENOMEM; ret = __mdiobus_register(bus, owner); if (ret) { devres_free(dr); return ret; } dr->mii = bus; devres_add(dev, dr); return 0; } EXPORT_SYMBOL(__devm_mdiobus_register); #if IS_ENABLED(CONFIG_OF_MDIO) /** * __devm_of_mdiobus_register - Resource managed variant of of_mdiobus_register() * @dev: Device to register mii_bus for * @mdio: MII bus structure to register * @np: Device node to parse * @owner: Owning module */ int __devm_of_mdiobus_register(struct device *dev, struct mii_bus *mdio, struct device_node *np, struct module *owner) { struct mdiobus_devres *dr; int ret; if (WARN_ON(!devres_find(dev, devm_mdiobus_free, mdiobus_devres_match, mdio))) return -EINVAL; dr = devres_alloc(devm_mdiobus_unregister, sizeof(*dr), GFP_KERNEL); if (!dr) return -ENOMEM; ret = __of_mdiobus_register(mdio, np, owner); if (ret) { devres_free(dr); return ret; } dr->mii = mdio; devres_add(dev, dr); return 0; } EXPORT_SYMBOL(__devm_of_mdiobus_register); #endif /* CONFIG_OF_MDIO */ MODULE_DESCRIPTION("Network MDIO bus devres helpers"); MODULE_LICENSE("GPL");
919 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Global definitions for the Ethernet IEEE 802.3 interface. * * Version: @(#)if_ether.h 1.0.1a 02/08/94 * * Author: Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Donald Becker, <becker@super.org> * Alan Cox, <alan@lxorguk.ukuu.org.uk> * Steve Whitehouse, <gw7rrm@eeshack3.swan.ac.uk> */ #ifndef _LINUX_IF_ETHER_H #define _LINUX_IF_ETHER_H #include <linux/skbuff.h> #include <uapi/linux/if_ether.h> /* XX:XX:XX:XX:XX:XX */ #define MAC_ADDR_STR_LEN (3 * ETH_ALEN - 1) static inline struct ethhdr *eth_hdr(const struct sk_buff *skb) { return (struct ethhdr *)skb_mac_header(skb); } /* Prefer this version in TX path, instead of * skb_reset_mac_header() + eth_hdr() */ static inline struct ethhdr *skb_eth_hdr(const struct sk_buff *skb) { return (struct ethhdr *)skb->data; } static inline struct ethhdr *inner_eth_hdr(const struct sk_buff *skb) { return (struct ethhdr *)skb_inner_mac_header(skb); } int eth_header_parse(const struct sk_buff *skb, unsigned char *haddr); extern ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len); #endif /* _LINUX_IF_ETHER_H */
5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 /* SPDX-License-Identifier: GPL-2.0 */ /* * thermal_core.h * * Copyright (C) 2012 Intel Corp * Author: Durgadoss R <durgadoss.r@intel.com> */ #ifndef __THERMAL_CORE_H__ #define __THERMAL_CORE_H__ #include <linux/cleanup.h> #include <linux/device.h> #include <linux/thermal.h> #include "thermal_netlink.h" #include "thermal_thresholds.h" #include "thermal_debugfs.h" struct thermal_attr { struct device_attribute attr; char name[THERMAL_NAME_LENGTH]; }; struct thermal_trip_attrs { struct thermal_attr type; struct thermal_attr temp; struct thermal_attr hyst; }; struct thermal_trip_desc { struct thermal_trip trip; struct thermal_trip_attrs trip_attrs; struct list_head list_node; struct list_head thermal_instances; int threshold; }; /** * struct thermal_governor - structure that holds thermal governor information * @name: name of the governor * @bind_to_tz: callback called when binding to a thermal zone. If it * returns 0, the governor is bound to the thermal zone, * otherwise it fails. * @unbind_from_tz: callback called when a governor is unbound from a * thermal zone. * @trip_crossed: called for trip points that have just been crossed * @manage: called on thermal zone temperature updates * @update_tz: callback called when thermal zone internals have changed, e.g. * thermal cooling instance was added/removed * @governor_list: node in thermal_governor_list (in thermal_core.c) */ struct thermal_governor { const char *name; int (*bind_to_tz)(struct thermal_zone_device *tz); void (*unbind_from_tz)(struct thermal_zone_device *tz); void (*trip_crossed)(struct thermal_zone_device *tz, const struct thermal_trip *trip, bool upward); void (*manage)(struct thermal_zone_device *tz); void (*update_tz)(struct thermal_zone_device *tz, enum thermal_notify_event reason); struct list_head governor_list; }; #define TZ_STATE_FLAG_SUSPENDED BIT(0) #define TZ_STATE_FLAG_RESUMING BIT(1) #define TZ_STATE_FLAG_INIT BIT(2) #define TZ_STATE_FLAG_EXIT BIT(3) #define TZ_STATE_READY 0 /** * struct thermal_zone_device - structure for a thermal zone * @id: unique id number for each thermal zone * @type: the thermal zone device type * @device: &struct device for this thermal zone * @removal: removal completion * @resume: resume completion * @trips_high: trips above the current zone temperature * @trips_reached: trips below or at the current zone temperature * @trips_invalid: trips with invalid temperature * @mode: current mode of this thermal zone * @devdata: private pointer for device private data * @num_trips: number of trip points the thermal zone supports * @passive_delay_jiffies: number of jiffies to wait between polls when * performing passive cooling. * @polling_delay_jiffies: number of jiffies to wait between polls when * checking whether trip points have been crossed (0 for * interrupt driven systems) * @recheck_delay_jiffies: delay after a failed attempt to determine the zone * temperature before trying again * @temperature: current temperature. This is only for core code, * drivers should use thermal_zone_get_temp() to get the * current temperature * @last_temperature: previous temperature read * @emul_temperature: emulated temperature when using CONFIG_THERMAL_EMULATION * @passive: 1 if you've crossed a passive trip point, 0 otherwise. * @prev_low_trip: the low current temperature if you've crossed a passive trip point. * @prev_high_trip: the above current temperature if you've crossed a passive trip point. * @ops: operations this &thermal_zone_device supports * @tzp: thermal zone parameters * @governor: pointer to the governor for this thermal zone * @governor_data: private pointer for governor data * @ida: &struct ida to generate unique id for this zone's cooling * devices * @lock: lock to protect thermal_instances list * @node: node in thermal_tz_list (in thermal_core.c) * @poll_queue: delayed work for polling * @notify_event: Last notification event * @state: current state of the thermal zone * @trips: array of struct thermal_trip objects */ struct thermal_zone_device { int id; char type[THERMAL_NAME_LENGTH]; struct device device; struct completion removal; struct completion resume; struct attribute_group trips_attribute_group; struct list_head trips_high; struct list_head trips_reached; struct list_head trips_invalid; enum thermal_device_mode mode; void *devdata; int num_trips; unsigned long passive_delay_jiffies; unsigned long polling_delay_jiffies; unsigned long recheck_delay_jiffies; int temperature; int last_temperature; int emul_temperature; int passive; int prev_low_trip; int prev_high_trip; struct thermal_zone_device_ops ops; struct thermal_zone_params *tzp; struct thermal_governor *governor; void *governor_data; struct ida ida; struct mutex lock; struct list_head node; struct delayed_work poll_queue; enum thermal_notify_event notify_event; u8 state; #ifdef CONFIG_THERMAL_DEBUGFS struct thermal_debugfs *debugfs; #endif struct list_head user_thresholds; struct thermal_trip_desc trips[] __counted_by(num_trips); }; DEFINE_GUARD(thermal_zone, struct thermal_zone_device *, mutex_lock(&_T->lock), mutex_unlock(&_T->lock)) DEFINE_GUARD(thermal_zone_reverse, struct thermal_zone_device *, mutex_unlock(&_T->lock), mutex_lock(&_T->lock)) /* Initial thermal zone temperature. */ #define THERMAL_TEMP_INIT INT_MIN /* * Default and maximum delay after a failed thermal zone temperature check * before attempting to check it again (in jiffies). */ #define THERMAL_RECHECK_DELAY msecs_to_jiffies(250) #define THERMAL_MAX_RECHECK_DELAY (120 * HZ) /* Default Thermal Governor */ #if defined(CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE) #define DEFAULT_THERMAL_GOVERNOR "step_wise" #elif defined(CONFIG_THERMAL_DEFAULT_GOV_FAIR_SHARE) #define DEFAULT_THERMAL_GOVERNOR "fair_share" #elif defined(CONFIG_THERMAL_DEFAULT_GOV_USER_SPACE) #define DEFAULT_THERMAL_GOVERNOR "user_space" #elif defined(CONFIG_THERMAL_DEFAULT_GOV_POWER_ALLOCATOR) #define DEFAULT_THERMAL_GOVERNOR "power_allocator" #elif defined(CONFIG_THERMAL_DEFAULT_GOV_BANG_BANG) #define DEFAULT_THERMAL_GOVERNOR "bang_bang" #endif /* Initial state of a cooling device during binding */ #define THERMAL_NO_TARGET -1UL /* Init section thermal table */ extern struct thermal_governor *__governor_thermal_table[]; extern struct thermal_governor *__governor_thermal_table_end[]; #define THERMAL_TABLE_ENTRY(table, name) \ static typeof(name) *__thermal_table_entry_##name \ __used __section("__" #table "_thermal_table") = &name #define THERMAL_GOVERNOR_DECLARE(name) THERMAL_TABLE_ENTRY(governor, name) #define for_each_governor_table(__governor) \ for (__governor = __governor_thermal_table; \ __governor < __governor_thermal_table_end; \ __governor++) int for_each_thermal_zone(int (*cb)(struct thermal_zone_device *, void *), void *); int for_each_thermal_cooling_device(int (*cb)(struct thermal_cooling_device *, void *), void *); int for_each_thermal_governor(int (*cb)(struct thermal_governor *, void *), void *thermal_governor); struct thermal_zone_device *thermal_zone_get_by_id(int id); DEFINE_CLASS(thermal_zone_get_by_id, struct thermal_zone_device *, if (_T) put_device(&_T->device), thermal_zone_get_by_id(id), int id) static inline bool cdev_is_power_actor(struct thermal_cooling_device *cdev) { return cdev->ops->get_requested_power && cdev->ops->state2power && cdev->ops->power2state; } void thermal_cdev_update(struct thermal_cooling_device *); void thermal_cdev_update_nocheck(struct thermal_cooling_device *cdev); void __thermal_cdev_update(struct thermal_cooling_device *cdev); int get_tz_trend(struct thermal_zone_device *tz, const struct thermal_trip *trip); /* * This structure is used to describe the behavior of * a certain cooling device on a certain trip point * in a certain thermal zone */ struct thermal_instance { int id; char name[THERMAL_NAME_LENGTH]; struct thermal_cooling_device *cdev; const struct thermal_trip *trip; bool initialized; unsigned long upper; /* Highest cooling state for this trip point */ unsigned long lower; /* Lowest cooling state for this trip point */ unsigned long target; /* expected cooling state */ char attr_name[THERMAL_NAME_LENGTH]; struct device_attribute attr; char weight_attr_name[THERMAL_NAME_LENGTH]; struct device_attribute weight_attr; struct list_head trip_node; /* node in trip->thermal_instances */ struct list_head cdev_node; /* node in cdev->thermal_instances */ unsigned int weight; /* The weight of the cooling device */ bool upper_no_limit; }; #define to_thermal_zone(_dev) \ container_of(_dev, struct thermal_zone_device, device) #define to_cooling_device(_dev) \ container_of(_dev, struct thermal_cooling_device, device) int thermal_register_governor(struct thermal_governor *); void thermal_unregister_governor(struct thermal_governor *); int thermal_zone_device_set_policy(struct thermal_zone_device *, char *); int thermal_build_list_of_policies(char *buf); void __thermal_zone_device_update(struct thermal_zone_device *tz, enum thermal_notify_event event); void thermal_zone_device_critical_reboot(struct thermal_zone_device *tz); void thermal_zone_device_critical_shutdown(struct thermal_zone_device *tz); void thermal_governor_update_tz(struct thermal_zone_device *tz, enum thermal_notify_event reason); /* Helpers */ #define for_each_trip_desc(__tz, __td) \ for (__td = __tz->trips; __td - __tz->trips < __tz->num_trips; __td++) #define trip_to_trip_desc(__trip) \ container_of(__trip, struct thermal_trip_desc, trip) const char *thermal_trip_type_name(enum thermal_trip_type trip_type); void thermal_zone_set_trips(struct thermal_zone_device *tz, int low, int high); int thermal_zone_trip_id(const struct thermal_zone_device *tz, const struct thermal_trip *trip); int __thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp); void thermal_zone_set_trip_hyst(struct thermal_zone_device *tz, struct thermal_trip *trip, int hyst); /* sysfs I/F */ int thermal_zone_create_device_groups(struct thermal_zone_device *tz); void thermal_zone_destroy_device_groups(struct thermal_zone_device *); void thermal_cooling_device_setup_sysfs(struct thermal_cooling_device *); void thermal_cooling_device_destroy_sysfs(struct thermal_cooling_device *cdev); void thermal_cooling_device_stats_reinit(struct thermal_cooling_device *cdev); /* used only at binding time */ ssize_t trip_point_show(struct device *, struct device_attribute *, char *); ssize_t weight_show(struct device *, struct device_attribute *, char *); ssize_t weight_store(struct device *, struct device_attribute *, const char *, size_t); #ifdef CONFIG_THERMAL_STATISTICS void thermal_cooling_device_stats_update(struct thermal_cooling_device *cdev, unsigned long new_state); #else static inline void thermal_cooling_device_stats_update(struct thermal_cooling_device *cdev, unsigned long new_state) {} #endif /* CONFIG_THERMAL_STATISTICS */ #endif /* __THERMAL_CORE_H__ */
5210 5196 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM workqueue #if !defined(_TRACE_WORKQUEUE_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_WORKQUEUE_H #include <linux/tracepoint.h> #include <linux/workqueue.h> struct pool_workqueue; /** * workqueue_queue_work - called when a work gets queued * @req_cpu: the requested cpu * @pwq: pointer to struct pool_workqueue * @work: pointer to struct work_struct * * This event occurs when a work is queued immediately or once a * delayed work is actually queued on a workqueue (ie: once the delay * has been reached). */ TRACE_EVENT(workqueue_queue_work, TP_PROTO(int req_cpu, struct pool_workqueue *pwq, struct work_struct *work), TP_ARGS(req_cpu, pwq, work), TP_STRUCT__entry( __field( void *, work ) __field( void *, function) __string( workqueue, pwq->wq->name) __field( int, req_cpu ) __field( int, cpu ) ), TP_fast_assign( __entry->work = work; __entry->function = work->func; __assign_str(workqueue); __entry->req_cpu = req_cpu; __entry->cpu = pwq->pool->cpu; ), TP_printk("work struct=%p function=%ps workqueue=%s req_cpu=%d cpu=%d", __entry->work, __entry->function, __get_str(workqueue), __entry->req_cpu, __entry->cpu) ); /** * workqueue_activate_work - called when a work gets activated * @work: pointer to struct work_struct * * This event occurs when a queued work is put on the active queue, * which happens immediately after queueing unless @max_active limit * is reached. */ TRACE_EVENT(workqueue_activate_work, TP_PROTO(struct work_struct *work), TP_ARGS(work), TP_STRUCT__entry( __field( void *, work ) __field( void *, function) ), TP_fast_assign( __entry->work = work; __entry->function = work->func; ), TP_printk("work struct %p function=%ps ", __entry->work, __entry->function) ); /** * workqueue_execute_start - called immediately before the workqueue callback * @work: pointer to struct work_struct * * Allows to track workqueue execution. */ TRACE_EVENT(workqueue_execute_start, TP_PROTO(struct work_struct *work), TP_ARGS(work), TP_STRUCT__entry( __field( void *, work ) __field( void *, function) ), TP_fast_assign( __entry->work = work; __entry->function = work->func; ), TP_printk("work struct %p: function %ps", __entry->work, __entry->function) ); /** * workqueue_execute_end - called immediately after the workqueue callback * @work: pointer to struct work_struct * @function: pointer to worker function * * Allows to track workqueue execution. */ TRACE_EVENT(workqueue_execute_end, TP_PROTO(struct work_struct *work, work_func_t function), TP_ARGS(work, function), TP_STRUCT__entry( __field( void *, work ) __field( void *, function) ), TP_fast_assign( __entry->work = work; __entry->function = function; ), TP_printk("work struct %p: function %ps", __entry->work, __entry->function) ); #endif /* _TRACE_WORKQUEUE_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
163 133 133 132 133 131 61 61 39 13 43 5 5 1 3 3 134 123 13 112 47 47 125 51 1 13 1 54 55 3 2 54 36 22 8 25 14 17 27 26 17 17 38 22 2 38 6 1 10 3 4 37 34 3 14 12 3 12 3 12 3 29 38 38 2 14 1 10 10 2 4 3 4 2 5 4 2 1 39 39 1 3 5 3 43 3 6 38 4 1 1 4 3 4 11 1 10 11 11 10 18 16 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 // SPDX-License-Identifier: GPL-2.0-or-later /* SCTP kernel implementation * (C) Copyright IBM Corp. 2001, 2004 * Copyright (c) 1999-2000 Cisco, Inc. * Copyright (c) 1999-2001 Motorola, Inc. * Copyright (c) 2001 Intel Corp. * Copyright (c) 2001 Nokia, Inc. * Copyright (c) 2001 La Monte H.P. Yarroll * * This abstraction carries sctp events to the ULP (sockets). * * Please send any bug reports or fixes you make to the * email address(es): * lksctp developers <linux-sctp@vger.kernel.org> * * Written or modified by: * Jon Grimm <jgrimm@us.ibm.com> * La Monte H.P. Yarroll <piggy@acm.org> * Sridhar Samudrala <sri@us.ibm.com> */ #include <linux/slab.h> #include <linux/types.h> #include <linux/skbuff.h> #include <net/sock.h> #include <net/busy_poll.h> #include <net/sctp/structs.h> #include <net/sctp/sctp.h> #include <net/sctp/sm.h> /* Forward declarations for internal helpers. */ static struct sctp_ulpevent *sctp_ulpq_reasm(struct sctp_ulpq *ulpq, struct sctp_ulpevent *); static struct sctp_ulpevent *sctp_ulpq_order(struct sctp_ulpq *, struct sctp_ulpevent *); static void sctp_ulpq_reasm_drain(struct sctp_ulpq *ulpq); /* 1st Level Abstractions */ /* Initialize a ULP queue from a block of memory. */ void sctp_ulpq_init(struct sctp_ulpq *ulpq, struct sctp_association *asoc) { memset(ulpq, 0, sizeof(struct sctp_ulpq)); ulpq->asoc = asoc; skb_queue_head_init(&ulpq->reasm); skb_queue_head_init(&ulpq->reasm_uo); skb_queue_head_init(&ulpq->lobby); ulpq->pd_mode = 0; } /* Flush the reassembly and ordering queues. */ void sctp_ulpq_flush(struct sctp_ulpq *ulpq) { struct sk_buff *skb; struct sctp_ulpevent *event; while ((skb = __skb_dequeue(&ulpq->lobby)) != NULL) { event = sctp_skb2event(skb); sctp_ulpevent_free(event); } while ((skb = __skb_dequeue(&ulpq->reasm)) != NULL) { event = sctp_skb2event(skb); sctp_ulpevent_free(event); } while ((skb = __skb_dequeue(&ulpq->reasm_uo)) != NULL) { event = sctp_skb2event(skb); sctp_ulpevent_free(event); } } /* Dispose of a ulpqueue. */ void sctp_ulpq_free(struct sctp_ulpq *ulpq) { sctp_ulpq_flush(ulpq); } /* Process an incoming DATA chunk. */ int sctp_ulpq_tail_data(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, gfp_t gfp) { struct sk_buff_head temp; struct sctp_ulpevent *event; int event_eor = 0; /* Create an event from the incoming chunk. */ event = sctp_ulpevent_make_rcvmsg(chunk->asoc, chunk, gfp); if (!event) return -ENOMEM; event->ssn = ntohs(chunk->subh.data_hdr->ssn); event->ppid = chunk->subh.data_hdr->ppid; /* Do reassembly if needed. */ event = sctp_ulpq_reasm(ulpq, event); /* Do ordering if needed. */ if (event) { /* Create a temporary list to collect chunks on. */ skb_queue_head_init(&temp); __skb_queue_tail(&temp, sctp_event2skb(event)); if (event->msg_flags & MSG_EOR) event = sctp_ulpq_order(ulpq, event); } /* Send event to the ULP. 'event' is the sctp_ulpevent for * very first SKB on the 'temp' list. */ if (event) { event_eor = (event->msg_flags & MSG_EOR) ? 1 : 0; sctp_ulpq_tail_event(ulpq, &temp); } return event_eor; } /* Add a new event for propagation to the ULP. */ /* Clear the partial delivery mode for this socket. Note: This * assumes that no association is currently in partial delivery mode. */ int sctp_clear_pd(struct sock *sk, struct sctp_association *asoc) { struct sctp_sock *sp = sctp_sk(sk); if (atomic_dec_and_test(&sp->pd_mode)) { /* This means there are no other associations in PD, so * we can go ahead and clear out the lobby in one shot */ if (!skb_queue_empty(&sp->pd_lobby)) { skb_queue_splice_tail_init(&sp->pd_lobby, &sk->sk_receive_queue); return 1; } } else { /* There are other associations in PD, so we only need to * pull stuff out of the lobby that belongs to the * associations that is exiting PD (all of its notifications * are posted here). */ if (!skb_queue_empty(&sp->pd_lobby) && asoc) { struct sk_buff *skb, *tmp; struct sctp_ulpevent *event; sctp_skb_for_each(skb, &sp->pd_lobby, tmp) { event = sctp_skb2event(skb); if (event->asoc == asoc) { __skb_unlink(skb, &sp->pd_lobby); __skb_queue_tail(&sk->sk_receive_queue, skb); } } } } return 0; } /* Set the pd_mode on the socket and ulpq */ static void sctp_ulpq_set_pd(struct sctp_ulpq *ulpq) { struct sctp_sock *sp = sctp_sk(ulpq->asoc->base.sk); atomic_inc(&sp->pd_mode); ulpq->pd_mode = 1; } /* Clear the pd_mode and restart any pending messages waiting for delivery. */ static int sctp_ulpq_clear_pd(struct sctp_ulpq *ulpq) { ulpq->pd_mode = 0; sctp_ulpq_reasm_drain(ulpq); return sctp_clear_pd(ulpq->asoc->base.sk, ulpq->asoc); } int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sk_buff_head *skb_list) { struct sock *sk = ulpq->asoc->base.sk; struct sctp_sock *sp = sctp_sk(sk); struct sctp_ulpevent *event; struct sk_buff_head *queue; struct sk_buff *skb; int clear_pd = 0; skb = __skb_peek(skb_list); event = sctp_skb2event(skb); /* If the socket is just going to throw this away, do not * even try to deliver it. */ if (sk->sk_shutdown & RCV_SHUTDOWN && (sk->sk_shutdown & SEND_SHUTDOWN || !sctp_ulpevent_is_notification(event))) goto out_free; if (!sctp_ulpevent_is_notification(event)) { sk_mark_napi_id(sk, skb); sk_incoming_cpu_update(sk); } /* Check if the user wishes to receive this event. */ if (!sctp_ulpevent_is_enabled(event, ulpq->asoc->subscribe)) goto out_free; /* If we are in partial delivery mode, post to the lobby until * partial delivery is cleared, unless, of course _this_ is * the association the cause of the partial delivery. */ if (atomic_read(&sp->pd_mode) == 0) { queue = &sk->sk_receive_queue; } else { if (ulpq->pd_mode) { /* If the association is in partial delivery, we * need to finish delivering the partially processed * packet before passing any other data. This is * because we don't truly support stream interleaving. */ if ((event->msg_flags & MSG_NOTIFICATION) || (SCTP_DATA_NOT_FRAG == (event->msg_flags & SCTP_DATA_FRAG_MASK))) queue = &sp->pd_lobby; else { clear_pd = event->msg_flags & MSG_EOR; queue = &sk->sk_receive_queue; } } else { /* * If fragment interleave is enabled, we * can queue this to the receive queue instead * of the lobby. */ if (sp->frag_interleave) queue = &sk->sk_receive_queue; else queue = &sp->pd_lobby; } } skb_queue_splice_tail_init(skb_list, queue); /* Did we just complete partial delivery and need to get * rolling again? Move pending data to the receive * queue. */ if (clear_pd) sctp_ulpq_clear_pd(ulpq); if (queue == &sk->sk_receive_queue && !sp->data_ready_signalled) { if (!sock_owned_by_user(sk)) sp->data_ready_signalled = 1; sk->sk_data_ready(sk); } return 1; out_free: sctp_queue_purge_ulpevents(skb_list); return 0; } /* 2nd Level Abstractions */ /* Helper function to store chunks that need to be reassembled. */ static void sctp_ulpq_store_reasm(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) { struct sk_buff *pos; struct sctp_ulpevent *cevent; __u32 tsn, ctsn; tsn = event->tsn; /* See if it belongs at the end. */ pos = skb_peek_tail(&ulpq->reasm); if (!pos) { __skb_queue_tail(&ulpq->reasm, sctp_event2skb(event)); return; } /* Short circuit just dropping it at the end. */ cevent = sctp_skb2event(pos); ctsn = cevent->tsn; if (TSN_lt(ctsn, tsn)) { __skb_queue_tail(&ulpq->reasm, sctp_event2skb(event)); return; } /* Find the right place in this list. We store them by TSN. */ skb_queue_walk(&ulpq->reasm, pos) { cevent = sctp_skb2event(pos); ctsn = cevent->tsn; if (TSN_lt(tsn, ctsn)) break; } /* Insert before pos. */ __skb_queue_before(&ulpq->reasm, pos, sctp_event2skb(event)); } /* Helper function to return an event corresponding to the reassembled * datagram. * This routine creates a re-assembled skb given the first and last skb's * as stored in the reassembly queue. The skb's may be non-linear if the sctp * payload was fragmented on the way and ip had to reassemble them. * We add the rest of skb's to the first skb's fraglist. */ struct sctp_ulpevent *sctp_make_reassembled_event(struct net *net, struct sk_buff_head *queue, struct sk_buff *f_frag, struct sk_buff *l_frag) { struct sk_buff *pos; struct sk_buff *new = NULL; struct sctp_ulpevent *event; struct sk_buff *pnext, *last; struct sk_buff *list = skb_shinfo(f_frag)->frag_list; /* Store the pointer to the 2nd skb */ if (f_frag == l_frag) pos = NULL; else pos = f_frag->next; /* Get the last skb in the f_frag's frag_list if present. */ for (last = list; list; last = list, list = list->next) ; /* Add the list of remaining fragments to the first fragments * frag_list. */ if (last) last->next = pos; else { if (skb_cloned(f_frag)) { /* This is a cloned skb, we can't just modify * the frag_list. We need a new skb to do that. * Instead of calling skb_unshare(), we'll do it * ourselves since we need to delay the free. */ new = skb_copy(f_frag, GFP_ATOMIC); if (!new) return NULL; /* try again later */ sctp_skb_set_owner_r(new, f_frag->sk); skb_shinfo(new)->frag_list = pos; } else skb_shinfo(f_frag)->frag_list = pos; } /* Remove the first fragment from the reassembly queue. */ __skb_unlink(f_frag, queue); /* if we did unshare, then free the old skb and re-assign */ if (new) { kfree_skb(f_frag); f_frag = new; } while (pos) { pnext = pos->next; /* Update the len and data_len fields of the first fragment. */ f_frag->len += pos->len; f_frag->data_len += pos->len; /* Remove the fragment from the reassembly queue. */ __skb_unlink(pos, queue); /* Break if we have reached the last fragment. */ if (pos == l_frag) break; pos->next = pnext; pos = pnext; } event = sctp_skb2event(f_frag); SCTP_INC_STATS(net, SCTP_MIB_REASMUSRMSGS); return event; } /* Helper function to check if an incoming chunk has filled up the last * missing fragment in a SCTP datagram and return the corresponding event. */ static struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_ulpq *ulpq) { struct sk_buff *pos; struct sctp_ulpevent *cevent; struct sk_buff *first_frag = NULL; __u32 ctsn, next_tsn; struct sctp_ulpevent *retval = NULL; struct sk_buff *pd_first = NULL; struct sk_buff *pd_last = NULL; size_t pd_len = 0; struct sctp_association *asoc; u32 pd_point; /* Initialized to 0 just to avoid compiler warning message. Will * never be used with this value. It is referenced only after it * is set when we find the first fragment of a message. */ next_tsn = 0; /* The chunks are held in the reasm queue sorted by TSN. * Walk through the queue sequentially and look for a sequence of * fragmented chunks that complete a datagram. * 'first_frag' and next_tsn are reset when we find a chunk which * is the first fragment of a datagram. Once these 2 fields are set * we expect to find the remaining middle fragments and the last * fragment in order. If not, first_frag is reset to NULL and we * start the next pass when we find another first fragment. * * There is a potential to do partial delivery if user sets * SCTP_PARTIAL_DELIVERY_POINT option. Lets count some things here * to see if can do PD. */ skb_queue_walk(&ulpq->reasm, pos) { cevent = sctp_skb2event(pos); ctsn = cevent->tsn; switch (cevent->msg_flags & SCTP_DATA_FRAG_MASK) { case SCTP_DATA_FIRST_FRAG: /* If this "FIRST_FRAG" is the first * element in the queue, then count it towards * possible PD. */ if (skb_queue_is_first(&ulpq->reasm, pos)) { pd_first = pos; pd_last = pos; pd_len = pos->len; } else { pd_first = NULL; pd_last = NULL; pd_len = 0; } first_frag = pos; next_tsn = ctsn + 1; break; case SCTP_DATA_MIDDLE_FRAG: if ((first_frag) && (ctsn == next_tsn)) { next_tsn++; if (pd_first) { pd_last = pos; pd_len += pos->len; } } else first_frag = NULL; break; case SCTP_DATA_LAST_FRAG: if (first_frag && (ctsn == next_tsn)) goto found; else first_frag = NULL; break; } } asoc = ulpq->asoc; if (pd_first) { /* Make sure we can enter partial deliver. * We can trigger partial delivery only if framgent * interleave is set, or the socket is not already * in partial delivery. */ if (!sctp_sk(asoc->base.sk)->frag_interleave && atomic_read(&sctp_sk(asoc->base.sk)->pd_mode)) goto done; cevent = sctp_skb2event(pd_first); pd_point = sctp_sk(asoc->base.sk)->pd_point; if (pd_point && pd_point <= pd_len) { retval = sctp_make_reassembled_event(asoc->base.net, &ulpq->reasm, pd_first, pd_last); if (retval) sctp_ulpq_set_pd(ulpq); } } done: return retval; found: retval = sctp_make_reassembled_event(ulpq->asoc->base.net, &ulpq->reasm, first_frag, pos); if (retval) retval->msg_flags |= MSG_EOR; goto done; } /* Retrieve the next set of fragments of a partial message. */ static struct sctp_ulpevent *sctp_ulpq_retrieve_partial(struct sctp_ulpq *ulpq) { struct sk_buff *pos, *last_frag, *first_frag; struct sctp_ulpevent *cevent; __u32 ctsn, next_tsn; int is_last; struct sctp_ulpevent *retval; /* The chunks are held in the reasm queue sorted by TSN. * Walk through the queue sequentially and look for the first * sequence of fragmented chunks. */ if (skb_queue_empty(&ulpq->reasm)) return NULL; last_frag = first_frag = NULL; retval = NULL; next_tsn = 0; is_last = 0; skb_queue_walk(&ulpq->reasm, pos) { cevent = sctp_skb2event(pos); ctsn = cevent->tsn; switch (cevent->msg_flags & SCTP_DATA_FRAG_MASK) { case SCTP_DATA_FIRST_FRAG: if (!first_frag) return NULL; goto done; case SCTP_DATA_MIDDLE_FRAG: if (!first_frag) { first_frag = pos; next_tsn = ctsn + 1; last_frag = pos; } else if (next_tsn == ctsn) { next_tsn++; last_frag = pos; } else goto done; break; case SCTP_DATA_LAST_FRAG: if (!first_frag) first_frag = pos; else if (ctsn != next_tsn) goto done; last_frag = pos; is_last = 1; goto done; default: return NULL; } } /* We have the reassembled event. There is no need to look * further. */ done: retval = sctp_make_reassembled_event(ulpq->asoc->base.net, &ulpq->reasm, first_frag, last_frag); if (retval && is_last) retval->msg_flags |= MSG_EOR; return retval; } /* Helper function to reassemble chunks. Hold chunks on the reasm queue that * need reassembling. */ static struct sctp_ulpevent *sctp_ulpq_reasm(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) { struct sctp_ulpevent *retval = NULL; /* Check if this is part of a fragmented message. */ if (SCTP_DATA_NOT_FRAG == (event->msg_flags & SCTP_DATA_FRAG_MASK)) { event->msg_flags |= MSG_EOR; return event; } sctp_ulpq_store_reasm(ulpq, event); if (!ulpq->pd_mode) retval = sctp_ulpq_retrieve_reassembled(ulpq); else { __u32 ctsn, ctsnap; /* Do not even bother unless this is the next tsn to * be delivered. */ ctsn = event->tsn; ctsnap = sctp_tsnmap_get_ctsn(&ulpq->asoc->peer.tsn_map); if (TSN_lte(ctsn, ctsnap)) retval = sctp_ulpq_retrieve_partial(ulpq); } return retval; } /* Retrieve the first part (sequential fragments) for partial delivery. */ static struct sctp_ulpevent *sctp_ulpq_retrieve_first(struct sctp_ulpq *ulpq) { struct sk_buff *pos, *last_frag, *first_frag; struct sctp_ulpevent *cevent; __u32 ctsn, next_tsn; struct sctp_ulpevent *retval; /* The chunks are held in the reasm queue sorted by TSN. * Walk through the queue sequentially and look for a sequence of * fragmented chunks that start a datagram. */ if (skb_queue_empty(&ulpq->reasm)) return NULL; last_frag = first_frag = NULL; retval = NULL; next_tsn = 0; skb_queue_walk(&ulpq->reasm, pos) { cevent = sctp_skb2event(pos); ctsn = cevent->tsn; switch (cevent->msg_flags & SCTP_DATA_FRAG_MASK) { case SCTP_DATA_FIRST_FRAG: if (!first_frag) { first_frag = pos; next_tsn = ctsn + 1; last_frag = pos; } else goto done; break; case SCTP_DATA_MIDDLE_FRAG: if (!first_frag) return NULL; if (ctsn == next_tsn) { next_tsn++; last_frag = pos; } else goto done; break; case SCTP_DATA_LAST_FRAG: if (!first_frag) return NULL; else goto done; break; default: return NULL; } } /* We have the reassembled event. There is no need to look * further. */ done: retval = sctp_make_reassembled_event(ulpq->asoc->base.net, &ulpq->reasm, first_frag, last_frag); return retval; } /* * Flush out stale fragments from the reassembly queue when processing * a Forward TSN. * * RFC 3758, Section 3.6 * * After receiving and processing a FORWARD TSN, the data receiver MUST * take cautions in updating its re-assembly queue. The receiver MUST * remove any partially reassembled message, which is still missing one * or more TSNs earlier than or equal to the new cumulative TSN point. * In the event that the receiver has invoked the partial delivery API, * a notification SHOULD also be generated to inform the upper layer API * that the message being partially delivered will NOT be completed. */ void sctp_ulpq_reasm_flushtsn(struct sctp_ulpq *ulpq, __u32 fwd_tsn) { struct sk_buff *pos, *tmp; struct sctp_ulpevent *event; __u32 tsn; if (skb_queue_empty(&ulpq->reasm)) return; skb_queue_walk_safe(&ulpq->reasm, pos, tmp) { event = sctp_skb2event(pos); tsn = event->tsn; /* Since the entire message must be abandoned by the * sender (item A3 in Section 3.5, RFC 3758), we can * free all fragments on the list that are less then * or equal to ctsn_point */ if (TSN_lte(tsn, fwd_tsn)) { __skb_unlink(pos, &ulpq->reasm); sctp_ulpevent_free(event); } else break; } } /* * Drain the reassembly queue. If we just cleared parted delivery, it * is possible that the reassembly queue will contain already reassembled * messages. Retrieve any such messages and give them to the user. */ static void sctp_ulpq_reasm_drain(struct sctp_ulpq *ulpq) { struct sctp_ulpevent *event = NULL; if (skb_queue_empty(&ulpq->reasm)) return; while ((event = sctp_ulpq_retrieve_reassembled(ulpq)) != NULL) { struct sk_buff_head temp; skb_queue_head_init(&temp); __skb_queue_tail(&temp, sctp_event2skb(event)); /* Do ordering if needed. */ if (event->msg_flags & MSG_EOR) event = sctp_ulpq_order(ulpq, event); /* Send event to the ULP. 'event' is the * sctp_ulpevent for very first SKB on the temp' list. */ if (event) sctp_ulpq_tail_event(ulpq, &temp); } } /* Helper function to gather skbs that have possibly become * ordered by an incoming chunk. */ static void sctp_ulpq_retrieve_ordered(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) { struct sk_buff_head *event_list; struct sk_buff *pos, *tmp; struct sctp_ulpevent *cevent; struct sctp_stream *stream; __u16 sid, csid, cssn; sid = event->stream; stream = &ulpq->asoc->stream; event_list = (struct sk_buff_head *) sctp_event2skb(event)->prev; /* We are holding the chunks by stream, by SSN. */ sctp_skb_for_each(pos, &ulpq->lobby, tmp) { cevent = (struct sctp_ulpevent *) pos->cb; csid = cevent->stream; cssn = cevent->ssn; /* Have we gone too far? */ if (csid > sid) break; /* Have we not gone far enough? */ if (csid < sid) continue; if (cssn != sctp_ssn_peek(stream, in, sid)) break; /* Found it, so mark in the stream. */ sctp_ssn_next(stream, in, sid); __skb_unlink(pos, &ulpq->lobby); /* Attach all gathered skbs to the event. */ __skb_queue_tail(event_list, pos); } } /* Helper function to store chunks needing ordering. */ static void sctp_ulpq_store_ordered(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) { struct sk_buff *pos; struct sctp_ulpevent *cevent; __u16 sid, csid; __u16 ssn, cssn; pos = skb_peek_tail(&ulpq->lobby); if (!pos) { __skb_queue_tail(&ulpq->lobby, sctp_event2skb(event)); return; } sid = event->stream; ssn = event->ssn; cevent = (struct sctp_ulpevent *) pos->cb; csid = cevent->stream; cssn = cevent->ssn; if (sid > csid) { __skb_queue_tail(&ulpq->lobby, sctp_event2skb(event)); return; } if ((sid == csid) && SSN_lt(cssn, ssn)) { __skb_queue_tail(&ulpq->lobby, sctp_event2skb(event)); return; } /* Find the right place in this list. We store them by * stream ID and then by SSN. */ skb_queue_walk(&ulpq->lobby, pos) { cevent = (struct sctp_ulpevent *) pos->cb; csid = cevent->stream; cssn = cevent->ssn; if (csid > sid) break; if (csid == sid && SSN_lt(ssn, cssn)) break; } /* Insert before pos. */ __skb_queue_before(&ulpq->lobby, pos, sctp_event2skb(event)); } static struct sctp_ulpevent *sctp_ulpq_order(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) { __u16 sid, ssn; struct sctp_stream *stream; /* Check if this message needs ordering. */ if (event->msg_flags & SCTP_DATA_UNORDERED) return event; /* Note: The stream ID must be verified before this routine. */ sid = event->stream; ssn = event->ssn; stream = &ulpq->asoc->stream; /* Is this the expected SSN for this stream ID? */ if (ssn != sctp_ssn_peek(stream, in, sid)) { /* We've received something out of order, so find where it * needs to be placed. We order by stream and then by SSN. */ sctp_ulpq_store_ordered(ulpq, event); return NULL; } /* Mark that the next chunk has been found. */ sctp_ssn_next(stream, in, sid); /* Go find any other chunks that were waiting for * ordering. */ sctp_ulpq_retrieve_ordered(ulpq, event); return event; } /* Helper function to gather skbs that have possibly become * ordered by forward tsn skipping their dependencies. */ static void sctp_ulpq_reap_ordered(struct sctp_ulpq *ulpq, __u16 sid) { struct sk_buff *pos, *tmp; struct sctp_ulpevent *cevent; struct sctp_ulpevent *event; struct sctp_stream *stream; struct sk_buff_head temp; struct sk_buff_head *lobby = &ulpq->lobby; __u16 csid, cssn; stream = &ulpq->asoc->stream; /* We are holding the chunks by stream, by SSN. */ skb_queue_head_init(&temp); event = NULL; sctp_skb_for_each(pos, lobby, tmp) { cevent = (struct sctp_ulpevent *) pos->cb; csid = cevent->stream; cssn = cevent->ssn; /* Have we gone too far? */ if (csid > sid) break; /* Have we not gone far enough? */ if (csid < sid) continue; /* see if this ssn has been marked by skipping */ if (!SSN_lt(cssn, sctp_ssn_peek(stream, in, csid))) break; __skb_unlink(pos, lobby); if (!event) /* Create a temporary list to collect chunks on. */ event = sctp_skb2event(pos); /* Attach all gathered skbs to the event. */ __skb_queue_tail(&temp, pos); } /* If we didn't reap any data, see if the next expected SSN * is next on the queue and if so, use that. */ if (event == NULL && pos != (struct sk_buff *)lobby) { cevent = (struct sctp_ulpevent *) pos->cb; csid = cevent->stream; cssn = cevent->ssn; if (csid == sid && cssn == sctp_ssn_peek(stream, in, csid)) { sctp_ssn_next(stream, in, csid); __skb_unlink(pos, lobby); __skb_queue_tail(&temp, pos); event = sctp_skb2event(pos); } } /* Send event to the ULP. 'event' is the sctp_ulpevent for * very first SKB on the 'temp' list. */ if (event) { /* see if we have more ordered that we can deliver */ sctp_ulpq_retrieve_ordered(ulpq, event); sctp_ulpq_tail_event(ulpq, &temp); } } /* Skip over an SSN. This is used during the processing of * Forwared TSN chunk to skip over the abandoned ordered data */ void sctp_ulpq_skip(struct sctp_ulpq *ulpq, __u16 sid, __u16 ssn) { struct sctp_stream *stream; /* Note: The stream ID must be verified before this routine. */ stream = &ulpq->asoc->stream; /* Is this an old SSN? If so ignore. */ if (SSN_lt(ssn, sctp_ssn_peek(stream, in, sid))) return; /* Mark that we are no longer expecting this SSN or lower. */ sctp_ssn_skip(stream, in, sid, ssn); /* Go find any other chunks that were waiting for * ordering and deliver them if needed. */ sctp_ulpq_reap_ordered(ulpq, sid); } __u16 sctp_ulpq_renege_list(struct sctp_ulpq *ulpq, struct sk_buff_head *list, __u16 needed) { __u16 freed = 0; __u32 tsn, last_tsn; struct sk_buff *skb, *flist, *last; struct sctp_ulpevent *event; struct sctp_tsnmap *tsnmap; tsnmap = &ulpq->asoc->peer.tsn_map; while ((skb = skb_peek_tail(list)) != NULL) { event = sctp_skb2event(skb); tsn = event->tsn; /* Don't renege below the Cumulative TSN ACK Point. */ if (TSN_lte(tsn, sctp_tsnmap_get_ctsn(tsnmap))) break; /* Events in ordering queue may have multiple fragments * corresponding to additional TSNs. Sum the total * freed space; find the last TSN. */ freed += skb_headlen(skb); flist = skb_shinfo(skb)->frag_list; for (last = flist; flist; flist = flist->next) { last = flist; freed += skb_headlen(last); } if (last) last_tsn = sctp_skb2event(last)->tsn; else last_tsn = tsn; /* Unlink the event, then renege all applicable TSNs. */ __skb_unlink(skb, list); sctp_ulpevent_free(event); while (TSN_lte(tsn, last_tsn)) { sctp_tsnmap_renege(tsnmap, tsn); tsn++; } if (freed >= needed) return freed; } return freed; } /* Renege 'needed' bytes from the ordering queue. */ static __u16 sctp_ulpq_renege_order(struct sctp_ulpq *ulpq, __u16 needed) { return sctp_ulpq_renege_list(ulpq, &ulpq->lobby, needed); } /* Renege 'needed' bytes from the reassembly queue. */ static __u16 sctp_ulpq_renege_frags(struct sctp_ulpq *ulpq, __u16 needed) { return sctp_ulpq_renege_list(ulpq, &ulpq->reasm, needed); } /* Partial deliver the first message as there is pressure on rwnd. */ void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq, gfp_t gfp) { struct sctp_ulpevent *event; struct sctp_association *asoc; struct sctp_sock *sp; __u32 ctsn; struct sk_buff *skb; asoc = ulpq->asoc; sp = sctp_sk(asoc->base.sk); /* If the association is already in Partial Delivery mode * we have nothing to do. */ if (ulpq->pd_mode) return; /* Data must be at or below the Cumulative TSN ACK Point to * start partial delivery. */ skb = skb_peek(&asoc->ulpq.reasm); if (skb != NULL) { ctsn = sctp_skb2event(skb)->tsn; if (!TSN_lte(ctsn, sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map))) return; } /* If the user enabled fragment interleave socket option, * multiple associations can enter partial delivery. * Otherwise, we can only enter partial delivery if the * socket is not in partial deliver mode. */ if (sp->frag_interleave || atomic_read(&sp->pd_mode) == 0) { /* Is partial delivery possible? */ event = sctp_ulpq_retrieve_first(ulpq); /* Send event to the ULP. */ if (event) { struct sk_buff_head temp; skb_queue_head_init(&temp); __skb_queue_tail(&temp, sctp_event2skb(event)); sctp_ulpq_tail_event(ulpq, &temp); sctp_ulpq_set_pd(ulpq); return; } } } /* Renege some packets to make room for an incoming chunk. */ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, gfp_t gfp) { struct sctp_association *asoc = ulpq->asoc; __u32 freed = 0; __u16 needed; needed = ntohs(chunk->chunk_hdr->length) - sizeof(struct sctp_data_chunk); if (skb_queue_empty(&asoc->base.sk->sk_receive_queue)) { freed = sctp_ulpq_renege_order(ulpq, needed); if (freed < needed) freed += sctp_ulpq_renege_frags(ulpq, needed - freed); } /* If able to free enough room, accept this chunk. */ if (sk_rmem_schedule(asoc->base.sk, chunk->skb, needed) && freed >= needed) { int retval = sctp_ulpq_tail_data(ulpq, chunk, gfp); /* * Enter partial delivery if chunk has not been * delivered; otherwise, drain the reassembly queue. */ if (retval <= 0) sctp_ulpq_partial_delivery(ulpq, gfp); else if (retval == 1) sctp_ulpq_reasm_drain(ulpq); } } /* Notify the application if an association is aborted and in * partial delivery mode. Send up any pending received messages. */ void sctp_ulpq_abort_pd(struct sctp_ulpq *ulpq, gfp_t gfp) { struct sctp_ulpevent *ev = NULL; struct sctp_sock *sp; struct sock *sk; if (!ulpq->pd_mode) return; sk = ulpq->asoc->base.sk; sp = sctp_sk(sk); if (sctp_ulpevent_type_enabled(ulpq->asoc->subscribe, SCTP_PARTIAL_DELIVERY_EVENT)) ev = sctp_ulpevent_make_pdapi(ulpq->asoc, SCTP_PARTIAL_DELIVERY_ABORTED, 0, 0, 0, gfp); if (ev) __skb_queue_tail(&sk->sk_receive_queue, sctp_event2skb(ev)); /* If there is data waiting, send it up the socket now. */ if ((sctp_ulpq_clear_pd(ulpq) || ev) && !sp->data_ready_signalled) { sp->data_ready_signalled = 1; sk->sk_data_ready(sk); } }
1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 // SPDX-License-Identifier: GPL-2.0-or-later /* * HID driver for VRC-2 2-axis Car controller * * Copyright (C) 2022 Marcus Folkesson <marcus.folkesson@gmail.com> */ #include <linux/device.h> #include <linux/hid.h> #include <linux/module.h> /* * VID/PID are probably "borrowed", so keep them locally and * do not populate hid-ids.h with those. */ #define USB_VENDOR_ID_VRC2 (0x07c0) #define USB_DEVICE_ID_VRC2 (0x1125) static const __u8 vrc2_rdesc_fixed[] = { 0x05, 0x01, // Usage Page (Generic Desktop Ctrls) 0x09, 0x04, // Usage (Joystick) 0xA1, 0x01, // Collection (Application) 0x09, 0x01, // Usage (Pointer) 0xA1, 0x00, // Collection (Physical) 0x09, 0x30, // Usage (X) 0x09, 0x31, // Usage (Y) 0x15, 0x00, // Logical Minimum (0) 0x26, 0xFF, 0x07, // Logical Maximum (2047) 0x35, 0x00, // Physical Minimum (0) 0x46, 0xFF, 0x00, // Physical Maximum (255) 0x75, 0x10, // Report Size (16) 0x95, 0x02, // Report Count (2) 0x81, 0x02, // Input (Data,Var,Abs,No Wrap,Linear,Preferred State,No Null Position) 0xC0, // End Collection 0x75, 0x08, // Report Size (8) 0x95, 0x03, // Report Count (3) 0x81, 0x03, // Input (Cnst,Var,Abs) 0xC0, // End Collection }; static const __u8 *vrc2_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { hid_info(hdev, "fixing up VRC-2 report descriptor\n"); *rsize = sizeof(vrc2_rdesc_fixed); return vrc2_rdesc_fixed; } static int vrc2_probe(struct hid_device *hdev, const struct hid_device_id *id) { int ret; /* * The device gives us 2 separate USB endpoints. * One of those (the one with report descriptor size of 23) is just bogus so ignore it */ if (hdev->dev_rsize == 23) return -ENODEV; ret = hid_parse(hdev); if (ret) { hid_err(hdev, "parse failed\n"); return ret; } ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT); if (ret) { hid_err(hdev, "hw start failed\n"); return ret; } return 0; } static const struct hid_device_id vrc2_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_VRC2, USB_DEVICE_ID_VRC2) }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(hid, vrc2_devices); static struct hid_driver vrc2_driver = { .name = "vrc2", .id_table = vrc2_devices, .report_fixup = vrc2_report_fixup, .probe = vrc2_probe, }; module_hid_driver(vrc2_driver); MODULE_AUTHOR("Marcus Folkesson <marcus.folkesson@gmail.com>"); MODULE_DESCRIPTION("HID driver for VRC-2 2-axis Car controller"); MODULE_LICENSE("GPL");
1 6 5 1 1 1 2 1 1 7 1 1 1 1 1 2 2 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 // SPDX-License-Identifier: GPL-2.0-only /* * File: datagram.c * * Datagram (ISI) Phonet sockets * * Copyright (C) 2008 Nokia Corporation. * * Authors: Sakari Ailus <sakari.ailus@nokia.com> * Rémi Denis-Courmont */ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/socket.h> #include <asm/ioctls.h> #include <net/sock.h> #include <linux/phonet.h> #include <linux/export.h> #include <net/phonet/phonet.h> static int pn_backlog_rcv(struct sock *sk, struct sk_buff *skb); /* associated socket ceases to exist */ static void pn_sock_close(struct sock *sk, long timeout) { sk_common_release(sk); } static int pn_ioctl(struct sock *sk, int cmd, int *karg) { struct sk_buff *skb; switch (cmd) { case SIOCINQ: spin_lock_bh(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); *karg = skb ? skb->len : 0; spin_unlock_bh(&sk->sk_receive_queue.lock); return 0; case SIOCPNADDRESOURCE: case SIOCPNDELRESOURCE: { u32 res = *karg; if (res >= 256) return -EINVAL; if (cmd == SIOCPNADDRESOURCE) return pn_sock_bind_res(sk, res); else return pn_sock_unbind_res(sk, res); } } return -ENOIOCTLCMD; } /* Destroy socket. All references are gone. */ static void pn_destruct(struct sock *sk) { skb_queue_purge(&sk->sk_receive_queue); } static int pn_init(struct sock *sk) { sk->sk_destruct = pn_destruct; return 0; } static int pn_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { DECLARE_SOCKADDR(struct sockaddr_pn *, target, msg->msg_name); struct sk_buff *skb; int err; if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_EOR|MSG_NOSIGNAL| MSG_CMSG_COMPAT)) return -EOPNOTSUPP; if (target == NULL) return -EDESTADDRREQ; if (msg->msg_namelen < sizeof(struct sockaddr_pn)) return -EINVAL; if (target->spn_family != AF_PHONET) return -EAFNOSUPPORT; skb = sock_alloc_send_skb(sk, MAX_PHONET_HEADER + len, msg->msg_flags & MSG_DONTWAIT, &err); if (skb == NULL) return err; skb_reserve(skb, MAX_PHONET_HEADER); err = memcpy_from_msg((void *)skb_put(skb, len), msg, len); if (err < 0) { kfree_skb(skb); return err; } /* * Fill in the Phonet header and * finally pass the packet forwards. */ err = pn_skb_send(sk, skb, target); /* If ok, return len. */ return (err >= 0) ? len : err; } static int pn_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, int *addr_len) { struct sk_buff *skb = NULL; struct sockaddr_pn sa; int rval = -EOPNOTSUPP; int copylen; if (flags & ~(MSG_PEEK|MSG_TRUNC|MSG_DONTWAIT|MSG_NOSIGNAL| MSG_CMSG_COMPAT)) goto out_nofree; skb = skb_recv_datagram(sk, flags, &rval); if (skb == NULL) goto out_nofree; pn_skb_get_src_sockaddr(skb, &sa); copylen = skb->len; if (len < copylen) { msg->msg_flags |= MSG_TRUNC; copylen = len; } rval = skb_copy_datagram_msg(skb, 0, msg, copylen); if (rval) { rval = -EFAULT; goto out; } rval = (flags & MSG_TRUNC) ? skb->len : copylen; if (msg->msg_name != NULL) { __sockaddr_check_size(sizeof(sa)); memcpy(msg->msg_name, &sa, sizeof(sa)); *addr_len = sizeof(sa); } out: skb_free_datagram(sk, skb); out_nofree: return rval; } /* Queue an skb for a sock. */ static int pn_backlog_rcv(struct sock *sk, struct sk_buff *skb) { int err = sock_queue_rcv_skb(sk, skb); if (err < 0) kfree_skb(skb); return err ? NET_RX_DROP : NET_RX_SUCCESS; } /* Module registration */ static struct proto pn_proto = { .close = pn_sock_close, .ioctl = pn_ioctl, .init = pn_init, .sendmsg = pn_sendmsg, .recvmsg = pn_recvmsg, .backlog_rcv = pn_backlog_rcv, .hash = pn_sock_hash, .unhash = pn_sock_unhash, .get_port = pn_sock_get_port, .obj_size = sizeof(struct pn_sock), .owner = THIS_MODULE, .name = "PHONET", }; static const struct phonet_protocol pn_dgram_proto = { .ops = &phonet_dgram_ops, .prot = &pn_proto, .sock_type = SOCK_DGRAM, }; int __init isi_register(void) { return phonet_proto_register(PN_PROTO_PHONET, &pn_dgram_proto); } void __exit isi_unregister(void) { phonet_proto_unregister(PN_PROTO_PHONET, &pn_dgram_proto); }
4 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * include/linux/if_team.h - Network team device driver header * Copyright (c) 2011 Jiri Pirko <jpirko@redhat.com> */ #ifndef _LINUX_IF_TEAM_H_ #define _LINUX_IF_TEAM_H_ #include <linux/netpoll.h> #include <net/sch_generic.h> #include <linux/types.h> #include <uapi/linux/if_team.h> struct team_pcpu_stats { u64_stats_t rx_packets; u64_stats_t rx_bytes; u64_stats_t rx_multicast; u64_stats_t tx_packets; u64_stats_t tx_bytes; struct u64_stats_sync syncp; u32 rx_dropped; u32 tx_dropped; u32 rx_nohandler; }; struct team; struct team_port { struct net_device *dev; struct hlist_node hlist; /* node in enabled ports hash list */ struct list_head list; /* node in ordinary list */ struct team *team; int index; /* index of enabled port. If disabled, it's set to -1 */ bool linkup; /* either state.linkup or user.linkup */ struct { bool linkup; u32 speed; u8 duplex; } state; /* Values set by userspace */ struct { bool linkup; bool linkup_enabled; } user; /* Custom gennetlink interface related flags */ bool changed; bool removed; /* * A place for storing original values of the device before it * become a port. */ struct { unsigned char dev_addr[MAX_ADDR_LEN]; unsigned int mtu; } orig; #ifdef CONFIG_NET_POLL_CONTROLLER struct netpoll *np; #endif s32 priority; /* lower number ~ higher priority */ u16 queue_id; struct list_head qom_list; /* node in queue override mapping list */ struct rcu_head rcu; long mode_priv[]; }; static inline struct team_port *team_port_get_rcu(const struct net_device *dev) { return rcu_dereference(dev->rx_handler_data); } static inline bool team_port_enabled(struct team_port *port) { return port->index != -1; } static inline bool team_port_txable(struct team_port *port) { return port->linkup && team_port_enabled(port); } static inline bool team_port_dev_txable(const struct net_device *port_dev) { struct team_port *port; bool txable; rcu_read_lock(); port = team_port_get_rcu(port_dev); txable = port ? team_port_txable(port) : false; rcu_read_unlock(); return txable; } #ifdef CONFIG_NET_POLL_CONTROLLER static inline void team_netpoll_send_skb(struct team_port *port, struct sk_buff *skb) { netpoll_send_skb(port->np, skb); } #else static inline void team_netpoll_send_skb(struct team_port *port, struct sk_buff *skb) { } #endif struct team_mode_ops { int (*init)(struct team *team); void (*exit)(struct team *team); rx_handler_result_t (*receive)(struct team *team, struct team_port *port, struct sk_buff *skb); bool (*transmit)(struct team *team, struct sk_buff *skb); int (*port_enter)(struct team *team, struct team_port *port); void (*port_leave)(struct team *team, struct team_port *port); void (*port_change_dev_addr)(struct team *team, struct team_port *port); void (*port_enabled)(struct team *team, struct team_port *port); void (*port_disabled)(struct team *team, struct team_port *port); }; extern int team_modeop_port_enter(struct team *team, struct team_port *port); extern void team_modeop_port_change_dev_addr(struct team *team, struct team_port *port); enum team_option_type { TEAM_OPTION_TYPE_U32, TEAM_OPTION_TYPE_STRING, TEAM_OPTION_TYPE_BINARY, TEAM_OPTION_TYPE_BOOL, TEAM_OPTION_TYPE_S32, }; struct team_option_inst_info { u32 array_index; struct team_port *port; /* != NULL if per-port */ }; struct team_gsetter_ctx { union { u32 u32_val; const char *str_val; struct { const void *ptr; u32 len; } bin_val; bool bool_val; s32 s32_val; } data; struct team_option_inst_info *info; }; struct team_option { struct list_head list; const char *name; bool per_port; unsigned int array_size; /* != 0 means the option is array */ enum team_option_type type; void (*init)(struct team *team, struct team_option_inst_info *info); void (*getter)(struct team *team, struct team_gsetter_ctx *ctx); int (*setter)(struct team *team, struct team_gsetter_ctx *ctx); }; extern void team_option_inst_set_change(struct team_option_inst_info *opt_inst_info); extern void team_options_change_check(struct team *team); struct team_mode { const char *kind; struct module *owner; size_t priv_size; size_t port_priv_size; const struct team_mode_ops *ops; enum netdev_lag_tx_type lag_tx_type; }; #define TEAM_PORT_HASHBITS 4 #define TEAM_PORT_HASHENTRIES (1 << TEAM_PORT_HASHBITS) #define TEAM_MODE_PRIV_LONGS 4 #define TEAM_MODE_PRIV_SIZE (sizeof(long) * TEAM_MODE_PRIV_LONGS) struct team { struct net_device *dev; /* associated netdevice */ struct team_pcpu_stats __percpu *pcpu_stats; const struct header_ops *header_ops_cache; /* * List of enabled ports and their count */ int en_port_count; struct hlist_head en_port_hlist[TEAM_PORT_HASHENTRIES]; struct list_head port_list; /* list of all ports */ struct list_head option_list; struct list_head option_inst_list; /* list of option instances */ const struct team_mode *mode; struct team_mode_ops ops; bool user_carrier_enabled; bool queue_override_enabled; struct list_head *qom_lists; /* array of queue override mapping lists */ bool port_mtu_change_allowed; bool notifier_ctx; struct { unsigned int count; unsigned int interval; /* in ms */ atomic_t count_pending; struct delayed_work dw; } notify_peers; struct { unsigned int count; unsigned int interval; /* in ms */ atomic_t count_pending; struct delayed_work dw; } mcast_rejoin; long mode_priv[TEAM_MODE_PRIV_LONGS]; }; static inline int team_dev_queue_xmit(struct team *team, struct team_port *port, struct sk_buff *skb) { BUILD_BUG_ON(sizeof(skb->queue_mapping) != sizeof(qdisc_skb_cb(skb)->slave_dev_queue_mapping)); skb_set_queue_mapping(skb, qdisc_skb_cb(skb)->slave_dev_queue_mapping); skb->dev = port->dev; if (unlikely(netpoll_tx_running(team->dev))) { team_netpoll_send_skb(port, skb); return 0; } return dev_queue_xmit(skb); } static inline struct hlist_head *team_port_index_hash(struct team *team, int port_index) { return &team->en_port_hlist[port_index & (TEAM_PORT_HASHENTRIES - 1)]; } static inline struct team_port *team_get_port_by_index(struct team *team, int port_index) { struct team_port *port; struct hlist_head *head = team_port_index_hash(team, port_index); hlist_for_each_entry(port, head, hlist) if (port->index == port_index) return port; return NULL; } static inline int team_num_to_port_index(struct team *team, unsigned int num) { int en_port_count = READ_ONCE(team->en_port_count); if (unlikely(!en_port_count)) return 0; return num % en_port_count; } static inline struct team_port *team_get_port_by_index_rcu(struct team *team, int port_index) { struct team_port *port; struct hlist_head *head = team_port_index_hash(team, port_index); hlist_for_each_entry_rcu(port, head, hlist) if (port->index == port_index) return port; return NULL; } static inline struct team_port * team_get_first_port_txable_rcu(struct team *team, struct team_port *port) { struct team_port *cur; if (likely(team_port_txable(port))) return port; cur = port; list_for_each_entry_continue_rcu(cur, &team->port_list, list) if (team_port_txable(cur)) return cur; list_for_each_entry_rcu(cur, &team->port_list, list) { if (cur == port) break; if (team_port_txable(cur)) return cur; } return NULL; } extern int team_options_register(struct team *team, const struct team_option *option, size_t option_count); extern void team_options_unregister(struct team *team, const struct team_option *option, size_t option_count); extern int team_mode_register(const struct team_mode *mode); extern void team_mode_unregister(const struct team_mode *mode); #define TEAM_DEFAULT_NUM_TX_QUEUES 16 #define TEAM_DEFAULT_NUM_RX_QUEUES 16 #define MODULE_ALIAS_TEAM_MODE(kind) MODULE_ALIAS("team-mode-" kind) #endif /* _LINUX_IF_TEAM_H_ */
1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 // SPDX-License-Identifier: GPL-2.0 /* ATM driver model support. */ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/init.h> #include <linux/kobject.h> #include <linux/atmdev.h> #include "common.h" #include "resources.h" #define to_atm_dev(cldev) container_of(cldev, struct atm_dev, class_dev) static ssize_t type_show(struct device *cdev, struct device_attribute *attr, char *buf) { struct atm_dev *adev = to_atm_dev(cdev); return scnprintf(buf, PAGE_SIZE, "%s\n", adev->type); } static ssize_t address_show(struct device *cdev, struct device_attribute *attr, char *buf) { struct atm_dev *adev = to_atm_dev(cdev); return scnprintf(buf, PAGE_SIZE, "%pM\n", adev->esi); } static ssize_t atmaddress_show(struct device *cdev, struct device_attribute *attr, char *buf) { unsigned long flags; struct atm_dev *adev = to_atm_dev(cdev); struct atm_dev_addr *aaddr; int count = 0; spin_lock_irqsave(&adev->lock, flags); list_for_each_entry(aaddr, &adev->local, entry) { count += scnprintf(buf + count, PAGE_SIZE - count, "%1phN.%2phN.%10phN.%6phN.%1phN\n", &aaddr->addr.sas_addr.prv[0], &aaddr->addr.sas_addr.prv[1], &aaddr->addr.sas_addr.prv[3], &aaddr->addr.sas_addr.prv[13], &aaddr->addr.sas_addr.prv[19]); } spin_unlock_irqrestore(&adev->lock, flags); return count; } static ssize_t atmindex_show(struct device *cdev, struct device_attribute *attr, char *buf) { struct atm_dev *adev = to_atm_dev(cdev); return scnprintf(buf, PAGE_SIZE, "%d\n", adev->number); } static ssize_t carrier_show(struct device *cdev, struct device_attribute *attr, char *buf) { struct atm_dev *adev = to_atm_dev(cdev); return scnprintf(buf, PAGE_SIZE, "%d\n", adev->signal == ATM_PHY_SIG_LOST ? 0 : 1); } static ssize_t link_rate_show(struct device *cdev, struct device_attribute *attr, char *buf) { struct atm_dev *adev = to_atm_dev(cdev); int link_rate; /* show the link rate, not the data rate */ switch (adev->link_rate) { case ATM_OC3_PCR: link_rate = 155520000; break; case ATM_OC12_PCR: link_rate = 622080000; break; case ATM_25_PCR: link_rate = 25600000; break; default: link_rate = adev->link_rate * 8 * 53; } return scnprintf(buf, PAGE_SIZE, "%d\n", link_rate); } static DEVICE_ATTR_RO(address); static DEVICE_ATTR_RO(atmaddress); static DEVICE_ATTR_RO(atmindex); static DEVICE_ATTR_RO(carrier); static DEVICE_ATTR_RO(type); static DEVICE_ATTR_RO(link_rate); static struct device_attribute *atm_attrs[] = { &dev_attr_atmaddress, &dev_attr_address, &dev_attr_atmindex, &dev_attr_carrier, &dev_attr_type, &dev_attr_link_rate, NULL }; static int atm_uevent(const struct device *cdev, struct kobj_uevent_env *env) { const struct atm_dev *adev; if (!cdev) return -ENODEV; adev = to_atm_dev(cdev); if (add_uevent_var(env, "NAME=%s%d", adev->type, adev->number)) return -ENOMEM; return 0; } static void atm_release(struct device *cdev) { struct atm_dev *adev = to_atm_dev(cdev); kfree(adev); } static struct class atm_class = { .name = "atm", .dev_release = atm_release, .dev_uevent = atm_uevent, }; int atm_register_sysfs(struct atm_dev *adev, struct device *parent) { struct device *cdev = &adev->class_dev; int i, j, err; cdev->class = &atm_class; cdev->parent = parent; dev_set_drvdata(cdev, adev); dev_set_name(cdev, "%s%d", adev->type, adev->number); err = device_register(cdev); if (err < 0) return err; for (i = 0; atm_attrs[i]; i++) { err = device_create_file(cdev, atm_attrs[i]); if (err) goto err_out; } return 0; err_out: for (j = 0; j < i; j++) device_remove_file(cdev, atm_attrs[j]); device_del(cdev); return err; } void atm_unregister_sysfs(struct atm_dev *adev) { struct device *cdev = &adev->class_dev; device_del(cdev); } int __init atm_sysfs_init(void) { return class_register(&atm_class); } void __exit atm_sysfs_exit(void) { class_unregister(&atm_class); }
9 1 10 8 2 2 4 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 // SPDX-License-Identifier: GPL-2.0-or-later /* * HID driver for ELECOM devices: * - BM084 Bluetooth Mouse * - EX-G Trackballs (M-XT3DRBK, M-XT3URBK, M-XT4DRBK) * - DEFT Trackballs (M-DT1DRBK, M-DT1URBK, M-DT2DRBK, M-DT2URBK) * - HUGE Trackballs (M-HT1DRBK, M-HT1URBK) * * Copyright (c) 2010 Richard Nauber <Richard.Nauber@gmail.com> * Copyright (c) 2016 Yuxuan Shui <yshuiv7@gmail.com> * Copyright (c) 2017 Diego Elio Pettenò <flameeyes@flameeyes.eu> * Copyright (c) 2017 Alex Manoussakis <amanou@gnu.org> * Copyright (c) 2017 Tomasz Kramkowski <tk@the-tk.com> * Copyright (c) 2020 YOSHIOKA Takuma <lo48576@hard-wi.red> * Copyright (c) 2022 Takahiro Fujii <fujii@xaxxi.net> */ /* */ #include <linux/device.h> #include <linux/hid.h> #include <linux/module.h> #include "hid-ids.h" /* * Certain ELECOM mice misreport their button count meaning that they only work * correctly with the ELECOM mouse assistant software which is unavailable for * Linux. A four extra INPUT reports and a FEATURE report are described by the * report descriptor but it does not appear that these enable software to * control what the extra buttons map to. The only simple and straightforward * solution seems to involve fixing up the report descriptor. */ #define MOUSE_BUTTONS_MAX 8 static void mouse_button_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int rsize, unsigned int button_bit_count, unsigned int padding_bit, unsigned int button_report_size, unsigned int button_usage_maximum, int nbuttons) { if (rsize < 32 || rdesc[button_bit_count] != 0x95 || rdesc[button_report_size] != 0x75 || rdesc[button_report_size + 1] != 0x01 || rdesc[button_usage_maximum] != 0x29 || rdesc[padding_bit] != 0x75) return; hid_info(hdev, "Fixing up Elecom mouse button count\n"); nbuttons = clamp(nbuttons, 0, MOUSE_BUTTONS_MAX); rdesc[button_bit_count + 1] = nbuttons; rdesc[button_usage_maximum + 1] = nbuttons; rdesc[padding_bit + 1] = MOUSE_BUTTONS_MAX - nbuttons; } static const __u8 *elecom_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { switch (hdev->product) { case USB_DEVICE_ID_ELECOM_BM084: /* The BM084 Bluetooth mouse includes a non-existing horizontal * wheel in the HID descriptor. */ if (*rsize >= 48 && rdesc[46] == 0x05 && rdesc[47] == 0x0c) { hid_info(hdev, "Fixing up Elecom BM084 report descriptor\n"); rdesc[47] = 0x00; } break; case USB_DEVICE_ID_ELECOM_M_XGL20DLBK: /* * Report descriptor format: * 20: button bit count * 28: padding bit count * 22: button report size * 14: button usage maximum */ mouse_button_fixup(hdev, rdesc, *rsize, 20, 28, 22, 14, 8); break; case USB_DEVICE_ID_ELECOM_M_XT3URBK: case USB_DEVICE_ID_ELECOM_M_XT3DRBK: case USB_DEVICE_ID_ELECOM_M_XT4DRBK: /* * Report descriptor format: * 12: button bit count * 30: padding bit count * 14: button report size * 20: button usage maximum */ mouse_button_fixup(hdev, rdesc, *rsize, 12, 30, 14, 20, 6); break; case USB_DEVICE_ID_ELECOM_M_DT1URBK: case USB_DEVICE_ID_ELECOM_M_DT1DRBK: case USB_DEVICE_ID_ELECOM_M_HT1URBK_010C: case USB_DEVICE_ID_ELECOM_M_HT1URBK_019B: case USB_DEVICE_ID_ELECOM_M_HT1DRBK_010D: /* * Report descriptor format: * 12: button bit count * 30: padding bit count * 14: button report size * 20: button usage maximum */ mouse_button_fixup(hdev, rdesc, *rsize, 12, 30, 14, 20, 8); break; case USB_DEVICE_ID_ELECOM_M_DT2DRBK: case USB_DEVICE_ID_ELECOM_M_HT1DRBK_011C: /* * Report descriptor format: * 22: button bit count * 30: padding bit count * 24: button report size * 16: button usage maximum */ mouse_button_fixup(hdev, rdesc, *rsize, 22, 30, 24, 16, 8); break; } return rdesc; } static const struct hid_device_id elecom_devices[] = { { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_BM084) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_XGL20DLBK) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_XT3URBK) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_XT3DRBK) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_XT4DRBK) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_DT1URBK) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_DT1DRBK) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_DT2DRBK) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1URBK_010C) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1URBK_019B) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1DRBK_010D) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1DRBK_011C) }, { } }; MODULE_DEVICE_TABLE(hid, elecom_devices); static struct hid_driver elecom_driver = { .name = "elecom", .id_table = elecom_devices, .report_fixup = elecom_report_fixup }; module_hid_driver(elecom_driver); MODULE_DESCRIPTION("HID driver for ELECOM devices"); MODULE_LICENSE("GPL");
1 1 1 2 1 1 3 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 // SPDX-License-Identifier: GPL-2.0-only /* * vivid-radio-tx.c - radio transmitter support functions. * * Copyright 2014 Cisco Systems, Inc. and/or its affiliates. All rights reserved. */ #include <linux/errno.h> #include <linux/kernel.h> #include <linux/sched/signal.h> #include <linux/delay.h> #include <linux/videodev2.h> #include <linux/v4l2-dv-timings.h> #include <media/v4l2-common.h> #include <media/v4l2-event.h> #include <media/v4l2-dv-timings.h> #include "vivid-core.h" #include "vivid-ctrls.h" #include "vivid-radio-common.h" #include "vivid-radio-tx.h" ssize_t vivid_radio_tx_write(struct file *file, const char __user *buf, size_t size, loff_t *offset) { struct vivid_dev *dev = video_drvdata(file); struct v4l2_rds_data *data = dev->rds_gen.data; ktime_t timestamp; unsigned blk; int i; if (dev->radio_tx_rds_controls) return -EINVAL; if (size < sizeof(*data)) return -EINVAL; size = sizeof(*data) * (size / sizeof(*data)); if (mutex_lock_interruptible(&dev->mutex)) return -ERESTARTSYS; if (dev->radio_tx_rds_owner && file_to_v4l2_fh(file) != dev->radio_tx_rds_owner) { mutex_unlock(&dev->mutex); return -EBUSY; } dev->radio_tx_rds_owner = file_to_v4l2_fh(file); retry: timestamp = ktime_sub(ktime_get(), dev->radio_rds_init_time); blk = ktime_divns(timestamp, VIVID_RDS_NSEC_PER_BLK); if (blk - VIVID_RDS_GEN_BLOCKS >= dev->radio_tx_rds_last_block) dev->radio_tx_rds_last_block = blk - VIVID_RDS_GEN_BLOCKS + 1; /* * No data is available if there hasn't been time to get new data, * or if the RDS receiver has been disabled, or if we use the data * from the RDS transmitter and that RDS transmitter has been disabled, * or if the signal quality is too weak. */ if (blk == dev->radio_tx_rds_last_block || !(dev->radio_tx_subchans & V4L2_TUNER_SUB_RDS)) { mutex_unlock(&dev->mutex); if (file->f_flags & O_NONBLOCK) return -EWOULDBLOCK; if (msleep_interruptible(20) && signal_pending(current)) return -EINTR; if (mutex_lock_interruptible(&dev->mutex)) return -ERESTARTSYS; goto retry; } for (i = 0; i < size && blk > dev->radio_tx_rds_last_block; dev->radio_tx_rds_last_block++) { unsigned data_blk = dev->radio_tx_rds_last_block % VIVID_RDS_GEN_BLOCKS; struct v4l2_rds_data rds; if (copy_from_user(&rds, buf + i, sizeof(rds))) { i = -EFAULT; break; } i += sizeof(rds); if (!dev->radio_rds_loop) continue; if ((rds.block & V4L2_RDS_BLOCK_MSK) == V4L2_RDS_BLOCK_INVALID || (rds.block & V4L2_RDS_BLOCK_ERROR)) continue; rds.block &= V4L2_RDS_BLOCK_MSK; data[data_blk] = rds; } mutex_unlock(&dev->mutex); return i; } __poll_t vivid_radio_tx_poll(struct file *file, struct poll_table_struct *wait) { return EPOLLOUT | EPOLLWRNORM | v4l2_ctrl_poll(file, wait); } int vidioc_g_modulator(struct file *file, void *priv, struct v4l2_modulator *a) { struct vivid_dev *dev = video_drvdata(file); if (a->index > 0) return -EINVAL; strscpy(a->name, "AM/FM/SW Transmitter", sizeof(a->name)); a->capability = V4L2_TUNER_CAP_LOW | V4L2_TUNER_CAP_STEREO | V4L2_TUNER_CAP_FREQ_BANDS | V4L2_TUNER_CAP_RDS | (dev->radio_tx_rds_controls ? V4L2_TUNER_CAP_RDS_CONTROLS : V4L2_TUNER_CAP_RDS_BLOCK_IO); a->rangelow = AM_FREQ_RANGE_LOW; a->rangehigh = FM_FREQ_RANGE_HIGH; a->txsubchans = dev->radio_tx_subchans; return 0; } int vidioc_s_modulator(struct file *file, void *priv, const struct v4l2_modulator *a) { struct vivid_dev *dev = video_drvdata(file); if (a->index) return -EINVAL; if (a->txsubchans & ~0x13) return -EINVAL; dev->radio_tx_subchans = a->txsubchans; return 0; }
6 8 2 1 12 8 12 12 6 1 6 1 6 17 17 8 7 9 7 7 11 28 28 9 9 9 18 19 19 9 17 17 5 6 6 6 17 17 17 17 3 3 3 3 3 3 8 8 1 1 1 6 17 3 1 2 4 1 1 1 4 1 1 2 1 4 10 10 11 11 11 11 11 11 5 5 5 1 1 1 1 2 2 1 3 3 1 2 3 3 3 3 5 5 2 2 8 8 1 8 8 8 8 2 7 3 8 3 3 4 36 2 2 1 1 1 2 8 8 5 1 5 1 3 2 2 2 1 1 1 1 1 1 1 1 1 1 1 5 8 5 3 3 1 2 2 1 1 4 4 11 11 11 1 1 1 1 1 1 1 1 1 2 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 1 1 12 4 8 4 4 4 3 1 2 1 4 11 11 1 1 1 20 20 20 4 1 14 1 19 9 13 13 12 1 8 25 25 14 21 1 19 6 18 1 1 1 17 1 17 16 10 7 2 5 2 20 2 2 1 1 2 2 3 3 3 1 1 6 6 6 3 3 3 5 5 4 5 4 5 2 2 32 7 23 24 24 24 24 22 19 2 1 18 3 2 1 1 1 24 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580 5581 5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643 5644 5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 5672 5673 5674 5675 5676 5677 5678 5679 5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706 5707 5708 5709 5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729 5730 5731 5732 5733 5734 5735 5736 5737 5738 5739 5740 5741 5742 5743 5744 5745 5746 5747 5748 5749 5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807 5808 5809 5810 5811 5812 5813 5814 5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833 5834 5835 5836 5837 5838 5839 5840 5841 5842 5843 5844 5845 5846 5847 5848 5849 5850 5851 5852 5853 5854 5855 5856 5857 5858 5859 5860 5861 5862 5863 5864 5865 5866 5867 5868 5869 5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897 5898 5899 5900 5901 5902 5903 5904 5905 5906 5907 5908 5909 5910 5911 5912 5913 5914 5915 5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042 6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 6104 6105 6106 6107 6108 6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 6122 6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145 6146 6147 6148 6149 6150 6151 6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 6180 6181 6182 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 6199 6200 6201 6202 6203 6204 6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271 6272 6273 6274 6275 6276 6277 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287 6288 6289 6290 6291 6292 6293 6294 6295 6296 6297 6298 6299 6300 6301 6302 6303 6304 6305 6306 6307 6308 6309 6310 6311 6312 6313 6314 6315 6316 6317 6318 6319 6320 6321 6322 6323 6324 6325 6326 6327 6328 6329 6330 6331 6332 6333 6334 6335 6336 6337 6338 6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366 6367 6368 6369 6370 6371 6372 6373 6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400 6401 6402 6403 6404 6405 6406 6407 6408 6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 6419 6420 6421 6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 6438 6439 6440 6441 6442 6443 6444 6445 6446 6447 6448 6449 6450 6451 6452 6453 6454 6455 6456 6457 6458 6459 6460 6461 6462 6463 6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 6536 6537 6538 6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549 6550 6551 6552 6553 6554 6555 6556 6557 6558 6559 6560 6561 6562 6563 6564 6565 6566 6567 6568 6569 6570 6571 6572 6573 6574 6575 6576 6577 6578 6579 6580 6581 6582 6583 6584 6585 6586 6587 6588 6589 6590 6591 6592 6593 6594 6595 6596 6597 6598 6599 6600 6601 6602 6603 6604 6605 6606 6607 6608 6609 6610 6611 6612 6613 6614 6615 6616 6617 6618 6619 6620 6621 6622 6623 6624 6625 6626 6627 6628 6629 6630 6631 6632 6633 6634 6635 6636 6637 6638 6639 6640 6641 6642 6643 6644 6645 6646 6647 6648 6649 6650 6651 6652 6653 6654 6655 6656 6657 6658 6659 6660 6661 6662 6663 6664 6665 6666 6667 6668 6669 6670 6671 6672 6673 6674 6675 6676 6677 6678 6679 6680 6681 6682 6683 6684 6685 6686 6687 6688 6689 6690 6691 6692 6693 6694 6695 6696 6697 6698 6699 6700 6701 6702 6703 6704 6705 6706 6707 6708 6709 6710 6711 6712 6713 6714 6715 6716 6717 6718 6719 6720 6721 6722 6723 6724 6725 6726 6727 6728 6729 6730 6731 6732 6733 6734 6735 6736 6737 6738 6739 6740 6741 6742 6743 6744 6745 6746 6747 6748 6749 6750 6751 6752 6753 6754 6755 6756 6757 6758 6759 6760 6761 6762 6763 6764 6765 6766 6767 6768 6769 6770 6771 6772 6773 6774 6775 6776 6777 6778 6779 6780 6781 6782 6783 6784 6785 6786 6787 6788 6789 6790 6791 6792 6793 6794 6795 6796 6797 6798 6799 6800 6801 6802 6803 6804 6805 6806 6807 6808 6809 6810 6811 6812 6813 6814 6815 6816 6817 6818 6819 6820 6821 6822 6823 6824 6825 6826 6827 6828 6829 6830 6831 6832 6833 6834 6835 6836 6837 6838 6839 6840 6841 6842 6843 6844 6845 6846 6847 6848 6849 6850 6851 6852 6853 6854 6855 6856 6857 6858 6859 6860 6861 6862 6863 6864 6865 6866 6867 6868 6869 6870 6871 6872 6873 6874 6875 6876 6877 6878 6879 6880 6881 6882 6883 6884 6885 6886 6887 6888 6889 6890 6891 6892 6893 6894 6895 6896 6897 6898 6899 6900 6901 6902 6903 6904 6905 6906 6907 6908 6909 6910 6911 6912 6913 6914 6915 6916 6917 6918 6919 6920 6921 6922 6923 6924 6925 6926 6927 6928 6929 6930 6931 6932 6933 6934 6935 6936 6937 6938 6939 6940 6941 6942 6943 6944 6945 6946 6947 6948 6949 6950 6951 6952 6953 6954 6955 6956 6957 6958 6959 6960 6961 6962 6963 6964 6965 6966 6967 6968 6969 6970 6971 6972 6973 6974 6975 6976 6977 6978 6979 6980 6981 6982 6983 6984 6985 6986 6987 6988 6989 6990 6991 6992 6993 6994 6995 6996 6997 6998 6999 7000 7001 7002 7003 7004 7005 7006 7007 7008 7009 7010 7011 7012 7013 7014 7015 7016 7017 7018 7019 7020 7021 7022 7023 7024 7025 7026 7027 7028 7029 7030 7031 7032 7033 7034 7035 7036 7037 7038 7039 7040 7041 7042 7043 7044 7045 7046 7047 7048 7049 7050 7051 7052 7053 7054 7055 7056 7057 7058 7059 7060 7061 7062 7063 7064 7065 7066 7067 7068 7069 7070 7071 7072 7073 7074 7075 7076 7077 7078 7079 7080 7081 7082 7083 7084 7085 7086 7087 7088 7089 7090 7091 7092 7093 7094 7095 7096 7097 7098 7099 7100 7101 7102 7103 7104 7105 7106 7107 7108 7109 7110 7111 7112 7113 7114 7115 7116 7117 7118 7119 7120 7121 7122 7123 7124 7125 7126 7127 7128 7129 7130 7131 7132 7133 7134 7135 7136 7137 7138 7139 7140 7141 7142 7143 7144 7145 7146 7147 7148 7149 7150 7151 7152 7153 7154 7155 7156 7157 7158 7159 7160 7161 7162 7163 7164 7165 7166 7167 7168 7169 7170 7171 7172 7173 7174 7175 7176 7177 7178 7179 7180 7181 7182 7183 7184 7185 7186 7187 7188 7189 7190 7191 7192 7193 7194 7195 7196 7197 7198 7199 7200 7201 7202 7203 7204 7205 7206 7207 7208 7209 7210 7211 7212 7213 7214 7215 7216 7217 7218 7219 7220 7221 7222 7223 7224 7225 7226 7227 7228 7229 7230 7231 7232 7233 7234 7235 7236 7237 7238 7239 7240 7241 7242 7243 7244 7245 7246 7247 7248 7249 7250 7251 7252 7253 7254 7255 7256 7257 7258 7259 7260 7261 7262 7263 7264 7265 7266 7267 7268 7269 7270 7271 7272 7273 7274 7275 7276 7277 7278 7279 7280 7281 7282 7283 7284 7285 7286 7287 7288 7289 7290 7291 7292 7293 7294 7295 7296 7297 7298 7299 7300 7301 7302 7303 7304 7305 7306 7307 7308 7309 7310 7311 7312 7313 7314 7315 7316 7317 7318 7319 7320 7321 7322 7323 7324 7325 7326 7327 7328 7329 7330 7331 7332 7333 7334 7335 7336 7337 7338 7339 7340 7341 7342 7343 7344 7345 7346 7347 7348 7349 7350 7351 7352 7353 7354 7355 7356 7357 7358 7359 7360 7361 7362 7363 7364 7365 7366 7367 7368 7369 7370 7371 7372 7373 7374 7375 7376 7377 7378 7379 7380 7381 7382 7383 7384 7385 7386 7387 7388 7389 7390 7391 7392 7393 7394 7395 7396 7397 7398 7399 7400 7401 7402 7403 7404 7405 7406 7407 7408 7409 7410 7411 7412 7413 7414 7415 7416 7417 7418 7419 7420 7421 7422 7423 7424 7425 7426 7427 7428 7429 7430 7431 7432 7433 7434 7435 7436 7437 7438 7439 7440 7441 7442 7443 7444 7445 7446 7447 7448 7449 7450 7451 7452 7453 7454 7455 7456 7457 7458 7459 7460 7461 7462 7463 7464 7465 7466 7467 7468 7469 7470 7471 7472 7473 7474 7475 7476 7477 7478 7479 7480 7481 7482 7483 7484 7485 7486 7487 7488 7489 7490 7491 7492 7493 7494 7495 7496 7497 7498 7499 7500 7501 7502 7503 7504 7505 7506 7507 7508 7509 7510 7511 7512 7513 7514 7515 7516 7517 7518 7519 7520 7521 7522 7523 7524 7525 7526 7527 7528 7529 7530 7531 7532 7533 7534 7535 7536 7537 7538 7539 7540 7541 7542 7543 7544 7545 7546 7547 7548 7549 7550 7551 7552 7553 7554 7555 7556 7557 7558 7559 7560 7561 7562 7563 7564 7565 7566 7567 7568 7569 7570 7571 7572 7573 7574 7575 7576 7577 7578 7579 7580 7581 7582 7583 7584 7585 7586 7587 7588 7589 7590 7591 7592 7593 7594 7595 7596 7597 7598 7599 7600 7601 7602 7603 7604 7605 7606 7607 7608 7609 7610 7611 7612 7613 7614 7615 7616 7617 7618 7619 7620 7621 7622 7623 7624 7625 7626 7627 7628 7629 7630 7631 7632 7633 7634 7635 7636 7637 7638 7639 7640 7641 7642 7643 7644 7645 7646 7647 7648 7649 7650 7651 7652 7653 7654 7655 7656 7657 7658 7659 7660 7661 7662 7663 7664 7665 7666 7667 7668 7669 7670 7671 7672 7673 7674 7675 7676 7677 7678 7679 7680 7681 7682 7683 7684 7685 7686 7687 7688 7689 7690 7691 7692 7693 7694 7695 7696 7697 7698 7699 7700 7701 7702 7703 7704 7705 7706 7707 /* BlueZ - Bluetooth protocol stack for Linux Copyright (C) 2000-2001 Qualcomm Incorporated Copyright (C) 2009-2010 Gustavo F. Padovan <gustavo@padovan.org> Copyright (C) 2010 Google Inc. Copyright (C) 2011 ProFUSION Embedded Systems Copyright (c) 2012 Code Aurora Forum. All rights reserved. Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License version 2 as published by the Free Software Foundation; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS SOFTWARE IS DISCLAIMED. */ /* Bluetooth L2CAP core. */ #include <linux/module.h> #include <linux/debugfs.h> #include <linux/crc16.h> #include <linux/filter.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> #include <net/bluetooth/l2cap.h> #include "smp.h" #define LE_FLOWCTL_MAX_CREDITS 65535 bool disable_ertm; bool enable_ecred = IS_ENABLED(CONFIG_BT_LE_L2CAP_ECRED); static u32 l2cap_feat_mask = L2CAP_FEAT_FIXED_CHAN | L2CAP_FEAT_UCD; static LIST_HEAD(chan_list); static DEFINE_RWLOCK(chan_list_lock); static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn, u8 code, u8 ident, u16 dlen, void *data); static void l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 len, void *data); static int l2cap_build_conf_req(struct l2cap_chan *chan, void *data, size_t data_size); static void l2cap_send_disconn_req(struct l2cap_chan *chan, int err); static void l2cap_tx(struct l2cap_chan *chan, struct l2cap_ctrl *control, struct sk_buff_head *skbs, u8 event); static void l2cap_retrans_timeout(struct work_struct *work); static void l2cap_monitor_timeout(struct work_struct *work); static void l2cap_ack_timeout(struct work_struct *work); static inline u8 bdaddr_type(u8 link_type, u8 bdaddr_type) { if (link_type == LE_LINK) { if (bdaddr_type == ADDR_LE_DEV_PUBLIC) return BDADDR_LE_PUBLIC; else return BDADDR_LE_RANDOM; } return BDADDR_BREDR; } static inline u8 bdaddr_src_type(struct hci_conn *hcon) { return bdaddr_type(hcon->type, hcon->src_type); } static inline u8 bdaddr_dst_type(struct hci_conn *hcon) { return bdaddr_type(hcon->type, hcon->dst_type); } /* ---- L2CAP channels ---- */ static struct l2cap_chan *__l2cap_get_chan_by_dcid(struct l2cap_conn *conn, u16 cid) { struct l2cap_chan *c; list_for_each_entry(c, &conn->chan_l, list) { if (c->dcid == cid) return c; } return NULL; } static struct l2cap_chan *__l2cap_get_chan_by_scid(struct l2cap_conn *conn, u16 cid) { struct l2cap_chan *c; list_for_each_entry(c, &conn->chan_l, list) { if (c->scid == cid) return c; } return NULL; } /* Find channel with given SCID. * Returns a reference locked channel. */ static struct l2cap_chan *l2cap_get_chan_by_scid(struct l2cap_conn *conn, u16 cid) { struct l2cap_chan *c; c = __l2cap_get_chan_by_scid(conn, cid); if (c) { /* Only lock if chan reference is not 0 */ c = l2cap_chan_hold_unless_zero(c); if (c) l2cap_chan_lock(c); } return c; } /* Find channel with given DCID. * Returns a reference locked channel. */ static struct l2cap_chan *l2cap_get_chan_by_dcid(struct l2cap_conn *conn, u16 cid) { struct l2cap_chan *c; c = __l2cap_get_chan_by_dcid(conn, cid); if (c) { /* Only lock if chan reference is not 0 */ c = l2cap_chan_hold_unless_zero(c); if (c) l2cap_chan_lock(c); } return c; } static struct l2cap_chan *__l2cap_get_chan_by_ident(struct l2cap_conn *conn, u8 ident) { struct l2cap_chan *c; list_for_each_entry(c, &conn->chan_l, list) { if (c->ident == ident) return c; } return NULL; } static struct l2cap_chan *__l2cap_global_chan_by_addr(__le16 psm, bdaddr_t *src, u8 src_type) { struct l2cap_chan *c; list_for_each_entry(c, &chan_list, global_l) { if (src_type == BDADDR_BREDR && c->src_type != BDADDR_BREDR) continue; if (src_type != BDADDR_BREDR && c->src_type == BDADDR_BREDR) continue; if (c->sport == psm && !bacmp(&c->src, src)) return c; } return NULL; } int l2cap_add_psm(struct l2cap_chan *chan, bdaddr_t *src, __le16 psm) { int err; write_lock(&chan_list_lock); if (psm && __l2cap_global_chan_by_addr(psm, src, chan->src_type)) { err = -EADDRINUSE; goto done; } if (psm) { chan->psm = psm; chan->sport = psm; err = 0; } else { u16 p, start, end, incr; if (chan->src_type == BDADDR_BREDR) { start = L2CAP_PSM_DYN_START; end = L2CAP_PSM_AUTO_END; incr = 2; } else { start = L2CAP_PSM_LE_DYN_START; end = L2CAP_PSM_LE_DYN_END; incr = 1; } err = -EINVAL; for (p = start; p <= end; p += incr) if (!__l2cap_global_chan_by_addr(cpu_to_le16(p), src, chan->src_type)) { chan->psm = cpu_to_le16(p); chan->sport = cpu_to_le16(p); err = 0; break; } } done: write_unlock(&chan_list_lock); return err; } EXPORT_SYMBOL_GPL(l2cap_add_psm); int l2cap_add_scid(struct l2cap_chan *chan, __u16 scid) { write_lock(&chan_list_lock); /* Override the defaults (which are for conn-oriented) */ chan->omtu = L2CAP_DEFAULT_MTU; chan->chan_type = L2CAP_CHAN_FIXED; chan->scid = scid; write_unlock(&chan_list_lock); return 0; } static u16 l2cap_alloc_cid(struct l2cap_conn *conn) { u16 cid, dyn_end; if (conn->hcon->type == LE_LINK) dyn_end = L2CAP_CID_LE_DYN_END; else dyn_end = L2CAP_CID_DYN_END; for (cid = L2CAP_CID_DYN_START; cid <= dyn_end; cid++) { if (!__l2cap_get_chan_by_scid(conn, cid)) return cid; } return 0; } static void l2cap_state_change(struct l2cap_chan *chan, int state) { BT_DBG("chan %p %s -> %s", chan, state_to_string(chan->state), state_to_string(state)); chan->state = state; chan->ops->state_change(chan, state, 0); } static inline void l2cap_state_change_and_error(struct l2cap_chan *chan, int state, int err) { chan->state = state; chan->ops->state_change(chan, chan->state, err); } static inline void l2cap_chan_set_err(struct l2cap_chan *chan, int err) { chan->ops->state_change(chan, chan->state, err); } static void __set_retrans_timer(struct l2cap_chan *chan) { if (!delayed_work_pending(&chan->monitor_timer) && chan->retrans_timeout) { l2cap_set_timer(chan, &chan->retrans_timer, secs_to_jiffies(chan->retrans_timeout)); } } static void __set_monitor_timer(struct l2cap_chan *chan) { __clear_retrans_timer(chan); if (chan->monitor_timeout) { l2cap_set_timer(chan, &chan->monitor_timer, secs_to_jiffies(chan->monitor_timeout)); } } static struct sk_buff *l2cap_ertm_seq_in_queue(struct sk_buff_head *head, u16 seq) { struct sk_buff *skb; skb_queue_walk(head, skb) { if (bt_cb(skb)->l2cap.txseq == seq) return skb; } return NULL; } /* ---- L2CAP sequence number lists ---- */ /* For ERTM, ordered lists of sequence numbers must be tracked for * SREJ requests that are received and for frames that are to be * retransmitted. These seq_list functions implement a singly-linked * list in an array, where membership in the list can also be checked * in constant time. Items can also be added to the tail of the list * and removed from the head in constant time, without further memory * allocs or frees. */ static int l2cap_seq_list_init(struct l2cap_seq_list *seq_list, u16 size) { size_t alloc_size, i; /* Allocated size is a power of 2 to map sequence numbers * (which may be up to 14 bits) in to a smaller array that is * sized for the negotiated ERTM transmit windows. */ alloc_size = roundup_pow_of_two(size); seq_list->list = kmalloc_array(alloc_size, sizeof(u16), GFP_KERNEL); if (!seq_list->list) return -ENOMEM; seq_list->mask = alloc_size - 1; seq_list->head = L2CAP_SEQ_LIST_CLEAR; seq_list->tail = L2CAP_SEQ_LIST_CLEAR; for (i = 0; i < alloc_size; i++) seq_list->list[i] = L2CAP_SEQ_LIST_CLEAR; return 0; } static inline void l2cap_seq_list_free(struct l2cap_seq_list *seq_list) { kfree(seq_list->list); } static inline bool l2cap_seq_list_contains(struct l2cap_seq_list *seq_list, u16 seq) { /* Constant-time check for list membership */ return seq_list->list[seq & seq_list->mask] != L2CAP_SEQ_LIST_CLEAR; } static inline u16 l2cap_seq_list_pop(struct l2cap_seq_list *seq_list) { u16 seq = seq_list->head; u16 mask = seq_list->mask; seq_list->head = seq_list->list[seq & mask]; seq_list->list[seq & mask] = L2CAP_SEQ_LIST_CLEAR; if (seq_list->head == L2CAP_SEQ_LIST_TAIL) { seq_list->head = L2CAP_SEQ_LIST_CLEAR; seq_list->tail = L2CAP_SEQ_LIST_CLEAR; } return seq; } static void l2cap_seq_list_clear(struct l2cap_seq_list *seq_list) { u16 i; if (seq_list->head == L2CAP_SEQ_LIST_CLEAR) return; for (i = 0; i <= seq_list->mask; i++) seq_list->list[i] = L2CAP_SEQ_LIST_CLEAR; seq_list->head = L2CAP_SEQ_LIST_CLEAR; seq_list->tail = L2CAP_SEQ_LIST_CLEAR; } static void l2cap_seq_list_append(struct l2cap_seq_list *seq_list, u16 seq) { u16 mask = seq_list->mask; /* All appends happen in constant time */ if (seq_list->list[seq & mask] != L2CAP_SEQ_LIST_CLEAR) return; if (seq_list->tail == L2CAP_SEQ_LIST_CLEAR) seq_list->head = seq; else seq_list->list[seq_list->tail & mask] = seq; seq_list->tail = seq; seq_list->list[seq & mask] = L2CAP_SEQ_LIST_TAIL; } static void l2cap_chan_timeout(struct work_struct *work) { struct l2cap_chan *chan = container_of(work, struct l2cap_chan, chan_timer.work); struct l2cap_conn *conn = chan->conn; int reason; BT_DBG("chan %p state %s", chan, state_to_string(chan->state)); if (!conn) return; mutex_lock(&conn->lock); /* __set_chan_timer() calls l2cap_chan_hold(chan) while scheduling * this work. No need to call l2cap_chan_hold(chan) here again. */ l2cap_chan_lock(chan); if (chan->state == BT_CONNECTED || chan->state == BT_CONFIG) reason = ECONNREFUSED; else if (chan->state == BT_CONNECT && chan->sec_level != BT_SECURITY_SDP) reason = ECONNREFUSED; else reason = ETIMEDOUT; l2cap_chan_close(chan, reason); chan->ops->close(chan); l2cap_chan_unlock(chan); l2cap_chan_put(chan); mutex_unlock(&conn->lock); } struct l2cap_chan *l2cap_chan_create(void) { struct l2cap_chan *chan; chan = kzalloc(sizeof(*chan), GFP_ATOMIC); if (!chan) return NULL; skb_queue_head_init(&chan->tx_q); skb_queue_head_init(&chan->srej_q); mutex_init(&chan->lock); /* Set default lock nesting level */ atomic_set(&chan->nesting, L2CAP_NESTING_NORMAL); /* Available receive buffer space is initially unknown */ chan->rx_avail = -1; write_lock(&chan_list_lock); list_add(&chan->global_l, &chan_list); write_unlock(&chan_list_lock); INIT_DELAYED_WORK(&chan->chan_timer, l2cap_chan_timeout); INIT_DELAYED_WORK(&chan->retrans_timer, l2cap_retrans_timeout); INIT_DELAYED_WORK(&chan->monitor_timer, l2cap_monitor_timeout); INIT_DELAYED_WORK(&chan->ack_timer, l2cap_ack_timeout); chan->state = BT_OPEN; kref_init(&chan->kref); /* This flag is cleared in l2cap_chan_ready() */ set_bit(CONF_NOT_COMPLETE, &chan->conf_state); BT_DBG("chan %p", chan); return chan; } EXPORT_SYMBOL_GPL(l2cap_chan_create); static void l2cap_chan_destroy(struct kref *kref) { struct l2cap_chan *chan = container_of(kref, struct l2cap_chan, kref); BT_DBG("chan %p", chan); write_lock(&chan_list_lock); list_del(&chan->global_l); write_unlock(&chan_list_lock); kfree(chan); } void l2cap_chan_hold(struct l2cap_chan *c) { BT_DBG("chan %p orig refcnt %u", c, kref_read(&c->kref)); kref_get(&c->kref); } struct l2cap_chan *l2cap_chan_hold_unless_zero(struct l2cap_chan *c) { BT_DBG("chan %p orig refcnt %u", c, kref_read(&c->kref)); if (!kref_get_unless_zero(&c->kref)) return NULL; return c; } void l2cap_chan_put(struct l2cap_chan *c) { BT_DBG("chan %p orig refcnt %u", c, kref_read(&c->kref)); kref_put(&c->kref, l2cap_chan_destroy); } EXPORT_SYMBOL_GPL(l2cap_chan_put); void l2cap_chan_set_defaults(struct l2cap_chan *chan) { chan->fcs = L2CAP_FCS_CRC16; chan->max_tx = L2CAP_DEFAULT_MAX_TX; chan->tx_win = L2CAP_DEFAULT_TX_WINDOW; chan->tx_win_max = L2CAP_DEFAULT_TX_WINDOW; chan->remote_max_tx = chan->max_tx; chan->remote_tx_win = chan->tx_win; chan->ack_win = L2CAP_DEFAULT_TX_WINDOW; chan->sec_level = BT_SECURITY_LOW; chan->flush_to = L2CAP_DEFAULT_FLUSH_TO; chan->retrans_timeout = L2CAP_DEFAULT_RETRANS_TO; chan->monitor_timeout = L2CAP_DEFAULT_MONITOR_TO; chan->conf_state = 0; set_bit(CONF_NOT_COMPLETE, &chan->conf_state); set_bit(FLAG_FORCE_ACTIVE, &chan->flags); } EXPORT_SYMBOL_GPL(l2cap_chan_set_defaults); static __u16 l2cap_le_rx_credits(struct l2cap_chan *chan) { size_t sdu_len = chan->sdu ? chan->sdu->len : 0; if (chan->mps == 0) return 0; /* If we don't know the available space in the receiver buffer, give * enough credits for a full packet. */ if (chan->rx_avail == -1) return (chan->imtu / chan->mps) + 1; /* If we know how much space is available in the receive buffer, give * out as many credits as would fill the buffer. */ if (chan->rx_avail <= sdu_len) return 0; return DIV_ROUND_UP(chan->rx_avail - sdu_len, chan->mps); } static void l2cap_le_flowctl_init(struct l2cap_chan *chan, u16 tx_credits) { chan->sdu = NULL; chan->sdu_last_frag = NULL; chan->sdu_len = 0; chan->tx_credits = tx_credits; /* Derive MPS from connection MTU to stop HCI fragmentation */ chan->mps = min_t(u16, chan->imtu, chan->conn->mtu - L2CAP_HDR_SIZE); chan->rx_credits = l2cap_le_rx_credits(chan); skb_queue_head_init(&chan->tx_q); } static void l2cap_ecred_init(struct l2cap_chan *chan, u16 tx_credits) { l2cap_le_flowctl_init(chan, tx_credits); /* L2CAP implementations shall support a minimum MPS of 64 octets */ if (chan->mps < L2CAP_ECRED_MIN_MPS) { chan->mps = L2CAP_ECRED_MIN_MPS; chan->rx_credits = l2cap_le_rx_credits(chan); } } void __l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan) { BT_DBG("conn %p, psm 0x%2.2x, dcid 0x%4.4x", conn, __le16_to_cpu(chan->psm), chan->dcid); conn->disc_reason = HCI_ERROR_REMOTE_USER_TERM; chan->conn = conn; switch (chan->chan_type) { case L2CAP_CHAN_CONN_ORIENTED: /* Alloc CID for connection-oriented socket */ chan->scid = l2cap_alloc_cid(conn); if (conn->hcon->type == ACL_LINK) chan->omtu = L2CAP_DEFAULT_MTU; break; case L2CAP_CHAN_CONN_LESS: /* Connectionless socket */ chan->scid = L2CAP_CID_CONN_LESS; chan->dcid = L2CAP_CID_CONN_LESS; chan->omtu = L2CAP_DEFAULT_MTU; break; case L2CAP_CHAN_FIXED: /* Caller will set CID and CID specific MTU values */ break; default: /* Raw socket can send/recv signalling messages only */ chan->scid = L2CAP_CID_SIGNALING; chan->dcid = L2CAP_CID_SIGNALING; chan->omtu = L2CAP_DEFAULT_MTU; } chan->local_id = L2CAP_BESTEFFORT_ID; chan->local_stype = L2CAP_SERV_BESTEFFORT; chan->local_msdu = L2CAP_DEFAULT_MAX_SDU_SIZE; chan->local_sdu_itime = L2CAP_DEFAULT_SDU_ITIME; chan->local_acc_lat = L2CAP_DEFAULT_ACC_LAT; chan->local_flush_to = L2CAP_EFS_DEFAULT_FLUSH_TO; l2cap_chan_hold(chan); /* Only keep a reference for fixed channels if they requested it */ if (chan->chan_type != L2CAP_CHAN_FIXED || test_bit(FLAG_HOLD_HCI_CONN, &chan->flags)) hci_conn_hold(conn->hcon); /* Append to the list since the order matters for ECRED */ list_add_tail(&chan->list, &conn->chan_l); } void l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan) { mutex_lock(&conn->lock); __l2cap_chan_add(conn, chan); mutex_unlock(&conn->lock); } void l2cap_chan_del(struct l2cap_chan *chan, int err) { struct l2cap_conn *conn = chan->conn; __clear_chan_timer(chan); BT_DBG("chan %p, conn %p, err %d, state %s", chan, conn, err, state_to_string(chan->state)); chan->ops->teardown(chan, err); if (conn) { /* Delete from channel list */ list_del(&chan->list); l2cap_chan_put(chan); chan->conn = NULL; /* Reference was only held for non-fixed channels or * fixed channels that explicitly requested it using the * FLAG_HOLD_HCI_CONN flag. */ if (chan->chan_type != L2CAP_CHAN_FIXED || test_bit(FLAG_HOLD_HCI_CONN, &chan->flags)) hci_conn_drop(conn->hcon); } if (test_bit(CONF_NOT_COMPLETE, &chan->conf_state)) return; switch (chan->mode) { case L2CAP_MODE_BASIC: break; case L2CAP_MODE_LE_FLOWCTL: case L2CAP_MODE_EXT_FLOWCTL: skb_queue_purge(&chan->tx_q); break; case L2CAP_MODE_ERTM: __clear_retrans_timer(chan); __clear_monitor_timer(chan); __clear_ack_timer(chan); skb_queue_purge(&chan->srej_q); l2cap_seq_list_free(&chan->srej_list); l2cap_seq_list_free(&chan->retrans_list); fallthrough; case L2CAP_MODE_STREAMING: skb_queue_purge(&chan->tx_q); break; } } EXPORT_SYMBOL_GPL(l2cap_chan_del); static void __l2cap_chan_list_id(struct l2cap_conn *conn, u16 id, l2cap_chan_func_t func, void *data) { struct l2cap_chan *chan, *l; list_for_each_entry_safe(chan, l, &conn->chan_l, list) { if (chan->ident == id) func(chan, data); } } static void __l2cap_chan_list(struct l2cap_conn *conn, l2cap_chan_func_t func, void *data) { struct l2cap_chan *chan; list_for_each_entry(chan, &conn->chan_l, list) { func(chan, data); } } void l2cap_chan_list(struct l2cap_conn *conn, l2cap_chan_func_t func, void *data) { if (!conn) return; mutex_lock(&conn->lock); __l2cap_chan_list(conn, func, data); mutex_unlock(&conn->lock); } EXPORT_SYMBOL_GPL(l2cap_chan_list); static void l2cap_conn_update_id_addr(struct work_struct *work) { struct l2cap_conn *conn = container_of(work, struct l2cap_conn, id_addr_timer.work); struct hci_conn *hcon = conn->hcon; struct l2cap_chan *chan; mutex_lock(&conn->lock); list_for_each_entry(chan, &conn->chan_l, list) { l2cap_chan_lock(chan); bacpy(&chan->dst, &hcon->dst); chan->dst_type = bdaddr_dst_type(hcon); l2cap_chan_unlock(chan); } mutex_unlock(&conn->lock); } static void l2cap_chan_le_connect_reject(struct l2cap_chan *chan) { struct l2cap_conn *conn = chan->conn; struct l2cap_le_conn_rsp rsp; u16 result; if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) result = L2CAP_CR_LE_AUTHORIZATION; else result = L2CAP_CR_LE_BAD_PSM; l2cap_state_change(chan, BT_DISCONN); rsp.dcid = cpu_to_le16(chan->scid); rsp.mtu = cpu_to_le16(chan->imtu); rsp.mps = cpu_to_le16(chan->mps); rsp.credits = cpu_to_le16(chan->rx_credits); rsp.result = cpu_to_le16(result); l2cap_send_cmd(conn, chan->ident, L2CAP_LE_CONN_RSP, sizeof(rsp), &rsp); } static void l2cap_chan_ecred_connect_reject(struct l2cap_chan *chan) { l2cap_state_change(chan, BT_DISCONN); __l2cap_ecred_conn_rsp_defer(chan); } static void l2cap_chan_connect_reject(struct l2cap_chan *chan) { struct l2cap_conn *conn = chan->conn; struct l2cap_conn_rsp rsp; u16 result; if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) result = L2CAP_CR_SEC_BLOCK; else result = L2CAP_CR_BAD_PSM; l2cap_state_change(chan, BT_DISCONN); rsp.scid = cpu_to_le16(chan->dcid); rsp.dcid = cpu_to_le16(chan->scid); rsp.result = cpu_to_le16(result); rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO); l2cap_send_cmd(conn, chan->ident, L2CAP_CONN_RSP, sizeof(rsp), &rsp); } void l2cap_chan_close(struct l2cap_chan *chan, int reason) { struct l2cap_conn *conn = chan->conn; BT_DBG("chan %p state %s", chan, state_to_string(chan->state)); switch (chan->state) { case BT_LISTEN: chan->ops->teardown(chan, 0); break; case BT_CONNECTED: case BT_CONFIG: if (chan->chan_type == L2CAP_CHAN_CONN_ORIENTED) { __set_chan_timer(chan, chan->ops->get_sndtimeo(chan)); l2cap_send_disconn_req(chan, reason); } else l2cap_chan_del(chan, reason); break; case BT_CONNECT2: if (chan->chan_type == L2CAP_CHAN_CONN_ORIENTED) { if (conn->hcon->type == ACL_LINK) l2cap_chan_connect_reject(chan); else if (conn->hcon->type == LE_LINK) { switch (chan->mode) { case L2CAP_MODE_LE_FLOWCTL: l2cap_chan_le_connect_reject(chan); break; case L2CAP_MODE_EXT_FLOWCTL: l2cap_chan_ecred_connect_reject(chan); return; } } } l2cap_chan_del(chan, reason); break; case BT_CONNECT: case BT_DISCONN: l2cap_chan_del(chan, reason); break; default: chan->ops->teardown(chan, 0); break; } } EXPORT_SYMBOL(l2cap_chan_close); static inline u8 l2cap_get_auth_type(struct l2cap_chan *chan) { switch (chan->chan_type) { case L2CAP_CHAN_RAW: switch (chan->sec_level) { case BT_SECURITY_HIGH: case BT_SECURITY_FIPS: return HCI_AT_DEDICATED_BONDING_MITM; case BT_SECURITY_MEDIUM: return HCI_AT_DEDICATED_BONDING; default: return HCI_AT_NO_BONDING; } break; case L2CAP_CHAN_CONN_LESS: if (chan->psm == cpu_to_le16(L2CAP_PSM_3DSP)) { if (chan->sec_level == BT_SECURITY_LOW) chan->sec_level = BT_SECURITY_SDP; } if (chan->sec_level == BT_SECURITY_HIGH || chan->sec_level == BT_SECURITY_FIPS) return HCI_AT_NO_BONDING_MITM; else return HCI_AT_NO_BONDING; break; case L2CAP_CHAN_CONN_ORIENTED: if (chan->psm == cpu_to_le16(L2CAP_PSM_SDP)) { if (chan->sec_level == BT_SECURITY_LOW) chan->sec_level = BT_SECURITY_SDP; if (chan->sec_level == BT_SECURITY_HIGH || chan->sec_level == BT_SECURITY_FIPS) return HCI_AT_NO_BONDING_MITM; else return HCI_AT_NO_BONDING; } fallthrough; default: switch (chan->sec_level) { case BT_SECURITY_HIGH: case BT_SECURITY_FIPS: return HCI_AT_GENERAL_BONDING_MITM; case BT_SECURITY_MEDIUM: return HCI_AT_GENERAL_BONDING; default: return HCI_AT_NO_BONDING; } break; } } /* Service level security */ int l2cap_chan_check_security(struct l2cap_chan *chan, bool initiator) { struct l2cap_conn *conn = chan->conn; __u8 auth_type; if (conn->hcon->type == LE_LINK) return smp_conn_security(conn->hcon, chan->sec_level); auth_type = l2cap_get_auth_type(chan); return hci_conn_security(conn->hcon, chan->sec_level, auth_type, initiator); } static u8 l2cap_get_ident(struct l2cap_conn *conn) { u8 id; /* Get next available identificator. * 1 - 128 are used by kernel. * 129 - 199 are reserved. * 200 - 254 are used by utilities like l2ping, etc. */ mutex_lock(&conn->ident_lock); if (++conn->tx_ident > 128) conn->tx_ident = 1; id = conn->tx_ident; mutex_unlock(&conn->ident_lock); return id; } static void l2cap_send_acl(struct l2cap_conn *conn, struct sk_buff *skb, u8 flags) { /* Check if the hcon still valid before attempting to send */ if (hci_conn_valid(conn->hcon->hdev, conn->hcon)) hci_send_acl(conn->hchan, skb, flags); else kfree_skb(skb); } static void l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 len, void *data) { struct sk_buff *skb = l2cap_build_cmd(conn, code, ident, len, data); u8 flags; BT_DBG("code 0x%2.2x", code); if (!skb) return; /* Use NO_FLUSH if supported or we have an LE link (which does * not support auto-flushing packets) */ if (lmp_no_flush_capable(conn->hcon->hdev) || conn->hcon->type == LE_LINK) flags = ACL_START_NO_FLUSH; else flags = ACL_START; bt_cb(skb)->force_active = BT_POWER_FORCE_ACTIVE_ON; skb->priority = HCI_PRIO_MAX; l2cap_send_acl(conn, skb, flags); } static void l2cap_do_send(struct l2cap_chan *chan, struct sk_buff *skb) { struct hci_conn *hcon = chan->conn->hcon; u16 flags; BT_DBG("chan %p, skb %p len %d priority %u", chan, skb, skb->len, skb->priority); /* Use NO_FLUSH for LE links (where this is the only option) or * if the BR/EDR link supports it and flushing has not been * explicitly requested (through FLAG_FLUSHABLE). */ if (hcon->type == LE_LINK || (!test_bit(FLAG_FLUSHABLE, &chan->flags) && lmp_no_flush_capable(hcon->hdev))) flags = ACL_START_NO_FLUSH; else flags = ACL_START; bt_cb(skb)->force_active = test_bit(FLAG_FORCE_ACTIVE, &chan->flags); hci_send_acl(chan->conn->hchan, skb, flags); } static void __unpack_enhanced_control(u16 enh, struct l2cap_ctrl *control) { control->reqseq = (enh & L2CAP_CTRL_REQSEQ) >> L2CAP_CTRL_REQSEQ_SHIFT; control->final = (enh & L2CAP_CTRL_FINAL) >> L2CAP_CTRL_FINAL_SHIFT; if (enh & L2CAP_CTRL_FRAME_TYPE) { /* S-Frame */ control->sframe = 1; control->poll = (enh & L2CAP_CTRL_POLL) >> L2CAP_CTRL_POLL_SHIFT; control->super = (enh & L2CAP_CTRL_SUPERVISE) >> L2CAP_CTRL_SUPER_SHIFT; control->sar = 0; control->txseq = 0; } else { /* I-Frame */ control->sframe = 0; control->sar = (enh & L2CAP_CTRL_SAR) >> L2CAP_CTRL_SAR_SHIFT; control->txseq = (enh & L2CAP_CTRL_TXSEQ) >> L2CAP_CTRL_TXSEQ_SHIFT; control->poll = 0; control->super = 0; } } static void __unpack_extended_control(u32 ext, struct l2cap_ctrl *control) { control->reqseq = (ext & L2CAP_EXT_CTRL_REQSEQ) >> L2CAP_EXT_CTRL_REQSEQ_SHIFT; control->final = (ext & L2CAP_EXT_CTRL_FINAL) >> L2CAP_EXT_CTRL_FINAL_SHIFT; if (ext & L2CAP_EXT_CTRL_FRAME_TYPE) { /* S-Frame */ control->sframe = 1; control->poll = (ext & L2CAP_EXT_CTRL_POLL) >> L2CAP_EXT_CTRL_POLL_SHIFT; control->super = (ext & L2CAP_EXT_CTRL_SUPERVISE) >> L2CAP_EXT_CTRL_SUPER_SHIFT; control->sar = 0; control->txseq = 0; } else { /* I-Frame */ control->sframe = 0; control->sar = (ext & L2CAP_EXT_CTRL_SAR) >> L2CAP_EXT_CTRL_SAR_SHIFT; control->txseq = (ext & L2CAP_EXT_CTRL_TXSEQ) >> L2CAP_EXT_CTRL_TXSEQ_SHIFT; control->poll = 0; control->super = 0; } } static inline void __unpack_control(struct l2cap_chan *chan, struct sk_buff *skb) { if (test_bit(FLAG_EXT_CTRL, &chan->flags)) { __unpack_extended_control(get_unaligned_le32(skb->data), &bt_cb(skb)->l2cap); skb_pull(skb, L2CAP_EXT_CTRL_SIZE); } else { __unpack_enhanced_control(get_unaligned_le16(skb->data), &bt_cb(skb)->l2cap); skb_pull(skb, L2CAP_ENH_CTRL_SIZE); } } static u32 __pack_extended_control(struct l2cap_ctrl *control) { u32 packed; packed = control->reqseq << L2CAP_EXT_CTRL_REQSEQ_SHIFT; packed |= control->final << L2CAP_EXT_CTRL_FINAL_SHIFT; if (control->sframe) { packed |= control->poll << L2CAP_EXT_CTRL_POLL_SHIFT; packed |= control->super << L2CAP_EXT_CTRL_SUPER_SHIFT; packed |= L2CAP_EXT_CTRL_FRAME_TYPE; } else { packed |= control->sar << L2CAP_EXT_CTRL_SAR_SHIFT; packed |= control->txseq << L2CAP_EXT_CTRL_TXSEQ_SHIFT; } return packed; } static u16 __pack_enhanced_control(struct l2cap_ctrl *control) { u16 packed; packed = control->reqseq << L2CAP_CTRL_REQSEQ_SHIFT; packed |= control->final << L2CAP_CTRL_FINAL_SHIFT; if (control->sframe) { packed |= control->poll << L2CAP_CTRL_POLL_SHIFT; packed |= control->super << L2CAP_CTRL_SUPER_SHIFT; packed |= L2CAP_CTRL_FRAME_TYPE; } else { packed |= control->sar << L2CAP_CTRL_SAR_SHIFT; packed |= control->txseq << L2CAP_CTRL_TXSEQ_SHIFT; } return packed; } static inline void __pack_control(struct l2cap_chan *chan, struct l2cap_ctrl *control, struct sk_buff *skb) { if (test_bit(FLAG_EXT_CTRL, &chan->flags)) { put_unaligned_le32(__pack_extended_control(control), skb->data + L2CAP_HDR_SIZE); } else { put_unaligned_le16(__pack_enhanced_control(control), skb->data + L2CAP_HDR_SIZE); } } static inline unsigned int __ertm_hdr_size(struct l2cap_chan *chan) { if (test_bit(FLAG_EXT_CTRL, &chan->flags)) return L2CAP_EXT_HDR_SIZE; else return L2CAP_ENH_HDR_SIZE; } static struct sk_buff *l2cap_create_sframe_pdu(struct l2cap_chan *chan, u32 control) { struct sk_buff *skb; struct l2cap_hdr *lh; int hlen = __ertm_hdr_size(chan); if (chan->fcs == L2CAP_FCS_CRC16) hlen += L2CAP_FCS_SIZE; skb = bt_skb_alloc(hlen, GFP_KERNEL); if (!skb) return ERR_PTR(-ENOMEM); lh = skb_put(skb, L2CAP_HDR_SIZE); lh->len = cpu_to_le16(hlen - L2CAP_HDR_SIZE); lh->cid = cpu_to_le16(chan->dcid); if (test_bit(FLAG_EXT_CTRL, &chan->flags)) put_unaligned_le32(control, skb_put(skb, L2CAP_EXT_CTRL_SIZE)); else put_unaligned_le16(control, skb_put(skb, L2CAP_ENH_CTRL_SIZE)); if (chan->fcs == L2CAP_FCS_CRC16) { u16 fcs = crc16(0, (u8 *)skb->data, skb->len); put_unaligned_le16(fcs, skb_put(skb, L2CAP_FCS_SIZE)); } skb->priority = HCI_PRIO_MAX; return skb; } static void l2cap_send_sframe(struct l2cap_chan *chan, struct l2cap_ctrl *control) { struct sk_buff *skb; u32 control_field; BT_DBG("chan %p, control %p", chan, control); if (!control->sframe) return; if (test_and_clear_bit(CONN_SEND_FBIT, &chan->conn_state) && !control->poll) control->final = 1; if (control->super == L2CAP_SUPER_RR) clear_bit(CONN_RNR_SENT, &chan->conn_state); else if (control->super == L2CAP_SUPER_RNR) set_bit(CONN_RNR_SENT, &chan->conn_state); if (control->super != L2CAP_SUPER_SREJ) { chan->last_acked_seq = control->reqseq; __clear_ack_timer(chan); } BT_DBG("reqseq %d, final %d, poll %d, super %d", control->reqseq, control->final, control->poll, control->super); if (test_bit(FLAG_EXT_CTRL, &chan->flags)) control_field = __pack_extended_control(control); else control_field = __pack_enhanced_control(control); skb = l2cap_create_sframe_pdu(chan, control_field); if (!IS_ERR(skb)) l2cap_do_send(chan, skb); } static void l2cap_send_rr_or_rnr(struct l2cap_chan *chan, bool poll) { struct l2cap_ctrl control; BT_DBG("chan %p, poll %d", chan, poll); memset(&control, 0, sizeof(control)); control.sframe = 1; control.poll = poll; if (test_bit(CONN_LOCAL_BUSY, &chan->conn_state)) control.super = L2CAP_SUPER_RNR; else control.super = L2CAP_SUPER_RR; control.reqseq = chan->buffer_seq; l2cap_send_sframe(chan, &control); } static inline int __l2cap_no_conn_pending(struct l2cap_chan *chan) { if (chan->chan_type != L2CAP_CHAN_CONN_ORIENTED) return true; return !test_bit(CONF_CONNECT_PEND, &chan->conf_state); } void l2cap_send_conn_req(struct l2cap_chan *chan) { struct l2cap_conn *conn = chan->conn; struct l2cap_conn_req req; req.scid = cpu_to_le16(chan->scid); req.psm = chan->psm; chan->ident = l2cap_get_ident(conn); set_bit(CONF_CONNECT_PEND, &chan->conf_state); l2cap_send_cmd(conn, chan->ident, L2CAP_CONN_REQ, sizeof(req), &req); } static void l2cap_chan_ready(struct l2cap_chan *chan) { /* The channel may have already been flagged as connected in * case of receiving data before the L2CAP info req/rsp * procedure is complete. */ if (chan->state == BT_CONNECTED) return; /* This clears all conf flags, including CONF_NOT_COMPLETE */ chan->conf_state = 0; __clear_chan_timer(chan); switch (chan->mode) { case L2CAP_MODE_LE_FLOWCTL: case L2CAP_MODE_EXT_FLOWCTL: if (!chan->tx_credits) chan->ops->suspend(chan); break; } chan->state = BT_CONNECTED; chan->ops->ready(chan); } static void l2cap_le_connect(struct l2cap_chan *chan) { struct l2cap_conn *conn = chan->conn; struct l2cap_le_conn_req req; if (test_and_set_bit(FLAG_LE_CONN_REQ_SENT, &chan->flags)) return; if (!chan->imtu) chan->imtu = chan->conn->mtu; l2cap_le_flowctl_init(chan, 0); memset(&req, 0, sizeof(req)); req.psm = chan->psm; req.scid = cpu_to_le16(chan->scid); req.mtu = cpu_to_le16(chan->imtu); req.mps = cpu_to_le16(chan->mps); req.credits = cpu_to_le16(chan->rx_credits); chan->ident = l2cap_get_ident(conn); l2cap_send_cmd(conn, chan->ident, L2CAP_LE_CONN_REQ, sizeof(req), &req); } struct l2cap_ecred_conn_data { struct { struct l2cap_ecred_conn_req_hdr req; __le16 scid[5]; } __packed pdu; struct l2cap_chan *chan; struct pid *pid; int count; }; static void l2cap_ecred_defer_connect(struct l2cap_chan *chan, void *data) { struct l2cap_ecred_conn_data *conn = data; struct pid *pid; if (chan == conn->chan) return; if (!test_and_clear_bit(FLAG_DEFER_SETUP, &chan->flags)) return; pid = chan->ops->get_peer_pid(chan); /* Only add deferred channels with the same PID/PSM */ if (conn->pid != pid || chan->psm != conn->chan->psm || chan->ident || chan->mode != L2CAP_MODE_EXT_FLOWCTL || chan->state != BT_CONNECT) return; if (test_and_set_bit(FLAG_ECRED_CONN_REQ_SENT, &chan->flags)) return; l2cap_ecred_init(chan, 0); /* Set the same ident so we can match on the rsp */ chan->ident = conn->chan->ident; /* Include all channels deferred */ conn->pdu.scid[conn->count] = cpu_to_le16(chan->scid); conn->count++; } static void l2cap_ecred_connect(struct l2cap_chan *chan) { struct l2cap_conn *conn = chan->conn; struct l2cap_ecred_conn_data data; if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) return; if (test_and_set_bit(FLAG_ECRED_CONN_REQ_SENT, &chan->flags)) return; l2cap_ecred_init(chan, 0); memset(&data, 0, sizeof(data)); data.pdu.req.psm = chan->psm; data.pdu.req.mtu = cpu_to_le16(chan->imtu); data.pdu.req.mps = cpu_to_le16(chan->mps); data.pdu.req.credits = cpu_to_le16(chan->rx_credits); data.pdu.scid[0] = cpu_to_le16(chan->scid); chan->ident = l2cap_get_ident(conn); data.count = 1; data.chan = chan; data.pid = chan->ops->get_peer_pid(chan); __l2cap_chan_list(conn, l2cap_ecred_defer_connect, &data); l2cap_send_cmd(conn, chan->ident, L2CAP_ECRED_CONN_REQ, sizeof(data.pdu.req) + data.count * sizeof(__le16), &data.pdu); } static void l2cap_le_start(struct l2cap_chan *chan) { struct l2cap_conn *conn = chan->conn; if (!smp_conn_security(conn->hcon, chan->sec_level)) return; if (!chan->psm) { l2cap_chan_ready(chan); return; } if (chan->state == BT_CONNECT) { if (chan->mode == L2CAP_MODE_EXT_FLOWCTL) l2cap_ecred_connect(chan); else l2cap_le_connect(chan); } } static void l2cap_start_connection(struct l2cap_chan *chan) { if (chan->conn->hcon->type == LE_LINK) { l2cap_le_start(chan); } else { l2cap_send_conn_req(chan); } } static void l2cap_request_info(struct l2cap_conn *conn) { struct l2cap_info_req req; if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) return; req.type = cpu_to_le16(L2CAP_IT_FEAT_MASK); conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_SENT; conn->info_ident = l2cap_get_ident(conn); schedule_delayed_work(&conn->info_timer, L2CAP_INFO_TIMEOUT); l2cap_send_cmd(conn, conn->info_ident, L2CAP_INFO_REQ, sizeof(req), &req); } static bool l2cap_check_enc_key_size(struct hci_conn *hcon, struct l2cap_chan *chan) { /* The minimum encryption key size needs to be enforced by the * host stack before establishing any L2CAP connections. The * specification in theory allows a minimum of 1, but to align * BR/EDR and LE transports, a minimum of 7 is chosen. * * This check might also be called for unencrypted connections * that have no key size requirements. Ensure that the link is * actually encrypted before enforcing a key size. */ int min_key_size = hcon->hdev->min_enc_key_size; /* On FIPS security level, key size must be 16 bytes */ if (chan->sec_level == BT_SECURITY_FIPS) min_key_size = 16; return (!test_bit(HCI_CONN_ENCRYPT, &hcon->flags) || hcon->enc_key_size >= min_key_size); } static void l2cap_do_start(struct l2cap_chan *chan) { struct l2cap_conn *conn = chan->conn; if (conn->hcon->type == LE_LINK) { l2cap_le_start(chan); return; } if (!(conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT)) { l2cap_request_info(conn); return; } if (!(conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE)) return; if (!l2cap_chan_check_security(chan, true) || !__l2cap_no_conn_pending(chan)) return; if (l2cap_check_enc_key_size(conn->hcon, chan)) l2cap_start_connection(chan); else __set_chan_timer(chan, L2CAP_DISC_TIMEOUT); } static inline int l2cap_mode_supported(__u8 mode, __u32 feat_mask) { u32 local_feat_mask = l2cap_feat_mask; if (!disable_ertm) local_feat_mask |= L2CAP_FEAT_ERTM | L2CAP_FEAT_STREAMING; switch (mode) { case L2CAP_MODE_ERTM: return L2CAP_FEAT_ERTM & feat_mask & local_feat_mask; case L2CAP_MODE_STREAMING: return L2CAP_FEAT_STREAMING & feat_mask & local_feat_mask; default: return 0x00; } } static void l2cap_send_disconn_req(struct l2cap_chan *chan, int err) { struct l2cap_conn *conn = chan->conn; struct l2cap_disconn_req req; if (!conn) return; if (chan->mode == L2CAP_MODE_ERTM && chan->state == BT_CONNECTED) { __clear_retrans_timer(chan); __clear_monitor_timer(chan); __clear_ack_timer(chan); } req.dcid = cpu_to_le16(chan->dcid); req.scid = cpu_to_le16(chan->scid); l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_DISCONN_REQ, sizeof(req), &req); l2cap_state_change_and_error(chan, BT_DISCONN, err); } /* ---- L2CAP connections ---- */ static void l2cap_conn_start(struct l2cap_conn *conn) { struct l2cap_chan *chan, *tmp; BT_DBG("conn %p", conn); list_for_each_entry_safe(chan, tmp, &conn->chan_l, list) { l2cap_chan_lock(chan); if (chan->chan_type != L2CAP_CHAN_CONN_ORIENTED) { l2cap_chan_ready(chan); l2cap_chan_unlock(chan); continue; } if (chan->state == BT_CONNECT) { if (!l2cap_chan_check_security(chan, true) || !__l2cap_no_conn_pending(chan)) { l2cap_chan_unlock(chan); continue; } if (!l2cap_mode_supported(chan->mode, conn->feat_mask) && test_bit(CONF_STATE2_DEVICE, &chan->conf_state)) { l2cap_chan_close(chan, ECONNRESET); l2cap_chan_unlock(chan); continue; } if (l2cap_check_enc_key_size(conn->hcon, chan)) l2cap_start_connection(chan); else l2cap_chan_close(chan, ECONNREFUSED); } else if (chan->state == BT_CONNECT2) { struct l2cap_conn_rsp rsp; char buf[128]; rsp.scid = cpu_to_le16(chan->dcid); rsp.dcid = cpu_to_le16(chan->scid); if (l2cap_chan_check_security(chan, false)) { if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) { rsp.result = cpu_to_le16(L2CAP_CR_PEND); rsp.status = cpu_to_le16(L2CAP_CS_AUTHOR_PEND); chan->ops->defer(chan); } else { l2cap_state_change(chan, BT_CONFIG); rsp.result = cpu_to_le16(L2CAP_CR_SUCCESS); rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO); } } else { rsp.result = cpu_to_le16(L2CAP_CR_PEND); rsp.status = cpu_to_le16(L2CAP_CS_AUTHEN_PEND); } l2cap_send_cmd(conn, chan->ident, L2CAP_CONN_RSP, sizeof(rsp), &rsp); if (test_bit(CONF_REQ_SENT, &chan->conf_state) || rsp.result != L2CAP_CR_SUCCESS) { l2cap_chan_unlock(chan); continue; } set_bit(CONF_REQ_SENT, &chan->conf_state); l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, l2cap_build_conf_req(chan, buf, sizeof(buf)), buf); chan->num_conf_req++; } l2cap_chan_unlock(chan); } } static void l2cap_le_conn_ready(struct l2cap_conn *conn) { struct hci_conn *hcon = conn->hcon; struct hci_dev *hdev = hcon->hdev; BT_DBG("%s conn %p", hdev->name, conn); /* For outgoing pairing which doesn't necessarily have an * associated socket (e.g. mgmt_pair_device). */ if (hcon->out) smp_conn_security(hcon, hcon->pending_sec_level); /* For LE peripheral connections, make sure the connection interval * is in the range of the minimum and maximum interval that has * been configured for this connection. If not, then trigger * the connection update procedure. */ if (hcon->role == HCI_ROLE_SLAVE && (hcon->le_conn_interval < hcon->le_conn_min_interval || hcon->le_conn_interval > hcon->le_conn_max_interval)) { struct l2cap_conn_param_update_req req; req.min = cpu_to_le16(hcon->le_conn_min_interval); req.max = cpu_to_le16(hcon->le_conn_max_interval); req.latency = cpu_to_le16(hcon->le_conn_latency); req.to_multiplier = cpu_to_le16(hcon->le_supv_timeout); l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONN_PARAM_UPDATE_REQ, sizeof(req), &req); } } static void l2cap_conn_ready(struct l2cap_conn *conn) { struct l2cap_chan *chan; struct hci_conn *hcon = conn->hcon; BT_DBG("conn %p", conn); if (hcon->type == ACL_LINK) l2cap_request_info(conn); mutex_lock(&conn->lock); list_for_each_entry(chan, &conn->chan_l, list) { l2cap_chan_lock(chan); if (hcon->type == LE_LINK) { l2cap_le_start(chan); } else if (chan->chan_type != L2CAP_CHAN_CONN_ORIENTED) { if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE) l2cap_chan_ready(chan); } else if (chan->state == BT_CONNECT) { l2cap_do_start(chan); } l2cap_chan_unlock(chan); } mutex_unlock(&conn->lock); if (hcon->type == LE_LINK) l2cap_le_conn_ready(conn); queue_work(hcon->hdev->workqueue, &conn->pending_rx_work); } /* Notify sockets that we cannot guaranty reliability anymore */ static void l2cap_conn_unreliable(struct l2cap_conn *conn, int err) { struct l2cap_chan *chan; BT_DBG("conn %p", conn); list_for_each_entry(chan, &conn->chan_l, list) { if (test_bit(FLAG_FORCE_RELIABLE, &chan->flags)) l2cap_chan_set_err(chan, err); } } static void l2cap_info_timeout(struct work_struct *work) { struct l2cap_conn *conn = container_of(work, struct l2cap_conn, info_timer.work); conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE; conn->info_ident = 0; mutex_lock(&conn->lock); l2cap_conn_start(conn); mutex_unlock(&conn->lock); } /* * l2cap_user * External modules can register l2cap_user objects on l2cap_conn. The ->probe * callback is called during registration. The ->remove callback is called * during unregistration. * An l2cap_user object can either be explicitly unregistered or when the * underlying l2cap_conn object is deleted. This guarantees that l2cap->hcon, * l2cap->hchan, .. are valid as long as the remove callback hasn't been called. * External modules must own a reference to the l2cap_conn object if they intend * to call l2cap_unregister_user(). The l2cap_conn object might get destroyed at * any time if they don't. */ int l2cap_register_user(struct l2cap_conn *conn, struct l2cap_user *user) { struct hci_dev *hdev = conn->hcon->hdev; int ret; /* We need to check whether l2cap_conn is registered. If it is not, we * must not register the l2cap_user. l2cap_conn_del() is unregisters * l2cap_conn objects, but doesn't provide its own locking. Instead, it * relies on the parent hci_conn object to be locked. This itself relies * on the hci_dev object to be locked. So we must lock the hci device * here, too. */ hci_dev_lock(hdev); if (!list_empty(&user->list)) { ret = -EINVAL; goto out_unlock; } /* conn->hchan is NULL after l2cap_conn_del() was called */ if (!conn->hchan) { ret = -ENODEV; goto out_unlock; } ret = user->probe(conn, user); if (ret) goto out_unlock; list_add(&user->list, &conn->users); ret = 0; out_unlock: hci_dev_unlock(hdev); return ret; } EXPORT_SYMBOL(l2cap_register_user); void l2cap_unregister_user(struct l2cap_conn *conn, struct l2cap_user *user) { struct hci_dev *hdev = conn->hcon->hdev; hci_dev_lock(hdev); if (list_empty(&user->list)) goto out_unlock; list_del_init(&user->list); user->remove(conn, user); out_unlock: hci_dev_unlock(hdev); } EXPORT_SYMBOL(l2cap_unregister_user); static void l2cap_unregister_all_users(struct l2cap_conn *conn) { struct l2cap_user *user; while (!list_empty(&conn->users)) { user = list_first_entry(&conn->users, struct l2cap_user, list); list_del_init(&user->list); user->remove(conn, user); } } static void l2cap_conn_del(struct hci_conn *hcon, int err) { struct l2cap_conn *conn = hcon->l2cap_data; struct l2cap_chan *chan, *l; if (!conn) return; BT_DBG("hcon %p conn %p, err %d", hcon, conn, err); mutex_lock(&conn->lock); kfree_skb(conn->rx_skb); skb_queue_purge(&conn->pending_rx); /* We can not call flush_work(&conn->pending_rx_work) here since we * might block if we are running on a worker from the same workqueue * pending_rx_work is waiting on. */ if (work_pending(&conn->pending_rx_work)) cancel_work_sync(&conn->pending_rx_work); cancel_delayed_work_sync(&conn->id_addr_timer); l2cap_unregister_all_users(conn); /* Force the connection to be immediately dropped */ hcon->disc_timeout = 0; /* Kill channels */ list_for_each_entry_safe(chan, l, &conn->chan_l, list) { l2cap_chan_hold(chan); l2cap_chan_lock(chan); l2cap_chan_del(chan, err); chan->ops->close(chan); l2cap_chan_unlock(chan); l2cap_chan_put(chan); } if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) cancel_delayed_work_sync(&conn->info_timer); hci_chan_del(conn->hchan); conn->hchan = NULL; hcon->l2cap_data = NULL; mutex_unlock(&conn->lock); l2cap_conn_put(conn); } static void l2cap_conn_free(struct kref *ref) { struct l2cap_conn *conn = container_of(ref, struct l2cap_conn, ref); hci_conn_put(conn->hcon); kfree(conn); } struct l2cap_conn *l2cap_conn_get(struct l2cap_conn *conn) { kref_get(&conn->ref); return conn; } EXPORT_SYMBOL(l2cap_conn_get); void l2cap_conn_put(struct l2cap_conn *conn) { kref_put(&conn->ref, l2cap_conn_free); } EXPORT_SYMBOL(l2cap_conn_put); /* ---- Socket interface ---- */ /* Find socket with psm and source / destination bdaddr. * Returns closest match. */ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm, bdaddr_t *src, bdaddr_t *dst, u8 link_type) { struct l2cap_chan *c, *tmp, *c1 = NULL; read_lock(&chan_list_lock); list_for_each_entry_safe(c, tmp, &chan_list, global_l) { if (state && c->state != state) continue; if (link_type == ACL_LINK && c->src_type != BDADDR_BREDR) continue; if (link_type == LE_LINK && c->src_type == BDADDR_BREDR) continue; if (c->chan_type != L2CAP_CHAN_FIXED && c->psm == psm) { int src_match, dst_match; int src_any, dst_any; /* Exact match. */ src_match = !bacmp(&c->src, src); dst_match = !bacmp(&c->dst, dst); if (src_match && dst_match) { if (!l2cap_chan_hold_unless_zero(c)) continue; read_unlock(&chan_list_lock); return c; } /* Closest match */ src_any = !bacmp(&c->src, BDADDR_ANY); dst_any = !bacmp(&c->dst, BDADDR_ANY); if ((src_match && dst_any) || (src_any && dst_match) || (src_any && dst_any)) c1 = c; } } if (c1) c1 = l2cap_chan_hold_unless_zero(c1); read_unlock(&chan_list_lock); return c1; } static void l2cap_monitor_timeout(struct work_struct *work) { struct l2cap_chan *chan = container_of(work, struct l2cap_chan, monitor_timer.work); BT_DBG("chan %p", chan); l2cap_chan_lock(chan); if (!chan->conn) { l2cap_chan_unlock(chan); l2cap_chan_put(chan); return; } l2cap_tx(chan, NULL, NULL, L2CAP_EV_MONITOR_TO); l2cap_chan_unlock(chan); l2cap_chan_put(chan); } static void l2cap_retrans_timeout(struct work_struct *work) { struct l2cap_chan *chan = container_of(work, struct l2cap_chan, retrans_timer.work); BT_DBG("chan %p", chan); l2cap_chan_lock(chan); if (!chan->conn) { l2cap_chan_unlock(chan); l2cap_chan_put(chan); return; } l2cap_tx(chan, NULL, NULL, L2CAP_EV_RETRANS_TO); l2cap_chan_unlock(chan); l2cap_chan_put(chan); } static void l2cap_streaming_send(struct l2cap_chan *chan, struct sk_buff_head *skbs) { struct sk_buff *skb; struct l2cap_ctrl *control; BT_DBG("chan %p, skbs %p", chan, skbs); skb_queue_splice_tail_init(skbs, &chan->tx_q); while (!skb_queue_empty(&chan->tx_q)) { skb = skb_dequeue(&chan->tx_q); bt_cb(skb)->l2cap.retries = 1; control = &bt_cb(skb)->l2cap; control->reqseq = 0; control->txseq = chan->next_tx_seq; __pack_control(chan, control, skb); if (chan->fcs == L2CAP_FCS_CRC16) { u16 fcs = crc16(0, (u8 *) skb->data, skb->len); put_unaligned_le16(fcs, skb_put(skb, L2CAP_FCS_SIZE)); } l2cap_do_send(chan, skb); BT_DBG("Sent txseq %u", control->txseq); chan->next_tx_seq = __next_seq(chan, chan->next_tx_seq); chan->frames_sent++; } } static int l2cap_ertm_send(struct l2cap_chan *chan) { struct sk_buff *skb, *tx_skb; struct l2cap_ctrl *control; int sent = 0; BT_DBG("chan %p", chan); if (chan->state != BT_CONNECTED) return -ENOTCONN; if (test_bit(CONN_REMOTE_BUSY, &chan->conn_state)) return 0; while (chan->tx_send_head && chan->unacked_frames < chan->remote_tx_win && chan->tx_state == L2CAP_TX_STATE_XMIT) { skb = chan->tx_send_head; bt_cb(skb)->l2cap.retries = 1; control = &bt_cb(skb)->l2cap; if (test_and_clear_bit(CONN_SEND_FBIT, &chan->conn_state)) control->final = 1; control->reqseq = chan->buffer_seq; chan->last_acked_seq = chan->buffer_seq; control->txseq = chan->next_tx_seq; __pack_control(chan, control, skb); if (chan->fcs == L2CAP_FCS_CRC16) { u16 fcs = crc16(0, (u8 *) skb->data, skb->len); put_unaligned_le16(fcs, skb_put(skb, L2CAP_FCS_SIZE)); } /* Clone after data has been modified. Data is assumed to be read-only (for locking purposes) on cloned sk_buffs. */ tx_skb = skb_clone(skb, GFP_KERNEL); if (!tx_skb) break; __set_retrans_timer(chan); chan->next_tx_seq = __next_seq(chan, chan->next_tx_seq); chan->unacked_frames++; chan->frames_sent++; sent++; if (skb_queue_is_last(&chan->tx_q, skb)) chan->tx_send_head = NULL; else chan->tx_send_head = skb_queue_next(&chan->tx_q, skb); l2cap_do_send(chan, tx_skb); BT_DBG("Sent txseq %u", control->txseq); } BT_DBG("Sent %d, %u unacked, %u in ERTM queue", sent, chan->unacked_frames, skb_queue_len(&chan->tx_q)); return sent; } static void l2cap_ertm_resend(struct l2cap_chan *chan) { struct l2cap_ctrl control; struct sk_buff *skb; struct sk_buff *tx_skb; u16 seq; BT_DBG("chan %p", chan); if (test_bit(CONN_REMOTE_BUSY, &chan->conn_state)) return; while (chan->retrans_list.head != L2CAP_SEQ_LIST_CLEAR) { seq = l2cap_seq_list_pop(&chan->retrans_list); skb = l2cap_ertm_seq_in_queue(&chan->tx_q, seq); if (!skb) { BT_DBG("Error: Can't retransmit seq %d, frame missing", seq); continue; } bt_cb(skb)->l2cap.retries++; control = bt_cb(skb)->l2cap; if (chan->max_tx != 0 && bt_cb(skb)->l2cap.retries > chan->max_tx) { BT_DBG("Retry limit exceeded (%d)", chan->max_tx); l2cap_send_disconn_req(chan, ECONNRESET); l2cap_seq_list_clear(&chan->retrans_list); break; } control.reqseq = chan->buffer_seq; if (test_and_clear_bit(CONN_SEND_FBIT, &chan->conn_state)) control.final = 1; else control.final = 0; if (skb_cloned(skb)) { /* Cloned sk_buffs are read-only, so we need a * writeable copy */ tx_skb = skb_copy(skb, GFP_KERNEL); } else { tx_skb = skb_clone(skb, GFP_KERNEL); } if (!tx_skb) { l2cap_seq_list_clear(&chan->retrans_list); break; } /* Update skb contents */ if (test_bit(FLAG_EXT_CTRL, &chan->flags)) { put_unaligned_le32(__pack_extended_control(&control), tx_skb->data + L2CAP_HDR_SIZE); } else { put_unaligned_le16(__pack_enhanced_control(&control), tx_skb->data + L2CAP_HDR_SIZE); } /* Update FCS */ if (chan->fcs == L2CAP_FCS_CRC16) { u16 fcs = crc16(0, (u8 *) tx_skb->data, tx_skb->len - L2CAP_FCS_SIZE); put_unaligned_le16(fcs, skb_tail_pointer(tx_skb) - L2CAP_FCS_SIZE); } l2cap_do_send(chan, tx_skb); BT_DBG("Resent txseq %d", control.txseq); chan->last_acked_seq = chan->buffer_seq; } } static void l2cap_retransmit(struct l2cap_chan *chan, struct l2cap_ctrl *control) { BT_DBG("chan %p, control %p", chan, control); l2cap_seq_list_append(&chan->retrans_list, control->reqseq); l2cap_ertm_resend(chan); } static void l2cap_retransmit_all(struct l2cap_chan *chan, struct l2cap_ctrl *control) { struct sk_buff *skb; BT_DBG("chan %p, control %p", chan, control); if (control->poll) set_bit(CONN_SEND_FBIT, &chan->conn_state); l2cap_seq_list_clear(&chan->retrans_list); if (test_bit(CONN_REMOTE_BUSY, &chan->conn_state)) return; if (chan->unacked_frames) { skb_queue_walk(&chan->tx_q, skb) { if (bt_cb(skb)->l2cap.txseq == control->reqseq || skb == chan->tx_send_head) break; } skb_queue_walk_from(&chan->tx_q, skb) { if (skb == chan->tx_send_head) break; l2cap_seq_list_append(&chan->retrans_list, bt_cb(skb)->l2cap.txseq); } l2cap_ertm_resend(chan); } } static void l2cap_send_ack(struct l2cap_chan *chan) { struct l2cap_ctrl control; u16 frames_to_ack = __seq_offset(chan, chan->buffer_seq, chan->last_acked_seq); int threshold; BT_DBG("chan %p last_acked_seq %d buffer_seq %d", chan, chan->last_acked_seq, chan->buffer_seq); memset(&control, 0, sizeof(control)); control.sframe = 1; if (test_bit(CONN_LOCAL_BUSY, &chan->conn_state) && chan->rx_state == L2CAP_RX_STATE_RECV) { __clear_ack_timer(chan); control.super = L2CAP_SUPER_RNR; control.reqseq = chan->buffer_seq; l2cap_send_sframe(chan, &control); } else { if (!test_bit(CONN_REMOTE_BUSY, &chan->conn_state)) { l2cap_ertm_send(chan); /* If any i-frames were sent, they included an ack */ if (chan->buffer_seq == chan->last_acked_seq) frames_to_ack = 0; } /* Ack now if the window is 3/4ths full. * Calculate without mul or div */ threshold = chan->ack_win; threshold += threshold << 1; threshold >>= 2; BT_DBG("frames_to_ack %u, threshold %d", frames_to_ack, threshold); if (frames_to_ack >= threshold) { __clear_ack_timer(chan); control.super = L2CAP_SUPER_RR; control.reqseq = chan->buffer_seq; l2cap_send_sframe(chan, &control); frames_to_ack = 0; } if (frames_to_ack) __set_ack_timer(chan); } } static inline int l2cap_skbuff_fromiovec(struct l2cap_chan *chan, struct msghdr *msg, int len, int count, struct sk_buff *skb) { struct l2cap_conn *conn = chan->conn; struct sk_buff **frag; int sent = 0; if (!copy_from_iter_full(skb_put(skb, count), count, &msg->msg_iter)) return -EFAULT; sent += count; len -= count; /* Continuation fragments (no L2CAP header) */ frag = &skb_shinfo(skb)->frag_list; while (len) { struct sk_buff *tmp; count = min_t(unsigned int, conn->mtu, len); tmp = chan->ops->alloc_skb(chan, 0, count, msg->msg_flags & MSG_DONTWAIT); if (IS_ERR(tmp)) return PTR_ERR(tmp); *frag = tmp; if (!copy_from_iter_full(skb_put(*frag, count), count, &msg->msg_iter)) return -EFAULT; sent += count; len -= count; skb->len += (*frag)->len; skb->data_len += (*frag)->len; frag = &(*frag)->next; } return sent; } static struct sk_buff *l2cap_create_connless_pdu(struct l2cap_chan *chan, struct msghdr *msg, size_t len) { struct l2cap_conn *conn = chan->conn; struct sk_buff *skb; int err, count, hlen = L2CAP_HDR_SIZE + L2CAP_PSMLEN_SIZE; struct l2cap_hdr *lh; BT_DBG("chan %p psm 0x%2.2x len %zu", chan, __le16_to_cpu(chan->psm), len); count = min_t(unsigned int, (conn->mtu - hlen), len); skb = chan->ops->alloc_skb(chan, hlen, count, msg->msg_flags & MSG_DONTWAIT); if (IS_ERR(skb)) return skb; /* Create L2CAP header */ lh = skb_put(skb, L2CAP_HDR_SIZE); lh->cid = cpu_to_le16(chan->dcid); lh->len = cpu_to_le16(len + L2CAP_PSMLEN_SIZE); put_unaligned(chan->psm, (__le16 *) skb_put(skb, L2CAP_PSMLEN_SIZE)); err = l2cap_skbuff_fromiovec(chan, msg, len, count, skb); if (unlikely(err < 0)) { kfree_skb(skb); return ERR_PTR(err); } return skb; } static struct sk_buff *l2cap_create_basic_pdu(struct l2cap_chan *chan, struct msghdr *msg, size_t len) { struct l2cap_conn *conn = chan->conn; struct sk_buff *skb; int err, count; struct l2cap_hdr *lh; BT_DBG("chan %p len %zu", chan, len); count = min_t(unsigned int, (conn->mtu - L2CAP_HDR_SIZE), len); skb = chan->ops->alloc_skb(chan, L2CAP_HDR_SIZE, count, msg->msg_flags & MSG_DONTWAIT); if (IS_ERR(skb)) return skb; /* Create L2CAP header */ lh = skb_put(skb, L2CAP_HDR_SIZE); lh->cid = cpu_to_le16(chan->dcid); lh->len = cpu_to_le16(len); err = l2cap_skbuff_fromiovec(chan, msg, len, count, skb); if (unlikely(err < 0)) { kfree_skb(skb); return ERR_PTR(err); } return skb; } static struct sk_buff *l2cap_create_iframe_pdu(struct l2cap_chan *chan, struct msghdr *msg, size_t len, u16 sdulen) { struct l2cap_conn *conn = chan->conn; struct sk_buff *skb; int err, count, hlen; struct l2cap_hdr *lh; BT_DBG("chan %p len %zu", chan, len); if (!conn) return ERR_PTR(-ENOTCONN); hlen = __ertm_hdr_size(chan); if (sdulen) hlen += L2CAP_SDULEN_SIZE; if (chan->fcs == L2CAP_FCS_CRC16) hlen += L2CAP_FCS_SIZE; count = min_t(unsigned int, (conn->mtu - hlen), len); skb = chan->ops->alloc_skb(chan, hlen, count, msg->msg_flags & MSG_DONTWAIT); if (IS_ERR(skb)) return skb; /* Create L2CAP header */ lh = skb_put(skb, L2CAP_HDR_SIZE); lh->cid = cpu_to_le16(chan->dcid); lh->len = cpu_to_le16(len + (hlen - L2CAP_HDR_SIZE)); /* Control header is populated later */ if (test_bit(FLAG_EXT_CTRL, &chan->flags)) put_unaligned_le32(0, skb_put(skb, L2CAP_EXT_CTRL_SIZE)); else put_unaligned_le16(0, skb_put(skb, L2CAP_ENH_CTRL_SIZE)); if (sdulen) put_unaligned_le16(sdulen, skb_put(skb, L2CAP_SDULEN_SIZE)); err = l2cap_skbuff_fromiovec(chan, msg, len, count, skb); if (unlikely(err < 0)) { kfree_skb(skb); return ERR_PTR(err); } bt_cb(skb)->l2cap.fcs = chan->fcs; bt_cb(skb)->l2cap.retries = 0; return skb; } static int l2cap_segment_sdu(struct l2cap_chan *chan, struct sk_buff_head *seg_queue, struct msghdr *msg, size_t len) { struct sk_buff *skb; u16 sdu_len; size_t pdu_len; u8 sar; BT_DBG("chan %p, msg %p, len %zu", chan, msg, len); /* It is critical that ERTM PDUs fit in a single HCI fragment, * so fragmented skbs are not used. The HCI layer's handling * of fragmented skbs is not compatible with ERTM's queueing. */ /* PDU size is derived from the HCI MTU */ pdu_len = chan->conn->mtu; /* Constrain PDU size for BR/EDR connections */ pdu_len = min_t(size_t, pdu_len, L2CAP_BREDR_MAX_PAYLOAD); /* Adjust for largest possible L2CAP overhead. */ if (chan->fcs) pdu_len -= L2CAP_FCS_SIZE; pdu_len -= __ertm_hdr_size(chan); /* Remote device may have requested smaller PDUs */ pdu_len = min_t(size_t, pdu_len, chan->remote_mps); if (len <= pdu_len) { sar = L2CAP_SAR_UNSEGMENTED; sdu_len = 0; pdu_len = len; } else { sar = L2CAP_SAR_START; sdu_len = len; } while (len > 0) { skb = l2cap_create_iframe_pdu(chan, msg, pdu_len, sdu_len); if (IS_ERR(skb)) { __skb_queue_purge(seg_queue); return PTR_ERR(skb); } bt_cb(skb)->l2cap.sar = sar; __skb_queue_tail(seg_queue, skb); len -= pdu_len; if (sdu_len) sdu_len = 0; if (len <= pdu_len) { sar = L2CAP_SAR_END; pdu_len = len; } else { sar = L2CAP_SAR_CONTINUE; } } return 0; } static struct sk_buff *l2cap_create_le_flowctl_pdu(struct l2cap_chan *chan, struct msghdr *msg, size_t len, u16 sdulen) { struct l2cap_conn *conn = chan->conn; struct sk_buff *skb; int err, count, hlen; struct l2cap_hdr *lh; BT_DBG("chan %p len %zu", chan, len); if (!conn) return ERR_PTR(-ENOTCONN); hlen = L2CAP_HDR_SIZE; if (sdulen) hlen += L2CAP_SDULEN_SIZE; count = min_t(unsigned int, (conn->mtu - hlen), len); skb = chan->ops->alloc_skb(chan, hlen, count, msg->msg_flags & MSG_DONTWAIT); if (IS_ERR(skb)) return skb; /* Create L2CAP header */ lh = skb_put(skb, L2CAP_HDR_SIZE); lh->cid = cpu_to_le16(chan->dcid); lh->len = cpu_to_le16(len + (hlen - L2CAP_HDR_SIZE)); if (sdulen) put_unaligned_le16(sdulen, skb_put(skb, L2CAP_SDULEN_SIZE)); err = l2cap_skbuff_fromiovec(chan, msg, len, count, skb); if (unlikely(err < 0)) { kfree_skb(skb); return ERR_PTR(err); } return skb; } static int l2cap_segment_le_sdu(struct l2cap_chan *chan, struct sk_buff_head *seg_queue, struct msghdr *msg, size_t len) { struct sk_buff *skb; size_t pdu_len; u16 sdu_len; BT_DBG("chan %p, msg %p, len %zu", chan, msg, len); sdu_len = len; pdu_len = chan->remote_mps - L2CAP_SDULEN_SIZE; while (len > 0) { if (len <= pdu_len) pdu_len = len; skb = l2cap_create_le_flowctl_pdu(chan, msg, pdu_len, sdu_len); if (IS_ERR(skb)) { __skb_queue_purge(seg_queue); return PTR_ERR(skb); } __skb_queue_tail(seg_queue, skb); len -= pdu_len; if (sdu_len) { sdu_len = 0; pdu_len += L2CAP_SDULEN_SIZE; } } return 0; } static void l2cap_le_flowctl_send(struct l2cap_chan *chan) { int sent = 0; BT_DBG("chan %p", chan); while (chan->tx_credits && !skb_queue_empty(&chan->tx_q)) { l2cap_do_send(chan, skb_dequeue(&chan->tx_q)); chan->tx_credits--; sent++; } BT_DBG("Sent %d credits %u queued %u", sent, chan->tx_credits, skb_queue_len(&chan->tx_q)); } static void l2cap_tx_timestamp(struct sk_buff *skb, const struct sockcm_cookie *sockc, size_t len) { struct sock *sk = skb ? skb->sk : NULL; if (sk && sk->sk_type == SOCK_STREAM) hci_setup_tx_timestamp(skb, len, sockc); else hci_setup_tx_timestamp(skb, 1, sockc); } static void l2cap_tx_timestamp_seg(struct sk_buff_head *queue, const struct sockcm_cookie *sockc, size_t len) { struct sk_buff *skb = skb_peek(queue); struct sock *sk = skb ? skb->sk : NULL; if (sk && sk->sk_type == SOCK_STREAM) l2cap_tx_timestamp(skb_peek_tail(queue), sockc, len); else l2cap_tx_timestamp(skb, sockc, len); } int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len, const struct sockcm_cookie *sockc) { struct sk_buff *skb; int err; struct sk_buff_head seg_queue; if (!chan->conn) return -ENOTCONN; /* Connectionless channel */ if (chan->chan_type == L2CAP_CHAN_CONN_LESS) { skb = l2cap_create_connless_pdu(chan, msg, len); if (IS_ERR(skb)) return PTR_ERR(skb); l2cap_tx_timestamp(skb, sockc, len); l2cap_do_send(chan, skb); return len; } switch (chan->mode) { case L2CAP_MODE_LE_FLOWCTL: case L2CAP_MODE_EXT_FLOWCTL: /* Check outgoing MTU */ if (len > chan->omtu) return -EMSGSIZE; __skb_queue_head_init(&seg_queue); err = l2cap_segment_le_sdu(chan, &seg_queue, msg, len); if (chan->state != BT_CONNECTED) { __skb_queue_purge(&seg_queue); err = -ENOTCONN; } if (err) return err; l2cap_tx_timestamp_seg(&seg_queue, sockc, len); skb_queue_splice_tail_init(&seg_queue, &chan->tx_q); l2cap_le_flowctl_send(chan); if (!chan->tx_credits) chan->ops->suspend(chan); err = len; break; case L2CAP_MODE_BASIC: /* Check outgoing MTU */ if (len > chan->omtu) return -EMSGSIZE; /* Create a basic PDU */ skb = l2cap_create_basic_pdu(chan, msg, len); if (IS_ERR(skb)) return PTR_ERR(skb); l2cap_tx_timestamp(skb, sockc, len); l2cap_do_send(chan, skb); err = len; break; case L2CAP_MODE_ERTM: case L2CAP_MODE_STREAMING: /* Check outgoing MTU */ if (len > chan->omtu) { err = -EMSGSIZE; break; } __skb_queue_head_init(&seg_queue); /* Do segmentation before calling in to the state machine, * since it's possible to block while waiting for memory * allocation. */ err = l2cap_segment_sdu(chan, &seg_queue, msg, len); if (err) break; if (chan->mode == L2CAP_MODE_ERTM) { /* TODO: ERTM mode timestamping */ l2cap_tx(chan, NULL, &seg_queue, L2CAP_EV_DATA_REQUEST); } else { l2cap_tx_timestamp_seg(&seg_queue, sockc, len); l2cap_streaming_send(chan, &seg_queue); } err = len; /* If the skbs were not queued for sending, they'll still be in * seg_queue and need to be purged. */ __skb_queue_purge(&seg_queue); break; default: BT_DBG("bad state %1.1x", chan->mode); err = -EBADFD; } return err; } EXPORT_SYMBOL_GPL(l2cap_chan_send); static void l2cap_send_srej(struct l2cap_chan *chan, u16 txseq) { struct l2cap_ctrl control; u16 seq; BT_DBG("chan %p, txseq %u", chan, txseq); memset(&control, 0, sizeof(control)); control.sframe = 1; control.super = L2CAP_SUPER_SREJ; for (seq = chan->expected_tx_seq; seq != txseq; seq = __next_seq(chan, seq)) { if (!l2cap_ertm_seq_in_queue(&chan->srej_q, seq)) { control.reqseq = seq; l2cap_send_sframe(chan, &control); l2cap_seq_list_append(&chan->srej_list, seq); } } chan->expected_tx_seq = __next_seq(chan, txseq); } static void l2cap_send_srej_tail(struct l2cap_chan *chan) { struct l2cap_ctrl control; BT_DBG("chan %p", chan); if (chan->srej_list.tail == L2CAP_SEQ_LIST_CLEAR) return; memset(&control, 0, sizeof(control)); control.sframe = 1; control.super = L2CAP_SUPER_SREJ; control.reqseq = chan->srej_list.tail; l2cap_send_sframe(chan, &control); } static void l2cap_send_srej_list(struct l2cap_chan *chan, u16 txseq) { struct l2cap_ctrl control; u16 initial_head; u16 seq; BT_DBG("chan %p, txseq %u", chan, txseq); memset(&control, 0, sizeof(control)); control.sframe = 1; control.super = L2CAP_SUPER_SREJ; /* Capture initial list head to allow only one pass through the list. */ initial_head = chan->srej_list.head; do { seq = l2cap_seq_list_pop(&chan->srej_list); if (seq == txseq || seq == L2CAP_SEQ_LIST_CLEAR) break; control.reqseq = seq; l2cap_send_sframe(chan, &control); l2cap_seq_list_append(&chan->srej_list, seq); } while (chan->srej_list.head != initial_head); } static void l2cap_process_reqseq(struct l2cap_chan *chan, u16 reqseq) { struct sk_buff *acked_skb; u16 ackseq; BT_DBG("chan %p, reqseq %u", chan, reqseq); if (chan->unacked_frames == 0 || reqseq == chan->expected_ack_seq) return; BT_DBG("expected_ack_seq %u, unacked_frames %u", chan->expected_ack_seq, chan->unacked_frames); for (ackseq = chan->expected_ack_seq; ackseq != reqseq; ackseq = __next_seq(chan, ackseq)) { acked_skb = l2cap_ertm_seq_in_queue(&chan->tx_q, ackseq); if (acked_skb) { skb_unlink(acked_skb, &chan->tx_q); kfree_skb(acked_skb); chan->unacked_frames--; } } chan->expected_ack_seq = reqseq; if (chan->unacked_frames == 0) __clear_retrans_timer(chan); BT_DBG("unacked_frames %u", chan->unacked_frames); } static void l2cap_abort_rx_srej_sent(struct l2cap_chan *chan) { BT_DBG("chan %p", chan); chan->expected_tx_seq = chan->buffer_seq; l2cap_seq_list_clear(&chan->srej_list); skb_queue_purge(&chan->srej_q); chan->rx_state = L2CAP_RX_STATE_RECV; } static void l2cap_tx_state_xmit(struct l2cap_chan *chan, struct l2cap_ctrl *control, struct sk_buff_head *skbs, u8 event) { BT_DBG("chan %p, control %p, skbs %p, event %d", chan, control, skbs, event); switch (event) { case L2CAP_EV_DATA_REQUEST: if (chan->tx_send_head == NULL) chan->tx_send_head = skb_peek(skbs); skb_queue_splice_tail_init(skbs, &chan->tx_q); l2cap_ertm_send(chan); break; case L2CAP_EV_LOCAL_BUSY_DETECTED: BT_DBG("Enter LOCAL_BUSY"); set_bit(CONN_LOCAL_BUSY, &chan->conn_state); if (chan->rx_state == L2CAP_RX_STATE_SREJ_SENT) { /* The SREJ_SENT state must be aborted if we are to * enter the LOCAL_BUSY state. */ l2cap_abort_rx_srej_sent(chan); } l2cap_send_ack(chan); break; case L2CAP_EV_LOCAL_BUSY_CLEAR: BT_DBG("Exit LOCAL_BUSY"); clear_bit(CONN_LOCAL_BUSY, &chan->conn_state); if (test_bit(CONN_RNR_SENT, &chan->conn_state)) { struct l2cap_ctrl local_control; memset(&local_control, 0, sizeof(local_control)); local_control.sframe = 1; local_control.super = L2CAP_SUPER_RR; local_control.poll = 1; local_control.reqseq = chan->buffer_seq; l2cap_send_sframe(chan, &local_control); chan->retry_count = 1; __set_monitor_timer(chan); chan->tx_state = L2CAP_TX_STATE_WAIT_F; } break; case L2CAP_EV_RECV_REQSEQ_AND_FBIT: l2cap_process_reqseq(chan, control->reqseq); break; case L2CAP_EV_EXPLICIT_POLL: l2cap_send_rr_or_rnr(chan, 1); chan->retry_count = 1; __set_monitor_timer(chan); __clear_ack_timer(chan); chan->tx_state = L2CAP_TX_STATE_WAIT_F; break; case L2CAP_EV_RETRANS_TO: l2cap_send_rr_or_rnr(chan, 1); chan->retry_count = 1; __set_monitor_timer(chan); chan->tx_state = L2CAP_TX_STATE_WAIT_F; break; case L2CAP_EV_RECV_FBIT: /* Nothing to process */ break; default: break; } } static void l2cap_tx_state_wait_f(struct l2cap_chan *chan, struct l2cap_ctrl *control, struct sk_buff_head *skbs, u8 event) { BT_DBG("chan %p, control %p, skbs %p, event %d", chan, control, skbs, event); switch (event) { case L2CAP_EV_DATA_REQUEST: if (chan->tx_send_head == NULL) chan->tx_send_head = skb_peek(skbs); /* Queue data, but don't send. */ skb_queue_splice_tail_init(skbs, &chan->tx_q); break; case L2CAP_EV_LOCAL_BUSY_DETECTED: BT_DBG("Enter LOCAL_BUSY"); set_bit(CONN_LOCAL_BUSY, &chan->conn_state); if (chan->rx_state == L2CAP_RX_STATE_SREJ_SENT) { /* The SREJ_SENT state must be aborted if we are to * enter the LOCAL_BUSY state. */ l2cap_abort_rx_srej_sent(chan); } l2cap_send_ack(chan); break; case L2CAP_EV_LOCAL_BUSY_CLEAR: BT_DBG("Exit LOCAL_BUSY"); clear_bit(CONN_LOCAL_BUSY, &chan->conn_state); if (test_bit(CONN_RNR_SENT, &chan->conn_state)) { struct l2cap_ctrl local_control; memset(&local_control, 0, sizeof(local_control)); local_control.sframe = 1; local_control.super = L2CAP_SUPER_RR; local_control.poll = 1; local_control.reqseq = chan->buffer_seq; l2cap_send_sframe(chan, &local_control); chan->retry_count = 1; __set_monitor_timer(chan); chan->tx_state = L2CAP_TX_STATE_WAIT_F; } break; case L2CAP_EV_RECV_REQSEQ_AND_FBIT: l2cap_process_reqseq(chan, control->reqseq); fallthrough; case L2CAP_EV_RECV_FBIT: if (control && control->final) { __clear_monitor_timer(chan); if (chan->unacked_frames > 0) __set_retrans_timer(chan); chan->retry_count = 0; chan->tx_state = L2CAP_TX_STATE_XMIT; BT_DBG("recv fbit tx_state 0x2.2%x", chan->tx_state); } break; case L2CAP_EV_EXPLICIT_POLL: /* Ignore */ break; case L2CAP_EV_MONITOR_TO: if (chan->max_tx == 0 || chan->retry_count < chan->max_tx) { l2cap_send_rr_or_rnr(chan, 1); __set_monitor_timer(chan); chan->retry_count++; } else { l2cap_send_disconn_req(chan, ECONNABORTED); } break; default: break; } } static void l2cap_tx(struct l2cap_chan *chan, struct l2cap_ctrl *control, struct sk_buff_head *skbs, u8 event) { BT_DBG("chan %p, control %p, skbs %p, event %d, state %d", chan, control, skbs, event, chan->tx_state); switch (chan->tx_state) { case L2CAP_TX_STATE_XMIT: l2cap_tx_state_xmit(chan, control, skbs, event); break; case L2CAP_TX_STATE_WAIT_F: l2cap_tx_state_wait_f(chan, control, skbs, event); break; default: /* Ignore event */ break; } } static void l2cap_pass_to_tx(struct l2cap_chan *chan, struct l2cap_ctrl *control) { BT_DBG("chan %p, control %p", chan, control); l2cap_tx(chan, control, NULL, L2CAP_EV_RECV_REQSEQ_AND_FBIT); } static void l2cap_pass_to_tx_fbit(struct l2cap_chan *chan, struct l2cap_ctrl *control) { BT_DBG("chan %p, control %p", chan, control); l2cap_tx(chan, control, NULL, L2CAP_EV_RECV_FBIT); } /* Copy frame to all raw sockets on that connection */ static void l2cap_raw_recv(struct l2cap_conn *conn, struct sk_buff *skb) { struct sk_buff *nskb; struct l2cap_chan *chan; BT_DBG("conn %p", conn); list_for_each_entry(chan, &conn->chan_l, list) { if (chan->chan_type != L2CAP_CHAN_RAW) continue; /* Don't send frame to the channel it came from */ if (bt_cb(skb)->l2cap.chan == chan) continue; nskb = skb_clone(skb, GFP_KERNEL); if (!nskb) continue; if (chan->ops->recv(chan, nskb)) kfree_skb(nskb); } } /* ---- L2CAP signalling commands ---- */ static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn, u8 code, u8 ident, u16 dlen, void *data) { struct sk_buff *skb, **frag; struct l2cap_cmd_hdr *cmd; struct l2cap_hdr *lh; int len, count; BT_DBG("conn %p, code 0x%2.2x, ident 0x%2.2x, len %u", conn, code, ident, dlen); if (conn->mtu < L2CAP_HDR_SIZE + L2CAP_CMD_HDR_SIZE) return NULL; len = L2CAP_HDR_SIZE + L2CAP_CMD_HDR_SIZE + dlen; count = min_t(unsigned int, conn->mtu, len); skb = bt_skb_alloc(count, GFP_KERNEL); if (!skb) return NULL; lh = skb_put(skb, L2CAP_HDR_SIZE); lh->len = cpu_to_le16(L2CAP_CMD_HDR_SIZE + dlen); if (conn->hcon->type == LE_LINK) lh->cid = cpu_to_le16(L2CAP_CID_LE_SIGNALING); else lh->cid = cpu_to_le16(L2CAP_CID_SIGNALING); cmd = skb_put(skb, L2CAP_CMD_HDR_SIZE); cmd->code = code; cmd->ident = ident; cmd->len = cpu_to_le16(dlen); if (dlen) { count -= L2CAP_HDR_SIZE + L2CAP_CMD_HDR_SIZE; skb_put_data(skb, data, count); data += count; } len -= skb->len; /* Continuation fragments (no L2CAP header) */ frag = &skb_shinfo(skb)->frag_list; while (len) { count = min_t(unsigned int, conn->mtu, len); *frag = bt_skb_alloc(count, GFP_KERNEL); if (!*frag) goto fail; skb_put_data(*frag, data, count); len -= count; data += count; frag = &(*frag)->next; } return skb; fail: kfree_skb(skb); return NULL; } static inline int l2cap_get_conf_opt(void **ptr, int *type, int *olen, unsigned long *val) { struct l2cap_conf_opt *opt = *ptr; int len; len = L2CAP_CONF_OPT_SIZE + opt->len; *ptr += len; *type = opt->type; *olen = opt->len; switch (opt->len) { case 1: *val = *((u8 *) opt->val); break; case 2: *val = get_unaligned_le16(opt->val); break; case 4: *val = get_unaligned_le32(opt->val); break; default: *val = (unsigned long) opt->val; break; } BT_DBG("type 0x%2.2x len %u val 0x%lx", *type, opt->len, *val); return len; } static void l2cap_add_conf_opt(void **ptr, u8 type, u8 len, unsigned long val, size_t size) { struct l2cap_conf_opt *opt = *ptr; BT_DBG("type 0x%2.2x len %u val 0x%lx", type, len, val); if (size < L2CAP_CONF_OPT_SIZE + len) return; opt->type = type; opt->len = len; switch (len) { case 1: *((u8 *) opt->val) = val; break; case 2: put_unaligned_le16(val, opt->val); break; case 4: put_unaligned_le32(val, opt->val); break; default: memcpy(opt->val, (void *) val, len); break; } *ptr += L2CAP_CONF_OPT_SIZE + len; } static void l2cap_add_opt_efs(void **ptr, struct l2cap_chan *chan, size_t size) { struct l2cap_conf_efs efs; switch (chan->mode) { case L2CAP_MODE_ERTM: efs.id = chan->local_id; efs.stype = chan->local_stype; efs.msdu = cpu_to_le16(chan->local_msdu); efs.sdu_itime = cpu_to_le32(chan->local_sdu_itime); efs.acc_lat = cpu_to_le32(L2CAP_DEFAULT_ACC_LAT); efs.flush_to = cpu_to_le32(L2CAP_EFS_DEFAULT_FLUSH_TO); break; case L2CAP_MODE_STREAMING: efs.id = 1; efs.stype = L2CAP_SERV_BESTEFFORT; efs.msdu = cpu_to_le16(chan->local_msdu); efs.sdu_itime = cpu_to_le32(chan->local_sdu_itime); efs.acc_lat = 0; efs.flush_to = 0; break; default: return; } l2cap_add_conf_opt(ptr, L2CAP_CONF_EFS, sizeof(efs), (unsigned long) &efs, size); } static void l2cap_ack_timeout(struct work_struct *work) { struct l2cap_chan *chan = container_of(work, struct l2cap_chan, ack_timer.work); u16 frames_to_ack; BT_DBG("chan %p", chan); l2cap_chan_lock(chan); frames_to_ack = __seq_offset(chan, chan->buffer_seq, chan->last_acked_seq); if (frames_to_ack) l2cap_send_rr_or_rnr(chan, 0); l2cap_chan_unlock(chan); l2cap_chan_put(chan); } int l2cap_ertm_init(struct l2cap_chan *chan) { int err; chan->next_tx_seq = 0; chan->expected_tx_seq = 0; chan->expected_ack_seq = 0; chan->unacked_frames = 0; chan->buffer_seq = 0; chan->frames_sent = 0; chan->last_acked_seq = 0; chan->sdu = NULL; chan->sdu_last_frag = NULL; chan->sdu_len = 0; skb_queue_head_init(&chan->tx_q); if (chan->mode != L2CAP_MODE_ERTM) return 0; chan->rx_state = L2CAP_RX_STATE_RECV; chan->tx_state = L2CAP_TX_STATE_XMIT; skb_queue_head_init(&chan->srej_q); err = l2cap_seq_list_init(&chan->srej_list, chan->tx_win); if (err < 0) return err; err = l2cap_seq_list_init(&chan->retrans_list, chan->remote_tx_win); if (err < 0) l2cap_seq_list_free(&chan->srej_list); return err; } static inline __u8 l2cap_select_mode(__u8 mode, __u16 remote_feat_mask) { switch (mode) { case L2CAP_MODE_STREAMING: case L2CAP_MODE_ERTM: if (l2cap_mode_supported(mode, remote_feat_mask)) return mode; fallthrough; default: return L2CAP_MODE_BASIC; } } static inline bool __l2cap_ews_supported(struct l2cap_conn *conn) { return (conn->feat_mask & L2CAP_FEAT_EXT_WINDOW); } static inline bool __l2cap_efs_supported(struct l2cap_conn *conn) { return (conn->feat_mask & L2CAP_FEAT_EXT_FLOW); } static void __l2cap_set_ertm_timeouts(struct l2cap_chan *chan, struct l2cap_conf_rfc *rfc) { rfc->retrans_timeout = cpu_to_le16(L2CAP_DEFAULT_RETRANS_TO); rfc->monitor_timeout = cpu_to_le16(L2CAP_DEFAULT_MONITOR_TO); } static inline void l2cap_txwin_setup(struct l2cap_chan *chan) { if (chan->tx_win > L2CAP_DEFAULT_TX_WINDOW && __l2cap_ews_supported(chan->conn)) { /* use extended control field */ set_bit(FLAG_EXT_CTRL, &chan->flags); chan->tx_win_max = L2CAP_DEFAULT_EXT_WINDOW; } else { chan->tx_win = min_t(u16, chan->tx_win, L2CAP_DEFAULT_TX_WINDOW); chan->tx_win_max = L2CAP_DEFAULT_TX_WINDOW; } chan->ack_win = chan->tx_win; } static void l2cap_mtu_auto(struct l2cap_chan *chan) { struct hci_conn *conn = chan->conn->hcon; chan->imtu = L2CAP_DEFAULT_MIN_MTU; /* The 2-DH1 packet has between 2 and 56 information bytes * (including the 2-byte payload header) */ if (!(conn->pkt_type & HCI_2DH1)) chan->imtu = 54; /* The 3-DH1 packet has between 2 and 85 information bytes * (including the 2-byte payload header) */ if (!(conn->pkt_type & HCI_3DH1)) chan->imtu = 83; /* The 2-DH3 packet has between 2 and 369 information bytes * (including the 2-byte payload header) */ if (!(conn->pkt_type & HCI_2DH3)) chan->imtu = 367; /* The 3-DH3 packet has between 2 and 554 information bytes * (including the 2-byte payload header) */ if (!(conn->pkt_type & HCI_3DH3)) chan->imtu = 552; /* The 2-DH5 packet has between 2 and 681 information bytes * (including the 2-byte payload header) */ if (!(conn->pkt_type & HCI_2DH5)) chan->imtu = 679; /* The 3-DH5 packet has between 2 and 1023 information bytes * (including the 2-byte payload header) */ if (!(conn->pkt_type & HCI_3DH5)) chan->imtu = 1021; } static int l2cap_build_conf_req(struct l2cap_chan *chan, void *data, size_t data_size) { struct l2cap_conf_req *req = data; struct l2cap_conf_rfc rfc = { .mode = chan->mode }; void *ptr = req->data; void *endptr = data + data_size; u16 size; BT_DBG("chan %p", chan); if (chan->num_conf_req || chan->num_conf_rsp) goto done; switch (chan->mode) { case L2CAP_MODE_STREAMING: case L2CAP_MODE_ERTM: if (test_bit(CONF_STATE2_DEVICE, &chan->conf_state)) break; if (__l2cap_efs_supported(chan->conn)) set_bit(FLAG_EFS_ENABLE, &chan->flags); fallthrough; default: chan->mode = l2cap_select_mode(rfc.mode, chan->conn->feat_mask); break; } done: if (chan->imtu != L2CAP_DEFAULT_MTU) { if (!chan->imtu) l2cap_mtu_auto(chan); l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->imtu, endptr - ptr); } switch (chan->mode) { case L2CAP_MODE_BASIC: if (disable_ertm) break; if (!(chan->conn->feat_mask & L2CAP_FEAT_ERTM) && !(chan->conn->feat_mask & L2CAP_FEAT_STREAMING)) break; rfc.mode = L2CAP_MODE_BASIC; rfc.txwin_size = 0; rfc.max_transmit = 0; rfc.retrans_timeout = 0; rfc.monitor_timeout = 0; rfc.max_pdu_size = 0; l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), (unsigned long) &rfc, endptr - ptr); break; case L2CAP_MODE_ERTM: rfc.mode = L2CAP_MODE_ERTM; rfc.max_transmit = chan->max_tx; __l2cap_set_ertm_timeouts(chan, &rfc); size = min_t(u16, L2CAP_DEFAULT_MAX_PDU_SIZE, chan->conn->mtu - L2CAP_EXT_HDR_SIZE - L2CAP_SDULEN_SIZE - L2CAP_FCS_SIZE); rfc.max_pdu_size = cpu_to_le16(size); l2cap_txwin_setup(chan); rfc.txwin_size = min_t(u16, chan->tx_win, L2CAP_DEFAULT_TX_WINDOW); l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), (unsigned long) &rfc, endptr - ptr); if (test_bit(FLAG_EFS_ENABLE, &chan->flags)) l2cap_add_opt_efs(&ptr, chan, endptr - ptr); if (test_bit(FLAG_EXT_CTRL, &chan->flags)) l2cap_add_conf_opt(&ptr, L2CAP_CONF_EWS, 2, chan->tx_win, endptr - ptr); if (chan->conn->feat_mask & L2CAP_FEAT_FCS) if (chan->fcs == L2CAP_FCS_NONE || test_bit(CONF_RECV_NO_FCS, &chan->conf_state)) { chan->fcs = L2CAP_FCS_NONE; l2cap_add_conf_opt(&ptr, L2CAP_CONF_FCS, 1, chan->fcs, endptr - ptr); } break; case L2CAP_MODE_STREAMING: l2cap_txwin_setup(chan); rfc.mode = L2CAP_MODE_STREAMING; rfc.txwin_size = 0; rfc.max_transmit = 0; rfc.retrans_timeout = 0; rfc.monitor_timeout = 0; size = min_t(u16, L2CAP_DEFAULT_MAX_PDU_SIZE, chan->conn->mtu - L2CAP_EXT_HDR_SIZE - L2CAP_SDULEN_SIZE - L2CAP_FCS_SIZE); rfc.max_pdu_size = cpu_to_le16(size); l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), (unsigned long) &rfc, endptr - ptr); if (test_bit(FLAG_EFS_ENABLE, &chan->flags)) l2cap_add_opt_efs(&ptr, chan, endptr - ptr); if (chan->conn->feat_mask & L2CAP_FEAT_FCS) if (chan->fcs == L2CAP_FCS_NONE || test_bit(CONF_RECV_NO_FCS, &chan->conf_state)) { chan->fcs = L2CAP_FCS_NONE; l2cap_add_conf_opt(&ptr, L2CAP_CONF_FCS, 1, chan->fcs, endptr - ptr); } break; } req->dcid = cpu_to_le16(chan->dcid); req->flags = cpu_to_le16(0); return ptr - data; } static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data_size) { struct l2cap_conf_rsp *rsp = data; void *ptr = rsp->data; void *endptr = data + data_size; void *req = chan->conf_req; int len = chan->conf_len; int type, hint, olen; unsigned long val; struct l2cap_conf_rfc rfc = { .mode = L2CAP_MODE_BASIC }; struct l2cap_conf_efs efs; u8 remote_efs = 0; u16 mtu = 0; u16 result = L2CAP_CONF_SUCCESS; u16 size; BT_DBG("chan %p", chan); while (len >= L2CAP_CONF_OPT_SIZE) { len -= l2cap_get_conf_opt(&req, &type, &olen, &val); if (len < 0) break; hint = type & L2CAP_CONF_HINT; type &= L2CAP_CONF_MASK; switch (type) { case L2CAP_CONF_MTU: if (olen != 2) break; mtu = val; break; case L2CAP_CONF_FLUSH_TO: if (olen != 2) break; chan->flush_to = val; break; case L2CAP_CONF_QOS: break; case L2CAP_CONF_RFC: if (olen != sizeof(rfc)) break; memcpy(&rfc, (void *) val, olen); break; case L2CAP_CONF_FCS: if (olen != 1) break; if (val == L2CAP_FCS_NONE) set_bit(CONF_RECV_NO_FCS, &chan->conf_state); break; case L2CAP_CONF_EFS: if (olen != sizeof(efs)) break; remote_efs = 1; memcpy(&efs, (void *) val, olen); break; case L2CAP_CONF_EWS: if (olen != 2) break; return -ECONNREFUSED; default: if (hint) break; result = L2CAP_CONF_UNKNOWN; l2cap_add_conf_opt(&ptr, (u8)type, sizeof(u8), type, endptr - ptr); break; } } if (chan->num_conf_rsp || chan->num_conf_req > 1) goto done; switch (chan->mode) { case L2CAP_MODE_STREAMING: case L2CAP_MODE_ERTM: if (!test_bit(CONF_STATE2_DEVICE, &chan->conf_state)) { chan->mode = l2cap_select_mode(rfc.mode, chan->conn->feat_mask); break; } if (remote_efs) { if (__l2cap_efs_supported(chan->conn)) set_bit(FLAG_EFS_ENABLE, &chan->flags); else return -ECONNREFUSED; } if (chan->mode != rfc.mode) return -ECONNREFUSED; break; } done: if (chan->mode != rfc.mode) { result = L2CAP_CONF_UNACCEPT; rfc.mode = chan->mode; if (chan->num_conf_rsp == 1) return -ECONNREFUSED; l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), (unsigned long) &rfc, endptr - ptr); } if (result == L2CAP_CONF_SUCCESS) { /* Configure output options and let the other side know * which ones we don't like. */ /* If MTU is not provided in configure request, try adjusting it * to the current output MTU if it has been set * * Bluetooth Core 6.1, Vol 3, Part A, Section 4.5 * * Each configuration parameter value (if any is present) in an * L2CAP_CONFIGURATION_RSP packet reflects an ‘adjustment’ to a * configuration parameter value that has been sent (or, in case * of default values, implied) in the corresponding * L2CAP_CONFIGURATION_REQ packet. */ if (!mtu) { /* Only adjust for ERTM channels as for older modes the * remote stack may not be able to detect that the * adjustment causing it to silently drop packets. */ if (chan->mode == L2CAP_MODE_ERTM && chan->omtu && chan->omtu != L2CAP_DEFAULT_MTU) mtu = chan->omtu; else mtu = L2CAP_DEFAULT_MTU; } if (mtu < L2CAP_DEFAULT_MIN_MTU) result = L2CAP_CONF_UNACCEPT; else { chan->omtu = mtu; set_bit(CONF_MTU_DONE, &chan->conf_state); } l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->omtu, endptr - ptr); if (remote_efs) { if (chan->local_stype != L2CAP_SERV_NOTRAFIC && efs.stype != L2CAP_SERV_NOTRAFIC && efs.stype != chan->local_stype) { result = L2CAP_CONF_UNACCEPT; if (chan->num_conf_req >= 1) return -ECONNREFUSED; l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs), (unsigned long) &efs, endptr - ptr); } else { /* Send PENDING Conf Rsp */ result = L2CAP_CONF_PENDING; set_bit(CONF_LOC_CONF_PEND, &chan->conf_state); } } switch (rfc.mode) { case L2CAP_MODE_BASIC: chan->fcs = L2CAP_FCS_NONE; set_bit(CONF_MODE_DONE, &chan->conf_state); break; case L2CAP_MODE_ERTM: if (!test_bit(CONF_EWS_RECV, &chan->conf_state)) chan->remote_tx_win = rfc.txwin_size; else rfc.txwin_size = L2CAP_DEFAULT_TX_WINDOW; chan->remote_max_tx = rfc.max_transmit; size = min_t(u16, le16_to_cpu(rfc.max_pdu_size), chan->conn->mtu - L2CAP_EXT_HDR_SIZE - L2CAP_SDULEN_SIZE - L2CAP_FCS_SIZE); rfc.max_pdu_size = cpu_to_le16(size); chan->remote_mps = size; __l2cap_set_ertm_timeouts(chan, &rfc); set_bit(CONF_MODE_DONE, &chan->conf_state); l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), (unsigned long) &rfc, endptr - ptr); if (remote_efs && test_bit(FLAG_EFS_ENABLE, &chan->flags)) { chan->remote_id = efs.id; chan->remote_stype = efs.stype; chan->remote_msdu = le16_to_cpu(efs.msdu); chan->remote_flush_to = le32_to_cpu(efs.flush_to); chan->remote_acc_lat = le32_to_cpu(efs.acc_lat); chan->remote_sdu_itime = le32_to_cpu(efs.sdu_itime); l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs), (unsigned long) &efs, endptr - ptr); } break; case L2CAP_MODE_STREAMING: size = min_t(u16, le16_to_cpu(rfc.max_pdu_size), chan->conn->mtu - L2CAP_EXT_HDR_SIZE - L2CAP_SDULEN_SIZE - L2CAP_FCS_SIZE); rfc.max_pdu_size = cpu_to_le16(size); chan->remote_mps = size; set_bit(CONF_MODE_DONE, &chan->conf_state); l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), (unsigned long) &rfc, endptr - ptr); break; default: result = L2CAP_CONF_UNACCEPT; memset(&rfc, 0, sizeof(rfc)); rfc.mode = chan->mode; } if (result == L2CAP_CONF_SUCCESS) set_bit(CONF_OUTPUT_DONE, &chan->conf_state); } rsp->scid = cpu_to_le16(chan->dcid); rsp->result = cpu_to_le16(result); rsp->flags = cpu_to_le16(0); return ptr - data; } static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len, void *data, size_t size, u16 *result) { struct l2cap_conf_req *req = data; void *ptr = req->data; void *endptr = data + size; int type, olen; unsigned long val; struct l2cap_conf_rfc rfc = { .mode = L2CAP_MODE_BASIC }; struct l2cap_conf_efs efs; BT_DBG("chan %p, rsp %p, len %d, req %p", chan, rsp, len, data); while (len >= L2CAP_CONF_OPT_SIZE) { len -= l2cap_get_conf_opt(&rsp, &type, &olen, &val); if (len < 0) break; switch (type) { case L2CAP_CONF_MTU: if (olen != 2) break; if (val < L2CAP_DEFAULT_MIN_MTU) { *result = L2CAP_CONF_UNACCEPT; chan->imtu = L2CAP_DEFAULT_MIN_MTU; } else chan->imtu = val; l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->imtu, endptr - ptr); break; case L2CAP_CONF_FLUSH_TO: if (olen != 2) break; chan->flush_to = val; l2cap_add_conf_opt(&ptr, L2CAP_CONF_FLUSH_TO, 2, chan->flush_to, endptr - ptr); break; case L2CAP_CONF_RFC: if (olen != sizeof(rfc)) break; memcpy(&rfc, (void *)val, olen); if (test_bit(CONF_STATE2_DEVICE, &chan->conf_state) && rfc.mode != chan->mode) return -ECONNREFUSED; chan->fcs = 0; l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), (unsigned long) &rfc, endptr - ptr); break; case L2CAP_CONF_EWS: if (olen != 2) break; chan->ack_win = min_t(u16, val, chan->ack_win); l2cap_add_conf_opt(&ptr, L2CAP_CONF_EWS, 2, chan->tx_win, endptr - ptr); break; case L2CAP_CONF_EFS: if (olen != sizeof(efs)) break; memcpy(&efs, (void *)val, olen); if (chan->local_stype != L2CAP_SERV_NOTRAFIC && efs.stype != L2CAP_SERV_NOTRAFIC && efs.stype != chan->local_stype) return -ECONNREFUSED; l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs), (unsigned long) &efs, endptr - ptr); break; case L2CAP_CONF_FCS: if (olen != 1) break; if (*result == L2CAP_CONF_PENDING) if (val == L2CAP_FCS_NONE) set_bit(CONF_RECV_NO_FCS, &chan->conf_state); break; } } if (chan->mode == L2CAP_MODE_BASIC && chan->mode != rfc.mode) return -ECONNREFUSED; chan->mode = rfc.mode; if (*result == L2CAP_CONF_SUCCESS || *result == L2CAP_CONF_PENDING) { switch (rfc.mode) { case L2CAP_MODE_ERTM: chan->retrans_timeout = le16_to_cpu(rfc.retrans_timeout); chan->monitor_timeout = le16_to_cpu(rfc.monitor_timeout); chan->mps = le16_to_cpu(rfc.max_pdu_size); if (!test_bit(FLAG_EXT_CTRL, &chan->flags)) chan->ack_win = min_t(u16, chan->ack_win, rfc.txwin_size); if (test_bit(FLAG_EFS_ENABLE, &chan->flags)) { chan->local_msdu = le16_to_cpu(efs.msdu); chan->local_sdu_itime = le32_to_cpu(efs.sdu_itime); chan->local_acc_lat = le32_to_cpu(efs.acc_lat); chan->local_flush_to = le32_to_cpu(efs.flush_to); } break; case L2CAP_MODE_STREAMING: chan->mps = le16_to_cpu(rfc.max_pdu_size); } } req->dcid = cpu_to_le16(chan->dcid); req->flags = cpu_to_le16(0); return ptr - data; } static int l2cap_build_conf_rsp(struct l2cap_chan *chan, void *data, u16 result, u16 flags) { struct l2cap_conf_rsp *rsp = data; void *ptr = rsp->data; BT_DBG("chan %p", chan); rsp->scid = cpu_to_le16(chan->dcid); rsp->result = cpu_to_le16(result); rsp->flags = cpu_to_le16(flags); return ptr - data; } void __l2cap_le_connect_rsp_defer(struct l2cap_chan *chan) { struct l2cap_le_conn_rsp rsp; struct l2cap_conn *conn = chan->conn; BT_DBG("chan %p", chan); rsp.dcid = cpu_to_le16(chan->scid); rsp.mtu = cpu_to_le16(chan->imtu); rsp.mps = cpu_to_le16(chan->mps); rsp.credits = cpu_to_le16(chan->rx_credits); rsp.result = cpu_to_le16(L2CAP_CR_LE_SUCCESS); l2cap_send_cmd(conn, chan->ident, L2CAP_LE_CONN_RSP, sizeof(rsp), &rsp); } static void l2cap_ecred_list_defer(struct l2cap_chan *chan, void *data) { int *result = data; if (*result || test_bit(FLAG_ECRED_CONN_REQ_SENT, &chan->flags)) return; switch (chan->state) { case BT_CONNECT2: /* If channel still pending accept add to result */ (*result)++; return; case BT_CONNECTED: return; default: /* If not connected or pending accept it has been refused */ *result = -ECONNREFUSED; return; } } struct l2cap_ecred_rsp_data { struct { struct l2cap_ecred_conn_rsp_hdr rsp; __le16 scid[L2CAP_ECRED_MAX_CID]; } __packed pdu; int count; }; static void l2cap_ecred_rsp_defer(struct l2cap_chan *chan, void *data) { struct l2cap_ecred_rsp_data *rsp = data; struct l2cap_ecred_conn_rsp *rsp_flex = container_of(&rsp->pdu.rsp, struct l2cap_ecred_conn_rsp, hdr); /* Check if channel for outgoing connection or if it wasn't deferred * since in those cases it must be skipped. */ if (test_bit(FLAG_ECRED_CONN_REQ_SENT, &chan->flags) || !test_and_clear_bit(FLAG_DEFER_SETUP, &chan->flags)) return; /* Reset ident so only one response is sent */ chan->ident = 0; /* Include all channels pending with the same ident */ if (!rsp->pdu.rsp.result) rsp_flex->dcid[rsp->count++] = cpu_to_le16(chan->scid); else l2cap_chan_del(chan, ECONNRESET); } void __l2cap_ecred_conn_rsp_defer(struct l2cap_chan *chan) { struct l2cap_conn *conn = chan->conn; struct l2cap_ecred_rsp_data data; u16 id = chan->ident; int result = 0; if (!id) return; BT_DBG("chan %p id %d", chan, id); memset(&data, 0, sizeof(data)); data.pdu.rsp.mtu = cpu_to_le16(chan->imtu); data.pdu.rsp.mps = cpu_to_le16(chan->mps); data.pdu.rsp.credits = cpu_to_le16(chan->rx_credits); data.pdu.rsp.result = cpu_to_le16(L2CAP_CR_LE_SUCCESS); /* Verify that all channels are ready */ __l2cap_chan_list_id(conn, id, l2cap_ecred_list_defer, &result); if (result > 0) return; if (result < 0) data.pdu.rsp.result = cpu_to_le16(L2CAP_CR_LE_AUTHORIZATION); /* Build response */ __l2cap_chan_list_id(conn, id, l2cap_ecred_rsp_defer, &data); l2cap_send_cmd(conn, id, L2CAP_ECRED_CONN_RSP, sizeof(data.pdu.rsp) + (data.count * sizeof(__le16)), &data.pdu); } void __l2cap_connect_rsp_defer(struct l2cap_chan *chan) { struct l2cap_conn_rsp rsp; struct l2cap_conn *conn = chan->conn; u8 buf[128]; u8 rsp_code; rsp.scid = cpu_to_le16(chan->dcid); rsp.dcid = cpu_to_le16(chan->scid); rsp.result = cpu_to_le16(L2CAP_CR_SUCCESS); rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO); rsp_code = L2CAP_CONN_RSP; BT_DBG("chan %p rsp_code %u", chan, rsp_code); l2cap_send_cmd(conn, chan->ident, rsp_code, sizeof(rsp), &rsp); if (test_and_set_bit(CONF_REQ_SENT, &chan->conf_state)) return; l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, l2cap_build_conf_req(chan, buf, sizeof(buf)), buf); chan->num_conf_req++; } static void l2cap_conf_rfc_get(struct l2cap_chan *chan, void *rsp, int len) { int type, olen; unsigned long val; /* Use sane default values in case a misbehaving remote device * did not send an RFC or extended window size option. */ u16 txwin_ext = chan->ack_win; struct l2cap_conf_rfc rfc = { .mode = chan->mode, .retrans_timeout = cpu_to_le16(L2CAP_DEFAULT_RETRANS_TO), .monitor_timeout = cpu_to_le16(L2CAP_DEFAULT_MONITOR_TO), .max_pdu_size = cpu_to_le16(chan->imtu), .txwin_size = min_t(u16, chan->ack_win, L2CAP_DEFAULT_TX_WINDOW), }; BT_DBG("chan %p, rsp %p, len %d", chan, rsp, len); if ((chan->mode != L2CAP_MODE_ERTM) && (chan->mode != L2CAP_MODE_STREAMING)) return; while (len >= L2CAP_CONF_OPT_SIZE) { len -= l2cap_get_conf_opt(&rsp, &type, &olen, &val); if (len < 0) break; switch (type) { case L2CAP_CONF_RFC: if (olen != sizeof(rfc)) break; memcpy(&rfc, (void *)val, olen); break; case L2CAP_CONF_EWS: if (olen != 2) break; txwin_ext = val; break; } } switch (rfc.mode) { case L2CAP_MODE_ERTM: chan->retrans_timeout = le16_to_cpu(rfc.retrans_timeout); chan->monitor_timeout = le16_to_cpu(rfc.monitor_timeout); chan->mps = le16_to_cpu(rfc.max_pdu_size); if (test_bit(FLAG_EXT_CTRL, &chan->flags)) chan->ack_win = min_t(u16, chan->ack_win, txwin_ext); else chan->ack_win = min_t(u16, chan->ack_win, rfc.txwin_size); break; case L2CAP_MODE_STREAMING: chan->mps = le16_to_cpu(rfc.max_pdu_size); } } static inline int l2cap_command_rej(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) { struct l2cap_cmd_rej_unk *rej = (struct l2cap_cmd_rej_unk *) data; if (cmd_len < sizeof(*rej)) return -EPROTO; if (rej->reason != L2CAP_REJ_NOT_UNDERSTOOD) return 0; if ((conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) && cmd->ident == conn->info_ident) { cancel_delayed_work(&conn->info_timer); conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE; conn->info_ident = 0; l2cap_conn_start(conn); } return 0; } static void l2cap_connect(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data, u8 rsp_code) { struct l2cap_conn_req *req = (struct l2cap_conn_req *) data; struct l2cap_conn_rsp rsp; struct l2cap_chan *chan = NULL, *pchan = NULL; int result, status = L2CAP_CS_NO_INFO; u16 dcid = 0, scid = __le16_to_cpu(req->scid); __le16 psm = req->psm; BT_DBG("psm 0x%2.2x scid 0x%4.4x", __le16_to_cpu(psm), scid); /* Check if we have socket listening on psm */ pchan = l2cap_global_chan_by_psm(BT_LISTEN, psm, &conn->hcon->src, &conn->hcon->dst, ACL_LINK); if (!pchan) { result = L2CAP_CR_BAD_PSM; goto response; } l2cap_chan_lock(pchan); /* Check if the ACL is secure enough (if not SDP) */ if (psm != cpu_to_le16(L2CAP_PSM_SDP) && (!hci_conn_check_link_mode(conn->hcon) || !l2cap_check_enc_key_size(conn->hcon, pchan))) { conn->disc_reason = HCI_ERROR_AUTH_FAILURE; result = L2CAP_CR_SEC_BLOCK; goto response; } result = L2CAP_CR_NO_MEM; /* Check for valid dynamic CID range (as per Erratum 3253) */ if (scid < L2CAP_CID_DYN_START || scid > L2CAP_CID_DYN_END) { result = L2CAP_CR_INVALID_SCID; goto response; } /* Check if we already have channel with that dcid */ if (__l2cap_get_chan_by_dcid(conn, scid)) { result = L2CAP_CR_SCID_IN_USE; goto response; } chan = pchan->ops->new_connection(pchan); if (!chan) goto response; /* For certain devices (ex: HID mouse), support for authentication, * pairing and bonding is optional. For such devices, inorder to avoid * the ACL alive for too long after L2CAP disconnection, reset the ACL * disc_timeout back to HCI_DISCONN_TIMEOUT during L2CAP connect. */ conn->hcon->disc_timeout = HCI_DISCONN_TIMEOUT; bacpy(&chan->src, &conn->hcon->src); bacpy(&chan->dst, &conn->hcon->dst); chan->src_type = bdaddr_src_type(conn->hcon); chan->dst_type = bdaddr_dst_type(conn->hcon); chan->psm = psm; chan->dcid = scid; __l2cap_chan_add(conn, chan); dcid = chan->scid; __set_chan_timer(chan, chan->ops->get_sndtimeo(chan)); chan->ident = cmd->ident; if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE) { if (l2cap_chan_check_security(chan, false)) { if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) { l2cap_state_change(chan, BT_CONNECT2); result = L2CAP_CR_PEND; status = L2CAP_CS_AUTHOR_PEND; chan->ops->defer(chan); } else { l2cap_state_change(chan, BT_CONFIG); result = L2CAP_CR_SUCCESS; status = L2CAP_CS_NO_INFO; } } else { l2cap_state_change(chan, BT_CONNECT2); result = L2CAP_CR_PEND; status = L2CAP_CS_AUTHEN_PEND; } } else { l2cap_state_change(chan, BT_CONNECT2); result = L2CAP_CR_PEND; status = L2CAP_CS_NO_INFO; } response: rsp.scid = cpu_to_le16(scid); rsp.dcid = cpu_to_le16(dcid); rsp.result = cpu_to_le16(result); rsp.status = cpu_to_le16(status); l2cap_send_cmd(conn, cmd->ident, rsp_code, sizeof(rsp), &rsp); if (!pchan) return; if (result == L2CAP_CR_PEND && status == L2CAP_CS_NO_INFO) { struct l2cap_info_req info; info.type = cpu_to_le16(L2CAP_IT_FEAT_MASK); conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_SENT; conn->info_ident = l2cap_get_ident(conn); schedule_delayed_work(&conn->info_timer, L2CAP_INFO_TIMEOUT); l2cap_send_cmd(conn, conn->info_ident, L2CAP_INFO_REQ, sizeof(info), &info); } if (chan && !test_bit(CONF_REQ_SENT, &chan->conf_state) && result == L2CAP_CR_SUCCESS) { u8 buf[128]; set_bit(CONF_REQ_SENT, &chan->conf_state); l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, l2cap_build_conf_req(chan, buf, sizeof(buf)), buf); chan->num_conf_req++; } l2cap_chan_unlock(pchan); l2cap_chan_put(pchan); } static int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) { if (cmd_len < sizeof(struct l2cap_conn_req)) return -EPROTO; l2cap_connect(conn, cmd, data, L2CAP_CONN_RSP); return 0; } static int l2cap_connect_create_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) { struct l2cap_conn_rsp *rsp = (struct l2cap_conn_rsp *) data; u16 scid, dcid, result, status; struct l2cap_chan *chan; u8 req[128]; int err; if (cmd_len < sizeof(*rsp)) return -EPROTO; scid = __le16_to_cpu(rsp->scid); dcid = __le16_to_cpu(rsp->dcid); result = __le16_to_cpu(rsp->result); status = __le16_to_cpu(rsp->status); if (result == L2CAP_CR_SUCCESS && (dcid < L2CAP_CID_DYN_START || dcid > L2CAP_CID_DYN_END)) return -EPROTO; BT_DBG("dcid 0x%4.4x scid 0x%4.4x result 0x%2.2x status 0x%2.2x", dcid, scid, result, status); if (scid) { chan = __l2cap_get_chan_by_scid(conn, scid); if (!chan) return -EBADSLT; } else { chan = __l2cap_get_chan_by_ident(conn, cmd->ident); if (!chan) return -EBADSLT; } chan = l2cap_chan_hold_unless_zero(chan); if (!chan) return -EBADSLT; err = 0; l2cap_chan_lock(chan); switch (result) { case L2CAP_CR_SUCCESS: if (__l2cap_get_chan_by_dcid(conn, dcid)) { err = -EBADSLT; break; } l2cap_state_change(chan, BT_CONFIG); chan->ident = 0; chan->dcid = dcid; clear_bit(CONF_CONNECT_PEND, &chan->conf_state); if (test_and_set_bit(CONF_REQ_SENT, &chan->conf_state)) break; l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, l2cap_build_conf_req(chan, req, sizeof(req)), req); chan->num_conf_req++; break; case L2CAP_CR_PEND: set_bit(CONF_CONNECT_PEND, &chan->conf_state); break; default: l2cap_chan_del(chan, ECONNREFUSED); break; } l2cap_chan_unlock(chan); l2cap_chan_put(chan); return err; } static inline void set_default_fcs(struct l2cap_chan *chan) { /* FCS is enabled only in ERTM or streaming mode, if one or both * sides request it. */ if (chan->mode != L2CAP_MODE_ERTM && chan->mode != L2CAP_MODE_STREAMING) chan->fcs = L2CAP_FCS_NONE; else if (!test_bit(CONF_RECV_NO_FCS, &chan->conf_state)) chan->fcs = L2CAP_FCS_CRC16; } static void l2cap_send_efs_conf_rsp(struct l2cap_chan *chan, void *data, u8 ident, u16 flags) { struct l2cap_conn *conn = chan->conn; BT_DBG("conn %p chan %p ident %d flags 0x%4.4x", conn, chan, ident, flags); clear_bit(CONF_LOC_CONF_PEND, &chan->conf_state); set_bit(CONF_OUTPUT_DONE, &chan->conf_state); l2cap_send_cmd(conn, ident, L2CAP_CONF_RSP, l2cap_build_conf_rsp(chan, data, L2CAP_CONF_SUCCESS, flags), data); } static void cmd_reject_invalid_cid(struct l2cap_conn *conn, u8 ident, u16 scid, u16 dcid) { struct l2cap_cmd_rej_cid rej; rej.reason = cpu_to_le16(L2CAP_REJ_INVALID_CID); rej.scid = __cpu_to_le16(scid); rej.dcid = __cpu_to_le16(dcid); l2cap_send_cmd(conn, ident, L2CAP_COMMAND_REJ, sizeof(rej), &rej); } static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) { struct l2cap_conf_req *req = (struct l2cap_conf_req *) data; u16 dcid, flags; u8 rsp[64]; struct l2cap_chan *chan; int len, err = 0; if (cmd_len < sizeof(*req)) return -EPROTO; dcid = __le16_to_cpu(req->dcid); flags = __le16_to_cpu(req->flags); BT_DBG("dcid 0x%4.4x flags 0x%2.2x", dcid, flags); chan = l2cap_get_chan_by_scid(conn, dcid); if (!chan) { cmd_reject_invalid_cid(conn, cmd->ident, dcid, 0); return 0; } if (chan->state != BT_CONFIG && chan->state != BT_CONNECT2 && chan->state != BT_CONNECTED) { cmd_reject_invalid_cid(conn, cmd->ident, chan->scid, chan->dcid); goto unlock; } /* Reject if config buffer is too small. */ len = cmd_len - sizeof(*req); if (chan->conf_len + len > sizeof(chan->conf_req)) { l2cap_send_cmd(conn, cmd->ident, L2CAP_CONF_RSP, l2cap_build_conf_rsp(chan, rsp, L2CAP_CONF_REJECT, flags), rsp); goto unlock; } /* Store config. */ memcpy(chan->conf_req + chan->conf_len, req->data, len); chan->conf_len += len; if (flags & L2CAP_CONF_FLAG_CONTINUATION) { /* Incomplete config. Send empty response. */ l2cap_send_cmd(conn, cmd->ident, L2CAP_CONF_RSP, l2cap_build_conf_rsp(chan, rsp, L2CAP_CONF_SUCCESS, flags), rsp); goto unlock; } /* Complete config. */ len = l2cap_parse_conf_req(chan, rsp, sizeof(rsp)); if (len < 0) { l2cap_send_disconn_req(chan, ECONNRESET); goto unlock; } chan->ident = cmd->ident; l2cap_send_cmd(conn, cmd->ident, L2CAP_CONF_RSP, len, rsp); if (chan->num_conf_rsp < L2CAP_CONF_MAX_CONF_RSP) chan->num_conf_rsp++; /* Reset config buffer. */ chan->conf_len = 0; if (!test_bit(CONF_OUTPUT_DONE, &chan->conf_state)) goto unlock; if (test_bit(CONF_INPUT_DONE, &chan->conf_state)) { set_default_fcs(chan); if (chan->mode == L2CAP_MODE_ERTM || chan->mode == L2CAP_MODE_STREAMING) err = l2cap_ertm_init(chan); if (err < 0) l2cap_send_disconn_req(chan, -err); else l2cap_chan_ready(chan); goto unlock; } if (!test_and_set_bit(CONF_REQ_SENT, &chan->conf_state)) { u8 buf[64]; l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, l2cap_build_conf_req(chan, buf, sizeof(buf)), buf); chan->num_conf_req++; } /* Got Conf Rsp PENDING from remote side and assume we sent Conf Rsp PENDING in the code above */ if (test_bit(CONF_REM_CONF_PEND, &chan->conf_state) && test_bit(CONF_LOC_CONF_PEND, &chan->conf_state)) { /* check compatibility */ /* Send rsp for BR/EDR channel */ l2cap_send_efs_conf_rsp(chan, rsp, cmd->ident, flags); } unlock: l2cap_chan_unlock(chan); l2cap_chan_put(chan); return err; } static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) { struct l2cap_conf_rsp *rsp = (struct l2cap_conf_rsp *)data; u16 scid, flags, result; struct l2cap_chan *chan; int len = cmd_len - sizeof(*rsp); int err = 0; if (cmd_len < sizeof(*rsp)) return -EPROTO; scid = __le16_to_cpu(rsp->scid); flags = __le16_to_cpu(rsp->flags); result = __le16_to_cpu(rsp->result); BT_DBG("scid 0x%4.4x flags 0x%2.2x result 0x%2.2x len %d", scid, flags, result, len); chan = l2cap_get_chan_by_scid(conn, scid); if (!chan) return 0; switch (result) { case L2CAP_CONF_SUCCESS: l2cap_conf_rfc_get(chan, rsp->data, len); clear_bit(CONF_REM_CONF_PEND, &chan->conf_state); break; case L2CAP_CONF_PENDING: set_bit(CONF_REM_CONF_PEND, &chan->conf_state); if (test_bit(CONF_LOC_CONF_PEND, &chan->conf_state)) { char buf[64]; len = l2cap_parse_conf_rsp(chan, rsp->data, len, buf, sizeof(buf), &result); if (len < 0) { l2cap_send_disconn_req(chan, ECONNRESET); goto done; } l2cap_send_efs_conf_rsp(chan, buf, cmd->ident, 0); } goto done; case L2CAP_CONF_UNKNOWN: case L2CAP_CONF_UNACCEPT: if (chan->num_conf_rsp <= L2CAP_CONF_MAX_CONF_RSP) { char req[64]; if (len > sizeof(req) - sizeof(struct l2cap_conf_req)) { l2cap_send_disconn_req(chan, ECONNRESET); goto done; } /* throw out any old stored conf requests */ result = L2CAP_CONF_SUCCESS; len = l2cap_parse_conf_rsp(chan, rsp->data, len, req, sizeof(req), &result); if (len < 0) { l2cap_send_disconn_req(chan, ECONNRESET); goto done; } l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, len, req); chan->num_conf_req++; if (result != L2CAP_CONF_SUCCESS) goto done; break; } fallthrough; default: l2cap_chan_set_err(chan, ECONNRESET); __set_chan_timer(chan, L2CAP_DISC_REJ_TIMEOUT); l2cap_send_disconn_req(chan, ECONNRESET); goto done; } if (flags & L2CAP_CONF_FLAG_CONTINUATION) goto done; set_bit(CONF_INPUT_DONE, &chan->conf_state); if (test_bit(CONF_OUTPUT_DONE, &chan->conf_state)) { set_default_fcs(chan); if (chan->mode == L2CAP_MODE_ERTM || chan->mode == L2CAP_MODE_STREAMING) err = l2cap_ertm_init(chan); if (err < 0) l2cap_send_disconn_req(chan, -err); else l2cap_chan_ready(chan); } done: l2cap_chan_unlock(chan); l2cap_chan_put(chan); return err; } static inline int l2cap_disconnect_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) { struct l2cap_disconn_req *req = (struct l2cap_disconn_req *) data; struct l2cap_disconn_rsp rsp; u16 dcid, scid; struct l2cap_chan *chan; if (cmd_len != sizeof(*req)) return -EPROTO; scid = __le16_to_cpu(req->scid); dcid = __le16_to_cpu(req->dcid); BT_DBG("scid 0x%4.4x dcid 0x%4.4x", scid, dcid); chan = l2cap_get_chan_by_scid(conn, dcid); if (!chan) { cmd_reject_invalid_cid(conn, cmd->ident, dcid, scid); return 0; } rsp.dcid = cpu_to_le16(chan->scid); rsp.scid = cpu_to_le16(chan->dcid); l2cap_send_cmd(conn, cmd->ident, L2CAP_DISCONN_RSP, sizeof(rsp), &rsp); chan->ops->set_shutdown(chan); l2cap_chan_del(chan, ECONNRESET); chan->ops->close(chan); l2cap_chan_unlock(chan); l2cap_chan_put(chan); return 0; } static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) { struct l2cap_disconn_rsp *rsp = (struct l2cap_disconn_rsp *) data; u16 dcid, scid; struct l2cap_chan *chan; if (cmd_len != sizeof(*rsp)) return -EPROTO; scid = __le16_to_cpu(rsp->scid); dcid = __le16_to_cpu(rsp->dcid); BT_DBG("dcid 0x%4.4x scid 0x%4.4x", dcid, scid); chan = l2cap_get_chan_by_scid(conn, scid); if (!chan) { return 0; } if (chan->state != BT_DISCONN) { l2cap_chan_unlock(chan); l2cap_chan_put(chan); return 0; } l2cap_chan_del(chan, 0); chan->ops->close(chan); l2cap_chan_unlock(chan); l2cap_chan_put(chan); return 0; } static inline int l2cap_information_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) { struct l2cap_info_req *req = (struct l2cap_info_req *) data; u16 type; if (cmd_len != sizeof(*req)) return -EPROTO; type = __le16_to_cpu(req->type); BT_DBG("type 0x%4.4x", type); if (type == L2CAP_IT_FEAT_MASK) { u8 buf[8]; u32 feat_mask = l2cap_feat_mask; struct l2cap_info_rsp *rsp = (struct l2cap_info_rsp *) buf; rsp->type = cpu_to_le16(L2CAP_IT_FEAT_MASK); rsp->result = cpu_to_le16(L2CAP_IR_SUCCESS); if (!disable_ertm) feat_mask |= L2CAP_FEAT_ERTM | L2CAP_FEAT_STREAMING | L2CAP_FEAT_FCS; put_unaligned_le32(feat_mask, rsp->data); l2cap_send_cmd(conn, cmd->ident, L2CAP_INFO_RSP, sizeof(buf), buf); } else if (type == L2CAP_IT_FIXED_CHAN) { u8 buf[12]; struct l2cap_info_rsp *rsp = (struct l2cap_info_rsp *) buf; rsp->type = cpu_to_le16(L2CAP_IT_FIXED_CHAN); rsp->result = cpu_to_le16(L2CAP_IR_SUCCESS); rsp->data[0] = conn->local_fixed_chan; memset(rsp->data + 1, 0, 7); l2cap_send_cmd(conn, cmd->ident, L2CAP_INFO_RSP, sizeof(buf), buf); } else { struct l2cap_info_rsp rsp; rsp.type = cpu_to_le16(type); rsp.result = cpu_to_le16(L2CAP_IR_NOTSUPP); l2cap_send_cmd(conn, cmd->ident, L2CAP_INFO_RSP, sizeof(rsp), &rsp); } return 0; } static inline int l2cap_information_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) { struct l2cap_info_rsp *rsp = (struct l2cap_info_rsp *) data; u16 type, result; if (cmd_len < sizeof(*rsp)) return -EPROTO; type = __le16_to_cpu(rsp->type); result = __le16_to_cpu(rsp->result); BT_DBG("type 0x%4.4x result 0x%2.2x", type, result); /* L2CAP Info req/rsp are unbound to channels, add extra checks */ if (cmd->ident != conn->info_ident || conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE) return 0; cancel_delayed_work(&conn->info_timer); if (result != L2CAP_IR_SUCCESS) { conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE; conn->info_ident = 0; l2cap_conn_start(conn); return 0; } switch (type) { case L2CAP_IT_FEAT_MASK: conn->feat_mask = get_unaligned_le32(rsp->data); if (conn->feat_mask & L2CAP_FEAT_FIXED_CHAN) { struct l2cap_info_req req; req.type = cpu_to_le16(L2CAP_IT_FIXED_CHAN); conn->info_ident = l2cap_get_ident(conn); l2cap_send_cmd(conn, conn->info_ident, L2CAP_INFO_REQ, sizeof(req), &req); } else { conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE; conn->info_ident = 0; l2cap_conn_start(conn); } break; case L2CAP_IT_FIXED_CHAN: conn->remote_fixed_chan = rsp->data[0]; conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE; conn->info_ident = 0; l2cap_conn_start(conn); break; } return 0; } static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) { struct hci_conn *hcon = conn->hcon; struct l2cap_conn_param_update_req *req; struct l2cap_conn_param_update_rsp rsp; u16 min, max, latency, to_multiplier; int err; if (hcon->role != HCI_ROLE_MASTER) return -EINVAL; if (cmd_len != sizeof(struct l2cap_conn_param_update_req)) return -EPROTO; req = (struct l2cap_conn_param_update_req *) data; min = __le16_to_cpu(req->min); max = __le16_to_cpu(req->max); latency = __le16_to_cpu(req->latency); to_multiplier = __le16_to_cpu(req->to_multiplier); BT_DBG("min 0x%4.4x max 0x%4.4x latency: 0x%4.4x Timeout: 0x%4.4x", min, max, latency, to_multiplier); memset(&rsp, 0, sizeof(rsp)); err = hci_check_conn_params(min, max, latency, to_multiplier); if (err) rsp.result = cpu_to_le16(L2CAP_CONN_PARAM_REJECTED); else rsp.result = cpu_to_le16(L2CAP_CONN_PARAM_ACCEPTED); l2cap_send_cmd(conn, cmd->ident, L2CAP_CONN_PARAM_UPDATE_RSP, sizeof(rsp), &rsp); if (!err) { u8 store_hint; store_hint = hci_le_conn_update(hcon, min, max, latency, to_multiplier); mgmt_new_conn_param(hcon->hdev, &hcon->dst, hcon->dst_type, store_hint, min, max, latency, to_multiplier); } return 0; } static int l2cap_le_connect_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) { struct l2cap_le_conn_rsp *rsp = (struct l2cap_le_conn_rsp *) data; struct hci_conn *hcon = conn->hcon; u16 dcid, mtu, mps, credits, result; struct l2cap_chan *chan; int err, sec_level; if (cmd_len < sizeof(*rsp)) return -EPROTO; dcid = __le16_to_cpu(rsp->dcid); mtu = __le16_to_cpu(rsp->mtu); mps = __le16_to_cpu(rsp->mps); credits = __le16_to_cpu(rsp->credits); result = __le16_to_cpu(rsp->result); if (result == L2CAP_CR_LE_SUCCESS && (mtu < 23 || mps < 23 || dcid < L2CAP_CID_DYN_START || dcid > L2CAP_CID_LE_DYN_END)) return -EPROTO; BT_DBG("dcid 0x%4.4x mtu %u mps %u credits %u result 0x%2.2x", dcid, mtu, mps, credits, result); chan = __l2cap_get_chan_by_ident(conn, cmd->ident); if (!chan) return -EBADSLT; err = 0; l2cap_chan_lock(chan); switch (result) { case L2CAP_CR_LE_SUCCESS: if (__l2cap_get_chan_by_dcid(conn, dcid)) { err = -EBADSLT; break; } chan->ident = 0; chan->dcid = dcid; chan->omtu = mtu; chan->remote_mps = mps; chan->tx_credits = credits; l2cap_chan_ready(chan); break; case L2CAP_CR_LE_AUTHENTICATION: case L2CAP_CR_LE_ENCRYPTION: /* If we already have MITM protection we can't do * anything. */ if (hcon->sec_level > BT_SECURITY_MEDIUM) { l2cap_chan_del(chan, ECONNREFUSED); break; } sec_level = hcon->sec_level + 1; if (chan->sec_level < sec_level) chan->sec_level = sec_level; /* We'll need to send a new Connect Request */ clear_bit(FLAG_LE_CONN_REQ_SENT, &chan->flags); smp_conn_security(hcon, chan->sec_level); break; default: l2cap_chan_del(chan, ECONNREFUSED); break; } l2cap_chan_unlock(chan); return err; } static inline int l2cap_bredr_sig_cmd(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) { int err = 0; switch (cmd->code) { case L2CAP_COMMAND_REJ: l2cap_command_rej(conn, cmd, cmd_len, data); break; case L2CAP_CONN_REQ: err = l2cap_connect_req(conn, cmd, cmd_len, data); break; case L2CAP_CONN_RSP: l2cap_connect_create_rsp(conn, cmd, cmd_len, data); break; case L2CAP_CONF_REQ: err = l2cap_config_req(conn, cmd, cmd_len, data); break; case L2CAP_CONF_RSP: l2cap_config_rsp(conn, cmd, cmd_len, data); break; case L2CAP_DISCONN_REQ: err = l2cap_disconnect_req(conn, cmd, cmd_len, data); break; case L2CAP_DISCONN_RSP: l2cap_disconnect_rsp(conn, cmd, cmd_len, data); break; case L2CAP_ECHO_REQ: l2cap_send_cmd(conn, cmd->ident, L2CAP_ECHO_RSP, cmd_len, data); break; case L2CAP_ECHO_RSP: break; case L2CAP_INFO_REQ: err = l2cap_information_req(conn, cmd, cmd_len, data); break; case L2CAP_INFO_RSP: l2cap_information_rsp(conn, cmd, cmd_len, data); break; default: BT_ERR("Unknown BR/EDR signaling command 0x%2.2x", cmd->code); err = -EINVAL; break; } return err; } static int l2cap_le_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) { struct l2cap_le_conn_req *req = (struct l2cap_le_conn_req *) data; struct l2cap_le_conn_rsp rsp; struct l2cap_chan *chan, *pchan; u16 dcid, scid, credits, mtu, mps; __le16 psm; u8 result; if (cmd_len != sizeof(*req)) return -EPROTO; scid = __le16_to_cpu(req->scid); mtu = __le16_to_cpu(req->mtu); mps = __le16_to_cpu(req->mps); psm = req->psm; dcid = 0; credits = 0; if (mtu < 23 || mps < 23) return -EPROTO; BT_DBG("psm 0x%2.2x scid 0x%4.4x mtu %u mps %u", __le16_to_cpu(psm), scid, mtu, mps); /* BLUETOOTH CORE SPECIFICATION Vers