Total coverage: 250685 (16%)of 1580181
7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 /* SPDX-License-Identifier: GPL-2.0-only */ /* * linux/fs/pnode.h * * (C) Copyright IBM Corporation 2005. */ #ifndef _LINUX_PNODE_H #define _LINUX_PNODE_H #include <linux/list.h> #include "mount.h" #define IS_MNT_SHARED(m) ((m)->mnt_t_flags & T_SHARED) #define IS_MNT_SLAVE(m) ((m)->mnt_master) #define IS_MNT_NEW(m) (!(m)->mnt_ns) #define CLEAR_MNT_SHARED(m) ((m)->mnt_t_flags &= ~T_SHARED) #define IS_MNT_UNBINDABLE(m) ((m)->mnt_t_flags & T_UNBINDABLE) #define IS_MNT_MARKED(m) ((m)->mnt_t_flags & T_MARKED) #define SET_MNT_MARK(m) ((m)->mnt_t_flags |= T_MARKED) #define CLEAR_MNT_MARK(m) ((m)->mnt_t_flags &= ~T_MARKED) #define IS_MNT_LOCKED(m) ((m)->mnt.mnt_flags & MNT_LOCKED) #define CL_EXPIRE 0x01 #define CL_SLAVE 0x02 #define CL_COPY_UNBINDABLE 0x04 #define CL_MAKE_SHARED 0x08 #define CL_PRIVATE 0x10 #define CL_COPY_MNT_NS_FILE 0x40 /* * EXCL[namespace_sem] */ static inline void set_mnt_shared(struct mount *mnt) { mnt->mnt_t_flags &= ~T_SHARED_MASK; mnt->mnt_t_flags |= T_SHARED; } static inline bool peers(const struct mount *m1, const struct mount *m2) { return m1->mnt_group_id == m2->mnt_group_id && m1->mnt_group_id; } void change_mnt_propagation(struct mount *, int); void bulk_make_private(struct list_head *); int propagate_mnt(struct mount *, struct mountpoint *, struct mount *, struct hlist_head *); void propagate_umount(struct list_head *); int propagate_mount_busy(struct mount *, int); void propagate_mount_unlock(struct mount *); void mnt_release_group_id(struct mount *); int get_dominating_id(struct mount *mnt, const struct path *root); int mnt_get_count(struct mount *mnt); void mnt_set_mountpoint(struct mount *, struct mountpoint *, struct mount *); void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct mount *mnt); struct mount *copy_tree(struct mount *, struct dentry *, int); bool is_path_reachable(struct mount *, struct dentry *, const struct path *root); int count_mounts(struct mnt_namespace *ns, struct mount *mnt); bool propagation_would_overmount(const struct mount *from, const struct mount *to, const struct mountpoint *mp); #endif /* _LINUX_PNODE_H */
62 37 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 // SPDX-License-Identifier: GPL-2.0-or-later /* Sysfs attributes of bond slaves * * Copyright (c) 2014 Scott Feldman <sfeldma@cumulusnetworks.com> */ #include <linux/capability.h> #include <linux/kernel.h> #include <linux/netdevice.h> #include <net/bonding.h> struct slave_attribute { struct attribute attr; ssize_t (*show)(struct slave *, char *); }; #define SLAVE_ATTR_RO(_name) \ const struct slave_attribute slave_attr_##_name = __ATTR_RO(_name) static ssize_t state_show(struct slave *slave, char *buf) { switch (bond_slave_state(slave)) { case BOND_STATE_ACTIVE: return sysfs_emit(buf, "active\n"); case BOND_STATE_BACKUP: return sysfs_emit(buf, "backup\n"); default: return sysfs_emit(buf, "UNKNOWN\n"); } } static SLAVE_ATTR_RO(state); static ssize_t mii_status_show(struct slave *slave, char *buf) { return sysfs_emit(buf, "%s\n", bond_slave_link_status(slave->link)); } static SLAVE_ATTR_RO(mii_status); static ssize_t link_failure_count_show(struct slave *slave, char *buf) { return sysfs_emit(buf, "%d\n", slave->link_failure_count); } static SLAVE_ATTR_RO(link_failure_count); static ssize_t perm_hwaddr_show(struct slave *slave, char *buf) { return sysfs_emit(buf, "%*phC\n", slave->dev->addr_len, slave->perm_hwaddr); } static SLAVE_ATTR_RO(perm_hwaddr); static ssize_t queue_id_show(struct slave *slave, char *buf) { return sysfs_emit(buf, "%d\n", READ_ONCE(slave->queue_id)); } static SLAVE_ATTR_RO(queue_id); static ssize_t ad_aggregator_id_show(struct slave *slave, char *buf) { const struct aggregator *agg; if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) { agg = SLAVE_AD_INFO(slave)->port.aggregator; if (agg) return sysfs_emit(buf, "%d\n", agg->aggregator_identifier); } return sysfs_emit(buf, "N/A\n"); } static SLAVE_ATTR_RO(ad_aggregator_id); static ssize_t ad_actor_oper_port_state_show(struct slave *slave, char *buf) { const struct port *ad_port; if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) { ad_port = &SLAVE_AD_INFO(slave)->port; if (ad_port->aggregator) return sysfs_emit(buf, "%u\n", ad_port->actor_oper_port_state); } return sysfs_emit(buf, "N/A\n"); } static SLAVE_ATTR_RO(ad_actor_oper_port_state); static ssize_t ad_partner_oper_port_state_show(struct slave *slave, char *buf) { const struct port *ad_port; if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) { ad_port = &SLAVE_AD_INFO(slave)->port; if (ad_port->aggregator) return sysfs_emit(buf, "%u\n", ad_port->partner_oper.port_state); } return sysfs_emit(buf, "N/A\n"); } static SLAVE_ATTR_RO(ad_partner_oper_port_state); static const struct attribute *slave_attrs[] = { &slave_attr_state.attr, &slave_attr_mii_status.attr, &slave_attr_link_failure_count.attr, &slave_attr_perm_hwaddr.attr, &slave_attr_queue_id.attr, &slave_attr_ad_aggregator_id.attr, &slave_attr_ad_actor_oper_port_state.attr, &slave_attr_ad_partner_oper_port_state.attr, NULL }; #define to_slave_attr(_at) container_of(_at, struct slave_attribute, attr) static ssize_t slave_show(struct kobject *kobj, struct attribute *attr, char *buf) { struct slave_attribute *slave_attr = to_slave_attr(attr); struct slave *slave = to_slave(kobj); return slave_attr->show(slave, buf); } const struct sysfs_ops slave_sysfs_ops = { .show = slave_show, }; int bond_sysfs_slave_add(struct slave *slave) { return sysfs_create_files(&slave->kobj, slave_attrs); } void bond_sysfs_slave_del(struct slave *slave) { sysfs_remove_files(&slave->kobj, slave_attrs); }
114 148 150 148 168 167 167 168 167 169 174 174 78 88 87 21 137 114 21 169 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2010 Red Hat, Inc. * Copyright (c) 2016-2021 Christoph Hellwig. */ #include <linux/iomap.h> #include "trace.h" static inline void iomap_iter_reset_iomap(struct iomap_iter *iter) { iter->status = 0; memset(&iter->iomap, 0, sizeof(iter->iomap)); memset(&iter->srcmap, 0, sizeof(iter->srcmap)); } /* * Advance the current iterator position and output the length remaining for the * current mapping. */ int iomap_iter_advance(struct iomap_iter *iter, u64 *count) { if (WARN_ON_ONCE(*count > iomap_length(iter))) return -EIO; iter->pos += *count; iter->len -= *count; *count = iomap_length(iter); return 0; } static inline void iomap_iter_done(struct iomap_iter *iter) { WARN_ON_ONCE(iter->iomap.offset > iter->pos); WARN_ON_ONCE(iter->iomap.length == 0); WARN_ON_ONCE(iter->iomap.offset + iter->iomap.length <= iter->pos); WARN_ON_ONCE(iter->iomap.flags & IOMAP_F_STALE); iter->iter_start_pos = iter->pos; trace_iomap_iter_dstmap(iter->inode, &iter->iomap); if (iter->srcmap.type != IOMAP_HOLE) trace_iomap_iter_srcmap(iter->inode, &iter->srcmap); } /** * iomap_iter - iterate over a ranges in a file * @iter: iteration structue * @ops: iomap ops provided by the file system * * Iterate over filesystem-provided space mappings for the provided file range. * * This function handles cleanup of resources acquired for iteration when the * filesystem indicates there are no more space mappings, which means that this * function must be called in a loop that continues as long it returns a * positive value. If 0 or a negative value is returned, the caller must not * return to the loop body. Within a loop body, there are two ways to break out * of the loop body: leave @iter.status unchanged, or set it to a negative * errno. */ int iomap_iter(struct iomap_iter *iter, const struct iomap_ops *ops) { bool stale = iter->iomap.flags & IOMAP_F_STALE; ssize_t advanced; u64 olen; int ret; trace_iomap_iter(iter, ops, _RET_IP_); if (!iter->iomap.length) goto begin; /* * Calculate how far the iter was advanced and the original length bytes * for ->iomap_end(). */ advanced = iter->pos - iter->iter_start_pos; olen = iter->len + advanced; if (ops->iomap_end) { ret = ops->iomap_end(iter->inode, iter->iter_start_pos, iomap_length_trim(iter, iter->iter_start_pos, olen), advanced, iter->flags, &iter->iomap); if (ret < 0 && !advanced) return ret; } /* detect old return semantics where this would advance */ if (WARN_ON_ONCE(iter->status > 0)) iter->status = -EIO; /* * Use iter->len to determine whether to continue onto the next mapping. * Explicitly terminate on error status or if the current iter has not * advanced at all (i.e. no work was done for some reason) unless the * mapping has been marked stale and needs to be reprocessed. */ if (iter->status < 0) ret = iter->status; else if (iter->len == 0 || (!advanced && !stale)) ret = 0; else ret = 1; iomap_iter_reset_iomap(iter); if (ret <= 0) return ret; begin: ret = ops->iomap_begin(iter->inode, iter->pos, iter->len, iter->flags, &iter->iomap, &iter->srcmap); if (ret < 0) return ret; iomap_iter_done(iter); return 1; }
18 18 18 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580 5581 5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643 5644 5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 5672 5673 5674 5675 5676 5677 5678 5679 5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706 5707 5708 5709 5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729 5730 5731 5732 5733 5734 5735 5736 5737 5738 5739 5740 5741 5742 5743 5744 5745 5746 5747 5748 5749 5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807 5808 5809 5810 5811 5812 5813 5814 5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833 5834 5835 5836 5837 5838 5839 5840 5841 5842 5843 5844 5845 5846 5847 5848 5849 5850 5851 5852 5853 5854 5855 5856 5857 5858 5859 5860 5861 5862 5863 5864 5865 5866 5867 5868 5869 5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897 5898 5899 5900 5901 5902 5903 5904 5905 5906 5907 5908 5909 5910 5911 5912 5913 5914 5915 5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042 6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 6104 6105 6106 6107 6108 6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 6122 6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145 6146 6147 6148 6149 6150 6151 6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 6180 6181 6182 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 6199 6200 6201 6202 6203 6204 6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271 6272 6273 6274 6275 6276 6277 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287 6288 6289 6290 6291 6292 6293 6294 6295 6296 6297 6298 6299 6300 6301 6302 6303 6304 6305 6306 6307 6308 6309 6310 6311 6312 6313 6314 6315 6316 6317 6318 6319 6320 6321 6322 6323 6324 6325 6326 6327 6328 6329 6330 6331 6332 6333 6334 6335 6336 6337 6338 6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366 6367 6368 6369 6370 6371 6372 6373 6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400 6401 6402 6403 6404 6405 6406 6407 6408 6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 6419 6420 6421 6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 6438 6439 6440 6441 6442 6443 6444 6445 6446 6447 6448 6449 6450 6451 6452 6453 6454 6455 6456 6457 6458 6459 6460 6461 6462 6463 6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 6536 6537 6538 6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549 6550 6551 6552 6553 6554 6555 6556 6557 6558 6559 6560 6561 6562 6563 6564 6565 6566 6567 6568 6569 6570 6571 6572 6573 6574 6575 6576 6577 6578 6579 6580 6581 6582 6583 6584 6585 6586 6587 6588 6589 6590 6591 6592 6593 6594 6595 6596 6597 6598 6599 6600 6601 6602 6603 6604 6605 6606 6607 6608 6609 6610 6611 6612 6613 6614 6615 6616 6617 6618 6619 6620 6621 6622 6623 6624 6625 6626 6627 6628 6629 6630 6631 6632 6633 6634 6635 6636 6637 6638 6639 6640 6641 6642 6643 6644 6645 6646 6647 6648 6649 6650 6651 6652 6653 6654 6655 6656 6657 6658 6659 6660 6661 6662 6663 6664 6665 6666 6667 6668 6669 6670 6671 6672 6673 6674 6675 6676 6677 6678 6679 6680 6681 6682 6683 6684 6685 6686 6687 6688 6689 6690 6691 6692 6693 6694 6695 6696 6697 6698 6699 6700 6701 6702 6703 6704 6705 6706 6707 6708 6709 6710 6711 6712 6713 6714 6715 6716 6717 6718 6719 6720 6721 6722 6723 6724 6725 6726 6727 6728 6729 6730 6731 6732 6733 6734 6735 6736 6737 6738 6739 6740 6741 6742 6743 6744 6745 6746 6747 6748 6749 6750 6751 6752 6753 6754 6755 6756 6757 6758 6759 6760 6761 6762 6763 6764 6765 6766 6767 6768 6769 6770 6771 6772 6773 6774 6775 6776 6777 6778 6779 6780 6781 6782 6783 6784 6785 6786 6787 6788 6789 6790 6791 6792 6793 6794 6795 6796 6797 6798 6799 6800 6801 6802 6803 6804 6805 6806 6807 6808 6809 6810 6811 6812 6813 6814 6815 6816 6817 6818 6819 6820 6821 6822 6823 6824 6825 6826 6827 6828 6829 6830 6831 6832 6833 6834 6835 6836 6837 6838 6839 6840 6841 6842 6843 6844 6845 6846 6847 6848 6849 6850 6851 6852 6853 6854 6855 6856 6857 6858 6859 6860 6861 6862 6863 6864 6865 6866 6867 6868 6869 6870 6871 6872 6873 6874 6875 6876 6877 6878 6879 6880 6881 6882 6883 6884 6885 6886 6887 6888 6889 6890 6891 6892 6893 6894 6895 6896 6897 6898 6899 6900 6901 6902 6903 6904 6905 6906 6907 6908 6909 6910 6911 6912 6913 6914 6915 6916 6917 6918 6919 6920 6921 6922 6923 6924 6925 6926 6927 6928 6929 6930 6931 6932 6933 6934 6935 6936 6937 6938 6939 6940 6941 6942 6943 6944 6945 6946 6947 6948 6949 6950 6951 6952 6953 6954 6955 6956 6957 6958 6959 6960 6961 6962 6963 6964 6965 6966 6967 6968 6969 6970 6971 6972 6973 6974 6975 6976 6977 6978 6979 6980 6981 6982 6983 6984 6985 6986 6987 6988 6989 6990 6991 6992 6993 6994 6995 6996 6997 6998 6999 7000 7001 7002 7003 7004 7005 7006 7007 7008 7009 7010 7011 7012 7013 7014 7015 7016 7017 7018 7019 7020 7021 7022 7023 7024 7025 7026 7027 7028 7029 7030 7031 7032 7033 7034 7035 7036 7037 7038 7039 7040 7041 7042 7043 7044 7045 7046 7047 7048 7049 7050 7051 7052 7053 7054 7055 7056 7057 7058 7059 7060 7061 7062 7063 7064 7065 7066 7067 7068 7069 7070 7071 7072 7073 7074 7075 7076 7077 7078 7079 7080 7081 7082 7083 7084 7085 7086 7087 7088 7089 7090 7091 7092 7093 7094 7095 7096 7097 7098 7099 7100 7101 7102 7103 7104 7105 7106 7107 7108 7109 7110 7111 7112 7113 7114 7115 7116 7117 7118 7119 7120 7121 7122 7123 7124 7125 7126 7127 7128 7129 7130 7131 7132 7133 7134 7135 7136 7137 7138 7139 7140 7141 7142 7143 7144 7145 7146 7147 7148 7149 7150 7151 7152 7153 7154 7155 7156 7157 7158 7159 7160 7161 7162 7163 7164 7165 7166 7167 7168 7169 7170 7171 7172 7173 7174 7175 7176 7177 7178 7179 7180 7181 7182 7183 7184 7185 7186 7187 7188 7189 7190 7191 7192 7193 7194 7195 7196 7197 7198 7199 7200 7201 7202 7203 7204 7205 7206 7207 7208 7209 7210 7211 7212 7213 7214 7215 7216 7217 7218 7219 7220 7221 7222 7223 7224 7225 7226 7227 7228 7229 7230 7231 7232 7233 7234 7235 7236 7237 7238 7239 7240 7241 7242 7243 7244 7245 7246 7247 7248 7249 7250 7251 7252 7253 7254 7255 7256 7257 7258 7259 7260 7261 7262 7263 7264 7265 7266 7267 7268 7269 7270 7271 7272 7273 7274 7275 7276 7277 7278 7279 7280 7281 7282 7283 7284 7285 7286 7287 7288 7289 7290 7291 7292 7293 7294 7295 7296 7297 7298 7299 7300 7301 7302 7303 7304 7305 7306 7307 7308 7309 7310 7311 7312 7313 7314 7315 7316 7317 7318 7319 7320 7321 7322 7323 7324 7325 7326 7327 7328 7329 7330 7331 7332 7333 7334 7335 7336 7337 7338 7339 7340 7341 7342 7343 7344 7345 7346 7347 7348 7349 7350 7351 7352 7353 7354 7355 7356 7357 7358 7359 7360 7361 7362 7363 7364 7365 7366 7367 7368 7369 7370 7371 7372 7373 7374 7375 7376 7377 7378 7379 7380 7381 7382 7383 7384 7385 7386 7387 7388 7389 7390 7391 7392 7393 7394 7395 7396 7397 7398 7399 7400 7401 7402 7403 7404 7405 7406 7407 7408 7409 7410 7411 7412 7413 7414 7415 7416 7417 7418 7419 7420 7421 7422 7423 7424 7425 7426 7427 7428 7429 7430 7431 7432 7433 7434 7435 7436 7437 7438 7439 7440 7441 7442 7443 7444 7445 7446 7447 7448 7449 7450 7451 7452 7453 7454 7455 7456 7457 7458 7459 7460 7461 7462 7463 7464 7465 7466 7467 7468 7469 7470 7471 7472 7473 7474 7475 7476 7477 7478 7479 7480 7481 7482 7483 7484 7485 7486 7487 7488 7489 7490 7491 7492 7493 7494 7495 7496 7497 7498 7499 7500 7501 7502 7503 7504 7505 7506 7507 7508 7509 7510 7511 7512 7513 7514 7515 7516 7517 7518 7519 7520 7521 7522 7523 7524 7525 7526 7527 7528 7529 7530 7531 7532 7533 7534 7535 7536 7537 7538 7539 7540 7541 7542 7543 7544 7545 7546 7547 7548 7549 7550 7551 7552 7553 7554 7555 7556 7557 7558 7559 7560 7561 7562 7563 7564 7565 7566 7567 7568 7569 7570 7571 7572 7573 7574 7575 7576 7577 7578 7579 7580 7581 7582 7583 7584 7585 7586 7587 7588 7589 7590 7591 7592 7593 7594 7595 7596 7597 7598 7599 7600 7601 7602 7603 7604 7605 7606 7607 7608 7609 7610 7611 7612 7613 7614 7615 7616 7617 7618 7619 7620 7621 7622 7623 7624 7625 7626 7627 7628 7629 7630 7631 7632 7633 7634 7635 7636 7637 7638 7639 7640 7641 7642 7643 7644 7645 7646 7647 7648 7649 7650 7651 7652 7653 7654 7655 7656 7657 7658 7659 7660 7661 7662 7663 7664 7665 7666 7667 7668 7669 7670 7671 7672 7673 7674 7675 7676 7677 7678 7679 7680 7681 7682 7683 7684 7685 7686 7687 7688 7689 7690 7691 7692 7693 7694 7695 7696 7697 7698 7699 7700 7701 7702 7703 7704 7705 7706 7707 7708 7709 7710 7711 7712 7713 7714 7715 7716 7717 7718 7719 7720 7721 7722 7723 7724 7725 7726 7727 7728 7729 7730 7731 7732 7733 7734 7735 7736 7737 7738 7739 7740 7741 7742 7743 7744 7745 7746 7747 7748 7749 7750 7751 7752 7753 7754 7755 7756 7757 7758 7759 7760 7761 7762 7763 7764 7765 7766 7767 7768 7769 7770 7771 7772 7773 7774 7775 7776 7777 7778 7779 7780 7781 7782 7783 7784 7785 7786 7787 7788 7789 7790 7791 7792 7793 7794 7795 7796 7797 7798 7799 7800 7801 7802 7803 7804 7805 7806 7807 7808 7809 7810 7811 7812 7813 7814 7815 7816 7817 7818 7819 7820 7821 7822 7823 7824 7825 7826 7827 7828 7829 7830 7831 7832 7833 7834 7835 7836 7837 7838 7839 7840 7841 7842 7843 7844 7845 7846 7847 7848 7849 7850 7851 7852 7853 7854 7855 7856 7857 7858 7859 7860 7861 7862 7863 7864 7865 7866 7867 7868 7869 7870 7871 7872 7873 7874 7875 7876 7877 7878 7879 7880 7881 7882 7883 7884 7885 7886 7887 7888 7889 7890 7891 7892 7893 7894 7895 7896 7897 7898 7899 7900 7901 7902 7903 7904 7905 7906 7907 7908 7909 7910 7911 7912 7913 7914 7915 7916 7917 7918 7919 7920 7921 7922 7923 7924 7925 7926 7927 7928 7929 7930 7931 7932 7933 7934 7935 7936 7937 7938 7939 7940 7941 7942 7943 7944 7945 7946 7947 7948 7949 7950 7951 7952 7953 7954 7955 7956 7957 7958 7959 7960 7961 7962 7963 7964 7965 7966 7967 7968 7969 7970 7971 7972 7973 7974 7975 7976 7977 7978 7979 7980 7981 7982 7983 7984 7985 7986 7987 7988 7989 7990 7991 7992 7993 7994 7995 7996 7997 7998 7999 8000 8001 8002 8003 8004 8005 8006 8007 8008 8009 8010 8011 8012 8013 8014 8015 8016 8017 8018 8019 8020 8021 8022 8023 8024 8025 8026 8027 8028 8029 8030 8031 8032 8033 8034 8035 8036 8037 8038 8039 8040 8041 8042 8043 8044 8045 8046 8047 8048 8049 8050 8051 8052 8053 8054 8055 8056 8057 8058 8059 8060 8061 8062 8063 8064 8065 8066 8067 8068 8069 8070 8071 8072 8073 8074 8075 8076 8077 8078 8079 8080 8081 8082 8083 8084 8085 8086 8087 8088 8089 8090 8091 8092 8093 8094 8095 8096 8097 8098 8099 8100 8101 8102 8103 8104 8105 8106 8107 8108 8109 8110 8111 8112 8113 8114 8115 8116 8117 8118 8119 8120 8121 8122 8123 8124 8125 8126 8127 8128 8129 8130 8131 8132 8133 8134 8135 8136 8137 8138 8139 8140 8141 8142 8143 8144 8145 8146 8147 8148 8149 8150 8151 8152 8153 8154 8155 8156 8157 8158 8159 8160 8161 8162 8163 8164 8165 8166 8167 8168 8169 8170 8171 8172 8173 8174 8175 8176 8177 8178 8179 8180 8181 8182 8183 8184 8185 8186 8187 8188 8189 8190 8191 8192 8193 8194 8195 8196 8197 8198 8199 8200 8201 8202 8203 8204 8205 8206 8207 8208 8209 8210 8211 8212 8213 8214 8215 8216 8217 8218 8219 8220 8221 8222 8223 8224 8225 8226 8227 8228 8229 8230 8231 8232 8233 8234 8235 8236 8237 8238 8239 8240 8241 8242 8243 8244 8245 8246 8247 8248 8249 8250 8251 8252 8253 8254 8255 8256 8257 8258 8259 8260 8261 8262 8263 8264 8265 8266 8267 8268 8269 8270 8271 8272 8273 8274 8275 8276 8277 8278 8279 8280 8281 8282 8283 8284 8285 8286 8287 8288 8289 8290 8291 8292 8293 8294 8295 8296 8297 8298 8299 8300 8301 8302 8303 8304 8305 8306 8307 8308 8309 8310 8311 8312 8313 8314 8315 8316 8317 8318 8319 8320 8321 8322 8323 8324 8325 8326 8327 8328 8329 8330 8331 8332 8333 8334 8335 8336 8337 8338 8339 8340 8341 8342 8343 8344 8345 8346 8347 8348 8349 8350 8351 8352 8353 8354 8355 8356 8357 8358 8359 8360 8361 8362 8363 8364 8365 8366 8367 8368 8369 8370 8371 8372 8373 8374 8375 8376 8377 8378 8379 8380 8381 8382 8383 8384 8385 8386 8387 8388 8389 8390 8391 8392 8393 8394 8395 8396 8397 8398 8399 8400 8401 8402 8403 8404 8405 8406 8407 8408 8409 8410 8411 8412 8413 8414 8415 8416 8417 8418 8419 8420 8421 8422 8423 8424 8425 8426 8427 8428 8429 8430 8431 8432 8433 8434 8435 8436 8437 8438 8439 8440 8441 8442 8443 8444 8445 8446 8447 8448 8449 8450 8451 8452 8453 8454 8455 8456 8457 8458 8459 8460 8461 8462 8463 8464 8465 8466 8467 8468 8469 8470 8471 8472 8473 8474 8475 8476 8477 8478 8479 8480 8481 8482 8483 8484 8485 8486 8487 8488 8489 8490 8491 8492 8493 8494 8495 8496 8497 8498 8499 8500 8501 8502 8503 8504 8505 8506 8507 8508 8509 8510 8511 8512 8513 8514 8515 8516 8517 8518 8519 8520 8521 8522 8523 8524 8525 8526 8527 8528 8529 8530 8531 8532 8533 8534 8535 8536 8537 8538 8539 8540 8541 8542 8543 8544 8545 8546 8547 8548 8549 8550 8551 8552 8553 8554 8555 8556 8557 8558 8559 8560 8561 8562 8563 8564 8565 8566 8567 8568 8569 8570 8571 8572 8573 8574 8575 8576 8577 8578 8579 8580 8581 8582 8583 8584 8585 8586 8587 8588 8589 8590 8591 8592 8593 8594 8595 8596 8597 8598 8599 8600 8601 8602 8603 8604 8605 8606 8607 8608 8609 8610 8611 8612 8613 8614 8615 8616 8617 8618 8619 8620 8621 8622 8623 8624 8625 8626 8627 8628 8629 8630 8631 8632 8633 8634 8635 8636 8637 8638 8639 8640 8641 8642 8643 8644 8645 8646 8647 8648 8649 8650 8651 8652 8653 8654 8655 8656 8657 8658 8659 8660 8661 8662 8663 8664 8665 8666 8667 8668 8669 8670 8671 8672 8673 8674 8675 8676 8677 8678 8679 8680 8681 8682 8683 8684 8685 8686 8687 8688 8689 8690 8691 8692 8693 8694 8695 8696 8697 8698 8699 8700 8701 8702 8703 8704 8705 8706 8707 8708 8709 8710 8711 8712 8713 8714 8715 8716 8717 8718 8719 8720 8721 8722 8723 8724 8725 8726 8727 8728 8729 8730 8731 8732 8733 8734 8735 8736 8737 8738 8739 8740 8741 8742 8743 8744 8745 8746 8747 8748 8749 8750 8751 8752 8753 8754 8755 8756 8757 8758 8759 8760 8761 8762 8763 8764 8765 8766 8767 8768 8769 8770 8771 8772 8773 8774 8775 8776 8777 8778 8779 8780 8781 8782 8783 8784 8785 8786 8787 8788 8789 8790 8791 8792 8793 8794 8795 8796 8797 8798 8799 8800 8801 8802 8803 8804 8805 8806 8807 8808 8809 8810 8811 8812 8813 8814 8815 8816 8817 8818 8819 8820 8821 8822 8823 8824 8825 8826 8827 8828 8829 8830 8831 8832 8833 8834 8835 8836 8837 8838 8839 8840 8841 8842 8843 8844 8845 8846 8847 8848 8849 8850 8851 8852 8853 8854 8855 8856 8857 8858 8859 8860 8861 8862 8863 8864 8865 8866 8867 8868 8869 8870 8871 8872 8873 8874 8875 8876 8877 8878 8879 8880 8881 8882 8883 8884 8885 8886 8887 8888 8889 8890 8891 8892 8893 8894 8895 8896 8897 8898 8899 8900 8901 8902 8903 8904 8905 8906 8907 8908 8909 8910 8911 8912 8913 8914 8915 8916 8917 8918 8919 8920 8921 8922 8923 8924 8925 8926 8927 8928 8929 8930 8931 8932 8933 8934 8935 8936 8937 8938 8939 8940 8941 8942 8943 8944 8945 8946 8947 8948 8949 8950 8951 8952 8953 8954 8955 8956 8957 8958 8959 8960 8961 8962 8963 8964 8965 8966 8967 8968 8969 8970 8971 8972 8973 8974 8975 8976 8977 8978 8979 8980 8981 8982 8983 8984 8985 8986 8987 8988 8989 8990 8991 8992 8993 8994 8995 8996 8997 8998 8999 9000 9001 9002 9003 9004 9005 9006 9007 9008 9009 9010 9011 9012 9013 9014 9015 9016 9017 9018 9019 9020 9021 9022 9023 9024 9025 9026 9027 9028 9029 9030 9031 9032 9033 9034 9035 9036 9037 9038 9039 9040 9041 9042 9043 9044 9045 9046 9047 9048 9049 9050 9051 9052 9053 9054 9055 9056 9057 9058 9059 9060 9061 9062 9063 9064 9065 9066 9067 9068 9069 9070 9071 9072 9073 9074 9075 9076 9077 9078 9079 9080 9081 9082 9083 9084 9085 9086 9087 9088 9089 9090 9091 9092 9093 9094 9095 9096 9097 9098 9099 9100 9101 9102 9103 9104 9105 9106 9107 9108 9109 9110 9111 9112 9113 9114 9115 9116 9117 9118 9119 9120 9121 9122 9123 9124 9125 9126 9127 9128 9129 9130 9131 9132 9133 9134 9135 9136 9137 9138 9139 9140 9141 9142 9143 9144 9145 9146 9147 9148 9149 9150 9151 9152 9153 9154 9155 9156 9157 9158 9159 9160 9161 9162 9163 9164 9165 9166 9167 9168 9169 9170 9171 9172 9173 9174 9175 9176 9177 9178 9179 9180 9181 9182 9183 9184 9185 9186 9187 9188 9189 9190 9191 9192 9193 9194 9195 9196 9197 9198 9199 9200 9201 9202 9203 9204 9205 9206 9207 9208 9209 9210 9211 9212 9213 9214 9215 9216 9217 9218 9219 9220 9221 9222 9223 9224 9225 9226 9227 9228 9229 9230 9231 9232 9233 9234 9235 9236 9237 9238 9239 9240 9241 9242 9243 9244 9245 9246 9247 9248 9249 9250 9251 9252 9253 9254 9255 9256 9257 9258 9259 9260 9261 9262 9263 9264 9265 9266 9267 9268 9269 9270 9271 9272 9273 9274 9275 9276 9277 9278 9279 9280 9281 9282 9283 9284 9285 9286 9287 9288 9289 9290 9291 9292 9293 9294 9295 9296 9297 9298 9299 9300 9301 9302 9303 9304 9305 9306 9307 9308 9309 9310 9311 9312 9313 9314 9315 9316 9317 9318 9319 9320 9321 9322 9323 9324 9325 9326 9327 9328 9329 9330 9331 9332 9333 9334 9335 9336 9337 9338 9339 9340 9341 9342 9343 9344 9345 9346 9347 9348 9349 9350 9351 9352 9353 9354 9355 9356 9357 9358 9359 9360 9361 9362 9363 9364 9365 9366 9367 9368 9369 9370 9371 9372 9373 9374 9375 9376 9377 9378 9379 9380 9381 9382 9383 9384 9385 9386 9387 9388 9389 9390 9391 9392 9393 9394 9395 9396 9397 9398 9399 9400 9401 9402 9403 9404 9405 9406 9407 9408 9409 9410 9411 9412 9413 9414 9415 9416 9417 9418 9419 9420 9421 9422 9423 9424 9425 9426 9427 9428 9429 9430 9431 9432 9433 9434 9435 9436 9437 9438 9439 9440 9441 9442 9443 9444 9445 9446 9447 9448 9449 9450 9451 9452 9453 9454 9455 9456 9457 9458 9459 9460 9461 9462 9463 9464 9465 9466 9467 9468 9469 9470 9471 9472 9473 9474 9475 9476 9477 9478 9479 9480 9481 9482 9483 9484 9485 9486 9487 9488 9489 9490 9491 9492 9493 9494 9495 9496 9497 9498 9499 9500 9501 9502 9503 9504 9505 9506 9507 9508 9509 9510 9511 9512 9513 9514 9515 9516 9517 9518 9519 9520 9521 9522 9523 9524 9525 9526 9527 9528 9529 9530 9531 9532 9533 9534 9535 9536 9537 9538 9539 9540 9541 9542 9543 9544 9545 9546 9547 9548 9549 9550 9551 9552 9553 9554 9555 9556 9557 9558 9559 9560 9561 9562 9563 9564 9565 9566 9567 9568 9569 9570 9571 9572 9573 9574 9575 9576 9577 9578 9579 9580 9581 9582 9583 9584 9585 9586 9587 9588 9589 9590 9591 9592 9593 9594 9595 9596 9597 9598 9599 9600 9601 9602 9603 9604 9605 9606 9607 9608 9609 9610 9611 9612 9613 9614 9615 9616 9617 9618 9619 9620 9621 9622 9623 9624 9625 9626 9627 9628 9629 9630 9631 9632 9633 9634 9635 9636 9637 9638 9639 9640 9641 9642 9643 9644 9645 9646 9647 9648 9649 9650 9651 9652 9653 9654 9655 9656 9657 9658 9659 9660 9661 9662 9663 9664 9665 9666 9667 9668 9669 9670 9671 9672 9673 9674 9675 9676 9677 9678 9679 9680 9681 9682 9683 9684 9685 9686 9687 9688 9689 9690 9691 9692 9693 9694 9695 9696 9697 9698 9699 9700 9701 9702 9703 9704 9705 9706 9707 9708 9709 9710 9711 9712 9713 9714 9715 9716 9717 9718 9719 9720 9721 9722 9723 9724 9725 9726 9727 9728 9729 9730 9731 9732 9733 9734 9735 9736 9737 9738 9739 9740 9741 9742 9743 9744 9745 9746 9747 9748 9749 9750 9751 9752 9753 9754 9755 9756 9757 9758 9759 9760 9761 9762 9763 9764 9765 9766 9767 9768 9769 9770 9771 9772 9773 9774 9775 9776 9777 9778 9779 9780 9781 9782 9783 9784 9785 9786 9787 9788 9789 9790 9791 9792 9793 9794 9795 9796 9797 9798 9799 9800 9801 9802 9803 9804 9805 9806 9807 9808 9809 9810 9811 9812 9813 9814 9815 9816 9817 9818 9819 9820 9821 9822 9823 9824 9825 9826 9827 9828 9829 9830 9831 9832 9833 9834 9835 9836 9837 9838 9839 9840 9841 9842 9843 9844 9845 9846 9847 9848 9849 9850 9851 9852 9853 9854 9855 9856 9857 9858 9859 9860 9861 9862 9863 9864 9865 9866 9867 9868 9869 9870 9871 9872 9873 9874 9875 9876 9877 9878 9879 9880 9881 9882 9883 9884 9885 9886 9887 9888 9889 9890 9891 9892 9893 9894 9895 9896 9897 9898 9899 9900 9901 9902 9903 9904 9905 9906 9907 9908 9909 9910 9911 9912 9913 9914 9915 9916 9917 9918 9919 9920 9921 9922 9923 9924 9925 9926 9927 9928 9929 9930 9931 9932 9933 9934 9935 9936 9937 9938 9939 9940 9941 9942 9943 9944 9945 9946 9947 9948 9949 9950 9951 9952 9953 9954 9955 9956 9957 9958 9959 9960 9961 9962 9963 9964 9965 9966 9967 9968 9969 9970 9971 9972 9973 9974 9975 9976 9977 9978 9979 9980 9981 9982 9983 9984 9985 9986 9987 9988 9989 9990 9991 9992 9993 9994 9995 9996 9997 9998 9999 10000 10001 10002 10003 10004 10005 10006 10007 10008 10009 10010 10011 10012 10013 10014 10015 10016 10017 10018 10019 10020 10021 10022 10023 10024 10025 10026 10027 10028 10029 10030 10031 10032 10033 10034 10035 10036 10037 10038 10039 10040 10041 10042 10043 10044 10045 10046 10047 10048 10049 10050 10051 10052 10053 10054 10055 10056 10057 10058 10059 10060 10061 10062 10063 10064 10065 10066 10067 10068 10069 10070 10071 10072 10073 10074 10075 10076 10077 10078 10079 10080 10081 10082 10083 10084 10085 10086 10087 10088 10089 10090 10091 10092 10093 10094 10095 10096 10097 10098 10099 10100 10101 10102 10103 10104 10105 10106 10107 10108 10109 10110 10111 10112 10113 10114 10115 10116 10117 10118 10119 10120 10121 10122 10123 10124 10125 10126 10127 10128 10129 10130 10131 10132 10133 10134 10135 10136 10137 10138 10139 10140 10141 10142 10143 10144 10145 10146 10147 10148 10149 10150 10151 10152 10153 10154 10155 10156 10157 10158 10159 10160 10161 10162 10163 10164 10165 10166 10167 10168 10169 10170 10171 10172 10173 10174 10175 10176 10177 10178 10179 10180 10181 10182 10183 10184 10185 10186 10187 10188 10189 10190 10191 10192 10193 10194 10195 10196 10197 10198 10199 10200 10201 10202 10203 10204 10205 10206 10207 10208 10209 10210 10211 10212 10213 10214 10215 10216 10217 10218 10219 10220 10221 10222 10223 10224 10225 10226 10227 10228 10229 10230 10231 10232 10233 10234 10235 10236 10237 10238 10239 10240 10241 10242 10243 10244 10245 10246 10247 10248 10249 10250 10251 10252 10253 10254 10255 10256 10257 10258 10259 10260 10261 10262 10263 10264 10265 10266 10267 10268 10269 10270 10271 10272 10273 10274 10275 10276 10277 10278 10279 10280 10281 10282 10283 10284 10285 10286 10287 10288 10289 10290 10291 10292 10293 10294 10295 10296 10297 10298 10299 10300 10301 10302 10303 10304 10305 10306 10307 10308 10309 10310 10311 10312 10313 10314 10315 10316 10317 10318 10319 10320 10321 10322 10323 10324 10325 10326 10327 10328 10329 10330 10331 10332 10333 10334 10335 10336 10337 10338 10339 10340 10341 10342 10343 10344 10345 10346 10347 10348 10349 10350 10351 10352 10353 10354 10355 10356 10357 10358 10359 10360 10361 10362 10363 10364 10365 10366 10367 10368 10369 10370 10371 10372 10373 10374 10375 10376 10377 10378 10379 10380 10381 10382 // SPDX-License-Identifier: ISC /* * Copyright (c) 2005-2011 Atheros Communications Inc. * Copyright (c) 2011-2017 Qualcomm Atheros, Inc. * Copyright (c) 2018-2019, The Linux Foundation. All rights reserved. * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */ #include "mac.h" #include <linux/export.h> #include <net/cfg80211.h> #include <net/mac80211.h> #include <linux/etherdevice.h> #include <linux/acpi.h> #include <linux/of.h> #include <linux/bitfield.h> #include <linux/random.h> #include "hif.h" #include "core.h" #include "debug.h" #include "wmi.h" #include "htt.h" #include "txrx.h" #include "testmode.h" #include "wmi-tlv.h" #include "wmi-ops.h" #include "wow.h" #include "leds.h" /*********/ /* Rates */ /*********/ static struct ieee80211_rate ath10k_rates[] = { { .bitrate = 10, .hw_value = ATH10K_HW_RATE_CCK_LP_1M }, { .bitrate = 20, .hw_value = ATH10K_HW_RATE_CCK_LP_2M, .hw_value_short = ATH10K_HW_RATE_CCK_SP_2M, .flags = IEEE80211_RATE_SHORT_PREAMBLE }, { .bitrate = 55, .hw_value = ATH10K_HW_RATE_CCK_LP_5_5M, .hw_value_short = ATH10K_HW_RATE_CCK_SP_5_5M, .flags = IEEE80211_RATE_SHORT_PREAMBLE }, { .bitrate = 110, .hw_value = ATH10K_HW_RATE_CCK_LP_11M, .hw_value_short = ATH10K_HW_RATE_CCK_SP_11M, .flags = IEEE80211_RATE_SHORT_PREAMBLE }, { .bitrate = 60, .hw_value = ATH10K_HW_RATE_OFDM_6M }, { .bitrate = 90, .hw_value = ATH10K_HW_RATE_OFDM_9M }, { .bitrate = 120, .hw_value = ATH10K_HW_RATE_OFDM_12M }, { .bitrate = 180, .hw_value = ATH10K_HW_RATE_OFDM_18M }, { .bitrate = 240, .hw_value = ATH10K_HW_RATE_OFDM_24M }, { .bitrate = 360, .hw_value = ATH10K_HW_RATE_OFDM_36M }, { .bitrate = 480, .hw_value = ATH10K_HW_RATE_OFDM_48M }, { .bitrate = 540, .hw_value = ATH10K_HW_RATE_OFDM_54M }, }; static struct ieee80211_rate ath10k_rates_rev2[] = { { .bitrate = 10, .hw_value = ATH10K_HW_RATE_REV2_CCK_LP_1M }, { .bitrate = 20, .hw_value = ATH10K_HW_RATE_REV2_CCK_LP_2M, .hw_value_short = ATH10K_HW_RATE_REV2_CCK_SP_2M, .flags = IEEE80211_RATE_SHORT_PREAMBLE }, { .bitrate = 55, .hw_value = ATH10K_HW_RATE_REV2_CCK_LP_5_5M, .hw_value_short = ATH10K_HW_RATE_REV2_CCK_SP_5_5M, .flags = IEEE80211_RATE_SHORT_PREAMBLE }, { .bitrate = 110, .hw_value = ATH10K_HW_RATE_REV2_CCK_LP_11M, .hw_value_short = ATH10K_HW_RATE_REV2_CCK_SP_11M, .flags = IEEE80211_RATE_SHORT_PREAMBLE }, { .bitrate = 60, .hw_value = ATH10K_HW_RATE_OFDM_6M }, { .bitrate = 90, .hw_value = ATH10K_HW_RATE_OFDM_9M }, { .bitrate = 120, .hw_value = ATH10K_HW_RATE_OFDM_12M }, { .bitrate = 180, .hw_value = ATH10K_HW_RATE_OFDM_18M }, { .bitrate = 240, .hw_value = ATH10K_HW_RATE_OFDM_24M }, { .bitrate = 360, .hw_value = ATH10K_HW_RATE_OFDM_36M }, { .bitrate = 480, .hw_value = ATH10K_HW_RATE_OFDM_48M }, { .bitrate = 540, .hw_value = ATH10K_HW_RATE_OFDM_54M }, }; static const struct cfg80211_sar_freq_ranges ath10k_sar_freq_ranges[] = { {.start_freq = 2402, .end_freq = 2494 }, {.start_freq = 5170, .end_freq = 5875 }, }; static const struct cfg80211_sar_capa ath10k_sar_capa = { .type = NL80211_SAR_TYPE_POWER, .num_freq_ranges = (ARRAY_SIZE(ath10k_sar_freq_ranges)), .freq_ranges = &ath10k_sar_freq_ranges[0], }; #define ATH10K_MAC_FIRST_OFDM_RATE_IDX 4 #define ath10k_a_rates (ath10k_rates + ATH10K_MAC_FIRST_OFDM_RATE_IDX) #define ath10k_a_rates_size (ARRAY_SIZE(ath10k_rates) - \ ATH10K_MAC_FIRST_OFDM_RATE_IDX) #define ath10k_g_rates (ath10k_rates + 0) #define ath10k_g_rates_size (ARRAY_SIZE(ath10k_rates)) #define ath10k_g_rates_rev2 (ath10k_rates_rev2 + 0) #define ath10k_g_rates_rev2_size (ARRAY_SIZE(ath10k_rates_rev2)) #define ath10k_wmi_legacy_rates ath10k_rates static bool ath10k_mac_bitrate_is_cck(int bitrate) { switch (bitrate) { case 10: case 20: case 55: case 110: return true; } return false; } static u8 ath10k_mac_bitrate_to_rate(int bitrate) { return DIV_ROUND_UP(bitrate, 5) | (ath10k_mac_bitrate_is_cck(bitrate) ? BIT(7) : 0); } u8 ath10k_mac_hw_rate_to_idx(const struct ieee80211_supported_band *sband, u8 hw_rate, bool cck) { const struct ieee80211_rate *rate; int i; for (i = 0; i < sband->n_bitrates; i++) { rate = &sband->bitrates[i]; if (ath10k_mac_bitrate_is_cck(rate->bitrate) != cck) continue; if (rate->hw_value == hw_rate) return i; else if (rate->flags & IEEE80211_RATE_SHORT_PREAMBLE && rate->hw_value_short == hw_rate) return i; } return 0; } u8 ath10k_mac_bitrate_to_idx(const struct ieee80211_supported_band *sband, u32 bitrate) { int i; for (i = 0; i < sband->n_bitrates; i++) if (sband->bitrates[i].bitrate == bitrate) return i; return 0; } static int ath10k_mac_get_rate_hw_value(int bitrate) { int i; u8 hw_value_prefix = 0; if (ath10k_mac_bitrate_is_cck(bitrate)) hw_value_prefix = WMI_RATE_PREAMBLE_CCK << 6; for (i = 0; i < ARRAY_SIZE(ath10k_rates); i++) { if (ath10k_rates[i].bitrate == bitrate) return hw_value_prefix | ath10k_rates[i].hw_value; } return -EINVAL; } static int ath10k_mac_get_max_vht_mcs_map(u16 mcs_map, int nss) { switch ((mcs_map >> (2 * nss)) & 0x3) { case IEEE80211_VHT_MCS_SUPPORT_0_7: return BIT(8) - 1; case IEEE80211_VHT_MCS_SUPPORT_0_8: return BIT(9) - 1; case IEEE80211_VHT_MCS_SUPPORT_0_9: return BIT(10) - 1; } return 0; } static u32 ath10k_mac_max_ht_nss(const u8 ht_mcs_mask[IEEE80211_HT_MCS_MASK_LEN]) { int nss; for (nss = IEEE80211_HT_MCS_MASK_LEN - 1; nss >= 0; nss--) if (ht_mcs_mask[nss]) return nss + 1; return 1; } static u32 ath10k_mac_max_vht_nss(const u16 vht_mcs_mask[NL80211_VHT_NSS_MAX]) { int nss; for (nss = NL80211_VHT_NSS_MAX - 1; nss >= 0; nss--) if (vht_mcs_mask[nss]) return nss + 1; return 1; } int ath10k_mac_ext_resource_config(struct ath10k *ar, u32 val) { enum wmi_host_platform_type platform_type; int ret; if (test_bit(WMI_SERVICE_TX_MODE_DYNAMIC, ar->wmi.svc_map)) platform_type = WMI_HOST_PLATFORM_LOW_PERF; else platform_type = WMI_HOST_PLATFORM_HIGH_PERF; ret = ath10k_wmi_ext_resource_config(ar, platform_type, val); if (ret && ret != -EOPNOTSUPP) { ath10k_warn(ar, "failed to configure ext resource: %d\n", ret); return ret; } return 0; } /**********/ /* Crypto */ /**********/ static int ath10k_send_key(struct ath10k_vif *arvif, struct ieee80211_key_conf *key, enum set_key_cmd cmd, const u8 *macaddr, u32 flags) { struct ath10k *ar = arvif->ar; struct wmi_vdev_install_key_arg arg = { .vdev_id = arvif->vdev_id, .key_idx = key->keyidx, .key_len = key->keylen, .key_data = key->key, .key_flags = flags, .macaddr = macaddr, }; lockdep_assert_held(&arvif->ar->conf_mutex); switch (key->cipher) { case WLAN_CIPHER_SUITE_CCMP: arg.key_cipher = ar->wmi_key_cipher[WMI_CIPHER_AES_CCM]; key->flags |= IEEE80211_KEY_FLAG_GENERATE_IV_MGMT; break; case WLAN_CIPHER_SUITE_TKIP: arg.key_cipher = ar->wmi_key_cipher[WMI_CIPHER_TKIP]; arg.key_txmic_len = 8; arg.key_rxmic_len = 8; break; case WLAN_CIPHER_SUITE_WEP40: case WLAN_CIPHER_SUITE_WEP104: arg.key_cipher = ar->wmi_key_cipher[WMI_CIPHER_WEP]; break; case WLAN_CIPHER_SUITE_CCMP_256: arg.key_cipher = ar->wmi_key_cipher[WMI_CIPHER_AES_CCM]; break; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: arg.key_cipher = ar->wmi_key_cipher[WMI_CIPHER_AES_GCM]; key->flags |= IEEE80211_KEY_FLAG_GENERATE_IV_MGMT; break; case WLAN_CIPHER_SUITE_BIP_GMAC_128: case WLAN_CIPHER_SUITE_BIP_GMAC_256: case WLAN_CIPHER_SUITE_BIP_CMAC_256: case WLAN_CIPHER_SUITE_AES_CMAC: WARN_ON(1); return -EINVAL; default: ath10k_warn(ar, "cipher %d is not supported\n", key->cipher); return -EOPNOTSUPP; } if (test_bit(ATH10K_FLAG_RAW_MODE, &ar->dev_flags)) key->flags |= IEEE80211_KEY_FLAG_GENERATE_IV; if (cmd == DISABLE_KEY) { if (flags & WMI_KEY_GROUP) { /* Not all hardware handles group-key deletion operation * correctly. Replace the key with a junk value to invalidate it. */ get_random_bytes(key->key, key->keylen); } else { arg.key_cipher = ar->wmi_key_cipher[WMI_CIPHER_NONE]; arg.key_data = NULL; } } return ath10k_wmi_vdev_install_key(arvif->ar, &arg); } static int ath10k_install_key(struct ath10k_vif *arvif, struct ieee80211_key_conf *key, enum set_key_cmd cmd, const u8 *macaddr, u32 flags) { struct ath10k *ar = arvif->ar; int ret; unsigned long time_left; lockdep_assert_held(&ar->conf_mutex); reinit_completion(&ar->install_key_done); if (arvif->nohwcrypt) return 1; ret = ath10k_send_key(arvif, key, cmd, macaddr, flags); if (ret) return ret; time_left = wait_for_completion_timeout(&ar->install_key_done, 3 * HZ); if (time_left == 0) return -ETIMEDOUT; return 0; } static int ath10k_install_peer_wep_keys(struct ath10k_vif *arvif, const u8 *addr) { struct ath10k *ar = arvif->ar; struct ath10k_peer *peer; int ret; int i; u32 flags; lockdep_assert_held(&ar->conf_mutex); if (WARN_ON(arvif->vif->type != NL80211_IFTYPE_AP && arvif->vif->type != NL80211_IFTYPE_ADHOC && arvif->vif->type != NL80211_IFTYPE_MESH_POINT)) return -EINVAL; spin_lock_bh(&ar->data_lock); peer = ath10k_peer_find(ar, arvif->vdev_id, addr); spin_unlock_bh(&ar->data_lock); if (!peer) return -ENOENT; for (i = 0; i < ARRAY_SIZE(arvif->wep_keys); i++) { if (arvif->wep_keys[i] == NULL) continue; switch (arvif->vif->type) { case NL80211_IFTYPE_AP: flags = WMI_KEY_PAIRWISE; if (arvif->def_wep_key_idx == i) flags |= WMI_KEY_TX_USAGE; ret = ath10k_install_key(arvif, arvif->wep_keys[i], SET_KEY, addr, flags); if (ret < 0) return ret; break; case NL80211_IFTYPE_ADHOC: ret = ath10k_install_key(arvif, arvif->wep_keys[i], SET_KEY, addr, WMI_KEY_PAIRWISE); if (ret < 0) return ret; ret = ath10k_install_key(arvif, arvif->wep_keys[i], SET_KEY, addr, WMI_KEY_GROUP); if (ret < 0) return ret; break; default: WARN_ON(1); return -EINVAL; } spin_lock_bh(&ar->data_lock); peer->keys[i] = arvif->wep_keys[i]; spin_unlock_bh(&ar->data_lock); } /* In some cases (notably with static WEP IBSS with multiple keys) * multicast Tx becomes broken. Both pairwise and groupwise keys are * installed already. Using WMI_KEY_TX_USAGE in different combinations * didn't seem help. Using def_keyid vdev parameter seems to be * effective so use that. * * FIXME: Revisit. Perhaps this can be done in a less hacky way. */ if (arvif->vif->type != NL80211_IFTYPE_ADHOC) return 0; if (arvif->def_wep_key_idx == -1) return 0; ret = ath10k_wmi_vdev_set_param(arvif->ar, arvif->vdev_id, arvif->ar->wmi.vdev_param->def_keyid, arvif->def_wep_key_idx); if (ret) { ath10k_warn(ar, "failed to re-set def wpa key idxon vdev %i: %d\n", arvif->vdev_id, ret); return ret; } return 0; } static int ath10k_clear_peer_keys(struct ath10k_vif *arvif, const u8 *addr) { struct ath10k *ar = arvif->ar; struct ath10k_peer *peer; int first_errno = 0; int ret; int i; u32 flags = 0; lockdep_assert_held(&ar->conf_mutex); spin_lock_bh(&ar->data_lock); peer = ath10k_peer_find(ar, arvif->vdev_id, addr); spin_unlock_bh(&ar->data_lock); if (!peer) return -ENOENT; for (i = 0; i < ARRAY_SIZE(peer->keys); i++) { if (peer->keys[i] == NULL) continue; /* key flags are not required to delete the key */ ret = ath10k_install_key(arvif, peer->keys[i], DISABLE_KEY, addr, flags); if (ret < 0 && first_errno == 0) first_errno = ret; if (ret < 0) ath10k_warn(ar, "failed to remove peer wep key %d: %d\n", i, ret); spin_lock_bh(&ar->data_lock); peer->keys[i] = NULL; spin_unlock_bh(&ar->data_lock); } return first_errno; } bool ath10k_mac_is_peer_wep_key_set(struct ath10k *ar, const u8 *addr, u8 keyidx) { struct ath10k_peer *peer; int i; lockdep_assert_held(&ar->data_lock); /* We don't know which vdev this peer belongs to, * since WMI doesn't give us that information. * * FIXME: multi-bss needs to be handled. */ peer = ath10k_peer_find(ar, 0, addr); if (!peer) return false; for (i = 0; i < ARRAY_SIZE(peer->keys); i++) { if (peer->keys[i] && peer->keys[i]->keyidx == keyidx) return true; } return false; } static int ath10k_clear_vdev_key(struct ath10k_vif *arvif, struct ieee80211_key_conf *key) { struct ath10k *ar = arvif->ar; struct ath10k_peer *peer; u8 addr[ETH_ALEN]; int first_errno = 0; int ret; int i; u32 flags = 0; lockdep_assert_held(&ar->conf_mutex); for (;;) { /* since ath10k_install_key we can't hold data_lock all the * time, so we try to remove the keys incrementally */ spin_lock_bh(&ar->data_lock); i = 0; list_for_each_entry(peer, &ar->peers, list) { for (i = 0; i < ARRAY_SIZE(peer->keys); i++) { if (peer->keys[i] == key) { ether_addr_copy(addr, peer->addr); peer->keys[i] = NULL; break; } } if (i < ARRAY_SIZE(peer->keys)) break; } spin_unlock_bh(&ar->data_lock); if (i == ARRAY_SIZE(peer->keys)) break; /* key flags are not required to delete the key */ ret = ath10k_install_key(arvif, key, DISABLE_KEY, addr, flags); if (ret < 0 && first_errno == 0) first_errno = ret; if (ret) ath10k_warn(ar, "failed to remove key for %pM: %d\n", addr, ret); } return first_errno; } static int ath10k_mac_vif_update_wep_key(struct ath10k_vif *arvif, struct ieee80211_key_conf *key) { struct ath10k *ar = arvif->ar; struct ath10k_peer *peer; int ret; lockdep_assert_held(&ar->conf_mutex); list_for_each_entry(peer, &ar->peers, list) { if (ether_addr_equal(peer->addr, arvif->vif->addr)) continue; if (ether_addr_equal(peer->addr, arvif->bssid)) continue; if (peer->keys[key->keyidx] == key) continue; ath10k_dbg(ar, ATH10K_DBG_MAC, "mac vif vdev %i update key %i needs update\n", arvif->vdev_id, key->keyidx); ret = ath10k_install_peer_wep_keys(arvif, peer->addr); if (ret) { ath10k_warn(ar, "failed to update wep keys on vdev %i for peer %pM: %d\n", arvif->vdev_id, peer->addr, ret); return ret; } } return 0; } /*********************/ /* General utilities */ /*********************/ static inline enum wmi_phy_mode chan_to_phymode(const struct cfg80211_chan_def *chandef) { enum wmi_phy_mode phymode = MODE_UNKNOWN; switch (chandef->chan->band) { case NL80211_BAND_2GHZ: switch (chandef->width) { case NL80211_CHAN_WIDTH_20_NOHT: if (chandef->chan->flags & IEEE80211_CHAN_NO_OFDM) phymode = MODE_11B; else phymode = MODE_11G; break; case NL80211_CHAN_WIDTH_20: phymode = MODE_11NG_HT20; break; case NL80211_CHAN_WIDTH_40: phymode = MODE_11NG_HT40; break; default: phymode = MODE_UNKNOWN; break; } break; case NL80211_BAND_5GHZ: switch (chandef->width) { case NL80211_CHAN_WIDTH_20_NOHT: phymode = MODE_11A; break; case NL80211_CHAN_WIDTH_20: phymode = MODE_11NA_HT20; break; case NL80211_CHAN_WIDTH_40: phymode = MODE_11NA_HT40; break; case NL80211_CHAN_WIDTH_80: phymode = MODE_11AC_VHT80; break; case NL80211_CHAN_WIDTH_160: phymode = MODE_11AC_VHT160; break; case NL80211_CHAN_WIDTH_80P80: phymode = MODE_11AC_VHT80_80; break; default: phymode = MODE_UNKNOWN; break; } break; default: break; } WARN_ON(phymode == MODE_UNKNOWN); return phymode; } static u8 ath10k_parse_mpdudensity(u8 mpdudensity) { /* * 802.11n D2.0 defined values for "Minimum MPDU Start Spacing": * 0 for no restriction * 1 for 1/4 us * 2 for 1/2 us * 3 for 1 us * 4 for 2 us * 5 for 4 us * 6 for 8 us * 7 for 16 us */ switch (mpdudensity) { case 0: return 0; case 1: case 2: case 3: /* Our lower layer calculations limit our precision to * 1 microsecond */ return 1; case 4: return 2; case 5: return 4; case 6: return 8; case 7: return 16; default: return 0; } } int ath10k_mac_vif_chan(struct ieee80211_vif *vif, struct cfg80211_chan_def *def) { struct ieee80211_chanctx_conf *conf; rcu_read_lock(); conf = rcu_dereference(vif->bss_conf.chanctx_conf); if (!conf) { rcu_read_unlock(); return -ENOENT; } *def = conf->def; rcu_read_unlock(); return 0; } static void ath10k_mac_num_chanctxs_iter(struct ieee80211_hw *hw, struct ieee80211_chanctx_conf *conf, void *data) { int *num = data; (*num)++; } static int ath10k_mac_num_chanctxs(struct ath10k *ar) { int num = 0; ieee80211_iter_chan_contexts_atomic(ar->hw, ath10k_mac_num_chanctxs_iter, &num); return num; } static void ath10k_mac_get_any_chandef_iter(struct ieee80211_hw *hw, struct ieee80211_chanctx_conf *conf, void *data) { struct cfg80211_chan_def **def = data; *def = &conf->def; } static void ath10k_wait_for_peer_delete_done(struct ath10k *ar, u32 vdev_id, const u8 *addr) { unsigned long time_left; int ret; if (test_bit(WMI_SERVICE_SYNC_DELETE_CMDS, ar->wmi.svc_map)) { ret = ath10k_wait_for_peer_deleted(ar, vdev_id, addr); if (ret) { ath10k_warn(ar, "failed wait for peer deleted"); return; } time_left = wait_for_completion_timeout(&ar->peer_delete_done, 5 * HZ); if (!time_left) ath10k_warn(ar, "Timeout in receiving peer delete response\n"); } } static int ath10k_peer_create(struct ath10k *ar, struct ieee80211_vif *vif, struct ieee80211_sta *sta, u32 vdev_id, const u8 *addr, enum wmi_peer_type peer_type) { struct ath10k_peer *peer; int ret; lockdep_assert_held(&ar->conf_mutex); /* Each vdev consumes a peer entry as well. */ if (ar->num_peers + list_count_nodes(&ar->arvifs) >= ar->max_num_peers) return -ENOBUFS; ret = ath10k_wmi_peer_create(ar, vdev_id, addr, peer_type); if (ret) { ath10k_warn(ar, "failed to create wmi peer %pM on vdev %i: %i\n", addr, vdev_id, ret); return ret; } ret = ath10k_wait_for_peer_created(ar, vdev_id, addr); if (ret) { ath10k_warn(ar, "failed to wait for created wmi peer %pM on vdev %i: %i\n", addr, vdev_id, ret); return ret; } spin_lock_bh(&ar->data_lock); peer = ath10k_peer_find(ar, vdev_id, addr); if (!peer) { spin_unlock_bh(&ar->data_lock); ath10k_warn(ar, "failed to find peer %pM on vdev %i after creation\n", addr, vdev_id); ath10k_wait_for_peer_delete_done(ar, vdev_id, addr); return -ENOENT; } peer->vif = vif; peer->sta = sta; spin_unlock_bh(&ar->data_lock); ar->num_peers++; return 0; } static int ath10k_mac_set_kickout(struct ath10k_vif *arvif) { struct ath10k *ar = arvif->ar; u32 param; int ret; param = ar->wmi.pdev_param->sta_kickout_th; ret = ath10k_wmi_pdev_set_param(ar, param, ATH10K_KICKOUT_THRESHOLD); if (ret) { ath10k_warn(ar, "failed to set kickout threshold on vdev %i: %d\n", arvif->vdev_id, ret); return ret; } param = ar->wmi.vdev_param->ap_keepalive_min_idle_inactive_time_secs; ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, param, ATH10K_KEEPALIVE_MIN_IDLE); if (ret) { ath10k_warn(ar, "failed to set keepalive minimum idle time on vdev %i: %d\n", arvif->vdev_id, ret); return ret; } param = ar->wmi.vdev_param->ap_keepalive_max_idle_inactive_time_secs; ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, param, ATH10K_KEEPALIVE_MAX_IDLE); if (ret) { ath10k_warn(ar, "failed to set keepalive maximum idle time on vdev %i: %d\n", arvif->vdev_id, ret); return ret; } param = ar->wmi.vdev_param->ap_keepalive_max_unresponsive_time_secs; ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, param, ATH10K_KEEPALIVE_MAX_UNRESPONSIVE); if (ret) { ath10k_warn(ar, "failed to set keepalive maximum unresponsive time on vdev %i: %d\n", arvif->vdev_id, ret); return ret; } return 0; } static int ath10k_mac_set_rts(struct ath10k_vif *arvif, u32 value) { struct ath10k *ar = arvif->ar; u32 vdev_param; vdev_param = ar->wmi.vdev_param->rts_threshold; return ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param, value); } static int ath10k_peer_delete(struct ath10k *ar, u32 vdev_id, const u8 *addr) { int ret; lockdep_assert_held(&ar->conf_mutex); ret = ath10k_wmi_peer_delete(ar, vdev_id, addr); if (ret) return ret; ret = ath10k_wait_for_peer_deleted(ar, vdev_id, addr); if (ret) return ret; if (test_bit(WMI_SERVICE_SYNC_DELETE_CMDS, ar->wmi.svc_map)) { unsigned long time_left; time_left = wait_for_completion_timeout (&ar->peer_delete_done, 5 * HZ); if (!time_left) { ath10k_warn(ar, "Timeout in receiving peer delete response\n"); return -ETIMEDOUT; } } ar->num_peers--; return 0; } static void ath10k_peer_map_cleanup(struct ath10k *ar, struct ath10k_peer *peer) { int peer_id, i; lockdep_assert_held(&ar->conf_mutex); for_each_set_bit(peer_id, peer->peer_ids, ATH10K_MAX_NUM_PEER_IDS) { ar->peer_map[peer_id] = NULL; } /* Double check that peer is properly un-referenced from * the peer_map */ for (i = 0; i < ARRAY_SIZE(ar->peer_map); i++) { if (ar->peer_map[i] == peer) { ath10k_warn(ar, "removing stale peer_map entry for %pM (ptr %p idx %d)\n", peer->addr, peer, i); ar->peer_map[i] = NULL; } } list_del(&peer->list); kfree(peer); ar->num_peers--; } static void ath10k_peer_cleanup(struct ath10k *ar, u32 vdev_id) { struct ath10k_peer *peer, *tmp; lockdep_assert_held(&ar->conf_mutex); spin_lock_bh(&ar->data_lock); list_for_each_entry_safe(peer, tmp, &ar->peers, list) { if (peer->vdev_id != vdev_id) continue; ath10k_warn(ar, "removing stale peer %pM from vdev_id %d\n", peer->addr, vdev_id); ath10k_peer_map_cleanup(ar, peer); } spin_unlock_bh(&ar->data_lock); } static void ath10k_peer_cleanup_all(struct ath10k *ar) { struct ath10k_peer *peer, *tmp; int i; lockdep_assert_held(&ar->conf_mutex); spin_lock_bh(&ar->data_lock); list_for_each_entry_safe(peer, tmp, &ar->peers, list) { list_del(&peer->list); kfree(peer); } for (i = 0; i < ARRAY_SIZE(ar->peer_map); i++) ar->peer_map[i] = NULL; spin_unlock_bh(&ar->data_lock); ar->num_peers = 0; ar->num_stations = 0; } static int ath10k_mac_tdls_peer_update(struct ath10k *ar, u32 vdev_id, struct ieee80211_sta *sta, enum wmi_tdls_peer_state state) { int ret; struct wmi_tdls_peer_update_cmd_arg arg = {}; struct wmi_tdls_peer_capab_arg cap = {}; struct wmi_channel_arg chan_arg = {}; lockdep_assert_held(&ar->conf_mutex); arg.vdev_id = vdev_id; arg.peer_state = state; ether_addr_copy(arg.addr, sta->addr); cap.peer_max_sp = sta->max_sp; cap.peer_uapsd_queues = sta->uapsd_queues; if (state == WMI_TDLS_PEER_STATE_CONNECTED && !sta->tdls_initiator) cap.is_peer_responder = 1; ret = ath10k_wmi_tdls_peer_update(ar, &arg, &cap, &chan_arg); if (ret) { ath10k_warn(ar, "failed to update tdls peer %pM on vdev %i: %i\n", arg.addr, vdev_id, ret); return ret; } return 0; } /************************/ /* Interface management */ /************************/ void ath10k_mac_vif_beacon_free(struct ath10k_vif *arvif) { struct ath10k *ar = arvif->ar; lockdep_assert_held(&ar->data_lock); if (!arvif->beacon) return; if (!arvif->beacon_buf) dma_unmap_single(ar->dev, ATH10K_SKB_CB(arvif->beacon)->paddr, arvif->beacon->len, DMA_TO_DEVICE); if (WARN_ON(arvif->beacon_state != ATH10K_BEACON_SCHEDULED && arvif->beacon_state != ATH10K_BEACON_SENT)) return; dev_kfree_skb_any(arvif->beacon); arvif->beacon = NULL; arvif->beacon_state = ATH10K_BEACON_SCHEDULED; } static void ath10k_mac_vif_beacon_cleanup(struct ath10k_vif *arvif) { struct ath10k *ar = arvif->ar; lockdep_assert_held(&ar->data_lock); ath10k_mac_vif_beacon_free(arvif); if (arvif->beacon_buf) { if (ar->bus_param.dev_type == ATH10K_DEV_TYPE_HL) kfree(arvif->beacon_buf); else dma_free_coherent(ar->dev, IEEE80211_MAX_FRAME_LEN, arvif->beacon_buf, arvif->beacon_paddr); arvif->beacon_buf = NULL; } } static inline int ath10k_vdev_setup_sync(struct ath10k *ar) { unsigned long time_left; lockdep_assert_held(&ar->conf_mutex); if (test_bit(ATH10K_FLAG_CRASH_FLUSH, &ar->dev_flags)) return -ESHUTDOWN; time_left = wait_for_completion_timeout(&ar->vdev_setup_done, ATH10K_VDEV_SETUP_TIMEOUT_HZ); if (time_left == 0) return -ETIMEDOUT; return ar->last_wmi_vdev_start_status; } static inline int ath10k_vdev_delete_sync(struct ath10k *ar) { unsigned long time_left; lockdep_assert_held(&ar->conf_mutex); if (!test_bit(WMI_SERVICE_SYNC_DELETE_CMDS, ar->wmi.svc_map)) return 0; if (test_bit(ATH10K_FLAG_CRASH_FLUSH, &ar->dev_flags)) return -ESHUTDOWN; time_left = wait_for_completion_timeout(&ar->vdev_delete_done, ATH10K_VDEV_DELETE_TIMEOUT_HZ); if (time_left == 0) return -ETIMEDOUT; return 0; } static int ath10k_monitor_vdev_start(struct ath10k *ar, int vdev_id) { struct cfg80211_chan_def *chandef = NULL; struct ieee80211_channel *channel = NULL; struct wmi_vdev_start_request_arg arg = {}; int ret = 0; lockdep_assert_held(&ar->conf_mutex); ieee80211_iter_chan_contexts_atomic(ar->hw, ath10k_mac_get_any_chandef_iter, &chandef); if (WARN_ON_ONCE(!chandef)) return -ENOENT; channel = chandef->chan; arg.vdev_id = vdev_id; arg.channel.freq = channel->center_freq; arg.channel.band_center_freq1 = chandef->center_freq1; arg.channel.band_center_freq2 = chandef->center_freq2; /* TODO setup this dynamically, what in case we * don't have any vifs? */ arg.channel.mode = chan_to_phymode(chandef); arg.channel.chan_radar = !!(channel->flags & IEEE80211_CHAN_RADAR); arg.channel.min_power = 0; arg.channel.max_power = channel->max_power * 2; arg.channel.max_reg_power = channel->max_reg_power * 2; arg.channel.max_antenna_gain = channel->max_antenna_gain; reinit_completion(&ar->vdev_setup_done); reinit_completion(&ar->vdev_delete_done); ret = ath10k_wmi_vdev_start(ar, &arg); if (ret) { ath10k_warn(ar, "failed to request monitor vdev %i start: %d\n", vdev_id, ret); return ret; } ret = ath10k_vdev_setup_sync(ar); if (ret) { ath10k_warn(ar, "failed to synchronize setup for monitor vdev %i start: %d\n", vdev_id, ret); return ret; } ret = ath10k_wmi_vdev_up(ar, vdev_id, 0, ar->mac_addr); if (ret) { ath10k_warn(ar, "failed to put up monitor vdev %i: %d\n", vdev_id, ret); goto vdev_stop; } ar->monitor_vdev_id = vdev_id; ath10k_dbg(ar, ATH10K_DBG_MAC, "mac monitor vdev %i started\n", ar->monitor_vdev_id); return 0; vdev_stop: ret = ath10k_wmi_vdev_stop(ar, ar->monitor_vdev_id); if (ret) ath10k_warn(ar, "failed to stop monitor vdev %i after start failure: %d\n", ar->monitor_vdev_id, ret); return ret; } static int ath10k_monitor_vdev_stop(struct ath10k *ar) { int ret = 0; lockdep_assert_held(&ar->conf_mutex); ret = ath10k_wmi_vdev_down(ar, ar->monitor_vdev_id); if (ret) ath10k_warn(ar, "failed to put down monitor vdev %i: %d\n", ar->monitor_vdev_id, ret); reinit_completion(&ar->vdev_setup_done); reinit_completion(&ar->vdev_delete_done); ret = ath10k_wmi_vdev_stop(ar, ar->monitor_vdev_id); if (ret) ath10k_warn(ar, "failed to request monitor vdev %i stop: %d\n", ar->monitor_vdev_id, ret); ret = ath10k_vdev_setup_sync(ar); if (ret) ath10k_warn(ar, "failed to synchronize monitor vdev %i stop: %d\n", ar->monitor_vdev_id, ret); ath10k_dbg(ar, ATH10K_DBG_MAC, "mac monitor vdev %i stopped\n", ar->monitor_vdev_id); return ret; } static int ath10k_monitor_vdev_create(struct ath10k *ar) { int bit, ret = 0; lockdep_assert_held(&ar->conf_mutex); if (ar->free_vdev_map == 0) { ath10k_warn(ar, "failed to find free vdev id for monitor vdev\n"); return -ENOMEM; } bit = __ffs64(ar->free_vdev_map); ar->monitor_vdev_id = bit; ret = ath10k_wmi_vdev_create(ar, ar->monitor_vdev_id, WMI_VDEV_TYPE_MONITOR, 0, ar->mac_addr); if (ret) { ath10k_warn(ar, "failed to request monitor vdev %i creation: %d\n", ar->monitor_vdev_id, ret); return ret; } ar->free_vdev_map &= ~(1LL << ar->monitor_vdev_id); ath10k_dbg(ar, ATH10K_DBG_MAC, "mac monitor vdev %d created\n", ar->monitor_vdev_id); return 0; } static int ath10k_monitor_vdev_delete(struct ath10k *ar) { int ret = 0; lockdep_assert_held(&ar->conf_mutex); ret = ath10k_wmi_vdev_delete(ar, ar->monitor_vdev_id); if (ret) { ath10k_warn(ar, "failed to request wmi monitor vdev %i removal: %d\n", ar->monitor_vdev_id, ret); return ret; } ar->free_vdev_map |= 1LL << ar->monitor_vdev_id; ath10k_dbg(ar, ATH10K_DBG_MAC, "mac monitor vdev %d deleted\n", ar->monitor_vdev_id); return ret; } static int ath10k_monitor_start(struct ath10k *ar) { int ret; lockdep_assert_held(&ar->conf_mutex); ret = ath10k_monitor_vdev_create(ar); if (ret) { ath10k_warn(ar, "failed to create monitor vdev: %d\n", ret); return ret; } ret = ath10k_monitor_vdev_start(ar, ar->monitor_vdev_id); if (ret) { ath10k_warn(ar, "failed to start monitor vdev: %d\n", ret); ath10k_monitor_vdev_delete(ar); return ret; } ar->monitor_started = true; ath10k_dbg(ar, ATH10K_DBG_MAC, "mac monitor started\n"); return 0; } static int ath10k_monitor_stop(struct ath10k *ar) { int ret; lockdep_assert_held(&ar->conf_mutex); ret = ath10k_monitor_vdev_stop(ar); if (ret) { ath10k_warn(ar, "failed to stop monitor vdev: %d\n", ret); return ret; } ret = ath10k_monitor_vdev_delete(ar); if (ret) { ath10k_warn(ar, "failed to delete monitor vdev: %d\n", ret); return ret; } ar->monitor_started = false; ath10k_dbg(ar, ATH10K_DBG_MAC, "mac monitor stopped\n"); return 0; } static bool ath10k_mac_monitor_vdev_is_needed(struct ath10k *ar) { int num_ctx; /* At least one chanctx is required to derive a channel to start * monitor vdev on. */ num_ctx = ath10k_mac_num_chanctxs(ar); if (num_ctx == 0) return false; /* If there's already an existing special monitor interface then don't * bother creating another monitor vdev. */ if (ar->monitor_arvif) return false; return ar->monitor || (!test_bit(ATH10K_FW_FEATURE_ALLOWS_MESH_BCAST, ar->running_fw->fw_file.fw_features) && (ar->filter_flags & (FIF_OTHER_BSS | FIF_MCAST_ACTION))) || test_bit(ATH10K_CAC_RUNNING, &ar->dev_flags); } static bool ath10k_mac_monitor_vdev_is_allowed(struct ath10k *ar) { int num_ctx; num_ctx = ath10k_mac_num_chanctxs(ar); /* FIXME: Current interface combinations and cfg80211/mac80211 code * shouldn't allow this but make sure to prevent handling the following * case anyway since multi-channel DFS hasn't been tested at all. */ if (test_bit(ATH10K_CAC_RUNNING, &ar->dev_flags) && num_ctx > 1) return false; return true; } static int ath10k_monitor_recalc(struct ath10k *ar) { bool needed; bool allowed; int ret; lockdep_assert_held(&ar->conf_mutex); needed = ath10k_mac_monitor_vdev_is_needed(ar); allowed = ath10k_mac_monitor_vdev_is_allowed(ar); ath10k_dbg(ar, ATH10K_DBG_MAC, "mac monitor recalc started? %d needed? %d allowed? %d\n", ar->monitor_started, needed, allowed); if (WARN_ON(needed && !allowed)) { if (ar->monitor_started) { ath10k_dbg(ar, ATH10K_DBG_MAC, "mac monitor stopping disallowed monitor\n"); ret = ath10k_monitor_stop(ar); if (ret) ath10k_warn(ar, "failed to stop disallowed monitor: %d\n", ret); /* not serious */ } return -EPERM; } if (needed == ar->monitor_started) return 0; if (needed) return ath10k_monitor_start(ar); else return ath10k_monitor_stop(ar); } static bool ath10k_mac_can_set_cts_prot(struct ath10k_vif *arvif) { struct ath10k *ar = arvif->ar; lockdep_assert_held(&ar->conf_mutex); if (!arvif->is_started) { ath10k_dbg(ar, ATH10K_DBG_MAC, "defer cts setup, vdev is not ready yet\n"); return false; } return true; } static int ath10k_mac_set_cts_prot(struct ath10k_vif *arvif) { struct ath10k *ar = arvif->ar; u32 vdev_param; lockdep_assert_held(&ar->conf_mutex); vdev_param = ar->wmi.vdev_param->protection_mode; ath10k_dbg(ar, ATH10K_DBG_MAC, "mac vdev %d cts_protection %d\n", arvif->vdev_id, arvif->use_cts_prot); return ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param, arvif->use_cts_prot ? 1 : 0); } static int ath10k_recalc_rtscts_prot(struct ath10k_vif *arvif) { struct ath10k *ar = arvif->ar; u32 vdev_param, rts_cts = 0; lockdep_assert_held(&ar->conf_mutex); vdev_param = ar->wmi.vdev_param->enable_rtscts; rts_cts |= SM(WMI_RTSCTS_ENABLED, WMI_RTSCTS_SET); if (arvif->num_legacy_stations > 0) rts_cts |= SM(WMI_RTSCTS_ACROSS_SW_RETRIES, WMI_RTSCTS_PROFILE); else rts_cts |= SM(WMI_RTSCTS_FOR_SECOND_RATESERIES, WMI_RTSCTS_PROFILE); ath10k_dbg(ar, ATH10K_DBG_MAC, "mac vdev %d recalc rts/cts prot %d\n", arvif->vdev_id, rts_cts); return ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param, rts_cts); } static int ath10k_start_cac(struct ath10k *ar) { int ret; lockdep_assert_held(&ar->conf_mutex); set_bit(ATH10K_CAC_RUNNING, &ar->dev_flags); ret = ath10k_monitor_recalc(ar); if (ret) { ath10k_warn(ar, "failed to start monitor (cac): %d\n", ret); clear_bit(ATH10K_CAC_RUNNING, &ar->dev_flags); return ret; } ath10k_dbg(ar, ATH10K_DBG_MAC, "mac cac start monitor vdev %d\n", ar->monitor_vdev_id); return 0; } static int ath10k_stop_cac(struct ath10k *ar) { lockdep_assert_held(&ar->conf_mutex); /* CAC is not running - do nothing */ if (!test_bit(ATH10K_CAC_RUNNING, &ar->dev_flags)) return 0; clear_bit(ATH10K_CAC_RUNNING, &ar->dev_flags); ath10k_monitor_stop(ar); ath10k_dbg(ar, ATH10K_DBG_MAC, "mac cac finished\n"); return 0; } static void ath10k_mac_has_radar_iter(struct ieee80211_hw *hw, struct ieee80211_chanctx_conf *conf, void *data) { bool *ret = data; if (!*ret && conf->radar_enabled) *ret = true; } static bool ath10k_mac_has_radar_enabled(struct ath10k *ar) { bool has_radar = false; ieee80211_iter_chan_contexts_atomic(ar->hw, ath10k_mac_has_radar_iter, &has_radar); return has_radar; } static void ath10k_recalc_radar_detection(struct ath10k *ar) { int ret; lockdep_assert_held(&ar->conf_mutex); ath10k_stop_cac(ar); if (!ath10k_mac_has_radar_enabled(ar)) return; if (ar->num_started_vdevs > 0) return; ret = ath10k_start_cac(ar); if (ret) { /* * Not possible to start CAC on current channel so starting * radiation is not allowed, make this channel DFS_UNAVAILABLE * by indicating that radar was detected. */ ath10k_warn(ar, "failed to start CAC: %d\n", ret); ieee80211_radar_detected(ar->hw, NULL); } } static int ath10k_vdev_stop(struct ath10k_vif *arvif) { struct ath10k *ar = arvif->ar; int ret; lockdep_assert_held(&ar->conf_mutex); reinit_completion(&ar->vdev_setup_done); reinit_completion(&ar->vdev_delete_done); ret = ath10k_wmi_vdev_stop(ar, arvif->vdev_id); if (ret) { ath10k_warn(ar, "failed to stop WMI vdev %i: %d\n", arvif->vdev_id, ret); return ret; } ret = ath10k_vdev_setup_sync(ar); if (ret) { ath10k_warn(ar, "failed to synchronize setup for vdev %i: %d\n", arvif->vdev_id, ret); return ret; } WARN_ON(ar->num_started_vdevs == 0); if (ar->num_started_vdevs != 0) { ar->num_started_vdevs--; ath10k_recalc_radar_detection(ar); } return ret; } static int ath10k_vdev_start_restart(struct ath10k_vif *arvif, const struct cfg80211_chan_def *chandef, bool restart) { struct ath10k *ar = arvif->ar; struct wmi_vdev_start_request_arg arg = {}; int ret = 0; lockdep_assert_held(&ar->conf_mutex); reinit_completion(&ar->vdev_setup_done); reinit_completion(&ar->vdev_delete_done); arg.vdev_id = arvif->vdev_id; arg.dtim_period = arvif->dtim_period; arg.bcn_intval = arvif->beacon_interval; arg.channel.freq = chandef->chan->center_freq; arg.channel.band_center_freq1 = chandef->center_freq1; arg.channel.band_center_freq2 = chandef->center_freq2; arg.channel.mode = chan_to_phymode(chandef); arg.channel.min_power = 0; arg.channel.max_power = chandef->chan->max_power * 2; arg.channel.max_reg_power = chandef->chan->max_reg_power * 2; arg.channel.max_antenna_gain = chandef->chan->max_antenna_gain; if (arvif->vdev_type == WMI_VDEV_TYPE_AP) { arg.ssid = arvif->u.ap.ssid; arg.ssid_len = arvif->u.ap.ssid_len; arg.hidden_ssid = arvif->u.ap.hidden_ssid; /* For now allow DFS for AP mode */ arg.channel.chan_radar = !!(chandef->chan->flags & IEEE80211_CHAN_RADAR); } else if (arvif->vdev_type == WMI_VDEV_TYPE_IBSS) { arg.ssid = arvif->vif->cfg.ssid; arg.ssid_len = arvif->vif->cfg.ssid_len; } ath10k_dbg(ar, ATH10K_DBG_MAC, "mac vdev %d start center_freq %d phymode %s\n", arg.vdev_id, arg.channel.freq, ath10k_wmi_phymode_str(arg.channel.mode)); if (restart) ret = ath10k_wmi_vdev_restart(ar, &arg); else ret = ath10k_wmi_vdev_start(ar, &arg); if (ret) { ath10k_warn(ar, "failed to start WMI vdev %i: %d\n", arg.vdev_id, ret); return ret; } ret = ath10k_vdev_setup_sync(ar); if (ret) { ath10k_warn(ar, "failed to synchronize setup for vdev %i restart %d: %d\n", arg.vdev_id, restart, ret); return ret; } ar->num_started_vdevs++; ath10k_recalc_radar_detection(ar); return ret; } static int ath10k_vdev_start(struct ath10k_vif *arvif, const struct cfg80211_chan_def *def) { return ath10k_vdev_start_restart(arvif, def, false); } static int ath10k_vdev_restart(struct ath10k_vif *arvif, const struct cfg80211_chan_def *def) { return ath10k_vdev_start_restart(arvif, def, true); } static int ath10k_mac_setup_bcn_p2p_ie(struct ath10k_vif *arvif, struct sk_buff *bcn) { struct ath10k *ar = arvif->ar; struct ieee80211_mgmt *mgmt; const u8 *p2p_ie; int ret; if (arvif->vif->type != NL80211_IFTYPE_AP || !arvif->vif->p2p) return 0; mgmt = (void *)bcn->data; p2p_ie = cfg80211_find_vendor_ie(WLAN_OUI_WFA, WLAN_OUI_TYPE_WFA_P2P, mgmt->u.beacon.variable, bcn->len - (mgmt->u.beacon.variable - bcn->data)); if (!p2p_ie) return -ENOENT; ret = ath10k_wmi_p2p_go_bcn_ie(ar, arvif->vdev_id, p2p_ie); if (ret) { ath10k_warn(ar, "failed to submit p2p go bcn ie for vdev %i: %d\n", arvif->vdev_id, ret); return ret; } return 0; } static int ath10k_mac_remove_vendor_ie(struct sk_buff *skb, unsigned int oui, u8 oui_type, size_t ie_offset) { size_t len; const u8 *next; const u8 *end; u8 *ie; if (WARN_ON(skb->len < ie_offset)) return -EINVAL; ie = (u8 *)cfg80211_find_vendor_ie(oui, oui_type, skb->data + ie_offset, skb->len - ie_offset); if (!ie) return -ENOENT; len = ie[1] + 2; end = skb->data + skb->len; next = ie + len; if (WARN_ON(next > end)) return -EINVAL; memmove(ie, next, end - next); skb_trim(skb, skb->len - len); return 0; } static int ath10k_mac_setup_bcn_tmpl(struct ath10k_vif *arvif) { struct ath10k *ar = arvif->ar; struct ieee80211_hw *hw = ar->hw; struct ieee80211_vif *vif = arvif->vif; struct ieee80211_mutable_offsets offs = {}; struct sk_buff *bcn; int ret; if (!test_bit(WMI_SERVICE_BEACON_OFFLOAD, ar->wmi.svc_map)) return 0; if (arvif->vdev_type != WMI_VDEV_TYPE_AP && arvif->vdev_type != WMI_VDEV_TYPE_IBSS) return 0; bcn = ieee80211_beacon_get_template(hw, vif, &offs, 0); if (!bcn) { ath10k_warn(ar, "failed to get beacon template from mac80211\n"); return -EPERM; } ret = ath10k_mac_setup_bcn_p2p_ie(arvif, bcn); if (ret) { ath10k_warn(ar, "failed to setup p2p go bcn ie: %d\n", ret); kfree_skb(bcn); return ret; } /* P2P IE is inserted by firmware automatically (as configured above) * so remove it from the base beacon template to avoid duplicate P2P * IEs in beacon frames. */ ath10k_mac_remove_vendor_ie(bcn, WLAN_OUI_WFA, WLAN_OUI_TYPE_WFA_P2P, offsetof(struct ieee80211_mgmt, u.beacon.variable)); ret = ath10k_wmi_bcn_tmpl(ar, arvif->vdev_id, offs.tim_offset, bcn, 0, 0, NULL, 0); kfree_skb(bcn); if (ret) { ath10k_warn(ar, "failed to submit beacon template command: %d\n", ret); return ret; } return 0; } static int ath10k_mac_setup_prb_tmpl(struct ath10k_vif *arvif) { struct ath10k *ar = arvif->ar; struct ieee80211_hw *hw = ar->hw; struct ieee80211_vif *vif = arvif->vif; struct sk_buff *prb; int ret; if (!test_bit(WMI_SERVICE_BEACON_OFFLOAD, ar->wmi.svc_map)) return 0; if (arvif->vdev_type != WMI_VDEV_TYPE_AP) return 0; /* For mesh, probe response and beacon share the same template */ if (ieee80211_vif_is_mesh(vif)) return 0; prb = ieee80211_proberesp_get(hw, vif); if (!prb) { ath10k_warn(ar, "failed to get probe resp template from mac80211\n"); return -EPERM; } ret = ath10k_wmi_prb_tmpl(ar, arvif->vdev_id, prb); kfree_skb(prb); if (ret) { ath10k_warn(ar, "failed to submit probe resp template command: %d\n", ret); return ret; } return 0; } static int ath10k_mac_vif_fix_hidden_ssid(struct ath10k_vif *arvif) { struct ath10k *ar = arvif->ar; struct cfg80211_chan_def def; int ret; /* When originally vdev is started during assign_vif_chanctx() some * information is missing, notably SSID. Firmware revisions with beacon * offloading require the SSID to be provided during vdev (re)start to * handle hidden SSID properly. * * Vdev restart must be done after vdev has been both started and * upped. Otherwise some firmware revisions (at least 10.2) fail to * deliver vdev restart response event causing timeouts during vdev * syncing in ath10k. * * Note: The vdev down/up and template reinstallation could be skipped * since only wmi-tlv firmware are known to have beacon offload and * wmi-tlv doesn't seem to misbehave like 10.2 wrt vdev restart * response delivery. It's probably more robust to keep it as is. */ if (!test_bit(WMI_SERVICE_BEACON_OFFLOAD, ar->wmi.svc_map)) return 0; if (WARN_ON(!arvif->is_started)) return -EINVAL; if (WARN_ON(!arvif->is_up)) return -EINVAL; if (WARN_ON(ath10k_mac_vif_chan(arvif->vif, &def))) return -EINVAL; ret = ath10k_wmi_vdev_down(ar, arvif->vdev_id); if (ret) { ath10k_warn(ar, "failed to bring down ap vdev %i: %d\n", arvif->vdev_id, ret); return ret; } /* Vdev down reset beacon & presp templates. Reinstall them. Otherwise * firmware will crash upon vdev up. */ ret = ath10k_mac_setup_bcn_tmpl(arvif); if (ret) { ath10k_warn(ar, "failed to update beacon template: %d\n", ret); return ret; } ret = ath10k_mac_setup_prb_tmpl(arvif); if (ret) { ath10k_warn(ar, "failed to update presp template: %d\n", ret); return ret; } ret = ath10k_vdev_restart(arvif, &def); if (ret) { ath10k_warn(ar, "failed to restart ap vdev %i: %d\n", arvif->vdev_id, ret); return ret; } ret = ath10k_wmi_vdev_up(arvif->ar, arvif->vdev_id, arvif->aid, arvif->bssid); if (ret) { ath10k_warn(ar, "failed to bring up ap vdev %i: %d\n", arvif->vdev_id, ret); return ret; } return 0; } static void ath10k_control_beaconing(struct ath10k_vif *arvif, struct ieee80211_bss_conf *info) { struct ath10k *ar = arvif->ar; int ret = 0; lockdep_assert_held(&arvif->ar->conf_mutex); if (!info->enable_beacon) { ret = ath10k_wmi_vdev_down(ar, arvif->vdev_id); if (ret) ath10k_warn(ar, "failed to down vdev_id %i: %d\n", arvif->vdev_id, ret); arvif->is_up = false; spin_lock_bh(&arvif->ar->data_lock); ath10k_mac_vif_beacon_free(arvif); spin_unlock_bh(&arvif->ar->data_lock); return; } arvif->tx_seq_no = 0x1000; arvif->aid = 0; ether_addr_copy(arvif->bssid, info->bssid); ret = ath10k_wmi_vdev_up(arvif->ar, arvif->vdev_id, arvif->aid, arvif->bssid); if (ret) { ath10k_warn(ar, "failed to bring up vdev %d: %i\n", arvif->vdev_id, ret); return; } arvif->is_up = true; ret = ath10k_mac_vif_fix_hidden_ssid(arvif); if (ret) { ath10k_warn(ar, "failed to fix hidden ssid for vdev %i, expect trouble: %d\n", arvif->vdev_id, ret); return; } ath10k_dbg(ar, ATH10K_DBG_MAC, "mac vdev %d up\n", arvif->vdev_id); } static void ath10k_control_ibss(struct ath10k_vif *arvif, struct ieee80211_vif *vif) { struct ath10k *ar = arvif->ar; u32 vdev_param; int ret = 0; lockdep_assert_held(&arvif->ar->conf_mutex); if (!vif->cfg.ibss_joined) { if (is_zero_ether_addr(arvif->bssid)) return; eth_zero_addr(arvif->bssid); return; } vdev_param = arvif->ar->wmi.vdev_param->atim_window; ret = ath10k_wmi_vdev_set_param(arvif->ar, arvif->vdev_id, vdev_param, ATH10K_DEFAULT_ATIM); if (ret) ath10k_warn(ar, "failed to set IBSS ATIM for vdev %d: %d\n", arvif->vdev_id, ret); } static int ath10k_mac_vif_recalc_ps_wake_threshold(struct ath10k_vif *arvif) { struct ath10k *ar = arvif->ar; u32 param; u32 value; int ret; lockdep_assert_held(&arvif->ar->conf_mutex); if (arvif->u.sta.uapsd) value = WMI_STA_PS_TX_WAKE_THRESHOLD_NEVER; else value = WMI_STA_PS_TX_WAKE_THRESHOLD_ALWAYS; param = WMI_STA_PS_PARAM_TX_WAKE_THRESHOLD; ret = ath10k_wmi_set_sta_ps_param(ar, arvif->vdev_id, param, value); if (ret) { ath10k_warn(ar, "failed to submit ps wake threshold %u on vdev %i: %d\n", value, arvif->vdev_id, ret); return ret; } return 0; } static int ath10k_mac_vif_recalc_ps_poll_count(struct ath10k_vif *arvif) { struct ath10k *ar = arvif->ar; u32 param; u32 value; int ret; lockdep_assert_held(&arvif->ar->conf_mutex); if (arvif->u.sta.uapsd) value = WMI_STA_PS_PSPOLL_COUNT_UAPSD; else value = WMI_STA_PS_PSPOLL_COUNT_NO_MAX; param = WMI_STA_PS_PARAM_PSPOLL_COUNT; ret = ath10k_wmi_set_sta_ps_param(ar, arvif->vdev_id, param, value); if (ret) { ath10k_warn(ar, "failed to submit ps poll count %u on vdev %i: %d\n", value, arvif->vdev_id, ret); return ret; } return 0; } static int ath10k_mac_num_vifs_started(struct ath10k *ar) { struct ath10k_vif *arvif; int num = 0; lockdep_assert_held(&ar->conf_mutex); list_for_each_entry(arvif, &ar->arvifs, list) if (arvif->is_started) num++; return num; } static int ath10k_mac_vif_setup_ps(struct ath10k_vif *arvif) { struct ath10k *ar = arvif->ar; struct ieee80211_vif *vif = arvif->vif; struct ieee80211_conf *conf = &ar->hw->conf; enum wmi_sta_powersave_param param; enum wmi_sta_ps_mode psmode; int ret; int ps_timeout; bool enable_ps; lockdep_assert_held(&arvif->ar->conf_mutex); if (arvif->vif->type != NL80211_IFTYPE_STATION) return 0; enable_ps = arvif->ps; if (enable_ps && ath10k_mac_num_vifs_started(ar) > 1 && !test_bit(ATH10K_FW_FEATURE_MULTI_VIF_PS_SUPPORT, ar->running_fw->fw_file.fw_features)) { ath10k_warn(ar, "refusing to enable ps on vdev %i: not supported by fw\n", arvif->vdev_id); enable_ps = false; } if (!arvif->is_started) { /* mac80211 can update vif powersave state while disconnected. * Firmware doesn't behave nicely and consumes more power than * necessary if PS is disabled on a non-started vdev. Hence * force-enable PS for non-running vdevs. */ psmode = WMI_STA_PS_MODE_ENABLED; } else if (enable_ps) { psmode = WMI_STA_PS_MODE_ENABLED; param = WMI_STA_PS_PARAM_INACTIVITY_TIME; ps_timeout = conf->dynamic_ps_timeout; if (ps_timeout == 0) { /* Firmware doesn't like 0 */ ps_timeout = ieee80211_tu_to_usec( vif->bss_conf.beacon_int) / 1000; } ret = ath10k_wmi_set_sta_ps_param(ar, arvif->vdev_id, param, ps_timeout); if (ret) { ath10k_warn(ar, "failed to set inactivity time for vdev %d: %i\n", arvif->vdev_id, ret); return ret; } } else { psmode = WMI_STA_PS_MODE_DISABLED; } ath10k_dbg(ar, ATH10K_DBG_MAC, "mac vdev %d psmode %s\n", arvif->vdev_id, psmode ? "enable" : "disable"); ret = ath10k_wmi_set_psmode(ar, arvif->vdev_id, psmode); if (ret) { ath10k_warn(ar, "failed to set PS Mode %d for vdev %d: %d\n", psmode, arvif->vdev_id, ret); return ret; } return 0; } static int ath10k_mac_vif_disable_keepalive(struct ath10k_vif *arvif) { struct ath10k *ar = arvif->ar; struct wmi_sta_keepalive_arg arg = {}; int ret; lockdep_assert_held(&arvif->ar->conf_mutex); if (arvif->vdev_type != WMI_VDEV_TYPE_STA) return 0; if (!test_bit(WMI_SERVICE_STA_KEEP_ALIVE, ar->wmi.svc_map)) return 0; /* Some firmware revisions have a bug and ignore the `enabled` field. * Instead use the interval to disable the keepalive. */ arg.vdev_id = arvif->vdev_id; arg.enabled = 1; arg.method = WMI_STA_KEEPALIVE_METHOD_NULL_FRAME; arg.interval = WMI_STA_KEEPALIVE_INTERVAL_DISABLE; ret = ath10k_wmi_sta_keepalive(ar, &arg); if (ret) { ath10k_warn(ar, "failed to submit keepalive on vdev %i: %d\n", arvif->vdev_id, ret); return ret; } return 0; } static void ath10k_mac_vif_ap_csa_count_down(struct ath10k_vif *arvif) { struct ath10k *ar = arvif->ar; struct ieee80211_vif *vif = arvif->vif; int ret; lockdep_assert_held(&arvif->ar->conf_mutex); if (WARN_ON(!test_bit(WMI_SERVICE_BEACON_OFFLOAD, ar->wmi.svc_map))) return; if (arvif->vdev_type != WMI_VDEV_TYPE_AP) return; if (!vif->bss_conf.csa_active) return; if (!arvif->is_up) return; if (!ieee80211_beacon_cntdwn_is_complete(vif, 0)) { ieee80211_beacon_update_cntdwn(vif, 0); ret = ath10k_mac_setup_bcn_tmpl(arvif); if (ret) ath10k_warn(ar, "failed to update bcn tmpl during csa: %d\n", ret); ret = ath10k_mac_setup_prb_tmpl(arvif); if (ret) ath10k_warn(ar, "failed to update prb tmpl during csa: %d\n", ret); } else { ieee80211_csa_finish(vif, 0); } } static void ath10k_mac_vif_ap_csa_work(struct work_struct *work) { struct ath10k_vif *arvif = container_of(work, struct ath10k_vif, ap_csa_work); struct ath10k *ar = arvif->ar; mutex_lock(&ar->conf_mutex); ath10k_mac_vif_ap_csa_count_down(arvif); mutex_unlock(&ar->conf_mutex); } static void ath10k_mac_handle_beacon_iter(void *data, u8 *mac, struct ieee80211_vif *vif) { struct sk_buff *skb = data; struct ieee80211_mgmt *mgmt = (void *)skb->data; struct ath10k_vif *arvif = (void *)vif->drv_priv; if (vif->type != NL80211_IFTYPE_STATION) return; if (!ether_addr_equal(mgmt->bssid, vif->bss_conf.bssid)) return; cancel_delayed_work(&arvif->connection_loss_work); } void ath10k_mac_handle_beacon(struct ath10k *ar, struct sk_buff *skb) { ieee80211_iterate_active_interfaces_atomic(ar->hw, ATH10K_ITER_NORMAL_FLAGS, ath10k_mac_handle_beacon_iter, skb); } static void ath10k_mac_handle_beacon_miss_iter(void *data, u8 *mac, struct ieee80211_vif *vif) { u32 *vdev_id = data; struct ath10k_vif *arvif = (void *)vif->drv_priv; struct ath10k *ar = arvif->ar; struct ieee80211_hw *hw = ar->hw; if (arvif->vdev_id != *vdev_id) return; if (!arvif->is_up) return; ieee80211_beacon_loss(vif); /* Firmware doesn't report beacon loss events repeatedly. If AP probe * (done by mac80211) succeeds but beacons do not resume then it * doesn't make sense to continue operation. Queue connection loss work * which can be cancelled when beacon is received. */ ieee80211_queue_delayed_work(hw, &arvif->connection_loss_work, ATH10K_CONNECTION_LOSS_HZ); } void ath10k_mac_handle_beacon_miss(struct ath10k *ar, u32 vdev_id) { ieee80211_iterate_active_interfaces_atomic(ar->hw, ATH10K_ITER_NORMAL_FLAGS, ath10k_mac_handle_beacon_miss_iter, &vdev_id); } static void ath10k_mac_vif_sta_connection_loss_work(struct work_struct *work) { struct ath10k_vif *arvif = container_of(work, struct ath10k_vif, connection_loss_work.work); struct ieee80211_vif *vif = arvif->vif; if (!arvif->is_up) return; ieee80211_connection_loss(vif); } /**********************/ /* Station management */ /**********************/ static u32 ath10k_peer_assoc_h_listen_intval(struct ath10k *ar, struct ieee80211_vif *vif) { /* Some firmware revisions have unstable STA powersave when listen * interval is set too high (e.g. 5). The symptoms are firmware doesn't * generate NullFunc frames properly even if buffered frames have been * indicated in Beacon TIM. Firmware would seldom wake up to pull * buffered frames. Often pinging the device from AP would simply fail. * * As a workaround set it to 1. */ if (vif->type == NL80211_IFTYPE_STATION) return 1; return ar->hw->conf.listen_interval; } static void ath10k_peer_assoc_h_basic(struct ath10k *ar, struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct wmi_peer_assoc_complete_arg *arg) { struct ath10k_vif *arvif = (void *)vif->drv_priv; u32 aid; lockdep_assert_held(&ar->conf_mutex); if (vif->type == NL80211_IFTYPE_STATION) aid = vif->cfg.aid; else aid = sta->aid; ether_addr_copy(arg->addr, sta->addr); arg->vdev_id = arvif->vdev_id; arg->peer_aid = aid; arg->peer_flags |= arvif->ar->wmi.peer_flags->auth; arg->peer_listen_intval = ath10k_peer_assoc_h_listen_intval(ar, vif); arg->peer_num_spatial_streams = 1; arg->peer_caps = vif->bss_conf.assoc_capability; } static void ath10k_peer_assoc_h_crypto(struct ath10k *ar, struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct wmi_peer_assoc_complete_arg *arg) { struct ieee80211_bss_conf *info = &vif->bss_conf; struct cfg80211_chan_def def; struct cfg80211_bss *bss; const u8 *rsnie = NULL; const u8 *wpaie = NULL; lockdep_assert_held(&ar->conf_mutex); if (WARN_ON(ath10k_mac_vif_chan(vif, &def))) return; bss = cfg80211_get_bss(ar->hw->wiphy, def.chan, info->bssid, vif->cfg.ssid_len ? vif->cfg.ssid : NULL, vif->cfg.ssid_len, IEEE80211_BSS_TYPE_ANY, IEEE80211_PRIVACY_ANY); if (bss) { const struct cfg80211_bss_ies *ies; rcu_read_lock(); rsnie = ieee80211_bss_get_ie(bss, WLAN_EID_RSN); ies = rcu_dereference(bss->ies); wpaie = cfg80211_find_vendor_ie(WLAN_OUI_MICROSOFT, WLAN_OUI_TYPE_MICROSOFT_WPA, ies->data, ies->len); rcu_read_unlock(); cfg80211_put_bss(ar->hw->wiphy, bss); } /* FIXME: base on RSN IE/WPA IE is a correct idea? */ if (rsnie || wpaie) { ath10k_dbg(ar, ATH10K_DBG_WMI, "%s: rsn ie found\n", __func__); arg->peer_flags |= ar->wmi.peer_flags->need_ptk_4_way; } if (wpaie) { ath10k_dbg(ar, ATH10K_DBG_WMI, "%s: wpa ie found\n", __func__); arg->peer_flags |= ar->wmi.peer_flags->need_gtk_2_way; } if (sta->mfp && test_bit(ATH10K_FW_FEATURE_MFP_SUPPORT, ar->running_fw->fw_file.fw_features)) { arg->peer_flags |= ar->wmi.peer_flags->pmf; } } static void ath10k_peer_assoc_h_rates(struct ath10k *ar, struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct wmi_peer_assoc_complete_arg *arg) { struct ath10k_vif *arvif = (void *)vif->drv_priv; struct wmi_rate_set_arg *rateset = &arg->peer_legacy_rates; struct cfg80211_chan_def def; const struct ieee80211_supported_band *sband; const struct ieee80211_rate *rates; enum nl80211_band band; u32 ratemask; u8 rate; int i; lockdep_assert_held(&ar->conf_mutex); if (WARN_ON(ath10k_mac_vif_chan(vif, &def))) return; band = def.chan->band; sband = ar->hw->wiphy->bands[band]; ratemask = sta->deflink.supp_rates[band]; ratemask &= arvif->bitrate_mask.control[band].legacy; rates = sband->bitrates; rateset->num_rates = 0; for (i = 0; i < 32; i++, ratemask >>= 1, rates++) { if (!(ratemask & 1)) continue; rate = ath10k_mac_bitrate_to_rate(rates->bitrate); rateset->rates[rateset->num_rates] = rate; rateset->num_rates++; } } static bool ath10k_peer_assoc_h_ht_masked(const u8 ht_mcs_mask[IEEE80211_HT_MCS_MASK_LEN]) { int nss; for (nss = 0; nss < IEEE80211_HT_MCS_MASK_LEN; nss++) if (ht_mcs_mask[nss]) return false; return true; } static bool ath10k_peer_assoc_h_vht_masked(const u16 vht_mcs_mask[NL80211_VHT_NSS_MAX]) { int nss; for (nss = 0; nss < NL80211_VHT_NSS_MAX; nss++) if (vht_mcs_mask[nss]) return false; return true; } static void ath10k_peer_assoc_h_ht(struct ath10k *ar, struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct wmi_peer_assoc_complete_arg *arg) { const struct ieee80211_sta_ht_cap *ht_cap = &sta->deflink.ht_cap; struct ath10k_vif *arvif = (void *)vif->drv_priv; struct cfg80211_chan_def def; enum nl80211_band band; const u8 *ht_mcs_mask; const u16 *vht_mcs_mask; int i, n; u8 max_nss; u32 stbc; lockdep_assert_held(&ar->conf_mutex); if (WARN_ON(ath10k_mac_vif_chan(vif, &def))) return; if (!ht_cap->ht_supported) return; band = def.chan->band; ht_mcs_mask = arvif->bitrate_mask.control[band].ht_mcs; vht_mcs_mask = arvif->bitrate_mask.control[band].vht_mcs; if (ath10k_peer_assoc_h_ht_masked(ht_mcs_mask) && ath10k_peer_assoc_h_vht_masked(vht_mcs_mask)) return; arg->peer_flags |= ar->wmi.peer_flags->ht; arg->peer_max_mpdu = (1 << (IEEE80211_HT_MAX_AMPDU_FACTOR + ht_cap->ampdu_factor)) - 1; arg->peer_mpdu_density = ath10k_parse_mpdudensity(ht_cap->ampdu_density); arg->peer_ht_caps = ht_cap->cap; arg->peer_rate_caps |= WMI_RC_HT_FLAG; if (ht_cap->cap & IEEE80211_HT_CAP_LDPC_CODING) arg->peer_flags |= ar->wmi.peer_flags->ldbc; if (sta->deflink.bandwidth >= IEEE80211_STA_RX_BW_40) { arg->peer_flags |= ar->wmi.peer_flags->bw40; arg->peer_rate_caps |= WMI_RC_CW40_FLAG; } if (arvif->bitrate_mask.control[band].gi != NL80211_TXRATE_FORCE_LGI) { if (ht_cap->cap & IEEE80211_HT_CAP_SGI_20) arg->peer_rate_caps |= WMI_RC_SGI_FLAG; if (ht_cap->cap & IEEE80211_HT_CAP_SGI_40) arg->peer_rate_caps |= WMI_RC_SGI_FLAG; } if (ht_cap->cap & IEEE80211_HT_CAP_TX_STBC) { arg->peer_rate_caps |= WMI_RC_TX_STBC_FLAG; arg->peer_flags |= ar->wmi.peer_flags->stbc; } if (ht_cap->cap & IEEE80211_HT_CAP_RX_STBC) { stbc = ht_cap->cap & IEEE80211_HT_CAP_RX_STBC; stbc = stbc >> IEEE80211_HT_CAP_RX_STBC_SHIFT; stbc = stbc << WMI_RC_RX_STBC_FLAG_S; arg->peer_rate_caps |= stbc; arg->peer_flags |= ar->wmi.peer_flags->stbc; } if (ht_cap->mcs.rx_mask[1] && ht_cap->mcs.rx_mask[2]) arg->peer_rate_caps |= WMI_RC_TS_FLAG; else if (ht_cap->mcs.rx_mask[1]) arg->peer_rate_caps |= WMI_RC_DS_FLAG; for (i = 0, n = 0, max_nss = 0; i < IEEE80211_HT_MCS_MASK_LEN * 8; i++) if ((ht_cap->mcs.rx_mask[i / 8] & BIT(i % 8)) && (ht_mcs_mask[i / 8] & BIT(i % 8))) { max_nss = (i / 8) + 1; arg->peer_ht_rates.rates[n++] = i; } /* * This is a workaround for HT-enabled STAs which break the spec * and have no HT capabilities RX mask (no HT RX MCS map). * * As per spec, in section 20.3.5 Modulation and coding scheme (MCS), * MCS 0 through 7 are mandatory in 20MHz with 800 ns GI at all STAs. * * Firmware asserts if such situation occurs. */ if (n == 0) { arg->peer_ht_rates.num_rates = 8; for (i = 0; i < arg->peer_ht_rates.num_rates; i++) arg->peer_ht_rates.rates[i] = i; } else { arg->peer_ht_rates.num_rates = n; arg->peer_num_spatial_streams = min(sta->deflink.rx_nss, max_nss); } ath10k_dbg(ar, ATH10K_DBG_MAC, "mac ht peer %pM mcs cnt %d nss %d\n", arg->addr, arg->peer_ht_rates.num_rates, arg->peer_num_spatial_streams); } static int ath10k_peer_assoc_qos_ap(struct ath10k *ar, struct ath10k_vif *arvif, struct ieee80211_sta *sta) { u32 uapsd = 0; u32 max_sp = 0; int ret = 0; lockdep_assert_held(&ar->conf_mutex); if (sta->wme && sta->uapsd_queues) { ath10k_dbg(ar, ATH10K_DBG_MAC, "mac uapsd_queues 0x%x max_sp %d\n", sta->uapsd_queues, sta->max_sp); if (sta->uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_VO) uapsd |= WMI_AP_PS_UAPSD_AC3_DELIVERY_EN | WMI_AP_PS_UAPSD_AC3_TRIGGER_EN; if (sta->uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_VI) uapsd |= WMI_AP_PS_UAPSD_AC2_DELIVERY_EN | WMI_AP_PS_UAPSD_AC2_TRIGGER_EN; if (sta->uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_BK) uapsd |= WMI_AP_PS_UAPSD_AC1_DELIVERY_EN | WMI_AP_PS_UAPSD_AC1_TRIGGER_EN; if (sta->uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_BE) uapsd |= WMI_AP_PS_UAPSD_AC0_DELIVERY_EN | WMI_AP_PS_UAPSD_AC0_TRIGGER_EN; if (sta->max_sp < MAX_WMI_AP_PS_PEER_PARAM_MAX_SP) max_sp = sta->max_sp; ret = ath10k_wmi_set_ap_ps_param(ar, arvif->vdev_id, sta->addr, WMI_AP_PS_PEER_PARAM_UAPSD, uapsd); if (ret) { ath10k_warn(ar, "failed to set ap ps peer param uapsd for vdev %i: %d\n", arvif->vdev_id, ret); return ret; } ret = ath10k_wmi_set_ap_ps_param(ar, arvif->vdev_id, sta->addr, WMI_AP_PS_PEER_PARAM_MAX_SP, max_sp); if (ret) { ath10k_warn(ar, "failed to set ap ps peer param max sp for vdev %i: %d\n", arvif->vdev_id, ret); return ret; } /* TODO setup this based on STA listen interval and * beacon interval. Currently we don't know * sta->listen_interval - mac80211 patch required. * Currently use 10 seconds */ ret = ath10k_wmi_set_ap_ps_param(ar, arvif->vdev_id, sta->addr, WMI_AP_PS_PEER_PARAM_AGEOUT_TIME, 10); if (ret) { ath10k_warn(ar, "failed to set ap ps peer param ageout time for vdev %i: %d\n", arvif->vdev_id, ret); return ret; } } return 0; } static u16 ath10k_peer_assoc_h_vht_limit(u16 tx_mcs_set, const u16 vht_mcs_limit[NL80211_VHT_NSS_MAX]) { int idx_limit; int nss; u16 mcs_map; u16 mcs; for (nss = 0; nss < NL80211_VHT_NSS_MAX; nss++) { mcs_map = ath10k_mac_get_max_vht_mcs_map(tx_mcs_set, nss) & vht_mcs_limit[nss]; if (mcs_map) idx_limit = fls(mcs_map) - 1; else idx_limit = -1; switch (idx_limit) { case 0: case 1: case 2: case 3: case 4: case 5: case 6: default: /* see ath10k_mac_can_set_bitrate_mask() */ WARN_ON(1); fallthrough; case -1: mcs = IEEE80211_VHT_MCS_NOT_SUPPORTED; break; case 7: mcs = IEEE80211_VHT_MCS_SUPPORT_0_7; break; case 8: mcs = IEEE80211_VHT_MCS_SUPPORT_0_8; break; case 9: mcs = IEEE80211_VHT_MCS_SUPPORT_0_9; break; } tx_mcs_set &= ~(0x3 << (nss * 2)); tx_mcs_set |= mcs << (nss * 2); } return tx_mcs_set; } static u32 get_160mhz_nss_from_maxrate(int rate) { u32 nss; switch (rate) { case 780: nss = 1; break; case 1560: nss = 2; break; case 2106: nss = 3; /* not support MCS9 from spec*/ break; case 3120: nss = 4; break; default: nss = 1; } return nss; } static void ath10k_peer_assoc_h_vht(struct ath10k *ar, struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct wmi_peer_assoc_complete_arg *arg) { const struct ieee80211_sta_vht_cap *vht_cap = &sta->deflink.vht_cap; struct ath10k_vif *arvif = (void *)vif->drv_priv; struct ath10k_hw_params *hw = &ar->hw_params; struct cfg80211_chan_def def; enum nl80211_band band; const u16 *vht_mcs_mask; u8 ampdu_factor; u8 max_nss, vht_mcs; int i; if (WARN_ON(ath10k_mac_vif_chan(vif, &def))) return; if (!vht_cap->vht_supported) return; band = def.chan->band; vht_mcs_mask = arvif->bitrate_mask.control[band].vht_mcs; if (ath10k_peer_assoc_h_vht_masked(vht_mcs_mask)) return; arg->peer_flags |= ar->wmi.peer_flags->vht; if (def.chan->band == NL80211_BAND_2GHZ) arg->peer_flags |= ar->wmi.peer_flags->vht_2g; arg->peer_vht_caps = vht_cap->cap; ampdu_factor = (vht_cap->cap & IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK) >> IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_SHIFT; /* Workaround: Some Netgear/Linksys 11ac APs set Rx A-MPDU factor to * zero in VHT IE. Using it would result in degraded throughput. * arg->peer_max_mpdu at this point contains HT max_mpdu so keep * it if VHT max_mpdu is smaller. */ arg->peer_max_mpdu = max(arg->peer_max_mpdu, (1U << (IEEE80211_HT_MAX_AMPDU_FACTOR + ampdu_factor)) - 1); if (sta->deflink.bandwidth == IEEE80211_STA_RX_BW_80) arg->peer_flags |= ar->wmi.peer_flags->bw80; if (sta->deflink.bandwidth == IEEE80211_STA_RX_BW_160) arg->peer_flags |= ar->wmi.peer_flags->bw160; /* Calculate peer NSS capability from VHT capabilities if STA * supports VHT. */ for (i = 0, max_nss = 0, vht_mcs = 0; i < NL80211_VHT_NSS_MAX; i++) { vht_mcs = __le16_to_cpu(vht_cap->vht_mcs.rx_mcs_map) >> (2 * i) & 3; if ((vht_mcs != IEEE80211_VHT_MCS_NOT_SUPPORTED) && vht_mcs_mask[i]) max_nss = i + 1; } arg->peer_num_spatial_streams = min(sta->deflink.rx_nss, max_nss); arg->peer_vht_rates.rx_max_rate = __le16_to_cpu(vht_cap->vht_mcs.rx_highest); arg->peer_vht_rates.rx_mcs_set = __le16_to_cpu(vht_cap->vht_mcs.rx_mcs_map); arg->peer_vht_rates.tx_max_rate = __le16_to_cpu(vht_cap->vht_mcs.tx_highest); arg->peer_vht_rates.tx_mcs_set = ath10k_peer_assoc_h_vht_limit( __le16_to_cpu(vht_cap->vht_mcs.tx_mcs_map), vht_mcs_mask); /* Configure bandwidth-NSS mapping to FW * for the chip's tx chains setting on 160Mhz bw */ if (arg->peer_phymode == MODE_11AC_VHT160 || arg->peer_phymode == MODE_11AC_VHT80_80) { u32 rx_nss; u32 max_rate; max_rate = arg->peer_vht_rates.rx_max_rate; rx_nss = get_160mhz_nss_from_maxrate(max_rate); if (rx_nss == 0) rx_nss = arg->peer_num_spatial_streams; else rx_nss = min(arg->peer_num_spatial_streams, rx_nss); max_rate = hw->vht160_mcs_tx_highest; rx_nss = min(rx_nss, get_160mhz_nss_from_maxrate(max_rate)); arg->peer_bw_rxnss_override = FIELD_PREP(WMI_PEER_NSS_MAP_ENABLE, 1) | FIELD_PREP(WMI_PEER_NSS_160MHZ_MASK, (rx_nss - 1)); if (arg->peer_phymode == MODE_11AC_VHT80_80) { arg->peer_bw_rxnss_override |= FIELD_PREP(WMI_PEER_NSS_80_80MHZ_MASK, (rx_nss - 1)); } } ath10k_dbg(ar, ATH10K_DBG_MAC, "mac vht peer %pM max_mpdu %d flags 0x%x peer_rx_nss_override 0x%x\n", sta->addr, arg->peer_max_mpdu, arg->peer_flags, arg->peer_bw_rxnss_override); } static void ath10k_peer_assoc_h_qos(struct ath10k *ar, struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct wmi_peer_assoc_complete_arg *arg) { struct ath10k_vif *arvif = (void *)vif->drv_priv; switch (arvif->vdev_type) { case WMI_VDEV_TYPE_AP: if (sta->wme) arg->peer_flags |= arvif->ar->wmi.peer_flags->qos; if (sta->wme && sta->uapsd_queues) { arg->peer_flags |= arvif->ar->wmi.peer_flags->apsd; arg->peer_rate_caps |= WMI_RC_UAPSD_FLAG; } break; case WMI_VDEV_TYPE_STA: if (sta->wme) arg->peer_flags |= arvif->ar->wmi.peer_flags->qos; break; case WMI_VDEV_TYPE_IBSS: if (sta->wme) arg->peer_flags |= arvif->ar->wmi.peer_flags->qos; break; default: break; } ath10k_dbg(ar, ATH10K_DBG_MAC, "mac peer %pM qos %d\n", sta->addr, !!(arg->peer_flags & arvif->ar->wmi.peer_flags->qos)); } static bool ath10k_mac_sta_has_ofdm_only(struct ieee80211_sta *sta) { return sta->deflink.supp_rates[NL80211_BAND_2GHZ] >> ATH10K_MAC_FIRST_OFDM_RATE_IDX; } static enum wmi_phy_mode ath10k_mac_get_phymode_vht(struct ath10k *ar, struct ieee80211_sta *sta) { struct ieee80211_sta_vht_cap *vht_cap = &sta->deflink.vht_cap; if (sta->deflink.bandwidth == IEEE80211_STA_RX_BW_160) { switch (vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) { case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ: return MODE_11AC_VHT160; case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ: return MODE_11AC_VHT80_80; default: /* not sure if this is a valid case? */ return MODE_11AC_VHT160; } } if (sta->deflink.bandwidth == IEEE80211_STA_RX_BW_80) return MODE_11AC_VHT80; if (sta->deflink.bandwidth == IEEE80211_STA_RX_BW_40) return MODE_11AC_VHT40; if (sta->deflink.bandwidth == IEEE80211_STA_RX_BW_20) return MODE_11AC_VHT20; return MODE_UNKNOWN; } static void ath10k_peer_assoc_h_phymode(struct ath10k *ar, struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct wmi_peer_assoc_complete_arg *arg) { struct ath10k_vif *arvif = (void *)vif->drv_priv; struct cfg80211_chan_def def; enum nl80211_band band; const u8 *ht_mcs_mask; const u16 *vht_mcs_mask; enum wmi_phy_mode phymode = MODE_UNKNOWN; if (WARN_ON(ath10k_mac_vif_chan(vif, &def))) return; band = def.chan->band; ht_mcs_mask = arvif->bitrate_mask.control[band].ht_mcs; vht_mcs_mask = arvif->bitrate_mask.control[band].vht_mcs; switch (band) { case NL80211_BAND_2GHZ: if (sta->deflink.vht_cap.vht_supported && !ath10k_peer_assoc_h_vht_masked(vht_mcs_mask)) { if (sta->deflink.bandwidth == IEEE80211_STA_RX_BW_40) phymode = MODE_11AC_VHT40; else phymode = MODE_11AC_VHT20; } else if (sta->deflink.ht_cap.ht_supported && !ath10k_peer_assoc_h_ht_masked(ht_mcs_mask)) { if (sta->deflink.bandwidth == IEEE80211_STA_RX_BW_40) phymode = MODE_11NG_HT40; else phymode = MODE_11NG_HT20; } else if (ath10k_mac_sta_has_ofdm_only(sta)) { phymode = MODE_11G; } else { phymode = MODE_11B; } break; case NL80211_BAND_5GHZ: /* * Check VHT first. */ if (sta->deflink.vht_cap.vht_supported && !ath10k_peer_assoc_h_vht_masked(vht_mcs_mask)) { phymode = ath10k_mac_get_phymode_vht(ar, sta); } else if (sta->deflink.ht_cap.ht_supported && !ath10k_peer_assoc_h_ht_masked(ht_mcs_mask)) { if (sta->deflink.bandwidth >= IEEE80211_STA_RX_BW_40) phymode = MODE_11NA_HT40; else phymode = MODE_11NA_HT20; } else { phymode = MODE_11A; } break; default: break; } ath10k_dbg(ar, ATH10K_DBG_MAC, "mac peer %pM phymode %s\n", sta->addr, ath10k_wmi_phymode_str(phymode)); arg->peer_phymode = phymode; WARN_ON(phymode == MODE_UNKNOWN); } static int ath10k_peer_assoc_prepare(struct ath10k *ar, struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct wmi_peer_assoc_complete_arg *arg) { lockdep_assert_held(&ar->conf_mutex); memset(arg, 0, sizeof(*arg)); ath10k_peer_assoc_h_basic(ar, vif, sta, arg); ath10k_peer_assoc_h_crypto(ar, vif, sta, arg); ath10k_peer_assoc_h_rates(ar, vif, sta, arg); ath10k_peer_assoc_h_ht(ar, vif, sta, arg); ath10k_peer_assoc_h_phymode(ar, vif, sta, arg); ath10k_peer_assoc_h_vht(ar, vif, sta, arg); ath10k_peer_assoc_h_qos(ar, vif, sta, arg); return 0; } static const u32 ath10k_smps_map[] = { [WLAN_HT_CAP_SM_PS_STATIC] = WMI_PEER_SMPS_STATIC, [WLAN_HT_CAP_SM_PS_DYNAMIC] = WMI_PEER_SMPS_DYNAMIC, [WLAN_HT_CAP_SM_PS_INVALID] = WMI_PEER_SMPS_PS_NONE, [WLAN_HT_CAP_SM_PS_DISABLED] = WMI_PEER_SMPS_PS_NONE, }; static int ath10k_setup_peer_smps(struct ath10k *ar, struct ath10k_vif *arvif, const u8 *addr, const struct ieee80211_sta_ht_cap *ht_cap) { int smps; if (!ht_cap->ht_supported) return 0; smps = ht_cap->cap & IEEE80211_HT_CAP_SM_PS; smps >>= IEEE80211_HT_CAP_SM_PS_SHIFT; if (smps >= ARRAY_SIZE(ath10k_smps_map)) return -EINVAL; return ath10k_wmi_peer_set_param(ar, arvif->vdev_id, addr, ar->wmi.peer_param->smps_state, ath10k_smps_map[smps]); } static int ath10k_mac_vif_recalc_txbf(struct ath10k *ar, struct ieee80211_vif *vif, struct ieee80211_sta_vht_cap vht_cap) { struct ath10k_vif *arvif = (void *)vif->drv_priv; int ret; u32 param; u32 value; if (ath10k_wmi_get_txbf_conf_scheme(ar) != WMI_TXBF_CONF_AFTER_ASSOC) return 0; if (!(ar->vht_cap_info & (IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE | IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE | IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE | IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE))) return 0; param = ar->wmi.vdev_param->txbf; value = 0; if (WARN_ON(param == WMI_VDEV_PARAM_UNSUPPORTED)) return 0; /* The following logic is correct. If a remote STA advertises support * for being a beamformer then we should enable us being a beamformee. */ if (ar->vht_cap_info & (IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE | IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE)) { if (vht_cap.cap & IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE) value |= WMI_VDEV_PARAM_TXBF_SU_TX_BFEE; if (vht_cap.cap & IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE) value |= WMI_VDEV_PARAM_TXBF_MU_TX_BFEE; } if (ar->vht_cap_info & (IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE | IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE)) { if (vht_cap.cap & IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE) value |= WMI_VDEV_PARAM_TXBF_SU_TX_BFER; if (vht_cap.cap & IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE) value |= WMI_VDEV_PARAM_TXBF_MU_TX_BFER; } if (value & WMI_VDEV_PARAM_TXBF_MU_TX_BFEE) value |= WMI_VDEV_PARAM_TXBF_SU_TX_BFEE; if (value & WMI_VDEV_PARAM_TXBF_MU_TX_BFER) value |= WMI_VDEV_PARAM_TXBF_SU_TX_BFER; ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, param, value); if (ret) { ath10k_warn(ar, "failed to submit vdev param txbf 0x%x: %d\n", value, ret); return ret; } return 0; } static bool ath10k_mac_is_connected(struct ath10k *ar) { struct ath10k_vif *arvif; list_for_each_entry(arvif, &ar->arvifs, list) { if (arvif->is_up && arvif->vdev_type == WMI_VDEV_TYPE_STA) return true; } return false; } static int ath10k_mac_txpower_setup(struct ath10k *ar, int txpower) { int ret; u32 param; int tx_power_2g, tx_power_5g; bool connected; lockdep_assert_held(&ar->conf_mutex); /* ath10k internally uses unit of 0.5 dBm so multiply by 2 */ tx_power_2g = txpower * 2; tx_power_5g = txpower * 2; connected = ath10k_mac_is_connected(ar); if (connected && ar->tx_power_2g_limit) if (tx_power_2g > ar->tx_power_2g_limit) tx_power_2g = ar->tx_power_2g_limit; if (connected && ar->tx_power_5g_limit) if (tx_power_5g > ar->tx_power_5g_limit) tx_power_5g = ar->tx_power_5g_limit; ath10k_dbg(ar, ATH10K_DBG_MAC, "mac txpower 2g: %d, 5g: %d\n", tx_power_2g, tx_power_5g); param = ar->wmi.pdev_param->txpower_limit2g; ret = ath10k_wmi_pdev_set_param(ar, param, tx_power_2g); if (ret) { ath10k_warn(ar, "failed to set 2g txpower %d: %d\n", tx_power_2g, ret); return ret; } param = ar->wmi.pdev_param->txpower_limit5g; ret = ath10k_wmi_pdev_set_param(ar, param, tx_power_5g); if (ret) { ath10k_warn(ar, "failed to set 5g txpower %d: %d\n", tx_power_5g, ret); return ret; } return 0; } static int ath10k_mac_txpower_recalc(struct ath10k *ar) { struct ath10k_vif *arvif; int ret, txpower = -1; lockdep_assert_held(&ar->conf_mutex); list_for_each_entry(arvif, &ar->arvifs, list) { /* txpower not initialized yet? */ if (arvif->txpower == INT_MIN) continue; if (txpower == -1) txpower = arvif->txpower; else txpower = min(txpower, arvif->txpower); } if (txpower == -1) return 0; ret = ath10k_mac_txpower_setup(ar, txpower); if (ret) { ath10k_warn(ar, "failed to setup tx power %d: %d\n", txpower, ret); return ret; } return 0; } static int ath10k_mac_set_sar_power(struct ath10k *ar) { if (!ar->hw_params.dynamic_sar_support) return -EOPNOTSUPP; if (!ath10k_mac_is_connected(ar)) return 0; /* if connected, then arvif->txpower must be valid */ return ath10k_mac_txpower_recalc(ar); } static int ath10k_mac_set_sar_specs(struct ieee80211_hw *hw, const struct cfg80211_sar_specs *sar) { const struct cfg80211_sar_sub_specs *sub_specs; struct ath10k *ar = hw->priv; u32 i; int ret; mutex_lock(&ar->conf_mutex); if (!ar->hw_params.dynamic_sar_support) { ret = -EOPNOTSUPP; goto err; } if (!sar || sar->type != NL80211_SAR_TYPE_POWER || sar->num_sub_specs == 0) { ret = -EINVAL; goto err; } sub_specs = sar->sub_specs; /* 0dbm is not a practical value for ath10k, so use 0 * as no SAR limitation on it. */ ar->tx_power_2g_limit = 0; ar->tx_power_5g_limit = 0; /* note the power is in 0.25dbm unit, while ath10k uses * 0.5dbm unit. */ for (i = 0; i < sar->num_sub_specs; i++) { if (sub_specs->freq_range_index == 0) ar->tx_power_2g_limit = sub_specs->power / 2; else if (sub_specs->freq_range_index == 1) ar->tx_power_5g_limit = sub_specs->power / 2; sub_specs++; } ret = ath10k_mac_set_sar_power(ar); if (ret) { ath10k_warn(ar, "failed to set sar power: %d", ret); goto err; } err: mutex_unlock(&ar->conf_mutex); return ret; } /* can be called only in mac80211 callbacks due to `key_count` usage */ static void ath10k_bss_assoc(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_bss_conf *bss_conf) { struct ath10k *ar = hw->priv; struct ath10k_vif *arvif = (void *)vif->drv_priv; struct ieee80211_sta_ht_cap ht_cap; struct ieee80211_sta_vht_cap vht_cap; struct wmi_peer_assoc_complete_arg peer_arg; struct ieee80211_sta *ap_sta; int ret; lockdep_assert_held(&ar->conf_mutex); ath10k_dbg(ar, ATH10K_DBG_MAC, "mac vdev %i assoc bssid %pM aid %d\n", arvif->vdev_id, arvif->bssid, arvif->aid); rcu_read_lock(); ap_sta = ieee80211_find_sta(vif, bss_conf->bssid); if (!ap_sta) { ath10k_warn(ar, "failed to find station entry for bss %pM vdev %i\n", bss_conf->bssid, arvif->vdev_id); rcu_read_unlock(); return; } /* ap_sta must be accessed only within rcu section which must be left * before calling ath10k_setup_peer_smps() which might sleep. */ ht_cap = ap_sta->deflink.ht_cap; vht_cap = ap_sta->deflink.vht_cap; ret = ath10k_peer_assoc_prepare(ar, vif, ap_sta, &peer_arg); if (ret) { ath10k_warn(ar, "failed to prepare peer assoc for %pM vdev %i: %d\n", bss_conf->bssid, arvif->vdev_id, ret); rcu_read_unlock(); return; } rcu_read_unlock(); ret = ath10k_wmi_peer_assoc(ar, &peer_arg); if (ret) { ath10k_warn(ar, "failed to run peer assoc for %pM vdev %i: %d\n", bss_conf->bssid, arvif->vdev_id, ret); return; } ret = ath10k_setup_peer_smps(ar, arvif, bss_conf->bssid, &ht_cap); if (ret) { ath10k_warn(ar, "failed to setup peer SMPS for vdev %i: %d\n", arvif->vdev_id, ret); return; } ret = ath10k_mac_vif_recalc_txbf(ar, vif, vht_cap); if (ret) { ath10k_warn(ar, "failed to recalc txbf for vdev %i on bss %pM: %d\n", arvif->vdev_id, bss_conf->bssid, ret); return; } ath10k_dbg(ar, ATH10K_DBG_MAC, "mac vdev %d up (associated) bssid %pM aid %d\n", arvif->vdev_id, bss_conf->bssid, vif->cfg.aid); WARN_ON(arvif->is_up); arvif->aid = vif->cfg.aid; ether_addr_copy(arvif->bssid, bss_conf->bssid); ret = ath10k_wmi_pdev_set_param(ar, ar->wmi.pdev_param->peer_stats_info_enable, 1); if (ret) ath10k_warn(ar, "failed to enable peer stats info: %d\n", ret); ret = ath10k_wmi_vdev_up(ar, arvif->vdev_id, arvif->aid, arvif->bssid); if (ret) { ath10k_warn(ar, "failed to set vdev %d up: %d\n", arvif->vdev_id, ret); return; } arvif->is_up = true; ath10k_mac_set_sar_power(ar); /* Workaround: Some firmware revisions (tested with qca6174 * WLAN.RM.2.0-00073) have buggy powersave state machine and must be * poked with peer param command. */ ret = ath10k_wmi_peer_set_param(ar, arvif->vdev_id, arvif->bssid, ar->wmi.peer_param->dummy_var, 1); if (ret) { ath10k_warn(ar, "failed to poke peer %pM param for ps workaround on vdev %i: %d\n", arvif->bssid, arvif->vdev_id, ret); return; } } static void ath10k_bss_disassoc(struct ieee80211_hw *hw, struct ieee80211_vif *vif) { struct ath10k *ar = hw->priv; struct ath10k_vif *arvif = (void *)vif->drv_priv; struct ieee80211_sta_vht_cap vht_cap = {}; int ret; lockdep_assert_held(&ar->conf_mutex); ath10k_dbg(ar, ATH10K_DBG_MAC, "mac vdev %i disassoc bssid %pM\n", arvif->vdev_id, arvif->bssid); ret = ath10k_wmi_vdev_down(ar, arvif->vdev_id); if (ret) ath10k_warn(ar, "failed to down vdev %i: %d\n", arvif->vdev_id, ret); arvif->def_wep_key_idx = -1; ret = ath10k_mac_vif_recalc_txbf(ar, vif, vht_cap); if (ret) { ath10k_warn(ar, "failed to recalc txbf for vdev %i: %d\n", arvif->vdev_id, ret); return; } arvif->is_up = false; ath10k_mac_txpower_recalc(ar); cancel_delayed_work_sync(&arvif->connection_loss_work); } static int ath10k_new_peer_tid_config(struct ath10k *ar, struct ieee80211_sta *sta, struct ath10k_vif *arvif) { struct wmi_per_peer_per_tid_cfg_arg arg = {}; struct ath10k_sta *arsta = (struct ath10k_sta *)sta->drv_priv; bool config_apply; int ret, i; for (i = 0; i < ATH10K_TID_MAX; i++) { config_apply = false; if (arvif->retry_long[i] || arvif->ampdu[i] || arvif->rate_ctrl[i] || arvif->rtscts[i]) { config_apply = true; arg.tid = i; arg.vdev_id = arvif->vdev_id; arg.retry_count = arvif->retry_long[i]; arg.aggr_control = arvif->ampdu[i]; arg.rate_ctrl = arvif->rate_ctrl[i]; arg.rcode_flags = arvif->rate_code[i]; if (arvif->rtscts[i]) arg.ext_tid_cfg_bitmap = WMI_EXT_TID_RTS_CTS_CONFIG; else arg.ext_tid_cfg_bitmap = 0; arg.rtscts_ctrl = arvif->rtscts[i]; } if (arvif->noack[i]) { arg.ack_policy = arvif->noack[i]; arg.rate_ctrl = WMI_TID_CONFIG_RATE_CONTROL_DEFAULT_LOWEST_RATE; arg.aggr_control = WMI_TID_CONFIG_AGGR_CONTROL_DISABLE; config_apply = true; } /* Assign default value(-1) to newly connected station. * This is to identify station specific tid configuration not * configured for the station. */ arsta->retry_long[i] = -1; arsta->noack[i] = -1; arsta->ampdu[i] = -1; if (!config_apply) continue; ether_addr_copy(arg.peer_macaddr.addr, sta->addr); ret = ath10k_wmi_set_per_peer_per_tid_cfg(ar, &arg); if (ret) { ath10k_warn(ar, "failed to set per tid retry/aggr config for sta %pM: %d\n", sta->addr, ret); return ret; } memset(&arg, 0, sizeof(arg)); } return 0; } static int ath10k_station_assoc(struct ath10k *ar, struct ieee80211_vif *vif, struct ieee80211_sta *sta, bool reassoc) { struct ath10k_vif *arvif = (void *)vif->drv_priv; struct wmi_peer_assoc_complete_arg peer_arg; int ret = 0; lockdep_assert_held(&ar->conf_mutex); ret = ath10k_peer_assoc_prepare(ar, vif, sta, &peer_arg); if (ret) { ath10k_warn(ar, "failed to prepare WMI peer assoc for %pM vdev %i: %i\n", sta->addr, arvif->vdev_id, ret); return ret; } ret = ath10k_wmi_peer_assoc(ar, &peer_arg); if (ret) { ath10k_warn(ar, "failed to run peer assoc for STA %pM vdev %i: %d\n", sta->addr, arvif->vdev_id, ret); return ret; } /* Re-assoc is run only to update supported rates for given station. It * doesn't make much sense to reconfigure the peer completely. */ if (!reassoc) { ret = ath10k_setup_peer_smps(ar, arvif, sta->addr, &sta->deflink.ht_cap); if (ret) { ath10k_warn(ar, "failed to setup peer SMPS for vdev %d: %d\n", arvif->vdev_id, ret); return ret; } ret = ath10k_peer_assoc_qos_ap(ar, arvif, sta); if (ret) { ath10k_warn(ar, "failed to set qos params for STA %pM for vdev %i: %d\n", sta->addr, arvif->vdev_id, ret); return ret; } if (!sta->wme) { arvif->num_legacy_stations++; ret = ath10k_recalc_rtscts_prot(arvif); if (ret) { ath10k_warn(ar, "failed to recalculate rts/cts prot for vdev %d: %d\n", arvif->vdev_id, ret); return ret; } } /* Plumb cached keys only for static WEP */ if ((arvif->def_wep_key_idx != -1) && (!sta->tdls)) { ret = ath10k_install_peer_wep_keys(arvif, sta->addr); if (ret) { ath10k_warn(ar, "failed to install peer wep keys for vdev %i: %d\n", arvif->vdev_id, ret); return ret; } } } if (!test_bit(WMI_SERVICE_PEER_TID_CONFIGS_SUPPORT, ar->wmi.svc_map)) return ret; return ath10k_new_peer_tid_config(ar, sta, arvif); } static int ath10k_station_disassoc(struct ath10k *ar, struct ieee80211_vif *vif, struct ieee80211_sta *sta) { struct ath10k_vif *arvif = (void *)vif->drv_priv; int ret = 0; lockdep_assert_held(&ar->conf_mutex); if (!sta->wme) { arvif->num_legacy_stations--; ret = ath10k_recalc_rtscts_prot(arvif); if (ret) { ath10k_warn(ar, "failed to recalculate rts/cts prot for vdev %d: %d\n", arvif->vdev_id, ret); return ret; } } ret = ath10k_clear_peer_keys(arvif, sta->addr); if (ret) { ath10k_warn(ar, "failed to clear all peer wep keys for vdev %i: %d\n", arvif->vdev_id, ret); return ret; } return ret; } /**************/ /* Regulatory */ /**************/ static int ath10k_update_channel_list(struct ath10k *ar) { struct ieee80211_hw *hw = ar->hw; struct ieee80211_supported_band **bands; enum nl80211_band band; struct ieee80211_channel *channel; struct wmi_scan_chan_list_arg arg = {}; struct wmi_channel_arg *ch; bool passive; int len; int ret; int i; lockdep_assert_held(&ar->conf_mutex); bands = hw->wiphy->bands; for (band = 0; band < NUM_NL80211_BANDS; band++) { if (!bands[band]) continue; for (i = 0; i < bands[band]->n_channels; i++) { if (bands[band]->channels[i].flags & IEEE80211_CHAN_DISABLED) continue; arg.n_channels++; } } len = sizeof(struct wmi_channel_arg) * arg.n_channels; arg.channels = kzalloc(len, GFP_KERNEL); if (!arg.channels) return -ENOMEM; ch = arg.channels; for (band = 0; band < NUM_NL80211_BANDS; band++) { if (!bands[band]) continue; for (i = 0; i < bands[band]->n_channels; i++) { channel = &bands[band]->channels[i]; if (channel->flags & IEEE80211_CHAN_DISABLED) continue; ch->allow_ht = true; /* FIXME: when should we really allow VHT? */ ch->allow_vht = true; ch->allow_ibss = !(channel->flags & IEEE80211_CHAN_NO_IR); ch->ht40plus = !(channel->flags & IEEE80211_CHAN_NO_HT40PLUS); ch->chan_radar = !!(channel->flags & IEEE80211_CHAN_RADAR); passive = channel->flags & IEEE80211_CHAN_NO_IR; ch->passive = passive; /* the firmware is ignoring the "radar" flag of the * channel and is scanning actively using Probe Requests * on "Radar detection"/DFS channels which are not * marked as "available" */ ch->passive |= ch->chan_radar; ch->freq = channel->center_freq; ch->band_center_freq1 = channel->center_freq; ch->min_power = 0; ch->max_power = channel->max_power * 2; ch->max_reg_power = channel->max_reg_power * 2; ch->max_antenna_gain = channel->max_antenna_gain; ch->reg_class_id = 0; /* FIXME */ /* FIXME: why use only legacy modes, why not any * HT/VHT modes? Would that even make any * difference? */ if (channel->band == NL80211_BAND_2GHZ) ch->mode = MODE_11G; else ch->mode = MODE_11A; if (WARN_ON_ONCE(ch->mode == MODE_UNKNOWN)) continue; ath10k_dbg(ar, ATH10K_DBG_WMI, "mac channel [%zd/%d] freq %d maxpower %d regpower %d antenna %d mode %d\n", ch - arg.channels, arg.n_channels, ch->freq, ch->max_power, ch->max_reg_power, ch->max_antenna_gain, ch->mode); ch++; } } ret = ath10k_wmi_scan_chan_list(ar, &arg); kfree(arg.channels); return ret; } static enum wmi_dfs_region ath10k_mac_get_dfs_region(enum nl80211_dfs_regions dfs_region) { switch (dfs_region) { case NL80211_DFS_UNSET: return WMI_UNINIT_DFS_DOMAIN; case NL80211_DFS_FCC: return WMI_FCC_DFS_DOMAIN; case NL80211_DFS_ETSI: return WMI_ETSI_DFS_DOMAIN; case NL80211_DFS_JP: return WMI_MKK4_DFS_DOMAIN; } return WMI_UNINIT_DFS_DOMAIN; } static void ath10k_regd_update(struct ath10k *ar) { struct reg_dmn_pair_mapping *regpair; int ret; enum wmi_dfs_region wmi_dfs_reg; enum nl80211_dfs_regions nl_dfs_reg; lockdep_assert_held(&ar->conf_mutex); ret = ath10k_update_channel_list(ar); if (ret) ath10k_warn(ar, "failed to update channel list: %d\n", ret); regpair = ar->ath_common.regulatory.regpair; if (IS_ENABLED(CONFIG_ATH10K_DFS_CERTIFIED) && ar->dfs_detector) { nl_dfs_reg = ar->dfs_detector->region; wmi_dfs_reg = ath10k_mac_get_dfs_region(nl_dfs_reg); } else { wmi_dfs_reg = WMI_UNINIT_DFS_DOMAIN; } /* Target allows setting up per-band regdomain but ath_common provides * a combined one only */ ret = ath10k_wmi_pdev_set_regdomain(ar, regpair->reg_domain, regpair->reg_domain, /* 2ghz */ regpair->reg_domain, /* 5ghz */ regpair->reg_2ghz_ctl, regpair->reg_5ghz_ctl, wmi_dfs_reg); if (ret) ath10k_warn(ar, "failed to set pdev regdomain: %d\n", ret); } static void ath10k_mac_update_channel_list(struct ath10k *ar, struct ieee80211_supported_band *band) { int i; if (ar->low_5ghz_chan && ar->high_5ghz_chan) { for (i = 0; i < band->n_channels; i++) { if (band->channels[i].center_freq < ar->low_5ghz_chan || band->channels[i].center_freq > ar->high_5ghz_chan) band->channels[i].flags |= IEEE80211_CHAN_DISABLED; } } } static void ath10k_reg_notifier(struct wiphy *wiphy, struct regulatory_request *request) { struct ieee80211_hw *hw = wiphy_to_ieee80211_hw(wiphy); struct ath10k *ar = hw->priv; bool result; ath_reg_notifier_apply(wiphy, request, &ar->ath_common.regulatory); if (IS_ENABLED(CONFIG_ATH10K_DFS_CERTIFIED) && ar->dfs_detector) { ath10k_dbg(ar, ATH10K_DBG_REGULATORY, "dfs region 0x%x\n", request->dfs_region); result = ar->dfs_detector->set_dfs_domain(ar->dfs_detector, request->dfs_region); if (!result) ath10k_warn(ar, "DFS region 0x%X not supported, will trigger radar for every pulse\n", request->dfs_region); } mutex_lock(&ar->conf_mutex); if (ar->state == ATH10K_STATE_ON) ath10k_regd_update(ar); mutex_unlock(&ar->conf_mutex); if (ar->phy_capability & WHAL_WLAN_11A_CAPABILITY) ath10k_mac_update_channel_list(ar, ar->hw->wiphy->bands[NL80211_BAND_5GHZ]); } static void ath10k_stop_radar_confirmation(struct ath10k *ar) { spin_lock_bh(&ar->data_lock); ar->radar_conf_state = ATH10K_RADAR_CONFIRMATION_STOPPED; spin_unlock_bh(&ar->data_lock); cancel_work_sync(&ar->radar_confirmation_work); } /***************/ /* TX handlers */ /***************/ enum ath10k_mac_tx_path { ATH10K_MAC_TX_HTT, ATH10K_MAC_TX_HTT_MGMT, ATH10K_MAC_TX_WMI_MGMT, ATH10K_MAC_TX_UNKNOWN, }; void ath10k_mac_tx_lock(struct ath10k *ar, int reason) { lockdep_assert_held(&ar->htt.tx_lock); WARN_ON(reason >= ATH10K_TX_PAUSE_MAX); ar->tx_paused |= BIT(reason); ieee80211_stop_queues(ar->hw); } static void ath10k_mac_tx_unlock_iter(void *data, u8 *mac, struct ieee80211_vif *vif) { struct ath10k *ar = data; struct ath10k_vif *arvif = (void *)vif->drv_priv; if (arvif->tx_paused) return; ieee80211_wake_queue(ar->hw, arvif->vdev_id); } void ath10k_mac_tx_unlock(struct ath10k *ar, int reason) { lockdep_assert_held(&ar->htt.tx_lock); WARN_ON(reason >= ATH10K_TX_PAUSE_MAX); ar->tx_paused &= ~BIT(reason); if (ar->tx_paused) return; ieee80211_iterate_active_interfaces_atomic(ar->hw, ATH10K_ITER_RESUME_FLAGS, ath10k_mac_tx_unlock_iter, ar); ieee80211_wake_queue(ar->hw, ar->hw->offchannel_tx_hw_queue); } void ath10k_mac_vif_tx_lock(struct ath10k_vif *arvif, int reason) { struct ath10k *ar = arvif->ar; lockdep_assert_held(&ar->htt.tx_lock); WARN_ON(reason >= BITS_PER_LONG); arvif->tx_paused |= BIT(reason); ieee80211_stop_queue(ar->hw, arvif->vdev_id); } void ath10k_mac_vif_tx_unlock(struct ath10k_vif *arvif, int reason) { struct ath10k *ar = arvif->ar; lockdep_assert_held(&ar->htt.tx_lock); WARN_ON(reason >= BITS_PER_LONG); arvif->tx_paused &= ~BIT(reason); if (ar->tx_paused) return; if (arvif->tx_paused) return; ieee80211_wake_queue(ar->hw, arvif->vdev_id); } static void ath10k_mac_vif_handle_tx_pause(struct ath10k_vif *arvif, enum wmi_tlv_tx_pause_id pause_id, enum wmi_tlv_tx_pause_action action) { struct ath10k *ar = arvif->ar; lockdep_assert_held(&ar->htt.tx_lock); switch (action) { case WMI_TLV_TX_PAUSE_ACTION_STOP: ath10k_mac_vif_tx_lock(arvif, pause_id); break; case WMI_TLV_TX_PAUSE_ACTION_WAKE: ath10k_mac_vif_tx_unlock(arvif, pause_id); break; default: ath10k_dbg(ar, ATH10K_DBG_BOOT, "received unknown tx pause action %d on vdev %i, ignoring\n", action, arvif->vdev_id); break; } } struct ath10k_mac_tx_pause { u32 vdev_id; enum wmi_tlv_tx_pause_id pause_id; enum wmi_tlv_tx_pause_action action; }; static void ath10k_mac_handle_tx_pause_iter(void *data, u8 *mac, struct ieee80211_vif *vif) { struct ath10k_vif *arvif = (void *)vif->drv_priv; struct ath10k_mac_tx_pause *arg = data; if (arvif->vdev_id != arg->vdev_id) return; ath10k_mac_vif_handle_tx_pause(arvif, arg->pause_id, arg->action); } void ath10k_mac_handle_tx_pause_vdev(struct ath10k *ar, u32 vdev_id, enum wmi_tlv_tx_pause_id pause_id, enum wmi_tlv_tx_pause_action action) { struct ath10k_mac_tx_pause arg = { .vdev_id = vdev_id, .pause_id = pause_id, .action = action, }; spin_lock_bh(&ar->htt.tx_lock); ieee80211_iterate_active_interfaces_atomic(ar->hw, ATH10K_ITER_RESUME_FLAGS, ath10k_mac_handle_tx_pause_iter, &arg); spin_unlock_bh(&ar->htt.tx_lock); } static enum ath10k_hw_txrx_mode ath10k_mac_tx_h_get_txmode(struct ath10k *ar, struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct sk_buff *skb) { const struct ieee80211_hdr *hdr = (void *)skb->data; const struct ath10k_skb_cb *skb_cb = ATH10K_SKB_CB(skb); __le16 fc = hdr->frame_control; if (IEEE80211_SKB_CB(skb)->flags & IEEE80211_TX_CTL_HW_80211_ENCAP) return ATH10K_HW_TXRX_ETHERNET; if (!vif || vif->type == NL80211_IFTYPE_MONITOR) return ATH10K_HW_TXRX_RAW; if (ieee80211_is_mgmt(fc)) return ATH10K_HW_TXRX_MGMT; /* Workaround: * * NullFunc frames are mostly used to ping if a client or AP are still * reachable and responsive. This implies tx status reports must be * accurate - otherwise either mac80211 or userspace (e.g. hostapd) can * come to a conclusion that the other end disappeared and tear down * BSS connection or it can never disconnect from BSS/client (which is * the case). * * Firmware with HTT older than 3.0 delivers incorrect tx status for * NullFunc frames to driver. However there's a HTT Mgmt Tx command * which seems to deliver correct tx reports for NullFunc frames. The * downside of using it is it ignores client powersave state so it can * end up disconnecting sleeping clients in AP mode. It should fix STA * mode though because AP don't sleep. */ if (ar->htt.target_version_major < 3 && (ieee80211_is_nullfunc(fc) || ieee80211_is_qos_nullfunc(fc)) && !test_bit(ATH10K_FW_FEATURE_HAS_WMI_MGMT_TX, ar->running_fw->fw_file.fw_features)) return ATH10K_HW_TXRX_MGMT; /* Workaround: * * Some wmi-tlv firmwares for qca6174 have broken Tx key selection for * NativeWifi txmode - it selects AP key instead of peer key. It seems * to work with Ethernet txmode so use it. * * FIXME: Check if raw mode works with TDLS. */ if (ieee80211_is_data_present(fc) && sta && sta->tdls) return ATH10K_HW_TXRX_ETHERNET; if (test_bit(ATH10K_FLAG_RAW_MODE, &ar->dev_flags) || skb_cb->flags & ATH10K_SKB_F_RAW_TX) return ATH10K_HW_TXRX_RAW; return ATH10K_HW_TXRX_NATIVE_WIFI; } static bool ath10k_tx_h_use_hwcrypto(struct ieee80211_vif *vif, struct sk_buff *skb) { const struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); const struct ieee80211_hdr *hdr = (void *)skb->data; const u32 mask = IEEE80211_TX_INTFL_DONT_ENCRYPT | IEEE80211_TX_CTL_INJECTED; if (!ieee80211_has_protected(hdr->frame_control)) return false; if ((info->flags & mask) == mask) return false; if (vif) return !((struct ath10k_vif *)vif->drv_priv)->nohwcrypt; return true; } /* HTT Tx uses Native Wifi tx mode which expects 802.11 frames without QoS * Control in the header. */ static void ath10k_tx_h_nwifi(struct ieee80211_hw *hw, struct sk_buff *skb) { struct ieee80211_hdr *hdr = (void *)skb->data; struct ath10k_skb_cb *cb = ATH10K_SKB_CB(skb); u8 *qos_ctl; if (!ieee80211_is_data_qos(hdr->frame_control)) return; qos_ctl = ieee80211_get_qos_ctl(hdr); memmove(skb->data + IEEE80211_QOS_CTL_LEN, skb->data, (void *)qos_ctl - (void *)skb->data); skb_pull(skb, IEEE80211_QOS_CTL_LEN); /* Some firmware revisions don't handle sending QoS NullFunc well. * These frames are mainly used for CQM purposes so it doesn't really * matter whether QoS NullFunc or NullFunc are sent. */ hdr = (void *)skb->data; if (ieee80211_is_qos_nullfunc(hdr->frame_control)) cb->flags &= ~ATH10K_SKB_F_QOS; hdr->frame_control &= ~__cpu_to_le16(IEEE80211_STYPE_QOS_DATA); } static void ath10k_tx_h_8023(struct sk_buff *skb) { struct ieee80211_hdr *hdr; struct rfc1042_hdr *rfc1042; struct ethhdr *eth; size_t hdrlen; u8 da[ETH_ALEN]; u8 sa[ETH_ALEN]; __be16 type; hdr = (void *)skb->data; hdrlen = ieee80211_hdrlen(hdr->frame_control); rfc1042 = (void *)skb->data + hdrlen; ether_addr_copy(da, ieee80211_get_DA(hdr)); ether_addr_copy(sa, ieee80211_get_SA(hdr)); type = rfc1042->snap_type; skb_pull(skb, hdrlen + sizeof(*rfc1042)); skb_push(skb, sizeof(*eth)); eth = (void *)skb->data; ether_addr_copy(eth->h_dest, da); ether_addr_copy(eth->h_source, sa); eth->h_proto = type; } static void ath10k_tx_h_add_p2p_noa_ie(struct ath10k *ar, struct ieee80211_vif *vif, struct sk_buff *skb) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; struct ath10k_vif *arvif = (void *)vif->drv_priv; /* This is case only for P2P_GO */ if (vif->type != NL80211_IFTYPE_AP || !vif->p2p) return; if (unlikely(ieee80211_is_probe_resp(hdr->frame_control))) { spin_lock_bh(&ar->data_lock); if (arvif->u.ap.noa_data) if (!pskb_expand_head(skb, 0, arvif->u.ap.noa_len, GFP_ATOMIC)) skb_put_data(skb, arvif->u.ap.noa_data, arvif->u.ap.noa_len); spin_unlock_bh(&ar->data_lock); } } static void ath10k_mac_tx_h_fill_cb(struct ath10k *ar, struct ieee80211_vif *vif, struct ieee80211_txq *txq, struct ieee80211_sta *sta, struct sk_buff *skb, u16 airtime) { struct ieee80211_hdr *hdr = (void *)skb->data; struct ath10k_skb_cb *cb = ATH10K_SKB_CB(skb); const struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); bool is_data = ieee80211_is_data(hdr->frame_control) || ieee80211_is_data_qos(hdr->frame_control); struct ath10k_vif *arvif = (void *)vif->drv_priv; struct ath10k_sta *arsta; u8 tid, *qos_ctl; bool noack = false; cb->flags = 0; if (info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP) { cb->flags |= ATH10K_SKB_F_QOS; /* Assume data frames are QoS */ goto finish_cb_fill; } if (!ath10k_tx_h_use_hwcrypto(vif, skb)) cb->flags |= ATH10K_SKB_F_NO_HWCRYPT; if (ieee80211_is_mgmt(hdr->frame_control)) cb->flags |= ATH10K_SKB_F_MGMT; if (ieee80211_is_data_qos(hdr->frame_control)) { cb->flags |= ATH10K_SKB_F_QOS; qos_ctl = ieee80211_get_qos_ctl(hdr); tid = (*qos_ctl) & IEEE80211_QOS_CTL_TID_MASK; if (arvif->noack[tid] == WMI_PEER_TID_CONFIG_NOACK) noack = true; if (sta) { arsta = (struct ath10k_sta *)sta->drv_priv; if (arsta->noack[tid] == WMI_PEER_TID_CONFIG_NOACK) noack = true; if (arsta->noack[tid] == WMI_PEER_TID_CONFIG_ACK) noack = false; } if (noack) cb->flags |= ATH10K_SKB_F_NOACK_TID; } /* Data frames encrypted in software will be posted to firmware * with tx encap mode set to RAW. Ex: Multicast traffic generated * for a specific VLAN group will always be encrypted in software. */ if (is_data && ieee80211_has_protected(hdr->frame_control) && !info->control.hw_key) { cb->flags |= ATH10K_SKB_F_NO_HWCRYPT; cb->flags |= ATH10K_SKB_F_RAW_TX; } finish_cb_fill: cb->vif = vif; cb->txq = txq; cb->airtime_est = airtime; if (sta) { arsta = (struct ath10k_sta *)sta->drv_priv; spin_lock_bh(&ar->data_lock); cb->ucast_cipher = arsta->ucast_cipher; spin_unlock_bh(&ar->data_lock); } } bool ath10k_mac_tx_frm_has_freq(struct ath10k *ar) { /* FIXME: Not really sure since when the behaviour changed. At some * point new firmware stopped requiring creation of peer entries for * offchannel tx (and actually creating them causes issues with wmi-htc * tx credit replenishment and reliability). Assuming it's at least 3.4 * because that's when the `freq` was introduced to TX_FRM HTT command. */ return (ar->htt.target_version_major >= 3 && ar->htt.target_version_minor >= 4 && ar->running_fw->fw_file.htt_op_version == ATH10K_FW_HTT_OP_VERSION_TLV); } static int ath10k_mac_tx_wmi_mgmt(struct ath10k *ar, struct sk_buff *skb) { struct sk_buff_head *q = &ar->wmi_mgmt_tx_queue; if (skb_queue_len_lockless(q) >= ATH10K_MAX_NUM_MGMT_PENDING) { ath10k_warn(ar, "wmi mgmt tx queue is full\n"); return -ENOSPC; } skb_queue_tail(q, skb); ieee80211_queue_work(ar->hw, &ar->wmi_mgmt_tx_work); return 0; } static enum ath10k_mac_tx_path ath10k_mac_tx_h_get_txpath(struct ath10k *ar, struct sk_buff *skb, enum ath10k_hw_txrx_mode txmode) { switch (txmode) { case ATH10K_HW_TXRX_RAW: case ATH10K_HW_TXRX_NATIVE_WIFI: case ATH10K_HW_TXRX_ETHERNET: return ATH10K_MAC_TX_HTT; case ATH10K_HW_TXRX_MGMT: if (test_bit(ATH10K_FW_FEATURE_HAS_WMI_MGMT_TX, ar->running_fw->fw_file.fw_features) || test_bit(WMI_SERVICE_MGMT_TX_WMI, ar->wmi.svc_map)) return ATH10K_MAC_TX_WMI_MGMT; else if (ar->htt.target_version_major >= 3) return ATH10K_MAC_TX_HTT; else return ATH10K_MAC_TX_HTT_MGMT; } return ATH10K_MAC_TX_UNKNOWN; } static int ath10k_mac_tx_submit(struct ath10k *ar, enum ath10k_hw_txrx_mode txmode, enum ath10k_mac_tx_path txpath, struct sk_buff *skb) { struct ath10k_htt *htt = &ar->htt; int ret = -EINVAL; switch (txpath) { case ATH10K_MAC_TX_HTT: ret = ath10k_htt_tx(htt, txmode, skb); break; case ATH10K_MAC_TX_HTT_MGMT: ret = ath10k_htt_mgmt_tx(htt, skb); break; case ATH10K_MAC_TX_WMI_MGMT: ret = ath10k_mac_tx_wmi_mgmt(ar, skb); break; case ATH10K_MAC_TX_UNKNOWN: WARN_ON_ONCE(1); ret = -EINVAL; break; } if (ret) { ath10k_warn(ar, "failed to transmit packet, dropping: %d\n", ret); ieee80211_free_txskb(ar->hw, skb); } return ret; } /* This function consumes the sk_buff regardless of return value as far as * caller is concerned so no freeing is necessary afterwards. */ static int ath10k_mac_tx(struct ath10k *ar, struct ieee80211_vif *vif, enum ath10k_hw_txrx_mode txmode, enum ath10k_mac_tx_path txpath, struct sk_buff *skb, bool noque_offchan) { struct ieee80211_hw *hw = ar->hw; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); const struct ath10k_skb_cb *skb_cb = ATH10K_SKB_CB(skb); int ret; /* We should disable CCK RATE due to P2P */ if (info->flags & IEEE80211_TX_CTL_NO_CCK_RATE) ath10k_dbg(ar, ATH10K_DBG_MAC, "IEEE80211_TX_CTL_NO_CCK_RATE\n"); switch (txmode) { case ATH10K_HW_TXRX_MGMT: case ATH10K_HW_TXRX_NATIVE_WIFI: ath10k_tx_h_nwifi(hw, skb); ath10k_tx_h_add_p2p_noa_ie(ar, vif, skb); ath10k_tx_h_seq_no(vif, skb); break; case ATH10K_HW_TXRX_ETHERNET: /* Convert 802.11->802.3 header only if the frame was earlier * encapsulated to 802.11 by mac80211. Otherwise pass it as is. */ if (!(info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP)) ath10k_tx_h_8023(skb); break; case ATH10K_HW_TXRX_RAW: if (!test_bit(ATH10K_FLAG_RAW_MODE, &ar->dev_flags) && !(skb_cb->flags & ATH10K_SKB_F_RAW_TX)) { WARN_ON_ONCE(1); ieee80211_free_txskb(hw, skb); return -EOPNOTSUPP; } } if (!noque_offchan && info->flags & IEEE80211_TX_CTL_TX_OFFCHAN) { if (!ath10k_mac_tx_frm_has_freq(ar)) { ath10k_dbg(ar, ATH10K_DBG_MAC, "mac queued offchannel skb %p len %d\n", skb, skb->len); skb_queue_tail(&ar->offchan_tx_queue, skb); ieee80211_queue_work(hw, &ar->offchan_tx_work); return 0; } } ret = ath10k_mac_tx_submit(ar, txmode, txpath, skb); if (ret) { ath10k_warn(ar, "failed to submit frame: %d\n", ret); return ret; } return 0; } void ath10k_offchan_tx_purge(struct ath10k *ar) { struct sk_buff *skb; for (;;) { skb = skb_dequeue(&ar->offchan_tx_queue); if (!skb) break; ieee80211_free_txskb(ar->hw, skb); } } void ath10k_offchan_tx_work(struct work_struct *work) { struct ath10k *ar = container_of(work, struct ath10k, offchan_tx_work); struct ath10k_peer *peer; struct ath10k_vif *arvif; enum ath10k_hw_txrx_mode txmode; enum ath10k_mac_tx_path txpath; struct ieee80211_hdr *hdr; struct ieee80211_vif *vif; struct ieee80211_sta *sta; struct sk_buff *skb; const u8 *peer_addr; int vdev_id; int ret; unsigned long time_left; bool tmp_peer_created = false; /* FW requirement: We must create a peer before FW will send out * an offchannel frame. Otherwise the frame will be stuck and * never transmitted. We delete the peer upon tx completion. * It is unlikely that a peer for offchannel tx will already be * present. However it may be in some rare cases so account for that. * Otherwise we might remove a legitimate peer and break stuff. */ for (;;) { skb = skb_dequeue(&ar->offchan_tx_queue); if (!skb) break; mutex_lock(&ar->conf_mutex); ath10k_dbg(ar, ATH10K_DBG_MAC, "mac offchannel skb %p len %d\n", skb, skb->len); hdr = (struct ieee80211_hdr *)skb->data; peer_addr = ieee80211_get_DA(hdr); spin_lock_bh(&ar->data_lock); vdev_id = ar->scan.vdev_id; peer = ath10k_peer_find(ar, vdev_id, peer_addr); spin_unlock_bh(&ar->data_lock); if (peer) { ath10k_warn(ar, "peer %pM on vdev %d already present\n", peer_addr, vdev_id); } else { ret = ath10k_peer_create(ar, NULL, NULL, vdev_id, peer_addr, WMI_PEER_TYPE_DEFAULT); if (ret) ath10k_warn(ar, "failed to create peer %pM on vdev %d: %d\n", peer_addr, vdev_id, ret); tmp_peer_created = (ret == 0); } spin_lock_bh(&ar->data_lock); reinit_completion(&ar->offchan_tx_completed); ar->offchan_tx_skb = skb; spin_unlock_bh(&ar->data_lock); /* It's safe to access vif and sta - conf_mutex guarantees that * sta_state() and remove_interface() are locked exclusively * out wrt to this offchannel worker. */ arvif = ath10k_get_arvif(ar, vdev_id); if (arvif) { vif = arvif->vif; sta = ieee80211_find_sta(vif, peer_addr); } else { vif = NULL; sta = NULL; } txmode = ath10k_mac_tx_h_get_txmode(ar, vif, sta, skb); txpath = ath10k_mac_tx_h_get_txpath(ar, skb, txmode); ret = ath10k_mac_tx(ar, vif, txmode, txpath, skb, true); if (ret) { ath10k_warn(ar, "failed to transmit offchannel frame: %d\n", ret); /* not serious */ } time_left = wait_for_completion_timeout(&ar->offchan_tx_completed, 3 * HZ); if (time_left == 0) ath10k_warn(ar, "timed out waiting for offchannel skb %p, len: %d\n", skb, skb->len); if (!peer && tmp_peer_created) { ret = ath10k_peer_delete(ar, vdev_id, peer_addr); if (ret) ath10k_warn(ar, "failed to delete peer %pM on vdev %d: %d\n", peer_addr, vdev_id, ret); } mutex_unlock(&ar->conf_mutex); } } void ath10k_mgmt_over_wmi_tx_purge(struct ath10k *ar) { struct sk_buff *skb; for (;;) { skb = skb_dequeue(&ar->wmi_mgmt_tx_queue); if (!skb) break; ieee80211_free_txskb(ar->hw, skb); } } void ath10k_mgmt_over_wmi_tx_work(struct work_struct *work) { struct ath10k *ar = container_of(work, struct ath10k, wmi_mgmt_tx_work); struct sk_buff *skb; dma_addr_t paddr; int ret; for (;;) { skb = skb_dequeue(&ar->wmi_mgmt_tx_queue); if (!skb) break; if (test_bit(ATH10K_FW_FEATURE_MGMT_TX_BY_REF, ar->running_fw->fw_file.fw_features)) { paddr = dma_map_single(ar->dev, skb->data, skb->len, DMA_TO_DEVICE); if (dma_mapping_error(ar->dev, paddr)) { ieee80211_free_txskb(ar->hw, skb); continue; } ret = ath10k_wmi_mgmt_tx_send(ar, skb, paddr); if (ret) { ath10k_warn(ar, "failed to transmit management frame by ref via WMI: %d\n", ret); /* remove this msdu from idr tracking */ ath10k_wmi_cleanup_mgmt_tx_send(ar, skb); dma_unmap_single(ar->dev, paddr, skb->len, DMA_TO_DEVICE); ieee80211_free_txskb(ar->hw, skb); } } else { ret = ath10k_wmi_mgmt_tx(ar, skb); if (ret) { ath10k_warn(ar, "failed to transmit management frame via WMI: %d\n", ret); ieee80211_free_txskb(ar->hw, skb); } } } } static void ath10k_mac_txq_init(struct ieee80211_txq *txq) { struct ath10k_txq *artxq; if (!txq) return; artxq = (void *)txq->drv_priv; INIT_LIST_HEAD(&artxq->list); } static void ath10k_mac_txq_unref(struct ath10k *ar, struct ieee80211_txq *txq) { struct ath10k_skb_cb *cb; struct sk_buff *msdu; int msdu_id; if (!txq) return; spin_lock_bh(&ar->htt.tx_lock); idr_for_each_entry(&ar->htt.pending_tx, msdu, msdu_id) { cb = ATH10K_SKB_CB(msdu); if (cb->txq == txq) cb->txq = NULL; } spin_unlock_bh(&ar->htt.tx_lock); } struct ieee80211_txq *ath10k_mac_txq_lookup(struct ath10k *ar, u16 peer_id, u8 tid) { struct ath10k_peer *peer; lockdep_assert_held(&ar->data_lock); peer = ar->peer_map[peer_id]; if (!peer) return NULL; if (peer->removed) return NULL; if (peer->sta) return peer->sta->txq[tid]; else if (peer->vif) return peer->vif->txq; else return NULL; } static bool ath10k_mac_tx_can_push(struct ieee80211_hw *hw, struct ieee80211_txq *txq) { struct ath10k *ar = hw->priv; struct ath10k_txq *artxq = (void *)txq->drv_priv; /* No need to get locks */ if (ar->htt.tx_q_state.mode == HTT_TX_MODE_SWITCH_PUSH) return true; if (ar->htt.num_pending_tx < ar->htt.tx_q_state.num_push_allowed) return true; if (artxq->num_fw_queued < artxq->num_push_allowed) return true; return false; } /* Return estimated airtime in microsecond, which is calculated using last * reported TX rate. This is just a rough estimation because host driver has no * knowledge of the actual transmit rate, retries or aggregation. If actual * airtime can be reported by firmware, then delta between estimated and actual * airtime can be adjusted from deficit. */ #define IEEE80211_ATF_OVERHEAD 100 /* IFS + some slot time */ #define IEEE80211_ATF_OVERHEAD_IFS 16 /* IFS only */ static u16 ath10k_mac_update_airtime(struct ath10k *ar, struct ieee80211_txq *txq, struct sk_buff *skb) { struct ath10k_sta *arsta; u32 pktlen; u16 airtime = 0; if (!txq || !txq->sta) return airtime; if (test_bit(WMI_SERVICE_REPORT_AIRTIME, ar->wmi.svc_map)) return airtime; spin_lock_bh(&ar->data_lock); arsta = (struct ath10k_sta *)txq->sta->drv_priv; pktlen = skb->len + 38; /* Assume MAC header 30, SNAP 8 for most case */ if (arsta->last_tx_bitrate) { /* airtime in us, last_tx_bitrate in 100kbps */ airtime = (pktlen * 8 * (1000 / 100)) / arsta->last_tx_bitrate; /* overhead for media access time and IFS */ airtime += IEEE80211_ATF_OVERHEAD_IFS; } else { /* This is mostly for throttle excessive BC/MC frames, and the * airtime/rate doesn't need be exact. Airtime of BC/MC frames * in 2G get some discount, which helps prevent very low rate * frames from being blocked for too long. */ airtime = (pktlen * 8 * (1000 / 100)) / 60; /* 6M */ airtime += IEEE80211_ATF_OVERHEAD; } spin_unlock_bh(&ar->data_lock); return airtime; } int ath10k_mac_tx_push_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq) { struct ath10k *ar = hw->priv; struct ath10k_htt *htt = &ar->htt; struct ath10k_txq *artxq = (void *)txq->drv_priv; struct ieee80211_vif *vif = txq->vif; struct ieee80211_sta *sta = txq->sta; enum ath10k_hw_txrx_mode txmode; enum ath10k_mac_tx_path txpath; struct sk_buff *skb; struct ieee80211_hdr *hdr; size_t skb_len; bool is_mgmt, is_presp; int ret; u16 airtime; spin_lock_bh(&ar->htt.tx_lock); ret = ath10k_htt_tx_inc_pending(htt); spin_unlock_bh(&ar->htt.tx_lock); if (ret) return ret; skb = ieee80211_tx_dequeue_ni(hw, txq); if (!skb) { spin_lock_bh(&ar->htt.tx_lock); ath10k_htt_tx_dec_pending(htt); spin_unlock_bh(&ar->htt.tx_lock); return -ENOENT; } airtime = ath10k_mac_update_airtime(ar, txq, skb); ath10k_mac_tx_h_fill_cb(ar, vif, txq, sta, skb, airtime); skb_len = skb->len; txmode = ath10k_mac_tx_h_get_txmode(ar, vif, sta, skb); txpath = ath10k_mac_tx_h_get_txpath(ar, skb, txmode); is_mgmt = (txpath == ATH10K_MAC_TX_HTT_MGMT); if (is_mgmt) { hdr = (struct ieee80211_hdr *)skb->data; is_presp = ieee80211_is_probe_resp(hdr->frame_control); spin_lock_bh(&ar->htt.tx_lock); ret = ath10k_htt_tx_mgmt_inc_pending(htt, is_mgmt, is_presp); if (ret) { ath10k_htt_tx_dec_pending(htt); spin_unlock_bh(&ar->htt.tx_lock); return ret; } spin_unlock_bh(&ar->htt.tx_lock); } ret = ath10k_mac_tx(ar, vif, txmode, txpath, skb, false); if (unlikely(ret)) { ath10k_warn(ar, "failed to push frame: %d\n", ret); spin_lock_bh(&ar->htt.tx_lock); ath10k_htt_tx_dec_pending(htt); if (is_mgmt) ath10k_htt_tx_mgmt_dec_pending(htt); spin_unlock_bh(&ar->htt.tx_lock); return ret; } spin_lock_bh(&ar->htt.tx_lock); artxq->num_fw_queued++; spin_unlock_bh(&ar->htt.tx_lock); return skb_len; } static int ath10k_mac_schedule_txq(struct ieee80211_hw *hw, u32 ac) { struct ieee80211_txq *txq; int ret = 0; ieee80211_txq_schedule_start(hw, ac); while ((txq = ieee80211_next_txq(hw, ac))) { while (ath10k_mac_tx_can_push(hw, txq)) { ret = ath10k_mac_tx_push_txq(hw, txq); if (ret < 0) break; } ieee80211_return_txq(hw, txq, false); ath10k_htt_tx_txq_update(hw, txq); if (ret == -EBUSY) break; } ieee80211_txq_schedule_end(hw, ac); return ret; } void ath10k_mac_tx_push_pending(struct ath10k *ar) { struct ieee80211_hw *hw = ar->hw; u32 ac; if (ar->htt.tx_q_state.mode != HTT_TX_MODE_SWITCH_PUSH) return; if (ar->htt.num_pending_tx >= (ar->htt.max_num_pending_tx / 2)) return; rcu_read_lock(); for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { if (ath10k_mac_schedule_txq(hw, ac) == -EBUSY) break; } rcu_read_unlock(); } EXPORT_SYMBOL(ath10k_mac_tx_push_pending); /************/ /* Scanning */ /************/ void __ath10k_scan_finish(struct ath10k *ar) { lockdep_assert_held(&ar->data_lock); switch (ar->scan.state) { case ATH10K_SCAN_IDLE: break; case ATH10K_SCAN_RUNNING: case ATH10K_SCAN_ABORTING: if (ar->scan.is_roc && ar->scan.roc_notify) ieee80211_remain_on_channel_expired(ar->hw); fallthrough; case ATH10K_SCAN_STARTING: if (!ar->scan.is_roc) { struct cfg80211_scan_info info = { .aborted = ((ar->scan.state == ATH10K_SCAN_ABORTING) || (ar->scan.state == ATH10K_SCAN_STARTING)), }; ieee80211_scan_completed(ar->hw, &info); } ar->scan.state = ATH10K_SCAN_IDLE; ar->scan_channel = NULL; ar->scan.roc_freq = 0; ath10k_offchan_tx_purge(ar); cancel_delayed_work(&ar->scan.timeout); complete(&ar->scan.completed); break; } } void ath10k_scan_finish(struct ath10k *ar) { spin_lock_bh(&ar->data_lock); __ath10k_scan_finish(ar); spin_unlock_bh(&ar->data_lock); } static int ath10k_scan_stop(struct ath10k *ar) { struct wmi_stop_scan_arg arg = { .req_id = 1, /* FIXME */ .req_type = WMI_SCAN_STOP_ONE, .u.scan_id = ATH10K_SCAN_ID, }; int ret; lockdep_assert_held(&ar->conf_mutex); ret = ath10k_wmi_stop_scan(ar, &arg); if (ret) { ath10k_warn(ar, "failed to stop wmi scan: %d\n", ret); goto out; } ret = wait_for_completion_timeout(&ar->scan.completed, 3 * HZ); if (ret == 0) { ath10k_warn(ar, "failed to receive scan abortion completion: timed out\n"); ret = -ETIMEDOUT; } else if (ret > 0) { ret = 0; } out: /* Scan state should be updated upon scan completion but in case * firmware fails to deliver the event (for whatever reason) it is * desired to clean up scan state anyway. Firmware may have just * dropped the scan completion event delivery due to transport pipe * being overflown with data and/or it can recover on its own before * next scan request is submitted. */ spin_lock_bh(&ar->data_lock); if (ar->scan.state != ATH10K_SCAN_IDLE) __ath10k_scan_finish(ar); spin_unlock_bh(&ar->data_lock); return ret; } static void ath10k_scan_abort(struct ath10k *ar) { int ret; lockdep_assert_held(&ar->conf_mutex); spin_lock_bh(&ar->data_lock); switch (ar->scan.state) { case ATH10K_SCAN_IDLE: /* This can happen if timeout worker kicked in and called * abortion while scan completion was being processed. */ break; case ATH10K_SCAN_STARTING: case ATH10K_SCAN_ABORTING: ath10k_warn(ar, "refusing scan abortion due to invalid scan state: %s (%d)\n", ath10k_scan_state_str(ar->scan.state), ar->scan.state); break; case ATH10K_SCAN_RUNNING: ar->scan.state = ATH10K_SCAN_ABORTING; spin_unlock_bh(&ar->data_lock); ret = ath10k_scan_stop(ar); if (ret) ath10k_warn(ar, "failed to abort scan: %d\n", ret); spin_lock_bh(&ar->data_lock); break; } spin_unlock_bh(&ar->data_lock); } void ath10k_scan_timeout_work(struct work_struct *work) { struct ath10k *ar = container_of(work, struct ath10k, scan.timeout.work); mutex_lock(&ar->conf_mutex); ath10k_scan_abort(ar); mutex_unlock(&ar->conf_mutex); } static int ath10k_start_scan(struct ath10k *ar, const struct wmi_start_scan_arg *arg) { int ret; lockdep_assert_held(&ar->conf_mutex); ret = ath10k_wmi_start_scan(ar, arg); if (ret) return ret; ret = wait_for_completion_timeout(&ar->scan.started, 1 * HZ); if (ret == 0) { ret = ath10k_scan_stop(ar); if (ret) ath10k_warn(ar, "failed to stop scan: %d\n", ret); return -ETIMEDOUT; } /* If we failed to start the scan, return error code at * this point. This is probably due to some issue in the * firmware, but no need to wedge the driver due to that... */ spin_lock_bh(&ar->data_lock); if (ar->scan.state == ATH10K_SCAN_IDLE) { spin_unlock_bh(&ar->data_lock); return -EINVAL; } spin_unlock_bh(&ar->data_lock); return 0; } /**********************/ /* mac80211 callbacks */ /**********************/ static void ath10k_mac_op_tx(struct ieee80211_hw *hw, struct ieee80211_tx_control *control, struct sk_buff *skb) { struct ath10k *ar = hw->priv; struct ath10k_htt *htt = &ar->htt; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ieee80211_vif *vif = info->control.vif; struct ieee80211_sta *sta = control->sta; struct ieee80211_txq *txq = NULL; enum ath10k_hw_txrx_mode txmode; enum ath10k_mac_tx_path txpath; bool is_htt; bool is_mgmt; int ret; u16 airtime; airtime = ath10k_mac_update_airtime(ar, txq, skb); ath10k_mac_tx_h_fill_cb(ar, vif, txq, sta, skb, airtime); txmode = ath10k_mac_tx_h_get_txmode(ar, vif, sta, skb); txpath = ath10k_mac_tx_h_get_txpath(ar, skb, txmode); is_htt = (txpath == ATH10K_MAC_TX_HTT || txpath == ATH10K_MAC_TX_HTT_MGMT); is_mgmt = (txpath == ATH10K_MAC_TX_HTT_MGMT); if (is_htt) { bool is_presp = false; spin_lock_bh(&ar->htt.tx_lock); if (!(info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP)) { struct ieee80211_hdr *hdr = (void *)skb->data; is_presp = ieee80211_is_probe_resp(hdr->frame_control); } ret = ath10k_htt_tx_inc_pending(htt); if (ret) { ath10k_warn(ar, "failed to increase tx pending count: %d, dropping\n", ret); spin_unlock_bh(&ar->htt.tx_lock); ieee80211_free_txskb(ar->hw, skb); return; } ret = ath10k_htt_tx_mgmt_inc_pending(htt, is_mgmt, is_presp); if (ret) { ath10k_dbg(ar, ATH10K_DBG_MAC, "failed to increase tx mgmt pending count: %d, dropping\n", ret); ath10k_htt_tx_dec_pending(htt); spin_unlock_bh(&ar->htt.tx_lock); ieee80211_free_txskb(ar->hw, skb); return; } spin_unlock_bh(&ar->htt.tx_lock); } ret = ath10k_mac_tx(ar, vif, txmode, txpath, skb, false); if (ret) { ath10k_warn(ar, "failed to transmit frame: %d\n", ret); if (is_htt) { spin_lock_bh(&ar->htt.tx_lock); ath10k_htt_tx_dec_pending(htt); if (is_mgmt) ath10k_htt_tx_mgmt_dec_pending(htt); spin_unlock_bh(&ar->htt.tx_lock); } return; } } static void ath10k_mac_op_wake_tx_queue(struct ieee80211_hw *hw, struct ieee80211_txq *txq) { struct ath10k *ar = hw->priv; int ret; u8 ac = txq->ac; ath10k_htt_tx_txq_update(hw, txq); if (ar->htt.tx_q_state.mode != HTT_TX_MODE_SWITCH_PUSH) return; spin_lock_bh(&ar->queue_lock[ac]); ieee80211_txq_schedule_start(hw, ac); txq = ieee80211_next_txq(hw, ac); if (!txq) goto out; while (ath10k_mac_tx_can_push(hw, txq)) { ret = ath10k_mac_tx_push_txq(hw, txq); if (ret < 0) break; } ieee80211_return_txq(hw, txq, false); ath10k_htt_tx_txq_update(hw, txq); out: ieee80211_txq_schedule_end(hw, ac); spin_unlock_bh(&ar->queue_lock[ac]); } /* Must not be called with conf_mutex held as workers can use that also. */ void ath10k_drain_tx(struct ath10k *ar) { lockdep_assert_not_held(&ar->conf_mutex); /* make sure rcu-protected mac80211 tx path itself is drained */ synchronize_net(); ath10k_offchan_tx_purge(ar); ath10k_mgmt_over_wmi_tx_purge(ar); cancel_work_sync(&ar->offchan_tx_work); cancel_work_sync(&ar->wmi_mgmt_tx_work); } void ath10k_halt(struct ath10k *ar) { struct ath10k_vif *arvif; lockdep_assert_held(&ar->conf_mutex); clear_bit(ATH10K_CAC_RUNNING, &ar->dev_flags); ar->filter_flags = 0; ar->monitor = false; ar->monitor_arvif = NULL; if (ar->monitor_started) ath10k_monitor_stop(ar); ar->monitor_started = false; ar->tx_paused = 0; ath10k_scan_finish(ar); ath10k_peer_cleanup_all(ar); ath10k_stop_radar_confirmation(ar); ath10k_core_stop(ar); ath10k_hif_power_down(ar); spin_lock_bh(&ar->data_lock); list_for_each_entry(arvif, &ar->arvifs, list) ath10k_mac_vif_beacon_cleanup(arvif); spin_unlock_bh(&ar->data_lock); } static int ath10k_get_antenna(struct ieee80211_hw *hw, int radio_idx, u32 *tx_ant, u32 *rx_ant) { struct ath10k *ar = hw->priv; mutex_lock(&ar->conf_mutex); *tx_ant = ar->cfg_tx_chainmask; *rx_ant = ar->cfg_rx_chainmask; mutex_unlock(&ar->conf_mutex); return 0; } static bool ath10k_check_chain_mask(struct ath10k *ar, u32 cm, const char *dbg) { /* It is not clear that allowing gaps in chainmask * is helpful. Probably it will not do what user * is hoping for, so warn in that case. */ if (cm == 15 || cm == 7 || cm == 3 || cm == 1 || cm == 0) return true; ath10k_warn(ar, "mac %s antenna chainmask is invalid: 0x%x. Suggested values: 15, 7, 3, 1 or 0.\n", dbg, cm); return false; } static int ath10k_mac_get_vht_cap_bf_sts(struct ath10k *ar) { int nsts = ar->vht_cap_info; nsts &= IEEE80211_VHT_CAP_BEAMFORMEE_STS_MASK; nsts >>= IEEE80211_VHT_CAP_BEAMFORMEE_STS_SHIFT; /* If firmware does not deliver to host number of space-time * streams supported, assume it support up to 4 BF STS and return * the value for VHT CAP: nsts-1) */ if (nsts == 0) return 3; return nsts; } static int ath10k_mac_get_vht_cap_bf_sound_dim(struct ath10k *ar) { int sound_dim = ar->vht_cap_info; sound_dim &= IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_MASK; sound_dim >>= IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_SHIFT; /* If the sounding dimension is not advertised by the firmware, * let's use a default value of 1 */ if (sound_dim == 0) return 1; return sound_dim; } static struct ieee80211_sta_vht_cap ath10k_create_vht_cap(struct ath10k *ar) { struct ieee80211_sta_vht_cap vht_cap = {}; struct ath10k_hw_params *hw = &ar->hw_params; u16 mcs_map; u32 val; int i; vht_cap.vht_supported = 1; vht_cap.cap = ar->vht_cap_info; if (ar->vht_cap_info & (IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE | IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE)) { val = ath10k_mac_get_vht_cap_bf_sts(ar); val <<= IEEE80211_VHT_CAP_BEAMFORMEE_STS_SHIFT; val &= IEEE80211_VHT_CAP_BEAMFORMEE_STS_MASK; vht_cap.cap |= val; } if (ar->vht_cap_info & (IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE | IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE)) { val = ath10k_mac_get_vht_cap_bf_sound_dim(ar); val <<= IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_SHIFT; val &= IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_MASK; vht_cap.cap |= val; } mcs_map = 0; for (i = 0; i < 8; i++) { if ((i < ar->num_rf_chains) && (ar->cfg_tx_chainmask & BIT(i))) mcs_map |= IEEE80211_VHT_MCS_SUPPORT_0_9 << (i * 2); else mcs_map |= IEEE80211_VHT_MCS_NOT_SUPPORTED << (i * 2); } if (ar->cfg_tx_chainmask <= 1) vht_cap.cap &= ~IEEE80211_VHT_CAP_TXSTBC; vht_cap.vht_mcs.rx_mcs_map = cpu_to_le16(mcs_map); vht_cap.vht_mcs.tx_mcs_map = cpu_to_le16(mcs_map); /* If we are supporting 160Mhz or 80+80, then the NIC may be able to do * a restricted NSS for 160 or 80+80 vs what it can do for 80Mhz. Give * user-space a clue if that is the case. */ if ((vht_cap.cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) && (hw->vht160_mcs_rx_highest != 0 || hw->vht160_mcs_tx_highest != 0)) { vht_cap.vht_mcs.rx_highest = cpu_to_le16(hw->vht160_mcs_rx_highest); vht_cap.vht_mcs.tx_highest = cpu_to_le16(hw->vht160_mcs_tx_highest); } return vht_cap; } static struct ieee80211_sta_ht_cap ath10k_get_ht_cap(struct ath10k *ar) { int i; struct ieee80211_sta_ht_cap ht_cap = {}; if (!(ar->ht_cap_info & WMI_HT_CAP_ENABLED)) return ht_cap; ht_cap.ht_supported = 1; ht_cap.ampdu_factor = IEEE80211_HT_MAX_AMPDU_64K; ht_cap.ampdu_density = IEEE80211_HT_MPDU_DENSITY_8; ht_cap.cap |= IEEE80211_HT_CAP_SUP_WIDTH_20_40; ht_cap.cap |= IEEE80211_HT_CAP_DSSSCCK40; ht_cap.cap |= WLAN_HT_CAP_SM_PS_DISABLED << IEEE80211_HT_CAP_SM_PS_SHIFT; if (ar->ht_cap_info & WMI_HT_CAP_HT20_SGI) ht_cap.cap |= IEEE80211_HT_CAP_SGI_20; if (ar->ht_cap_info & WMI_HT_CAP_HT40_SGI) ht_cap.cap |= IEEE80211_HT_CAP_SGI_40; if (ar->ht_cap_info & WMI_HT_CAP_DYNAMIC_SMPS) { u32 smps; smps = WLAN_HT_CAP_SM_PS_DYNAMIC; smps <<= IEEE80211_HT_CAP_SM_PS_SHIFT; ht_cap.cap |= smps; } if (ar->ht_cap_info & WMI_HT_CAP_TX_STBC && (ar->cfg_tx_chainmask > 1)) ht_cap.cap |= IEEE80211_HT_CAP_TX_STBC; if (ar->ht_cap_info & WMI_HT_CAP_RX_STBC) { u32 stbc; stbc = ar->ht_cap_info; stbc &= WMI_HT_CAP_RX_STBC; stbc >>= WMI_HT_CAP_RX_STBC_MASK_SHIFT; stbc <<= IEEE80211_HT_CAP_RX_STBC_SHIFT; stbc &= IEEE80211_HT_CAP_RX_STBC; ht_cap.cap |= stbc; } if (ar->ht_cap_info & WMI_HT_CAP_LDPC || (ar->ht_cap_info & WMI_HT_CAP_RX_LDPC && (ar->ht_cap_info & WMI_HT_CAP_TX_LDPC))) ht_cap.cap |= IEEE80211_HT_CAP_LDPC_CODING; if (ar->ht_cap_info & WMI_HT_CAP_L_SIG_TXOP_PROT) ht_cap.cap |= IEEE80211_HT_CAP_LSIG_TXOP_PROT; /* max AMSDU is implicitly taken from vht_cap_info */ if (ar->vht_cap_info & WMI_VHT_CAP_MAX_MPDU_LEN_MASK) ht_cap.cap |= IEEE80211_HT_CAP_MAX_AMSDU; for (i = 0; i < ar->num_rf_chains; i++) { if (ar->cfg_rx_chainmask & BIT(i)) ht_cap.mcs.rx_mask[i] = 0xFF; } ht_cap.mcs.tx_params |= IEEE80211_HT_MCS_TX_DEFINED; return ht_cap; } static void ath10k_mac_setup_ht_vht_cap(struct ath10k *ar) { struct ieee80211_supported_band *band; struct ieee80211_sta_vht_cap vht_cap; struct ieee80211_sta_ht_cap ht_cap; ht_cap = ath10k_get_ht_cap(ar); vht_cap = ath10k_create_vht_cap(ar); if (ar->phy_capability & WHAL_WLAN_11G_CAPABILITY) { band = &ar->mac.sbands[NL80211_BAND_2GHZ]; band->ht_cap = ht_cap; } if (ar->phy_capability & WHAL_WLAN_11A_CAPABILITY) { band = &ar->mac.sbands[NL80211_BAND_5GHZ]; band->ht_cap = ht_cap; band->vht_cap = vht_cap; } } static int __ath10k_set_antenna(struct ath10k *ar, u32 tx_ant, u32 rx_ant) { int ret; bool is_valid_tx_chain_mask, is_valid_rx_chain_mask; lockdep_assert_held(&ar->conf_mutex); is_valid_tx_chain_mask = ath10k_check_chain_mask(ar, tx_ant, "tx"); is_valid_rx_chain_mask = ath10k_check_chain_mask(ar, rx_ant, "rx"); if (!is_valid_tx_chain_mask || !is_valid_rx_chain_mask) return -EINVAL; ar->cfg_tx_chainmask = tx_ant; ar->cfg_rx_chainmask = rx_ant; if ((ar->state != ATH10K_STATE_ON) && (ar->state != ATH10K_STATE_RESTARTED)) return 0; ret = ath10k_wmi_pdev_set_param(ar, ar->wmi.pdev_param->tx_chain_mask, tx_ant); if (ret) { ath10k_warn(ar, "failed to set tx-chainmask: %d, req 0x%x\n", ret, tx_ant); return ret; } ret = ath10k_wmi_pdev_set_param(ar, ar->wmi.pdev_param->rx_chain_mask, rx_ant); if (ret) { ath10k_warn(ar, "failed to set rx-chainmask: %d, req 0x%x\n", ret, rx_ant); return ret; } /* Reload HT/VHT capability */ ath10k_mac_setup_ht_vht_cap(ar); return 0; } static int ath10k_set_antenna(struct ieee80211_hw *hw, int radio_idx, u32 tx_ant, u32 rx_ant) { struct ath10k *ar = hw->priv; int ret; mutex_lock(&ar->conf_mutex); ret = __ath10k_set_antenna(ar, tx_ant, rx_ant); mutex_unlock(&ar->conf_mutex); return ret; } static int __ath10k_fetch_bb_timing_dt(struct ath10k *ar, struct wmi_bb_timing_cfg_arg *bb_timing) { struct device_node *node; const char *fem_name; int ret; node = ar->dev->of_node; if (!node) return -ENOENT; ret = of_property_read_string_index(node, "ext-fem-name", 0, &fem_name); if (ret) return -ENOENT; /* * If external Front End module used in hardware, then default base band timing * parameter cannot be used since they were fine tuned for reference hardware, * so choosing different value suitable for that external FEM. */ if (!strcmp("microsemi-lx5586", fem_name)) { bb_timing->bb_tx_timing = 0x00; bb_timing->bb_xpa_timing = 0x0101; } else { return -ENOENT; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot bb_tx_timing 0x%x bb_xpa_timing 0x%x\n", bb_timing->bb_tx_timing, bb_timing->bb_xpa_timing); return 0; } static int ath10k_mac_rfkill_config(struct ath10k *ar) { u32 param; int ret; if (ar->hw_values->rfkill_pin == 0) { ath10k_warn(ar, "ath10k does not support hardware rfkill with this device\n"); return -EOPNOTSUPP; } ath10k_dbg(ar, ATH10K_DBG_MAC, "mac rfkill_pin %d rfkill_cfg %d rfkill_on_level %d", ar->hw_values->rfkill_pin, ar->hw_values->rfkill_cfg, ar->hw_values->rfkill_on_level); param = FIELD_PREP(WMI_TLV_RFKILL_CFG_RADIO_LEVEL, ar->hw_values->rfkill_on_level) | FIELD_PREP(WMI_TLV_RFKILL_CFG_GPIO_PIN_NUM, ar->hw_values->rfkill_pin) | FIELD_PREP(WMI_TLV_RFKILL_CFG_PIN_AS_GPIO, ar->hw_values->rfkill_cfg); ret = ath10k_wmi_pdev_set_param(ar, ar->wmi.pdev_param->rfkill_config, param); if (ret) { ath10k_warn(ar, "failed to set rfkill config 0x%x: %d\n", param, ret); return ret; } return 0; } int ath10k_mac_rfkill_enable_radio(struct ath10k *ar, bool enable) { enum wmi_tlv_rfkill_enable_radio param; int ret; if (enable) param = WMI_TLV_RFKILL_ENABLE_RADIO_ON; else param = WMI_TLV_RFKILL_ENABLE_RADIO_OFF; ath10k_dbg(ar, ATH10K_DBG_MAC, "mac rfkill enable %d", param); ret = ath10k_wmi_pdev_set_param(ar, ar->wmi.pdev_param->rfkill_enable, param); if (ret) { ath10k_warn(ar, "failed to set rfkill enable param %d: %d\n", param, ret); return ret; } return 0; } static int ath10k_start(struct ieee80211_hw *hw) { struct ath10k *ar = hw->priv; u32 param; int ret = 0; struct wmi_bb_timing_cfg_arg bb_timing = {}; /* * This makes sense only when restarting hw. It is harmless to call * unconditionally. This is necessary to make sure no HTT/WMI tx * commands will be submitted while restarting. */ ath10k_drain_tx(ar); mutex_lock(&ar->conf_mutex); switch (ar->state) { case ATH10K_STATE_OFF: ar->state = ATH10K_STATE_ON; break; case ATH10K_STATE_RESTARTING: ar->state = ATH10K_STATE_RESTARTED; break; case ATH10K_STATE_ON: case ATH10K_STATE_RESTARTED: case ATH10K_STATE_WEDGED: WARN_ON(1); ret = -EINVAL; goto err; case ATH10K_STATE_UTF: ret = -EBUSY; goto err; } spin_lock_bh(&ar->data_lock); if (ar->hw_rfkill_on) { ar->hw_rfkill_on = false; spin_unlock_bh(&ar->data_lock); goto err; } spin_unlock_bh(&ar->data_lock); ret = ath10k_hif_power_up(ar, ATH10K_FIRMWARE_MODE_NORMAL); if (ret) { ath10k_err(ar, "Could not init hif: %d\n", ret); goto err_off; } ret = ath10k_core_start(ar, ATH10K_FIRMWARE_MODE_NORMAL, &ar->normal_mode_fw); if (ret) { ath10k_err(ar, "Could not init core: %d\n", ret); goto err_power_down; } if (ar->sys_cap_info & WMI_TLV_SYS_CAP_INFO_RFKILL) { ret = ath10k_mac_rfkill_config(ar); if (ret && ret != -EOPNOTSUPP) { ath10k_warn(ar, "failed to configure rfkill: %d", ret); goto err_core_stop; } } param = ar->wmi.pdev_param->pmf_qos; ret = ath10k_wmi_pdev_set_param(ar, param, 1); if (ret) { ath10k_warn(ar, "failed to enable PMF QOS: %d\n", ret); goto err_core_stop; } param = ar->wmi.pdev_param->dynamic_bw; ret = ath10k_wmi_pdev_set_param(ar, param, 1); if (ret) { ath10k_warn(ar, "failed to enable dynamic BW: %d\n", ret); goto err_core_stop; } if (test_bit(WMI_SERVICE_SPOOF_MAC_SUPPORT, ar->wmi.svc_map)) { ret = ath10k_wmi_scan_prob_req_oui(ar, ar->mac_addr); if (ret) { ath10k_err(ar, "failed to set prob req oui: %i\n", ret); goto err_core_stop; } } if (test_bit(WMI_SERVICE_ADAPTIVE_OCS, ar->wmi.svc_map)) { ret = ath10k_wmi_adaptive_qcs(ar, true); if (ret) { ath10k_warn(ar, "failed to enable adaptive qcs: %d\n", ret); goto err_core_stop; } } if (test_bit(WMI_SERVICE_BURST, ar->wmi.svc_map)) { param = ar->wmi.pdev_param->burst_enable; ret = ath10k_wmi_pdev_set_param(ar, param, 0); if (ret) { ath10k_warn(ar, "failed to disable burst: %d\n", ret); goto err_core_stop; } } param = ar->wmi.pdev_param->idle_ps_config; ret = ath10k_wmi_pdev_set_param(ar, param, 1); if (ret && ret != -EOPNOTSUPP) { ath10k_warn(ar, "failed to enable idle_ps_config: %d\n", ret); goto err_core_stop; } __ath10k_set_antenna(ar, ar->cfg_tx_chainmask, ar->cfg_rx_chainmask); /* * By default FW set ARP frames ac to voice (6). In that case ARP * exchange is not working properly for UAPSD enabled AP. ARP requests * which arrives with access category 0 are processed by network stack * and send back with access category 0, but FW changes access category * to 6. Set ARP frames access category to best effort (0) solves * this problem. */ param = ar->wmi.pdev_param->arp_ac_override; ret = ath10k_wmi_pdev_set_param(ar, param, 0); if (ret) { ath10k_warn(ar, "failed to set arp ac override parameter: %d\n", ret); goto err_core_stop; } if (test_bit(ATH10K_FW_FEATURE_SUPPORTS_ADAPTIVE_CCA, ar->running_fw->fw_file.fw_features)) { ret = ath10k_wmi_pdev_enable_adaptive_cca(ar, 1, WMI_CCA_DETECT_LEVEL_AUTO, WMI_CCA_DETECT_MARGIN_AUTO); if (ret) { ath10k_warn(ar, "failed to enable adaptive cca: %d\n", ret); goto err_core_stop; } } param = ar->wmi.pdev_param->ani_enable; ret = ath10k_wmi_pdev_set_param(ar, param, 1); if (ret) { ath10k_warn(ar, "failed to enable ani by default: %d\n", ret); goto err_core_stop; } ar->ani_enabled = true; if (ath10k_peer_stats_enabled(ar)) { param = ar->wmi.pdev_param->peer_stats_update_period; ret = ath10k_wmi_pdev_set_param(ar, param, PEER_DEFAULT_STATS_UPDATE_PERIOD); if (ret) { ath10k_warn(ar, "failed to set peer stats period : %d\n", ret); goto err_core_stop; } } param = ar->wmi.pdev_param->enable_btcoex; if (test_bit(WMI_SERVICE_COEX_GPIO, ar->wmi.svc_map) && test_bit(ATH10K_FW_FEATURE_BTCOEX_PARAM, ar->running_fw->fw_file.fw_features) && ar->coex_support) { ret = ath10k_wmi_pdev_set_param(ar, param, 0); if (ret) { ath10k_warn(ar, "failed to set btcoex param: %d\n", ret); goto err_core_stop; } clear_bit(ATH10K_FLAG_BTCOEX, &ar->dev_flags); } if (test_bit(WMI_SERVICE_BB_TIMING_CONFIG_SUPPORT, ar->wmi.svc_map)) { ret = __ath10k_fetch_bb_timing_dt(ar, &bb_timing); if (!ret) { ret = ath10k_wmi_pdev_bb_timing(ar, &bb_timing); if (ret) { ath10k_warn(ar, "failed to set bb timings: %d\n", ret); goto err_core_stop; } } } ar->num_started_vdevs = 0; ath10k_regd_update(ar); ath10k_spectral_start(ar); ath10k_thermal_set_throttling(ar); ar->radar_conf_state = ATH10K_RADAR_CONFIRMATION_IDLE; mutex_unlock(&ar->conf_mutex); return 0; err_core_stop: ath10k_core_stop(ar); err_power_down: ath10k_hif_power_down(ar); err_off: ar->state = ATH10K_STATE_OFF; err: mutex_unlock(&ar->conf_mutex); return ret; } static void ath10k_stop(struct ieee80211_hw *hw, bool suspend) { struct ath10k *ar = hw->priv; u32 opt; ath10k_drain_tx(ar); mutex_lock(&ar->conf_mutex); if (ar->state != ATH10K_STATE_OFF) { if (!ar->hw_rfkill_on) { /* If the current driver state is RESTARTING but not yet * fully RESTARTED because of incoming suspend event, * then ath10k_halt() is already called via * ath10k_core_restart() and should not be called here. */ if (ar->state != ATH10K_STATE_RESTARTING) { ath10k_halt(ar); } else { /* Suspending here, because when in RESTARTING * state, ath10k_core_stop() skips * ath10k_wait_for_suspend(). */ opt = WMI_PDEV_SUSPEND_AND_DISABLE_INTR; ath10k_wait_for_suspend(ar, opt); } } ar->state = ATH10K_STATE_OFF; } mutex_unlock(&ar->conf_mutex); cancel_work_sync(&ar->set_coverage_class_work); cancel_delayed_work_sync(&ar->scan.timeout); cancel_work_sync(&ar->restart_work); } static int ath10k_config_ps(struct ath10k *ar) { struct ath10k_vif *arvif; int ret = 0; lockdep_assert_held(&ar->conf_mutex); list_for_each_entry(arvif, &ar->arvifs, list) { ret = ath10k_mac_vif_setup_ps(arvif); if (ret) { ath10k_warn(ar, "failed to setup powersave: %d\n", ret); break; } } return ret; } static int ath10k_config(struct ieee80211_hw *hw, int radio_idx, u32 changed) { struct ath10k *ar = hw->priv; struct ieee80211_conf *conf = &hw->conf; int ret = 0; mutex_lock(&ar->conf_mutex); if (changed & IEEE80211_CONF_CHANGE_PS) ath10k_config_ps(ar); if (changed & IEEE80211_CONF_CHANGE_MONITOR) { ar->monitor = conf->flags & IEEE80211_CONF_MONITOR; ret = ath10k_monitor_recalc(ar); if (ret) ath10k_warn(ar, "failed to recalc monitor: %d\n", ret); } mutex_unlock(&ar->conf_mutex); return ret; } static u32 get_nss_from_chainmask(u16 chain_mask) { if ((chain_mask & 0xf) == 0xf) return 4; else if ((chain_mask & 0x7) == 0x7) return 3; else if ((chain_mask & 0x3) == 0x3) return 2; return 1; } static int ath10k_mac_set_txbf_conf(struct ath10k_vif *arvif) { u32 value = 0; struct ath10k *ar = arvif->ar; int nsts; int sound_dim; if (ath10k_wmi_get_txbf_conf_scheme(ar) != WMI_TXBF_CONF_BEFORE_ASSOC) return 0; nsts = ath10k_mac_get_vht_cap_bf_sts(ar); if (ar->vht_cap_info & (IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE | IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE)) value |= SM(nsts, WMI_TXBF_STS_CAP_OFFSET); sound_dim = ath10k_mac_get_vht_cap_bf_sound_dim(ar); if (ar->vht_cap_info & (IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE | IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE)) value |= SM(sound_dim, WMI_BF_SOUND_DIM_OFFSET); if (!value) return 0; if (ar->vht_cap_info & IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE) value |= WMI_VDEV_PARAM_TXBF_SU_TX_BFER; if (ar->vht_cap_info & IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE) value |= (WMI_VDEV_PARAM_TXBF_MU_TX_BFER | WMI_VDEV_PARAM_TXBF_SU_TX_BFER); if (ar->vht_cap_info & IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE) value |= WMI_VDEV_PARAM_TXBF_SU_TX_BFEE; if (ar->vht_cap_info & IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE) value |= (WMI_VDEV_PARAM_TXBF_MU_TX_BFEE | WMI_VDEV_PARAM_TXBF_SU_TX_BFEE); return ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, ar->wmi.vdev_param->txbf, value); } static void ath10k_update_vif_offload(struct ieee80211_hw *hw, struct ieee80211_vif *vif) { struct ath10k_vif *arvif = (void *)vif->drv_priv; struct ath10k *ar = hw->priv; u32 vdev_param; int ret; if (ath10k_frame_mode != ATH10K_HW_TXRX_ETHERNET || ar->wmi.vdev_param->tx_encap_type == WMI_VDEV_PARAM_UNSUPPORTED || (vif->type != NL80211_IFTYPE_STATION && vif->type != NL80211_IFTYPE_AP)) vif->offload_flags &= ~IEEE80211_OFFLOAD_ENCAP_ENABLED; vdev_param = ar->wmi.vdev_param->tx_encap_type; ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param, ATH10K_HW_TXRX_NATIVE_WIFI); /* 10.X firmware does not support this VDEV parameter. Do not warn */ if (ret && ret != -EOPNOTSUPP) { ath10k_warn(ar, "failed to set vdev %i TX encapsulation: %d\n", arvif->vdev_id, ret); } } /* * TODO: * Figure out how to handle WMI_VDEV_SUBTYPE_P2P_DEVICE, * because we will send mgmt frames without CCK. This requirement * for P2P_FIND/GO_NEG should be handled by checking CCK flag * in the TX packet. */ static int ath10k_add_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif) { struct ath10k *ar = hw->priv; struct ath10k_vif *arvif = (void *)vif->drv_priv; struct ath10k_peer *peer; enum wmi_sta_powersave_param param; int ret = 0; u32 value; int bit; int i; u32 vdev_param; vif->driver_flags |= IEEE80211_VIF_SUPPORTS_UAPSD; mutex_lock(&ar->conf_mutex); memset(arvif, 0, sizeof(*arvif)); ath10k_mac_txq_init(vif->txq); arvif->ar = ar; arvif->vif = vif; INIT_LIST_HEAD(&arvif->list); INIT_WORK(&arvif->ap_csa_work, ath10k_mac_vif_ap_csa_work); INIT_DELAYED_WORK(&arvif->connection_loss_work, ath10k_mac_vif_sta_connection_loss_work); for (i = 0; i < ARRAY_SIZE(arvif->bitrate_mask.control); i++) { arvif->bitrate_mask.control[i].legacy = 0xffffffff; memset(arvif->bitrate_mask.control[i].ht_mcs, 0xff, sizeof(arvif->bitrate_mask.control[i].ht_mcs)); memset(arvif->bitrate_mask.control[i].vht_mcs, 0xff, sizeof(arvif->bitrate_mask.control[i].vht_mcs)); } if (ar->num_peers >= ar->max_num_peers) { ath10k_warn(ar, "refusing vdev creation due to insufficient peer entry resources in firmware\n"); ret = -ENOBUFS; goto err; } if (ar->free_vdev_map == 0) { ath10k_warn(ar, "Free vdev map is empty, no more interfaces allowed.\n"); ret = -EBUSY; goto err; } bit = __ffs64(ar->free_vdev_map); ath10k_dbg(ar, ATH10K_DBG_MAC, "mac create vdev %i map %llx\n", bit, ar->free_vdev_map); arvif->vdev_id = bit; arvif->vdev_subtype = ath10k_wmi_get_vdev_subtype(ar, WMI_VDEV_SUBTYPE_NONE); switch (vif->type) { case NL80211_IFTYPE_P2P_DEVICE: arvif->vdev_type = WMI_VDEV_TYPE_STA; arvif->vdev_subtype = ath10k_wmi_get_vdev_subtype (ar, WMI_VDEV_SUBTYPE_P2P_DEVICE); break; case NL80211_IFTYPE_UNSPECIFIED: case NL80211_IFTYPE_STATION: arvif->vdev_type = WMI_VDEV_TYPE_STA; if (vif->p2p) arvif->vdev_subtype = ath10k_wmi_get_vdev_subtype (ar, WMI_VDEV_SUBTYPE_P2P_CLIENT); break; case NL80211_IFTYPE_ADHOC: arvif->vdev_type = WMI_VDEV_TYPE_IBSS; break; case NL80211_IFTYPE_MESH_POINT: if (test_bit(WMI_SERVICE_MESH_11S, ar->wmi.svc_map)) { arvif->vdev_subtype = ath10k_wmi_get_vdev_subtype (ar, WMI_VDEV_SUBTYPE_MESH_11S); } else if (!test_bit(ATH10K_FLAG_RAW_MODE, &ar->dev_flags)) { ret = -EINVAL; ath10k_warn(ar, "must load driver with rawmode=1 to add mesh interfaces\n"); goto err; } arvif->vdev_type = WMI_VDEV_TYPE_AP; break; case NL80211_IFTYPE_AP: arvif->vdev_type = WMI_VDEV_TYPE_AP; if (vif->p2p) arvif->vdev_subtype = ath10k_wmi_get_vdev_subtype (ar, WMI_VDEV_SUBTYPE_P2P_GO); break; case NL80211_IFTYPE_MONITOR: arvif->vdev_type = WMI_VDEV_TYPE_MONITOR; break; default: WARN_ON(1); break; } /* Using vdev_id as queue number will make it very easy to do per-vif * tx queue locking. This shouldn't wrap due to interface combinations * but do a modulo for correctness sake and prevent using offchannel tx * queues for regular vif tx. */ vif->cab_queue = arvif->vdev_id % (IEEE80211_MAX_QUEUES - 1); for (i = 0; i < ARRAY_SIZE(vif->hw_queue); i++) vif->hw_queue[i] = arvif->vdev_id % (IEEE80211_MAX_QUEUES - 1); /* Some firmware revisions don't wait for beacon tx completion before * sending another SWBA event. This could lead to hardware using old * (freed) beacon data in some cases, e.g. tx credit starvation * combined with missed TBTT. This is very rare. * * On non-IOMMU-enabled hosts this could be a possible security issue * because hw could beacon some random data on the air. On * IOMMU-enabled hosts DMAR faults would occur in most cases and target * device would crash. * * Since there are no beacon tx completions (implicit nor explicit) * propagated to host the only workaround for this is to allocate a * DMA-coherent buffer for a lifetime of a vif and use it for all * beacon tx commands. Worst case for this approach is some beacons may * become corrupted, e.g. have garbled IEs or out-of-date TIM bitmap. */ if (vif->type == NL80211_IFTYPE_ADHOC || vif->type == NL80211_IFTYPE_MESH_POINT || vif->type == NL80211_IFTYPE_AP) { if (ar->bus_param.dev_type == ATH10K_DEV_TYPE_HL) { arvif->beacon_buf = kmalloc(IEEE80211_MAX_FRAME_LEN, GFP_KERNEL); /* Using a kernel pointer in place of a dma_addr_t * token can lead to undefined behavior if that * makes it into cache management functions. Use a * known-invalid address token instead, which * avoids the warning and makes it easier to catch * bugs if it does end up getting used. */ arvif->beacon_paddr = DMA_MAPPING_ERROR; } else { arvif->beacon_buf = dma_alloc_coherent(ar->dev, IEEE80211_MAX_FRAME_LEN, &arvif->beacon_paddr, GFP_ATOMIC); } if (!arvif->beacon_buf) { ret = -ENOMEM; ath10k_warn(ar, "failed to allocate beacon buffer: %d\n", ret); goto err; } } if (test_bit(ATH10K_FLAG_HW_CRYPTO_DISABLED, &ar->dev_flags)) arvif->nohwcrypt = true; if (arvif->nohwcrypt && !test_bit(ATH10K_FLAG_RAW_MODE, &ar->dev_flags)) { ret = -EINVAL; ath10k_warn(ar, "cryptmode module param needed for sw crypto\n"); goto err; } ath10k_dbg(ar, ATH10K_DBG_MAC, "mac vdev create %d (add interface) type %d subtype %d bcnmode %s\n", arvif->vdev_id, arvif->vdev_type, arvif->vdev_subtype, arvif->beacon_buf ? "single-buf" : "per-skb"); ret = ath10k_wmi_vdev_create(ar, arvif->vdev_id, arvif->vdev_type, arvif->vdev_subtype, vif->addr); if (ret) { ath10k_warn(ar, "failed to create WMI vdev %i: %d\n", arvif->vdev_id, ret); goto err; } if (test_bit(WMI_SERVICE_VDEV_DISABLE_4_ADDR_SRC_LRN_SUPPORT, ar->wmi.svc_map)) { vdev_param = ar->wmi.vdev_param->disable_4addr_src_lrn; ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param, WMI_VDEV_DISABLE_4_ADDR_SRC_LRN); if (ret && ret != -EOPNOTSUPP) { ath10k_warn(ar, "failed to disable 4addr src lrn vdev %i: %d\n", arvif->vdev_id, ret); } } ar->free_vdev_map &= ~(1LL << arvif->vdev_id); spin_lock_bh(&ar->data_lock); list_add(&arvif->list, &ar->arvifs); spin_unlock_bh(&ar->data_lock); /* It makes no sense to have firmware do keepalives. mac80211 already * takes care of this with idle connection polling. */ ret = ath10k_mac_vif_disable_keepalive(arvif); if (ret) { ath10k_warn(ar, "failed to disable keepalive on vdev %i: %d\n", arvif->vdev_id, ret); goto err_vdev_delete; } arvif->def_wep_key_idx = -1; ath10k_update_vif_offload(hw, vif); /* Configuring number of spatial stream for monitor interface is causing * target assert in qca9888 and qca6174. */ if (ar->cfg_tx_chainmask && (vif->type != NL80211_IFTYPE_MONITOR)) { u16 nss = get_nss_from_chainmask(ar->cfg_tx_chainmask); vdev_param = ar->wmi.vdev_param->nss; ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param, nss); if (ret) { ath10k_warn(ar, "failed to set vdev %i chainmask 0x%x, nss %i: %d\n", arvif->vdev_id, ar->cfg_tx_chainmask, nss, ret); goto err_vdev_delete; } } if (arvif->vdev_type == WMI_VDEV_TYPE_AP || arvif->vdev_type == WMI_VDEV_TYPE_IBSS) { ret = ath10k_peer_create(ar, vif, NULL, arvif->vdev_id, vif->addr, WMI_PEER_TYPE_DEFAULT); if (ret) { ath10k_warn(ar, "failed to create vdev %i peer for AP/IBSS: %d\n", arvif->vdev_id, ret); goto err_vdev_delete; } spin_lock_bh(&ar->data_lock); peer = ath10k_peer_find(ar, arvif->vdev_id, vif->addr); if (!peer) { ath10k_warn(ar, "failed to lookup peer %pM on vdev %i\n", vif->addr, arvif->vdev_id); spin_unlock_bh(&ar->data_lock); ret = -ENOENT; goto err_peer_delete; } arvif->peer_id = find_first_bit(peer->peer_ids, ATH10K_MAX_NUM_PEER_IDS); spin_unlock_bh(&ar->data_lock); } else { arvif->peer_id = HTT_INVALID_PEERID; } if (arvif->vdev_type == WMI_VDEV_TYPE_AP) { ret = ath10k_mac_set_kickout(arvif); if (ret) { ath10k_warn(ar, "failed to set vdev %i kickout parameters: %d\n", arvif->vdev_id, ret); goto err_peer_delete; } } if (arvif->vdev_type == WMI_VDEV_TYPE_STA) { param = WMI_STA_PS_PARAM_RX_WAKE_POLICY; value = WMI_STA_PS_RX_WAKE_POLICY_WAKE; ret = ath10k_wmi_set_sta_ps_param(ar, arvif->vdev_id, param, value); if (ret) { ath10k_warn(ar, "failed to set vdev %i RX wake policy: %d\n", arvif->vdev_id, ret); goto err_peer_delete; } ret = ath10k_mac_vif_recalc_ps_wake_threshold(arvif); if (ret) { ath10k_warn(ar, "failed to recalc ps wake threshold on vdev %i: %d\n", arvif->vdev_id, ret); goto err_peer_delete; } ret = ath10k_mac_vif_recalc_ps_poll_count(arvif); if (ret) { ath10k_warn(ar, "failed to recalc ps poll count on vdev %i: %d\n", arvif->vdev_id, ret); goto err_peer_delete; } } ret = ath10k_mac_set_txbf_conf(arvif); if (ret) { ath10k_warn(ar, "failed to set txbf for vdev %d: %d\n", arvif->vdev_id, ret); goto err_peer_delete; } ret = ath10k_mac_set_rts(arvif, ar->hw->wiphy->rts_threshold); if (ret) { ath10k_warn(ar, "failed to set rts threshold for vdev %d: %d\n", arvif->vdev_id, ret); goto err_peer_delete; } arvif->txpower = vif->bss_conf.txpower; ret = ath10k_mac_txpower_recalc(ar); if (ret) { ath10k_warn(ar, "failed to recalc tx power: %d\n", ret); goto err_peer_delete; } if (test_bit(WMI_SERVICE_RTT_RESPONDER_ROLE, ar->wmi.svc_map)) { vdev_param = ar->wmi.vdev_param->rtt_responder_role; ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param, arvif->ftm_responder); /* It is harmless to not set FTM role. Do not warn */ if (ret && ret != -EOPNOTSUPP) ath10k_warn(ar, "failed to set vdev %i FTM Responder: %d\n", arvif->vdev_id, ret); } if (vif->type == NL80211_IFTYPE_MONITOR) { ar->monitor_arvif = arvif; ret = ath10k_monitor_recalc(ar); if (ret) { ath10k_warn(ar, "failed to recalc monitor: %d\n", ret); goto err_peer_delete; } } spin_lock_bh(&ar->htt.tx_lock); if (!ar->tx_paused) ieee80211_wake_queue(ar->hw, arvif->vdev_id); spin_unlock_bh(&ar->htt.tx_lock); mutex_unlock(&ar->conf_mutex); return 0; err_peer_delete: if (arvif->vdev_type == WMI_VDEV_TYPE_AP || arvif->vdev_type == WMI_VDEV_TYPE_IBSS) { ath10k_wmi_peer_delete(ar, arvif->vdev_id, vif->addr); ath10k_wait_for_peer_delete_done(ar, arvif->vdev_id, vif->addr); } err_vdev_delete: ath10k_wmi_vdev_delete(ar, arvif->vdev_id); ar->free_vdev_map |= 1LL << arvif->vdev_id; spin_lock_bh(&ar->data_lock); list_del(&arvif->list); spin_unlock_bh(&ar->data_lock); err: if (arvif->beacon_buf) { if (ar->bus_param.dev_type == ATH10K_DEV_TYPE_HL) kfree(arvif->beacon_buf); else dma_free_coherent(ar->dev, IEEE80211_MAX_FRAME_LEN, arvif->beacon_buf, arvif->beacon_paddr); arvif->beacon_buf = NULL; } mutex_unlock(&ar->conf_mutex); return ret; } static void ath10k_mac_vif_tx_unlock_all(struct ath10k_vif *arvif) { int i; for (i = 0; i < BITS_PER_LONG; i++) ath10k_mac_vif_tx_unlock(arvif, i); } static void ath10k_remove_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif) { struct ath10k *ar = hw->priv; struct ath10k_vif *arvif = (void *)vif->drv_priv; struct ath10k_peer *peer; int ret; int i; cancel_work_sync(&arvif->ap_csa_work); cancel_delayed_work_sync(&arvif->connection_loss_work); mutex_lock(&ar->conf_mutex); ret = ath10k_spectral_vif_stop(arvif); if (ret) ath10k_warn(ar, "failed to stop spectral for vdev %i: %d\n", arvif->vdev_id, ret); ar->free_vdev_map |= 1LL << arvif->vdev_id; spin_lock_bh(&ar->data_lock); list_del(&arvif->list); spin_unlock_bh(&ar->data_lock); if (arvif->vdev_type == WMI_VDEV_TYPE_AP || arvif->vdev_type == WMI_VDEV_TYPE_IBSS) { ret = ath10k_wmi_peer_delete(arvif->ar, arvif->vdev_id, vif->addr); if (ret) ath10k_warn(ar, "failed to submit AP/IBSS self-peer removal on vdev %i: %d\n", arvif->vdev_id, ret); ath10k_wait_for_peer_delete_done(ar, arvif->vdev_id, vif->addr); kfree(arvif->u.ap.noa_data); } ath10k_dbg(ar, ATH10K_DBG_MAC, "mac vdev %i delete (remove interface)\n", arvif->vdev_id); ret = ath10k_wmi_vdev_delete(ar, arvif->vdev_id); if (ret) ath10k_warn(ar, "failed to delete WMI vdev %i: %d\n", arvif->vdev_id, ret); ret = ath10k_vdev_delete_sync(ar); if (ret) { ath10k_warn(ar, "Error in receiving vdev delete response: %d\n", ret); goto out; } /* Some firmware revisions don't notify host about self-peer removal * until after associated vdev is deleted. */ if (arvif->vdev_type == WMI_VDEV_TYPE_AP || arvif->vdev_type == WMI_VDEV_TYPE_IBSS) { ret = ath10k_wait_for_peer_deleted(ar, arvif->vdev_id, vif->addr); if (ret) ath10k_warn(ar, "failed to remove AP self-peer on vdev %i: %d\n", arvif->vdev_id, ret); spin_lock_bh(&ar->data_lock); ar->num_peers--; spin_unlock_bh(&ar->data_lock); } spin_lock_bh(&ar->data_lock); for (i = 0; i < ARRAY_SIZE(ar->peer_map); i++) { peer = ar->peer_map[i]; if (!peer) continue; if (peer->vif == vif) { ath10k_warn(ar, "found vif peer %pM entry on vdev %i after it was supposedly removed\n", vif->addr, arvif->vdev_id); peer->vif = NULL; } } /* Clean this up late, less opportunity for firmware to access * DMA memory we have deleted. */ ath10k_mac_vif_beacon_cleanup(arvif); spin_unlock_bh(&ar->data_lock); ath10k_peer_cleanup(ar, arvif->vdev_id); ath10k_mac_txq_unref(ar, vif->txq); if (vif->type == NL80211_IFTYPE_MONITOR) { ar->monitor_arvif = NULL; ret = ath10k_monitor_recalc(ar); if (ret) ath10k_warn(ar, "failed to recalc monitor: %d\n", ret); } ret = ath10k_mac_txpower_recalc(ar); if (ret) ath10k_warn(ar, "failed to recalc tx power: %d\n", ret); spin_lock_bh(&ar->htt.tx_lock); ath10k_mac_vif_tx_unlock_all(arvif); spin_unlock_bh(&ar->htt.tx_lock); ath10k_mac_txq_unref(ar, vif->txq); out: mutex_unlock(&ar->conf_mutex); } /* * FIXME: Has to be verified. */ #define SUPPORTED_FILTERS \ (FIF_ALLMULTI | \ FIF_CONTROL | \ FIF_PSPOLL | \ FIF_OTHER_BSS | \ FIF_BCN_PRBRESP_PROMISC | \ FIF_PROBE_REQ | \ FIF_FCSFAIL) static void ath10k_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, unsigned int *total_flags, u64 multicast) { struct ath10k *ar = hw->priv; int ret; unsigned int supported = SUPPORTED_FILTERS; mutex_lock(&ar->conf_mutex); if (ar->hw_params.mcast_frame_registration) supported |= FIF_MCAST_ACTION; *total_flags &= supported; ar->filter_flags = *total_flags; ret = ath10k_monitor_recalc(ar); if (ret) ath10k_warn(ar, "failed to recalc monitor: %d\n", ret); mutex_unlock(&ar->conf_mutex); } static void ath10k_recalculate_mgmt_rate(struct ath10k *ar, struct ieee80211_vif *vif, struct cfg80211_chan_def *def) { struct ath10k_vif *arvif = (void *)vif->drv_priv; const struct ieee80211_supported_band *sband; u8 basic_rate_idx; int hw_rate_code; u32 vdev_param; u16 bitrate; int ret; lockdep_assert_held(&ar->conf_mutex); sband = ar->hw->wiphy->bands[def->chan->band]; basic_rate_idx = ffs(vif->bss_conf.basic_rates) - 1; bitrate = sband->bitrates[basic_rate_idx].bitrate; hw_rate_code = ath10k_mac_get_rate_hw_value(bitrate); if (hw_rate_code < 0) { ath10k_warn(ar, "bitrate not supported %d\n", bitrate); return; } vdev_param = ar->wmi.vdev_param->mgmt_rate; ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param, hw_rate_code); if (ret) ath10k_warn(ar, "failed to set mgmt tx rate %d\n", ret); } static void ath10k_bss_info_changed(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_bss_conf *info, u64 changed) { struct ath10k *ar = hw->priv; struct ath10k_vif *arvif = (void *)vif->drv_priv; struct cfg80211_chan_def def; u32 vdev_param, pdev_param, slottime, preamble; u16 bitrate, hw_value; u8 rate, rateidx; int ret = 0, mcast_rate; enum nl80211_band band; mutex_lock(&ar->conf_mutex); if (changed & BSS_CHANGED_IBSS) ath10k_control_ibss(arvif, vif); if (changed & BSS_CHANGED_BEACON_INT) { arvif->beacon_interval = info->beacon_int; vdev_param = ar->wmi.vdev_param->beacon_interval; ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param, arvif->beacon_interval); ath10k_dbg(ar, ATH10K_DBG_MAC, "mac vdev %d beacon_interval %d\n", arvif->vdev_id, arvif->beacon_interval); if (ret) ath10k_warn(ar, "failed to set beacon interval for vdev %d: %i\n", arvif->vdev_id, ret); } if (changed & BSS_CHANGED_BEACON) { ath10k_dbg(ar, ATH10K_DBG_MAC, "vdev %d set beacon tx mode to staggered\n", arvif->vdev_id); pdev_param = ar->wmi.pdev_param->beacon_tx_mode; ret = ath10k_wmi_pdev_set_param(ar, pdev_param, WMI_BEACON_STAGGERED_MODE); if (ret) ath10k_warn(ar, "failed to set beacon mode for vdev %d: %i\n", arvif->vdev_id, ret); ret = ath10k_mac_setup_bcn_tmpl(arvif); if (ret) ath10k_warn(ar, "failed to update beacon template: %d\n", ret); if (ieee80211_vif_is_mesh(vif)) { /* mesh doesn't use SSID but firmware needs it */ arvif->u.ap.ssid_len = 4; memcpy(arvif->u.ap.ssid, "mesh", arvif->u.ap.ssid_len); } } if (changed & BSS_CHANGED_AP_PROBE_RESP) { ret = ath10k_mac_setup_prb_tmpl(arvif); if (ret) ath10k_warn(ar, "failed to setup probe resp template on vdev %i: %d\n", arvif->vdev_id, ret); } if (changed & (BSS_CHANGED_BEACON_INFO | BSS_CHANGED_BEACON)) { arvif->dtim_period = info->dtim_period; ath10k_dbg(ar, ATH10K_DBG_MAC, "mac vdev %d dtim_period %d\n", arvif->vdev_id, arvif->dtim_period); vdev_param = ar->wmi.vdev_param->dtim_period; ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param, arvif->dtim_period); if (ret) ath10k_warn(ar, "failed to set dtim period for vdev %d: %i\n", arvif->vdev_id, ret); } if (changed & BSS_CHANGED_SSID && vif->type == NL80211_IFTYPE_AP) { arvif->u.ap.ssid_len = vif->cfg.ssid_len; if (vif->cfg.ssid_len) memcpy(arvif->u.ap.ssid, vif->cfg.ssid, vif->cfg.ssid_len); arvif->u.ap.hidden_ssid = info->hidden_ssid; } if (changed & BSS_CHANGED_BSSID && !is_zero_ether_addr(info->bssid)) ether_addr_copy(arvif->bssid, info->bssid); if (changed & BSS_CHANGED_FTM_RESPONDER && arvif->ftm_responder != info->ftm_responder && test_bit(WMI_SERVICE_RTT_RESPONDER_ROLE, ar->wmi.svc_map)) { arvif->ftm_responder = info->ftm_responder; vdev_param = ar->wmi.vdev_param->rtt_responder_role; ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param, arvif->ftm_responder); ath10k_dbg(ar, ATH10K_DBG_MAC, "mac vdev %d ftm_responder %d:ret %d\n", arvif->vdev_id, arvif->ftm_responder, ret); } if (changed & BSS_CHANGED_BEACON_ENABLED) ath10k_control_beaconing(arvif, info); if (changed & BSS_CHANGED_ERP_CTS_PROT) { arvif->use_cts_prot = info->use_cts_prot; ret = ath10k_recalc_rtscts_prot(arvif); if (ret) ath10k_warn(ar, "failed to recalculate rts/cts prot for vdev %d: %d\n", arvif->vdev_id, ret); if (ath10k_mac_can_set_cts_prot(arvif)) { ret = ath10k_mac_set_cts_prot(arvif); if (ret) ath10k_warn(ar, "failed to set cts protection for vdev %d: %d\n", arvif->vdev_id, ret); } } if (changed & BSS_CHANGED_ERP_SLOT) { if (info->use_short_slot) slottime = WMI_VDEV_SLOT_TIME_SHORT; /* 9us */ else slottime = WMI_VDEV_SLOT_TIME_LONG; /* 20us */ ath10k_dbg(ar, ATH10K_DBG_MAC, "mac vdev %d slot_time %d\n", arvif->vdev_id, slottime); vdev_param = ar->wmi.vdev_param->slot_time; ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param, slottime); if (ret) ath10k_warn(ar, "failed to set erp slot for vdev %d: %i\n", arvif->vdev_id, ret); } if (changed & BSS_CHANGED_ERP_PREAMBLE) { if (info->use_short_preamble) preamble = WMI_VDEV_PREAMBLE_SHORT; else preamble = WMI_VDEV_PREAMBLE_LONG; ath10k_dbg(ar, ATH10K_DBG_MAC, "mac vdev %d preamble %dn", arvif->vdev_id, preamble); vdev_param = ar->wmi.vdev_param->preamble; ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param, preamble); if (ret) ath10k_warn(ar, "failed to set preamble for vdev %d: %i\n", arvif->vdev_id, ret); } if (changed & BSS_CHANGED_ASSOC) { if (vif->cfg.assoc) { /* Workaround: Make sure monitor vdev is not running * when associating to prevent some firmware revisions * (e.g. 10.1 and 10.2) from crashing. */ if (ar->monitor_started) ath10k_monitor_stop(ar); ath10k_bss_assoc(hw, vif, info); ath10k_monitor_recalc(ar); } else { ath10k_bss_disassoc(hw, vif); } } if (changed & BSS_CHANGED_TXPOWER) { ath10k_dbg(ar, ATH10K_DBG_MAC, "mac vdev_id %i txpower %d\n", arvif->vdev_id, info->txpower); arvif->txpower = info->txpower; ret = ath10k_mac_txpower_recalc(ar); if (ret) ath10k_warn(ar, "failed to recalc tx power: %d\n", ret); } if (changed & BSS_CHANGED_PS) { arvif->ps = vif->cfg.ps; ret = ath10k_config_ps(ar); if (ret) ath10k_warn(ar, "failed to setup ps on vdev %i: %d\n", arvif->vdev_id, ret); } if (changed & BSS_CHANGED_MCAST_RATE && !ath10k_mac_vif_chan(arvif->vif, &def)) { band = def.chan->band; mcast_rate = vif->bss_conf.mcast_rate[band]; if (mcast_rate > 0) rateidx = mcast_rate - 1; else rateidx = ffs(vif->bss_conf.basic_rates) - 1; if (ar->phy_capability & WHAL_WLAN_11A_CAPABILITY) rateidx += ATH10K_MAC_FIRST_OFDM_RATE_IDX; bitrate = ath10k_wmi_legacy_rates[rateidx].bitrate; hw_value = ath10k_wmi_legacy_rates[rateidx].hw_value; if (ath10k_mac_bitrate_is_cck(bitrate)) preamble = WMI_RATE_PREAMBLE_CCK; else preamble = WMI_RATE_PREAMBLE_OFDM; rate = ATH10K_HW_RATECODE(hw_value, 0, preamble); ath10k_dbg(ar, ATH10K_DBG_MAC, "mac vdev %d mcast_rate %x\n", arvif->vdev_id, rate); vdev_param = ar->wmi.vdev_param->mcast_data_rate; ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param, rate); if (ret) ath10k_warn(ar, "failed to set mcast rate on vdev %i: %d\n", arvif->vdev_id, ret); vdev_param = ar->wmi.vdev_param->bcast_data_rate; ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param, rate); if (ret) ath10k_warn(ar, "failed to set bcast rate on vdev %i: %d\n", arvif->vdev_id, ret); } if (changed & BSS_CHANGED_BASIC_RATES && !ath10k_mac_vif_chan(arvif->vif, &def)) ath10k_recalculate_mgmt_rate(ar, vif, &def); mutex_unlock(&ar->conf_mutex); } static void ath10k_mac_op_set_coverage_class(struct ieee80211_hw *hw, int radio_idx, s16 value) { struct ath10k *ar = hw->priv; /* This function should never be called if setting the coverage class * is not supported on this hardware. */ if (!ar->hw_params.hw_ops->set_coverage_class) { WARN_ON_ONCE(1); return; } ar->hw_params.hw_ops->set_coverage_class(ar, -1, value); } struct ath10k_mac_tdls_iter_data { u32 num_tdls_stations; struct ieee80211_vif *curr_vif; }; static void ath10k_mac_tdls_vif_stations_count_iter(void *data, struct ieee80211_sta *sta) { struct ath10k_mac_tdls_iter_data *iter_data = data; struct ath10k_sta *arsta = (struct ath10k_sta *)sta->drv_priv; struct ieee80211_vif *sta_vif = arsta->arvif->vif; if (sta->tdls && sta_vif == iter_data->curr_vif) iter_data->num_tdls_stations++; } static int ath10k_mac_tdls_vif_stations_count(struct ieee80211_hw *hw, struct ieee80211_vif *vif) { struct ath10k_mac_tdls_iter_data data = {}; data.curr_vif = vif; ieee80211_iterate_stations_atomic(hw, ath10k_mac_tdls_vif_stations_count_iter, &data); return data.num_tdls_stations; } static int ath10k_hw_scan(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_scan_request *hw_req) { struct ath10k *ar = hw->priv; struct ath10k_vif *arvif = (void *)vif->drv_priv; struct cfg80211_scan_request *req = &hw_req->req; struct wmi_start_scan_arg *arg = NULL; int ret = 0; int i; u32 scan_timeout; mutex_lock(&ar->conf_mutex); if (ath10k_mac_tdls_vif_stations_count(hw, vif) > 0) { ret = -EBUSY; goto exit; } spin_lock_bh(&ar->data_lock); switch (ar->scan.state) { case ATH10K_SCAN_IDLE: reinit_completion(&ar->scan.started); reinit_completion(&ar->scan.completed); ar->scan.state = ATH10K_SCAN_STARTING; ar->scan.is_roc = false; ar->scan.vdev_id = arvif->vdev_id; ret = 0; break; case ATH10K_SCAN_STARTING: case ATH10K_SCAN_RUNNING: case ATH10K_SCAN_ABORTING: ret = -EBUSY; break; } spin_unlock_bh(&ar->data_lock); if (ret) goto exit; arg = kzalloc(sizeof(*arg), GFP_KERNEL); if (!arg) { ret = -ENOMEM; goto exit; } ath10k_wmi_start_scan_init(ar, arg); arg->vdev_id = arvif->vdev_id; arg->scan_id = ATH10K_SCAN_ID; if (req->ie_len) { arg->ie_len = req->ie_len; memcpy(arg->ie, req->ie, arg->ie_len); } if (req->n_ssids) { arg->n_ssids = req->n_ssids; for (i = 0; i < arg->n_ssids; i++) { arg->ssids[i].len = req->ssids[i].ssid_len; arg->ssids[i].ssid = req->ssids[i].ssid; } } else { arg->scan_ctrl_flags |= WMI_SCAN_FLAG_PASSIVE; } if (req->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) { arg->scan_ctrl_flags |= WMI_SCAN_ADD_SPOOFED_MAC_IN_PROBE_REQ; ether_addr_copy(arg->mac_addr.addr, req->mac_addr); ether_addr_copy(arg->mac_mask.addr, req->mac_addr_mask); } if (req->n_channels) { arg->n_channels = req->n_channels; for (i = 0; i < arg->n_channels; i++) arg->channels[i] = req->channels[i]->center_freq; } /* if duration is set, default dwell times will be overwritten */ if (req->duration) { arg->dwell_time_active = req->duration; arg->dwell_time_passive = req->duration; arg->burst_duration_ms = req->duration; scan_timeout = min_t(u32, arg->max_rest_time * (arg->n_channels - 1) + (req->duration + ATH10K_SCAN_CHANNEL_SWITCH_WMI_EVT_OVERHEAD) * arg->n_channels, arg->max_scan_time); } else { scan_timeout = arg->max_scan_time; } /* Add a 200ms margin to account for event/command processing */ scan_timeout += 200; ret = ath10k_start_scan(ar, arg); if (ret) { ath10k_warn(ar, "failed to start hw scan: %d\n", ret); spin_lock_bh(&ar->data_lock); ar->scan.state = ATH10K_SCAN_IDLE; spin_unlock_bh(&ar->data_lock); } ieee80211_queue_delayed_work(ar->hw, &ar->scan.timeout, msecs_to_jiffies(scan_timeout)); exit: kfree(arg); mutex_unlock(&ar->conf_mutex); return ret; } static void ath10k_cancel_hw_scan(struct ieee80211_hw *hw, struct ieee80211_vif *vif) { struct ath10k *ar = hw->priv; mutex_lock(&ar->conf_mutex); ath10k_scan_abort(ar); mutex_unlock(&ar->conf_mutex); cancel_delayed_work_sync(&ar->scan.timeout); } static void ath10k_set_key_h_def_keyidx(struct ath10k *ar, struct ath10k_vif *arvif, enum set_key_cmd cmd, struct ieee80211_key_conf *key) { u32 vdev_param = arvif->ar->wmi.vdev_param->def_keyid; int ret; /* 10.1 firmware branch requires default key index to be set to group * key index after installing it. Otherwise FW/HW Txes corrupted * frames with multi-vif APs. This is not required for main firmware * branch (e.g. 636). * * This is also needed for 636 fw for IBSS-RSN to work more reliably. * * FIXME: It remains unknown if this is required for multi-vif STA * interfaces on 10.1. */ if (arvif->vdev_type != WMI_VDEV_TYPE_AP && arvif->vdev_type != WMI_VDEV_TYPE_IBSS) return; if (key->cipher == WLAN_CIPHER_SUITE_WEP40) return; if (key->cipher == WLAN_CIPHER_SUITE_WEP104) return; if (key->flags & IEEE80211_KEY_FLAG_PAIRWISE) return; if (cmd != SET_KEY) return; ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param, key->keyidx); if (ret) ath10k_warn(ar, "failed to set vdev %i group key as default key: %d\n", arvif->vdev_id, ret); } static int ath10k_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct ieee80211_key_conf *key) { struct ath10k *ar = hw->priv; struct ath10k_vif *arvif = (void *)vif->drv_priv; struct ath10k_sta *arsta; struct ath10k_peer *peer; const u8 *peer_addr; bool is_wep = key->cipher == WLAN_CIPHER_SUITE_WEP40 || key->cipher == WLAN_CIPHER_SUITE_WEP104; int ret = 0; int ret2; u32 flags = 0; u32 flags2; /* this one needs to be done in software */ if (key->cipher == WLAN_CIPHER_SUITE_AES_CMAC || key->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_128 || key->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_256 || key->cipher == WLAN_CIPHER_SUITE_BIP_CMAC_256) return 1; if (arvif->nohwcrypt) return 1; if (key->keyidx > WMI_MAX_KEY_INDEX) return -ENOSPC; mutex_lock(&ar->conf_mutex); if (sta) { arsta = (struct ath10k_sta *)sta->drv_priv; peer_addr = sta->addr; spin_lock_bh(&ar->data_lock); arsta->ucast_cipher = key->cipher; spin_unlock_bh(&ar->data_lock); } else if (arvif->vdev_type == WMI_VDEV_TYPE_STA) { peer_addr = vif->bss_conf.bssid; } else { peer_addr = vif->addr; } key->hw_key_idx = key->keyidx; if (is_wep) { if (cmd == SET_KEY) arvif->wep_keys[key->keyidx] = key; else arvif->wep_keys[key->keyidx] = NULL; } /* the peer should not disappear in mid-way (unless FW goes awry) since * we already hold conf_mutex. we just make sure its there now. */ spin_lock_bh(&ar->data_lock); peer = ath10k_peer_find(ar, arvif->vdev_id, peer_addr); spin_unlock_bh(&ar->data_lock); if (!peer) { if (cmd == SET_KEY) { ath10k_warn(ar, "failed to install key for non-existent peer %pM\n", peer_addr); ret = -EOPNOTSUPP; goto exit; } else { /* if the peer doesn't exist there is no key to disable anymore */ goto exit; } } if (key->flags & IEEE80211_KEY_FLAG_PAIRWISE) flags |= WMI_KEY_PAIRWISE; else flags |= WMI_KEY_GROUP; if (is_wep) { if (cmd == DISABLE_KEY) ath10k_clear_vdev_key(arvif, key); /* When WEP keys are uploaded it's possible that there are * stations associated already (e.g. when merging) without any * keys. Static WEP needs an explicit per-peer key upload. */ if (vif->type == NL80211_IFTYPE_ADHOC && cmd == SET_KEY) ath10k_mac_vif_update_wep_key(arvif, key); /* 802.1x never sets the def_wep_key_idx so each set_key() * call changes default tx key. * * Static WEP sets def_wep_key_idx via .set_default_unicast_key * after first set_key(). */ if (cmd == SET_KEY && arvif->def_wep_key_idx == -1) flags |= WMI_KEY_TX_USAGE; } ret = ath10k_install_key(arvif, key, cmd, peer_addr, flags); if (ret) { WARN_ON(ret > 0); ath10k_warn(ar, "failed to install key for vdev %i peer %pM: %d\n", arvif->vdev_id, peer_addr, ret); goto exit; } /* mac80211 sets static WEP keys as groupwise while firmware requires * them to be installed twice as both pairwise and groupwise. */ if (is_wep && !sta && vif->type == NL80211_IFTYPE_STATION) { flags2 = flags; flags2 &= ~WMI_KEY_GROUP; flags2 |= WMI_KEY_PAIRWISE; ret = ath10k_install_key(arvif, key, cmd, peer_addr, flags2); if (ret) { WARN_ON(ret > 0); ath10k_warn(ar, "failed to install (ucast) key for vdev %i peer %pM: %d\n", arvif->vdev_id, peer_addr, ret); ret2 = ath10k_install_key(arvif, key, DISABLE_KEY, peer_addr, flags); if (ret2) { WARN_ON(ret2 > 0); ath10k_warn(ar, "failed to disable (mcast) key for vdev %i peer %pM: %d\n", arvif->vdev_id, peer_addr, ret2); } goto exit; } } ath10k_set_key_h_def_keyidx(ar, arvif, cmd, key); spin_lock_bh(&ar->data_lock); peer = ath10k_peer_find(ar, arvif->vdev_id, peer_addr); if (peer && cmd == SET_KEY) peer->keys[key->keyidx] = key; else if (peer && cmd == DISABLE_KEY) peer->keys[key->keyidx] = NULL; else if (peer == NULL) /* impossible unless FW goes crazy */ ath10k_warn(ar, "Peer %pM disappeared!\n", peer_addr); spin_unlock_bh(&ar->data_lock); if (sta && sta->tdls) ath10k_wmi_peer_set_param(ar, arvif->vdev_id, sta->addr, ar->wmi.peer_param->authorize, 1); else if (sta && cmd == SET_KEY && (key->flags & IEEE80211_KEY_FLAG_PAIRWISE)) ath10k_wmi_peer_set_param(ar, arvif->vdev_id, peer_addr, ar->wmi.peer_param->authorize, 1); exit: mutex_unlock(&ar->conf_mutex); return ret; } static void ath10k_set_default_unicast_key(struct ieee80211_hw *hw, struct ieee80211_vif *vif, int keyidx) { struct ath10k *ar = hw->priv; struct ath10k_vif *arvif = (void *)vif->drv_priv; int ret; mutex_lock(&arvif->ar->conf_mutex); if (arvif->ar->state != ATH10K_STATE_ON) goto unlock; ath10k_dbg(ar, ATH10K_DBG_MAC, "mac vdev %d set keyidx %d\n", arvif->vdev_id, keyidx); ret = ath10k_wmi_vdev_set_param(arvif->ar, arvif->vdev_id, arvif->ar->wmi.vdev_param->def_keyid, keyidx); if (ret) { ath10k_warn(ar, "failed to update wep key index for vdev %d: %d\n", arvif->vdev_id, ret); goto unlock; } arvif->def_wep_key_idx = keyidx; unlock: mutex_unlock(&arvif->ar->conf_mutex); } static void ath10k_sta_rc_update_wk(struct work_struct *wk) { struct ath10k *ar; struct ath10k_vif *arvif; struct ath10k_sta *arsta; struct ieee80211_sta *sta; struct cfg80211_chan_def def; enum nl80211_band band; const u8 *ht_mcs_mask; const u16 *vht_mcs_mask; u32 changed, bw, nss, smps; int err; arsta = container_of(wk, struct ath10k_sta, update_wk); sta = container_of((void *)arsta, struct ieee80211_sta, drv_priv); arvif = arsta->arvif; ar = arvif->ar; if (WARN_ON(ath10k_mac_vif_chan(arvif->vif, &def))) return; band = def.chan->band; ht_mcs_mask = arvif->bitrate_mask.control[band].ht_mcs; vht_mcs_mask = arvif->bitrate_mask.control[band].vht_mcs; spin_lock_bh(&ar->data_lock); changed = arsta->changed; arsta->changed = 0; bw = arsta->bw; nss = arsta->nss; smps = arsta->smps; spin_unlock_bh(&ar->data_lock); mutex_lock(&ar->conf_mutex); nss = max_t(u32, 1, nss); nss = min(nss, max(ath10k_mac_max_ht_nss(ht_mcs_mask), ath10k_mac_max_vht_nss(vht_mcs_mask))); if (changed & IEEE80211_RC_BW_CHANGED) { enum wmi_phy_mode mode; mode = chan_to_phymode(&def); ath10k_dbg(ar, ATH10K_DBG_STA, "mac update sta %pM peer bw %d phymode %d\n", sta->addr, bw, mode); err = ath10k_wmi_peer_set_param(ar, arvif->vdev_id, sta->addr, ar->wmi.peer_param->phymode, mode); if (err) { ath10k_warn(ar, "failed to update STA %pM peer phymode %d: %d\n", sta->addr, mode, err); goto exit; } err = ath10k_wmi_peer_set_param(ar, arvif->vdev_id, sta->addr, ar->wmi.peer_param->chan_width, bw); if (err) ath10k_warn(ar, "failed to update STA %pM peer bw %d: %d\n", sta->addr, bw, err); } if (changed & IEEE80211_RC_NSS_CHANGED) { ath10k_dbg(ar, ATH10K_DBG_STA, "mac update sta %pM nss %d\n", sta->addr, nss); err = ath10k_wmi_peer_set_param(ar, arvif->vdev_id, sta->addr, ar->wmi.peer_param->nss, nss); if (err) ath10k_warn(ar, "failed to update STA %pM nss %d: %d\n", sta->addr, nss, err); } if (changed & IEEE80211_RC_SMPS_CHANGED) { ath10k_dbg(ar, ATH10K_DBG_STA, "mac update sta %pM smps %d\n", sta->addr, smps); err = ath10k_wmi_peer_set_param(ar, arvif->vdev_id, sta->addr, ar->wmi.peer_param->smps_state, smps); if (err) ath10k_warn(ar, "failed to update STA %pM smps %d: %d\n", sta->addr, smps, err); } if (changed & IEEE80211_RC_SUPP_RATES_CHANGED) { ath10k_dbg(ar, ATH10K_DBG_STA, "mac update sta %pM supp rates\n", sta->addr); err = ath10k_station_assoc(ar, arvif->vif, sta, true); if (err) ath10k_warn(ar, "failed to reassociate station: %pM\n", sta->addr); } exit: mutex_unlock(&ar->conf_mutex); } static int ath10k_mac_inc_num_stations(struct ath10k_vif *arvif, struct ieee80211_sta *sta) { struct ath10k *ar = arvif->ar; lockdep_assert_held(&ar->conf_mutex); if (arvif->vdev_type == WMI_VDEV_TYPE_STA && !sta->tdls) return 0; if (ar->num_stations >= ar->max_num_stations) return -ENOBUFS; ar->num_stations++; return 0; } static void ath10k_mac_dec_num_stations(struct ath10k_vif *arvif, struct ieee80211_sta *sta) { struct ath10k *ar = arvif->ar; lockdep_assert_held(&ar->conf_mutex); if (arvif->vdev_type == WMI_VDEV_TYPE_STA && !sta->tdls) return; ar->num_stations--; } static int ath10k_sta_set_txpwr(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta) { struct ath10k *ar = hw->priv; struct ath10k_vif *arvif = (void *)vif->drv_priv; int ret = 0; s16 txpwr; if (sta->deflink.txpwr.type == NL80211_TX_POWER_AUTOMATIC) { txpwr = 0; } else { txpwr = sta->deflink.txpwr.power; if (!txpwr) return -EINVAL; } if (txpwr > ATH10K_TX_POWER_MAX_VAL || txpwr < ATH10K_TX_POWER_MIN_VAL) return -EINVAL; mutex_lock(&ar->conf_mutex); ret = ath10k_wmi_peer_set_param(ar, arvif->vdev_id, sta->addr, ar->wmi.peer_param->use_fixed_power, txpwr); if (ret) { ath10k_warn(ar, "failed to set tx power for station ret: %d\n", ret); goto out; } out: mutex_unlock(&ar->conf_mutex); return ret; } struct ath10k_mac_iter_tid_conf_data { struct ieee80211_vif *curr_vif; struct ath10k *ar; bool reset_config; }; static bool ath10k_mac_bitrate_mask_has_single_rate(struct ath10k *ar, enum nl80211_band band, const struct cfg80211_bitrate_mask *mask, int *vht_num_rates) { int num_rates = 0; int i, tmp; num_rates += hweight32(mask->control[band].legacy); for (i = 0; i < ARRAY_SIZE(mask->control[band].ht_mcs); i++) num_rates += hweight8(mask->control[band].ht_mcs[i]); *vht_num_rates = 0; for (i = 0; i < ARRAY_SIZE(mask->control[band].vht_mcs); i++) { tmp = hweight16(mask->control[band].vht_mcs[i]); num_rates += tmp; *vht_num_rates += tmp; } return num_rates == 1; } static int ath10k_mac_bitrate_mask_get_single_rate(struct ath10k *ar, enum nl80211_band band, const struct cfg80211_bitrate_mask *mask, u8 *rate, u8 *nss, bool vht_only) { int rate_idx; int i; u16 bitrate; u8 preamble; u8 hw_rate; if (vht_only) goto next; if (hweight32(mask->control[band].legacy) == 1) { rate_idx = ffs(mask->control[band].legacy) - 1; if (ar->phy_capability & WHAL_WLAN_11A_CAPABILITY) rate_idx += ATH10K_MAC_FIRST_OFDM_RATE_IDX; hw_rate = ath10k_wmi_legacy_rates[rate_idx].hw_value; bitrate = ath10k_wmi_legacy_rates[rate_idx].bitrate; if (ath10k_mac_bitrate_is_cck(bitrate)) preamble = WMI_RATE_PREAMBLE_CCK; else preamble = WMI_RATE_PREAMBLE_OFDM; *nss = 1; *rate = preamble << 6 | (*nss - 1) << 4 | hw_rate << 0; return 0; } for (i = 0; i < ARRAY_SIZE(mask->control[band].ht_mcs); i++) { if (hweight8(mask->control[band].ht_mcs[i]) == 1) { *nss = i + 1; *rate = WMI_RATE_PREAMBLE_HT << 6 | (*nss - 1) << 4 | (ffs(mask->control[band].ht_mcs[i]) - 1); return 0; } } next: for (i = 0; i < ARRAY_SIZE(mask->control[band].vht_mcs); i++) { if (hweight16(mask->control[band].vht_mcs[i]) == 1) { *nss = i + 1; *rate = WMI_RATE_PREAMBLE_VHT << 6 | (*nss - 1) << 4 | (ffs(mask->control[band].vht_mcs[i]) - 1); return 0; } } return -EINVAL; } static int ath10k_mac_validate_rate_mask(struct ath10k *ar, struct ieee80211_sta *sta, u32 rate_ctrl_flag, u8 nss) { struct ieee80211_sta_ht_cap *ht_cap = &sta->deflink.ht_cap; struct ieee80211_sta_vht_cap *vht_cap = &sta->deflink.vht_cap; if (nss > sta->deflink.rx_nss) { ath10k_warn(ar, "Invalid nss field, configured %u limit %u\n", nss, sta->deflink.rx_nss); return -EINVAL; } if (ATH10K_HW_PREAMBLE(rate_ctrl_flag) == WMI_RATE_PREAMBLE_VHT) { if (!vht_cap->vht_supported) { ath10k_warn(ar, "Invalid VHT rate for sta %pM\n", sta->addr); return -EINVAL; } } else if (ATH10K_HW_PREAMBLE(rate_ctrl_flag) == WMI_RATE_PREAMBLE_HT) { if (!ht_cap->ht_supported || vht_cap->vht_supported) { ath10k_warn(ar, "Invalid HT rate for sta %pM\n", sta->addr); return -EINVAL; } } else { if (ht_cap->ht_supported || vht_cap->vht_supported) return -EINVAL; } return 0; } static int ath10k_mac_tid_bitrate_config(struct ath10k *ar, struct ieee80211_vif *vif, struct ieee80211_sta *sta, u32 *rate_ctrl_flag, u8 *rate_ctrl, enum nl80211_tx_rate_setting txrate_type, const struct cfg80211_bitrate_mask *mask) { struct cfg80211_chan_def def; enum nl80211_band band; u8 nss, rate; int vht_num_rates, ret; if (WARN_ON(ath10k_mac_vif_chan(vif, &def))) return -EINVAL; if (txrate_type == NL80211_TX_RATE_AUTOMATIC) { *rate_ctrl = WMI_TID_CONFIG_RATE_CONTROL_AUTO; *rate_ctrl_flag = 0; return 0; } band = def.chan->band; if (!ath10k_mac_bitrate_mask_has_single_rate(ar, band, mask, &vht_num_rates)) { return -EINVAL; } ret = ath10k_mac_bitrate_mask_get_single_rate(ar, band, mask, &rate, &nss, false); if (ret) { ath10k_warn(ar, "failed to get single rate: %d\n", ret); return ret; } *rate_ctrl_flag = rate; if (sta && ath10k_mac_validate_rate_mask(ar, sta, *rate_ctrl_flag, nss)) return -EINVAL; if (txrate_type == NL80211_TX_RATE_FIXED) *rate_ctrl = WMI_TID_CONFIG_RATE_CONTROL_FIXED_RATE; else if (txrate_type == NL80211_TX_RATE_LIMITED && (test_bit(WMI_SERVICE_EXT_PEER_TID_CONFIGS_SUPPORT, ar->wmi.svc_map))) *rate_ctrl = WMI_PEER_TID_CONFIG_RATE_UPPER_CAP; else return -EOPNOTSUPP; return 0; } static int ath10k_mac_set_tid_config(struct ath10k *ar, struct ieee80211_sta *sta, struct ieee80211_vif *vif, u32 changed, struct wmi_per_peer_per_tid_cfg_arg *arg) { struct ath10k_vif *arvif = (void *)vif->drv_priv; struct ath10k_sta *arsta; int ret; if (sta) { if (!sta->wme) return -EOPNOTSUPP; arsta = (struct ath10k_sta *)sta->drv_priv; if (changed & BIT(NL80211_TID_CONFIG_ATTR_NOACK)) { if ((arsta->retry_long[arg->tid] > 0 || arsta->rate_code[arg->tid] > 0 || arsta->ampdu[arg->tid] == WMI_TID_CONFIG_AGGR_CONTROL_ENABLE) && arg->ack_policy == WMI_PEER_TID_CONFIG_NOACK) { changed &= ~BIT(NL80211_TID_CONFIG_ATTR_NOACK); arg->ack_policy = 0; arg->aggr_control = 0; arg->rate_ctrl = 0; arg->rcode_flags = 0; } } if (changed & BIT(NL80211_TID_CONFIG_ATTR_AMPDU_CTRL)) { if (arsta->noack[arg->tid] == WMI_PEER_TID_CONFIG_NOACK || arvif->noack[arg->tid] == WMI_PEER_TID_CONFIG_NOACK) { arg->aggr_control = 0; changed &= ~BIT(NL80211_TID_CONFIG_ATTR_RETRY_LONG); } } if (changed & (BIT(NL80211_TID_CONFIG_ATTR_TX_RATE) | BIT(NL80211_TID_CONFIG_ATTR_TX_RATE_TYPE))) { if (arsta->noack[arg->tid] == WMI_PEER_TID_CONFIG_NOACK || arvif->noack[arg->tid] == WMI_PEER_TID_CONFIG_NOACK) { arg->rate_ctrl = 0; arg->rcode_flags = 0; } } ether_addr_copy(arg->peer_macaddr.addr, sta->addr); ret = ath10k_wmi_set_per_peer_per_tid_cfg(ar, arg); if (ret) return ret; /* Store the configured parameters in success case */ if (changed & BIT(NL80211_TID_CONFIG_ATTR_NOACK)) { arsta->noack[arg->tid] = arg->ack_policy; arg->ack_policy = 0; arg->aggr_control = 0; arg->rate_ctrl = 0; arg->rcode_flags = 0; } if (changed & BIT(NL80211_TID_CONFIG_ATTR_RETRY_LONG)) { arsta->retry_long[arg->tid] = arg->retry_count; arg->retry_count = 0; } if (changed & BIT(NL80211_TID_CONFIG_ATTR_AMPDU_CTRL)) { arsta->ampdu[arg->tid] = arg->aggr_control; arg->aggr_control = 0; } if (changed & (BIT(NL80211_TID_CONFIG_ATTR_TX_RATE) | BIT(NL80211_TID_CONFIG_ATTR_TX_RATE_TYPE))) { arsta->rate_ctrl[arg->tid] = arg->rate_ctrl; arg->rate_ctrl = 0; arg->rcode_flags = 0; } if (changed & BIT(NL80211_TID_CONFIG_ATTR_RTSCTS_CTRL)) { arsta->rtscts[arg->tid] = arg->rtscts_ctrl; arg->ext_tid_cfg_bitmap = 0; } } else { if (changed & BIT(NL80211_TID_CONFIG_ATTR_NOACK)) { if ((arvif->retry_long[arg->tid] || arvif->rate_code[arg->tid] || arvif->ampdu[arg->tid] == WMI_TID_CONFIG_AGGR_CONTROL_ENABLE) && arg->ack_policy == WMI_PEER_TID_CONFIG_NOACK) { changed &= ~BIT(NL80211_TID_CONFIG_ATTR_NOACK); } else { arvif->noack[arg->tid] = arg->ack_policy; arvif->ampdu[arg->tid] = arg->aggr_control; arvif->rate_ctrl[arg->tid] = arg->rate_ctrl; } } if (changed & BIT(NL80211_TID_CONFIG_ATTR_RETRY_LONG)) { if (arvif->noack[arg->tid] == WMI_PEER_TID_CONFIG_NOACK) changed &= ~BIT(NL80211_TID_CONFIG_ATTR_RETRY_LONG); else arvif->retry_long[arg->tid] = arg->retry_count; } if (changed & BIT(NL80211_TID_CONFIG_ATTR_AMPDU_CTRL)) { if (arvif->noack[arg->tid] == WMI_PEER_TID_CONFIG_NOACK) changed &= ~BIT(NL80211_TID_CONFIG_ATTR_AMPDU_CTRL); else arvif->ampdu[arg->tid] = arg->aggr_control; } if (changed & (BIT(NL80211_TID_CONFIG_ATTR_TX_RATE) | BIT(NL80211_TID_CONFIG_ATTR_TX_RATE_TYPE))) { if (arvif->noack[arg->tid] == WMI_PEER_TID_CONFIG_NOACK) { changed &= ~(BIT(NL80211_TID_CONFIG_ATTR_TX_RATE) | BIT(NL80211_TID_CONFIG_ATTR_TX_RATE_TYPE)); } else { arvif->rate_ctrl[arg->tid] = arg->rate_ctrl; arvif->rate_code[arg->tid] = arg->rcode_flags; } } if (changed & BIT(NL80211_TID_CONFIG_ATTR_RTSCTS_CTRL)) { arvif->rtscts[arg->tid] = arg->rtscts_ctrl; arg->ext_tid_cfg_bitmap = 0; } if (changed) arvif->tid_conf_changed[arg->tid] |= changed; } return 0; } static int ath10k_mac_parse_tid_config(struct ath10k *ar, struct ieee80211_sta *sta, struct ieee80211_vif *vif, struct cfg80211_tid_cfg *tid_conf, struct wmi_per_peer_per_tid_cfg_arg *arg) { u32 changed = tid_conf->mask; int ret = 0, i = 0; if (!changed) return -EINVAL; while (i < ATH10K_TID_MAX) { if (!(tid_conf->tids & BIT(i))) { i++; continue; } arg->tid = i; if (changed & BIT(NL80211_TID_CONFIG_ATTR_NOACK)) { if (tid_conf->noack == NL80211_TID_CONFIG_ENABLE) { arg->ack_policy = WMI_PEER_TID_CONFIG_NOACK; arg->rate_ctrl = WMI_TID_CONFIG_RATE_CONTROL_DEFAULT_LOWEST_RATE; arg->aggr_control = WMI_TID_CONFIG_AGGR_CONTROL_DISABLE; } else { arg->ack_policy = WMI_PEER_TID_CONFIG_ACK; arg->rate_ctrl = WMI_TID_CONFIG_RATE_CONTROL_AUTO; arg->aggr_control = WMI_TID_CONFIG_AGGR_CONTROL_ENABLE; } } if (changed & BIT(NL80211_TID_CONFIG_ATTR_RETRY_LONG)) arg->retry_count = tid_conf->retry_long; if (changed & BIT(NL80211_TID_CONFIG_ATTR_AMPDU_CTRL)) { if (tid_conf->noack == NL80211_TID_CONFIG_ENABLE) arg->aggr_control = WMI_TID_CONFIG_AGGR_CONTROL_ENABLE; else arg->aggr_control = WMI_TID_CONFIG_AGGR_CONTROL_DISABLE; } if (changed & (BIT(NL80211_TID_CONFIG_ATTR_TX_RATE) | BIT(NL80211_TID_CONFIG_ATTR_TX_RATE_TYPE))) { ret = ath10k_mac_tid_bitrate_config(ar, vif, sta, &arg->rcode_flags, &arg->rate_ctrl, tid_conf->txrate_type, &tid_conf->txrate_mask); if (ret) { ath10k_warn(ar, "failed to configure bitrate mask %d\n", ret); arg->rcode_flags = 0; arg->rate_ctrl = 0; } } if (changed & BIT(NL80211_TID_CONFIG_ATTR_RTSCTS_CTRL)) { if (tid_conf->rtscts) arg->rtscts_ctrl = tid_conf->rtscts; arg->ext_tid_cfg_bitmap = WMI_EXT_TID_RTS_CTS_CONFIG; } ret = ath10k_mac_set_tid_config(ar, sta, vif, changed, arg); if (ret) return ret; i++; } return ret; } static int ath10k_mac_reset_tid_config(struct ath10k *ar, struct ieee80211_sta *sta, struct ath10k_vif *arvif, u8 tids) { struct ath10k_sta *arsta = (struct ath10k_sta *)sta->drv_priv; struct wmi_per_peer_per_tid_cfg_arg arg; int ret = 0, i = 0; arg.vdev_id = arvif->vdev_id; while (i < ATH10K_TID_MAX) { if (!(tids & BIT(i))) { i++; continue; } arg.tid = i; arg.ack_policy = WMI_PEER_TID_CONFIG_ACK; arg.retry_count = ATH10K_MAX_RETRY_COUNT; arg.rate_ctrl = WMI_TID_CONFIG_RATE_CONTROL_AUTO; arg.aggr_control = WMI_TID_CONFIG_AGGR_CONTROL_ENABLE; arg.rtscts_ctrl = WMI_TID_CONFIG_RTSCTS_CONTROL_ENABLE; arg.ext_tid_cfg_bitmap = WMI_EXT_TID_RTS_CTS_CONFIG; ether_addr_copy(arg.peer_macaddr.addr, sta->addr); ret = ath10k_wmi_set_per_peer_per_tid_cfg(ar, &arg); if (ret) return ret; if (!arvif->tids_rst) { arsta->retry_long[i] = -1; arsta->noack[i] = -1; arsta->ampdu[i] = -1; arsta->rate_code[i] = -1; arsta->rate_ctrl[i] = 0; arsta->rtscts[i] = -1; } else { arvif->retry_long[i] = 0; arvif->noack[i] = 0; arvif->ampdu[i] = 0; arvif->rate_code[i] = 0; arvif->rate_ctrl[i] = 0; arvif->rtscts[i] = 0; } i++; } return ret; } static void ath10k_sta_tid_cfg_wk(struct work_struct *wk) { struct wmi_per_peer_per_tid_cfg_arg arg = {}; struct ieee80211_sta *sta; struct ath10k_sta *arsta; struct ath10k_vif *arvif; struct ath10k *ar; bool config_apply; int ret, i; u32 changed; u8 nss; arsta = container_of(wk, struct ath10k_sta, tid_config_wk); sta = container_of((void *)arsta, struct ieee80211_sta, drv_priv); arvif = arsta->arvif; ar = arvif->ar; mutex_lock(&ar->conf_mutex); if (arvif->tids_rst) { ret = ath10k_mac_reset_tid_config(ar, sta, arvif, arvif->tids_rst); goto exit; } ether_addr_copy(arg.peer_macaddr.addr, sta->addr); for (i = 0; i < ATH10K_TID_MAX; i++) { config_apply = false; changed = arvif->tid_conf_changed[i]; if (changed & BIT(NL80211_TID_CONFIG_ATTR_NOACK)) { if (arsta->noack[i] != -1) { arg.ack_policy = 0; } else { config_apply = true; arg.ack_policy = arvif->noack[i]; arg.aggr_control = arvif->ampdu[i]; arg.rate_ctrl = arvif->rate_ctrl[i]; } } if (changed & BIT(NL80211_TID_CONFIG_ATTR_RETRY_LONG)) { if (arsta->retry_long[i] != -1 || arsta->noack[i] == WMI_PEER_TID_CONFIG_NOACK || arvif->noack[i] == WMI_PEER_TID_CONFIG_NOACK) { arg.retry_count = 0; } else { arg.retry_count = arvif->retry_long[i]; config_apply = true; } } if (changed & BIT(NL80211_TID_CONFIG_ATTR_AMPDU_CTRL)) { if (arsta->ampdu[i] != -1 || arsta->noack[i] == WMI_PEER_TID_CONFIG_NOACK || arvif->noack[i] == WMI_PEER_TID_CONFIG_NOACK) { arg.aggr_control = 0; } else { arg.aggr_control = arvif->ampdu[i]; config_apply = true; } } if (changed & (BIT(NL80211_TID_CONFIG_ATTR_TX_RATE) | BIT(NL80211_TID_CONFIG_ATTR_TX_RATE_TYPE))) { nss = ATH10K_HW_NSS(arvif->rate_code[i]); ret = ath10k_mac_validate_rate_mask(ar, sta, arvif->rate_code[i], nss); if (ret && arvif->rate_ctrl[i] > WMI_TID_CONFIG_RATE_CONTROL_AUTO) { arg.rate_ctrl = 0; arg.rcode_flags = 0; } if (arsta->rate_ctrl[i] > WMI_TID_CONFIG_RATE_CONTROL_AUTO || arsta->noack[i] == WMI_PEER_TID_CONFIG_NOACK || arvif->noack[i] == WMI_PEER_TID_CONFIG_NOACK) { arg.rate_ctrl = 0; arg.rcode_flags = 0; } else { arg.rate_ctrl = arvif->rate_ctrl[i]; arg.rcode_flags = arvif->rate_code[i]; config_apply = true; } } if (changed & BIT(NL80211_TID_CONFIG_ATTR_RTSCTS_CTRL)) { if (arsta->rtscts[i]) { arg.rtscts_ctrl = 0; arg.ext_tid_cfg_bitmap = 0; } else { arg.rtscts_ctrl = arvif->rtscts[i] - 1; arg.ext_tid_cfg_bitmap = WMI_EXT_TID_RTS_CTS_CONFIG; config_apply = true; } } arg.tid = i; if (config_apply) { ret = ath10k_wmi_set_per_peer_per_tid_cfg(ar, &arg); if (ret) ath10k_warn(ar, "failed to set per tid config for sta %pM: %d\n", sta->addr, ret); } arg.ack_policy = 0; arg.retry_count = 0; arg.aggr_control = 0; arg.rate_ctrl = 0; arg.rcode_flags = 0; } exit: mutex_unlock(&ar->conf_mutex); } static void ath10k_mac_vif_stations_tid_conf(void *data, struct ieee80211_sta *sta) { struct ath10k_sta *arsta = (struct ath10k_sta *)sta->drv_priv; struct ath10k_mac_iter_tid_conf_data *iter_data = data; struct ieee80211_vif *sta_vif = arsta->arvif->vif; if (sta_vif != iter_data->curr_vif || !sta->wme) return; ieee80211_queue_work(iter_data->ar->hw, &arsta->tid_config_wk); } static int ath10k_sta_state(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta, enum ieee80211_sta_state old_state, enum ieee80211_sta_state new_state) { struct ath10k *ar = hw->priv; struct ath10k_vif *arvif = (void *)vif->drv_priv; struct ath10k_sta *arsta = (struct ath10k_sta *)sta->drv_priv; struct ath10k_peer *peer; int ret = 0; int i; if (old_state == IEEE80211_STA_NOTEXIST && new_state == IEEE80211_STA_NONE) { memset(arsta, 0, sizeof(*arsta)); arsta->arvif = arvif; arsta->peer_ps_state = WMI_PEER_PS_STATE_DISABLED; INIT_WORK(&arsta->update_wk, ath10k_sta_rc_update_wk); INIT_WORK(&arsta->tid_config_wk, ath10k_sta_tid_cfg_wk); for (i = 0; i < ARRAY_SIZE(sta->txq); i++) ath10k_mac_txq_init(sta->txq[i]); } /* cancel must be done outside the mutex to avoid deadlock */ if ((old_state == IEEE80211_STA_NONE && new_state == IEEE80211_STA_NOTEXIST)) { cancel_work_sync(&arsta->update_wk); cancel_work_sync(&arsta->tid_config_wk); } mutex_lock(&ar->conf_mutex); if (old_state == IEEE80211_STA_NOTEXIST && new_state == IEEE80211_STA_NONE) { /* * New station addition. */ enum wmi_peer_type peer_type = WMI_PEER_TYPE_DEFAULT; u32 num_tdls_stations; ath10k_dbg(ar, ATH10K_DBG_STA, "mac vdev %d peer create %pM (new sta) sta %d / %d peer %d / %d\n", arvif->vdev_id, sta->addr, ar->num_stations + 1, ar->max_num_stations, ar->num_peers + 1, ar->max_num_peers); num_tdls_stations = ath10k_mac_tdls_vif_stations_count(hw, vif); if (sta->tdls) { if (num_tdls_stations >= ar->max_num_tdls_vdevs) { ath10k_warn(ar, "vdev %i exceeded maximum number of tdls vdevs %i\n", arvif->vdev_id, ar->max_num_tdls_vdevs); ret = -ELNRNG; goto exit; } peer_type = WMI_PEER_TYPE_TDLS; } ret = ath10k_mac_inc_num_stations(arvif, sta); if (ret) { ath10k_warn(ar, "refusing to associate station: too many connected already (%d)\n", ar->max_num_stations); goto exit; } if (ath10k_debug_is_extd_tx_stats_enabled(ar)) { arsta->tx_stats = kzalloc(sizeof(*arsta->tx_stats), GFP_KERNEL); if (!arsta->tx_stats) { ath10k_mac_dec_num_stations(arvif, sta); ret = -ENOMEM; goto exit; } } ret = ath10k_peer_create(ar, vif, sta, arvif->vdev_id, sta->addr, peer_type); if (ret) { ath10k_warn(ar, "failed to add peer %pM for vdev %d when adding a new sta: %i\n", sta->addr, arvif->vdev_id, ret); ath10k_mac_dec_num_stations(arvif, sta); kfree(arsta->tx_stats); goto exit; } spin_lock_bh(&ar->data_lock); peer = ath10k_peer_find(ar, arvif->vdev_id, sta->addr); if (!peer) { ath10k_warn(ar, "failed to lookup peer %pM on vdev %i\n", vif->addr, arvif->vdev_id); spin_unlock_bh(&ar->data_lock); ath10k_peer_delete(ar, arvif->vdev_id, sta->addr); ath10k_mac_dec_num_stations(arvif, sta); kfree(arsta->tx_stats); ret = -ENOENT; goto exit; } arsta->peer_id = find_first_bit(peer->peer_ids, ATH10K_MAX_NUM_PEER_IDS); spin_unlock_bh(&ar->data_lock); if (!sta->tdls) goto exit; ret = ath10k_wmi_update_fw_tdls_state(ar, arvif->vdev_id, WMI_TDLS_ENABLE_ACTIVE); if (ret) { ath10k_warn(ar, "failed to update fw tdls state on vdev %i: %i\n", arvif->vdev_id, ret); ath10k_peer_delete(ar, arvif->vdev_id, sta->addr); ath10k_mac_dec_num_stations(arvif, sta); kfree(arsta->tx_stats); goto exit; } ret = ath10k_mac_tdls_peer_update(ar, arvif->vdev_id, sta, WMI_TDLS_PEER_STATE_PEERING); if (ret) { ath10k_warn(ar, "failed to update tdls peer %pM for vdev %d when adding a new sta: %i\n", sta->addr, arvif->vdev_id, ret); ath10k_peer_delete(ar, arvif->vdev_id, sta->addr); ath10k_mac_dec_num_stations(arvif, sta); kfree(arsta->tx_stats); if (num_tdls_stations != 0) goto exit; ath10k_wmi_update_fw_tdls_state(ar, arvif->vdev_id, WMI_TDLS_DISABLE); } } else if ((old_state == IEEE80211_STA_NONE && new_state == IEEE80211_STA_NOTEXIST)) { /* * Existing station deletion. */ ath10k_dbg(ar, ATH10K_DBG_STA, "mac vdev %d peer delete %pM sta %p (sta gone)\n", arvif->vdev_id, sta->addr, sta); if (sta->tdls) { ret = ath10k_mac_tdls_peer_update(ar, arvif->vdev_id, sta, WMI_TDLS_PEER_STATE_TEARDOWN); if (ret) ath10k_warn(ar, "failed to update tdls peer state for %pM state %d: %i\n", sta->addr, WMI_TDLS_PEER_STATE_TEARDOWN, ret); } ret = ath10k_peer_delete(ar, arvif->vdev_id, sta->addr); if (ret) ath10k_warn(ar, "failed to delete peer %pM for vdev %d: %i\n", sta->addr, arvif->vdev_id, ret); ath10k_mac_dec_num_stations(arvif, sta); spin_lock_bh(&ar->data_lock); for (i = 0; i < ARRAY_SIZE(ar->peer_map); i++) { peer = ar->peer_map[i]; if (!peer) continue; if (peer->sta == sta) { ath10k_warn(ar, "found sta peer %pM (ptr %p id %d) entry on vdev %i after it was supposedly removed\n", sta->addr, peer, i, arvif->vdev_id); peer->sta = NULL; /* Clean up the peer object as well since we * must have failed to do this above. */ ath10k_peer_map_cleanup(ar, peer); } } spin_unlock_bh(&ar->data_lock); if (ath10k_debug_is_extd_tx_stats_enabled(ar)) { kfree(arsta->tx_stats); arsta->tx_stats = NULL; } for (i = 0; i < ARRAY_SIZE(sta->txq); i++) ath10k_mac_txq_unref(ar, sta->txq[i]); if (!sta->tdls) goto exit; if (ath10k_mac_tdls_vif_stations_count(hw, vif)) goto exit; /* This was the last tdls peer in current vif */ ret = ath10k_wmi_update_fw_tdls_state(ar, arvif->vdev_id, WMI_TDLS_DISABLE); if (ret) { ath10k_warn(ar, "failed to update fw tdls state on vdev %i: %i\n", arvif->vdev_id, ret); } } else if (old_state == IEEE80211_STA_AUTH && new_state == IEEE80211_STA_ASSOC && (vif->type == NL80211_IFTYPE_AP || vif->type == NL80211_IFTYPE_MESH_POINT || vif->type == NL80211_IFTYPE_ADHOC)) { /* * New association. */ ath10k_dbg(ar, ATH10K_DBG_STA, "mac sta %pM associated\n", sta->addr); ret = ath10k_station_assoc(ar, vif, sta, false); if (ret) ath10k_warn(ar, "failed to associate station %pM for vdev %i: %i\n", sta->addr, arvif->vdev_id, ret); } else if (old_state == IEEE80211_STA_ASSOC && new_state == IEEE80211_STA_AUTHORIZED && sta->tdls) { /* * Tdls station authorized. */ ath10k_dbg(ar, ATH10K_DBG_STA, "mac tdls sta %pM authorized\n", sta->addr); ret = ath10k_station_assoc(ar, vif, sta, false); if (ret) { ath10k_warn(ar, "failed to associate tdls station %pM for vdev %i: %i\n", sta->addr, arvif->vdev_id, ret); goto exit; } ret = ath10k_mac_tdls_peer_update(ar, arvif->vdev_id, sta, WMI_TDLS_PEER_STATE_CONNECTED); if (ret) ath10k_warn(ar, "failed to update tdls peer %pM for vdev %i: %i\n", sta->addr, arvif->vdev_id, ret); } else if (old_state == IEEE80211_STA_ASSOC && new_state == IEEE80211_STA_AUTH && (vif->type == NL80211_IFTYPE_AP || vif->type == NL80211_IFTYPE_MESH_POINT || vif->type == NL80211_IFTYPE_ADHOC)) { /* * Disassociation. */ ath10k_dbg(ar, ATH10K_DBG_STA, "mac sta %pM disassociated\n", sta->addr); ret = ath10k_station_disassoc(ar, vif, sta); if (ret) ath10k_warn(ar, "failed to disassociate station: %pM vdev %i: %i\n", sta->addr, arvif->vdev_id, ret); } exit: mutex_unlock(&ar->conf_mutex); return ret; } static int ath10k_conf_tx_uapsd(struct ath10k *ar, struct ieee80211_vif *vif, u16 ac, bool enable) { struct ath10k_vif *arvif = (void *)vif->drv_priv; struct wmi_sta_uapsd_auto_trig_arg arg = {}; u32 prio = 0, acc = 0; u32 value = 0; int ret = 0; lockdep_assert_held(&ar->conf_mutex); if (arvif->vdev_type != WMI_VDEV_TYPE_STA) return 0; switch (ac) { case IEEE80211_AC_VO: value = WMI_STA_PS_UAPSD_AC3_DELIVERY_EN | WMI_STA_PS_UAPSD_AC3_TRIGGER_EN; prio = 7; acc = 3; break; case IEEE80211_AC_VI: value = WMI_STA_PS_UAPSD_AC2_DELIVERY_EN | WMI_STA_PS_UAPSD_AC2_TRIGGER_EN; prio = 5; acc = 2; break; case IEEE80211_AC_BE: value = WMI_STA_PS_UAPSD_AC1_DELIVERY_EN | WMI_STA_PS_UAPSD_AC1_TRIGGER_EN; prio = 2; acc = 1; break; case IEEE80211_AC_BK: value = WMI_STA_PS_UAPSD_AC0_DELIVERY_EN | WMI_STA_PS_UAPSD_AC0_TRIGGER_EN; prio = 0; acc = 0; break; } if (enable) arvif->u.sta.uapsd |= value; else arvif->u.sta.uapsd &= ~value; ret = ath10k_wmi_set_sta_ps_param(ar, arvif->vdev_id, WMI_STA_PS_PARAM_UAPSD, arvif->u.sta.uapsd); if (ret) { ath10k_warn(ar, "failed to set uapsd params: %d\n", ret); goto exit; } if (arvif->u.sta.uapsd) value = WMI_STA_PS_RX_WAKE_POLICY_POLL_UAPSD; else value = WMI_STA_PS_RX_WAKE_POLICY_WAKE; ret = ath10k_wmi_set_sta_ps_param(ar, arvif->vdev_id, WMI_STA_PS_PARAM_RX_WAKE_POLICY, value); if (ret) ath10k_warn(ar, "failed to set rx wake param: %d\n", ret); ret = ath10k_mac_vif_recalc_ps_wake_threshold(arvif); if (ret) { ath10k_warn(ar, "failed to recalc ps wake threshold on vdev %i: %d\n", arvif->vdev_id, ret); return ret; } ret = ath10k_mac_vif_recalc_ps_poll_count(arvif); if (ret) { ath10k_warn(ar, "failed to recalc ps poll count on vdev %i: %d\n", arvif->vdev_id, ret); return ret; } if (test_bit(WMI_SERVICE_STA_UAPSD_BASIC_AUTO_TRIG, ar->wmi.svc_map) || test_bit(WMI_SERVICE_STA_UAPSD_VAR_AUTO_TRIG, ar->wmi.svc_map)) { /* Only userspace can make an educated decision when to send * trigger frame. The following effectively disables u-UAPSD * autotrigger in firmware (which is enabled by default * provided the autotrigger service is available). */ arg.wmm_ac = acc; arg.user_priority = prio; arg.service_interval = 0; arg.suspend_interval = WMI_STA_UAPSD_MAX_INTERVAL_MSEC; arg.delay_interval = WMI_STA_UAPSD_MAX_INTERVAL_MSEC; ret = ath10k_wmi_vdev_sta_uapsd(ar, arvif->vdev_id, arvif->bssid, &arg, 1); if (ret) { ath10k_warn(ar, "failed to set uapsd auto trigger %d\n", ret); return ret; } } exit: return ret; } static int ath10k_conf_tx(struct ieee80211_hw *hw, struct ieee80211_vif *vif, unsigned int link_id, u16 ac, const struct ieee80211_tx_queue_params *params) { struct ath10k *ar = hw->priv; struct ath10k_vif *arvif = (void *)vif->drv_priv; struct wmi_wmm_params_arg *p = NULL; int ret; mutex_lock(&ar->conf_mutex); switch (ac) { case IEEE80211_AC_VO: p = &arvif->wmm_params.ac_vo; break; case IEEE80211_AC_VI: p = &arvif->wmm_params.ac_vi; break; case IEEE80211_AC_BE: p = &arvif->wmm_params.ac_be; break; case IEEE80211_AC_BK: p = &arvif->wmm_params.ac_bk; break; } if (WARN_ON(!p)) { ret = -EINVAL; goto exit; } p->cwmin = params->cw_min; p->cwmax = params->cw_max; p->aifs = params->aifs; /* * The channel time duration programmed in the HW is in absolute * microseconds, while mac80211 gives the txop in units of * 32 microseconds. */ p->txop = params->txop * 32; if (ar->wmi.ops->gen_vdev_wmm_conf) { ret = ath10k_wmi_vdev_wmm_conf(ar, arvif->vdev_id, &arvif->wmm_params); if (ret) { ath10k_warn(ar, "failed to set vdev wmm params on vdev %i: %d\n", arvif->vdev_id, ret); goto exit; } } else { /* This won't work well with multi-interface cases but it's * better than nothing. */ ret = ath10k_wmi_pdev_set_wmm_params(ar, &arvif->wmm_params); if (ret) { ath10k_warn(ar, "failed to set wmm params: %d\n", ret); goto exit; } } ret = ath10k_conf_tx_uapsd(ar, vif, ac, params->uapsd); if (ret) ath10k_warn(ar, "failed to set sta uapsd: %d\n", ret); exit: mutex_unlock(&ar->conf_mutex); return ret; } static int ath10k_remain_on_channel(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_channel *chan, int duration, enum ieee80211_roc_type type) { struct ath10k *ar = hw->priv; struct ath10k_vif *arvif = (void *)vif->drv_priv; struct wmi_start_scan_arg *arg = NULL; int ret = 0; u32 scan_time_msec; mutex_lock(&ar->conf_mutex); if (ath10k_mac_tdls_vif_stations_count(hw, vif) > 0) { ret = -EBUSY; goto exit; } spin_lock_bh(&ar->data_lock); switch (ar->scan.state) { case ATH10K_SCAN_IDLE: reinit_completion(&ar->scan.started); reinit_completion(&ar->scan.completed); reinit_completion(&ar->scan.on_channel); ar->scan.state = ATH10K_SCAN_STARTING; ar->scan.is_roc = true; ar->scan.vdev_id = arvif->vdev_id; ar->scan.roc_freq = chan->center_freq; ar->scan.roc_notify = true; ret = 0; break; case ATH10K_SCAN_STARTING: case ATH10K_SCAN_RUNNING: case ATH10K_SCAN_ABORTING: ret = -EBUSY; break; } spin_unlock_bh(&ar->data_lock); if (ret) goto exit; scan_time_msec = ar->hw->wiphy->max_remain_on_channel_duration * 2; arg = kzalloc(sizeof(*arg), GFP_KERNEL); if (!arg) { ret = -ENOMEM; goto exit; } ath10k_wmi_start_scan_init(ar, arg); arg->vdev_id = arvif->vdev_id; arg->scan_id = ATH10K_SCAN_ID; arg->n_channels = 1; arg->channels[0] = chan->center_freq; arg->dwell_time_active = scan_time_msec; arg->dwell_time_passive = scan_time_msec; arg->max_scan_time = scan_time_msec; arg->scan_ctrl_flags |= WMI_SCAN_FLAG_PASSIVE; arg->scan_ctrl_flags |= WMI_SCAN_FILTER_PROBE_REQ; arg->burst_duration_ms = duration; ret = ath10k_start_scan(ar, arg); if (ret) { ath10k_warn(ar, "failed to start roc scan: %d\n", ret); spin_lock_bh(&ar->data_lock); ar->scan.state = ATH10K_SCAN_IDLE; spin_unlock_bh(&ar->data_lock); goto exit; } ret = wait_for_completion_timeout(&ar->scan.on_channel, 3 * HZ); if (ret == 0) { ath10k_warn(ar, "failed to switch to channel for roc scan\n"); ret = ath10k_scan_stop(ar); if (ret) ath10k_warn(ar, "failed to stop scan: %d\n", ret); ret = -ETIMEDOUT; goto exit; } ieee80211_queue_delayed_work(ar->hw, &ar->scan.timeout, msecs_to_jiffies(duration)); ret = 0; exit: kfree(arg); mutex_unlock(&ar->conf_mutex); return ret; } static int ath10k_cancel_remain_on_channel(struct ieee80211_hw *hw, struct ieee80211_vif *vif) { struct ath10k *ar = hw->priv; mutex_lock(&ar->conf_mutex); spin_lock_bh(&ar->data_lock); ar->scan.roc_notify = false; spin_unlock_bh(&ar->data_lock); ath10k_scan_abort(ar); mutex_unlock(&ar->conf_mutex); cancel_delayed_work_sync(&ar->scan.timeout); return 0; } /* * Both RTS and Fragmentation threshold are interface-specific * in ath10k, but device-specific in mac80211. */ static int ath10k_set_rts_threshold(struct ieee80211_hw *hw, int radio_idx, u32 value) { struct ath10k *ar = hw->priv; struct ath10k_vif *arvif; int ret = 0; mutex_lock(&ar->conf_mutex); list_for_each_entry(arvif, &ar->arvifs, list) { ath10k_dbg(ar, ATH10K_DBG_MAC, "mac vdev %d rts threshold %d\n", arvif->vdev_id, value); ret = ath10k_mac_set_rts(arvif, value); if (ret) { ath10k_warn(ar, "failed to set rts threshold for vdev %d: %d\n", arvif->vdev_id, ret); break; } } mutex_unlock(&ar->conf_mutex); return ret; } static int ath10k_mac_op_set_frag_threshold(struct ieee80211_hw *hw, int radio_idx, u32 value) { /* Even though there's a WMI enum for fragmentation threshold no known * firmware actually implements it. Moreover it is not possible to rely * frame fragmentation to mac80211 because firmware clears the "more * fragments" bit in frame control making it impossible for remote * devices to reassemble frames. * * Hence implement a dummy callback just to say fragmentation isn't * supported. This effectively prevents mac80211 from doing frame * fragmentation in software. */ return -EOPNOTSUPP; } void ath10k_mac_wait_tx_complete(struct ath10k *ar) { bool skip; long time_left; /* mac80211 doesn't care if we really xmit queued frames or not * we'll collect those frames either way if we stop/delete vdevs */ if (ar->state == ATH10K_STATE_WEDGED) return; time_left = wait_event_timeout(ar->htt.empty_tx_wq, ({ bool empty; spin_lock_bh(&ar->htt.tx_lock); empty = (ar->htt.num_pending_tx == 0); spin_unlock_bh(&ar->htt.tx_lock); skip = (ar->state == ATH10K_STATE_WEDGED) || test_bit(ATH10K_FLAG_CRASH_FLUSH, &ar->dev_flags); (empty || skip); }), ATH10K_FLUSH_TIMEOUT_HZ); if (time_left == 0 || skip) ath10k_warn(ar, "failed to flush transmit queue (skip %i ar-state %i): %ld\n", skip, ar->state, time_left); } static void ath10k_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u32 queues, bool drop) { struct ath10k *ar = hw->priv; struct ath10k_vif *arvif; u32 bitmap; if (drop) { if (vif && vif->type == NL80211_IFTYPE_STATION) { bitmap = ~(1 << WMI_MGMT_TID); list_for_each_entry(arvif, &ar->arvifs, list) { if (arvif->vdev_type == WMI_VDEV_TYPE_STA) ath10k_wmi_peer_flush(ar, arvif->vdev_id, arvif->bssid, bitmap); } ath10k_htt_flush_tx(&ar->htt); } return; } mutex_lock(&ar->conf_mutex); ath10k_mac_wait_tx_complete(ar); mutex_unlock(&ar->conf_mutex); } /* TODO: Implement this function properly * For now it is needed to reply to Probe Requests in IBSS mode. * Probably we need this information from FW. */ static int ath10k_tx_last_beacon(struct ieee80211_hw *hw) { return 1; } static void ath10k_reconfig_complete(struct ieee80211_hw *hw, enum ieee80211_reconfig_type reconfig_type) { struct ath10k *ar = hw->priv; struct ath10k_vif *arvif; if (reconfig_type != IEEE80211_RECONFIG_TYPE_RESTART) return; mutex_lock(&ar->conf_mutex); /* If device failed to restart it will be in a different state, e.g. * ATH10K_STATE_WEDGED */ if (ar->state == ATH10K_STATE_RESTARTED) { ath10k_info(ar, "device successfully recovered\n"); ar->state = ATH10K_STATE_ON; ieee80211_wake_queues(ar->hw); /* Clear recovery state. */ complete(&ar->driver_recovery); atomic_set(&ar->fail_cont_count, 0); atomic_set(&ar->pending_recovery, 0); if (ar->hw_params.hw_restart_disconnect) { list_for_each_entry(arvif, &ar->arvifs, list) { if (arvif->is_up && arvif->vdev_type == WMI_VDEV_TYPE_STA) ieee80211_hw_restart_disconnect(arvif->vif); } } } mutex_unlock(&ar->conf_mutex); } static void ath10k_mac_update_bss_chan_survey(struct ath10k *ar, struct ieee80211_channel *channel) { int ret; enum wmi_bss_survey_req_type type = WMI_BSS_SURVEY_REQ_TYPE_READ; lockdep_assert_held(&ar->conf_mutex); if (!test_bit(WMI_SERVICE_BSS_CHANNEL_INFO_64, ar->wmi.svc_map) || (ar->rx_channel != channel)) return; if (ar->scan.state != ATH10K_SCAN_IDLE) { ath10k_dbg(ar, ATH10K_DBG_MAC, "ignoring bss chan info request while scanning..\n"); return; } reinit_completion(&ar->bss_survey_done); ret = ath10k_wmi_pdev_bss_chan_info_request(ar, type); if (ret) { ath10k_warn(ar, "failed to send pdev bss chan info request\n"); return; } ret = wait_for_completion_timeout(&ar->bss_survey_done, 3 * HZ); if (!ret) { ath10k_warn(ar, "bss channel survey timed out\n"); return; } } static int ath10k_get_survey(struct ieee80211_hw *hw, int idx, struct survey_info *survey) { struct ath10k *ar = hw->priv; struct ieee80211_supported_band *sband; struct survey_info *ar_survey = &ar->survey[idx]; int ret = 0; mutex_lock(&ar->conf_mutex); sband = hw->wiphy->bands[NL80211_BAND_2GHZ]; if (sband && idx >= sband->n_channels) { idx -= sband->n_channels; sband = NULL; } if (!sband) sband = hw->wiphy->bands[NL80211_BAND_5GHZ]; if (!sband || idx >= sband->n_channels) { ret = -ENOENT; goto exit; } ath10k_mac_update_bss_chan_survey(ar, &sband->channels[idx]); spin_lock_bh(&ar->data_lock); memcpy(survey, ar_survey, sizeof(*survey)); spin_unlock_bh(&ar->data_lock); survey->channel = &sband->channels[idx]; if (ar->rx_channel == survey->channel) survey->filled |= SURVEY_INFO_IN_USE; exit: mutex_unlock(&ar->conf_mutex); return ret; } static bool ath10k_mac_bitrate_mask_get_single_nss(struct ath10k *ar, enum nl80211_band band, const struct cfg80211_bitrate_mask *mask, int *nss) { struct ieee80211_supported_band *sband = &ar->mac.sbands[band]; u16 vht_mcs_map = le16_to_cpu(sband->vht_cap.vht_mcs.tx_mcs_map); u8 ht_nss_mask = 0; u8 vht_nss_mask = 0; int i; if (mask->control[band].legacy) return false; for (i = 0; i < ARRAY_SIZE(mask->control[band].ht_mcs); i++) { if (mask->control[band].ht_mcs[i] == 0) continue; else if (mask->control[band].ht_mcs[i] == sband->ht_cap.mcs.rx_mask[i]) ht_nss_mask |= BIT(i); else return false; } for (i = 0; i < ARRAY_SIZE(mask->control[band].vht_mcs); i++) { if (mask->control[band].vht_mcs[i] == 0) continue; else if (mask->control[band].vht_mcs[i] == ath10k_mac_get_max_vht_mcs_map(vht_mcs_map, i)) vht_nss_mask |= BIT(i); else return false; } if (ht_nss_mask != vht_nss_mask) return false; if (ht_nss_mask == 0) return false; if (BIT(fls(ht_nss_mask)) - 1 != ht_nss_mask) return false; *nss = fls(ht_nss_mask); return true; } static int ath10k_mac_set_fixed_rate_params(struct ath10k_vif *arvif, u8 rate, u8 nss, u8 sgi, u8 ldpc) { struct ath10k *ar = arvif->ar; u32 vdev_param; int ret; lockdep_assert_held(&ar->conf_mutex); ath10k_dbg(ar, ATH10K_DBG_MAC, "mac set fixed rate params vdev %i rate 0x%02x nss %u sgi %u\n", arvif->vdev_id, rate, nss, sgi); vdev_param = ar->wmi.vdev_param->fixed_rate; ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param, rate); if (ret) { ath10k_warn(ar, "failed to set fixed rate param 0x%02x: %d\n", rate, ret); return ret; } vdev_param = ar->wmi.vdev_param->nss; ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param, nss); if (ret) { ath10k_warn(ar, "failed to set nss param %d: %d\n", nss, ret); return ret; } vdev_param = ar->wmi.vdev_param->sgi; ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param, sgi); if (ret) { ath10k_warn(ar, "failed to set sgi param %d: %d\n", sgi, ret); return ret; } vdev_param = ar->wmi.vdev_param->ldpc; ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param, ldpc); if (ret) { ath10k_warn(ar, "failed to set ldpc param %d: %d\n", ldpc, ret); return ret; } return 0; } static bool ath10k_mac_can_set_bitrate_mask(struct ath10k *ar, enum nl80211_band band, const struct cfg80211_bitrate_mask *mask, bool allow_pfr) { int i; u16 vht_mcs; /* Due to firmware limitation in WMI_PEER_ASSOC_CMDID it is impossible * to express all VHT MCS rate masks. Effectively only the following * ranges can be used: none, 0-7, 0-8 and 0-9. */ for (i = 0; i < NL80211_VHT_NSS_MAX; i++) { vht_mcs = mask->control[band].vht_mcs[i]; switch (vht_mcs) { case 0: case BIT(8) - 1: case BIT(9) - 1: case BIT(10) - 1: break; default: if (!allow_pfr) ath10k_warn(ar, "refusing bitrate mask with missing 0-7 VHT MCS rates\n"); return false; } } return true; } static bool ath10k_mac_set_vht_bitrate_mask_fixup(struct ath10k *ar, struct ath10k_vif *arvif, struct ieee80211_sta *sta) { int err; u8 rate = arvif->vht_pfr; /* skip non vht and multiple rate peers */ if (!sta->deflink.vht_cap.vht_supported || arvif->vht_num_rates != 1) return false; err = ath10k_wmi_peer_set_param(ar, arvif->vdev_id, sta->addr, WMI_PEER_PARAM_FIXED_RATE, rate); if (err) ath10k_warn(ar, "failed to enable STA %pM peer fixed rate: %d\n", sta->addr, err); return true; } static void ath10k_mac_set_bitrate_mask_iter(void *data, struct ieee80211_sta *sta) { struct ath10k_vif *arvif = data; struct ath10k_sta *arsta = (struct ath10k_sta *)sta->drv_priv; struct ath10k *ar = arvif->ar; if (arsta->arvif != arvif) return; if (ath10k_mac_set_vht_bitrate_mask_fixup(ar, arvif, sta)) return; spin_lock_bh(&ar->data_lock); arsta->changed |= IEEE80211_RC_SUPP_RATES_CHANGED; spin_unlock_bh(&ar->data_lock); ieee80211_queue_work(ar->hw, &arsta->update_wk); } static void ath10k_mac_clr_bitrate_mask_iter(void *data, struct ieee80211_sta *sta) { struct ath10k_vif *arvif = data; struct ath10k_sta *arsta = (struct ath10k_sta *)sta->drv_priv; struct ath10k *ar = arvif->ar; int err; /* clear vht peers only */ if (arsta->arvif != arvif || !sta->deflink.vht_cap.vht_supported) return; err = ath10k_wmi_peer_set_param(ar, arvif->vdev_id, sta->addr, WMI_PEER_PARAM_FIXED_RATE, WMI_FIXED_RATE_NONE); if (err) ath10k_warn(ar, "failed to clear STA %pM peer fixed rate: %d\n", sta->addr, err); } static int ath10k_mac_op_set_bitrate_mask(struct ieee80211_hw *hw, struct ieee80211_vif *vif, const struct cfg80211_bitrate_mask *mask) { struct ath10k_vif *arvif = (void *)vif->drv_priv; struct cfg80211_chan_def def; struct ath10k *ar = arvif->ar; enum nl80211_band band; const u8 *ht_mcs_mask; const u16 *vht_mcs_mask; u8 rate; u8 nss; u8 sgi; u8 ldpc; int single_nss; int ret; int vht_num_rates, allow_pfr; u8 vht_pfr; bool update_bitrate_mask = true; if (ath10k_mac_vif_chan(vif, &def)) return -EPERM; band = def.chan->band; ht_mcs_mask = mask->control[band].ht_mcs; vht_mcs_mask = mask->control[band].vht_mcs; ldpc = !!(ar->ht_cap_info & WMI_HT_CAP_LDPC); sgi = mask->control[band].gi; if (sgi == NL80211_TXRATE_FORCE_LGI) return -EINVAL; allow_pfr = test_bit(ATH10K_FW_FEATURE_PEER_FIXED_RATE, ar->normal_mode_fw.fw_file.fw_features); if (allow_pfr) { mutex_lock(&ar->conf_mutex); ieee80211_iterate_stations_atomic(ar->hw, ath10k_mac_clr_bitrate_mask_iter, arvif); mutex_unlock(&ar->conf_mutex); } if (ath10k_mac_bitrate_mask_has_single_rate(ar, band, mask, &vht_num_rates)) { ret = ath10k_mac_bitrate_mask_get_single_rate(ar, band, mask, &rate, &nss, false); if (ret) { ath10k_warn(ar, "failed to get single rate for vdev %i: %d\n", arvif->vdev_id, ret); return ret; } } else if (ath10k_mac_bitrate_mask_get_single_nss(ar, band, mask, &single_nss)) { rate = WMI_FIXED_RATE_NONE; nss = single_nss; } else { rate = WMI_FIXED_RATE_NONE; nss = min(ar->num_rf_chains, max(ath10k_mac_max_ht_nss(ht_mcs_mask), ath10k_mac_max_vht_nss(vht_mcs_mask))); if (!ath10k_mac_can_set_bitrate_mask(ar, band, mask, allow_pfr)) { u8 vht_nss; if (!allow_pfr || vht_num_rates != 1) return -EINVAL; /* Reach here, firmware supports peer fixed rate and has * single vht rate, and don't update vif birate_mask, as * the rate only for specific peer. */ ath10k_mac_bitrate_mask_get_single_rate(ar, band, mask, &vht_pfr, &vht_nss, true); update_bitrate_mask = false; } else { vht_pfr = 0; } mutex_lock(&ar->conf_mutex); if (update_bitrate_mask) arvif->bitrate_mask = *mask; arvif->vht_num_rates = vht_num_rates; arvif->vht_pfr = vht_pfr; ieee80211_iterate_stations_atomic(ar->hw, ath10k_mac_set_bitrate_mask_iter, arvif); mutex_unlock(&ar->conf_mutex); } mutex_lock(&ar->conf_mutex); ret = ath10k_mac_set_fixed_rate_params(arvif, rate, nss, sgi, ldpc); if (ret) { ath10k_warn(ar, "failed to set fixed rate params on vdev %i: %d\n", arvif->vdev_id, ret); goto exit; } exit: mutex_unlock(&ar->conf_mutex); return ret; } static void ath10k_sta_rc_update(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_link_sta *link_sta, u32 changed) { struct ieee80211_sta *sta = link_sta->sta; struct ath10k *ar = hw->priv; struct ath10k_sta *arsta = (struct ath10k_sta *)sta->drv_priv; struct ath10k_vif *arvif = (void *)vif->drv_priv; struct ath10k_peer *peer; u32 bw, smps; spin_lock_bh(&ar->data_lock); peer = ath10k_peer_find(ar, arvif->vdev_id, sta->addr); if (!peer) { spin_unlock_bh(&ar->data_lock); ath10k_warn(ar, "mac sta rc update failed to find peer %pM on vdev %i\n", sta->addr, arvif->vdev_id); return; } ath10k_dbg(ar, ATH10K_DBG_STA, "mac sta rc update for %pM changed %08x bw %d nss %d smps %d\n", sta->addr, changed, sta->deflink.bandwidth, sta->deflink.rx_nss, sta->deflink.smps_mode); if (changed & IEEE80211_RC_BW_CHANGED) { bw = WMI_PEER_CHWIDTH_20MHZ; switch (sta->deflink.bandwidth) { case IEEE80211_STA_RX_BW_20: bw = WMI_PEER_CHWIDTH_20MHZ; break; case IEEE80211_STA_RX_BW_40: bw = WMI_PEER_CHWIDTH_40MHZ; break; case IEEE80211_STA_RX_BW_80: bw = WMI_PEER_CHWIDTH_80MHZ; break; case IEEE80211_STA_RX_BW_160: bw = WMI_PEER_CHWIDTH_160MHZ; break; default: ath10k_warn(ar, "Invalid bandwidth %d in rc update for %pM\n", sta->deflink.bandwidth, sta->addr); bw = WMI_PEER_CHWIDTH_20MHZ; break; } arsta->bw = bw; } if (changed & IEEE80211_RC_NSS_CHANGED) arsta->nss = sta->deflink.rx_nss; if (changed & IEEE80211_RC_SMPS_CHANGED) { smps = WMI_PEER_SMPS_PS_NONE; switch (sta->deflink.smps_mode) { case IEEE80211_SMPS_AUTOMATIC: case IEEE80211_SMPS_OFF: smps = WMI_PEER_SMPS_PS_NONE; break; case IEEE80211_SMPS_STATIC: smps = WMI_PEER_SMPS_STATIC; break; case IEEE80211_SMPS_DYNAMIC: smps = WMI_PEER_SMPS_DYNAMIC; break; case IEEE80211_SMPS_NUM_MODES: ath10k_warn(ar, "Invalid smps %d in sta rc update for %pM\n", sta->deflink.smps_mode, sta->addr); smps = WMI_PEER_SMPS_PS_NONE; break; } arsta->smps = smps; } arsta->changed |= changed; spin_unlock_bh(&ar->data_lock); ieee80211_queue_work(hw, &arsta->update_wk); } static void ath10k_offset_tsf(struct ieee80211_hw *hw, struct ieee80211_vif *vif, s64 tsf_offset) { struct ath10k *ar = hw->priv; struct ath10k_vif *arvif = (void *)vif->drv_priv; u32 offset, vdev_param; int ret; if (tsf_offset < 0) { vdev_param = ar->wmi.vdev_param->dec_tsf; offset = -tsf_offset; } else { vdev_param = ar->wmi.vdev_param->inc_tsf; offset = tsf_offset; } ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param, offset); if (ret && ret != -EOPNOTSUPP) ath10k_warn(ar, "failed to set tsf offset %d cmd %d: %d\n", offset, vdev_param, ret); } static int ath10k_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_ampdu_params *params) { struct ath10k *ar = hw->priv; struct ath10k_vif *arvif = (void *)vif->drv_priv; struct ieee80211_sta *sta = params->sta; enum ieee80211_ampdu_mlme_action action = params->action; u16 tid = params->tid; ath10k_dbg(ar, ATH10K_DBG_MAC, "mac ampdu vdev_id %i sta %pM tid %u action %d\n", arvif->vdev_id, sta->addr, tid, action); switch (action) { case IEEE80211_AMPDU_RX_START: case IEEE80211_AMPDU_RX_STOP: /* HTT AddBa/DelBa events trigger mac80211 Rx BA session * creation/removal. Do we need to verify this? */ return 0; case IEEE80211_AMPDU_TX_START: case IEEE80211_AMPDU_TX_STOP_CONT: case IEEE80211_AMPDU_TX_STOP_FLUSH: case IEEE80211_AMPDU_TX_STOP_FLUSH_CONT: case IEEE80211_AMPDU_TX_OPERATIONAL: /* Firmware offloads Tx aggregation entirely so deny mac80211 * Tx aggregation requests. */ return -EOPNOTSUPP; } return -EINVAL; } static void ath10k_mac_update_rx_channel(struct ath10k *ar, struct ieee80211_chanctx_conf *ctx, struct ieee80211_vif_chanctx_switch *vifs, int n_vifs) { struct cfg80211_chan_def *def = NULL; /* Both locks are required because ar->rx_channel is modified. This * allows readers to hold either lock. */ lockdep_assert_held(&ar->conf_mutex); lockdep_assert_held(&ar->data_lock); WARN_ON(ctx && vifs); WARN_ON(vifs && !n_vifs); /* FIXME: Sort of an optimization and a workaround. Peers and vifs are * on a linked list now. Doing a lookup peer -> vif -> chanctx for each * ppdu on Rx may reduce performance on low-end systems. It should be * possible to make tables/hashmaps to speed the lookup up (be vary of * cpu data cache lines though regarding sizes) but to keep the initial * implementation simple and less intrusive fallback to the slow lookup * only for multi-channel cases. Single-channel cases will remain to * use the old channel derival and thus performance should not be * affected much. */ rcu_read_lock(); if (!ctx && ath10k_mac_num_chanctxs(ar) == 1) { ieee80211_iter_chan_contexts_atomic(ar->hw, ath10k_mac_get_any_chandef_iter, &def); if (vifs) def = &vifs[0].new_ctx->def; ar->rx_channel = def->chan; } else if ((ctx && ath10k_mac_num_chanctxs(ar) == 0) || (ctx && (ar->state == ATH10K_STATE_RESTARTED))) { /* During driver restart due to firmware assert, since mac80211 * already has valid channel context for given radio, channel * context iteration return num_chanctx > 0. So fix rx_channel * when restart is in progress. */ ar->rx_channel = ctx->def.chan; } else { ar->rx_channel = NULL; } rcu_read_unlock(); } static void ath10k_mac_update_vif_chan(struct ath10k *ar, struct ieee80211_vif_chanctx_switch *vifs, int n_vifs) { struct ath10k_vif *arvif; int ret; int i; lockdep_assert_held(&ar->conf_mutex); /* First stop monitor interface. Some FW versions crash if there's a * lone monitor interface. */ if (ar->monitor_started) ath10k_monitor_stop(ar); for (i = 0; i < n_vifs; i++) { arvif = (void *)vifs[i].vif->drv_priv; ath10k_dbg(ar, ATH10K_DBG_MAC, "mac chanctx switch vdev_id %i freq %u->%u width %d->%d\n", arvif->vdev_id, vifs[i].old_ctx->def.chan->center_freq, vifs[i].new_ctx->def.chan->center_freq, vifs[i].old_ctx->def.width, vifs[i].new_ctx->def.width); if (WARN_ON(!arvif->is_started)) continue; if (WARN_ON(!arvif->is_up)) continue; ret = ath10k_wmi_vdev_down(ar, arvif->vdev_id); if (ret) { ath10k_warn(ar, "failed to down vdev %d: %d\n", arvif->vdev_id, ret); continue; } } /* All relevant vdevs are downed and associated channel resources * should be available for the channel switch now. */ spin_lock_bh(&ar->data_lock); ath10k_mac_update_rx_channel(ar, NULL, vifs, n_vifs); spin_unlock_bh(&ar->data_lock); for (i = 0; i < n_vifs; i++) { arvif = (void *)vifs[i].vif->drv_priv; if (WARN_ON(!arvif->is_started)) continue; if (WARN_ON(!arvif->is_up)) continue; ret = ath10k_mac_setup_bcn_tmpl(arvif); if (ret) ath10k_warn(ar, "failed to update bcn tmpl during csa: %d\n", ret); ret = ath10k_mac_setup_prb_tmpl(arvif); if (ret) ath10k_warn(ar, "failed to update prb tmpl during csa: %d\n", ret); ret = ath10k_vdev_restart(arvif, &vifs[i].new_ctx->def); if (ret) { ath10k_warn(ar, "failed to restart vdev %d: %d\n", arvif->vdev_id, ret); continue; } ret = ath10k_wmi_vdev_up(arvif->ar, arvif->vdev_id, arvif->aid, arvif->bssid); if (ret) { ath10k_warn(ar, "failed to bring vdev up %d: %d\n", arvif->vdev_id, ret); continue; } } ath10k_monitor_recalc(ar); } static int ath10k_mac_op_add_chanctx(struct ieee80211_hw *hw, struct ieee80211_chanctx_conf *ctx) { struct ath10k *ar = hw->priv; ath10k_dbg(ar, ATH10K_DBG_MAC, "mac chanctx add freq %u width %d ptr %p\n", ctx->def.chan->center_freq, ctx->def.width, ctx); mutex_lock(&ar->conf_mutex); spin_lock_bh(&ar->data_lock); ath10k_mac_update_rx_channel(ar, ctx, NULL, 0); spin_unlock_bh(&ar->data_lock); ath10k_recalc_radar_detection(ar); ath10k_monitor_recalc(ar); mutex_unlock(&ar->conf_mutex); return 0; } static void ath10k_mac_op_remove_chanctx(struct ieee80211_hw *hw, struct ieee80211_chanctx_conf *ctx) { struct ath10k *ar = hw->priv; ath10k_dbg(ar, ATH10K_DBG_MAC, "mac chanctx remove freq %u width %d ptr %p\n", ctx->def.chan->center_freq, ctx->def.width, ctx); mutex_lock(&ar->conf_mutex); spin_lock_bh(&ar->data_lock); ath10k_mac_update_rx_channel(ar, NULL, NULL, 0); spin_unlock_bh(&ar->data_lock); ath10k_recalc_radar_detection(ar); ath10k_monitor_recalc(ar); mutex_unlock(&ar->conf_mutex); } struct ath10k_mac_change_chanctx_arg { struct ieee80211_chanctx_conf *ctx; struct ieee80211_vif_chanctx_switch *vifs; int n_vifs; int next_vif; }; static void ath10k_mac_change_chanctx_cnt_iter(void *data, u8 *mac, struct ieee80211_vif *vif) { struct ath10k_mac_change_chanctx_arg *arg = data; if (rcu_access_pointer(vif->bss_conf.chanctx_conf) != arg->ctx) return; arg->n_vifs++; } static void ath10k_mac_change_chanctx_fill_iter(void *data, u8 *mac, struct ieee80211_vif *vif) { struct ath10k_mac_change_chanctx_arg *arg = data; struct ieee80211_chanctx_conf *ctx; ctx = rcu_access_pointer(vif->bss_conf.chanctx_conf); if (ctx != arg->ctx) return; if (WARN_ON(arg->next_vif == arg->n_vifs)) return; arg->vifs[arg->next_vif].vif = vif; arg->vifs[arg->next_vif].old_ctx = ctx; arg->vifs[arg->next_vif].new_ctx = ctx; arg->next_vif++; } static void ath10k_mac_op_change_chanctx(struct ieee80211_hw *hw, struct ieee80211_chanctx_conf *ctx, u32 changed) { struct ath10k *ar = hw->priv; struct ath10k_mac_change_chanctx_arg arg = { .ctx = ctx }; mutex_lock(&ar->conf_mutex); ath10k_dbg(ar, ATH10K_DBG_MAC, "mac chanctx change freq %u width %d ptr %p changed %x\n", ctx->def.chan->center_freq, ctx->def.width, ctx, changed); /* This shouldn't really happen because channel switching should use * switch_vif_chanctx(). */ if (WARN_ON(changed & IEEE80211_CHANCTX_CHANGE_CHANNEL)) goto unlock; if (changed & IEEE80211_CHANCTX_CHANGE_WIDTH) { ieee80211_iterate_active_interfaces_atomic( hw, ATH10K_ITER_NORMAL_FLAGS, ath10k_mac_change_chanctx_cnt_iter, &arg); if (arg.n_vifs == 0) goto radar; arg.vifs = kcalloc(arg.n_vifs, sizeof(arg.vifs[0]), GFP_KERNEL); if (!arg.vifs) goto radar; ieee80211_iterate_active_interfaces_atomic( hw, ATH10K_ITER_NORMAL_FLAGS, ath10k_mac_change_chanctx_fill_iter, &arg); ath10k_mac_update_vif_chan(ar, arg.vifs, arg.n_vifs); kfree(arg.vifs); } radar: ath10k_recalc_radar_detection(ar); /* FIXME: How to configure Rx chains properly? */ /* No other actions are actually necessary. Firmware maintains channel * definitions per vdev internally and there's no host-side channel * context abstraction to configure, e.g. channel width. */ unlock: mutex_unlock(&ar->conf_mutex); } static int ath10k_mac_op_assign_vif_chanctx(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_bss_conf *link_conf, struct ieee80211_chanctx_conf *ctx) { struct ath10k *ar = hw->priv; struct ath10k_vif *arvif = (void *)vif->drv_priv; int ret; mutex_lock(&ar->conf_mutex); ath10k_dbg(ar, ATH10K_DBG_MAC, "mac chanctx assign ptr %p vdev_id %i\n", ctx, arvif->vdev_id); if (WARN_ON(arvif->is_started)) { mutex_unlock(&ar->conf_mutex); return -EBUSY; } ret = ath10k_vdev_start(arvif, &ctx->def); if (ret) { ath10k_warn(ar, "failed to start vdev %i addr %pM on freq %d: %d\n", arvif->vdev_id, vif->addr, ctx->def.chan->center_freq, ret); goto err; } arvif->is_started = true; ret = ath10k_mac_vif_setup_ps(arvif); if (ret) { ath10k_warn(ar, "failed to update vdev %i ps: %d\n", arvif->vdev_id, ret); goto err_stop; } if (vif->type == NL80211_IFTYPE_MONITOR) { ret = ath10k_wmi_vdev_up(ar, arvif->vdev_id, 0, vif->addr); if (ret) { ath10k_warn(ar, "failed to up monitor vdev %i: %d\n", arvif->vdev_id, ret); goto err_stop; } arvif->is_up = true; } if (ath10k_mac_can_set_cts_prot(arvif)) { ret = ath10k_mac_set_cts_prot(arvif); if (ret) ath10k_warn(ar, "failed to set cts protection for vdev %d: %d\n", arvif->vdev_id, ret); } if (ath10k_peer_stats_enabled(ar) && ar->hw_params.tx_stats_over_pktlog) { ar->pktlog_filter |= ATH10K_PKTLOG_PEER_STATS; ret = ath10k_wmi_pdev_pktlog_enable(ar, ar->pktlog_filter); if (ret) { ath10k_warn(ar, "failed to enable pktlog %d\n", ret); goto err_stop; } } mutex_unlock(&ar->conf_mutex); return 0; err_stop: ath10k_vdev_stop(arvif); arvif->is_started = false; ath10k_mac_vif_setup_ps(arvif); err: mutex_unlock(&ar->conf_mutex); return ret; } static void ath10k_mac_op_unassign_vif_chanctx(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_bss_conf *link_conf, struct ieee80211_chanctx_conf *ctx) { struct ath10k *ar = hw->priv; struct ath10k_vif *arvif = (void *)vif->drv_priv; int ret; mutex_lock(&ar->conf_mutex); ath10k_dbg(ar, ATH10K_DBG_MAC, "mac chanctx unassign ptr %p vdev_id %i\n", ctx, arvif->vdev_id); WARN_ON(!arvif->is_started); if (vif->type == NL80211_IFTYPE_MONITOR) { WARN_ON(!arvif->is_up); ret = ath10k_wmi_vdev_down(ar, arvif->vdev_id); if (ret) ath10k_warn(ar, "failed to down monitor vdev %i: %d\n", arvif->vdev_id, ret); arvif->is_up = false; } ret = ath10k_vdev_stop(arvif); if (ret) ath10k_warn(ar, "failed to stop vdev %i: %d\n", arvif->vdev_id, ret); arvif->is_started = false; mutex_unlock(&ar->conf_mutex); } static int ath10k_mac_op_switch_vif_chanctx(struct ieee80211_hw *hw, struct ieee80211_vif_chanctx_switch *vifs, int n_vifs, enum ieee80211_chanctx_switch_mode mode) { struct ath10k *ar = hw->priv; mutex_lock(&ar->conf_mutex); ath10k_dbg(ar, ATH10K_DBG_MAC, "mac chanctx switch n_vifs %d mode %d\n", n_vifs, mode); ath10k_mac_update_vif_chan(ar, vifs, n_vifs); mutex_unlock(&ar->conf_mutex); return 0; } static void ath10k_mac_op_sta_pre_rcu_remove(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta) { struct ath10k *ar; struct ath10k_peer *peer; ar = hw->priv; list_for_each_entry(peer, &ar->peers, list) if (peer->sta == sta) peer->removed = true; } /* HT MCS parameters with Nss = 1 */ static const struct ath10k_index_ht_data_rate_type supported_ht_mcs_rate_nss1[] = { /* MCS L20 L40 S20 S40 */ {0, { 65, 135, 72, 150} }, {1, { 130, 270, 144, 300} }, {2, { 195, 405, 217, 450} }, {3, { 260, 540, 289, 600} }, {4, { 390, 810, 433, 900} }, {5, { 520, 1080, 578, 1200} }, {6, { 585, 1215, 650, 1350} }, {7, { 650, 1350, 722, 1500} } }; /* HT MCS parameters with Nss = 2 */ static const struct ath10k_index_ht_data_rate_type supported_ht_mcs_rate_nss2[] = { /* MCS L20 L40 S20 S40 */ {0, {130, 270, 144, 300} }, {1, {260, 540, 289, 600} }, {2, {390, 810, 433, 900} }, {3, {520, 1080, 578, 1200} }, {4, {780, 1620, 867, 1800} }, {5, {1040, 2160, 1156, 2400} }, {6, {1170, 2430, 1300, 2700} }, {7, {1300, 2700, 1444, 3000} } }; /* MCS parameters with Nss = 1 */ static const struct ath10k_index_vht_data_rate_type supported_vht_mcs_rate_nss1[] = { /* MCS L80 S80 L40 S40 L20 S20 */ {0, {293, 325}, {135, 150}, {65, 72} }, {1, {585, 650}, {270, 300}, {130, 144} }, {2, {878, 975}, {405, 450}, {195, 217} }, {3, {1170, 1300}, {540, 600}, {260, 289} }, {4, {1755, 1950}, {810, 900}, {390, 433} }, {5, {2340, 2600}, {1080, 1200}, {520, 578} }, {6, {2633, 2925}, {1215, 1350}, {585, 650} }, {7, {2925, 3250}, {1350, 1500}, {650, 722} }, {8, {3510, 3900}, {1620, 1800}, {780, 867} }, {9, {3900, 4333}, {1800, 2000}, {865, 960} } }; /*MCS parameters with Nss = 2 */ static const struct ath10k_index_vht_data_rate_type supported_vht_mcs_rate_nss2[] = { /* MCS L80 S80 L40 S40 L20 S20 */ {0, {585, 650}, {270, 300}, {130, 144} }, {1, {1170, 1300}, {540, 600}, {260, 289} }, {2, {1755, 1950}, {810, 900}, {390, 433} }, {3, {2340, 2600}, {1080, 1200}, {520, 578} }, {4, {3510, 3900}, {1620, 1800}, {780, 867} }, {5, {4680, 5200}, {2160, 2400}, {1040, 1156} }, {6, {5265, 5850}, {2430, 2700}, {1170, 1300} }, {7, {5850, 6500}, {2700, 3000}, {1300, 1444} }, {8, {7020, 7800}, {3240, 3600}, {1560, 1733} }, {9, {7800, 8667}, {3600, 4000}, {1730, 1920} } }; static void ath10k_mac_get_rate_flags_ht(struct ath10k *ar, u32 rate, u8 nss, u8 mcs, u8 *flags, u8 *bw) { struct ath10k_index_ht_data_rate_type *mcs_rate; u8 index; size_t len_nss1 = ARRAY_SIZE(supported_ht_mcs_rate_nss1); size_t len_nss2 = ARRAY_SIZE(supported_ht_mcs_rate_nss2); if (mcs >= (len_nss1 + len_nss2)) { ath10k_warn(ar, "not supported mcs %d in current rate table", mcs); return; } mcs_rate = (struct ath10k_index_ht_data_rate_type *) ((nss == 1) ? &supported_ht_mcs_rate_nss1 : &supported_ht_mcs_rate_nss2); if (mcs >= len_nss1) index = mcs - len_nss1; else index = mcs; if (rate == mcs_rate[index].supported_rate[0]) { *bw = RATE_INFO_BW_20; } else if (rate == mcs_rate[index].supported_rate[1]) { *bw |= RATE_INFO_BW_40; } else if (rate == mcs_rate[index].supported_rate[2]) { *bw |= RATE_INFO_BW_20; *flags |= RATE_INFO_FLAGS_SHORT_GI; } else if (rate == mcs_rate[index].supported_rate[3]) { *bw |= RATE_INFO_BW_40; *flags |= RATE_INFO_FLAGS_SHORT_GI; } else { ath10k_warn(ar, "invalid ht params rate %d 100kbps nss %d mcs %d", rate, nss, mcs); } } static void ath10k_mac_get_rate_flags_vht(struct ath10k *ar, u32 rate, u8 nss, u8 mcs, u8 *flags, u8 *bw) { struct ath10k_index_vht_data_rate_type *mcs_rate; mcs_rate = (struct ath10k_index_vht_data_rate_type *) ((nss == 1) ? &supported_vht_mcs_rate_nss1 : &supported_vht_mcs_rate_nss2); if (rate == mcs_rate[mcs].supported_VHT80_rate[0]) { *bw = RATE_INFO_BW_80; } else if (rate == mcs_rate[mcs].supported_VHT80_rate[1]) { *bw = RATE_INFO_BW_80; *flags |= RATE_INFO_FLAGS_SHORT_GI; } else if (rate == mcs_rate[mcs].supported_VHT40_rate[0]) { *bw = RATE_INFO_BW_40; } else if (rate == mcs_rate[mcs].supported_VHT40_rate[1]) { *bw = RATE_INFO_BW_40; *flags |= RATE_INFO_FLAGS_SHORT_GI; } else if (rate == mcs_rate[mcs].supported_VHT20_rate[0]) { *bw = RATE_INFO_BW_20; } else if (rate == mcs_rate[mcs].supported_VHT20_rate[1]) { *bw = RATE_INFO_BW_20; *flags |= RATE_INFO_FLAGS_SHORT_GI; } else { ath10k_warn(ar, "invalid vht params rate %d 100kbps nss %d mcs %d", rate, nss, mcs); } } static void ath10k_mac_get_rate_flags(struct ath10k *ar, u32 rate, enum ath10k_phy_mode mode, u8 nss, u8 mcs, u8 *flags, u8 *bw) { if (mode == ATH10K_PHY_MODE_HT) { *flags = RATE_INFO_FLAGS_MCS; ath10k_mac_get_rate_flags_ht(ar, rate, nss, mcs, flags, bw); } else if (mode == ATH10K_PHY_MODE_VHT) { *flags = RATE_INFO_FLAGS_VHT_MCS; ath10k_mac_get_rate_flags_vht(ar, rate, nss, mcs, flags, bw); } } static void ath10k_mac_parse_bitrate(struct ath10k *ar, u32 rate_code, u32 bitrate_kbps, struct rate_info *rate) { enum ath10k_phy_mode mode = ATH10K_PHY_MODE_LEGACY; enum wmi_rate_preamble preamble = WMI_TLV_GET_HW_RC_PREAM_V1(rate_code); u8 nss = WMI_TLV_GET_HW_RC_NSS_V1(rate_code) + 1; u8 mcs = WMI_TLV_GET_HW_RC_RATE_V1(rate_code); u8 flags = 0, bw = 0; ath10k_dbg(ar, ATH10K_DBG_MAC, "mac parse rate code 0x%x bitrate %d kbps\n", rate_code, bitrate_kbps); if (preamble == WMI_RATE_PREAMBLE_HT) mode = ATH10K_PHY_MODE_HT; else if (preamble == WMI_RATE_PREAMBLE_VHT) mode = ATH10K_PHY_MODE_VHT; ath10k_mac_get_rate_flags(ar, bitrate_kbps / 100, mode, nss, mcs, &flags, &bw); ath10k_dbg(ar, ATH10K_DBG_MAC, "mac parse bitrate preamble %d mode %d nss %d mcs %d flags %x bw %d\n", preamble, mode, nss, mcs, flags, bw); rate->flags = flags; rate->bw = bw; rate->legacy = bitrate_kbps / 100; rate->nss = nss; rate->mcs = mcs; } static void ath10k_mac_sta_get_peer_stats_info(struct ath10k *ar, struct ieee80211_sta *sta, struct station_info *sinfo) { struct ath10k_sta *arsta = (struct ath10k_sta *)sta->drv_priv; struct ath10k_peer *peer; unsigned long time_left; int ret; if (!(ar->hw_params.supports_peer_stats_info && arsta->arvif->vdev_type == WMI_VDEV_TYPE_STA)) return; spin_lock_bh(&ar->data_lock); peer = ath10k_peer_find(ar, arsta->arvif->vdev_id, sta->addr); spin_unlock_bh(&ar->data_lock); if (!peer) return; reinit_completion(&ar->peer_stats_info_complete); ret = ath10k_wmi_request_peer_stats_info(ar, arsta->arvif->vdev_id, WMI_REQUEST_ONE_PEER_STATS_INFO, arsta->arvif->bssid, 0); if (ret && ret != -EOPNOTSUPP) { ath10k_warn(ar, "could not request peer stats info: %d\n", ret); return; } time_left = wait_for_completion_timeout(&ar->peer_stats_info_complete, 3 * HZ); if (time_left == 0) { ath10k_warn(ar, "timed out waiting peer stats info\n"); return; } if (arsta->rx_rate_code != 0 && arsta->rx_bitrate_kbps != 0) { ath10k_mac_parse_bitrate(ar, arsta->rx_rate_code, arsta->rx_bitrate_kbps, &sinfo->rxrate); sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_BITRATE); arsta->rx_rate_code = 0; arsta->rx_bitrate_kbps = 0; } if (arsta->tx_rate_code != 0 && arsta->tx_bitrate_kbps != 0) { ath10k_mac_parse_bitrate(ar, arsta->tx_rate_code, arsta->tx_bitrate_kbps, &sinfo->txrate); sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BITRATE); arsta->tx_rate_code = 0; arsta->tx_bitrate_kbps = 0; } } static void ath10k_sta_statistics(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct station_info *sinfo) { struct ath10k_sta *arsta = (struct ath10k_sta *)sta->drv_priv; struct ath10k *ar = arsta->arvif->ar; if (!ath10k_peer_stats_enabled(ar)) return; mutex_lock(&ar->conf_mutex); ath10k_debug_fw_stats_request(ar); mutex_unlock(&ar->conf_mutex); sinfo->rx_duration = arsta->rx_duration; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_DURATION); if (arsta->txrate.legacy || arsta->txrate.nss) { if (arsta->txrate.legacy) { sinfo->txrate.legacy = arsta->txrate.legacy; } else { sinfo->txrate.mcs = arsta->txrate.mcs; sinfo->txrate.nss = arsta->txrate.nss; sinfo->txrate.bw = arsta->txrate.bw; } sinfo->txrate.flags = arsta->txrate.flags; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BITRATE); } if (ar->htt.disable_tx_comp) { sinfo->tx_failed = arsta->tx_failed; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_FAILED); } sinfo->tx_retries = arsta->tx_retries; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_RETRIES); ath10k_mac_sta_get_peer_stats_info(ar, sta, sinfo); } static int ath10k_mac_op_set_tid_config(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct cfg80211_tid_config *tid_config) { struct ath10k *ar = hw->priv; struct ath10k_vif *arvif = (void *)vif->drv_priv; struct ath10k_mac_iter_tid_conf_data data = {}; struct wmi_per_peer_per_tid_cfg_arg arg = {}; int ret, i; mutex_lock(&ar->conf_mutex); arg.vdev_id = arvif->vdev_id; arvif->tids_rst = 0; memset(arvif->tid_conf_changed, 0, sizeof(arvif->tid_conf_changed)); for (i = 0; i < tid_config->n_tid_conf; i++) { ret = ath10k_mac_parse_tid_config(ar, sta, vif, &tid_config->tid_conf[i], &arg); if (ret) goto exit; } ret = 0; if (sta) goto exit; arvif->tids_rst = 0; data.curr_vif = vif; data.ar = ar; ieee80211_iterate_stations_atomic(hw, ath10k_mac_vif_stations_tid_conf, &data); exit: mutex_unlock(&ar->conf_mutex); return ret; } static int ath10k_mac_op_reset_tid_config(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta, u8 tids) { struct ath10k_vif *arvif = (void *)vif->drv_priv; struct ath10k_mac_iter_tid_conf_data data = {}; struct ath10k *ar = hw->priv; int ret = 0; mutex_lock(&ar->conf_mutex); if (sta) { arvif->tids_rst = 0; ret = ath10k_mac_reset_tid_config(ar, sta, arvif, tids); goto exit; } arvif->tids_rst = tids; data.curr_vif = vif; data.ar = ar; ieee80211_iterate_stations_atomic(hw, ath10k_mac_vif_stations_tid_conf, &data); exit: mutex_unlock(&ar->conf_mutex); return ret; } static const struct ieee80211_ops ath10k_ops = { .tx = ath10k_mac_op_tx, .wake_tx_queue = ath10k_mac_op_wake_tx_queue, .start = ath10k_start, .stop = ath10k_stop, .config = ath10k_config, .add_interface = ath10k_add_interface, .update_vif_offload = ath10k_update_vif_offload, .remove_interface = ath10k_remove_interface, .configure_filter = ath10k_configure_filter, .bss_info_changed = ath10k_bss_info_changed, .set_coverage_class = ath10k_mac_op_set_coverage_class, .hw_scan = ath10k_hw_scan, .cancel_hw_scan = ath10k_cancel_hw_scan, .set_key = ath10k_set_key, .set_default_unicast_key = ath10k_set_default_unicast_key, .sta_state = ath10k_sta_state, .sta_set_txpwr = ath10k_sta_set_txpwr, .conf_tx = ath10k_conf_tx, .remain_on_channel = ath10k_remain_on_channel, .cancel_remain_on_channel = ath10k_cancel_remain_on_channel, .set_rts_threshold = ath10k_set_rts_threshold, .set_frag_threshold = ath10k_mac_op_set_frag_threshold, .flush = ath10k_flush, .tx_last_beacon = ath10k_tx_last_beacon, .set_antenna = ath10k_set_antenna, .get_antenna = ath10k_get_antenna, .reconfig_complete = ath10k_reconfig_complete, .get_survey = ath10k_get_survey, .set_bitrate_mask = ath10k_mac_op_set_bitrate_mask, .link_sta_rc_update = ath10k_sta_rc_update, .offset_tsf = ath10k_offset_tsf, .ampdu_action = ath10k_ampdu_action, .get_et_sset_count = ath10k_debug_get_et_sset_count, .get_et_stats = ath10k_debug_get_et_stats, .get_et_strings = ath10k_debug_get_et_strings, .add_chanctx = ath10k_mac_op_add_chanctx, .remove_chanctx = ath10k_mac_op_remove_chanctx, .change_chanctx = ath10k_mac_op_change_chanctx, .assign_vif_chanctx = ath10k_mac_op_assign_vif_chanctx, .unassign_vif_chanctx = ath10k_mac_op_unassign_vif_chanctx, .switch_vif_chanctx = ath10k_mac_op_switch_vif_chanctx, .sta_pre_rcu_remove = ath10k_mac_op_sta_pre_rcu_remove, .sta_statistics = ath10k_sta_statistics, .set_tid_config = ath10k_mac_op_set_tid_config, .reset_tid_config = ath10k_mac_op_reset_tid_config, CFG80211_TESTMODE_CMD(ath10k_tm_cmd) #ifdef CONFIG_PM .suspend = ath10k_wow_op_suspend, .resume = ath10k_wow_op_resume, .set_wakeup = ath10k_wow_op_set_wakeup, #endif #ifdef CONFIG_MAC80211_DEBUGFS .sta_add_debugfs = ath10k_sta_add_debugfs, #endif .set_sar_specs = ath10k_mac_set_sar_specs, }; #define CHAN2G(_channel, _freq, _flags) { \ .band = NL80211_BAND_2GHZ, \ .hw_value = (_channel), \ .center_freq = (_freq), \ .flags = (_flags), \ .max_antenna_gain = 0, \ .max_power = 30, \ } #define CHAN5G(_channel, _freq, _flags) { \ .band = NL80211_BAND_5GHZ, \ .hw_value = (_channel), \ .center_freq = (_freq), \ .flags = (_flags), \ .max_antenna_gain = 0, \ .max_power = 30, \ } static const struct ieee80211_channel ath10k_2ghz_channels[] = { CHAN2G(1, 2412, 0), CHAN2G(2, 2417, 0), CHAN2G(3, 2422, 0), CHAN2G(4, 2427, 0), CHAN2G(5, 2432, 0), CHAN2G(6, 2437, 0), CHAN2G(7, 2442, 0), CHAN2G(8, 2447, 0), CHAN2G(9, 2452, 0), CHAN2G(10, 2457, 0), CHAN2G(11, 2462, 0), CHAN2G(12, 2467, 0), CHAN2G(13, 2472, 0), CHAN2G(14, 2484, 0), }; static const struct ieee80211_channel ath10k_5ghz_channels[] = { CHAN5G(36, 5180, 0), CHAN5G(40, 5200, 0), CHAN5G(44, 5220, 0), CHAN5G(48, 5240, 0), CHAN5G(52, 5260, 0), CHAN5G(56, 5280, 0), CHAN5G(60, 5300, 0), CHAN5G(64, 5320, 0), CHAN5G(100, 5500, 0), CHAN5G(104, 5520, 0), CHAN5G(108, 5540, 0), CHAN5G(112, 5560, 0), CHAN5G(116, 5580, 0), CHAN5G(120, 5600, 0), CHAN5G(124, 5620, 0), CHAN5G(128, 5640, 0), CHAN5G(132, 5660, 0), CHAN5G(136, 5680, 0), CHAN5G(140, 5700, 0), CHAN5G(144, 5720, 0), CHAN5G(149, 5745, 0), CHAN5G(153, 5765, 0), CHAN5G(157, 5785, 0), CHAN5G(161, 5805, 0), CHAN5G(165, 5825, 0), CHAN5G(169, 5845, 0), CHAN5G(173, 5865, 0), /* If you add more, you may need to change ATH10K_MAX_5G_CHAN */ /* And you will definitely need to change ATH10K_NUM_CHANS in core.h */ }; struct ath10k *ath10k_mac_create(size_t priv_size) { struct ieee80211_hw *hw; struct ieee80211_ops *ops; struct ath10k *ar; ops = kmemdup(&ath10k_ops, sizeof(ath10k_ops), GFP_KERNEL); if (!ops) return NULL; hw = ieee80211_alloc_hw(sizeof(struct ath10k) + priv_size, ops); if (!hw) { kfree(ops); return NULL; } ar = hw->priv; ar->hw = hw; ar->ops = ops; return ar; } void ath10k_mac_destroy(struct ath10k *ar) { struct ieee80211_ops *ops = ar->ops; ieee80211_free_hw(ar->hw); kfree(ops); } static const struct ieee80211_iface_limit ath10k_if_limits[] = { { .max = 8, .types = BIT(NL80211_IFTYPE_STATION) | BIT(NL80211_IFTYPE_P2P_CLIENT) }, { .max = 3, .types = BIT(NL80211_IFTYPE_P2P_GO) }, { .max = 1, .types = BIT(NL80211_IFTYPE_P2P_DEVICE) }, { .max = 7, .types = BIT(NL80211_IFTYPE_AP) #ifdef CONFIG_MAC80211_MESH | BIT(NL80211_IFTYPE_MESH_POINT) #endif }, }; static const struct ieee80211_iface_limit ath10k_10x_if_limits[] = { { .max = 8, .types = BIT(NL80211_IFTYPE_AP) #ifdef CONFIG_MAC80211_MESH | BIT(NL80211_IFTYPE_MESH_POINT) #endif }, { .max = 1, .types = BIT(NL80211_IFTYPE_STATION) }, }; static const struct ieee80211_iface_combination ath10k_if_comb[] = { { .limits = ath10k_if_limits, .n_limits = ARRAY_SIZE(ath10k_if_limits), .max_interfaces = 8, .num_different_channels = 1, .beacon_int_infra_match = true, }, }; static const struct ieee80211_iface_combination ath10k_10x_if_comb[] = { { .limits = ath10k_10x_if_limits, .n_limits = ARRAY_SIZE(ath10k_10x_if_limits), .max_interfaces = 8, .num_different_channels = 1, .beacon_int_infra_match = true, .beacon_int_min_gcd = 1, #ifdef CONFIG_ATH10K_DFS_CERTIFIED .radar_detect_widths = BIT(NL80211_CHAN_WIDTH_20_NOHT) | BIT(NL80211_CHAN_WIDTH_20) | BIT(NL80211_CHAN_WIDTH_40) | BIT(NL80211_CHAN_WIDTH_80), #endif }, }; static const struct ieee80211_iface_limit ath10k_tlv_if_limit[] = { { .max = 2, .types = BIT(NL80211_IFTYPE_STATION), }, { .max = 2, .types = BIT(NL80211_IFTYPE_AP) | #ifdef CONFIG_MAC80211_MESH BIT(NL80211_IFTYPE_MESH_POINT) | #endif BIT(NL80211_IFTYPE_P2P_CLIENT) | BIT(NL80211_IFTYPE_P2P_GO), }, { .max = 1, .types = BIT(NL80211_IFTYPE_P2P_DEVICE), }, }; static const struct ieee80211_iface_limit ath10k_tlv_qcs_if_limit[] = { { .max = 2, .types = BIT(NL80211_IFTYPE_STATION), }, { .max = 2, .types = BIT(NL80211_IFTYPE_P2P_CLIENT), }, { .max = 1, .types = BIT(NL80211_IFTYPE_AP) | #ifdef CONFIG_MAC80211_MESH BIT(NL80211_IFTYPE_MESH_POINT) | #endif BIT(NL80211_IFTYPE_P2P_GO), }, { .max = 1, .types = BIT(NL80211_IFTYPE_P2P_DEVICE), }, }; static const struct ieee80211_iface_limit ath10k_tlv_if_limit_ibss[] = { { .max = 1, .types = BIT(NL80211_IFTYPE_STATION), }, { .max = 1, .types = BIT(NL80211_IFTYPE_ADHOC), }, }; /* FIXME: This is not thoroughly tested. These combinations may over- or * underestimate hw/fw capabilities. */ static struct ieee80211_iface_combination ath10k_tlv_if_comb[] = { { .limits = ath10k_tlv_if_limit, .num_different_channels = 1, .max_interfaces = 4, .n_limits = ARRAY_SIZE(ath10k_tlv_if_limit), }, { .limits = ath10k_tlv_if_limit_ibss, .num_different_channels = 1, .max_interfaces = 2, .n_limits = ARRAY_SIZE(ath10k_tlv_if_limit_ibss), }, }; static struct ieee80211_iface_combination ath10k_tlv_qcs_if_comb[] = { { .limits = ath10k_tlv_if_limit, .num_different_channels = 1, .max_interfaces = 4, .n_limits = ARRAY_SIZE(ath10k_tlv_if_limit), }, { .limits = ath10k_tlv_qcs_if_limit, .num_different_channels = 2, .max_interfaces = 4, .n_limits = ARRAY_SIZE(ath10k_tlv_qcs_if_limit), }, { .limits = ath10k_tlv_if_limit_ibss, .num_different_channels = 1, .max_interfaces = 2, .n_limits = ARRAY_SIZE(ath10k_tlv_if_limit_ibss), }, }; static const struct ieee80211_iface_limit ath10k_10_4_if_limits[] = { { .max = 1, .types = BIT(NL80211_IFTYPE_STATION), }, { .max = 16, .types = BIT(NL80211_IFTYPE_AP) #ifdef CONFIG_MAC80211_MESH | BIT(NL80211_IFTYPE_MESH_POINT) #endif }, }; static const struct ieee80211_iface_combination ath10k_10_4_if_comb[] = { { .limits = ath10k_10_4_if_limits, .n_limits = ARRAY_SIZE(ath10k_10_4_if_limits), .max_interfaces = 16, .num_different_channels = 1, .beacon_int_infra_match = true, .beacon_int_min_gcd = 1, #ifdef CONFIG_ATH10K_DFS_CERTIFIED .radar_detect_widths = BIT(NL80211_CHAN_WIDTH_20_NOHT) | BIT(NL80211_CHAN_WIDTH_20) | BIT(NL80211_CHAN_WIDTH_40) | BIT(NL80211_CHAN_WIDTH_80) | BIT(NL80211_CHAN_WIDTH_80P80) | BIT(NL80211_CHAN_WIDTH_160), #endif }, }; static const struct ieee80211_iface_combination ath10k_10_4_bcn_int_if_comb[] = { { .limits = ath10k_10_4_if_limits, .n_limits = ARRAY_SIZE(ath10k_10_4_if_limits), .max_interfaces = 16, .num_different_channels = 1, .beacon_int_infra_match = true, .beacon_int_min_gcd = 100, #ifdef CONFIG_ATH10K_DFS_CERTIFIED .radar_detect_widths = BIT(NL80211_CHAN_WIDTH_20_NOHT) | BIT(NL80211_CHAN_WIDTH_20) | BIT(NL80211_CHAN_WIDTH_40) | BIT(NL80211_CHAN_WIDTH_80) | BIT(NL80211_CHAN_WIDTH_80P80) | BIT(NL80211_CHAN_WIDTH_160), #endif }, }; static void ath10k_get_arvif_iter(void *data, u8 *mac, struct ieee80211_vif *vif) { struct ath10k_vif_iter *arvif_iter = data; struct ath10k_vif *arvif = (void *)vif->drv_priv; if (arvif->vdev_id == arvif_iter->vdev_id) arvif_iter->arvif = arvif; } struct ath10k_vif *ath10k_get_arvif(struct ath10k *ar, u32 vdev_id) { struct ath10k_vif_iter arvif_iter; memset(&arvif_iter, 0, sizeof(struct ath10k_vif_iter)); arvif_iter.vdev_id = vdev_id; ieee80211_iterate_active_interfaces_atomic(ar->hw, ATH10K_ITER_RESUME_FLAGS, ath10k_get_arvif_iter, &arvif_iter); if (!arvif_iter.arvif) { ath10k_warn(ar, "No VIF found for vdev %d\n", vdev_id); return NULL; } return arvif_iter.arvif; } #define WRD_METHOD "WRDD" #define WRDD_WIFI (0x07) static u32 ath10k_mac_wrdd_get_mcc(struct ath10k *ar, union acpi_object *wrdd) { union acpi_object *mcc_pkg; union acpi_object *domain_type; union acpi_object *mcc_value; u32 i; if (wrdd->type != ACPI_TYPE_PACKAGE || wrdd->package.count < 2 || wrdd->package.elements[0].type != ACPI_TYPE_INTEGER || wrdd->package.elements[0].integer.value != 0) { ath10k_warn(ar, "ignoring malformed/unsupported wrdd structure\n"); return 0; } for (i = 1; i < wrdd->package.count; ++i) { mcc_pkg = &wrdd->package.elements[i]; if (mcc_pkg->type != ACPI_TYPE_PACKAGE) continue; if (mcc_pkg->package.count < 2) continue; if (mcc_pkg->package.elements[0].type != ACPI_TYPE_INTEGER || mcc_pkg->package.elements[1].type != ACPI_TYPE_INTEGER) continue; domain_type = &mcc_pkg->package.elements[0]; if (domain_type->integer.value != WRDD_WIFI) continue; mcc_value = &mcc_pkg->package.elements[1]; return mcc_value->integer.value; } return 0; } static int ath10k_mac_get_wrdd_regulatory(struct ath10k *ar, u16 *rd) { acpi_handle root_handle; acpi_handle handle; struct acpi_buffer wrdd = {ACPI_ALLOCATE_BUFFER, NULL}; acpi_status status; u32 alpha2_code; char alpha2[3]; root_handle = ACPI_HANDLE(ar->dev); if (!root_handle) return -EOPNOTSUPP; status = acpi_get_handle(root_handle, (acpi_string)WRD_METHOD, &handle); if (ACPI_FAILURE(status)) { ath10k_dbg(ar, ATH10K_DBG_BOOT, "failed to get wrd method %d\n", status); return -EIO; } status = acpi_evaluate_object(handle, NULL, NULL, &wrdd); if (ACPI_FAILURE(status)) { ath10k_dbg(ar, ATH10K_DBG_BOOT, "failed to call wrdc %d\n", status); return -EIO; } alpha2_code = ath10k_mac_wrdd_get_mcc(ar, wrdd.pointer); kfree(wrdd.pointer); if (!alpha2_code) return -EIO; alpha2[0] = (alpha2_code >> 8) & 0xff; alpha2[1] = (alpha2_code >> 0) & 0xff; alpha2[2] = '\0'; ath10k_dbg(ar, ATH10K_DBG_BOOT, "regulatory hint from WRDD (alpha2-code): %s\n", alpha2); *rd = ath_regd_find_country_by_name(alpha2); if (*rd == 0xffff) return -EIO; *rd |= COUNTRY_ERD_FLAG; return 0; } static int ath10k_mac_init_rd(struct ath10k *ar) { int ret; u16 rd; ret = ath10k_mac_get_wrdd_regulatory(ar, &rd); if (ret) { ath10k_dbg(ar, ATH10K_DBG_BOOT, "fallback to eeprom programmed regulatory settings\n"); rd = ar->hw_eeprom_rd; } ar->ath_common.regulatory.current_rd = rd; return 0; } int ath10k_mac_register(struct ath10k *ar) { static const u32 cipher_suites[] = { WLAN_CIPHER_SUITE_WEP40, WLAN_CIPHER_SUITE_WEP104, WLAN_CIPHER_SUITE_TKIP, WLAN_CIPHER_SUITE_CCMP, /* Do not add hardware supported ciphers before this line. * Allow software encryption for all chips. Don't forget to * update n_cipher_suites below. */ WLAN_CIPHER_SUITE_AES_CMAC, WLAN_CIPHER_SUITE_BIP_CMAC_256, WLAN_CIPHER_SUITE_BIP_GMAC_128, WLAN_CIPHER_SUITE_BIP_GMAC_256, /* Only QCA99x0 and QCA4019 variants support GCMP-128, GCMP-256 * and CCMP-256 in hardware. */ WLAN_CIPHER_SUITE_GCMP, WLAN_CIPHER_SUITE_GCMP_256, WLAN_CIPHER_SUITE_CCMP_256, }; struct ieee80211_supported_band *band; void *channels; int ret; if (!is_valid_ether_addr(ar->mac_addr)) { ath10k_warn(ar, "invalid MAC address; choosing random\n"); eth_random_addr(ar->mac_addr); } SET_IEEE80211_PERM_ADDR(ar->hw, ar->mac_addr); SET_IEEE80211_DEV(ar->hw, ar->dev); BUILD_BUG_ON((ARRAY_SIZE(ath10k_2ghz_channels) + ARRAY_SIZE(ath10k_5ghz_channels)) != ATH10K_NUM_CHANS); if (ar->phy_capability & WHAL_WLAN_11G_CAPABILITY) { channels = kmemdup(ath10k_2ghz_channels, sizeof(ath10k_2ghz_channels), GFP_KERNEL); if (!channels) { ret = -ENOMEM; goto err_free; } band = &ar->mac.sbands[NL80211_BAND_2GHZ]; band->n_channels = ARRAY_SIZE(ath10k_2ghz_channels); band->channels = channels; if (ar->hw_params.cck_rate_map_rev2) { band->n_bitrates = ath10k_g_rates_rev2_size; band->bitrates = ath10k_g_rates_rev2; } else { band->n_bitrates = ath10k_g_rates_size; band->bitrates = ath10k_g_rates; } ar->hw->wiphy->bands[NL80211_BAND_2GHZ] = band; } if (ar->phy_capability & WHAL_WLAN_11A_CAPABILITY) { channels = kmemdup(ath10k_5ghz_channels, sizeof(ath10k_5ghz_channels), GFP_KERNEL); if (!channels) { ret = -ENOMEM; goto err_free; } band = &ar->mac.sbands[NL80211_BAND_5GHZ]; band->n_channels = ARRAY_SIZE(ath10k_5ghz_channels); band->channels = channels; band->n_bitrates = ath10k_a_rates_size; band->bitrates = ath10k_a_rates; ar->hw->wiphy->bands[NL80211_BAND_5GHZ] = band; } wiphy_read_of_freq_limits(ar->hw->wiphy); ath10k_mac_setup_ht_vht_cap(ar); ar->hw->wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION) | BIT(NL80211_IFTYPE_AP) | BIT(NL80211_IFTYPE_MESH_POINT); ar->hw->wiphy->available_antennas_rx = ar->cfg_rx_chainmask; ar->hw->wiphy->available_antennas_tx = ar->cfg_tx_chainmask; if (!test_bit(ATH10K_FW_FEATURE_NO_P2P, ar->normal_mode_fw.fw_file.fw_features)) ar->hw->wiphy->interface_modes |= BIT(NL80211_IFTYPE_P2P_DEVICE) | BIT(NL80211_IFTYPE_P2P_CLIENT) | BIT(NL80211_IFTYPE_P2P_GO); ieee80211_hw_set(ar->hw, SIGNAL_DBM); if (!test_bit(ATH10K_FW_FEATURE_NO_PS, ar->running_fw->fw_file.fw_features)) { ieee80211_hw_set(ar->hw, SUPPORTS_PS); ieee80211_hw_set(ar->hw, SUPPORTS_DYNAMIC_PS); } ieee80211_hw_set(ar->hw, MFP_CAPABLE); ieee80211_hw_set(ar->hw, REPORTS_TX_ACK_STATUS); ieee80211_hw_set(ar->hw, HAS_RATE_CONTROL); ieee80211_hw_set(ar->hw, AP_LINK_PS); ieee80211_hw_set(ar->hw, SPECTRUM_MGMT); ieee80211_hw_set(ar->hw, SUPPORT_FAST_XMIT); ieee80211_hw_set(ar->hw, CONNECTION_MONITOR); ieee80211_hw_set(ar->hw, SUPPORTS_PER_STA_GTK); ieee80211_hw_set(ar->hw, WANT_MONITOR_VIF); ieee80211_hw_set(ar->hw, CHANCTX_STA_CSA); ieee80211_hw_set(ar->hw, QUEUE_CONTROL); ieee80211_hw_set(ar->hw, SUPPORTS_TX_FRAG); ieee80211_hw_set(ar->hw, REPORTS_LOW_ACK); if (!test_bit(ATH10K_FLAG_RAW_MODE, &ar->dev_flags)) ieee80211_hw_set(ar->hw, SW_CRYPTO_CONTROL); ar->hw->wiphy->features |= NL80211_FEATURE_STATIC_SMPS; ar->hw->wiphy->flags |= WIPHY_FLAG_IBSS_RSN; if (ar->ht_cap_info & WMI_HT_CAP_DYNAMIC_SMPS) ar->hw->wiphy->features |= NL80211_FEATURE_DYNAMIC_SMPS; if (ar->ht_cap_info & WMI_HT_CAP_ENABLED) { ieee80211_hw_set(ar->hw, AMPDU_AGGREGATION); ieee80211_hw_set(ar->hw, TX_AMPDU_SETUP_IN_HW); } ar->hw->wiphy->max_scan_ssids = WLAN_SCAN_PARAMS_MAX_SSID; ar->hw->wiphy->max_scan_ie_len = WLAN_SCAN_PARAMS_MAX_IE_LEN; if (test_bit(WMI_SERVICE_NLO, ar->wmi.svc_map)) { ar->hw->wiphy->max_sched_scan_ssids = WMI_PNO_MAX_SUPP_NETWORKS; ar->hw->wiphy->max_match_sets = WMI_PNO_MAX_SUPP_NETWORKS; ar->hw->wiphy->max_sched_scan_ie_len = WMI_PNO_MAX_IE_LENGTH; ar->hw->wiphy->max_sched_scan_plans = WMI_PNO_MAX_SCHED_SCAN_PLANS; ar->hw->wiphy->max_sched_scan_plan_interval = WMI_PNO_MAX_SCHED_SCAN_PLAN_INT; ar->hw->wiphy->max_sched_scan_plan_iterations = WMI_PNO_MAX_SCHED_SCAN_PLAN_ITRNS; ar->hw->wiphy->features |= NL80211_FEATURE_ND_RANDOM_MAC_ADDR; } ar->hw->vif_data_size = sizeof(struct ath10k_vif); ar->hw->sta_data_size = sizeof(struct ath10k_sta); ar->hw->txq_data_size = sizeof(struct ath10k_txq); ar->hw->max_listen_interval = ATH10K_MAX_HW_LISTEN_INTERVAL; if (test_bit(WMI_SERVICE_BEACON_OFFLOAD, ar->wmi.svc_map)) { ar->hw->wiphy->flags |= WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD; /* Firmware delivers WPS/P2P Probe Requests frames to driver so * that userspace (e.g. wpa_supplicant/hostapd) can generate * correct Probe Responses. This is more of a hack advert.. */ ar->hw->wiphy->probe_resp_offload |= NL80211_PROBE_RESP_OFFLOAD_SUPPORT_WPS | NL80211_PROBE_RESP_OFFLOAD_SUPPORT_WPS2 | NL80211_PROBE_RESP_OFFLOAD_SUPPORT_P2P; } if (test_bit(WMI_SERVICE_TDLS, ar->wmi.svc_map) || test_bit(WMI_SERVICE_TDLS_EXPLICIT_MODE_ONLY, ar->wmi.svc_map)) { ar->hw->wiphy->flags |= WIPHY_FLAG_SUPPORTS_TDLS; if (test_bit(WMI_SERVICE_TDLS_WIDER_BANDWIDTH, ar->wmi.svc_map)) ieee80211_hw_set(ar->hw, TDLS_WIDER_BW); } if (test_bit(WMI_SERVICE_TDLS_UAPSD_BUFFER_STA, ar->wmi.svc_map)) ieee80211_hw_set(ar->hw, SUPPORTS_TDLS_BUFFER_STA); if (ath10k_frame_mode == ATH10K_HW_TXRX_ETHERNET) { if (ar->wmi.vdev_param->tx_encap_type != WMI_VDEV_PARAM_UNSUPPORTED) ieee80211_hw_set(ar->hw, SUPPORTS_TX_ENCAP_OFFLOAD); } ar->hw->wiphy->flags |= WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL; ar->hw->wiphy->flags |= WIPHY_FLAG_HAS_CHANNEL_SWITCH; ar->hw->wiphy->max_remain_on_channel_duration = 5000; ar->hw->wiphy->flags |= WIPHY_FLAG_AP_UAPSD; ar->hw->wiphy->features |= NL80211_FEATURE_AP_MODE_CHAN_WIDTH_CHANGE | NL80211_FEATURE_AP_SCAN; ar->hw->wiphy->max_ap_assoc_sta = ar->max_num_stations; ret = ath10k_wow_init(ar); if (ret) { ath10k_warn(ar, "failed to init wow: %d\n", ret); goto err_free; } wiphy_ext_feature_set(ar->hw->wiphy, NL80211_EXT_FEATURE_VHT_IBSS); wiphy_ext_feature_set(ar->hw->wiphy, NL80211_EXT_FEATURE_SET_SCAN_DWELL); wiphy_ext_feature_set(ar->hw->wiphy, NL80211_EXT_FEATURE_AQL); if (ar->hw_params.mcast_frame_registration) wiphy_ext_feature_set(ar->hw->wiphy, NL80211_EXT_FEATURE_MULTICAST_REGISTRATIONS); if (test_bit(WMI_SERVICE_TX_DATA_ACK_RSSI, ar->wmi.svc_map) || test_bit(WMI_SERVICE_HTT_MGMT_TX_COMP_VALID_FLAGS, ar->wmi.svc_map)) wiphy_ext_feature_set(ar->hw->wiphy, NL80211_EXT_FEATURE_ACK_SIGNAL_SUPPORT); if (ath10k_peer_stats_enabled(ar) || test_bit(WMI_SERVICE_REPORT_AIRTIME, ar->wmi.svc_map)) wiphy_ext_feature_set(ar->hw->wiphy, NL80211_EXT_FEATURE_AIRTIME_FAIRNESS); if (test_bit(WMI_SERVICE_RTT_RESPONDER_ROLE, ar->wmi.svc_map)) wiphy_ext_feature_set(ar->hw->wiphy, NL80211_EXT_FEATURE_ENABLE_FTM_RESPONDER); if (test_bit(WMI_SERVICE_TX_PWR_PER_PEER, ar->wmi.svc_map)) wiphy_ext_feature_set(ar->hw->wiphy, NL80211_EXT_FEATURE_STA_TX_PWR); if (test_bit(WMI_SERVICE_PEER_TID_CONFIGS_SUPPORT, ar->wmi.svc_map)) { ar->hw->wiphy->tid_config_support.vif |= BIT(NL80211_TID_CONFIG_ATTR_NOACK) | BIT(NL80211_TID_CONFIG_ATTR_RETRY_SHORT) | BIT(NL80211_TID_CONFIG_ATTR_RETRY_LONG) | BIT(NL80211_TID_CONFIG_ATTR_AMPDU_CTRL) | BIT(NL80211_TID_CONFIG_ATTR_TX_RATE) | BIT(NL80211_TID_CONFIG_ATTR_TX_RATE_TYPE); if (test_bit(WMI_SERVICE_EXT_PEER_TID_CONFIGS_SUPPORT, ar->wmi.svc_map)) { ar->hw->wiphy->tid_config_support.vif |= BIT(NL80211_TID_CONFIG_ATTR_RTSCTS_CTRL); } ar->hw->wiphy->tid_config_support.peer = ar->hw->wiphy->tid_config_support.vif; ar->hw->wiphy->max_data_retry_count = ATH10K_MAX_RETRY_COUNT; } else { ar->ops->set_tid_config = NULL; } /* * on LL hardware queues are managed entirely by the FW * so we only advertise to mac we can do the queues thing */ ar->hw->queues = IEEE80211_MAX_QUEUES; /* vdev_ids are used as hw queue numbers. Make sure offchan tx queue is * something that vdev_ids can't reach so that we don't stop the queue * accidentally. */ ar->hw->offchannel_tx_hw_queue = IEEE80211_MAX_QUEUES - 1; switch (ar->running_fw->fw_file.wmi_op_version) { case ATH10K_FW_WMI_OP_VERSION_MAIN: ar->hw->wiphy->iface_combinations = ath10k_if_comb; ar->hw->wiphy->n_iface_combinations = ARRAY_SIZE(ath10k_if_comb); ar->hw->wiphy->interface_modes |= BIT(NL80211_IFTYPE_ADHOC); break; case ATH10K_FW_WMI_OP_VERSION_TLV: if (test_bit(WMI_SERVICE_ADAPTIVE_OCS, ar->wmi.svc_map)) { ar->hw->wiphy->iface_combinations = ath10k_tlv_qcs_if_comb; ar->hw->wiphy->n_iface_combinations = ARRAY_SIZE(ath10k_tlv_qcs_if_comb); } else { ar->hw->wiphy->iface_combinations = ath10k_tlv_if_comb; ar->hw->wiphy->n_iface_combinations = ARRAY_SIZE(ath10k_tlv_if_comb); } ar->hw->wiphy->interface_modes |= BIT(NL80211_IFTYPE_ADHOC); break; case ATH10K_FW_WMI_OP_VERSION_10_1: case ATH10K_FW_WMI_OP_VERSION_10_2: case ATH10K_FW_WMI_OP_VERSION_10_2_4: ar->hw->wiphy->iface_combinations = ath10k_10x_if_comb; ar->hw->wiphy->n_iface_combinations = ARRAY_SIZE(ath10k_10x_if_comb); break; case ATH10K_FW_WMI_OP_VERSION_10_4: ar->hw->wiphy->iface_combinations = ath10k_10_4_if_comb; ar->hw->wiphy->n_iface_combinations = ARRAY_SIZE(ath10k_10_4_if_comb); if (test_bit(WMI_SERVICE_VDEV_DIFFERENT_BEACON_INTERVAL_SUPPORT, ar->wmi.svc_map)) { ar->hw->wiphy->iface_combinations = ath10k_10_4_bcn_int_if_comb; ar->hw->wiphy->n_iface_combinations = ARRAY_SIZE(ath10k_10_4_bcn_int_if_comb); } break; case ATH10K_FW_WMI_OP_VERSION_UNSET: case ATH10K_FW_WMI_OP_VERSION_MAX: WARN_ON(1); ret = -EINVAL; goto err_free; } if (ar->hw_params.dynamic_sar_support) ar->hw->wiphy->sar_capa = &ath10k_sar_capa; if (!test_bit(ATH10K_FLAG_RAW_MODE, &ar->dev_flags)) ar->hw->netdev_features = NETIF_F_HW_CSUM; if (IS_ENABLED(CONFIG_ATH10K_DFS_CERTIFIED)) { /* Init ath dfs pattern detector */ ar->ath_common.debug_mask = ATH_DBG_DFS; ar->dfs_detector = dfs_pattern_detector_init(&ar->ath_common, NL80211_DFS_UNSET); if (!ar->dfs_detector) ath10k_warn(ar, "failed to initialise DFS pattern detector\n"); } ret = ath10k_mac_init_rd(ar); if (ret) { ath10k_err(ar, "failed to derive regdom: %d\n", ret); goto err_dfs_detector_exit; } /* Disable set_coverage_class for chipsets that do not support it. */ if (!ar->hw_params.hw_ops->set_coverage_class) ar->ops->set_coverage_class = NULL; ret = ath_regd_init(&ar->ath_common.regulatory, ar->hw->wiphy, ath10k_reg_notifier); if (ret) { ath10k_err(ar, "failed to initialise regulatory: %i\n", ret); goto err_dfs_detector_exit; } if (test_bit(WMI_SERVICE_SPOOF_MAC_SUPPORT, ar->wmi.svc_map)) { ar->hw->wiphy->features |= NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR; } ar->hw->wiphy->cipher_suites = cipher_suites; /* QCA988x and QCA6174 family chips do not support CCMP-256, GCMP-128 * and GCMP-256 ciphers in hardware. Fetch number of ciphers supported * from chip specific hw_param table. */ if (!ar->hw_params.n_cipher_suites || ar->hw_params.n_cipher_suites > ARRAY_SIZE(cipher_suites)) { ath10k_err(ar, "invalid hw_params.n_cipher_suites %d\n", ar->hw_params.n_cipher_suites); ar->hw_params.n_cipher_suites = 8; } ar->hw->wiphy->n_cipher_suites = ar->hw_params.n_cipher_suites; wiphy_ext_feature_set(ar->hw->wiphy, NL80211_EXT_FEATURE_CQM_RSSI_LIST); ar->hw->weight_multiplier = ATH10K_AIRTIME_WEIGHT_MULTIPLIER; ret = ieee80211_register_hw(ar->hw); if (ret) { ath10k_err(ar, "failed to register ieee80211: %d\n", ret); goto err_dfs_detector_exit; } if (test_bit(WMI_SERVICE_PER_PACKET_SW_ENCRYPT, ar->wmi.svc_map)) { ar->hw->wiphy->interface_modes |= BIT(NL80211_IFTYPE_AP_VLAN); ar->hw->wiphy->software_iftypes |= BIT(NL80211_IFTYPE_AP_VLAN); } if (!ath_is_world_regd(&ar->ath_common.reg_world_copy) && !ath_is_world_regd(&ar->ath_common.regulatory)) { ret = regulatory_hint(ar->hw->wiphy, ar->ath_common.regulatory.alpha2); if (ret) goto err_unregister; } return 0; err_unregister: ieee80211_unregister_hw(ar->hw); err_dfs_detector_exit: if (IS_ENABLED(CONFIG_ATH10K_DFS_CERTIFIED) && ar->dfs_detector) ar->dfs_detector->exit(ar->dfs_detector); err_free: kfree(ar->mac.sbands[NL80211_BAND_2GHZ].channels); kfree(ar->mac.sbands[NL80211_BAND_5GHZ].channels); SET_IEEE80211_DEV(ar->hw, NULL); return ret; } void ath10k_mac_unregister(struct ath10k *ar) { ieee80211_unregister_hw(ar->hw); if (IS_ENABLED(CONFIG_ATH10K_DFS_CERTIFIED) && ar->dfs_detector) ar->dfs_detector->exit(ar->dfs_detector); kfree(ar->mac.sbands[NL80211_BAND_2GHZ].channels); kfree(ar->mac.sbands[NL80211_BAND_5GHZ].channels); SET_IEEE80211_DEV(ar->hw, NULL); }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 /* SPDX-License-Identifier: GPL-2.0 */ /* * Functions used by both the SCSI initiator code and the SCSI target code. */ #ifndef _SCSI_COMMON_H_ #define _SCSI_COMMON_H_ #include <linux/types.h> #include <uapi/linux/pr.h> #include <scsi/scsi_proto.h> enum scsi_pr_type { SCSI_PR_WRITE_EXCLUSIVE = 0x01, SCSI_PR_EXCLUSIVE_ACCESS = 0x03, SCSI_PR_WRITE_EXCLUSIVE_REG_ONLY = 0x05, SCSI_PR_EXCLUSIVE_ACCESS_REG_ONLY = 0x06, SCSI_PR_WRITE_EXCLUSIVE_ALL_REGS = 0x07, SCSI_PR_EXCLUSIVE_ACCESS_ALL_REGS = 0x08, }; enum scsi_pr_type block_pr_type_to_scsi(enum pr_type type); enum pr_type scsi_pr_type_to_block(enum scsi_pr_type type); static inline unsigned scsi_varlen_cdb_length(const void *hdr) { return ((struct scsi_varlen_cdb_hdr *)hdr)->additional_cdb_length + 8; } extern const unsigned char scsi_command_size_tbl[8]; #define COMMAND_SIZE(opcode) scsi_command_size_tbl[((opcode) >> 5) & 7] static inline unsigned scsi_command_size(const unsigned char *cmnd) { return (cmnd[0] == VARIABLE_LENGTH_CMD) ? scsi_varlen_cdb_length(cmnd) : COMMAND_SIZE(cmnd[0]); } static inline unsigned char scsi_command_control(const unsigned char *cmnd) { return (cmnd[0] == VARIABLE_LENGTH_CMD) ? cmnd[1] : cmnd[COMMAND_SIZE(cmnd[0]) - 1]; } /* Returns a human-readable name for the device */ extern const char *scsi_device_type(unsigned type); extern void int_to_scsilun(u64, struct scsi_lun *); extern u64 scsilun_to_int(struct scsi_lun *); /* * This is a slightly modified SCSI sense "descriptor" format header. * The addition is to allow the 0x70 and 0x71 response codes. The idea * is to place the salient data from either "fixed" or "descriptor" sense * format into one structure to ease application processing. * * The original sense buffer should be kept around for those cases * in which more information is required (e.g. the LBA of a MEDIUM ERROR). */ struct scsi_sense_hdr { /* See SPC-3 section 4.5 */ u8 response_code; /* permit: 0x0, 0x70, 0x71, 0x72, 0x73 */ u8 sense_key; u8 asc; u8 ascq; u8 byte4; u8 byte5; u8 byte6; u8 additional_length; /* always 0 for fixed sense format */ }; static inline bool scsi_sense_valid(const struct scsi_sense_hdr *sshdr) { if (!sshdr) return false; return (sshdr->response_code & 0x70) == 0x70; } extern bool scsi_normalize_sense(const u8 *sense_buffer, int sb_len, struct scsi_sense_hdr *sshdr); extern void scsi_build_sense_buffer(int desc, u8 *buf, u8 key, u8 asc, u8 ascq); int scsi_set_sense_information(u8 *buf, int buf_len, u64 info); int scsi_set_sense_field_pointer(u8 *buf, int buf_len, u16 fp, u8 bp, bool cd); extern const u8 * scsi_sense_desc_find(const u8 * sense_buffer, int sb_len, int desc_type); #endif /* _SCSI_COMMON_H_ */
11 11 1 1 3 6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 // SPDX-License-Identifier: GPL-2.0-only /* iptables module for using new netfilter netlink queue * * (C) 2005 by Harald Welte <laforge@netfilter.org> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/skbuff.h> #include <linux/netfilter.h> #include <linux/netfilter_arp.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_NFQUEUE.h> #include <net/netfilter/nf_queue.h> MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); MODULE_DESCRIPTION("Xtables: packet forwarding to netlink"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_NFQUEUE"); MODULE_ALIAS("ip6t_NFQUEUE"); MODULE_ALIAS("arpt_NFQUEUE"); static u32 jhash_initval __read_mostly; static unsigned int nfqueue_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_NFQ_info *tinfo = par->targinfo; return NF_QUEUE_NR(tinfo->queuenum); } static unsigned int nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_NFQ_info_v1 *info = par->targinfo; u32 queue = info->queuenum; if (info->queues_total > 1) { queue = nfqueue_hash(skb, queue, info->queues_total, xt_family(par), jhash_initval); } return NF_QUEUE_NR(queue); } static unsigned int nfqueue_tg_v2(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_NFQ_info_v2 *info = par->targinfo; unsigned int ret = nfqueue_tg_v1(skb, par); if (info->bypass) ret |= NF_VERDICT_FLAG_QUEUE_BYPASS; return ret; } static int nfqueue_tg_check(const struct xt_tgchk_param *par) { const struct xt_NFQ_info_v3 *info = par->targinfo; u32 maxid; init_hashrandom(&jhash_initval); if (info->queues_total == 0) { pr_info_ratelimited("number of total queues is 0\n"); return -EINVAL; } maxid = info->queues_total - 1 + info->queuenum; if (maxid > 0xffff) { pr_info_ratelimited("number of queues (%u) out of range (got %u)\n", info->queues_total, maxid); return -ERANGE; } if (par->target->revision == 2 && info->flags > 1) return -EINVAL; if (par->target->revision == 3 && info->flags & ~NFQ_FLAG_MASK) return -EINVAL; return 0; } static unsigned int nfqueue_tg_v3(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_NFQ_info_v3 *info = par->targinfo; u32 queue = info->queuenum; int ret; if (info->queues_total > 1) { if (info->flags & NFQ_FLAG_CPU_FANOUT) { int cpu = smp_processor_id(); queue = info->queuenum + cpu % info->queues_total; } else { queue = nfqueue_hash(skb, queue, info->queues_total, xt_family(par), jhash_initval); } } ret = NF_QUEUE_NR(queue); if (info->flags & NFQ_FLAG_BYPASS) ret |= NF_VERDICT_FLAG_QUEUE_BYPASS; return ret; } static struct xt_target nfqueue_tg_reg[] __read_mostly = { { .name = "NFQUEUE", .family = NFPROTO_UNSPEC, .target = nfqueue_tg, .targetsize = sizeof(struct xt_NFQ_info), .me = THIS_MODULE, }, { .name = "NFQUEUE", .revision = 1, .family = NFPROTO_UNSPEC, .checkentry = nfqueue_tg_check, .target = nfqueue_tg_v1, .targetsize = sizeof(struct xt_NFQ_info_v1), .me = THIS_MODULE, }, { .name = "NFQUEUE", .revision = 2, .family = NFPROTO_UNSPEC, .checkentry = nfqueue_tg_check, .target = nfqueue_tg_v2, .targetsize = sizeof(struct xt_NFQ_info_v2), .me = THIS_MODULE, }, { .name = "NFQUEUE", .revision = 3, .family = NFPROTO_UNSPEC, .checkentry = nfqueue_tg_check, .target = nfqueue_tg_v3, .targetsize = sizeof(struct xt_NFQ_info_v3), .me = THIS_MODULE, }, }; static int __init nfqueue_tg_init(void) { return xt_register_targets(nfqueue_tg_reg, ARRAY_SIZE(nfqueue_tg_reg)); } static void __exit nfqueue_tg_exit(void) { xt_unregister_targets(nfqueue_tg_reg, ARRAY_SIZE(nfqueue_tg_reg)); } module_init(nfqueue_tg_init); module_exit(nfqueue_tg_exit);
245 245 241 245 4 7989 6823 1539 8002 8003 46 7995 7552 661 7989 4167 4173 2563 2234 4165 6 4164 6 1638 1125 1120 795 400 1165 16 1348 3250 206 3191 4 1439 1435 18 518 26 3944 3943 1369 1506 166 158 40 2 1975 1977 1 56 4162 8 6897 1395 2830 842 845 361 1256 4170 4167 4171 4029 195 4170 4162 2263 2233 4166 8 4169 1638 148 45 144 144 61 42 42 41 1 144 4 4 4 4 4 4 4 1123 1043 190 78 165 2754 1429 181 2083 3986 3985 55 3977 3995 5371 5363 2479 2475 29 30 285 284 184 114 184 113 285 10 10 10 1031 1028 1 1031 1034 38 1003 1056 132 1000 1 2 2 1442 112 112 1402 1401 595 80 595 595 246 245 1096 1014 163 165 2001 26 1990 1990 153 154 60 68 362 357 22 356 19 18 1 1414 4277 3480 536 216 938 534 4166 14 5458 795 1 5280 4270 1 4274 4297 24 6 23 263 1478 357 897 24 890 894 895 1259 1259 24 886 70 2051 2324 43 6 282 2053 36 1691 1671 2040 1240 1291 12 1276 1292 324 44 286 639 639 144 557 5 560 502 3742 2052 1928 1 3755 20 1047 12 918 928 930 435 804 217 217 86 160 204 14 517 114 403 347 60 185 55 47 1365 1338 32 819 747 80 222 221 1134 1130 1129 178 7349 7338 67 9197 65 7957 1033 700 7506 7526 4894 3050 51 9 3019 631 3017 236 233 36 1511 1443 42 27 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 // SPDX-License-Identifier: GPL-2.0-only /* * (C) 1997 Linus Torvalds * (C) 1999 Andrea Arcangeli <andrea@suse.de> (dynamic inode allocation) */ #include <linux/export.h> #include <linux/fs.h> #include <linux/filelock.h> #include <linux/mm.h> #include <linux/backing-dev.h> #include <linux/hash.h> #include <linux/swap.h> #include <linux/security.h> #include <linux/cdev.h> #include <linux/memblock.h> #include <linux/fsnotify.h> #include <linux/mount.h> #include <linux/posix_acl.h> #include <linux/buffer_head.h> /* for inode_has_buffers */ #include <linux/ratelimit.h> #include <linux/list_lru.h> #include <linux/iversion.h> #include <linux/rw_hint.h> #include <linux/seq_file.h> #include <linux/debugfs.h> #include <trace/events/writeback.h> #define CREATE_TRACE_POINTS #include <trace/events/timestamp.h> #include "internal.h" /* * Inode locking rules: * * inode->i_lock protects: * inode->i_state, inode->i_hash, __iget(), inode->i_io_list * Inode LRU list locks protect: * inode->i_sb->s_inode_lru, inode->i_lru * inode->i_sb->s_inode_list_lock protects: * inode->i_sb->s_inodes, inode->i_sb_list * bdi->wb.list_lock protects: * bdi->wb.b_{dirty,io,more_io,dirty_time}, inode->i_io_list * inode_hash_lock protects: * inode_hashtable, inode->i_hash * * Lock ordering: * * inode->i_sb->s_inode_list_lock * inode->i_lock * Inode LRU list locks * * bdi->wb.list_lock * inode->i_lock * * inode_hash_lock * inode->i_sb->s_inode_list_lock * inode->i_lock * * iunique_lock * inode_hash_lock */ static unsigned int i_hash_mask __ro_after_init; static unsigned int i_hash_shift __ro_after_init; static struct hlist_head *inode_hashtable __ro_after_init; static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock); /* * Empty aops. Can be used for the cases where the user does not * define any of the address_space operations. */ const struct address_space_operations empty_aops = { }; EXPORT_SYMBOL(empty_aops); static DEFINE_PER_CPU(unsigned long, nr_inodes); static DEFINE_PER_CPU(unsigned long, nr_unused); static struct kmem_cache *inode_cachep __ro_after_init; static long get_nr_inodes(void) { int i; long sum = 0; for_each_possible_cpu(i) sum += per_cpu(nr_inodes, i); return sum < 0 ? 0 : sum; } static inline long get_nr_inodes_unused(void) { int i; long sum = 0; for_each_possible_cpu(i) sum += per_cpu(nr_unused, i); return sum < 0 ? 0 : sum; } long get_nr_dirty_inodes(void) { /* not actually dirty inodes, but a wild approximation */ long nr_dirty = get_nr_inodes() - get_nr_inodes_unused(); return nr_dirty > 0 ? nr_dirty : 0; } #ifdef CONFIG_DEBUG_FS static DEFINE_PER_CPU(long, mg_ctime_updates); static DEFINE_PER_CPU(long, mg_fine_stamps); static DEFINE_PER_CPU(long, mg_ctime_swaps); static unsigned long get_mg_ctime_updates(void) { unsigned long sum = 0; int i; for_each_possible_cpu(i) sum += data_race(per_cpu(mg_ctime_updates, i)); return sum; } static unsigned long get_mg_fine_stamps(void) { unsigned long sum = 0; int i; for_each_possible_cpu(i) sum += data_race(per_cpu(mg_fine_stamps, i)); return sum; } static unsigned long get_mg_ctime_swaps(void) { unsigned long sum = 0; int i; for_each_possible_cpu(i) sum += data_race(per_cpu(mg_ctime_swaps, i)); return sum; } #define mgtime_counter_inc(__var) this_cpu_inc(__var) static int mgts_show(struct seq_file *s, void *p) { unsigned long ctime_updates = get_mg_ctime_updates(); unsigned long ctime_swaps = get_mg_ctime_swaps(); unsigned long fine_stamps = get_mg_fine_stamps(); unsigned long floor_swaps = timekeeping_get_mg_floor_swaps(); seq_printf(s, "%lu %lu %lu %lu\n", ctime_updates, ctime_swaps, fine_stamps, floor_swaps); return 0; } DEFINE_SHOW_ATTRIBUTE(mgts); static int __init mg_debugfs_init(void) { debugfs_create_file("multigrain_timestamps", S_IFREG | S_IRUGO, NULL, NULL, &mgts_fops); return 0; } late_initcall(mg_debugfs_init); #else /* ! CONFIG_DEBUG_FS */ #define mgtime_counter_inc(__var) do { } while (0) #endif /* CONFIG_DEBUG_FS */ /* * Handle nr_inode sysctl */ #ifdef CONFIG_SYSCTL /* * Statistics gathering.. */ static struct inodes_stat_t inodes_stat; static int proc_nr_inodes(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { inodes_stat.nr_inodes = get_nr_inodes(); inodes_stat.nr_unused = get_nr_inodes_unused(); return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); } static const struct ctl_table inodes_sysctls[] = { { .procname = "inode-nr", .data = &inodes_stat, .maxlen = 2*sizeof(long), .mode = 0444, .proc_handler = proc_nr_inodes, }, { .procname = "inode-state", .data = &inodes_stat, .maxlen = 7*sizeof(long), .mode = 0444, .proc_handler = proc_nr_inodes, }, }; static int __init init_fs_inode_sysctls(void) { register_sysctl_init("fs", inodes_sysctls); return 0; } early_initcall(init_fs_inode_sysctls); #endif static int no_open(struct inode *inode, struct file *file) { return -ENXIO; } /** * inode_init_always_gfp - perform inode structure initialisation * @sb: superblock inode belongs to * @inode: inode to initialise * @gfp: allocation flags * * These are initializations that need to be done on every inode * allocation as the fields are not initialised by slab allocation. * If there are additional allocations required @gfp is used. */ int inode_init_always_gfp(struct super_block *sb, struct inode *inode, gfp_t gfp) { static const struct inode_operations empty_iops; static const struct file_operations no_open_fops = {.open = no_open}; struct address_space *const mapping = &inode->i_data; inode->i_sb = sb; inode->i_blkbits = sb->s_blocksize_bits; inode->i_flags = 0; inode->i_state = 0; atomic64_set(&inode->i_sequence, 0); atomic_set(&inode->i_count, 1); inode->i_op = &empty_iops; inode->i_fop = &no_open_fops; inode->i_ino = 0; inode->__i_nlink = 1; inode->i_opflags = 0; if (sb->s_xattr) inode->i_opflags |= IOP_XATTR; if (sb->s_type->fs_flags & FS_MGTIME) inode->i_opflags |= IOP_MGTIME; i_uid_write(inode, 0); i_gid_write(inode, 0); atomic_set(&inode->i_writecount, 0); inode->i_size = 0; inode->i_write_hint = WRITE_LIFE_NOT_SET; inode->i_blocks = 0; inode->i_bytes = 0; inode->i_generation = 0; inode->i_pipe = NULL; inode->i_cdev = NULL; inode->i_link = NULL; inode->i_dir_seq = 0; inode->i_rdev = 0; inode->dirtied_when = 0; #ifdef CONFIG_CGROUP_WRITEBACK inode->i_wb_frn_winner = 0; inode->i_wb_frn_avg_time = 0; inode->i_wb_frn_history = 0; #endif spin_lock_init(&inode->i_lock); lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key); init_rwsem(&inode->i_rwsem); lockdep_set_class(&inode->i_rwsem, &sb->s_type->i_mutex_key); atomic_set(&inode->i_dio_count, 0); mapping->a_ops = &empty_aops; mapping->host = inode; mapping->flags = 0; mapping->wb_err = 0; atomic_set(&mapping->i_mmap_writable, 0); #ifdef CONFIG_READ_ONLY_THP_FOR_FS atomic_set(&mapping->nr_thps, 0); #endif mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE); mapping->i_private_data = NULL; mapping->writeback_index = 0; init_rwsem(&mapping->invalidate_lock); lockdep_set_class_and_name(&mapping->invalidate_lock, &sb->s_type->invalidate_lock_key, "mapping.invalidate_lock"); if (sb->s_iflags & SB_I_STABLE_WRITES) mapping_set_stable_writes(mapping); inode->i_private = NULL; inode->i_mapping = mapping; INIT_HLIST_HEAD(&inode->i_dentry); /* buggered by rcu freeing */ #ifdef CONFIG_FS_POSIX_ACL inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED; #endif #ifdef CONFIG_FSNOTIFY inode->i_fsnotify_mask = 0; #endif inode->i_flctx = NULL; if (unlikely(security_inode_alloc(inode, gfp))) return -ENOMEM; this_cpu_inc(nr_inodes); return 0; } EXPORT_SYMBOL(inode_init_always_gfp); void free_inode_nonrcu(struct inode *inode) { kmem_cache_free(inode_cachep, inode); } EXPORT_SYMBOL(free_inode_nonrcu); static void i_callback(struct rcu_head *head) { struct inode *inode = container_of(head, struct inode, i_rcu); if (inode->free_inode) inode->free_inode(inode); else free_inode_nonrcu(inode); } /** * alloc_inode - obtain an inode * @sb: superblock * * Allocates a new inode for given superblock. * Inode wont be chained in superblock s_inodes list * This means : * - fs can't be unmount * - quotas, fsnotify, writeback can't work */ struct inode *alloc_inode(struct super_block *sb) { const struct super_operations *ops = sb->s_op; struct inode *inode; if (ops->alloc_inode) inode = ops->alloc_inode(sb); else inode = alloc_inode_sb(sb, inode_cachep, GFP_KERNEL); if (!inode) return NULL; if (unlikely(inode_init_always(sb, inode))) { if (ops->destroy_inode) { ops->destroy_inode(inode); if (!ops->free_inode) return NULL; } inode->free_inode = ops->free_inode; i_callback(&inode->i_rcu); return NULL; } return inode; } void __destroy_inode(struct inode *inode) { BUG_ON(inode_has_buffers(inode)); inode_detach_wb(inode); security_inode_free(inode); fsnotify_inode_delete(inode); locks_free_lock_context(inode); if (!inode->i_nlink) { WARN_ON(atomic_long_read(&inode->i_sb->s_remove_count) == 0); atomic_long_dec(&inode->i_sb->s_remove_count); } #ifdef CONFIG_FS_POSIX_ACL if (inode->i_acl && !is_uncached_acl(inode->i_acl)) posix_acl_release(inode->i_acl); if (inode->i_default_acl && !is_uncached_acl(inode->i_default_acl)) posix_acl_release(inode->i_default_acl); #endif this_cpu_dec(nr_inodes); } EXPORT_SYMBOL(__destroy_inode); static void destroy_inode(struct inode *inode) { const struct super_operations *ops = inode->i_sb->s_op; BUG_ON(!list_empty(&inode->i_lru)); __destroy_inode(inode); if (ops->destroy_inode) { ops->destroy_inode(inode); if (!ops->free_inode) return; } inode->free_inode = ops->free_inode; call_rcu(&inode->i_rcu, i_callback); } /** * drop_nlink - directly drop an inode's link count * @inode: inode * * This is a low-level filesystem helper to replace any * direct filesystem manipulation of i_nlink. In cases * where we are attempting to track writes to the * filesystem, a decrement to zero means an imminent * write when the file is truncated and actually unlinked * on the filesystem. */ void drop_nlink(struct inode *inode) { WARN_ON(inode->i_nlink == 0); inode->__i_nlink--; if (!inode->i_nlink) atomic_long_inc(&inode->i_sb->s_remove_count); } EXPORT_SYMBOL(drop_nlink); /** * clear_nlink - directly zero an inode's link count * @inode: inode * * This is a low-level filesystem helper to replace any * direct filesystem manipulation of i_nlink. See * drop_nlink() for why we care about i_nlink hitting zero. */ void clear_nlink(struct inode *inode) { if (inode->i_nlink) { inode->__i_nlink = 0; atomic_long_inc(&inode->i_sb->s_remove_count); } } EXPORT_SYMBOL(clear_nlink); /** * set_nlink - directly set an inode's link count * @inode: inode * @nlink: new nlink (should be non-zero) * * This is a low-level filesystem helper to replace any * direct filesystem manipulation of i_nlink. */ void set_nlink(struct inode *inode, unsigned int nlink) { if (!nlink) { clear_nlink(inode); } else { /* Yes, some filesystems do change nlink from zero to one */ if (inode->i_nlink == 0) atomic_long_dec(&inode->i_sb->s_remove_count); inode->__i_nlink = nlink; } } EXPORT_SYMBOL(set_nlink); /** * inc_nlink - directly increment an inode's link count * @inode: inode * * This is a low-level filesystem helper to replace any * direct filesystem manipulation of i_nlink. Currently, * it is only here for parity with dec_nlink(). */ void inc_nlink(struct inode *inode) { if (unlikely(inode->i_nlink == 0)) { WARN_ON(!(inode->i_state & I_LINKABLE)); atomic_long_dec(&inode->i_sb->s_remove_count); } inode->__i_nlink++; } EXPORT_SYMBOL(inc_nlink); static void __address_space_init_once(struct address_space *mapping) { xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ | XA_FLAGS_ACCOUNT); init_rwsem(&mapping->i_mmap_rwsem); INIT_LIST_HEAD(&mapping->i_private_list); spin_lock_init(&mapping->i_private_lock); mapping->i_mmap = RB_ROOT_CACHED; } void address_space_init_once(struct address_space *mapping) { memset(mapping, 0, sizeof(*mapping)); __address_space_init_once(mapping); } EXPORT_SYMBOL(address_space_init_once); /* * These are initializations that only need to be done * once, because the fields are idempotent across use * of the inode, so let the slab aware of that. */ void inode_init_once(struct inode *inode) { memset(inode, 0, sizeof(*inode)); INIT_HLIST_NODE(&inode->i_hash); INIT_LIST_HEAD(&inode->i_devices); INIT_LIST_HEAD(&inode->i_io_list); INIT_LIST_HEAD(&inode->i_wb_list); INIT_LIST_HEAD(&inode->i_lru); INIT_LIST_HEAD(&inode->i_sb_list); __address_space_init_once(&inode->i_data); i_size_ordered_init(inode); } EXPORT_SYMBOL(inode_init_once); static void init_once(void *foo) { struct inode *inode = (struct inode *) foo; inode_init_once(inode); } /* * get additional reference to inode; caller must already hold one. */ void ihold(struct inode *inode) { WARN_ON(atomic_inc_return(&inode->i_count) < 2); } EXPORT_SYMBOL(ihold); static void __inode_add_lru(struct inode *inode, bool rotate) { if (inode->i_state & (I_DIRTY_ALL | I_SYNC | I_FREEING | I_WILL_FREE)) return; if (icount_read(inode)) return; if (!(inode->i_sb->s_flags & SB_ACTIVE)) return; if (!mapping_shrinkable(&inode->i_data)) return; if (list_lru_add_obj(&inode->i_sb->s_inode_lru, &inode->i_lru)) this_cpu_inc(nr_unused); else if (rotate) inode->i_state |= I_REFERENCED; } struct wait_queue_head *inode_bit_waitqueue(struct wait_bit_queue_entry *wqe, struct inode *inode, u32 bit) { void *bit_address; bit_address = inode_state_wait_address(inode, bit); init_wait_var_entry(wqe, bit_address, 0); return __var_waitqueue(bit_address); } EXPORT_SYMBOL(inode_bit_waitqueue); /* * Add inode to LRU if needed (inode is unused and clean). * * Needs inode->i_lock held. */ void inode_add_lru(struct inode *inode) { __inode_add_lru(inode, false); } static void inode_lru_list_del(struct inode *inode) { if (list_lru_del_obj(&inode->i_sb->s_inode_lru, &inode->i_lru)) this_cpu_dec(nr_unused); } static void inode_pin_lru_isolating(struct inode *inode) { lockdep_assert_held(&inode->i_lock); WARN_ON(inode->i_state & (I_LRU_ISOLATING | I_FREEING | I_WILL_FREE)); inode->i_state |= I_LRU_ISOLATING; } static void inode_unpin_lru_isolating(struct inode *inode) { spin_lock(&inode->i_lock); WARN_ON(!(inode->i_state & I_LRU_ISOLATING)); inode->i_state &= ~I_LRU_ISOLATING; /* Called with inode->i_lock which ensures memory ordering. */ inode_wake_up_bit(inode, __I_LRU_ISOLATING); spin_unlock(&inode->i_lock); } static void inode_wait_for_lru_isolating(struct inode *inode) { struct wait_bit_queue_entry wqe; struct wait_queue_head *wq_head; lockdep_assert_held(&inode->i_lock); if (!(inode->i_state & I_LRU_ISOLATING)) return; wq_head = inode_bit_waitqueue(&wqe, inode, __I_LRU_ISOLATING); for (;;) { prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE); /* * Checking I_LRU_ISOLATING with inode->i_lock guarantees * memory ordering. */ if (!(inode->i_state & I_LRU_ISOLATING)) break; spin_unlock(&inode->i_lock); schedule(); spin_lock(&inode->i_lock); } finish_wait(wq_head, &wqe.wq_entry); WARN_ON(inode->i_state & I_LRU_ISOLATING); } /** * inode_sb_list_add - add inode to the superblock list of inodes * @inode: inode to add */ void inode_sb_list_add(struct inode *inode) { struct super_block *sb = inode->i_sb; spin_lock(&sb->s_inode_list_lock); list_add(&inode->i_sb_list, &sb->s_inodes); spin_unlock(&sb->s_inode_list_lock); } EXPORT_SYMBOL_GPL(inode_sb_list_add); static inline void inode_sb_list_del(struct inode *inode) { struct super_block *sb = inode->i_sb; if (!list_empty(&inode->i_sb_list)) { spin_lock(&sb->s_inode_list_lock); list_del_init(&inode->i_sb_list); spin_unlock(&sb->s_inode_list_lock); } } static unsigned long hash(struct super_block *sb, unsigned long hashval) { unsigned long tmp; tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) / L1_CACHE_BYTES; tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> i_hash_shift); return tmp & i_hash_mask; } /** * __insert_inode_hash - hash an inode * @inode: unhashed inode * @hashval: unsigned long value used to locate this object in the * inode_hashtable. * * Add an inode to the inode hash for this superblock. */ void __insert_inode_hash(struct inode *inode, unsigned long hashval) { struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval); spin_lock(&inode_hash_lock); spin_lock(&inode->i_lock); hlist_add_head_rcu(&inode->i_hash, b); spin_unlock(&inode->i_lock); spin_unlock(&inode_hash_lock); } EXPORT_SYMBOL(__insert_inode_hash); /** * __remove_inode_hash - remove an inode from the hash * @inode: inode to unhash * * Remove an inode from the superblock. */ void __remove_inode_hash(struct inode *inode) { spin_lock(&inode_hash_lock); spin_lock(&inode->i_lock); hlist_del_init_rcu(&inode->i_hash); spin_unlock(&inode->i_lock); spin_unlock(&inode_hash_lock); } EXPORT_SYMBOL(__remove_inode_hash); void dump_mapping(const struct address_space *mapping) { struct inode *host; const struct address_space_operations *a_ops; struct hlist_node *dentry_first; struct dentry *dentry_ptr; struct dentry dentry; char fname[64] = {}; unsigned long ino; /* * If mapping is an invalid pointer, we don't want to crash * accessing it, so probe everything depending on it carefully. */ if (get_kernel_nofault(host, &mapping->host) || get_kernel_nofault(a_ops, &mapping->a_ops)) { pr_warn("invalid mapping:%px\n", mapping); return; } if (!host) { pr_warn("aops:%ps\n", a_ops); return; } if (get_kernel_nofault(dentry_first, &host->i_dentry.first) || get_kernel_nofault(ino, &host->i_ino)) { pr_warn("aops:%ps invalid inode:%px\n", a_ops, host); return; } if (!dentry_first) { pr_warn("aops:%ps ino:%lx\n", a_ops, ino); return; } dentry_ptr = container_of(dentry_first, struct dentry, d_u.d_alias); if (get_kernel_nofault(dentry, dentry_ptr) || !dentry.d_parent || !dentry.d_name.name) { pr_warn("aops:%ps ino:%lx invalid dentry:%px\n", a_ops, ino, dentry_ptr); return; } if (strncpy_from_kernel_nofault(fname, dentry.d_name.name, 63) < 0) strscpy(fname, "<invalid>"); /* * Even if strncpy_from_kernel_nofault() succeeded, * the fname could be unreliable */ pr_warn("aops:%ps ino:%lx dentry name(?):\"%s\"\n", a_ops, ino, fname); } void clear_inode(struct inode *inode) { /* * We have to cycle the i_pages lock here because reclaim can be in the * process of removing the last page (in __filemap_remove_folio()) * and we must not free the mapping under it. */ xa_lock_irq(&inode->i_data.i_pages); BUG_ON(inode->i_data.nrpages); /* * Almost always, mapping_empty(&inode->i_data) here; but there are * two known and long-standing ways in which nodes may get left behind * (when deep radix-tree node allocation failed partway; or when THP * collapse_file() failed). Until those two known cases are cleaned up, * or a cleanup function is called here, do not BUG_ON(!mapping_empty), * nor even WARN_ON(!mapping_empty). */ xa_unlock_irq(&inode->i_data.i_pages); BUG_ON(!list_empty(&inode->i_data.i_private_list)); BUG_ON(!(inode->i_state & I_FREEING)); BUG_ON(inode->i_state & I_CLEAR); BUG_ON(!list_empty(&inode->i_wb_list)); /* don't need i_lock here, no concurrent mods to i_state */ inode->i_state = I_FREEING | I_CLEAR; } EXPORT_SYMBOL(clear_inode); /* * Free the inode passed in, removing it from the lists it is still connected * to. We remove any pages still attached to the inode and wait for any IO that * is still in progress before finally destroying the inode. * * An inode must already be marked I_FREEING so that we avoid the inode being * moved back onto lists if we race with other code that manipulates the lists * (e.g. writeback_single_inode). The caller is responsible for setting this. * * An inode must already be removed from the LRU list before being evicted from * the cache. This should occur atomically with setting the I_FREEING state * flag, so no inodes here should ever be on the LRU when being evicted. */ static void evict(struct inode *inode) { const struct super_operations *op = inode->i_sb->s_op; BUG_ON(!(inode->i_state & I_FREEING)); BUG_ON(!list_empty(&inode->i_lru)); if (!list_empty(&inode->i_io_list)) inode_io_list_del(inode); inode_sb_list_del(inode); spin_lock(&inode->i_lock); inode_wait_for_lru_isolating(inode); /* * Wait for flusher thread to be done with the inode so that filesystem * does not start destroying it while writeback is still running. Since * the inode has I_FREEING set, flusher thread won't start new work on * the inode. We just have to wait for running writeback to finish. */ inode_wait_for_writeback(inode); spin_unlock(&inode->i_lock); if (op->evict_inode) { op->evict_inode(inode); } else { truncate_inode_pages_final(&inode->i_data); clear_inode(inode); } if (S_ISCHR(inode->i_mode) && inode->i_cdev) cd_forget(inode); remove_inode_hash(inode); /* * Wake up waiters in __wait_on_freeing_inode(). * * It is an invariant that any thread we need to wake up is already * accounted for before remove_inode_hash() acquires ->i_lock -- both * sides take the lock and sleep is aborted if the inode is found * unhashed. Thus either the sleeper wins and goes off CPU, or removal * wins and the sleeper aborts after testing with the lock. * * This also means we don't need any fences for the call below. */ inode_wake_up_bit(inode, __I_NEW); BUG_ON(inode->i_state != (I_FREEING | I_CLEAR)); destroy_inode(inode); } /* * dispose_list - dispose of the contents of a local list * @head: the head of the list to free * * Dispose-list gets a local list with local inodes in it, so it doesn't * need to worry about list corruption and SMP locks. */ static void dispose_list(struct list_head *head) { while (!list_empty(head)) { struct inode *inode; inode = list_first_entry(head, struct inode, i_lru); list_del_init(&inode->i_lru); evict(inode); cond_resched(); } } /** * evict_inodes - evict all evictable inodes for a superblock * @sb: superblock to operate on * * Make sure that no inodes with zero refcount are retained. This is * called by superblock shutdown after having SB_ACTIVE flag removed, * so any inode reaching zero refcount during or after that call will * be immediately evicted. */ void evict_inodes(struct super_block *sb) { struct inode *inode; LIST_HEAD(dispose); again: spin_lock(&sb->s_inode_list_lock); list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { if (icount_read(inode)) continue; spin_lock(&inode->i_lock); if (icount_read(inode)) { spin_unlock(&inode->i_lock); continue; } if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { spin_unlock(&inode->i_lock); continue; } inode->i_state |= I_FREEING; inode_lru_list_del(inode); spin_unlock(&inode->i_lock); list_add(&inode->i_lru, &dispose); /* * We can have a ton of inodes to evict at unmount time given * enough memory, check to see if we need to go to sleep for a * bit so we don't livelock. */ if (need_resched()) { spin_unlock(&sb->s_inode_list_lock); cond_resched(); dispose_list(&dispose); goto again; } } spin_unlock(&sb->s_inode_list_lock); dispose_list(&dispose); } EXPORT_SYMBOL_GPL(evict_inodes); /* * Isolate the inode from the LRU in preparation for freeing it. * * If the inode has the I_REFERENCED flag set, then it means that it has been * used recently - the flag is set in iput_final(). When we encounter such an * inode, clear the flag and move it to the back of the LRU so it gets another * pass through the LRU before it gets reclaimed. This is necessary because of * the fact we are doing lazy LRU updates to minimise lock contention so the * LRU does not have strict ordering. Hence we don't want to reclaim inodes * with this flag set because they are the inodes that are out of order. */ static enum lru_status inode_lru_isolate(struct list_head *item, struct list_lru_one *lru, void *arg) { struct list_head *freeable = arg; struct inode *inode = container_of(item, struct inode, i_lru); /* * We are inverting the lru lock/inode->i_lock here, so use a * trylock. If we fail to get the lock, just skip it. */ if (!spin_trylock(&inode->i_lock)) return LRU_SKIP; /* * Inodes can get referenced, redirtied, or repopulated while * they're already on the LRU, and this can make them * unreclaimable for a while. Remove them lazily here; iput, * sync, or the last page cache deletion will requeue them. */ if (icount_read(inode) || (inode->i_state & ~I_REFERENCED) || !mapping_shrinkable(&inode->i_data)) { list_lru_isolate(lru, &inode->i_lru); spin_unlock(&inode->i_lock); this_cpu_dec(nr_unused); return LRU_REMOVED; } /* Recently referenced inodes get one more pass */ if (inode->i_state & I_REFERENCED) { inode->i_state &= ~I_REFERENCED; spin_unlock(&inode->i_lock); return LRU_ROTATE; } /* * On highmem systems, mapping_shrinkable() permits dropping * page cache in order to free up struct inodes: lowmem might * be under pressure before the cache inside the highmem zone. */ if (inode_has_buffers(inode) || !mapping_empty(&inode->i_data)) { inode_pin_lru_isolating(inode); spin_unlock(&inode->i_lock); spin_unlock(&lru->lock); if (remove_inode_buffers(inode)) { unsigned long reap; reap = invalidate_mapping_pages(&inode->i_data, 0, -1); if (current_is_kswapd()) __count_vm_events(KSWAPD_INODESTEAL, reap); else __count_vm_events(PGINODESTEAL, reap); mm_account_reclaimed_pages(reap); } inode_unpin_lru_isolating(inode); return LRU_RETRY; } WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_FREEING; list_lru_isolate_move(lru, &inode->i_lru, freeable); spin_unlock(&inode->i_lock); this_cpu_dec(nr_unused); return LRU_REMOVED; } /* * Walk the superblock inode LRU for freeable inodes and attempt to free them. * This is called from the superblock shrinker function with a number of inodes * to trim from the LRU. Inodes to be freed are moved to a temporary list and * then are freed outside inode_lock by dispose_list(). */ long prune_icache_sb(struct super_block *sb, struct shrink_control *sc) { LIST_HEAD(freeable); long freed; freed = list_lru_shrink_walk(&sb->s_inode_lru, sc, inode_lru_isolate, &freeable); dispose_list(&freeable); return freed; } static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_locked); /* * Called with the inode lock held. */ static struct inode *find_inode(struct super_block *sb, struct hlist_head *head, int (*test)(struct inode *, void *), void *data, bool is_inode_hash_locked) { struct inode *inode = NULL; if (is_inode_hash_locked) lockdep_assert_held(&inode_hash_lock); else lockdep_assert_not_held(&inode_hash_lock); rcu_read_lock(); repeat: hlist_for_each_entry_rcu(inode, head, i_hash) { if (inode->i_sb != sb) continue; if (!test(inode, data)) continue; spin_lock(&inode->i_lock); if (inode->i_state & (I_FREEING|I_WILL_FREE)) { __wait_on_freeing_inode(inode, is_inode_hash_locked); goto repeat; } if (unlikely(inode->i_state & I_CREATING)) { spin_unlock(&inode->i_lock); rcu_read_unlock(); return ERR_PTR(-ESTALE); } __iget(inode); spin_unlock(&inode->i_lock); rcu_read_unlock(); return inode; } rcu_read_unlock(); return NULL; } /* * find_inode_fast is the fast path version of find_inode, see the comment at * iget_locked for details. */ static struct inode *find_inode_fast(struct super_block *sb, struct hlist_head *head, unsigned long ino, bool is_inode_hash_locked) { struct inode *inode = NULL; if (is_inode_hash_locked) lockdep_assert_held(&inode_hash_lock); else lockdep_assert_not_held(&inode_hash_lock); rcu_read_lock(); repeat: hlist_for_each_entry_rcu(inode, head, i_hash) { if (inode->i_ino != ino) continue; if (inode->i_sb != sb) continue; spin_lock(&inode->i_lock); if (inode->i_state & (I_FREEING|I_WILL_FREE)) { __wait_on_freeing_inode(inode, is_inode_hash_locked); goto repeat; } if (unlikely(inode->i_state & I_CREATING)) { spin_unlock(&inode->i_lock); rcu_read_unlock(); return ERR_PTR(-ESTALE); } __iget(inode); spin_unlock(&inode->i_lock); rcu_read_unlock(); return inode; } rcu_read_unlock(); return NULL; } /* * Each cpu owns a range of LAST_INO_BATCH numbers. * 'shared_last_ino' is dirtied only once out of LAST_INO_BATCH allocations, * to renew the exhausted range. * * This does not significantly increase overflow rate because every CPU can * consume at most LAST_INO_BATCH-1 unused inode numbers. So there is * NR_CPUS*(LAST_INO_BATCH-1) wastage. At 4096 and 1024, this is ~0.1% of the * 2^32 range, and is a worst-case. Even a 50% wastage would only increase * overflow rate by 2x, which does not seem too significant. * * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW * error if st_ino won't fit in target struct field. Use 32bit counter * here to attempt to avoid that. */ #define LAST_INO_BATCH 1024 static DEFINE_PER_CPU(unsigned int, last_ino); unsigned int get_next_ino(void) { unsigned int *p = &get_cpu_var(last_ino); unsigned int res = *p; #ifdef CONFIG_SMP if (unlikely((res & (LAST_INO_BATCH-1)) == 0)) { static atomic_t shared_last_ino; int next = atomic_add_return(LAST_INO_BATCH, &shared_last_ino); res = next - LAST_INO_BATCH; } #endif res++; /* get_next_ino should not provide a 0 inode number */ if (unlikely(!res)) res++; *p = res; put_cpu_var(last_ino); return res; } EXPORT_SYMBOL(get_next_ino); /** * new_inode - obtain an inode * @sb: superblock * * Allocates a new inode for given superblock. The default gfp_mask * for allocations related to inode->i_mapping is GFP_HIGHUSER_MOVABLE. * If HIGHMEM pages are unsuitable or it is known that pages allocated * for the page cache are not reclaimable or migratable, * mapping_set_gfp_mask() must be called with suitable flags on the * newly created inode's mapping * */ struct inode *new_inode(struct super_block *sb) { struct inode *inode; inode = alloc_inode(sb); if (inode) inode_sb_list_add(inode); return inode; } EXPORT_SYMBOL(new_inode); #ifdef CONFIG_DEBUG_LOCK_ALLOC void lockdep_annotate_inode_mutex_key(struct inode *inode) { if (S_ISDIR(inode->i_mode)) { struct file_system_type *type = inode->i_sb->s_type; /* Set new key only if filesystem hasn't already changed it */ if (lockdep_match_class(&inode->i_rwsem, &type->i_mutex_key)) { /* * ensure nobody is actually holding i_rwsem */ init_rwsem(&inode->i_rwsem); lockdep_set_class(&inode->i_rwsem, &type->i_mutex_dir_key); } } } EXPORT_SYMBOL(lockdep_annotate_inode_mutex_key); #endif /** * unlock_new_inode - clear the I_NEW state and wake up any waiters * @inode: new inode to unlock * * Called when the inode is fully initialised to clear the new state of the * inode and wake up anyone waiting for the inode to finish initialisation. */ void unlock_new_inode(struct inode *inode) { lockdep_annotate_inode_mutex_key(inode); spin_lock(&inode->i_lock); WARN_ON(!(inode->i_state & I_NEW)); inode->i_state &= ~I_NEW & ~I_CREATING; /* * Pairs with the barrier in prepare_to_wait_event() to make sure * ___wait_var_event() either sees the bit cleared or * waitqueue_active() check in wake_up_var() sees the waiter. */ smp_mb(); inode_wake_up_bit(inode, __I_NEW); spin_unlock(&inode->i_lock); } EXPORT_SYMBOL(unlock_new_inode); void discard_new_inode(struct inode *inode) { lockdep_annotate_inode_mutex_key(inode); spin_lock(&inode->i_lock); WARN_ON(!(inode->i_state & I_NEW)); inode->i_state &= ~I_NEW; /* * Pairs with the barrier in prepare_to_wait_event() to make sure * ___wait_var_event() either sees the bit cleared or * waitqueue_active() check in wake_up_var() sees the waiter. */ smp_mb(); inode_wake_up_bit(inode, __I_NEW); spin_unlock(&inode->i_lock); iput(inode); } EXPORT_SYMBOL(discard_new_inode); /** * lock_two_nondirectories - take two i_mutexes on non-directory objects * * Lock any non-NULL argument. Passed objects must not be directories. * Zero, one or two objects may be locked by this function. * * @inode1: first inode to lock * @inode2: second inode to lock */ void lock_two_nondirectories(struct inode *inode1, struct inode *inode2) { if (inode1) WARN_ON_ONCE(S_ISDIR(inode1->i_mode)); if (inode2) WARN_ON_ONCE(S_ISDIR(inode2->i_mode)); if (inode1 > inode2) swap(inode1, inode2); if (inode1) inode_lock(inode1); if (inode2 && inode2 != inode1) inode_lock_nested(inode2, I_MUTEX_NONDIR2); } EXPORT_SYMBOL(lock_two_nondirectories); /** * unlock_two_nondirectories - release locks from lock_two_nondirectories() * @inode1: first inode to unlock * @inode2: second inode to unlock */ void unlock_two_nondirectories(struct inode *inode1, struct inode *inode2) { if (inode1) { WARN_ON_ONCE(S_ISDIR(inode1->i_mode)); inode_unlock(inode1); } if (inode2 && inode2 != inode1) { WARN_ON_ONCE(S_ISDIR(inode2->i_mode)); inode_unlock(inode2); } } EXPORT_SYMBOL(unlock_two_nondirectories); /** * inode_insert5 - obtain an inode from a mounted file system * @inode: pre-allocated inode to use for insert to cache * @hashval: hash value (usually inode number) to get * @test: callback used for comparisons between inodes * @set: callback used to initialize a new struct inode * @data: opaque data pointer to pass to @test and @set * * Search for the inode specified by @hashval and @data in the inode cache, * and if present return it with an increased reference count. This is a * variant of iget5_locked() that doesn't allocate an inode. * * If the inode is not present in the cache, insert the pre-allocated inode and * return it locked, hashed, and with the I_NEW flag set. The file system gets * to fill it in before unlocking it via unlock_new_inode(). * * Note that both @test and @set are called with the inode_hash_lock held, so * they can't sleep. */ struct inode *inode_insert5(struct inode *inode, unsigned long hashval, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *data) { struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval); struct inode *old; might_sleep(); again: spin_lock(&inode_hash_lock); old = find_inode(inode->i_sb, head, test, data, true); if (unlikely(old)) { /* * Uhhuh, somebody else created the same inode under us. * Use the old inode instead of the preallocated one. */ spin_unlock(&inode_hash_lock); if (IS_ERR(old)) return NULL; wait_on_inode(old); if (unlikely(inode_unhashed(old))) { iput(old); goto again; } return old; } if (set && unlikely(set(inode, data))) { spin_unlock(&inode_hash_lock); return NULL; } /* * Return the locked inode with I_NEW set, the * caller is responsible for filling in the contents */ spin_lock(&inode->i_lock); inode->i_state |= I_NEW; hlist_add_head_rcu(&inode->i_hash, head); spin_unlock(&inode->i_lock); spin_unlock(&inode_hash_lock); /* * Add inode to the sb list if it's not already. It has I_NEW at this * point, so it should be safe to test i_sb_list locklessly. */ if (list_empty(&inode->i_sb_list)) inode_sb_list_add(inode); return inode; } EXPORT_SYMBOL(inode_insert5); /** * iget5_locked - obtain an inode from a mounted file system * @sb: super block of file system * @hashval: hash value (usually inode number) to get * @test: callback used for comparisons between inodes * @set: callback used to initialize a new struct inode * @data: opaque data pointer to pass to @test and @set * * Search for the inode specified by @hashval and @data in the inode cache, * and if present return it with an increased reference count. This is a * generalized version of iget_locked() for file systems where the inode * number is not sufficient for unique identification of an inode. * * If the inode is not present in the cache, allocate and insert a new inode * and return it locked, hashed, and with the I_NEW flag set. The file system * gets to fill it in before unlocking it via unlock_new_inode(). * * Note that both @test and @set are called with the inode_hash_lock held, so * they can't sleep. */ struct inode *iget5_locked(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *data) { struct inode *inode = ilookup5(sb, hashval, test, data); if (!inode) { struct inode *new = alloc_inode(sb); if (new) { inode = inode_insert5(new, hashval, test, set, data); if (unlikely(inode != new)) destroy_inode(new); } } return inode; } EXPORT_SYMBOL(iget5_locked); /** * iget5_locked_rcu - obtain an inode from a mounted file system * @sb: super block of file system * @hashval: hash value (usually inode number) to get * @test: callback used for comparisons between inodes * @set: callback used to initialize a new struct inode * @data: opaque data pointer to pass to @test and @set * * This is equivalent to iget5_locked, except the @test callback must * tolerate the inode not being stable, including being mid-teardown. */ struct inode *iget5_locked_rcu(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *data) { struct hlist_head *head = inode_hashtable + hash(sb, hashval); struct inode *inode, *new; might_sleep(); again: inode = find_inode(sb, head, test, data, false); if (inode) { if (IS_ERR(inode)) return NULL; wait_on_inode(inode); if (unlikely(inode_unhashed(inode))) { iput(inode); goto again; } return inode; } new = alloc_inode(sb); if (new) { inode = inode_insert5(new, hashval, test, set, data); if (unlikely(inode != new)) destroy_inode(new); } return inode; } EXPORT_SYMBOL_GPL(iget5_locked_rcu); /** * iget_locked - obtain an inode from a mounted file system * @sb: super block of file system * @ino: inode number to get * * Search for the inode specified by @ino in the inode cache and if present * return it with an increased reference count. This is for file systems * where the inode number is sufficient for unique identification of an inode. * * If the inode is not in cache, allocate a new inode and return it locked, * hashed, and with the I_NEW flag set. The file system gets to fill it in * before unlocking it via unlock_new_inode(). */ struct inode *iget_locked(struct super_block *sb, unsigned long ino) { struct hlist_head *head = inode_hashtable + hash(sb, ino); struct inode *inode; might_sleep(); again: inode = find_inode_fast(sb, head, ino, false); if (inode) { if (IS_ERR(inode)) return NULL; wait_on_inode(inode); if (unlikely(inode_unhashed(inode))) { iput(inode); goto again; } return inode; } inode = alloc_inode(sb); if (inode) { struct inode *old; spin_lock(&inode_hash_lock); /* We released the lock, so.. */ old = find_inode_fast(sb, head, ino, true); if (!old) { inode->i_ino = ino; spin_lock(&inode->i_lock); inode->i_state = I_NEW; hlist_add_head_rcu(&inode->i_hash, head); spin_unlock(&inode->i_lock); spin_unlock(&inode_hash_lock); inode_sb_list_add(inode); /* Return the locked inode with I_NEW set, the * caller is responsible for filling in the contents */ return inode; } /* * Uhhuh, somebody else created the same inode under * us. Use the old inode instead of the one we just * allocated. */ spin_unlock(&inode_hash_lock); destroy_inode(inode); if (IS_ERR(old)) return NULL; inode = old; wait_on_inode(inode); if (unlikely(inode_unhashed(inode))) { iput(inode); goto again; } } return inode; } EXPORT_SYMBOL(iget_locked); /* * search the inode cache for a matching inode number. * If we find one, then the inode number we are trying to * allocate is not unique and so we should not use it. * * Returns 1 if the inode number is unique, 0 if it is not. */ static int test_inode_iunique(struct super_block *sb, unsigned long ino) { struct hlist_head *b = inode_hashtable + hash(sb, ino); struct inode *inode; hlist_for_each_entry_rcu(inode, b, i_hash) { if (inode->i_ino == ino && inode->i_sb == sb) return 0; } return 1; } /** * iunique - get a unique inode number * @sb: superblock * @max_reserved: highest reserved inode number * * Obtain an inode number that is unique on the system for a given * superblock. This is used by file systems that have no natural * permanent inode numbering system. An inode number is returned that * is higher than the reserved limit but unique. * * BUGS: * With a large number of inodes live on the file system this function * currently becomes quite slow. */ ino_t iunique(struct super_block *sb, ino_t max_reserved) { /* * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW * error if st_ino won't fit in target struct field. Use 32bit counter * here to attempt to avoid that. */ static DEFINE_SPINLOCK(iunique_lock); static unsigned int counter; ino_t res; rcu_read_lock(); spin_lock(&iunique_lock); do { if (counter <= max_reserved) counter = max_reserved + 1; res = counter++; } while (!test_inode_iunique(sb, res)); spin_unlock(&iunique_lock); rcu_read_unlock(); return res; } EXPORT_SYMBOL(iunique); struct inode *igrab(struct inode *inode) { spin_lock(&inode->i_lock); if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) { __iget(inode); spin_unlock(&inode->i_lock); } else { spin_unlock(&inode->i_lock); /* * Handle the case where s_op->clear_inode is not been * called yet, and somebody is calling igrab * while the inode is getting freed. */ inode = NULL; } return inode; } EXPORT_SYMBOL(igrab); /** * ilookup5_nowait - search for an inode in the inode cache * @sb: super block of file system to search * @hashval: hash value (usually inode number) to search for * @test: callback used for comparisons between inodes * @data: opaque data pointer to pass to @test * * Search for the inode specified by @hashval and @data in the inode cache. * If the inode is in the cache, the inode is returned with an incremented * reference count. * * Note: I_NEW is not waited upon so you have to be very careful what you do * with the returned inode. You probably should be using ilookup5() instead. * * Note2: @test is called with the inode_hash_lock held, so can't sleep. */ struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), void *data) { struct hlist_head *head = inode_hashtable + hash(sb, hashval); struct inode *inode; spin_lock(&inode_hash_lock); inode = find_inode(sb, head, test, data, true); spin_unlock(&inode_hash_lock); return IS_ERR(inode) ? NULL : inode; } EXPORT_SYMBOL(ilookup5_nowait); /** * ilookup5 - search for an inode in the inode cache * @sb: super block of file system to search * @hashval: hash value (usually inode number) to search for * @test: callback used for comparisons between inodes * @data: opaque data pointer to pass to @test * * Search for the inode specified by @hashval and @data in the inode cache, * and if the inode is in the cache, return the inode with an incremented * reference count. Waits on I_NEW before returning the inode. * returned with an incremented reference count. * * This is a generalized version of ilookup() for file systems where the * inode number is not sufficient for unique identification of an inode. * * Note: @test is called with the inode_hash_lock held, so can't sleep. */ struct inode *ilookup5(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), void *data) { struct inode *inode; might_sleep(); again: inode = ilookup5_nowait(sb, hashval, test, data); if (inode) { wait_on_inode(inode); if (unlikely(inode_unhashed(inode))) { iput(inode); goto again; } } return inode; } EXPORT_SYMBOL(ilookup5); /** * ilookup - search for an inode in the inode cache * @sb: super block of file system to search * @ino: inode number to search for * * Search for the inode @ino in the inode cache, and if the inode is in the * cache, the inode is returned with an incremented reference count. */ struct inode *ilookup(struct super_block *sb, unsigned long ino) { struct hlist_head *head = inode_hashtable + hash(sb, ino); struct inode *inode; might_sleep(); again: inode = find_inode_fast(sb, head, ino, false); if (inode) { if (IS_ERR(inode)) return NULL; wait_on_inode(inode); if (unlikely(inode_unhashed(inode))) { iput(inode); goto again; } } return inode; } EXPORT_SYMBOL(ilookup); /** * find_inode_nowait - find an inode in the inode cache * @sb: super block of file system to search * @hashval: hash value (usually inode number) to search for * @match: callback used for comparisons between inodes * @data: opaque data pointer to pass to @match * * Search for the inode specified by @hashval and @data in the inode * cache, where the helper function @match will return 0 if the inode * does not match, 1 if the inode does match, and -1 if the search * should be stopped. The @match function must be responsible for * taking the i_lock spin_lock and checking i_state for an inode being * freed or being initialized, and incrementing the reference count * before returning 1. It also must not sleep, since it is called with * the inode_hash_lock spinlock held. * * This is a even more generalized version of ilookup5() when the * function must never block --- find_inode() can block in * __wait_on_freeing_inode() --- or when the caller can not increment * the reference count because the resulting iput() might cause an * inode eviction. The tradeoff is that the @match funtion must be * very carefully implemented. */ struct inode *find_inode_nowait(struct super_block *sb, unsigned long hashval, int (*match)(struct inode *, unsigned long, void *), void *data) { struct hlist_head *head = inode_hashtable + hash(sb, hashval); struct inode *inode, *ret_inode = NULL; int mval; spin_lock(&inode_hash_lock); hlist_for_each_entry(inode, head, i_hash) { if (inode->i_sb != sb) continue; mval = match(inode, hashval, data); if (mval == 0) continue; if (mval == 1) ret_inode = inode; goto out; } out: spin_unlock(&inode_hash_lock); return ret_inode; } EXPORT_SYMBOL(find_inode_nowait); /** * find_inode_rcu - find an inode in the inode cache * @sb: Super block of file system to search * @hashval: Key to hash * @test: Function to test match on an inode * @data: Data for test function * * Search for the inode specified by @hashval and @data in the inode cache, * where the helper function @test will return 0 if the inode does not match * and 1 if it does. The @test function must be responsible for taking the * i_lock spin_lock and checking i_state for an inode being freed or being * initialized. * * If successful, this will return the inode for which the @test function * returned 1 and NULL otherwise. * * The @test function is not permitted to take a ref on any inode presented. * It is also not permitted to sleep. * * The caller must hold the RCU read lock. */ struct inode *find_inode_rcu(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), void *data) { struct hlist_head *head = inode_hashtable + hash(sb, hashval); struct inode *inode; RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "suspicious find_inode_rcu() usage"); hlist_for_each_entry_rcu(inode, head, i_hash) { if (inode->i_sb == sb && !(READ_ONCE(inode->i_state) & (I_FREEING | I_WILL_FREE)) && test(inode, data)) return inode; } return NULL; } EXPORT_SYMBOL(find_inode_rcu); /** * find_inode_by_ino_rcu - Find an inode in the inode cache * @sb: Super block of file system to search * @ino: The inode number to match * * Search for the inode specified by @hashval and @data in the inode cache, * where the helper function @test will return 0 if the inode does not match * and 1 if it does. The @test function must be responsible for taking the * i_lock spin_lock and checking i_state for an inode being freed or being * initialized. * * If successful, this will return the inode for which the @test function * returned 1 and NULL otherwise. * * The @test function is not permitted to take a ref on any inode presented. * It is also not permitted to sleep. * * The caller must hold the RCU read lock. */ struct inode *find_inode_by_ino_rcu(struct super_block *sb, unsigned long ino) { struct hlist_head *head = inode_hashtable + hash(sb, ino); struct inode *inode; RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "suspicious find_inode_by_ino_rcu() usage"); hlist_for_each_entry_rcu(inode, head, i_hash) { if (inode->i_ino == ino && inode->i_sb == sb && !(READ_ONCE(inode->i_state) & (I_FREEING | I_WILL_FREE))) return inode; } return NULL; } EXPORT_SYMBOL(find_inode_by_ino_rcu); int insert_inode_locked(struct inode *inode) { struct super_block *sb = inode->i_sb; ino_t ino = inode->i_ino; struct hlist_head *head = inode_hashtable + hash(sb, ino); might_sleep(); while (1) { struct inode *old = NULL; spin_lock(&inode_hash_lock); hlist_for_each_entry(old, head, i_hash) { if (old->i_ino != ino) continue; if (old->i_sb != sb) continue; spin_lock(&old->i_lock); if (old->i_state & (I_FREEING|I_WILL_FREE)) { spin_unlock(&old->i_lock); continue; } break; } if (likely(!old)) { spin_lock(&inode->i_lock); inode->i_state |= I_NEW | I_CREATING; hlist_add_head_rcu(&inode->i_hash, head); spin_unlock(&inode->i_lock); spin_unlock(&inode_hash_lock); return 0; } if (unlikely(old->i_state & I_CREATING)) { spin_unlock(&old->i_lock); spin_unlock(&inode_hash_lock); return -EBUSY; } __iget(old); spin_unlock(&old->i_lock); spin_unlock(&inode_hash_lock); wait_on_inode(old); if (unlikely(!inode_unhashed(old))) { iput(old); return -EBUSY; } iput(old); } } EXPORT_SYMBOL(insert_inode_locked); int insert_inode_locked4(struct inode *inode, unsigned long hashval, int (*test)(struct inode *, void *), void *data) { struct inode *old; might_sleep(); inode->i_state |= I_CREATING; old = inode_insert5(inode, hashval, test, NULL, data); if (old != inode) { iput(old); return -EBUSY; } return 0; } EXPORT_SYMBOL(insert_inode_locked4); int inode_just_drop(struct inode *inode) { return 1; } EXPORT_SYMBOL(inode_just_drop); /* * Called when we're dropping the last reference * to an inode. * * Call the FS "drop_inode()" function, defaulting to * the legacy UNIX filesystem behaviour. If it tells * us to evict inode, do so. Otherwise, retain inode * in cache if fs is alive, sync and evict if fs is * shutting down. */ static void iput_final(struct inode *inode) { struct super_block *sb = inode->i_sb; const struct super_operations *op = inode->i_sb->s_op; unsigned long state; int drop; WARN_ON(inode->i_state & I_NEW); if (op->drop_inode) drop = op->drop_inode(inode); else drop = inode_generic_drop(inode); if (!drop && !(inode->i_state & I_DONTCACHE) && (sb->s_flags & SB_ACTIVE)) { __inode_add_lru(inode, true); spin_unlock(&inode->i_lock); return; } state = inode->i_state; if (!drop) { WRITE_ONCE(inode->i_state, state | I_WILL_FREE); spin_unlock(&inode->i_lock); write_inode_now(inode, 1); spin_lock(&inode->i_lock); state = inode->i_state; WARN_ON(state & I_NEW); state &= ~I_WILL_FREE; } WRITE_ONCE(inode->i_state, state | I_FREEING); if (!list_empty(&inode->i_lru)) inode_lru_list_del(inode); spin_unlock(&inode->i_lock); evict(inode); } /** * iput - put an inode * @inode: inode to put * * Puts an inode, dropping its usage count. If the inode use count hits * zero, the inode is then freed and may also be destroyed. * * Consequently, iput() can sleep. */ void iput(struct inode *inode) { might_sleep(); if (unlikely(!inode)) return; retry: lockdep_assert_not_held(&inode->i_lock); VFS_BUG_ON_INODE(inode->i_state & I_CLEAR, inode); /* * Note this assert is technically racy as if the count is bogusly * equal to one, then two CPUs racing to further drop it can both * conclude it's fine. */ VFS_BUG_ON_INODE(atomic_read(&inode->i_count) < 1, inode); if (atomic_add_unless(&inode->i_count, -1, 1)) return; if ((inode->i_state & I_DIRTY_TIME) && inode->i_nlink) { trace_writeback_lazytime_iput(inode); mark_inode_dirty_sync(inode); goto retry; } spin_lock(&inode->i_lock); if (unlikely((inode->i_state & I_DIRTY_TIME) && inode->i_nlink)) { spin_unlock(&inode->i_lock); goto retry; } if (!atomic_dec_and_test(&inode->i_count)) { spin_unlock(&inode->i_lock); return; } /* * iput_final() drops ->i_lock, we can't assert on it as the inode may * be deallocated by the time the call returns. */ iput_final(inode); } EXPORT_SYMBOL(iput); #ifdef CONFIG_BLOCK /** * bmap - find a block number in a file * @inode: inode owning the block number being requested * @block: pointer containing the block to find * * Replaces the value in ``*block`` with the block number on the device holding * corresponding to the requested block number in the file. * That is, asked for block 4 of inode 1 the function will replace the * 4 in ``*block``, with disk block relative to the disk start that holds that * block of the file. * * Returns -EINVAL in case of error, 0 otherwise. If mapping falls into a * hole, returns 0 and ``*block`` is also set to 0. */ int bmap(struct inode *inode, sector_t *block) { if (!inode->i_mapping->a_ops->bmap) return -EINVAL; *block = inode->i_mapping->a_ops->bmap(inode->i_mapping, *block); return 0; } EXPORT_SYMBOL(bmap); #endif /* * With relative atime, only update atime if the previous atime is * earlier than or equal to either the ctime or mtime, * or if at least a day has passed since the last atime update. */ static bool relatime_need_update(struct vfsmount *mnt, struct inode *inode, struct timespec64 now) { struct timespec64 atime, mtime, ctime; if (!(mnt->mnt_flags & MNT_RELATIME)) return true; /* * Is mtime younger than or equal to atime? If yes, update atime: */ atime = inode_get_atime(inode); mtime = inode_get_mtime(inode); if (timespec64_compare(&mtime, &atime) >= 0) return true; /* * Is ctime younger than or equal to atime? If yes, update atime: */ ctime = inode_get_ctime(inode); if (timespec64_compare(&ctime, &atime) >= 0) return true; /* * Is the previous atime value older than a day? If yes, * update atime: */ if ((long)(now.tv_sec - atime.tv_sec) >= 24*60*60) return true; /* * Good, we can skip the atime update: */ return false; } /** * inode_update_timestamps - update the timestamps on the inode * @inode: inode to be updated * @flags: S_* flags that needed to be updated * * The update_time function is called when an inode's timestamps need to be * updated for a read or write operation. This function handles updating the * actual timestamps. It's up to the caller to ensure that the inode is marked * dirty appropriately. * * In the case where any of S_MTIME, S_CTIME, or S_VERSION need to be updated, * attempt to update all three of them. S_ATIME updates can be handled * independently of the rest. * * Returns a set of S_* flags indicating which values changed. */ int inode_update_timestamps(struct inode *inode, int flags) { int updated = 0; struct timespec64 now; if (flags & (S_MTIME|S_CTIME|S_VERSION)) { struct timespec64 ctime = inode_get_ctime(inode); struct timespec64 mtime = inode_get_mtime(inode); now = inode_set_ctime_current(inode); if (!timespec64_equal(&now, &ctime)) updated |= S_CTIME; if (!timespec64_equal(&now, &mtime)) { inode_set_mtime_to_ts(inode, now); updated |= S_MTIME; } if (IS_I_VERSION(inode) && inode_maybe_inc_iversion(inode, updated)) updated |= S_VERSION; } else { now = current_time(inode); } if (flags & S_ATIME) { struct timespec64 atime = inode_get_atime(inode); if (!timespec64_equal(&now, &atime)) { inode_set_atime_to_ts(inode, now); updated |= S_ATIME; } } return updated; } EXPORT_SYMBOL(inode_update_timestamps); /** * generic_update_time - update the timestamps on the inode * @inode: inode to be updated * @flags: S_* flags that needed to be updated * * The update_time function is called when an inode's timestamps need to be * updated for a read or write operation. In the case where any of S_MTIME, S_CTIME, * or S_VERSION need to be updated we attempt to update all three of them. S_ATIME * updates can be handled done independently of the rest. * * Returns a S_* mask indicating which fields were updated. */ int generic_update_time(struct inode *inode, int flags) { int updated = inode_update_timestamps(inode, flags); int dirty_flags = 0; if (updated & (S_ATIME|S_MTIME|S_CTIME)) dirty_flags = inode->i_sb->s_flags & SB_LAZYTIME ? I_DIRTY_TIME : I_DIRTY_SYNC; if (updated & S_VERSION) dirty_flags |= I_DIRTY_SYNC; __mark_inode_dirty(inode, dirty_flags); return updated; } EXPORT_SYMBOL(generic_update_time); /* * This does the actual work of updating an inodes time or version. Must have * had called mnt_want_write() before calling this. */ int inode_update_time(struct inode *inode, int flags) { if (inode->i_op->update_time) return inode->i_op->update_time(inode, flags); generic_update_time(inode, flags); return 0; } EXPORT_SYMBOL(inode_update_time); /** * atime_needs_update - update the access time * @path: the &struct path to update * @inode: inode to update * * Update the accessed time on an inode and mark it for writeback. * This function automatically handles read only file systems and media, * as well as the "noatime" flag and inode specific "noatime" markers. */ bool atime_needs_update(const struct path *path, struct inode *inode) { struct vfsmount *mnt = path->mnt; struct timespec64 now, atime; if (inode->i_flags & S_NOATIME) return false; /* Atime updates will likely cause i_uid and i_gid to be written * back improprely if their true value is unknown to the vfs. */ if (HAS_UNMAPPED_ID(mnt_idmap(mnt), inode)) return false; if (IS_NOATIME(inode)) return false; if ((inode->i_sb->s_flags & SB_NODIRATIME) && S_ISDIR(inode->i_mode)) return false; if (mnt->mnt_flags & MNT_NOATIME) return false; if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)) return false; now = current_time(inode); if (!relatime_need_update(mnt, inode, now)) return false; atime = inode_get_atime(inode); if (timespec64_equal(&atime, &now)) return false; return true; } void touch_atime(const struct path *path) { struct vfsmount *mnt = path->mnt; struct inode *inode = d_inode(path->dentry); if (!atime_needs_update(path, inode)) return; if (!sb_start_write_trylock(inode->i_sb)) return; if (mnt_get_write_access(mnt) != 0) goto skip_update; /* * File systems can error out when updating inodes if they need to * allocate new space to modify an inode (such is the case for * Btrfs), but since we touch atime while walking down the path we * really don't care if we failed to update the atime of the file, * so just ignore the return value. * We may also fail on filesystems that have the ability to make parts * of the fs read only, e.g. subvolumes in Btrfs. */ inode_update_time(inode, S_ATIME); mnt_put_write_access(mnt); skip_update: sb_end_write(inode->i_sb); } EXPORT_SYMBOL(touch_atime); /* * Return mask of changes for notify_change() that need to be done as a * response to write or truncate. Return 0 if nothing has to be changed. * Negative value on error (change should be denied). */ int dentry_needs_remove_privs(struct mnt_idmap *idmap, struct dentry *dentry) { struct inode *inode = d_inode(dentry); int mask = 0; int ret; if (IS_NOSEC(inode)) return 0; mask = setattr_should_drop_suidgid(idmap, inode); ret = security_inode_need_killpriv(dentry); if (ret < 0) return ret; if (ret) mask |= ATTR_KILL_PRIV; return mask; } static int __remove_privs(struct mnt_idmap *idmap, struct dentry *dentry, int kill) { struct iattr newattrs; newattrs.ia_valid = ATTR_FORCE | kill; /* * Note we call this on write, so notify_change will not * encounter any conflicting delegations: */ return notify_change(idmap, dentry, &newattrs, NULL); } static int file_remove_privs_flags(struct file *file, unsigned int flags) { struct dentry *dentry = file_dentry(file); struct inode *inode = file_inode(file); int error = 0; int kill; if (IS_NOSEC(inode) || !S_ISREG(inode->i_mode)) return 0; kill = dentry_needs_remove_privs(file_mnt_idmap(file), dentry); if (kill < 0) return kill; if (kill) { if (flags & IOCB_NOWAIT) return -EAGAIN; error = __remove_privs(file_mnt_idmap(file), dentry, kill); } if (!error) inode_has_no_xattr(inode); return error; } /** * file_remove_privs - remove special file privileges (suid, capabilities) * @file: file to remove privileges from * * When file is modified by a write or truncation ensure that special * file privileges are removed. * * Return: 0 on success, negative errno on failure. */ int file_remove_privs(struct file *file) { return file_remove_privs_flags(file, 0); } EXPORT_SYMBOL(file_remove_privs); /** * current_time - Return FS time (possibly fine-grained) * @inode: inode. * * Return the current time truncated to the time granularity supported by * the fs, as suitable for a ctime/mtime change. If the ctime is flagged * as having been QUERIED, get a fine-grained timestamp, but don't update * the floor. * * For a multigrain inode, this is effectively an estimate of the timestamp * that a file would receive. An actual update must go through * inode_set_ctime_current(). */ struct timespec64 current_time(struct inode *inode) { struct timespec64 now; u32 cns; ktime_get_coarse_real_ts64_mg(&now); if (!is_mgtime(inode)) goto out; /* If nothing has queried it, then coarse time is fine */ cns = smp_load_acquire(&inode->i_ctime_nsec); if (cns & I_CTIME_QUERIED) { /* * If there is no apparent change, then get a fine-grained * timestamp. */ if (now.tv_nsec == (cns & ~I_CTIME_QUERIED)) ktime_get_real_ts64(&now); } out: return timestamp_truncate(now, inode); } EXPORT_SYMBOL(current_time); static int inode_needs_update_time(struct inode *inode) { struct timespec64 now, ts; int sync_it = 0; /* First try to exhaust all avenues to not sync */ if (IS_NOCMTIME(inode)) return 0; now = current_time(inode); ts = inode_get_mtime(inode); if (!timespec64_equal(&ts, &now)) sync_it |= S_MTIME; ts = inode_get_ctime(inode); if (!timespec64_equal(&ts, &now)) sync_it |= S_CTIME; if (IS_I_VERSION(inode) && inode_iversion_need_inc(inode)) sync_it |= S_VERSION; return sync_it; } static int __file_update_time(struct file *file, int sync_mode) { int ret = 0; struct inode *inode = file_inode(file); /* try to update time settings */ if (!mnt_get_write_access_file(file)) { ret = inode_update_time(inode, sync_mode); mnt_put_write_access_file(file); } return ret; } /** * file_update_time - update mtime and ctime time * @file: file accessed * * Update the mtime and ctime members of an inode and mark the inode for * writeback. Note that this function is meant exclusively for usage in * the file write path of filesystems, and filesystems may choose to * explicitly ignore updates via this function with the _NOCMTIME inode * flag, e.g. for network filesystem where these imestamps are handled * by the server. This can return an error for file systems who need to * allocate space in order to update an inode. * * Return: 0 on success, negative errno on failure. */ int file_update_time(struct file *file) { int ret; struct inode *inode = file_inode(file); ret = inode_needs_update_time(inode); if (ret <= 0) return ret; return __file_update_time(file, ret); } EXPORT_SYMBOL(file_update_time); /** * file_modified_flags - handle mandated vfs changes when modifying a file * @file: file that was modified * @flags: kiocb flags * * When file has been modified ensure that special * file privileges are removed and time settings are updated. * * If IOCB_NOWAIT is set, special file privileges will not be removed and * time settings will not be updated. It will return -EAGAIN. * * Context: Caller must hold the file's inode lock. * * Return: 0 on success, negative errno on failure. */ static int file_modified_flags(struct file *file, int flags) { int ret; struct inode *inode = file_inode(file); /* * Clear the security bits if the process is not being run by root. * This keeps people from modifying setuid and setgid binaries. */ ret = file_remove_privs_flags(file, flags); if (ret) return ret; if (unlikely(file->f_mode & FMODE_NOCMTIME)) return 0; ret = inode_needs_update_time(inode); if (ret <= 0) return ret; if (flags & IOCB_NOWAIT) return -EAGAIN; return __file_update_time(file, ret); } /** * file_modified - handle mandated vfs changes when modifying a file * @file: file that was modified * * When file has been modified ensure that special * file privileges are removed and time settings are updated. * * Context: Caller must hold the file's inode lock. * * Return: 0 on success, negative errno on failure. */ int file_modified(struct file *file) { return file_modified_flags(file, 0); } EXPORT_SYMBOL(file_modified); /** * kiocb_modified - handle mandated vfs changes when modifying a file * @iocb: iocb that was modified * * When file has been modified ensure that special * file privileges are removed and time settings are updated. * * Context: Caller must hold the file's inode lock. * * Return: 0 on success, negative errno on failure. */ int kiocb_modified(struct kiocb *iocb) { return file_modified_flags(iocb->ki_filp, iocb->ki_flags); } EXPORT_SYMBOL_GPL(kiocb_modified); int inode_needs_sync(struct inode *inode) { if (IS_SYNC(inode)) return 1; if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode)) return 1; return 0; } EXPORT_SYMBOL(inode_needs_sync); /* * If we try to find an inode in the inode hash while it is being * deleted, we have to wait until the filesystem completes its * deletion before reporting that it isn't found. This function waits * until the deletion _might_ have completed. Callers are responsible * to recheck inode state. * * It doesn't matter if I_NEW is not set initially, a call to * wake_up_bit(&inode->i_state, __I_NEW) after removing from the hash list * will DTRT. */ static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_locked) { struct wait_bit_queue_entry wqe; struct wait_queue_head *wq_head; /* * Handle racing against evict(), see that routine for more details. */ if (unlikely(inode_unhashed(inode))) { WARN_ON(is_inode_hash_locked); spin_unlock(&inode->i_lock); return; } wq_head = inode_bit_waitqueue(&wqe, inode, __I_NEW); prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE); spin_unlock(&inode->i_lock); rcu_read_unlock(); if (is_inode_hash_locked) spin_unlock(&inode_hash_lock); schedule(); finish_wait(wq_head, &wqe.wq_entry); if (is_inode_hash_locked) spin_lock(&inode_hash_lock); rcu_read_lock(); } static __initdata unsigned long ihash_entries; static int __init set_ihash_entries(char *str) { if (!str) return 0; ihash_entries = simple_strtoul(str, &str, 0); return 1; } __setup("ihash_entries=", set_ihash_entries); /* * Initialize the waitqueues and inode hash table. */ void __init inode_init_early(void) { /* If hashes are distributed across NUMA nodes, defer * hash allocation until vmalloc space is available. */ if (hashdist) return; inode_hashtable = alloc_large_system_hash("Inode-cache", sizeof(struct hlist_head), ihash_entries, 14, HASH_EARLY | HASH_ZERO, &i_hash_shift, &i_hash_mask, 0, 0); } void __init inode_init(void) { /* inode slab cache */ inode_cachep = kmem_cache_create("inode_cache", sizeof(struct inode), 0, (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC| SLAB_ACCOUNT), init_once); /* Hash may have been set up in inode_init_early */ if (!hashdist) return; inode_hashtable = alloc_large_system_hash("Inode-cache", sizeof(struct hlist_head), ihash_entries, 14, HASH_ZERO, &i_hash_shift, &i_hash_mask, 0, 0); } void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev) { inode->i_mode = mode; switch (inode->i_mode & S_IFMT) { case S_IFCHR: inode->i_fop = &def_chr_fops; inode->i_rdev = rdev; break; case S_IFBLK: if (IS_ENABLED(CONFIG_BLOCK)) inode->i_fop = &def_blk_fops; inode->i_rdev = rdev; break; case S_IFIFO: inode->i_fop = &pipefifo_fops; break; case S_IFSOCK: /* leave it no_open_fops */ break; default: printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for" " inode %s:%lu\n", mode, inode->i_sb->s_id, inode->i_ino); break; } } EXPORT_SYMBOL(init_special_inode); /** * inode_init_owner - Init uid,gid,mode for new inode according to posix standards * @idmap: idmap of the mount the inode was created from * @inode: New inode * @dir: Directory inode * @mode: mode of the new inode * * If the inode has been created through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then take * care to map the inode according to @idmap before checking permissions * and initializing i_uid and i_gid. On non-idmapped mounts or if permission * checking is to be performed on the raw inode simply pass @nop_mnt_idmap. */ void inode_init_owner(struct mnt_idmap *idmap, struct inode *inode, const struct inode *dir, umode_t mode) { inode_fsuid_set(inode, idmap); if (dir && dir->i_mode & S_ISGID) { inode->i_gid = dir->i_gid; /* Directories are special, and always inherit S_ISGID */ if (S_ISDIR(mode)) mode |= S_ISGID; } else inode_fsgid_set(inode, idmap); inode->i_mode = mode; } EXPORT_SYMBOL(inode_init_owner); /** * inode_owner_or_capable - check current task permissions to inode * @idmap: idmap of the mount the inode was found from * @inode: inode being checked * * Return true if current either has CAP_FOWNER in a namespace with the * inode owner uid mapped, or owns the file. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then take * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. */ bool inode_owner_or_capable(struct mnt_idmap *idmap, const struct inode *inode) { vfsuid_t vfsuid; struct user_namespace *ns; vfsuid = i_uid_into_vfsuid(idmap, inode); if (vfsuid_eq_kuid(vfsuid, current_fsuid())) return true; ns = current_user_ns(); if (vfsuid_has_mapping(ns, vfsuid) && ns_capable(ns, CAP_FOWNER)) return true; return false; } EXPORT_SYMBOL(inode_owner_or_capable); /* * Direct i/o helper functions */ bool inode_dio_finished(const struct inode *inode) { return atomic_read(&inode->i_dio_count) == 0; } EXPORT_SYMBOL(inode_dio_finished); /** * inode_dio_wait - wait for outstanding DIO requests to finish * @inode: inode to wait for * * Waits for all pending direct I/O requests to finish so that we can * proceed with a truncate or equivalent operation. * * Must be called under a lock that serializes taking new references * to i_dio_count, usually by inode->i_rwsem. */ void inode_dio_wait(struct inode *inode) { wait_var_event(&inode->i_dio_count, inode_dio_finished(inode)); } EXPORT_SYMBOL(inode_dio_wait); void inode_dio_wait_interruptible(struct inode *inode) { wait_var_event_interruptible(&inode->i_dio_count, inode_dio_finished(inode)); } EXPORT_SYMBOL(inode_dio_wait_interruptible); /* * inode_set_flags - atomically set some inode flags * * Note: the caller should be holding i_rwsem exclusively, or else be sure that * they have exclusive access to the inode structure (i.e., while the * inode is being instantiated). The reason for the cmpxchg() loop * --- which wouldn't be necessary if all code paths which modify * i_flags actually followed this rule, is that there is at least one * code path which doesn't today so we use cmpxchg() out of an abundance * of caution. * * In the long run, i_rwsem is overkill, and we should probably look * at using the i_lock spinlock to protect i_flags, and then make sure * it is so documented in include/linux/fs.h and that all code follows * the locking convention!! */ void inode_set_flags(struct inode *inode, unsigned int flags, unsigned int mask) { WARN_ON_ONCE(flags & ~mask); set_mask_bits(&inode->i_flags, mask, flags); } EXPORT_SYMBOL(inode_set_flags); void inode_nohighmem(struct inode *inode) { mapping_set_gfp_mask(inode->i_mapping, GFP_USER); } EXPORT_SYMBOL(inode_nohighmem); struct timespec64 inode_set_ctime_to_ts(struct inode *inode, struct timespec64 ts) { trace_inode_set_ctime_to_ts(inode, &ts); set_normalized_timespec64(&ts, ts.tv_sec, ts.tv_nsec); inode->i_ctime_sec = ts.tv_sec; inode->i_ctime_nsec = ts.tv_nsec; return ts; } EXPORT_SYMBOL(inode_set_ctime_to_ts); /** * timestamp_truncate - Truncate timespec to a granularity * @t: Timespec * @inode: inode being updated * * Truncate a timespec to the granularity supported by the fs * containing the inode. Always rounds down. gran must * not be 0 nor greater than a second (NSEC_PER_SEC, or 10^9 ns). */ struct timespec64 timestamp_truncate(struct timespec64 t, struct inode *inode) { struct super_block *sb = inode->i_sb; unsigned int gran = sb->s_time_gran; t.tv_sec = clamp(t.tv_sec, sb->s_time_min, sb->s_time_max); if (unlikely(t.tv_sec == sb->s_time_max || t.tv_sec == sb->s_time_min)) t.tv_nsec = 0; /* Avoid division in the common cases 1 ns and 1 s. */ if (gran == 1) ; /* nothing */ else if (gran == NSEC_PER_SEC) t.tv_nsec = 0; else if (gran > 1 && gran < NSEC_PER_SEC) t.tv_nsec -= t.tv_nsec % gran; else WARN(1, "invalid file time granularity: %u", gran); return t; } EXPORT_SYMBOL(timestamp_truncate); /** * inode_set_ctime_current - set the ctime to current_time * @inode: inode * * Set the inode's ctime to the current value for the inode. Returns the * current value that was assigned. If this is not a multigrain inode, then we * set it to the later of the coarse time and floor value. * * If it is multigrain, then we first see if the coarse-grained timestamp is * distinct from what is already there. If so, then use that. Otherwise, get a * fine-grained timestamp. * * After that, try to swap the new value into i_ctime_nsec. Accept the * resulting ctime, regardless of the outcome of the swap. If it has * already been replaced, then that timestamp is later than the earlier * unacceptable one, and is thus acceptable. */ struct timespec64 inode_set_ctime_current(struct inode *inode) { struct timespec64 now; u32 cns, cur; ktime_get_coarse_real_ts64_mg(&now); now = timestamp_truncate(now, inode); /* Just return that if this is not a multigrain fs */ if (!is_mgtime(inode)) { inode_set_ctime_to_ts(inode, now); goto out; } /* * A fine-grained time is only needed if someone has queried * for timestamps, and the current coarse grained time isn't * later than what's already there. */ cns = smp_load_acquire(&inode->i_ctime_nsec); if (cns & I_CTIME_QUERIED) { struct timespec64 ctime = { .tv_sec = inode->i_ctime_sec, .tv_nsec = cns & ~I_CTIME_QUERIED }; if (timespec64_compare(&now, &ctime) <= 0) { ktime_get_real_ts64_mg(&now); now = timestamp_truncate(now, inode); mgtime_counter_inc(mg_fine_stamps); } } mgtime_counter_inc(mg_ctime_updates); /* No need to cmpxchg if it's exactly the same */ if (cns == now.tv_nsec && inode->i_ctime_sec == now.tv_sec) { trace_ctime_xchg_skip(inode, &now); goto out; } cur = cns; retry: /* Try to swap the nsec value into place. */ if (try_cmpxchg(&inode->i_ctime_nsec, &cur, now.tv_nsec)) { /* If swap occurred, then we're (mostly) done */ inode->i_ctime_sec = now.tv_sec; trace_ctime_ns_xchg(inode, cns, now.tv_nsec, cur); mgtime_counter_inc(mg_ctime_swaps); } else { /* * Was the change due to someone marking the old ctime QUERIED? * If so then retry the swap. This can only happen once since * the only way to clear I_CTIME_QUERIED is to stamp the inode * with a new ctime. */ if (!(cns & I_CTIME_QUERIED) && (cns | I_CTIME_QUERIED) == cur) { cns = cur; goto retry; } /* Otherwise, keep the existing ctime */ now.tv_sec = inode->i_ctime_sec; now.tv_nsec = cur & ~I_CTIME_QUERIED; } out: return now; } EXPORT_SYMBOL(inode_set_ctime_current); /** * inode_set_ctime_deleg - try to update the ctime on a delegated inode * @inode: inode to update * @update: timespec64 to set the ctime * * Attempt to atomically update the ctime on behalf of a delegation holder. * * The nfs server can call back the holder of a delegation to get updated * inode attributes, including the mtime. When updating the mtime, update * the ctime to a value at least equal to that. * * This can race with concurrent updates to the inode, in which * case the update is skipped. * * Note that this works even when multigrain timestamps are not enabled, * so it is used in either case. */ struct timespec64 inode_set_ctime_deleg(struct inode *inode, struct timespec64 update) { struct timespec64 now, cur_ts; u32 cur, old; /* pairs with try_cmpxchg below */ cur = smp_load_acquire(&inode->i_ctime_nsec); cur_ts.tv_nsec = cur & ~I_CTIME_QUERIED; cur_ts.tv_sec = inode->i_ctime_sec; /* If the update is older than the existing value, skip it. */ if (timespec64_compare(&update, &cur_ts) <= 0) return cur_ts; ktime_get_coarse_real_ts64_mg(&now); /* Clamp the update to "now" if it's in the future */ if (timespec64_compare(&update, &now) > 0) update = now; update = timestamp_truncate(update, inode); /* No need to update if the values are already the same */ if (timespec64_equal(&update, &cur_ts)) return cur_ts; /* * Try to swap the nsec value into place. If it fails, that means * it raced with an update due to a write or similar activity. That * stamp takes precedence, so just skip the update. */ retry: old = cur; if (try_cmpxchg(&inode->i_ctime_nsec, &cur, update.tv_nsec)) { inode->i_ctime_sec = update.tv_sec; mgtime_counter_inc(mg_ctime_swaps); return update; } /* * Was the change due to another task marking the old ctime QUERIED? * * If so, then retry the swap. This can only happen once since * the only way to clear I_CTIME_QUERIED is to stamp the inode * with a new ctime. */ if (!(old & I_CTIME_QUERIED) && (cur == (old | I_CTIME_QUERIED))) goto retry; /* Otherwise, it was a new timestamp. */ cur_ts.tv_sec = inode->i_ctime_sec; cur_ts.tv_nsec = cur & ~I_CTIME_QUERIED; return cur_ts; } EXPORT_SYMBOL(inode_set_ctime_deleg); /** * in_group_or_capable - check whether caller is CAP_FSETID privileged * @idmap: idmap of the mount @inode was found from * @inode: inode to check * @vfsgid: the new/current vfsgid of @inode * * Check whether @vfsgid is in the caller's group list or if the caller is * privileged with CAP_FSETID over @inode. This can be used to determine * whether the setgid bit can be kept or must be dropped. * * Return: true if the caller is sufficiently privileged, false if not. */ bool in_group_or_capable(struct mnt_idmap *idmap, const struct inode *inode, vfsgid_t vfsgid) { if (vfsgid_in_group_p(vfsgid)) return true; if (capable_wrt_inode_uidgid(idmap, inode, CAP_FSETID)) return true; return false; } EXPORT_SYMBOL(in_group_or_capable); /** * mode_strip_sgid - handle the sgid bit for non-directories * @idmap: idmap of the mount the inode was created from * @dir: parent directory inode * @mode: mode of the file to be created in @dir * * If the @mode of the new file has both the S_ISGID and S_IXGRP bit * raised and @dir has the S_ISGID bit raised ensure that the caller is * either in the group of the parent directory or they have CAP_FSETID * in their user namespace and are privileged over the parent directory. * In all other cases, strip the S_ISGID bit from @mode. * * Return: the new mode to use for the file */ umode_t mode_strip_sgid(struct mnt_idmap *idmap, const struct inode *dir, umode_t mode) { if ((mode & (S_ISGID | S_IXGRP)) != (S_ISGID | S_IXGRP)) return mode; if (S_ISDIR(mode) || !dir || !(dir->i_mode & S_ISGID)) return mode; if (in_group_or_capable(idmap, dir, i_gid_into_vfsgid(idmap, dir))) return mode; return mode & ~S_ISGID; } EXPORT_SYMBOL(mode_strip_sgid); #ifdef CONFIG_DEBUG_VFS /* * Dump an inode. * * TODO: add a proper inode dumping routine, this is a stub to get debug off the * ground. * * TODO: handle getting to fs type with get_kernel_nofault()? * See dump_mapping() above. */ void dump_inode(struct inode *inode, const char *reason) { struct super_block *sb = inode->i_sb; pr_warn("%s encountered for inode %px\n" "fs %s mode %ho opflags 0x%hx flags 0x%x state 0x%x count %d\n", reason, inode, sb->s_type->name, inode->i_mode, inode->i_opflags, inode->i_flags, inode->i_state, atomic_read(&inode->i_count)); } EXPORT_SYMBOL(dump_inode); #endif
11 7 1 2 11 1 22 1 5 7 15 11 1 2 8 2 19 32 11 22 2 11 16 1 20 14 14 3 1 1 1 3 3 32 5 31 4 31 1 28 2 23 26 28 3 27 4 30 1 29 3 29 5 5 8 9 20 3 26 3 9 4 4 4 4 1 3 1 3 3 1 6 1 5 1 4 4 1 31 41 41 9 5 24 3 1 31 26 7 1 33 43 42 11 43 41 31 9 31 37 42 3 1 3 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2018-2019 HUAWEI, Inc. * https://www.huawei.com/ */ #include "internal.h" #include <linux/unaligned.h> #include <trace/events/erofs.h> struct z_erofs_maprecorder { struct inode *inode; struct erofs_map_blocks *map; unsigned long lcn; /* compression extent information gathered */ u8 type, headtype; u16 clusterofs; u16 delta[2]; erofs_blk_t pblk, compressedblks; erofs_off_t nextpackoff; bool partialref, in_mbox; }; static int z_erofs_load_full_lcluster(struct z_erofs_maprecorder *m, unsigned long lcn) { struct inode *const inode = m->inode; struct erofs_inode *const vi = EROFS_I(inode); const erofs_off_t pos = Z_EROFS_FULL_INDEX_START(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize) + lcn * sizeof(struct z_erofs_lcluster_index); struct z_erofs_lcluster_index *di; unsigned int advise; di = erofs_read_metabuf(&m->map->buf, inode->i_sb, pos, m->in_mbox); if (IS_ERR(di)) return PTR_ERR(di); m->lcn = lcn; m->nextpackoff = pos + sizeof(struct z_erofs_lcluster_index); advise = le16_to_cpu(di->di_advise); m->type = advise & Z_EROFS_LI_LCLUSTER_TYPE_MASK; if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) { m->clusterofs = 1 << vi->z_lclusterbits; m->delta[0] = le16_to_cpu(di->di_u.delta[0]); if (m->delta[0] & Z_EROFS_LI_D0_CBLKCNT) { if (!(vi->z_advise & (Z_EROFS_ADVISE_BIG_PCLUSTER_1 | Z_EROFS_ADVISE_BIG_PCLUSTER_2))) { DBG_BUGON(1); return -EFSCORRUPTED; } m->compressedblks = m->delta[0] & ~Z_EROFS_LI_D0_CBLKCNT; m->delta[0] = 1; } m->delta[1] = le16_to_cpu(di->di_u.delta[1]); } else { m->partialref = !!(advise & Z_EROFS_LI_PARTIAL_REF); m->clusterofs = le16_to_cpu(di->di_clusterofs); if (m->clusterofs >= 1 << vi->z_lclusterbits) { DBG_BUGON(1); return -EFSCORRUPTED; } m->pblk = le32_to_cpu(di->di_u.blkaddr); } return 0; } static unsigned int decode_compactedbits(unsigned int lobits, u8 *in, unsigned int pos, u8 *type) { const unsigned int v = get_unaligned_le32(in + pos / 8) >> (pos & 7); const unsigned int lo = v & ((1 << lobits) - 1); *type = (v >> lobits) & 3; return lo; } static int get_compacted_la_distance(unsigned int lobits, unsigned int encodebits, unsigned int vcnt, u8 *in, int i) { unsigned int lo, d1 = 0; u8 type; DBG_BUGON(i >= vcnt); do { lo = decode_compactedbits(lobits, in, encodebits * i, &type); if (type != Z_EROFS_LCLUSTER_TYPE_NONHEAD) return d1; ++d1; } while (++i < vcnt); /* vcnt - 1 (Z_EROFS_LCLUSTER_TYPE_NONHEAD) item */ if (!(lo & Z_EROFS_LI_D0_CBLKCNT)) d1 += lo - 1; return d1; } static int z_erofs_load_compact_lcluster(struct z_erofs_maprecorder *m, unsigned long lcn, bool lookahead) { struct inode *const inode = m->inode; struct erofs_inode *const vi = EROFS_I(inode); const erofs_off_t ebase = Z_EROFS_MAP_HEADER_END(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize); const unsigned int lclusterbits = vi->z_lclusterbits; const unsigned int totalidx = erofs_iblks(inode); unsigned int compacted_4b_initial, compacted_2b, amortizedshift; unsigned int vcnt, lo, lobits, encodebits, nblk, bytes; bool big_pcluster = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1; erofs_off_t pos; u8 *in, type; int i; if (lcn >= totalidx || lclusterbits > 14) return -EINVAL; m->lcn = lcn; /* used to align to 32-byte (compacted_2b) alignment */ compacted_4b_initial = ((32 - ebase % 32) / 4) & 7; compacted_2b = 0; if ((vi->z_advise & Z_EROFS_ADVISE_COMPACTED_2B) && compacted_4b_initial < totalidx) compacted_2b = rounddown(totalidx - compacted_4b_initial, 16); pos = ebase; amortizedshift = 2; /* compact_4b */ if (lcn >= compacted_4b_initial) { pos += compacted_4b_initial * 4; lcn -= compacted_4b_initial; if (lcn < compacted_2b) { amortizedshift = 1; } else { pos += compacted_2b * 2; lcn -= compacted_2b; } } pos += lcn * (1 << amortizedshift); /* figure out the lcluster count in this pack */ if (1 << amortizedshift == 4 && lclusterbits <= 14) vcnt = 2; else if (1 << amortizedshift == 2 && lclusterbits <= 12) vcnt = 16; else return -EOPNOTSUPP; in = erofs_read_metabuf(&m->map->buf, inode->i_sb, pos, m->in_mbox); if (IS_ERR(in)) return PTR_ERR(in); /* it doesn't equal to round_up(..) */ m->nextpackoff = round_down(pos, vcnt << amortizedshift) + (vcnt << amortizedshift); lobits = max(lclusterbits, ilog2(Z_EROFS_LI_D0_CBLKCNT) + 1U); encodebits = ((vcnt << amortizedshift) - sizeof(__le32)) * 8 / vcnt; bytes = pos & ((vcnt << amortizedshift) - 1); in -= bytes; i = bytes >> amortizedshift; lo = decode_compactedbits(lobits, in, encodebits * i, &type); m->type = type; if (type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) { m->clusterofs = 1 << lclusterbits; /* figure out lookahead_distance: delta[1] if needed */ if (lookahead) m->delta[1] = get_compacted_la_distance(lobits, encodebits, vcnt, in, i); if (lo & Z_EROFS_LI_D0_CBLKCNT) { if (!big_pcluster) { DBG_BUGON(1); return -EFSCORRUPTED; } m->compressedblks = lo & ~Z_EROFS_LI_D0_CBLKCNT; m->delta[0] = 1; return 0; } else if (i + 1 != (int)vcnt) { m->delta[0] = lo; return 0; } /* * since the last lcluster in the pack is special, * of which lo saves delta[1] rather than delta[0]. * Hence, get delta[0] by the previous lcluster indirectly. */ lo = decode_compactedbits(lobits, in, encodebits * (i - 1), &type); if (type != Z_EROFS_LCLUSTER_TYPE_NONHEAD) lo = 0; else if (lo & Z_EROFS_LI_D0_CBLKCNT) lo = 1; m->delta[0] = lo + 1; return 0; } m->clusterofs = lo; m->delta[0] = 0; /* figout out blkaddr (pblk) for HEAD lclusters */ if (!big_pcluster) { nblk = 1; while (i > 0) { --i; lo = decode_compactedbits(lobits, in, encodebits * i, &type); if (type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) i -= lo; if (i >= 0) ++nblk; } } else { nblk = 0; while (i > 0) { --i; lo = decode_compactedbits(lobits, in, encodebits * i, &type); if (type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) { if (lo & Z_EROFS_LI_D0_CBLKCNT) { --i; nblk += lo & ~Z_EROFS_LI_D0_CBLKCNT; continue; } /* bigpcluster shouldn't have plain d0 == 1 */ if (lo <= 1) { DBG_BUGON(1); return -EFSCORRUPTED; } i -= lo - 2; continue; } ++nblk; } } in += (vcnt << amortizedshift) - sizeof(__le32); m->pblk = le32_to_cpu(*(__le32 *)in) + nblk; return 0; } static int z_erofs_load_lcluster_from_disk(struct z_erofs_maprecorder *m, unsigned int lcn, bool lookahead) { if (m->type >= Z_EROFS_LCLUSTER_TYPE_MAX) { erofs_err(m->inode->i_sb, "unknown type %u @ lcn %u of nid %llu", m->type, lcn, EROFS_I(m->inode)->nid); DBG_BUGON(1); return -EOPNOTSUPP; } switch (EROFS_I(m->inode)->datalayout) { case EROFS_INODE_COMPRESSED_FULL: return z_erofs_load_full_lcluster(m, lcn); case EROFS_INODE_COMPRESSED_COMPACT: return z_erofs_load_compact_lcluster(m, lcn, lookahead); default: return -EINVAL; } } static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m, unsigned int lookback_distance) { struct super_block *sb = m->inode->i_sb; struct erofs_inode *const vi = EROFS_I(m->inode); const unsigned int lclusterbits = vi->z_lclusterbits; while (m->lcn >= lookback_distance) { unsigned long lcn = m->lcn - lookback_distance; int err; err = z_erofs_load_lcluster_from_disk(m, lcn, false); if (err) return err; if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) { lookback_distance = m->delta[0]; if (!lookback_distance) break; continue; } else { m->headtype = m->type; m->map->m_la = (lcn << lclusterbits) | m->clusterofs; return 0; } } erofs_err(sb, "bogus lookback distance %u @ lcn %lu of nid %llu", lookback_distance, m->lcn, vi->nid); DBG_BUGON(1); return -EFSCORRUPTED; } static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m, unsigned int initial_lcn) { struct inode *inode = m->inode; struct super_block *sb = inode->i_sb; struct erofs_inode *vi = EROFS_I(inode); bool bigpcl1 = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1; bool bigpcl2 = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2; unsigned long lcn = m->lcn + 1; int err; DBG_BUGON(m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD); DBG_BUGON(m->type != m->headtype); if ((m->headtype == Z_EROFS_LCLUSTER_TYPE_HEAD1 && !bigpcl1) || ((m->headtype == Z_EROFS_LCLUSTER_TYPE_PLAIN || m->headtype == Z_EROFS_LCLUSTER_TYPE_HEAD2) && !bigpcl2) || (lcn << vi->z_lclusterbits) >= inode->i_size) m->compressedblks = 1; if (m->compressedblks) goto out; err = z_erofs_load_lcluster_from_disk(m, lcn, false); if (err) return err; /* * If the 1st NONHEAD lcluster has already been handled initially w/o * valid compressedblks, which means at least it mustn't be CBLKCNT, or * an internal implemenatation error is detected. * * The following code can also handle it properly anyway, but let's * BUG_ON in the debugging mode only for developers to notice that. */ DBG_BUGON(lcn == initial_lcn && m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD); if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD && m->delta[0] != 1) { erofs_err(sb, "bogus CBLKCNT @ lcn %lu of nid %llu", lcn, vi->nid); DBG_BUGON(1); return -EFSCORRUPTED; } /* * if the 1st NONHEAD lcluster is actually PLAIN or HEAD type rather * than CBLKCNT, it's a 1 block-sized pcluster. */ if (m->type != Z_EROFS_LCLUSTER_TYPE_NONHEAD || !m->compressedblks) m->compressedblks = 1; out: m->map->m_plen = erofs_pos(sb, m->compressedblks); return 0; } static int z_erofs_get_extent_decompressedlen(struct z_erofs_maprecorder *m) { struct inode *inode = m->inode; struct erofs_inode *vi = EROFS_I(inode); struct erofs_map_blocks *map = m->map; unsigned int lclusterbits = vi->z_lclusterbits; u64 lcn = m->lcn, headlcn = map->m_la >> lclusterbits; int err; while (1) { /* handle the last EOF pcluster (no next HEAD lcluster) */ if ((lcn << lclusterbits) >= inode->i_size) { map->m_llen = inode->i_size - map->m_la; return 0; } err = z_erofs_load_lcluster_from_disk(m, lcn, true); if (err) return err; if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) { /* work around invalid d1 generated by pre-1.0 mkfs */ if (unlikely(!m->delta[1])) { m->delta[1] = 1; DBG_BUGON(1); } } else if (m->type < Z_EROFS_LCLUSTER_TYPE_MAX) { if (lcn != headlcn) break; /* ends at the next HEAD lcluster */ m->delta[1] = 1; } lcn += m->delta[1]; } map->m_llen = (lcn << lclusterbits) + m->clusterofs - map->m_la; return 0; } static int z_erofs_map_blocks_fo(struct inode *inode, struct erofs_map_blocks *map, int flags) { struct erofs_inode *vi = EROFS_I(inode); struct super_block *sb = inode->i_sb; bool fragment = vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER; bool ztailpacking = vi->z_idata_size; unsigned int lclusterbits = vi->z_lclusterbits; struct z_erofs_maprecorder m = { .inode = inode, .map = map, .in_mbox = erofs_inode_in_metabox(inode), }; unsigned int endoff; unsigned long initial_lcn; unsigned long long ofs, end; int err; ofs = flags & EROFS_GET_BLOCKS_FINDTAIL ? inode->i_size - 1 : map->m_la; if (fragment && !(flags & EROFS_GET_BLOCKS_FINDTAIL) && !vi->z_tailextent_headlcn) { map->m_la = 0; map->m_llen = inode->i_size; map->m_flags = EROFS_MAP_FRAGMENT; return 0; } initial_lcn = ofs >> lclusterbits; endoff = ofs & ((1 << lclusterbits) - 1); err = z_erofs_load_lcluster_from_disk(&m, initial_lcn, false); if (err) goto unmap_out; if ((flags & EROFS_GET_BLOCKS_FINDTAIL) && ztailpacking) vi->z_fragmentoff = m.nextpackoff; map->m_flags = EROFS_MAP_MAPPED | EROFS_MAP_ENCODED; end = (m.lcn + 1ULL) << lclusterbits; if (m.type != Z_EROFS_LCLUSTER_TYPE_NONHEAD && endoff >= m.clusterofs) { m.headtype = m.type; map->m_la = (m.lcn << lclusterbits) | m.clusterofs; /* * For ztailpacking files, in order to inline data more * effectively, special EOF lclusters are now supported * which can have three parts at most. */ if (ztailpacking && end > inode->i_size) end = inode->i_size; } else { if (m.type != Z_EROFS_LCLUSTER_TYPE_NONHEAD) { /* m.lcn should be >= 1 if endoff < m.clusterofs */ if (!m.lcn) { erofs_err(sb, "invalid logical cluster 0 at nid %llu", vi->nid); err = -EFSCORRUPTED; goto unmap_out; } end = (m.lcn << lclusterbits) | m.clusterofs; map->m_flags |= EROFS_MAP_FULL_MAPPED; m.delta[0] = 1; } /* get the corresponding first chunk */ err = z_erofs_extent_lookback(&m, m.delta[0]); if (err) goto unmap_out; } if (m.partialref) map->m_flags |= EROFS_MAP_PARTIAL_REF; map->m_llen = end - map->m_la; if (flags & EROFS_GET_BLOCKS_FINDTAIL) { vi->z_tailextent_headlcn = m.lcn; /* for non-compact indexes, fragmentoff is 64 bits */ if (fragment && vi->datalayout == EROFS_INODE_COMPRESSED_FULL) vi->z_fragmentoff |= (u64)m.pblk << 32; } if (ztailpacking && m.lcn == vi->z_tailextent_headlcn) { map->m_flags |= EROFS_MAP_META; map->m_pa = vi->z_fragmentoff; map->m_plen = vi->z_idata_size; if (erofs_blkoff(sb, map->m_pa) + map->m_plen > sb->s_blocksize) { erofs_err(sb, "ztailpacking inline data across blocks @ nid %llu", vi->nid); err = -EFSCORRUPTED; goto unmap_out; } } else if (fragment && m.lcn == vi->z_tailextent_headlcn) { map->m_flags = EROFS_MAP_FRAGMENT; } else { map->m_pa = erofs_pos(sb, m.pblk); err = z_erofs_get_extent_compressedlen(&m, initial_lcn); if (err) goto unmap_out; } if (m.headtype == Z_EROFS_LCLUSTER_TYPE_PLAIN) { if (map->m_llen > map->m_plen) { DBG_BUGON(1); err = -EFSCORRUPTED; goto unmap_out; } if (vi->z_advise & Z_EROFS_ADVISE_INTERLACED_PCLUSTER) map->m_algorithmformat = Z_EROFS_COMPRESSION_INTERLACED; else map->m_algorithmformat = Z_EROFS_COMPRESSION_SHIFTED; } else if (m.headtype == Z_EROFS_LCLUSTER_TYPE_HEAD2) { map->m_algorithmformat = vi->z_algorithmtype[1]; } else { map->m_algorithmformat = vi->z_algorithmtype[0]; } if ((flags & EROFS_GET_BLOCKS_FIEMAP) || ((flags & EROFS_GET_BLOCKS_READMORE) && (map->m_algorithmformat == Z_EROFS_COMPRESSION_LZMA || map->m_algorithmformat == Z_EROFS_COMPRESSION_DEFLATE || map->m_algorithmformat == Z_EROFS_COMPRESSION_ZSTD) && map->m_llen >= i_blocksize(inode))) { err = z_erofs_get_extent_decompressedlen(&m); if (!err) map->m_flags |= EROFS_MAP_FULL_MAPPED; } unmap_out: erofs_unmap_metabuf(&m.map->buf); return err; } static int z_erofs_map_blocks_ext(struct inode *inode, struct erofs_map_blocks *map, int flags) { struct erofs_inode *vi = EROFS_I(inode); struct super_block *sb = inode->i_sb; bool interlaced = vi->z_advise & Z_EROFS_ADVISE_INTERLACED_PCLUSTER; unsigned int recsz = z_erofs_extent_recsize(vi->z_advise); erofs_off_t pos = round_up(Z_EROFS_MAP_HEADER_END(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize), recsz); bool in_mbox = erofs_inode_in_metabox(inode); erofs_off_t lend = inode->i_size; erofs_off_t l, r, mid, pa, la, lstart; struct z_erofs_extent *ext; unsigned int fmt; bool last; map->m_flags = 0; if (recsz <= offsetof(struct z_erofs_extent, pstart_hi)) { if (recsz <= offsetof(struct z_erofs_extent, pstart_lo)) { ext = erofs_read_metabuf(&map->buf, sb, pos, in_mbox); if (IS_ERR(ext)) return PTR_ERR(ext); pa = le64_to_cpu(*(__le64 *)ext); pos += sizeof(__le64); lstart = 0; } else { lstart = round_down(map->m_la, 1 << vi->z_lclusterbits); pos += (lstart >> vi->z_lclusterbits) * recsz; pa = EROFS_NULL_ADDR; } for (; lstart <= map->m_la; lstart += 1 << vi->z_lclusterbits) { ext = erofs_read_metabuf(&map->buf, sb, pos, in_mbox); if (IS_ERR(ext)) return PTR_ERR(ext); map->m_plen = le32_to_cpu(ext->plen); if (pa != EROFS_NULL_ADDR) { map->m_pa = pa; pa += map->m_plen & Z_EROFS_EXTENT_PLEN_MASK; } else { map->m_pa = le32_to_cpu(ext->pstart_lo); } pos += recsz; } last = (lstart >= round_up(lend, 1 << vi->z_lclusterbits)); lend = min(lstart, lend); lstart -= 1 << vi->z_lclusterbits; } else { lstart = lend; for (l = 0, r = vi->z_extents; l < r; ) { mid = l + (r - l) / 2; ext = erofs_read_metabuf(&map->buf, sb, pos + mid * recsz, in_mbox); if (IS_ERR(ext)) return PTR_ERR(ext); la = le32_to_cpu(ext->lstart_lo); pa = le32_to_cpu(ext->pstart_lo) | (u64)le32_to_cpu(ext->pstart_hi) << 32; if (recsz > offsetof(struct z_erofs_extent, lstart_hi)) la |= (u64)le32_to_cpu(ext->lstart_hi) << 32; if (la > map->m_la) { r = mid; if (la > lend) { DBG_BUGON(1); return -EFSCORRUPTED; } lend = la; } else { l = mid + 1; if (map->m_la == la) r = min(l + 1, r); lstart = la; map->m_plen = le32_to_cpu(ext->plen); map->m_pa = pa; } } last = (l >= vi->z_extents); } if (lstart < lend) { map->m_la = lstart; if (last && (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER)) { map->m_flags = EROFS_MAP_FRAGMENT; vi->z_fragmentoff = map->m_plen; if (recsz > offsetof(struct z_erofs_extent, pstart_lo)) vi->z_fragmentoff |= map->m_pa << 32; } else if (map->m_plen) { map->m_flags |= EROFS_MAP_MAPPED | EROFS_MAP_FULL_MAPPED | EROFS_MAP_ENCODED; fmt = map->m_plen >> Z_EROFS_EXTENT_PLEN_FMT_BIT; if (fmt) map->m_algorithmformat = fmt - 1; else if (interlaced && !erofs_blkoff(sb, map->m_pa)) map->m_algorithmformat = Z_EROFS_COMPRESSION_INTERLACED; else map->m_algorithmformat = Z_EROFS_COMPRESSION_SHIFTED; if (map->m_plen & Z_EROFS_EXTENT_PLEN_PARTIAL) map->m_flags |= EROFS_MAP_PARTIAL_REF; map->m_plen &= Z_EROFS_EXTENT_PLEN_MASK; } } map->m_llen = lend - map->m_la; return 0; } static int z_erofs_fill_inode(struct inode *inode, struct erofs_map_blocks *map) { struct erofs_inode *const vi = EROFS_I(inode); struct super_block *const sb = inode->i_sb; struct z_erofs_map_header *h; erofs_off_t pos; int err = 0; if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags)) { /* * paired with smp_mb() at the end of the function to ensure * fields will only be observed after the bit is set. */ smp_mb(); return 0; } if (wait_on_bit_lock(&vi->flags, EROFS_I_BL_Z_BIT, TASK_KILLABLE)) return -ERESTARTSYS; if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags)) goto out_unlock; pos = ALIGN(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize, 8); h = erofs_read_metabuf(&map->buf, sb, pos, erofs_inode_in_metabox(inode)); if (IS_ERR(h)) { err = PTR_ERR(h); goto out_unlock; } /* * if the highest bit of the 8-byte map header is set, the whole file * is stored in the packed inode. The rest bits keeps z_fragmentoff. */ if (h->h_clusterbits >> Z_EROFS_FRAGMENT_INODE_BIT) { vi->z_advise = Z_EROFS_ADVISE_FRAGMENT_PCLUSTER; vi->z_fragmentoff = le64_to_cpu(*(__le64 *)h) ^ (1ULL << 63); vi->z_tailextent_headlcn = 0; goto done; } vi->z_advise = le16_to_cpu(h->h_advise); vi->z_lclusterbits = sb->s_blocksize_bits + (h->h_clusterbits & 15); if (vi->datalayout == EROFS_INODE_COMPRESSED_FULL && (vi->z_advise & Z_EROFS_ADVISE_EXTENTS)) { vi->z_extents = le32_to_cpu(h->h_extents_lo) | ((u64)le16_to_cpu(h->h_extents_hi) << 32); goto done; } vi->z_algorithmtype[0] = h->h_algorithmtype & 15; vi->z_algorithmtype[1] = h->h_algorithmtype >> 4; if (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER) vi->z_fragmentoff = le32_to_cpu(h->h_fragmentoff); else if (vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER) vi->z_idata_size = le16_to_cpu(h->h_idata_size); if (!erofs_sb_has_big_pcluster(EROFS_SB(sb)) && vi->z_advise & (Z_EROFS_ADVISE_BIG_PCLUSTER_1 | Z_EROFS_ADVISE_BIG_PCLUSTER_2)) { erofs_err(sb, "per-inode big pcluster without sb feature for nid %llu", vi->nid); err = -EFSCORRUPTED; goto out_unlock; } if (vi->datalayout == EROFS_INODE_COMPRESSED_COMPACT && !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1) ^ !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2)) { erofs_err(sb, "big pcluster head1/2 of compact indexes should be consistent for nid %llu", vi->nid); err = -EFSCORRUPTED; goto out_unlock; } if (vi->z_idata_size || (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER)) { struct erofs_map_blocks tm = { .buf = __EROFS_BUF_INITIALIZER }; err = z_erofs_map_blocks_fo(inode, &tm, EROFS_GET_BLOCKS_FINDTAIL); erofs_put_metabuf(&tm.buf); if (err < 0) goto out_unlock; } done: /* paired with smp_mb() at the beginning of the function */ smp_mb(); set_bit(EROFS_I_Z_INITED_BIT, &vi->flags); out_unlock: clear_and_wake_up_bit(EROFS_I_BL_Z_BIT, &vi->flags); return err; } static int z_erofs_map_sanity_check(struct inode *inode, struct erofs_map_blocks *map) { struct erofs_sb_info *sbi = EROFS_I_SB(inode); if (!(map->m_flags & EROFS_MAP_ENCODED)) return 0; if (unlikely(map->m_algorithmformat >= Z_EROFS_COMPRESSION_RUNTIME_MAX)) { erofs_err(inode->i_sb, "unknown algorithm %d @ pos %llu for nid %llu, please upgrade kernel", map->m_algorithmformat, map->m_la, EROFS_I(inode)->nid); return -EOPNOTSUPP; } if (unlikely(map->m_algorithmformat < Z_EROFS_COMPRESSION_MAX && !(sbi->available_compr_algs & (1 << map->m_algorithmformat)))) { erofs_err(inode->i_sb, "inconsistent algorithmtype %u for nid %llu", map->m_algorithmformat, EROFS_I(inode)->nid); return -EFSCORRUPTED; } if (unlikely(map->m_plen > Z_EROFS_PCLUSTER_MAX_SIZE || map->m_llen > Z_EROFS_PCLUSTER_MAX_DSIZE)) return -EOPNOTSUPP; return 0; } int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map, int flags) { struct erofs_inode *const vi = EROFS_I(inode); int err = 0; trace_erofs_map_blocks_enter(inode, map, flags); if (map->m_la >= inode->i_size) { /* post-EOF unmapped extent */ map->m_llen = map->m_la + 1 - inode->i_size; map->m_la = inode->i_size; map->m_flags = 0; } else { err = z_erofs_fill_inode(inode, map); if (!err) { if (vi->datalayout == EROFS_INODE_COMPRESSED_FULL && (vi->z_advise & Z_EROFS_ADVISE_EXTENTS)) err = z_erofs_map_blocks_ext(inode, map, flags); else err = z_erofs_map_blocks_fo(inode, map, flags); } if (!err) err = z_erofs_map_sanity_check(inode, map); if (err) map->m_llen = 0; } trace_erofs_map_blocks_exit(inode, map, flags, err); return err; } static int z_erofs_iomap_begin_report(struct inode *inode, loff_t offset, loff_t length, unsigned int flags, struct iomap *iomap, struct iomap *srcmap) { int ret; struct erofs_map_blocks map = { .m_la = offset }; ret = z_erofs_map_blocks_iter(inode, &map, EROFS_GET_BLOCKS_FIEMAP); erofs_put_metabuf(&map.buf); if (ret < 0) return ret; iomap->bdev = inode->i_sb->s_bdev; iomap->offset = map.m_la; iomap->length = map.m_llen; if (map.m_flags & EROFS_MAP_MAPPED) { iomap->type = IOMAP_MAPPED; iomap->addr = map.m_flags & __EROFS_MAP_FRAGMENT ? IOMAP_NULL_ADDR : map.m_pa; } else { iomap->type = IOMAP_HOLE; iomap->addr = IOMAP_NULL_ADDR; /* * No strict rule on how to describe extents for post EOF, yet * we need to do like below. Otherwise, iomap itself will get * into an endless loop on post EOF. * * Calculate the effective offset by subtracting extent start * (map.m_la) from the requested offset, and add it to length. * (NB: offset >= map.m_la always) */ if (iomap->offset >= inode->i_size) iomap->length = length + offset - map.m_la; } iomap->flags = 0; return 0; } const struct iomap_ops z_erofs_iomap_report_ops = { .iomap_begin = z_erofs_iomap_begin_report, };
1 32 32 32 31 32 32 3377 3391 32 32 32 32 32 32 32 161 136 27 3 3 3 3 3 11 11 11 18 33 51 12 4 1 2 32 1 31 24 8 32 22 13 10 10 50 14 11 4 1 9 10 9 1 10 1 9 4 6 28 22 1 1 1 52 51 41 52 4 49 37 39 39 50 10 49 19 15 18 16 17 2 2 61 62 2 18 33 4 3 44 56 61 61 59 59 6 44 60 50 4 19 2 4 1 2 8 3 5 3 2 14 31 36 5 8 1 16 12 30 3 33 26 7 44 43 39 4 35 2 1 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 // SPDX-License-Identifier: GPL-2.0-only /* * linux/kernel/exit.c * * Copyright (C) 1991, 1992 Linus Torvalds */ #include <linux/mm.h> #include <linux/slab.h> #include <linux/sched/autogroup.h> #include <linux/sched/mm.h> #include <linux/sched/stat.h> #include <linux/sched/task.h> #include <linux/sched/task_stack.h> #include <linux/sched/cputime.h> #include <linux/interrupt.h> #include <linux/module.h> #include <linux/capability.h> #include <linux/completion.h> #include <linux/personality.h> #include <linux/tty.h> #include <linux/iocontext.h> #include <linux/key.h> #include <linux/cpu.h> #include <linux/acct.h> #include <linux/tsacct_kern.h> #include <linux/file.h> #include <linux/freezer.h> #include <linux/binfmts.h> #include <linux/nsproxy.h> #include <linux/pid_namespace.h> #include <linux/ptrace.h> #include <linux/profile.h> #include <linux/mount.h> #include <linux/proc_fs.h> #include <linux/kthread.h> #include <linux/mempolicy.h> #include <linux/taskstats_kern.h> #include <linux/delayacct.h> #include <linux/cgroup.h> #include <linux/syscalls.h> #include <linux/signal.h> #include <linux/posix-timers.h> #include <linux/cn_proc.h> #include <linux/mutex.h> #include <linux/futex.h> #include <linux/pipe_fs_i.h> #include <linux/audit.h> /* for audit_free() */ #include <linux/resource.h> #include <linux/task_io_accounting_ops.h> #include <linux/blkdev.h> #include <linux/task_work.h> #include <linux/fs_struct.h> #include <linux/init_task.h> #include <linux/perf_event.h> #include <trace/events/sched.h> #include <linux/hw_breakpoint.h> #include <linux/oom.h> #include <linux/writeback.h> #include <linux/shm.h> #include <linux/kcov.h> #include <linux/kmsan.h> #include <linux/random.h> #include <linux/rcuwait.h> #include <linux/compat.h> #include <linux/io_uring.h> #include <linux/kprobes.h> #include <linux/rethook.h> #include <linux/sysfs.h> #include <linux/user_events.h> #include <linux/unwind_deferred.h> #include <linux/uaccess.h> #include <linux/pidfs.h> #include <uapi/linux/wait.h> #include <asm/unistd.h> #include <asm/mmu_context.h> #include "exit.h" /* * The default value should be high enough to not crash a system that randomly * crashes its kernel from time to time, but low enough to at least not permit * overflowing 32-bit refcounts or the ldsem writer count. */ static unsigned int oops_limit = 10000; #ifdef CONFIG_SYSCTL static const struct ctl_table kern_exit_table[] = { { .procname = "oops_limit", .data = &oops_limit, .maxlen = sizeof(oops_limit), .mode = 0644, .proc_handler = proc_douintvec, }, }; static __init int kernel_exit_sysctls_init(void) { register_sysctl_init("kernel", kern_exit_table); return 0; } late_initcall(kernel_exit_sysctls_init); #endif static atomic_t oops_count = ATOMIC_INIT(0); #ifdef CONFIG_SYSFS static ssize_t oops_count_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { return sysfs_emit(page, "%d\n", atomic_read(&oops_count)); } static struct kobj_attribute oops_count_attr = __ATTR_RO(oops_count); static __init int kernel_exit_sysfs_init(void) { sysfs_add_file_to_group(kernel_kobj, &oops_count_attr.attr, NULL); return 0; } late_initcall(kernel_exit_sysfs_init); #endif /* * For things release_task() would like to do *after* tasklist_lock is released. */ struct release_task_post { struct pid *pids[PIDTYPE_MAX]; }; static void __unhash_process(struct release_task_post *post, struct task_struct *p, bool group_dead) { struct pid *pid = task_pid(p); nr_threads--; detach_pid(post->pids, p, PIDTYPE_PID); wake_up_all(&pid->wait_pidfd); if (group_dead) { detach_pid(post->pids, p, PIDTYPE_TGID); detach_pid(post->pids, p, PIDTYPE_PGID); detach_pid(post->pids, p, PIDTYPE_SID); list_del_rcu(&p->tasks); list_del_init(&p->sibling); __this_cpu_dec(process_counts); } list_del_rcu(&p->thread_node); } /* * This function expects the tasklist_lock write-locked. */ static void __exit_signal(struct release_task_post *post, struct task_struct *tsk) { struct signal_struct *sig = tsk->signal; bool group_dead = thread_group_leader(tsk); struct sighand_struct *sighand; struct tty_struct *tty; u64 utime, stime; sighand = rcu_dereference_check(tsk->sighand, lockdep_tasklist_lock_is_held()); spin_lock(&sighand->siglock); #ifdef CONFIG_POSIX_TIMERS posix_cpu_timers_exit(tsk); if (group_dead) posix_cpu_timers_exit_group(tsk); #endif if (group_dead) { tty = sig->tty; sig->tty = NULL; } else { /* * If there is any task waiting for the group exit * then notify it: */ if (sig->notify_count > 0 && !--sig->notify_count) wake_up_process(sig->group_exec_task); if (tsk == sig->curr_target) sig->curr_target = next_thread(tsk); } /* * Accumulate here the counters for all threads as they die. We could * skip the group leader because it is the last user of signal_struct, * but we want to avoid the race with thread_group_cputime() which can * see the empty ->thread_head list. */ task_cputime(tsk, &utime, &stime); write_seqlock(&sig->stats_lock); sig->utime += utime; sig->stime += stime; sig->gtime += task_gtime(tsk); sig->min_flt += tsk->min_flt; sig->maj_flt += tsk->maj_flt; sig->nvcsw += tsk->nvcsw; sig->nivcsw += tsk->nivcsw; sig->inblock += task_io_get_inblock(tsk); sig->oublock += task_io_get_oublock(tsk); task_io_accounting_add(&sig->ioac, &tsk->ioac); sig->sum_sched_runtime += tsk->se.sum_exec_runtime; sig->nr_threads--; __unhash_process(post, tsk, group_dead); write_sequnlock(&sig->stats_lock); tsk->sighand = NULL; spin_unlock(&sighand->siglock); __cleanup_sighand(sighand); if (group_dead) tty_kref_put(tty); } static void delayed_put_task_struct(struct rcu_head *rhp) { struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); kprobe_flush_task(tsk); rethook_flush_task(tsk); perf_event_delayed_put(tsk); trace_sched_process_free(tsk); put_task_struct(tsk); } void put_task_struct_rcu_user(struct task_struct *task) { if (refcount_dec_and_test(&task->rcu_users)) call_rcu(&task->rcu, delayed_put_task_struct); } void __weak release_thread(struct task_struct *dead_task) { } void release_task(struct task_struct *p) { struct release_task_post post; struct task_struct *leader; struct pid *thread_pid; int zap_leader; repeat: memset(&post, 0, sizeof(post)); /* don't need to get the RCU readlock here - the process is dead and * can't be modifying its own credentials. But shut RCU-lockdep up */ rcu_read_lock(); dec_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1); rcu_read_unlock(); pidfs_exit(p); cgroup_release(p); /* Retrieve @thread_pid before __unhash_process() may set it to NULL. */ thread_pid = task_pid(p); write_lock_irq(&tasklist_lock); ptrace_release_task(p); __exit_signal(&post, p); /* * If we are the last non-leader member of the thread * group, and the leader is zombie, then notify the * group leader's parent process. (if it wants notification.) */ zap_leader = 0; leader = p->group_leader; if (leader != p && thread_group_empty(leader) && leader->exit_state == EXIT_ZOMBIE) { /* for pidfs_exit() and do_notify_parent() */ if (leader->signal->flags & SIGNAL_GROUP_EXIT) leader->exit_code = leader->signal->group_exit_code; /* * If we were the last child thread and the leader has * exited already, and the leader's parent ignores SIGCHLD, * then we are the one who should release the leader. */ zap_leader = do_notify_parent(leader, leader->exit_signal); if (zap_leader) leader->exit_state = EXIT_DEAD; } write_unlock_irq(&tasklist_lock); /* @thread_pid can't go away until free_pids() below */ proc_flush_pid(thread_pid); add_device_randomness(&p->se.sum_exec_runtime, sizeof(p->se.sum_exec_runtime)); free_pids(post.pids); release_thread(p); /* * This task was already removed from the process/thread/pid lists * and lock_task_sighand(p) can't succeed. Nobody else can touch * ->pending or, if group dead, signal->shared_pending. We can call * flush_sigqueue() lockless. */ flush_sigqueue(&p->pending); if (thread_group_leader(p)) flush_sigqueue(&p->signal->shared_pending); put_task_struct_rcu_user(p); p = leader; if (unlikely(zap_leader)) goto repeat; } int rcuwait_wake_up(struct rcuwait *w) { int ret = 0; struct task_struct *task; rcu_read_lock(); /* * Order condition vs @task, such that everything prior to the load * of @task is visible. This is the condition as to why the user called * rcuwait_wake() in the first place. Pairs with set_current_state() * barrier (A) in rcuwait_wait_event(). * * WAIT WAKE * [S] tsk = current [S] cond = true * MB (A) MB (B) * [L] cond [L] tsk */ smp_mb(); /* (B) */ task = rcu_dereference(w->task); if (task) ret = wake_up_process(task); rcu_read_unlock(); return ret; } EXPORT_SYMBOL_GPL(rcuwait_wake_up); /* * Determine if a process group is "orphaned", according to the POSIX * definition in 2.2.2.52. Orphaned process groups are not to be affected * by terminal-generated stop signals. Newly orphaned process groups are * to receive a SIGHUP and a SIGCONT. * * "I ask you, have you ever known what it is to be an orphan?" */ static int will_become_orphaned_pgrp(struct pid *pgrp, struct task_struct *ignored_task) { struct task_struct *p; do_each_pid_task(pgrp, PIDTYPE_PGID, p) { if ((p == ignored_task) || (p->exit_state && thread_group_empty(p)) || is_global_init(p->real_parent)) continue; if (task_pgrp(p->real_parent) != pgrp && task_session(p->real_parent) == task_session(p)) return 0; } while_each_pid_task(pgrp, PIDTYPE_PGID, p); return 1; } int is_current_pgrp_orphaned(void) { int retval; read_lock(&tasklist_lock); retval = will_become_orphaned_pgrp(task_pgrp(current), NULL); read_unlock(&tasklist_lock); return retval; } static bool has_stopped_jobs(struct pid *pgrp) { struct task_struct *p; do_each_pid_task(pgrp, PIDTYPE_PGID, p) { if (p->signal->flags & SIGNAL_STOP_STOPPED) return true; } while_each_pid_task(pgrp, PIDTYPE_PGID, p); return false; } /* * Check to see if any process groups have become orphaned as * a result of our exiting, and if they have any stopped jobs, * send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2) */ static void kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent) { struct pid *pgrp = task_pgrp(tsk); struct task_struct *ignored_task = tsk; if (!parent) /* exit: our father is in a different pgrp than * we are and we were the only connection outside. */ parent = tsk->real_parent; else /* reparent: our child is in a different pgrp than * we are, and it was the only connection outside. */ ignored_task = NULL; if (task_pgrp(parent) != pgrp && task_session(parent) == task_session(tsk) && will_become_orphaned_pgrp(pgrp, ignored_task) && has_stopped_jobs(pgrp)) { __kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp); __kill_pgrp_info(SIGCONT, SEND_SIG_PRIV, pgrp); } } static void coredump_task_exit(struct task_struct *tsk, struct core_state *core_state) { struct core_thread self; self.task = tsk; if (self.task->flags & PF_SIGNALED) self.next = xchg(&core_state->dumper.next, &self); else self.task = NULL; /* * Implies mb(), the result of xchg() must be visible * to core_state->dumper. */ if (atomic_dec_and_test(&core_state->nr_threads)) complete(&core_state->startup); for (;;) { set_current_state(TASK_IDLE|TASK_FREEZABLE); if (!self.task) /* see coredump_finish() */ break; schedule(); } __set_current_state(TASK_RUNNING); } #ifdef CONFIG_MEMCG /* drops tasklist_lock if succeeds */ static bool __try_to_set_owner(struct task_struct *tsk, struct mm_struct *mm) { bool ret = false; task_lock(tsk); if (likely(tsk->mm == mm)) { /* tsk can't pass exit_mm/exec_mmap and exit */ read_unlock(&tasklist_lock); WRITE_ONCE(mm->owner, tsk); lru_gen_migrate_mm(mm); ret = true; } task_unlock(tsk); return ret; } static bool try_to_set_owner(struct task_struct *g, struct mm_struct *mm) { struct task_struct *t; for_each_thread(g, t) { struct mm_struct *t_mm = READ_ONCE(t->mm); if (t_mm == mm) { if (__try_to_set_owner(t, mm)) return true; } else if (t_mm) break; } return false; } /* * A task is exiting. If it owned this mm, find a new owner for the mm. */ void mm_update_next_owner(struct mm_struct *mm) { struct task_struct *g, *p = current; /* * If the exiting or execing task is not the owner, it's * someone else's problem. */ if (mm->owner != p) return; /* * The current owner is exiting/execing and there are no other * candidates. Do not leave the mm pointing to a possibly * freed task structure. */ if (atomic_read(&mm->mm_users) <= 1) { WRITE_ONCE(mm->owner, NULL); return; } read_lock(&tasklist_lock); /* * Search in the children */ list_for_each_entry(g, &p->children, sibling) { if (try_to_set_owner(g, mm)) goto ret; } /* * Search in the siblings */ list_for_each_entry(g, &p->real_parent->children, sibling) { if (try_to_set_owner(g, mm)) goto ret; } /* * Search through everything else, we should not get here often. */ for_each_process(g) { if (atomic_read(&mm->mm_users) <= 1) break; if (g->flags & PF_KTHREAD) continue; if (try_to_set_owner(g, mm)) goto ret; } read_unlock(&tasklist_lock); /* * We found no owner yet mm_users > 1: this implies that we are * most likely racing with swapoff (try_to_unuse()) or /proc or * ptrace or page migration (get_task_mm()). Mark owner as NULL. */ WRITE_ONCE(mm->owner, NULL); ret: return; } #endif /* CONFIG_MEMCG */ /* * Turn us into a lazy TLB process if we * aren't already.. */ static void exit_mm(void) { struct mm_struct *mm = current->mm; exit_mm_release(current, mm); if (!mm) return; mmap_read_lock(mm); mmgrab_lazy_tlb(mm); BUG_ON(mm != current->active_mm); /* more a memory barrier than a real lock */ task_lock(current); /* * When a thread stops operating on an address space, the loop * in membarrier_private_expedited() may not observe that * tsk->mm, and the loop in membarrier_global_expedited() may * not observe a MEMBARRIER_STATE_GLOBAL_EXPEDITED * rq->membarrier_state, so those would not issue an IPI. * Membarrier requires a memory barrier after accessing * user-space memory, before clearing tsk->mm or the * rq->membarrier_state. */ smp_mb__after_spinlock(); local_irq_disable(); current->mm = NULL; membarrier_update_current_mm(NULL); enter_lazy_tlb(mm, current); local_irq_enable(); task_unlock(current); mmap_read_unlock(mm); mm_update_next_owner(mm); mmput(mm); if (test_thread_flag(TIF_MEMDIE)) exit_oom_victim(); } static struct task_struct *find_alive_thread(struct task_struct *p) { struct task_struct *t; for_each_thread(p, t) { if (!(t->flags & PF_EXITING)) return t; } return NULL; } static struct task_struct *find_child_reaper(struct task_struct *father, struct list_head *dead) __releases(&tasklist_lock) __acquires(&tasklist_lock) { struct pid_namespace *pid_ns = task_active_pid_ns(father); struct task_struct *reaper = pid_ns->child_reaper; struct task_struct *p, *n; if (likely(reaper != father)) return reaper; reaper = find_alive_thread(father); if (reaper) { pid_ns->child_reaper = reaper; return reaper; } write_unlock_irq(&tasklist_lock); list_for_each_entry_safe(p, n, dead, ptrace_entry) { list_del_init(&p->ptrace_entry); release_task(p); } zap_pid_ns_processes(pid_ns); write_lock_irq(&tasklist_lock); return father; } /* * When we die, we re-parent all our children, and try to: * 1. give them to another thread in our thread group, if such a member exists * 2. give it to the first ancestor process which prctl'd itself as a * child_subreaper for its children (like a service manager) * 3. give it to the init process (PID 1) in our pid namespace */ static struct task_struct *find_new_reaper(struct task_struct *father, struct task_struct *child_reaper) { struct task_struct *thread, *reaper; thread = find_alive_thread(father); if (thread) return thread; if (father->signal->has_child_subreaper) { unsigned int ns_level = task_pid(father)->level; /* * Find the first ->is_child_subreaper ancestor in our pid_ns. * We can't check reaper != child_reaper to ensure we do not * cross the namespaces, the exiting parent could be injected * by setns() + fork(). * We check pid->level, this is slightly more efficient than * task_active_pid_ns(reaper) != task_active_pid_ns(father). */ for (reaper = father->real_parent; task_pid(reaper)->level == ns_level; reaper = reaper->real_parent) { if (reaper == &init_task) break; if (!reaper->signal->is_child_subreaper) continue; thread = find_alive_thread(reaper); if (thread) return thread; } } return child_reaper; } /* * Any that need to be release_task'd are put on the @dead list. */ static void reparent_leader(struct task_struct *father, struct task_struct *p, struct list_head *dead) { if (unlikely(p->exit_state == EXIT_DEAD)) return; /* We don't want people slaying init. */ p->exit_signal = SIGCHLD; /* If it has exited notify the new parent about this child's death. */ if (!p->ptrace && p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) { if (do_notify_parent(p, p->exit_signal)) { p->exit_state = EXIT_DEAD; list_add(&p->ptrace_entry, dead); } } kill_orphaned_pgrp(p, father); } /* * Make init inherit all the child processes */ static void forget_original_parent(struct task_struct *father, struct list_head *dead) { struct task_struct *p, *t, *reaper; if (unlikely(!list_empty(&father->ptraced))) exit_ptrace(father, dead); /* Can drop and reacquire tasklist_lock */ reaper = find_child_reaper(father, dead); if (list_empty(&father->children)) return; reaper = find_new_reaper(father, reaper); list_for_each_entry(p, &father->children, sibling) { for_each_thread(p, t) { RCU_INIT_POINTER(t->real_parent, reaper); BUG_ON((!t->ptrace) != (rcu_access_pointer(t->parent) == father)); if (likely(!t->ptrace)) t->parent = t->real_parent; if (t->pdeath_signal) group_send_sig_info(t->pdeath_signal, SEND_SIG_NOINFO, t, PIDTYPE_TGID); } /* * If this is a threaded reparent there is no need to * notify anyone anything has happened. */ if (!same_thread_group(reaper, father)) reparent_leader(father, p, dead); } list_splice_tail_init(&father->children, &reaper->children); } /* * Send signals to all our closest relatives so that they know * to properly mourn us.. */ static void exit_notify(struct task_struct *tsk, int group_dead) { bool autoreap; struct task_struct *p, *n; LIST_HEAD(dead); write_lock_irq(&tasklist_lock); forget_original_parent(tsk, &dead); if (group_dead) kill_orphaned_pgrp(tsk->group_leader, NULL); tsk->exit_state = EXIT_ZOMBIE; if (unlikely(tsk->ptrace)) { int sig = thread_group_leader(tsk) && thread_group_empty(tsk) && !ptrace_reparented(tsk) ? tsk->exit_signal : SIGCHLD; autoreap = do_notify_parent(tsk, sig); } else if (thread_group_leader(tsk)) { autoreap = thread_group_empty(tsk) && do_notify_parent(tsk, tsk->exit_signal); } else { autoreap = true; /* untraced sub-thread */ do_notify_pidfd(tsk); } if (autoreap) { tsk->exit_state = EXIT_DEAD; list_add(&tsk->ptrace_entry, &dead); } /* mt-exec, de_thread() is waiting for group leader */ if (unlikely(tsk->signal->notify_count < 0)) wake_up_process(tsk->signal->group_exec_task); write_unlock_irq(&tasklist_lock); list_for_each_entry_safe(p, n, &dead, ptrace_entry) { list_del_init(&p->ptrace_entry); release_task(p); } } #ifdef CONFIG_DEBUG_STACK_USAGE #ifdef CONFIG_STACK_GROWSUP unsigned long stack_not_used(struct task_struct *p) { unsigned long *n = end_of_stack(p); do { /* Skip over canary */ n--; } while (!*n); return (unsigned long)end_of_stack(p) - (unsigned long)n; } #else /* !CONFIG_STACK_GROWSUP */ unsigned long stack_not_used(struct task_struct *p) { unsigned long *n = end_of_stack(p); do { /* Skip over canary */ n++; } while (!*n); return (unsigned long)n - (unsigned long)end_of_stack(p); } #endif /* CONFIG_STACK_GROWSUP */ /* Count the maximum pages reached in kernel stacks */ static inline void kstack_histogram(unsigned long used_stack) { #ifdef CONFIG_VM_EVENT_COUNTERS if (used_stack <= 1024) count_vm_event(KSTACK_1K); #if THREAD_SIZE > 1024 else if (used_stack <= 2048) count_vm_event(KSTACK_2K); #endif #if THREAD_SIZE > 2048 else if (used_stack <= 4096) count_vm_event(KSTACK_4K); #endif #if THREAD_SIZE > 4096 else if (used_stack <= 8192) count_vm_event(KSTACK_8K); #endif #if THREAD_SIZE > 8192 else if (used_stack <= 16384) count_vm_event(KSTACK_16K); #endif #if THREAD_SIZE > 16384 else if (used_stack <= 32768) count_vm_event(KSTACK_32K); #endif #if THREAD_SIZE > 32768 else if (used_stack <= 65536) count_vm_event(KSTACK_64K); #endif #if THREAD_SIZE > 65536 else count_vm_event(KSTACK_REST); #endif #endif /* CONFIG_VM_EVENT_COUNTERS */ } static void check_stack_usage(void) { static DEFINE_SPINLOCK(low_water_lock); static int lowest_to_date = THREAD_SIZE; unsigned long free; free = stack_not_used(current); kstack_histogram(THREAD_SIZE - free); if (free >= lowest_to_date) return; spin_lock(&low_water_lock); if (free < lowest_to_date) { pr_info("%s (%d) used greatest stack depth: %lu bytes left\n", current->comm, task_pid_nr(current), free); lowest_to_date = free; } spin_unlock(&low_water_lock); } #else /* !CONFIG_DEBUG_STACK_USAGE */ static inline void check_stack_usage(void) {} #endif /* CONFIG_DEBUG_STACK_USAGE */ static void synchronize_group_exit(struct task_struct *tsk, long code) { struct sighand_struct *sighand = tsk->sighand; struct signal_struct *signal = tsk->signal; struct core_state *core_state; spin_lock_irq(&sighand->siglock); signal->quick_threads--; if ((signal->quick_threads == 0) && !(signal->flags & SIGNAL_GROUP_EXIT)) { signal->flags = SIGNAL_GROUP_EXIT; signal->group_exit_code = code; signal->group_stop_count = 0; } /* * Serialize with any possible pending coredump. * We must hold siglock around checking core_state * and setting PF_POSTCOREDUMP. The core-inducing thread * will increment ->nr_threads for each thread in the * group without PF_POSTCOREDUMP set. */ tsk->flags |= PF_POSTCOREDUMP; core_state = signal->core_state; spin_unlock_irq(&sighand->siglock); if (unlikely(core_state)) coredump_task_exit(tsk, core_state); } void __noreturn do_exit(long code) { struct task_struct *tsk = current; int group_dead; WARN_ON(irqs_disabled()); WARN_ON(tsk->plug); kcov_task_exit(tsk); kmsan_task_exit(tsk); synchronize_group_exit(tsk, code); ptrace_event(PTRACE_EVENT_EXIT, code); user_events_exit(tsk); io_uring_files_cancel(); exit_signals(tsk); /* sets PF_EXITING */ seccomp_filter_release(tsk); acct_update_integrals(tsk); group_dead = atomic_dec_and_test(&tsk->signal->live); if (group_dead) { /* * If the last thread of global init has exited, panic * immediately to get a useable coredump. */ if (unlikely(is_global_init(tsk))) panic("Attempted to kill init! exitcode=0x%08x\n", tsk->signal->group_exit_code ?: (int)code); #ifdef CONFIG_POSIX_TIMERS hrtimer_cancel(&tsk->signal->real_timer); exit_itimers(tsk); #endif if (tsk->mm) setmax_mm_hiwater_rss(&tsk->signal->maxrss, tsk->mm); } acct_collect(code, group_dead); if (group_dead) tty_audit_exit(); audit_free(tsk); tsk->exit_code = code; taskstats_exit(tsk, group_dead); unwind_deferred_task_exit(tsk); trace_sched_process_exit(tsk, group_dead); /* * Since sampling can touch ->mm, make sure to stop everything before we * tear it down. * * Also flushes inherited counters to the parent - before the parent * gets woken up by child-exit notifications. */ perf_event_exit_task(tsk); exit_mm(); if (group_dead) acct_process(); exit_sem(tsk); exit_shm(tsk); exit_files(tsk); exit_fs(tsk); if (group_dead) disassociate_ctty(1); exit_task_namespaces(tsk); exit_task_work(tsk); exit_thread(tsk); sched_autogroup_exit_task(tsk); cgroup_exit(tsk); /* * FIXME: do that only when needed, using sched_exit tracepoint */ flush_ptrace_hw_breakpoint(tsk); exit_tasks_rcu_start(); exit_notify(tsk, group_dead); proc_exit_connector(tsk); mpol_put_task_policy(tsk); #ifdef CONFIG_FUTEX if (unlikely(current->pi_state_cache)) kfree(current->pi_state_cache); #endif /* * Make sure we are holding no locks: */ debug_check_no_locks_held(); if (tsk->io_context) exit_io_context(tsk); if (tsk->splice_pipe) free_pipe_info(tsk->splice_pipe); if (tsk->task_frag.page) put_page(tsk->task_frag.page); exit_task_stack_account(tsk); check_stack_usage(); preempt_disable(); if (tsk->nr_dirtied) __this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied); exit_rcu(); exit_tasks_rcu_finish(); lockdep_free_task(tsk); do_task_dead(); } void __noreturn make_task_dead(int signr) { /* * Take the task off the cpu after something catastrophic has * happened. * * We can get here from a kernel oops, sometimes with preemption off. * Start by checking for critical errors. * Then fix up important state like USER_DS and preemption. * Then do everything else. */ struct task_struct *tsk = current; unsigned int limit; if (unlikely(in_interrupt())) panic("Aiee, killing interrupt handler!"); if (unlikely(!tsk->pid)) panic("Attempted to kill the idle task!"); if (unlikely(irqs_disabled())) { pr_info("note: %s[%d] exited with irqs disabled\n", current->comm, task_pid_nr(current)); local_irq_enable(); } if (unlikely(in_atomic())) { pr_info("note: %s[%d] exited with preempt_count %d\n", current->comm, task_pid_nr(current), preempt_count()); preempt_count_set(PREEMPT_ENABLED); } /* * Every time the system oopses, if the oops happens while a reference * to an object was held, the reference leaks. * If the oops doesn't also leak memory, repeated oopsing can cause * reference counters to wrap around (if they're not using refcount_t). * This means that repeated oopsing can make unexploitable-looking bugs * exploitable through repeated oopsing. * To make sure this can't happen, place an upper bound on how often the * kernel may oops without panic(). */ limit = READ_ONCE(oops_limit); if (atomic_inc_return(&oops_count) >= limit && limit) panic("Oopsed too often (kernel.oops_limit is %d)", limit); /* * We're taking recursive faults here in make_task_dead. Safest is to just * leave this task alone and wait for reboot. */ if (unlikely(tsk->flags & PF_EXITING)) { pr_alert("Fixing recursive fault but reboot is needed!\n"); futex_exit_recursive(tsk); tsk->exit_state = EXIT_DEAD; refcount_inc(&tsk->rcu_users); do_task_dead(); } do_exit(signr); } SYSCALL_DEFINE1(exit, int, error_code) { do_exit((error_code&0xff)<<8); } /* * Take down every thread in the group. This is called by fatal signals * as well as by sys_exit_group (below). */ void __noreturn do_group_exit(int exit_code) { struct signal_struct *sig = current->signal; if (sig->flags & SIGNAL_GROUP_EXIT) exit_code = sig->group_exit_code; else if (sig->group_exec_task) exit_code = 0; else { struct sighand_struct *const sighand = current->sighand; spin_lock_irq(&sighand->siglock); if (sig->flags & SIGNAL_GROUP_EXIT) /* Another thread got here before we took the lock. */ exit_code = sig->group_exit_code; else if (sig->group_exec_task) exit_code = 0; else { sig->group_exit_code = exit_code; sig->flags = SIGNAL_GROUP_EXIT; zap_other_threads(current); } spin_unlock_irq(&sighand->siglock); } do_exit(exit_code); /* NOTREACHED */ } /* * this kills every thread in the thread group. Note that any externally * wait4()-ing process will get the correct exit code - even if this * thread is not the thread group leader. */ SYSCALL_DEFINE1(exit_group, int, error_code) { do_group_exit((error_code & 0xff) << 8); /* NOTREACHED */ return 0; } static int eligible_pid(struct wait_opts *wo, struct task_struct *p) { return wo->wo_type == PIDTYPE_MAX || task_pid_type(p, wo->wo_type) == wo->wo_pid; } static int eligible_child(struct wait_opts *wo, bool ptrace, struct task_struct *p) { if (!eligible_pid(wo, p)) return 0; /* * Wait for all children (clone and not) if __WALL is set or * if it is traced by us. */ if (ptrace || (wo->wo_flags & __WALL)) return 1; /* * Otherwise, wait for clone children *only* if __WCLONE is set; * otherwise, wait for non-clone children *only*. * * Note: a "clone" child here is one that reports to its parent * using a signal other than SIGCHLD, or a non-leader thread which * we can only see if it is traced by us. */ if ((p->exit_signal != SIGCHLD) ^ !!(wo->wo_flags & __WCLONE)) return 0; return 1; } /* * Handle sys_wait4 work for one task in state EXIT_ZOMBIE. We hold * read_lock(&tasklist_lock) on entry. If we return zero, we still hold * the lock and this task is uninteresting. If we return nonzero, we have * released the lock and the system call should return. */ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) { int state, status; pid_t pid = task_pid_vnr(p); uid_t uid = from_kuid_munged(current_user_ns(), task_uid(p)); struct waitid_info *infop; if (!likely(wo->wo_flags & WEXITED)) return 0; if (unlikely(wo->wo_flags & WNOWAIT)) { status = (p->signal->flags & SIGNAL_GROUP_EXIT) ? p->signal->group_exit_code : p->exit_code; get_task_struct(p); read_unlock(&tasklist_lock); sched_annotate_sleep(); if (wo->wo_rusage) getrusage(p, RUSAGE_BOTH, wo->wo_rusage); put_task_struct(p); goto out_info; } /* * Move the task's state to DEAD/TRACE, only one thread can do this. */ state = (ptrace_reparented(p) && thread_group_leader(p)) ? EXIT_TRACE : EXIT_DEAD; if (cmpxchg(&p->exit_state, EXIT_ZOMBIE, state) != EXIT_ZOMBIE) return 0; /* * We own this thread, nobody else can reap it. */ read_unlock(&tasklist_lock); sched_annotate_sleep(); /* * Check thread_group_leader() to exclude the traced sub-threads. */ if (state == EXIT_DEAD && thread_group_leader(p)) { struct signal_struct *sig = p->signal; struct signal_struct *psig = current->signal; unsigned long maxrss; u64 tgutime, tgstime; /* * The resource counters for the group leader are in its * own task_struct. Those for dead threads in the group * are in its signal_struct, as are those for the child * processes it has previously reaped. All these * accumulate in the parent's signal_struct c* fields. * * We don't bother to take a lock here to protect these * p->signal fields because the whole thread group is dead * and nobody can change them. * * psig->stats_lock also protects us from our sub-threads * which can reap other children at the same time. * * We use thread_group_cputime_adjusted() to get times for * the thread group, which consolidates times for all threads * in the group including the group leader. */ thread_group_cputime_adjusted(p, &tgutime, &tgstime); write_seqlock_irq(&psig->stats_lock); psig->cutime += tgutime + sig->cutime; psig->cstime += tgstime + sig->cstime; psig->cgtime += task_gtime(p) + sig->gtime + sig->cgtime; psig->cmin_flt += p->min_flt + sig->min_flt + sig->cmin_flt; psig->cmaj_flt += p->maj_flt + sig->maj_flt + sig->cmaj_flt; psig->cnvcsw += p->nvcsw + sig->nvcsw + sig->cnvcsw; psig->cnivcsw += p->nivcsw + sig->nivcsw + sig->cnivcsw; psig->cinblock += task_io_get_inblock(p) + sig->inblock + sig->cinblock; psig->coublock += task_io_get_oublock(p) + sig->oublock + sig->coublock; maxrss = max(sig->maxrss, sig->cmaxrss); if (psig->cmaxrss < maxrss) psig->cmaxrss = maxrss; task_io_accounting_add(&psig->ioac, &p->ioac); task_io_accounting_add(&psig->ioac, &sig->ioac); write_sequnlock_irq(&psig->stats_lock); } if (wo->wo_rusage) getrusage(p, RUSAGE_BOTH, wo->wo_rusage); status = (p->signal->flags & SIGNAL_GROUP_EXIT) ? p->signal->group_exit_code : p->exit_code; wo->wo_stat = status; if (state == EXIT_TRACE) { write_lock_irq(&tasklist_lock); /* We dropped tasklist, ptracer could die and untrace */ ptrace_unlink(p); /* If parent wants a zombie, don't release it now */ state = EXIT_ZOMBIE; if (do_notify_parent(p, p->exit_signal)) state = EXIT_DEAD; p->exit_state = state; write_unlock_irq(&tasklist_lock); } if (state == EXIT_DEAD) release_task(p); out_info: infop = wo->wo_info; if (infop) { if ((status & 0x7f) == 0) { infop->cause = CLD_EXITED; infop->status = status >> 8; } else { infop->cause = (status & 0x80) ? CLD_DUMPED : CLD_KILLED; infop->status = status & 0x7f; } infop->pid = pid; infop->uid = uid; } return pid; } static int *task_stopped_code(struct task_struct *p, bool ptrace) { if (ptrace) { if (task_is_traced(p) && !(p->jobctl & JOBCTL_LISTENING)) return &p->exit_code; } else { if (p->signal->flags & SIGNAL_STOP_STOPPED) return &p->signal->group_exit_code; } return NULL; } /** * wait_task_stopped - Wait for %TASK_STOPPED or %TASK_TRACED * @wo: wait options * @ptrace: is the wait for ptrace * @p: task to wait for * * Handle sys_wait4() work for %p in state %TASK_STOPPED or %TASK_TRACED. * * CONTEXT: * read_lock(&tasklist_lock), which is released if return value is * non-zero. Also, grabs and releases @p->sighand->siglock. * * RETURNS: * 0 if wait condition didn't exist and search for other wait conditions * should continue. Non-zero return, -errno on failure and @p's pid on * success, implies that tasklist_lock is released and wait condition * search should terminate. */ static int wait_task_stopped(struct wait_opts *wo, int ptrace, struct task_struct *p) { struct waitid_info *infop; int exit_code, *p_code, why; uid_t uid = 0; /* unneeded, required by compiler */ pid_t pid; /* * Traditionally we see ptrace'd stopped tasks regardless of options. */ if (!ptrace && !(wo->wo_flags & WUNTRACED)) return 0; if (!task_stopped_code(p, ptrace)) return 0; exit_code = 0; spin_lock_irq(&p->sighand->siglock); p_code = task_stopped_code(p, ptrace); if (unlikely(!p_code)) goto unlock_sig; exit_code = *p_code; if (!exit_code) goto unlock_sig; if (!unlikely(wo->wo_flags & WNOWAIT)) *p_code = 0; uid = from_kuid_munged(current_user_ns(), task_uid(p)); unlock_sig: spin_unlock_irq(&p->sighand->siglock); if (!exit_code) return 0; /* * Now we are pretty sure this task is interesting. * Make sure it doesn't get reaped out from under us while we * give up the lock and then examine it below. We don't want to * keep holding onto the tasklist_lock while we call getrusage and * possibly take page faults for user memory. */ get_task_struct(p); pid = task_pid_vnr(p); why = ptrace ? CLD_TRAPPED : CLD_STOPPED; read_unlock(&tasklist_lock); sched_annotate_sleep(); if (wo->wo_rusage) getrusage(p, RUSAGE_BOTH, wo->wo_rusage); put_task_struct(p); if (likely(!(wo->wo_flags & WNOWAIT))) wo->wo_stat = (exit_code << 8) | 0x7f; infop = wo->wo_info; if (infop) { infop->cause = why; infop->status = exit_code; infop->pid = pid; infop->uid = uid; } return pid; } /* * Handle do_wait work for one task in a live, non-stopped state. * read_lock(&tasklist_lock) on entry. If we return zero, we still hold * the lock and this task is uninteresting. If we return nonzero, we have * released the lock and the system call should return. */ static int wait_task_continued(struct wait_opts *wo, struct task_struct *p) { struct waitid_info *infop; pid_t pid; uid_t uid; if (!unlikely(wo->wo_flags & WCONTINUED)) return 0; if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) return 0; spin_lock_irq(&p->sighand->siglock); /* Re-check with the lock held. */ if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) { spin_unlock_irq(&p->sighand->siglock); return 0; } if (!unlikely(wo->wo_flags & WNOWAIT)) p->signal->flags &= ~SIGNAL_STOP_CONTINUED; uid = from_kuid_munged(current_user_ns(), task_uid(p)); spin_unlock_irq(&p->sighand->siglock); pid = task_pid_vnr(p); get_task_struct(p); read_unlock(&tasklist_lock); sched_annotate_sleep(); if (wo->wo_rusage) getrusage(p, RUSAGE_BOTH, wo->wo_rusage); put_task_struct(p); infop = wo->wo_info; if (!infop) { wo->wo_stat = 0xffff; } else { infop->cause = CLD_CONTINUED; infop->pid = pid; infop->uid = uid; infop->status = SIGCONT; } return pid; } /* * Consider @p for a wait by @parent. * * -ECHILD should be in ->notask_error before the first call. * Returns nonzero for a final return, when we have unlocked tasklist_lock. * Returns zero if the search for a child should continue; * then ->notask_error is 0 if @p is an eligible child, * or still -ECHILD. */ static int wait_consider_task(struct wait_opts *wo, int ptrace, struct task_struct *p) { /* * We can race with wait_task_zombie() from another thread. * Ensure that EXIT_ZOMBIE -> EXIT_DEAD/EXIT_TRACE transition * can't confuse the checks below. */ int exit_state = READ_ONCE(p->exit_state); int ret; if (unlikely(exit_state == EXIT_DEAD)) return 0; ret = eligible_child(wo, ptrace, p); if (!ret) return ret; if (unlikely(exit_state == EXIT_TRACE)) { /* * ptrace == 0 means we are the natural parent. In this case * we should clear notask_error, debugger will notify us. */ if (likely(!ptrace)) wo->notask_error = 0; return 0; } if (likely(!ptrace) && unlikely(p->ptrace)) { /* * If it is traced by its real parent's group, just pretend * the caller is ptrace_do_wait() and reap this child if it * is zombie. * * This also hides group stop state from real parent; otherwise * a single stop can be reported twice as group and ptrace stop. * If a ptracer wants to distinguish these two events for its * own children it should create a separate process which takes * the role of real parent. */ if (!ptrace_reparented(p)) ptrace = 1; } /* slay zombie? */ if (exit_state == EXIT_ZOMBIE) { /* we don't reap group leaders with subthreads */ if (!delay_group_leader(p)) { /* * A zombie ptracee is only visible to its ptracer. * Notification and reaping will be cascaded to the * real parent when the ptracer detaches. */ if (unlikely(ptrace) || likely(!p->ptrace)) return wait_task_zombie(wo, p); } /* * Allow access to stopped/continued state via zombie by * falling through. Clearing of notask_error is complex. * * When !@ptrace: * * If WEXITED is set, notask_error should naturally be * cleared. If not, subset of WSTOPPED|WCONTINUED is set, * so, if there are live subthreads, there are events to * wait for. If all subthreads are dead, it's still safe * to clear - this function will be called again in finite * amount time once all the subthreads are released and * will then return without clearing. * * When @ptrace: * * Stopped state is per-task and thus can't change once the * target task dies. Only continued and exited can happen. * Clear notask_error if WCONTINUED | WEXITED. */ if (likely(!ptrace) || (wo->wo_flags & (WCONTINUED | WEXITED))) wo->notask_error = 0; } else { /* * @p is alive and it's gonna stop, continue or exit, so * there always is something to wait for. */ wo->notask_error = 0; } /* * Wait for stopped. Depending on @ptrace, different stopped state * is used and the two don't interact with each other. */ ret = wait_task_stopped(wo, ptrace, p); if (ret) return ret; /* * Wait for continued. There's only one continued state and the * ptracer can consume it which can confuse the real parent. Don't * use WCONTINUED from ptracer. You don't need or want it. */ return wait_task_continued(wo, p); } /* * Do the work of do_wait() for one thread in the group, @tsk. * * -ECHILD should be in ->notask_error before the first call. * Returns nonzero for a final return, when we have unlocked tasklist_lock. * Returns zero if the search for a child should continue; then * ->notask_error is 0 if there were any eligible children, * or still -ECHILD. */ static int do_wait_thread(struct wait_opts *wo, struct task_struct *tsk) { struct task_struct *p; list_for_each_entry(p, &tsk->children, sibling) { int ret = wait_consider_task(wo, 0, p); if (ret) return ret; } return 0; } static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk) { struct task_struct *p; list_for_each_entry(p, &tsk->ptraced, ptrace_entry) { int ret = wait_consider_task(wo, 1, p); if (ret) return ret; } return 0; } bool pid_child_should_wake(struct wait_opts *wo, struct task_struct *p) { if (!eligible_pid(wo, p)) return false; if ((wo->wo_flags & __WNOTHREAD) && wo->child_wait.private != p->parent) return false; return true; } static int child_wait_callback(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) { struct wait_opts *wo = container_of(wait, struct wait_opts, child_wait); struct task_struct *p = key; if (pid_child_should_wake(wo, p)) return default_wake_function(wait, mode, sync, key); return 0; } void __wake_up_parent(struct task_struct *p, struct task_struct *parent) { __wake_up_sync_key(&parent->signal->wait_chldexit, TASK_INTERRUPTIBLE, p); } static bool is_effectively_child(struct wait_opts *wo, bool ptrace, struct task_struct *target) { struct task_struct *parent = !ptrace ? target->real_parent : target->parent; return current == parent || (!(wo->wo_flags & __WNOTHREAD) && same_thread_group(current, parent)); } /* * Optimization for waiting on PIDTYPE_PID. No need to iterate through child * and tracee lists to find the target task. */ static int do_wait_pid(struct wait_opts *wo) { bool ptrace; struct task_struct *target; int retval; ptrace = false; target = pid_task(wo->wo_pid, PIDTYPE_TGID); if (target && is_effectively_child(wo, ptrace, target)) { retval = wait_consider_task(wo, ptrace, target); if (retval) return retval; } ptrace = true; target = pid_task(wo->wo_pid, PIDTYPE_PID); if (target && target->ptrace && is_effectively_child(wo, ptrace, target)) { retval = wait_consider_task(wo, ptrace, target); if (retval) return retval; } return 0; } long __do_wait(struct wait_opts *wo) { long retval; /* * If there is nothing that can match our criteria, just get out. * We will clear ->notask_error to zero if we see any child that * might later match our criteria, even if we are not able to reap * it yet. */ wo->notask_error = -ECHILD; if ((wo->wo_type < PIDTYPE_MAX) && (!wo->wo_pid || !pid_has_task(wo->wo_pid, wo->wo_type))) goto notask; read_lock(&tasklist_lock); if (wo->wo_type == PIDTYPE_PID) { retval = do_wait_pid(wo); if (retval) return retval; } else { struct task_struct *tsk = current; do { retval = do_wait_thread(wo, tsk); if (retval) return retval; retval = ptrace_do_wait(wo, tsk); if (retval) return retval; if (wo->wo_flags & __WNOTHREAD) break; } while_each_thread(current, tsk); } read_unlock(&tasklist_lock); notask: retval = wo->notask_error; if (!retval && !(wo->wo_flags & WNOHANG)) return -ERESTARTSYS; return retval; } static long do_wait(struct wait_opts *wo) { int retval; trace_sched_process_wait(wo->wo_pid); init_waitqueue_func_entry(&wo->child_wait, child_wait_callback); wo->child_wait.private = current; add_wait_queue(&current->signal->wait_chldexit, &wo->child_wait); do { set_current_state(TASK_INTERRUPTIBLE); retval = __do_wait(wo); if (retval != -ERESTARTSYS) break; if (signal_pending(current)) break; schedule(); } while (1); __set_current_state(TASK_RUNNING); remove_wait_queue(&current->signal->wait_chldexit, &wo->child_wait); return retval; } int kernel_waitid_prepare(struct wait_opts *wo, int which, pid_t upid, struct waitid_info *infop, int options, struct rusage *ru) { unsigned int f_flags = 0; struct pid *pid = NULL; enum pid_type type; if (options & ~(WNOHANG|WNOWAIT|WEXITED|WSTOPPED|WCONTINUED| __WNOTHREAD|__WCLONE|__WALL)) return -EINVAL; if (!(options & (WEXITED|WSTOPPED|WCONTINUED))) return -EINVAL; switch (which) { case P_ALL: type = PIDTYPE_MAX; break; case P_PID: type = PIDTYPE_PID; if (upid <= 0) return -EINVAL; pid = find_get_pid(upid); break; case P_PGID: type = PIDTYPE_PGID; if (upid < 0) return -EINVAL; if (upid) pid = find_get_pid(upid); else pid = get_task_pid(current, PIDTYPE_PGID); break; case P_PIDFD: type = PIDTYPE_PID; if (upid < 0) return -EINVAL; pid = pidfd_get_pid(upid, &f_flags); if (IS_ERR(pid)) return PTR_ERR(pid); break; default: return -EINVAL; } wo->wo_type = type; wo->wo_pid = pid; wo->wo_flags = options; wo->wo_info = infop; wo->wo_rusage = ru; if (f_flags & O_NONBLOCK) wo->wo_flags |= WNOHANG; return 0; } static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop, int options, struct rusage *ru) { struct wait_opts wo; long ret; ret = kernel_waitid_prepare(&wo, which, upid, infop, options, ru); if (ret) return ret; ret = do_wait(&wo); if (!ret && !(options & WNOHANG) && (wo.wo_flags & WNOHANG)) ret = -EAGAIN; put_pid(wo.wo_pid); return ret; } SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *, infop, int, options, struct rusage __user *, ru) { struct rusage r; struct waitid_info info = {.status = 0}; long err = kernel_waitid(which, upid, &info, options, ru ? &r : NULL); int signo = 0; if (err > 0) { signo = SIGCHLD; err = 0; if (ru && copy_to_user(ru, &r, sizeof(struct rusage))) return -EFAULT; } if (!infop) return err; if (!user_write_access_begin(infop, sizeof(*infop))) return -EFAULT; unsafe_put_user(signo, &infop->si_signo, Efault); unsafe_put_user(0, &infop->si_errno, Efault); unsafe_put_user(info.cause, &infop->si_code, Efault); unsafe_put_user(info.pid, &infop->si_pid, Efault); unsafe_put_user(info.uid, &infop->si_uid, Efault); unsafe_put_user(info.status, &infop->si_status, Efault); user_write_access_end(); return err; Efault: user_write_access_end(); return -EFAULT; } long kernel_wait4(pid_t upid, int __user *stat_addr, int options, struct rusage *ru) { struct wait_opts wo; struct pid *pid = NULL; enum pid_type type; long ret; if (options & ~(WNOHANG|WUNTRACED|WCONTINUED| __WNOTHREAD|__WCLONE|__WALL)) return -EINVAL; /* -INT_MIN is not defined */ if (upid == INT_MIN) return -ESRCH; if (upid == -1) type = PIDTYPE_MAX; else if (upid < 0) { type = PIDTYPE_PGID; pid = find_get_pid(-upid); } else if (upid == 0) { type = PIDTYPE_PGID; pid = get_task_pid(current, PIDTYPE_PGID); } else /* upid > 0 */ { type = PIDTYPE_PID; pid = find_get_pid(upid); } wo.wo_type = type; wo.wo_pid = pid; wo.wo_flags = options | WEXITED; wo.wo_info = NULL; wo.wo_stat = 0; wo.wo_rusage = ru; ret = do_wait(&wo); put_pid(pid); if (ret > 0 && stat_addr && put_user(wo.wo_stat, stat_addr)) ret = -EFAULT; return ret; } int kernel_wait(pid_t pid, int *stat) { struct wait_opts wo = { .wo_type = PIDTYPE_PID, .wo_pid = find_get_pid(pid), .wo_flags = WEXITED, }; int ret; ret = do_wait(&wo); if (ret > 0 && wo.wo_stat) *stat = wo.wo_stat; put_pid(wo.wo_pid); return ret; } SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr, int, options, struct rusage __user *, ru) { struct rusage r; long err = kernel_wait4(upid, stat_addr, options, ru ? &r : NULL); if (err > 0) { if (ru && copy_to_user(ru, &r, sizeof(struct rusage))) return -EFAULT; } return err; } #ifdef __ARCH_WANT_SYS_WAITPID /* * sys_waitpid() remains for compatibility. waitpid() should be * implemented by calling sys_wait4() from libc.a. */ SYSCALL_DEFINE3(waitpid, pid_t, pid, int __user *, stat_addr, int, options) { return kernel_wait4(pid, stat_addr, options, NULL); } #endif #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE4(wait4, compat_pid_t, pid, compat_uint_t __user *, stat_addr, int, options, struct compat_rusage __user *, ru) { struct rusage r; long err = kernel_wait4(pid, stat_addr, options, ru ? &r : NULL); if (err > 0) { if (ru && put_compat_rusage(&r, ru)) return -EFAULT; } return err; } COMPAT_SYSCALL_DEFINE5(waitid, int, which, compat_pid_t, pid, struct compat_siginfo __user *, infop, int, options, struct compat_rusage __user *, uru) { struct rusage ru; struct waitid_info info = {.status = 0}; long err = kernel_waitid(which, pid, &info, options, uru ? &ru : NULL); int signo = 0; if (err > 0) { signo = SIGCHLD; err = 0; if (uru) { /* kernel_waitid() overwrites everything in ru */ if (COMPAT_USE_64BIT_TIME) err = copy_to_user(uru, &ru, sizeof(ru)); else err = put_compat_rusage(&ru, uru); if (err) return -EFAULT; } } if (!infop) return err; if (!user_write_access_begin(infop, sizeof(*infop))) return -EFAULT; unsafe_put_user(signo, &infop->si_signo, Efault); unsafe_put_user(0, &infop->si_errno, Efault); unsafe_put_user(info.cause, &infop->si_code, Efault); unsafe_put_user(info.pid, &infop->si_pid, Efault); unsafe_put_user(info.uid, &infop->si_uid, Efault); unsafe_put_user(info.status, &infop->si_status, Efault); user_write_access_end(); return err; Efault: user_write_access_end(); return -EFAULT; } #endif /* * This needs to be __function_aligned as GCC implicitly makes any * implementation of abort() cold and drops alignment specified by * -falign-functions=N. * * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88345#c11 */ __weak __function_aligned void abort(void) { BUG(); /* if that doesn't kill us, halt */ panic("Oops failed to kill thread"); } EXPORT_SYMBOL(abort);
8 8 8 8 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 // SPDX-License-Identifier: GPL-2.0 /* * This file contains functions which emulate a local clock-event * device via a broadcast event source. * * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de> * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner */ #include <linux/cpu.h> #include <linux/err.h> #include <linux/hrtimer.h> #include <linux/interrupt.h> #include <linux/percpu.h> #include <linux/profile.h> #include <linux/sched.h> #include <linux/smp.h> #include <linux/module.h> #include "tick-internal.h" /* * Broadcast support for broken x86 hardware, where the local apic * timer stops in C3 state. */ static struct tick_device tick_broadcast_device; static cpumask_var_t tick_broadcast_mask __cpumask_var_read_mostly; static cpumask_var_t tick_broadcast_on __cpumask_var_read_mostly; static cpumask_var_t tmpmask __cpumask_var_read_mostly; static int tick_broadcast_forced; static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(tick_broadcast_lock); #ifdef CONFIG_TICK_ONESHOT static DEFINE_PER_CPU(struct clock_event_device *, tick_oneshot_wakeup_device); static void tick_broadcast_setup_oneshot(struct clock_event_device *bc, bool from_periodic); static void tick_broadcast_clear_oneshot(int cpu); static void tick_resume_broadcast_oneshot(struct clock_event_device *bc); # ifdef CONFIG_HOTPLUG_CPU static void tick_broadcast_oneshot_offline(unsigned int cpu); # endif #else static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc, bool from_periodic) { BUG(); } static inline void tick_broadcast_clear_oneshot(int cpu) { } static inline void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { } # ifdef CONFIG_HOTPLUG_CPU static inline void tick_broadcast_oneshot_offline(unsigned int cpu) { } # endif #endif /* * Debugging: see timer_list.c */ struct tick_device *tick_get_broadcast_device(void) { return &tick_broadcast_device; } struct cpumask *tick_get_broadcast_mask(void) { return tick_broadcast_mask; } static struct clock_event_device *tick_get_oneshot_wakeup_device(int cpu); const struct clock_event_device *tick_get_wakeup_device(int cpu) { return tick_get_oneshot_wakeup_device(cpu); } /* * Start the device in periodic mode */ static void tick_broadcast_start_periodic(struct clock_event_device *bc) { if (bc) tick_setup_periodic(bc, 1); } /* * Check, if the device can be utilized as broadcast device: */ static bool tick_check_broadcast_device(struct clock_event_device *curdev, struct clock_event_device *newdev) { if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) || (newdev->features & CLOCK_EVT_FEAT_PERCPU) || (newdev->features & CLOCK_EVT_FEAT_C3STOP)) return false; if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT && !(newdev->features & CLOCK_EVT_FEAT_ONESHOT)) return false; return !curdev || newdev->rating > curdev->rating; } #ifdef CONFIG_TICK_ONESHOT static struct clock_event_device *tick_get_oneshot_wakeup_device(int cpu) { return per_cpu(tick_oneshot_wakeup_device, cpu); } static void tick_oneshot_wakeup_handler(struct clock_event_device *wd) { /* * If we woke up early and the tick was reprogrammed in the * meantime then this may be spurious but harmless. */ tick_receive_broadcast(); } static bool tick_set_oneshot_wakeup_device(struct clock_event_device *newdev, int cpu) { struct clock_event_device *curdev = tick_get_oneshot_wakeup_device(cpu); if (!newdev) goto set_device; if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) || (newdev->features & CLOCK_EVT_FEAT_C3STOP)) return false; if (!(newdev->features & CLOCK_EVT_FEAT_PERCPU) || !(newdev->features & CLOCK_EVT_FEAT_ONESHOT)) return false; if (!cpumask_equal(newdev->cpumask, cpumask_of(cpu))) return false; if (curdev && newdev->rating <= curdev->rating) return false; if (!try_module_get(newdev->owner)) return false; newdev->event_handler = tick_oneshot_wakeup_handler; set_device: clockevents_exchange_device(curdev, newdev); per_cpu(tick_oneshot_wakeup_device, cpu) = newdev; return true; } #else static struct clock_event_device *tick_get_oneshot_wakeup_device(int cpu) { return NULL; } static bool tick_set_oneshot_wakeup_device(struct clock_event_device *newdev, int cpu) { return false; } #endif /* * Conditionally install/replace broadcast device */ void tick_install_broadcast_device(struct clock_event_device *dev, int cpu) { struct clock_event_device *cur = tick_broadcast_device.evtdev; if (tick_set_oneshot_wakeup_device(dev, cpu)) return; if (!tick_check_broadcast_device(cur, dev)) return; if (!try_module_get(dev->owner)) return; clockevents_exchange_device(cur, dev); if (cur) cur->event_handler = clockevents_handle_noop; tick_broadcast_device.evtdev = dev; if (!cpumask_empty(tick_broadcast_mask)) tick_broadcast_start_periodic(dev); if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT)) return; /* * If the system already runs in oneshot mode, switch the newly * registered broadcast device to oneshot mode explicitly. */ if (tick_broadcast_oneshot_active()) { tick_broadcast_switch_to_oneshot(); return; } /* * Inform all cpus about this. We might be in a situation * where we did not switch to oneshot mode because the per cpu * devices are affected by CLOCK_EVT_FEAT_C3STOP and the lack * of a oneshot capable broadcast device. Without that * notification the systems stays stuck in periodic mode * forever. */ tick_clock_notify(); } /* * Check, if the device is the broadcast device */ int tick_is_broadcast_device(struct clock_event_device *dev) { return (dev && tick_broadcast_device.evtdev == dev); } int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq) { int ret = -ENODEV; if (tick_is_broadcast_device(dev)) { raw_spin_lock(&tick_broadcast_lock); ret = __clockevents_update_freq(dev, freq); raw_spin_unlock(&tick_broadcast_lock); } return ret; } static void err_broadcast(const struct cpumask *mask) { pr_crit_once("Failed to broadcast timer tick. Some CPUs may be unresponsive.\n"); } static void tick_device_setup_broadcast_func(struct clock_event_device *dev) { if (!dev->broadcast) dev->broadcast = tick_broadcast; if (!dev->broadcast) { pr_warn_once("%s depends on broadcast, but no broadcast function available\n", dev->name); dev->broadcast = err_broadcast; } } /* * Check, if the device is dysfunctional and a placeholder, which * needs to be handled by the broadcast device. */ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) { struct clock_event_device *bc = tick_broadcast_device.evtdev; unsigned long flags; int ret = 0; raw_spin_lock_irqsave(&tick_broadcast_lock, flags); /* * Devices might be registered with both periodic and oneshot * mode disabled. This signals, that the device needs to be * operated from the broadcast device and is a placeholder for * the cpu local device. */ if (!tick_device_is_functional(dev)) { dev->event_handler = tick_handle_periodic; tick_device_setup_broadcast_func(dev); cpumask_set_cpu(cpu, tick_broadcast_mask); if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) tick_broadcast_start_periodic(bc); else tick_broadcast_setup_oneshot(bc, false); ret = 1; } else { /* * Clear the broadcast bit for this cpu if the * device is not power state affected. */ if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) cpumask_clear_cpu(cpu, tick_broadcast_mask); else tick_device_setup_broadcast_func(dev); /* * Clear the broadcast bit if the CPU is not in * periodic broadcast on state. */ if (!cpumask_test_cpu(cpu, tick_broadcast_on)) cpumask_clear_cpu(cpu, tick_broadcast_mask); switch (tick_broadcast_device.mode) { case TICKDEV_MODE_ONESHOT: /* * If the system is in oneshot mode we can * unconditionally clear the oneshot mask bit, * because the CPU is running and therefore * not in an idle state which causes the power * state affected device to stop. Let the * caller initialize the device. */ tick_broadcast_clear_oneshot(cpu); ret = 0; break; case TICKDEV_MODE_PERIODIC: /* * If the system is in periodic mode, check * whether the broadcast device can be * switched off now. */ if (cpumask_empty(tick_broadcast_mask) && bc) clockevents_shutdown(bc); /* * If we kept the cpu in the broadcast mask, * tell the caller to leave the per cpu device * in shutdown state. The periodic interrupt * is delivered by the broadcast device, if * the broadcast device exists and is not * hrtimer based. */ if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER)) ret = cpumask_test_cpu(cpu, tick_broadcast_mask); break; default: break; } } raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); return ret; } int tick_receive_broadcast(void) { struct tick_device *td = this_cpu_ptr(&tick_cpu_device); struct clock_event_device *evt = td->evtdev; if (!evt) return -ENODEV; if (!evt->event_handler) return -EINVAL; evt->event_handler(evt); return 0; } /* * Broadcast the event to the cpus, which are set in the mask (mangled). */ static bool tick_do_broadcast(struct cpumask *mask) { int cpu = smp_processor_id(); struct tick_device *td; bool local = false; /* * Check, if the current cpu is in the mask */ if (cpumask_test_cpu(cpu, mask)) { struct clock_event_device *bc = tick_broadcast_device.evtdev; cpumask_clear_cpu(cpu, mask); /* * We only run the local handler, if the broadcast * device is not hrtimer based. Otherwise we run into * a hrtimer recursion. * * local timer_interrupt() * local_handler() * expire_hrtimers() * bc_handler() * local_handler() * expire_hrtimers() */ local = !(bc->features & CLOCK_EVT_FEAT_HRTIMER); } if (!cpumask_empty(mask)) { /* * It might be necessary to actually check whether the devices * have different broadcast functions. For now, just use the * one of the first device. This works as long as we have this * misfeature only on x86 (lapic) */ td = &per_cpu(tick_cpu_device, cpumask_first(mask)); td->evtdev->broadcast(mask); } return local; } /* * Periodic broadcast: * - invoke the broadcast handlers */ static bool tick_do_periodic_broadcast(void) { cpumask_and(tmpmask, cpu_online_mask, tick_broadcast_mask); return tick_do_broadcast(tmpmask); } /* * Event handler for periodic broadcast ticks */ static void tick_handle_periodic_broadcast(struct clock_event_device *dev) { struct tick_device *td = this_cpu_ptr(&tick_cpu_device); bool bc_local; raw_spin_lock(&tick_broadcast_lock); /* Handle spurious interrupts gracefully */ if (clockevent_state_shutdown(tick_broadcast_device.evtdev)) { raw_spin_unlock(&tick_broadcast_lock); return; } bc_local = tick_do_periodic_broadcast(); if (clockevent_state_oneshot(dev)) { ktime_t next = ktime_add_ns(dev->next_event, TICK_NSEC); clockevents_program_event(dev, next, true); } raw_spin_unlock(&tick_broadcast_lock); /* * We run the handler of the local cpu after dropping * tick_broadcast_lock because the handler might deadlock when * trying to switch to oneshot mode. */ if (bc_local) td->evtdev->event_handler(td->evtdev); } /** * tick_broadcast_control - Enable/disable or force broadcast mode * @mode: The selected broadcast mode * * Called when the system enters a state where affected tick devices * might stop. Note: TICK_BROADCAST_FORCE cannot be undone. */ void tick_broadcast_control(enum tick_broadcast_mode mode) { struct clock_event_device *bc, *dev; struct tick_device *td; int cpu, bc_stopped; unsigned long flags; /* Protects also the local clockevent device. */ raw_spin_lock_irqsave(&tick_broadcast_lock, flags); td = this_cpu_ptr(&tick_cpu_device); dev = td->evtdev; /* * Is the device not affected by the powerstate ? */ if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP)) goto out; if (!tick_device_is_functional(dev)) goto out; cpu = smp_processor_id(); bc = tick_broadcast_device.evtdev; bc_stopped = cpumask_empty(tick_broadcast_mask); switch (mode) { case TICK_BROADCAST_FORCE: tick_broadcast_forced = 1; fallthrough; case TICK_BROADCAST_ON: cpumask_set_cpu(cpu, tick_broadcast_on); if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) { /* * Only shutdown the cpu local device, if: * * - the broadcast device exists * - the broadcast device is not a hrtimer based one * - the broadcast device is in periodic mode to * avoid a hiccup during switch to oneshot mode */ if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER) && tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) clockevents_shutdown(dev); } break; case TICK_BROADCAST_OFF: if (tick_broadcast_forced) break; cpumask_clear_cpu(cpu, tick_broadcast_on); if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) { if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) tick_setup_periodic(dev, 0); } break; } if (bc) { if (cpumask_empty(tick_broadcast_mask)) { if (!bc_stopped) clockevents_shutdown(bc); } else if (bc_stopped) { if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) tick_broadcast_start_periodic(bc); else tick_broadcast_setup_oneshot(bc, false); } } out: raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } EXPORT_SYMBOL_GPL(tick_broadcast_control); /* * Set the periodic handler depending on broadcast on/off */ void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast) { if (!broadcast) dev->event_handler = tick_handle_periodic; else dev->event_handler = tick_handle_periodic_broadcast; } #ifdef CONFIG_HOTPLUG_CPU static void tick_shutdown_broadcast(void) { struct clock_event_device *bc = tick_broadcast_device.evtdev; if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) { if (bc && cpumask_empty(tick_broadcast_mask)) clockevents_shutdown(bc); } } /* * Remove a CPU from broadcasting */ void tick_broadcast_offline(unsigned int cpu) { raw_spin_lock(&tick_broadcast_lock); cpumask_clear_cpu(cpu, tick_broadcast_mask); cpumask_clear_cpu(cpu, tick_broadcast_on); tick_broadcast_oneshot_offline(cpu); tick_shutdown_broadcast(); raw_spin_unlock(&tick_broadcast_lock); } #endif void tick_suspend_broadcast(void) { struct clock_event_device *bc; unsigned long flags; raw_spin_lock_irqsave(&tick_broadcast_lock, flags); bc = tick_broadcast_device.evtdev; if (bc) clockevents_shutdown(bc); raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } /* * This is called from tick_resume_local() on a resuming CPU. That's * called from the core resume function, tick_unfreeze() and the magic XEN * resume hackery. * * In none of these cases the broadcast device mode can change and the * bit of the resuming CPU in the broadcast mask is safe as well. */ bool tick_resume_check_broadcast(void) { if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT) return false; else return cpumask_test_cpu(smp_processor_id(), tick_broadcast_mask); } void tick_resume_broadcast(void) { struct clock_event_device *bc; unsigned long flags; raw_spin_lock_irqsave(&tick_broadcast_lock, flags); bc = tick_broadcast_device.evtdev; if (bc) { clockevents_tick_resume(bc); switch (tick_broadcast_device.mode) { case TICKDEV_MODE_PERIODIC: if (!cpumask_empty(tick_broadcast_mask)) tick_broadcast_start_periodic(bc); break; case TICKDEV_MODE_ONESHOT: if (!cpumask_empty(tick_broadcast_mask)) tick_resume_broadcast_oneshot(bc); break; } } raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } #ifdef CONFIG_TICK_ONESHOT static cpumask_var_t tick_broadcast_oneshot_mask __cpumask_var_read_mostly; static cpumask_var_t tick_broadcast_pending_mask __cpumask_var_read_mostly; static cpumask_var_t tick_broadcast_force_mask __cpumask_var_read_mostly; /* * Exposed for debugging: see timer_list.c */ struct cpumask *tick_get_broadcast_oneshot_mask(void) { return tick_broadcast_oneshot_mask; } /* * Called before going idle with interrupts disabled. Checks whether a * broadcast event from the other core is about to happen. We detected * that in tick_broadcast_oneshot_control(). The callsite can use this * to avoid a deep idle transition as we are about to get the * broadcast IPI right away. */ noinstr int tick_check_broadcast_expired(void) { #ifdef _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H return arch_test_bit(smp_processor_id(), cpumask_bits(tick_broadcast_force_mask)); #else return cpumask_test_cpu(smp_processor_id(), tick_broadcast_force_mask); #endif } /* * Set broadcast interrupt affinity */ static void tick_broadcast_set_affinity(struct clock_event_device *bc, const struct cpumask *cpumask) { if (!(bc->features & CLOCK_EVT_FEAT_DYNIRQ)) return; if (cpumask_equal(bc->cpumask, cpumask)) return; bc->cpumask = cpumask; irq_set_affinity(bc->irq, bc->cpumask); } static void tick_broadcast_set_event(struct clock_event_device *bc, int cpu, ktime_t expires) { if (!clockevent_state_oneshot(bc)) clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT); clockevents_program_event(bc, expires, 1); tick_broadcast_set_affinity(bc, cpumask_of(cpu)); } static void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT); } /* * Called from irq_enter() when idle was interrupted to reenable the * per cpu device. */ void tick_check_oneshot_broadcast_this_cpu(void) { if (cpumask_test_cpu(smp_processor_id(), tick_broadcast_oneshot_mask)) { struct tick_device *td = this_cpu_ptr(&tick_cpu_device); /* * We might be in the middle of switching over from * periodic to oneshot. If the CPU has not yet * switched over, leave the device alone. */ if (td->mode == TICKDEV_MODE_ONESHOT) { clockevents_switch_state(td->evtdev, CLOCK_EVT_STATE_ONESHOT); } } } /* * Handle oneshot mode broadcasting */ static void tick_handle_oneshot_broadcast(struct clock_event_device *dev) { struct tick_device *td; ktime_t now, next_event; int cpu, next_cpu = 0; bool bc_local; raw_spin_lock(&tick_broadcast_lock); dev->next_event = KTIME_MAX; next_event = KTIME_MAX; cpumask_clear(tmpmask); now = ktime_get(); /* Find all expired events */ for_each_cpu(cpu, tick_broadcast_oneshot_mask) { /* * Required for !SMP because for_each_cpu() reports * unconditionally CPU0 as set on UP kernels. */ if (!IS_ENABLED(CONFIG_SMP) && cpumask_empty(tick_broadcast_oneshot_mask)) break; td = &per_cpu(tick_cpu_device, cpu); if (td->evtdev->next_event <= now) { cpumask_set_cpu(cpu, tmpmask); /* * Mark the remote cpu in the pending mask, so * it can avoid reprogramming the cpu local * timer in tick_broadcast_oneshot_control(). */ cpumask_set_cpu(cpu, tick_broadcast_pending_mask); } else if (td->evtdev->next_event < next_event) { next_event = td->evtdev->next_event; next_cpu = cpu; } } /* * Remove the current cpu from the pending mask. The event is * delivered immediately in tick_do_broadcast() ! */ cpumask_clear_cpu(smp_processor_id(), tick_broadcast_pending_mask); /* Take care of enforced broadcast requests */ cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask); cpumask_clear(tick_broadcast_force_mask); /* * Sanity check. Catch the case where we try to broadcast to * offline cpus. */ if (WARN_ON_ONCE(!cpumask_subset(tmpmask, cpu_online_mask))) cpumask_and(tmpmask, tmpmask, cpu_online_mask); /* * Wakeup the cpus which have an expired event. */ bc_local = tick_do_broadcast(tmpmask); /* * Two reasons for reprogram: * * - The global event did not expire any CPU local * events. This happens in dyntick mode, as the maximum PIT * delta is quite small. * * - There are pending events on sleeping CPUs which were not * in the event mask */ if (next_event != KTIME_MAX) tick_broadcast_set_event(dev, next_cpu, next_event); raw_spin_unlock(&tick_broadcast_lock); if (bc_local) { td = this_cpu_ptr(&tick_cpu_device); td->evtdev->event_handler(td->evtdev); } } static int broadcast_needs_cpu(struct clock_event_device *bc, int cpu) { if (!(bc->features & CLOCK_EVT_FEAT_HRTIMER)) return 0; if (bc->next_event == KTIME_MAX) return 0; return bc->bound_on == cpu ? -EBUSY : 0; } static void broadcast_shutdown_local(struct clock_event_device *bc, struct clock_event_device *dev) { /* * For hrtimer based broadcasting we cannot shutdown the cpu * local device if our own event is the first one to expire or * if we own the broadcast timer. */ if (bc->features & CLOCK_EVT_FEAT_HRTIMER) { if (broadcast_needs_cpu(bc, smp_processor_id())) return; if (dev->next_event < bc->next_event) return; } clockevents_switch_state(dev, CLOCK_EVT_STATE_SHUTDOWN); } static int ___tick_broadcast_oneshot_control(enum tick_broadcast_state state, struct tick_device *td, int cpu) { struct clock_event_device *bc, *dev = td->evtdev; int ret = 0; ktime_t now; raw_spin_lock(&tick_broadcast_lock); bc = tick_broadcast_device.evtdev; if (state == TICK_BROADCAST_ENTER) { /* * If the current CPU owns the hrtimer broadcast * mechanism, it cannot go deep idle and we do not add * the CPU to the broadcast mask. We don't have to go * through the EXIT path as the local timer is not * shutdown. */ ret = broadcast_needs_cpu(bc, cpu); if (ret) goto out; /* * If the broadcast device is in periodic mode, we * return. */ if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) { /* If it is a hrtimer based broadcast, return busy */ if (bc->features & CLOCK_EVT_FEAT_HRTIMER) ret = -EBUSY; goto out; } if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) { WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask)); /* Conditionally shut down the local timer. */ broadcast_shutdown_local(bc, dev); /* * We only reprogram the broadcast timer if we * did not mark ourself in the force mask and * if the cpu local event is earlier than the * broadcast event. If the current CPU is in * the force mask, then we are going to be * woken by the IPI right away; we return * busy, so the CPU does not try to go deep * idle. */ if (cpumask_test_cpu(cpu, tick_broadcast_force_mask)) { ret = -EBUSY; } else if (dev->next_event < bc->next_event) { tick_broadcast_set_event(bc, cpu, dev->next_event); /* * In case of hrtimer broadcasts the * programming might have moved the * timer to this cpu. If yes, remove * us from the broadcast mask and * return busy. */ ret = broadcast_needs_cpu(bc, cpu); if (ret) { cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask); } } } } else { if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) { clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT); /* * The cpu which was handling the broadcast * timer marked this cpu in the broadcast * pending mask and fired the broadcast * IPI. So we are going to handle the expired * event anyway via the broadcast IPI * handler. No need to reprogram the timer * with an already expired event. */ if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_pending_mask)) goto out; /* * Bail out if there is no next event. */ if (dev->next_event == KTIME_MAX) goto out; /* * If the pending bit is not set, then we are * either the CPU handling the broadcast * interrupt or we got woken by something else. * * We are no longer in the broadcast mask, so * if the cpu local expiry time is already * reached, we would reprogram the cpu local * timer with an already expired event. * * This can lead to a ping-pong when we return * to idle and therefore rearm the broadcast * timer before the cpu local timer was able * to fire. This happens because the forced * reprogramming makes sure that the event * will happen in the future and depending on * the min_delta setting this might be far * enough out that the ping-pong starts. * * If the cpu local next_event has expired * then we know that the broadcast timer * next_event has expired as well and * broadcast is about to be handled. So we * avoid reprogramming and enforce that the * broadcast handler, which did not run yet, * will invoke the cpu local handler. * * We cannot call the handler directly from * here, because we might be in a NOHZ phase * and we did not go through the irq_enter() * nohz fixups. */ now = ktime_get(); if (dev->next_event <= now) { cpumask_set_cpu(cpu, tick_broadcast_force_mask); goto out; } /* * We got woken by something else. Reprogram * the cpu local timer device. */ tick_program_event(dev->next_event, 1); } } out: raw_spin_unlock(&tick_broadcast_lock); return ret; } static int tick_oneshot_wakeup_control(enum tick_broadcast_state state, struct tick_device *td, int cpu) { struct clock_event_device *dev, *wd; dev = td->evtdev; if (td->mode != TICKDEV_MODE_ONESHOT) return -EINVAL; wd = tick_get_oneshot_wakeup_device(cpu); if (!wd) return -ENODEV; switch (state) { case TICK_BROADCAST_ENTER: clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT_STOPPED); clockevents_switch_state(wd, CLOCK_EVT_STATE_ONESHOT); clockevents_program_event(wd, dev->next_event, 1); break; case TICK_BROADCAST_EXIT: /* We may have transitioned to oneshot mode while idle */ if (clockevent_get_state(wd) != CLOCK_EVT_STATE_ONESHOT) return -ENODEV; } return 0; } int __tick_broadcast_oneshot_control(enum tick_broadcast_state state) { struct tick_device *td = this_cpu_ptr(&tick_cpu_device); int cpu = smp_processor_id(); if (!tick_oneshot_wakeup_control(state, td, cpu)) return 0; if (tick_broadcast_device.evtdev) return ___tick_broadcast_oneshot_control(state, td, cpu); /* * If there is no broadcast or wakeup device, tell the caller not * to go into deep idle. */ return -EBUSY; } /* * Reset the one shot broadcast for a cpu * * Called with tick_broadcast_lock held */ static void tick_broadcast_clear_oneshot(int cpu) { cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask); cpumask_clear_cpu(cpu, tick_broadcast_pending_mask); } static void tick_broadcast_init_next_event(struct cpumask *mask, ktime_t expires) { struct tick_device *td; int cpu; for_each_cpu(cpu, mask) { td = &per_cpu(tick_cpu_device, cpu); if (td->evtdev) td->evtdev->next_event = expires; } } static inline ktime_t tick_get_next_period(void) { ktime_t next; /* * Protect against concurrent updates (store /load tearing on * 32bit). It does not matter if the time is already in the * past. The broadcast device which is about to be programmed will * fire in any case. */ raw_spin_lock(&jiffies_lock); next = tick_next_period; raw_spin_unlock(&jiffies_lock); return next; } /** * tick_broadcast_setup_oneshot - setup the broadcast device * @bc: the broadcast device * @from_periodic: true if called from periodic mode */ static void tick_broadcast_setup_oneshot(struct clock_event_device *bc, bool from_periodic) { int cpu = smp_processor_id(); ktime_t nexttick = 0; if (!bc) return; /* * When the broadcast device was switched to oneshot by the first * CPU handling the NOHZ change, the other CPUs will reach this * code via hrtimer_run_queues() -> tick_check_oneshot_change() * too. Set up the broadcast device only once! */ if (bc->event_handler == tick_handle_oneshot_broadcast) { /* * The CPU which switched from periodic to oneshot mode * set the broadcast oneshot bit for all other CPUs which * are in the general (periodic) broadcast mask to ensure * that CPUs which wait for the periodic broadcast are * woken up. * * Clear the bit for the local CPU as the set bit would * prevent the first tick_broadcast_enter() after this CPU * switched to oneshot state to program the broadcast * device. * * This code can also be reached via tick_broadcast_control(), * but this cannot avoid the tick_broadcast_clear_oneshot() * as that would break the periodic to oneshot transition of * secondary CPUs. But that's harmless as the below only * clears already cleared bits. */ tick_broadcast_clear_oneshot(cpu); return; } bc->event_handler = tick_handle_oneshot_broadcast; bc->next_event = KTIME_MAX; /* * When the tick mode is switched from periodic to oneshot it must * be ensured that CPUs which are waiting for periodic broadcast * get their wake-up at the next tick. This is achieved by ORing * tick_broadcast_mask into tick_broadcast_oneshot_mask. * * For other callers, e.g. broadcast device replacement, * tick_broadcast_oneshot_mask must not be touched as this would * set bits for CPUs which are already NOHZ, but not idle. Their * next tick_broadcast_enter() would observe the bit set and fail * to update the expiry time and the broadcast event device. */ if (from_periodic) { cpumask_copy(tmpmask, tick_broadcast_mask); /* Remove the local CPU as it is obviously not idle */ cpumask_clear_cpu(cpu, tmpmask); cpumask_or(tick_broadcast_oneshot_mask, tick_broadcast_oneshot_mask, tmpmask); /* * Ensure that the oneshot broadcast handler will wake the * CPUs which are still waiting for periodic broadcast. */ nexttick = tick_get_next_period(); tick_broadcast_init_next_event(tmpmask, nexttick); /* * If the underlying broadcast clock event device is * already in oneshot state, then there is nothing to do. * The device was already armed for the next tick * in tick_handle_broadcast_periodic() */ if (clockevent_state_oneshot(bc)) return; } /* * When switching from periodic to oneshot mode arm the broadcast * device for the next tick. * * If the broadcast device has been replaced in oneshot mode and * the oneshot broadcast mask is not empty, then arm it to expire * immediately in order to reevaluate the next expiring timer. * @nexttick is 0 and therefore in the past which will cause the * clockevent code to force an event. * * For both cases the programming can be avoided when the oneshot * broadcast mask is empty. * * tick_broadcast_set_event() implicitly switches the broadcast * device to oneshot state. */ if (!cpumask_empty(tick_broadcast_oneshot_mask)) tick_broadcast_set_event(bc, cpu, nexttick); } /* * Select oneshot operating mode for the broadcast device */ void tick_broadcast_switch_to_oneshot(void) { struct clock_event_device *bc; enum tick_device_mode oldmode; unsigned long flags; raw_spin_lock_irqsave(&tick_broadcast_lock, flags); oldmode = tick_broadcast_device.mode; tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT; bc = tick_broadcast_device.evtdev; if (bc) tick_broadcast_setup_oneshot(bc, oldmode == TICKDEV_MODE_PERIODIC); raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } #ifdef CONFIG_HOTPLUG_CPU void hotplug_cpu__broadcast_tick_pull(int deadcpu) { struct clock_event_device *bc; unsigned long flags; raw_spin_lock_irqsave(&tick_broadcast_lock, flags); bc = tick_broadcast_device.evtdev; if (bc && broadcast_needs_cpu(bc, deadcpu)) { /* * If the broadcast force bit of the current CPU is set, * then the current CPU has not yet reprogrammed the local * timer device to avoid a ping-pong race. See * ___tick_broadcast_oneshot_control(). * * If the broadcast device is hrtimer based then * programming the broadcast event below does not have any * effect because the local clockevent device is not * running and not programmed because the broadcast event * is not earlier than the pending event of the local clock * event device. As a consequence all CPUs waiting for a * broadcast event are stuck forever. * * Detect this condition and reprogram the cpu local timer * device to avoid the starvation. */ if (tick_check_broadcast_expired()) { struct tick_device *td = this_cpu_ptr(&tick_cpu_device); cpumask_clear_cpu(smp_processor_id(), tick_broadcast_force_mask); tick_program_event(td->evtdev->next_event, 1); } /* This moves the broadcast assignment to this CPU: */ clockevents_program_event(bc, bc->next_event, 1); } raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } /* * Remove a dying CPU from broadcasting */ static void tick_broadcast_oneshot_offline(unsigned int cpu) { if (tick_get_oneshot_wakeup_device(cpu)) tick_set_oneshot_wakeup_device(NULL, cpu); /* * Clear the broadcast masks for the dead cpu, but do not stop * the broadcast device! */ cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask); cpumask_clear_cpu(cpu, tick_broadcast_pending_mask); cpumask_clear_cpu(cpu, tick_broadcast_force_mask); } #endif /* * Check, whether the broadcast device is in one shot mode */ int tick_broadcast_oneshot_active(void) { return tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT; } /* * Check whether the broadcast device supports oneshot. */ bool tick_broadcast_oneshot_available(void) { struct clock_event_device *bc = tick_broadcast_device.evtdev; return bc ? bc->features & CLOCK_EVT_FEAT_ONESHOT : false; } #else int __tick_broadcast_oneshot_control(enum tick_broadcast_state state) { struct clock_event_device *bc = tick_broadcast_device.evtdev; if (!bc || (bc->features & CLOCK_EVT_FEAT_HRTIMER)) return -EBUSY; return 0; } #endif void __init tick_broadcast_init(void) { zalloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT); zalloc_cpumask_var(&tick_broadcast_on, GFP_NOWAIT); zalloc_cpumask_var(&tmpmask, GFP_NOWAIT); #ifdef CONFIG_TICK_ONESHOT zalloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT); zalloc_cpumask_var(&tick_broadcast_pending_mask, GFP_NOWAIT); zalloc_cpumask_var(&tick_broadcast_force_mask, GFP_NOWAIT); #endif }
91 432 23 20 16 5 5 5 31 2 1 8 1 5 33 34 31 7 34 8 8 7 2 48 48 430 431 432 432 426 2 431 3 432 431 432 2 2 432 432 432 18 17 1 1 18 425 427 1 1 427 427 18 425 427 297 297 297 296 412 413 407 297 297 1 1 5 1 5 5 5 1 1 4 5 91 91 432 432 430 432 430 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 // SPDX-License-Identifier: GPL-2.0-or-later /* audit.c -- Auditing support * Gateway between the kernel (e.g., selinux) and the user-space audit daemon. * System-call specific features have moved to auditsc.c * * Copyright 2003-2007 Red Hat Inc., Durham, North Carolina. * All Rights Reserved. * * Written by Rickard E. (Rik) Faith <faith@redhat.com> * * Goals: 1) Integrate fully with Security Modules. * 2) Minimal run-time overhead: * a) Minimal when syscall auditing is disabled (audit_enable=0). * b) Small when syscall auditing is enabled and no audit record * is generated (defer as much work as possible to record * generation time): * i) context is allocated, * ii) names from getname are stored without a copy, and * iii) inode information stored from path_lookup. * 3) Ability to disable syscall auditing at boot time (audit=0). * 4) Usable by other parts of the kernel (if audit_log* is called, * then a syscall record will be generated automatically for the * current syscall). * 5) Netlink interface to user-space. * 6) Support low-overhead kernel-based filtering to minimize the * information that must be passed to user-space. * * Audit userspace, documentation, tests, and bug/issue trackers: * https://github.com/linux-audit */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/file.h> #include <linux/init.h> #include <linux/types.h> #include <linux/atomic.h> #include <linux/mm.h> #include <linux/export.h> #include <linux/slab.h> #include <linux/err.h> #include <linux/kthread.h> #include <linux/kernel.h> #include <linux/syscalls.h> #include <linux/spinlock.h> #include <linux/rcupdate.h> #include <linux/mutex.h> #include <linux/gfp.h> #include <linux/pid.h> #include <linux/audit.h> #include <net/sock.h> #include <net/netlink.h> #include <linux/skbuff.h> #include <linux/security.h> #include <linux/lsm_hooks.h> #include <linux/freezer.h> #include <linux/pid_namespace.h> #include <net/netns/generic.h> #include "audit.h" /* No auditing will take place until audit_initialized == AUDIT_INITIALIZED. * (Initialization happens after skb_init is called.) */ #define AUDIT_DISABLED -1 #define AUDIT_UNINITIALIZED 0 #define AUDIT_INITIALIZED 1 static int audit_initialized = AUDIT_UNINITIALIZED; u32 audit_enabled = AUDIT_OFF; bool audit_ever_enabled = !!AUDIT_OFF; EXPORT_SYMBOL_GPL(audit_enabled); /* Default state when kernel boots without any parameters. */ static u32 audit_default = AUDIT_OFF; /* If auditing cannot proceed, audit_failure selects what happens. */ static u32 audit_failure = AUDIT_FAIL_PRINTK; /* private audit network namespace index */ static unsigned int audit_net_id; /* Number of modules that provide a security context. List of lsms that provide a security context */ static u32 audit_subj_secctx_cnt; static u32 audit_obj_secctx_cnt; static const struct lsm_id *audit_subj_lsms[MAX_LSM_COUNT]; static const struct lsm_id *audit_obj_lsms[MAX_LSM_COUNT]; /** * struct audit_net - audit private network namespace data * @sk: communication socket */ struct audit_net { struct sock *sk; }; /** * struct auditd_connection - kernel/auditd connection state * @pid: auditd PID * @portid: netlink portid * @net: the associated network namespace * @rcu: RCU head * * Description: * This struct is RCU protected; you must either hold the RCU lock for reading * or the associated spinlock for writing. */ struct auditd_connection { struct pid *pid; u32 portid; struct net *net; struct rcu_head rcu; }; static struct auditd_connection __rcu *auditd_conn; static DEFINE_SPINLOCK(auditd_conn_lock); /* If audit_rate_limit is non-zero, limit the rate of sending audit records * to that number per second. This prevents DoS attacks, but results in * audit records being dropped. */ static u32 audit_rate_limit; /* Number of outstanding audit_buffers allowed. * When set to zero, this means unlimited. */ static u32 audit_backlog_limit = 64; #define AUDIT_BACKLOG_WAIT_TIME (60 * HZ) static u32 audit_backlog_wait_time = AUDIT_BACKLOG_WAIT_TIME; /* The identity of the user shutting down the audit system. */ static kuid_t audit_sig_uid = INVALID_UID; static pid_t audit_sig_pid = -1; static struct lsm_prop audit_sig_lsm; /* Records can be lost in several ways: 0) [suppressed in audit_alloc] 1) out of memory in audit_log_start [kmalloc of struct audit_buffer] 2) out of memory in audit_log_move [alloc_skb] 3) suppressed due to audit_rate_limit 4) suppressed due to audit_backlog_limit */ static atomic_t audit_lost = ATOMIC_INIT(0); /* Monotonically increasing sum of time the kernel has spent * waiting while the backlog limit is exceeded. */ static atomic_t audit_backlog_wait_time_actual = ATOMIC_INIT(0); /* Hash for inode-based rules */ struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS]; static struct kmem_cache *audit_buffer_cache; /* queue msgs to send via kauditd_task */ static struct sk_buff_head audit_queue; /* queue msgs due to temporary unicast send problems */ static struct sk_buff_head audit_retry_queue; /* queue msgs waiting for new auditd connection */ static struct sk_buff_head audit_hold_queue; /* queue servicing thread */ static struct task_struct *kauditd_task; static DECLARE_WAIT_QUEUE_HEAD(kauditd_wait); /* waitqueue for callers who are blocked on the audit backlog */ static DECLARE_WAIT_QUEUE_HEAD(audit_backlog_wait); static struct audit_features af = {.vers = AUDIT_FEATURE_VERSION, .mask = -1, .features = 0, .lock = 0,}; static char *audit_feature_names[2] = { "only_unset_loginuid", "loginuid_immutable", }; /** * struct audit_ctl_mutex - serialize requests from userspace * @lock: the mutex used for locking * @owner: the task which owns the lock * * Description: * This is the lock struct used to ensure we only process userspace requests * in an orderly fashion. We can't simply use a mutex/lock here because we * need to track lock ownership so we don't end up blocking the lock owner in * audit_log_start() or similar. */ static struct audit_ctl_mutex { struct mutex lock; void *owner; } audit_cmd_mutex; /* AUDIT_BUFSIZ is the size of the temporary buffer used for formatting * audit records. Since printk uses a 1024 byte buffer, this buffer * should be at least that large. */ #define AUDIT_BUFSIZ 1024 /* The audit_buffer is used when formatting an audit record. The caller * locks briefly to get the record off the freelist or to allocate the * buffer, and locks briefly to send the buffer to the netlink layer or * to place it on a transmit queue. Multiple audit_buffers can be in * use simultaneously. */ struct audit_buffer { struct sk_buff *skb; /* the skb for audit_log functions */ struct sk_buff_head skb_list; /* formatted skbs, ready to send */ struct audit_context *ctx; /* NULL or associated context */ struct audit_stamp stamp; /* audit stamp for these records */ gfp_t gfp_mask; }; struct audit_reply { __u32 portid; struct net *net; struct sk_buff *skb; }; /** * auditd_test_task - Check to see if a given task is an audit daemon * @task: the task to check * * Description: * Return 1 if the task is a registered audit daemon, 0 otherwise. */ int auditd_test_task(struct task_struct *task) { int rc; struct auditd_connection *ac; rcu_read_lock(); ac = rcu_dereference(auditd_conn); rc = (ac && ac->pid == task_tgid(task) ? 1 : 0); rcu_read_unlock(); return rc; } /** * audit_ctl_lock - Take the audit control lock */ void audit_ctl_lock(void) { mutex_lock(&audit_cmd_mutex.lock); audit_cmd_mutex.owner = current; } /** * audit_ctl_unlock - Drop the audit control lock */ void audit_ctl_unlock(void) { audit_cmd_mutex.owner = NULL; mutex_unlock(&audit_cmd_mutex.lock); } /** * audit_ctl_owner_current - Test to see if the current task owns the lock * * Description: * Return true if the current task owns the audit control lock, false if it * doesn't own the lock. */ static bool audit_ctl_owner_current(void) { return (current == audit_cmd_mutex.owner); } /** * auditd_pid_vnr - Return the auditd PID relative to the namespace * * Description: * Returns the PID in relation to the namespace, 0 on failure. */ static pid_t auditd_pid_vnr(void) { pid_t pid; const struct auditd_connection *ac; rcu_read_lock(); ac = rcu_dereference(auditd_conn); if (!ac || !ac->pid) pid = 0; else pid = pid_vnr(ac->pid); rcu_read_unlock(); return pid; } /** * audit_cfg_lsm - Identify a security module as providing a secctx. * @lsmid: LSM identity * @flags: which contexts are provided * * Description: * Increments the count of the security modules providing a secctx. * If the LSM id is already in the list leave it alone. */ void audit_cfg_lsm(const struct lsm_id *lsmid, int flags) { int i; if (flags & AUDIT_CFG_LSM_SECCTX_SUBJECT) { for (i = 0 ; i < audit_subj_secctx_cnt; i++) if (audit_subj_lsms[i] == lsmid) return; audit_subj_lsms[audit_subj_secctx_cnt++] = lsmid; } if (flags & AUDIT_CFG_LSM_SECCTX_OBJECT) { for (i = 0 ; i < audit_obj_secctx_cnt; i++) if (audit_obj_lsms[i] == lsmid) return; audit_obj_lsms[audit_obj_secctx_cnt++] = lsmid; } } /** * audit_get_sk - Return the audit socket for the given network namespace * @net: the destination network namespace * * Description: * Returns the sock pointer if valid, NULL otherwise. The caller must ensure * that a reference is held for the network namespace while the sock is in use. */ static struct sock *audit_get_sk(const struct net *net) { struct audit_net *aunet; if (!net) return NULL; aunet = net_generic(net, audit_net_id); return aunet->sk; } void audit_panic(const char *message) { switch (audit_failure) { case AUDIT_FAIL_SILENT: break; case AUDIT_FAIL_PRINTK: if (printk_ratelimit()) pr_err("%s\n", message); break; case AUDIT_FAIL_PANIC: panic("audit: %s\n", message); break; } } static inline int audit_rate_check(void) { static unsigned long last_check = 0; static int messages = 0; static DEFINE_SPINLOCK(lock); unsigned long flags; unsigned long now; int retval = 0; if (!audit_rate_limit) return 1; spin_lock_irqsave(&lock, flags); if (++messages < audit_rate_limit) { retval = 1; } else { now = jiffies; if (time_after(now, last_check + HZ)) { last_check = now; messages = 0; retval = 1; } } spin_unlock_irqrestore(&lock, flags); return retval; } /** * audit_log_lost - conditionally log lost audit message event * @message: the message stating reason for lost audit message * * Emit at least 1 message per second, even if audit_rate_check is * throttling. * Always increment the lost messages counter. */ void audit_log_lost(const char *message) { static unsigned long last_msg = 0; static DEFINE_SPINLOCK(lock); unsigned long flags; unsigned long now; int print; atomic_inc(&audit_lost); print = (audit_failure == AUDIT_FAIL_PANIC || !audit_rate_limit); if (!print) { spin_lock_irqsave(&lock, flags); now = jiffies; if (time_after(now, last_msg + HZ)) { print = 1; last_msg = now; } spin_unlock_irqrestore(&lock, flags); } if (print) { if (printk_ratelimit()) pr_warn("audit_lost=%u audit_rate_limit=%u audit_backlog_limit=%u\n", atomic_read(&audit_lost), audit_rate_limit, audit_backlog_limit); audit_panic(message); } } static int audit_log_config_change(char *function_name, u32 new, u32 old, int allow_changes) { struct audit_buffer *ab; int rc = 0; ab = audit_log_start(audit_context(), GFP_KERNEL, AUDIT_CONFIG_CHANGE); if (unlikely(!ab)) return rc; audit_log_format(ab, "op=set %s=%u old=%u ", function_name, new, old); audit_log_session_info(ab); rc = audit_log_task_context(ab); if (rc) allow_changes = 0; /* Something weird, deny request */ audit_log_format(ab, " res=%d", allow_changes); audit_log_end(ab); return rc; } static int audit_do_config_change(char *function_name, u32 *to_change, u32 new) { int allow_changes, rc = 0; u32 old = *to_change; /* check if we are locked */ if (audit_enabled == AUDIT_LOCKED) allow_changes = 0; else allow_changes = 1; if (audit_enabled != AUDIT_OFF) { rc = audit_log_config_change(function_name, new, old, allow_changes); if (rc) allow_changes = 0; } /* If we are allowed, make the change */ if (allow_changes == 1) *to_change = new; /* Not allowed, update reason */ else if (rc == 0) rc = -EPERM; return rc; } static int audit_set_rate_limit(u32 limit) { return audit_do_config_change("audit_rate_limit", &audit_rate_limit, limit); } static int audit_set_backlog_limit(u32 limit) { return audit_do_config_change("audit_backlog_limit", &audit_backlog_limit, limit); } static int audit_set_backlog_wait_time(u32 timeout) { return audit_do_config_change("audit_backlog_wait_time", &audit_backlog_wait_time, timeout); } static int audit_set_enabled(u32 state) { int rc; if (state > AUDIT_LOCKED) return -EINVAL; rc = audit_do_config_change("audit_enabled", &audit_enabled, state); if (!rc) audit_ever_enabled |= !!state; return rc; } static int audit_set_failure(u32 state) { if (state != AUDIT_FAIL_SILENT && state != AUDIT_FAIL_PRINTK && state != AUDIT_FAIL_PANIC) return -EINVAL; return audit_do_config_change("audit_failure", &audit_failure, state); } /** * auditd_conn_free - RCU helper to release an auditd connection struct * @rcu: RCU head * * Description: * Drop any references inside the auditd connection tracking struct and free * the memory. */ static void auditd_conn_free(struct rcu_head *rcu) { struct auditd_connection *ac; ac = container_of(rcu, struct auditd_connection, rcu); put_pid(ac->pid); put_net(ac->net); kfree(ac); } /** * auditd_set - Set/Reset the auditd connection state * @pid: auditd PID * @portid: auditd netlink portid * @net: auditd network namespace pointer * @skb: the netlink command from the audit daemon * @ack: netlink ack flag, cleared if ack'd here * * Description: * This function will obtain and drop network namespace references as * necessary. Returns zero on success, negative values on failure. */ static int auditd_set(struct pid *pid, u32 portid, struct net *net, struct sk_buff *skb, bool *ack) { unsigned long flags; struct auditd_connection *ac_old, *ac_new; struct nlmsghdr *nlh; if (!pid || !net) return -EINVAL; ac_new = kzalloc(sizeof(*ac_new), GFP_KERNEL); if (!ac_new) return -ENOMEM; ac_new->pid = get_pid(pid); ac_new->portid = portid; ac_new->net = get_net(net); /* send the ack now to avoid a race with the queue backlog */ if (*ack) { nlh = nlmsg_hdr(skb); netlink_ack(skb, nlh, 0, NULL); *ack = false; } spin_lock_irqsave(&auditd_conn_lock, flags); ac_old = rcu_dereference_protected(auditd_conn, lockdep_is_held(&auditd_conn_lock)); rcu_assign_pointer(auditd_conn, ac_new); spin_unlock_irqrestore(&auditd_conn_lock, flags); if (ac_old) call_rcu(&ac_old->rcu, auditd_conn_free); return 0; } /** * kauditd_printk_skb - Print the audit record to the ring buffer * @skb: audit record * * Whatever the reason, this packet may not make it to the auditd connection * so write it via printk so the information isn't completely lost. */ static void kauditd_printk_skb(struct sk_buff *skb) { struct nlmsghdr *nlh = nlmsg_hdr(skb); char *data = nlmsg_data(nlh); if (nlh->nlmsg_type != AUDIT_EOE && printk_ratelimit()) pr_notice("type=%d %s\n", nlh->nlmsg_type, data); } /** * kauditd_rehold_skb - Handle a audit record send failure in the hold queue * @skb: audit record * @error: error code (unused) * * Description: * This should only be used by the kauditd_thread when it fails to flush the * hold queue. */ static void kauditd_rehold_skb(struct sk_buff *skb, __always_unused int error) { /* put the record back in the queue */ skb_queue_tail(&audit_hold_queue, skb); } /** * kauditd_hold_skb - Queue an audit record, waiting for auditd * @skb: audit record * @error: error code * * Description: * Queue the audit record, waiting for an instance of auditd. When this * function is called we haven't given up yet on sending the record, but things * are not looking good. The first thing we want to do is try to write the * record via printk and then see if we want to try and hold on to the record * and queue it, if we have room. If we want to hold on to the record, but we * don't have room, record a record lost message. */ static void kauditd_hold_skb(struct sk_buff *skb, int error) { /* at this point it is uncertain if we will ever send this to auditd so * try to send the message via printk before we go any further */ kauditd_printk_skb(skb); /* can we just silently drop the message? */ if (!audit_default) goto drop; /* the hold queue is only for when the daemon goes away completely, * not -EAGAIN failures; if we are in a -EAGAIN state requeue the * record on the retry queue unless it's full, in which case drop it */ if (error == -EAGAIN) { if (!audit_backlog_limit || skb_queue_len(&audit_retry_queue) < audit_backlog_limit) { skb_queue_tail(&audit_retry_queue, skb); return; } audit_log_lost("kauditd retry queue overflow"); goto drop; } /* if we have room in the hold queue, queue the message */ if (!audit_backlog_limit || skb_queue_len(&audit_hold_queue) < audit_backlog_limit) { skb_queue_tail(&audit_hold_queue, skb); return; } /* we have no other options - drop the message */ audit_log_lost("kauditd hold queue overflow"); drop: kfree_skb(skb); } /** * kauditd_retry_skb - Queue an audit record, attempt to send again to auditd * @skb: audit record * @error: error code (unused) * * Description: * Not as serious as kauditd_hold_skb() as we still have a connected auditd, * but for some reason we are having problems sending it audit records so * queue the given record and attempt to resend. */ static void kauditd_retry_skb(struct sk_buff *skb, __always_unused int error) { if (!audit_backlog_limit || skb_queue_len(&audit_retry_queue) < audit_backlog_limit) { skb_queue_tail(&audit_retry_queue, skb); return; } /* we have to drop the record, send it via printk as a last effort */ kauditd_printk_skb(skb); audit_log_lost("kauditd retry queue overflow"); kfree_skb(skb); } /** * auditd_reset - Disconnect the auditd connection * @ac: auditd connection state * * Description: * Break the auditd/kauditd connection and move all the queued records into the * hold queue in case auditd reconnects. It is important to note that the @ac * pointer should never be dereferenced inside this function as it may be NULL * or invalid, you can only compare the memory address! If @ac is NULL then * the connection will always be reset. */ static void auditd_reset(const struct auditd_connection *ac) { unsigned long flags; struct sk_buff *skb; struct auditd_connection *ac_old; /* if it isn't already broken, break the connection */ spin_lock_irqsave(&auditd_conn_lock, flags); ac_old = rcu_dereference_protected(auditd_conn, lockdep_is_held(&auditd_conn_lock)); if (ac && ac != ac_old) { /* someone already registered a new auditd connection */ spin_unlock_irqrestore(&auditd_conn_lock, flags); return; } rcu_assign_pointer(auditd_conn, NULL); spin_unlock_irqrestore(&auditd_conn_lock, flags); if (ac_old) call_rcu(&ac_old->rcu, auditd_conn_free); /* flush the retry queue to the hold queue, but don't touch the main * queue since we need to process that normally for multicast */ while ((skb = skb_dequeue(&audit_retry_queue))) kauditd_hold_skb(skb, -ECONNREFUSED); } /** * auditd_send_unicast_skb - Send a record via unicast to auditd * @skb: audit record * * Description: * Send a skb to the audit daemon, returns positive/zero values on success and * negative values on failure; in all cases the skb will be consumed by this * function. If the send results in -ECONNREFUSED the connection with auditd * will be reset. This function may sleep so callers should not hold any locks * where this would cause a problem. */ static int auditd_send_unicast_skb(struct sk_buff *skb) { int rc; u32 portid; struct net *net; struct sock *sk; struct auditd_connection *ac; /* NOTE: we can't call netlink_unicast while in the RCU section so * take a reference to the network namespace and grab local * copies of the namespace, the sock, and the portid; the * namespace and sock aren't going to go away while we hold a * reference and if the portid does become invalid after the RCU * section netlink_unicast() should safely return an error */ rcu_read_lock(); ac = rcu_dereference(auditd_conn); if (!ac) { rcu_read_unlock(); kfree_skb(skb); rc = -ECONNREFUSED; goto err; } net = get_net(ac->net); sk = audit_get_sk(net); portid = ac->portid; rcu_read_unlock(); rc = netlink_unicast(sk, skb, portid, 0); put_net(net); if (rc < 0) goto err; return rc; err: if (ac && rc == -ECONNREFUSED) auditd_reset(ac); return rc; } /** * kauditd_send_queue - Helper for kauditd_thread to flush skb queues * @sk: the sending sock * @portid: the netlink destination * @queue: the skb queue to process * @retry_limit: limit on number of netlink unicast failures * @skb_hook: per-skb hook for additional processing * @err_hook: hook called if the skb fails the netlink unicast send * * Description: * Run through the given queue and attempt to send the audit records to auditd, * returns zero on success, negative values on failure. It is up to the caller * to ensure that the @sk is valid for the duration of this function. * */ static int kauditd_send_queue(struct sock *sk, u32 portid, struct sk_buff_head *queue, unsigned int retry_limit, void (*skb_hook)(struct sk_buff *skb), void (*err_hook)(struct sk_buff *skb, int error)) { int rc = 0; struct sk_buff *skb = NULL; struct sk_buff *skb_tail; unsigned int failed = 0; /* NOTE: kauditd_thread takes care of all our locking, we just use * the netlink info passed to us (e.g. sk and portid) */ skb_tail = skb_peek_tail(queue); while ((skb != skb_tail) && (skb = skb_dequeue(queue))) { /* call the skb_hook for each skb we touch */ if (skb_hook) (*skb_hook)(skb); /* can we send to anyone via unicast? */ if (!sk) { if (err_hook) (*err_hook)(skb, -ECONNREFUSED); continue; } retry: /* grab an extra skb reference in case of error */ skb_get(skb); rc = netlink_unicast(sk, skb, portid, 0); if (rc < 0) { /* send failed - try a few times unless fatal error */ if (++failed >= retry_limit || rc == -ECONNREFUSED || rc == -EPERM) { sk = NULL; if (err_hook) (*err_hook)(skb, rc); if (rc == -EAGAIN) rc = 0; /* continue to drain the queue */ continue; } else goto retry; } else { /* skb sent - drop the extra reference and continue */ consume_skb(skb); failed = 0; } } return (rc >= 0 ? 0 : rc); } /* * kauditd_send_multicast_skb - Send a record to any multicast listeners * @skb: audit record * * Description: * Write a multicast message to anyone listening in the initial network * namespace. This function doesn't consume an skb as might be expected since * it has to copy it anyways. */ static void kauditd_send_multicast_skb(struct sk_buff *skb) { struct sk_buff *copy; struct sock *sock = audit_get_sk(&init_net); struct nlmsghdr *nlh; /* NOTE: we are not taking an additional reference for init_net since * we don't have to worry about it going away */ if (!netlink_has_listeners(sock, AUDIT_NLGRP_READLOG)) return; /* * The seemingly wasteful skb_copy() rather than bumping the refcount * using skb_get() is necessary because non-standard mods are made to * the skb by the original kaudit unicast socket send routine. The * existing auditd daemon assumes this breakage. Fixing this would * require co-ordinating a change in the established protocol between * the kaudit kernel subsystem and the auditd userspace code. There is * no reason for new multicast clients to continue with this * non-compliance. */ copy = skb_copy(skb, GFP_KERNEL); if (!copy) return; nlh = nlmsg_hdr(copy); nlh->nlmsg_len = skb->len; nlmsg_multicast(sock, copy, 0, AUDIT_NLGRP_READLOG, GFP_KERNEL); } /** * kauditd_thread - Worker thread to send audit records to userspace * @dummy: unused */ static int kauditd_thread(void *dummy) { int rc; u32 portid = 0; struct net *net = NULL; struct sock *sk = NULL; struct auditd_connection *ac; #define UNICAST_RETRIES 5 set_freezable(); while (!kthread_should_stop()) { /* NOTE: see the lock comments in auditd_send_unicast_skb() */ rcu_read_lock(); ac = rcu_dereference(auditd_conn); if (!ac) { rcu_read_unlock(); goto main_queue; } net = get_net(ac->net); sk = audit_get_sk(net); portid = ac->portid; rcu_read_unlock(); /* attempt to flush the hold queue */ rc = kauditd_send_queue(sk, portid, &audit_hold_queue, UNICAST_RETRIES, NULL, kauditd_rehold_skb); if (rc < 0) { sk = NULL; auditd_reset(ac); goto main_queue; } /* attempt to flush the retry queue */ rc = kauditd_send_queue(sk, portid, &audit_retry_queue, UNICAST_RETRIES, NULL, kauditd_hold_skb); if (rc < 0) { sk = NULL; auditd_reset(ac); goto main_queue; } main_queue: /* process the main queue - do the multicast send and attempt * unicast, dump failed record sends to the retry queue; if * sk == NULL due to previous failures we will just do the * multicast send and move the record to the hold queue */ rc = kauditd_send_queue(sk, portid, &audit_queue, 1, kauditd_send_multicast_skb, (sk ? kauditd_retry_skb : kauditd_hold_skb)); if (ac && rc < 0) auditd_reset(ac); sk = NULL; /* drop our netns reference, no auditd sends past this line */ if (net) { put_net(net); net = NULL; } /* we have processed all the queues so wake everyone */ wake_up(&audit_backlog_wait); /* NOTE: we want to wake up if there is anything on the queue, * regardless of if an auditd is connected, as we need to * do the multicast send and rotate records from the * main queue to the retry/hold queues */ wait_event_freezable(kauditd_wait, (skb_queue_len(&audit_queue) ? 1 : 0)); } return 0; } int audit_send_list_thread(void *_dest) { struct audit_netlink_list *dest = _dest; struct sk_buff *skb; struct sock *sk = audit_get_sk(dest->net); /* wait for parent to finish and send an ACK */ audit_ctl_lock(); audit_ctl_unlock(); while ((skb = __skb_dequeue(&dest->q)) != NULL) netlink_unicast(sk, skb, dest->portid, 0); put_net(dest->net); kfree(dest); return 0; } struct sk_buff *audit_make_reply(int seq, int type, int done, int multi, const void *payload, int size) { struct sk_buff *skb; struct nlmsghdr *nlh; void *data; int flags = multi ? NLM_F_MULTI : 0; int t = done ? NLMSG_DONE : type; skb = nlmsg_new(size, GFP_KERNEL); if (!skb) return NULL; nlh = nlmsg_put(skb, 0, seq, t, size, flags); if (!nlh) goto out_kfree_skb; data = nlmsg_data(nlh); memcpy(data, payload, size); return skb; out_kfree_skb: kfree_skb(skb); return NULL; } static void audit_free_reply(struct audit_reply *reply) { if (!reply) return; kfree_skb(reply->skb); if (reply->net) put_net(reply->net); kfree(reply); } static int audit_send_reply_thread(void *arg) { struct audit_reply *reply = (struct audit_reply *)arg; audit_ctl_lock(); audit_ctl_unlock(); /* Ignore failure. It'll only happen if the sender goes away, because our timeout is set to infinite. */ netlink_unicast(audit_get_sk(reply->net), reply->skb, reply->portid, 0); reply->skb = NULL; audit_free_reply(reply); return 0; } /** * audit_send_reply - send an audit reply message via netlink * @request_skb: skb of request we are replying to (used to target the reply) * @seq: sequence number * @type: audit message type * @done: done (last) flag * @multi: multi-part message flag * @payload: payload data * @size: payload size * * Allocates a skb, builds the netlink message, and sends it to the port id. */ static void audit_send_reply(struct sk_buff *request_skb, int seq, int type, int done, int multi, const void *payload, int size) { struct task_struct *tsk; struct audit_reply *reply; reply = kzalloc(sizeof(*reply), GFP_KERNEL); if (!reply) return; reply->skb = audit_make_reply(seq, type, done, multi, payload, size); if (!reply->skb) goto err; reply->net = get_net(sock_net(NETLINK_CB(request_skb).sk)); reply->portid = NETLINK_CB(request_skb).portid; tsk = kthread_run(audit_send_reply_thread, reply, "audit_send_reply"); if (IS_ERR(tsk)) goto err; return; err: audit_free_reply(reply); } /* * Check for appropriate CAP_AUDIT_ capabilities on incoming audit * control messages. */ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type) { int err = 0; /* Only support initial user namespace for now. */ /* * We return ECONNREFUSED because it tricks userspace into thinking * that audit was not configured into the kernel. Lots of users * configure their PAM stack (because that's what the distro does) * to reject login if unable to send messages to audit. If we return * ECONNREFUSED the PAM stack thinks the kernel does not have audit * configured in and will let login proceed. If we return EPERM * userspace will reject all logins. This should be removed when we * support non init namespaces!! */ if (current_user_ns() != &init_user_ns) return -ECONNREFUSED; switch (msg_type) { case AUDIT_LIST: case AUDIT_ADD: case AUDIT_DEL: return -EOPNOTSUPP; case AUDIT_GET: case AUDIT_SET: case AUDIT_GET_FEATURE: case AUDIT_SET_FEATURE: case AUDIT_LIST_RULES: case AUDIT_ADD_RULE: case AUDIT_DEL_RULE: case AUDIT_SIGNAL_INFO: case AUDIT_TTY_GET: case AUDIT_TTY_SET: case AUDIT_TRIM: case AUDIT_MAKE_EQUIV: /* Only support auditd and auditctl in initial pid namespace * for now. */ if (task_active_pid_ns(current) != &init_pid_ns) return -EPERM; if (!netlink_capable(skb, CAP_AUDIT_CONTROL)) err = -EPERM; break; case AUDIT_USER: case AUDIT_FIRST_USER_MSG ... AUDIT_LAST_USER_MSG: case AUDIT_FIRST_USER_MSG2 ... AUDIT_LAST_USER_MSG2: if (!netlink_capable(skb, CAP_AUDIT_WRITE)) err = -EPERM; break; default: /* bad msg */ err = -EINVAL; } return err; } static void audit_log_common_recv_msg(struct audit_context *context, struct audit_buffer **ab, u16 msg_type) { uid_t uid = from_kuid(&init_user_ns, current_uid()); pid_t pid = task_tgid_nr(current); if (!audit_enabled && msg_type != AUDIT_USER_AVC) { *ab = NULL; return; } *ab = audit_log_start(context, GFP_KERNEL, msg_type); if (unlikely(!*ab)) return; audit_log_format(*ab, "pid=%d uid=%u ", pid, uid); audit_log_session_info(*ab); audit_log_task_context(*ab); } static inline void audit_log_user_recv_msg(struct audit_buffer **ab, u16 msg_type) { audit_log_common_recv_msg(NULL, ab, msg_type); } static int is_audit_feature_set(int i) { return af.features & AUDIT_FEATURE_TO_MASK(i); } static int audit_get_feature(struct sk_buff *skb) { u32 seq; seq = nlmsg_hdr(skb)->nlmsg_seq; audit_send_reply(skb, seq, AUDIT_GET_FEATURE, 0, 0, &af, sizeof(af)); return 0; } static void audit_log_feature_change(int which, u32 old_feature, u32 new_feature, u32 old_lock, u32 new_lock, int res) { struct audit_buffer *ab; if (audit_enabled == AUDIT_OFF) return; ab = audit_log_start(audit_context(), GFP_KERNEL, AUDIT_FEATURE_CHANGE); if (!ab) return; audit_log_task_info(ab); audit_log_format(ab, " feature=%s old=%u new=%u old_lock=%u new_lock=%u res=%d", audit_feature_names[which], !!old_feature, !!new_feature, !!old_lock, !!new_lock, res); audit_log_end(ab); } static int audit_set_feature(struct audit_features *uaf) { int i; BUILD_BUG_ON(AUDIT_LAST_FEATURE + 1 > ARRAY_SIZE(audit_feature_names)); /* if there is ever a version 2 we should handle that here */ for (i = 0; i <= AUDIT_LAST_FEATURE; i++) { u32 feature = AUDIT_FEATURE_TO_MASK(i); u32 old_feature, new_feature, old_lock, new_lock; /* if we are not changing this feature, move along */ if (!(feature & uaf->mask)) continue; old_feature = af.features & feature; new_feature = uaf->features & feature; new_lock = (uaf->lock | af.lock) & feature; old_lock = af.lock & feature; /* are we changing a locked feature? */ if (old_lock && (new_feature != old_feature)) { audit_log_feature_change(i, old_feature, new_feature, old_lock, new_lock, 0); return -EPERM; } } /* nothing invalid, do the changes */ for (i = 0; i <= AUDIT_LAST_FEATURE; i++) { u32 feature = AUDIT_FEATURE_TO_MASK(i); u32 old_feature, new_feature, old_lock, new_lock; /* if we are not changing this feature, move along */ if (!(feature & uaf->mask)) continue; old_feature = af.features & feature; new_feature = uaf->features & feature; old_lock = af.lock & feature; new_lock = (uaf->lock | af.lock) & feature; if (new_feature != old_feature) audit_log_feature_change(i, old_feature, new_feature, old_lock, new_lock, 1); if (new_feature) af.features |= feature; else af.features &= ~feature; af.lock |= new_lock; } return 0; } static int audit_replace(struct pid *pid) { pid_t pvnr; struct sk_buff *skb; pvnr = pid_vnr(pid); skb = audit_make_reply(0, AUDIT_REPLACE, 0, 0, &pvnr, sizeof(pvnr)); if (!skb) return -ENOMEM; return auditd_send_unicast_skb(skb); } static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh, bool *ack) { u32 seq; void *data; int data_len; int err; struct audit_buffer *ab; u16 msg_type = nlh->nlmsg_type; struct audit_sig_info *sig_data; struct lsm_context lsmctx = { NULL, 0, 0 }; err = audit_netlink_ok(skb, msg_type); if (err) return err; seq = nlh->nlmsg_seq; data = nlmsg_data(nlh); data_len = nlmsg_len(nlh); switch (msg_type) { case AUDIT_GET: { struct audit_status s; memset(&s, 0, sizeof(s)); s.enabled = audit_enabled; s.failure = audit_failure; /* NOTE: use pid_vnr() so the PID is relative to the current * namespace */ s.pid = auditd_pid_vnr(); s.rate_limit = audit_rate_limit; s.backlog_limit = audit_backlog_limit; s.lost = atomic_read(&audit_lost); s.backlog = skb_queue_len(&audit_queue); s.feature_bitmap = AUDIT_FEATURE_BITMAP_ALL; s.backlog_wait_time = audit_backlog_wait_time; s.backlog_wait_time_actual = atomic_read(&audit_backlog_wait_time_actual); audit_send_reply(skb, seq, AUDIT_GET, 0, 0, &s, sizeof(s)); break; } case AUDIT_SET: { struct audit_status s; memset(&s, 0, sizeof(s)); /* guard against past and future API changes */ memcpy(&s, data, min_t(size_t, sizeof(s), data_len)); if (s.mask & AUDIT_STATUS_ENABLED) { err = audit_set_enabled(s.enabled); if (err < 0) return err; } if (s.mask & AUDIT_STATUS_FAILURE) { err = audit_set_failure(s.failure); if (err < 0) return err; } if (s.mask & AUDIT_STATUS_PID) { /* NOTE: we are using the vnr PID functions below * because the s.pid value is relative to the * namespace of the caller; at present this * doesn't matter much since you can really only * run auditd from the initial pid namespace, but * something to keep in mind if this changes */ pid_t new_pid = s.pid; pid_t auditd_pid; struct pid *req_pid = task_tgid(current); /* Sanity check - PID values must match. Setting * pid to 0 is how auditd ends auditing. */ if (new_pid && (new_pid != pid_vnr(req_pid))) return -EINVAL; /* test the auditd connection */ audit_replace(req_pid); auditd_pid = auditd_pid_vnr(); if (auditd_pid) { /* replacing a healthy auditd is not allowed */ if (new_pid) { audit_log_config_change("audit_pid", new_pid, auditd_pid, 0); return -EEXIST; } /* only current auditd can unregister itself */ if (pid_vnr(req_pid) != auditd_pid) { audit_log_config_change("audit_pid", new_pid, auditd_pid, 0); return -EACCES; } } if (new_pid) { /* register a new auditd connection */ err = auditd_set(req_pid, NETLINK_CB(skb).portid, sock_net(NETLINK_CB(skb).sk), skb, ack); if (audit_enabled != AUDIT_OFF) audit_log_config_change("audit_pid", new_pid, auditd_pid, err ? 0 : 1); if (err) return err; /* try to process any backlog */ wake_up_interruptible(&kauditd_wait); } else { if (audit_enabled != AUDIT_OFF) audit_log_config_change("audit_pid", new_pid, auditd_pid, 1); /* unregister the auditd connection */ auditd_reset(NULL); } } if (s.mask & AUDIT_STATUS_RATE_LIMIT) { err = audit_set_rate_limit(s.rate_limit); if (err < 0) return err; } if (s.mask & AUDIT_STATUS_BACKLOG_LIMIT) { err = audit_set_backlog_limit(s.backlog_limit); if (err < 0) return err; } if (s.mask & AUDIT_STATUS_BACKLOG_WAIT_TIME) { if (sizeof(s) > (size_t)nlh->nlmsg_len) return -EINVAL; if (s.backlog_wait_time > 10*AUDIT_BACKLOG_WAIT_TIME) return -EINVAL; err = audit_set_backlog_wait_time(s.backlog_wait_time); if (err < 0) return err; } if (s.mask == AUDIT_STATUS_LOST) { u32 lost = atomic_xchg(&audit_lost, 0); audit_log_config_change("lost", 0, lost, 1); return lost; } if (s.mask == AUDIT_STATUS_BACKLOG_WAIT_TIME_ACTUAL) { u32 actual = atomic_xchg(&audit_backlog_wait_time_actual, 0); audit_log_config_change("backlog_wait_time_actual", 0, actual, 1); return actual; } break; } case AUDIT_GET_FEATURE: err = audit_get_feature(skb); if (err) return err; break; case AUDIT_SET_FEATURE: if (data_len < sizeof(struct audit_features)) return -EINVAL; err = audit_set_feature(data); if (err) return err; break; case AUDIT_USER: case AUDIT_FIRST_USER_MSG ... AUDIT_LAST_USER_MSG: case AUDIT_FIRST_USER_MSG2 ... AUDIT_LAST_USER_MSG2: if (!audit_enabled && msg_type != AUDIT_USER_AVC) return 0; /* exit early if there isn't at least one character to print */ if (data_len < 2) return -EINVAL; err = audit_filter(msg_type, AUDIT_FILTER_USER); if (err == 1) { /* match or error */ char *str = data; err = 0; if (msg_type == AUDIT_USER_TTY) { err = tty_audit_push(); if (err) break; } audit_log_user_recv_msg(&ab, msg_type); if (msg_type != AUDIT_USER_TTY) { /* ensure NULL termination */ str[data_len - 1] = '\0'; audit_log_format(ab, " msg='%.*s'", AUDIT_MESSAGE_TEXT_MAX, str); } else { audit_log_format(ab, " data="); if (str[data_len - 1] == '\0') data_len--; audit_log_n_untrustedstring(ab, str, data_len); } audit_log_end(ab); } break; case AUDIT_ADD_RULE: case AUDIT_DEL_RULE: if (data_len < sizeof(struct audit_rule_data)) return -EINVAL; if (audit_enabled == AUDIT_LOCKED) { audit_log_common_recv_msg(audit_context(), &ab, AUDIT_CONFIG_CHANGE); audit_log_format(ab, " op=%s audit_enabled=%d res=0", msg_type == AUDIT_ADD_RULE ? "add_rule" : "remove_rule", audit_enabled); audit_log_end(ab); return -EPERM; } err = audit_rule_change(msg_type, seq, data, data_len); break; case AUDIT_LIST_RULES: err = audit_list_rules_send(skb, seq); break; case AUDIT_TRIM: audit_trim_trees(); audit_log_common_recv_msg(audit_context(), &ab, AUDIT_CONFIG_CHANGE); audit_log_format(ab, " op=trim res=1"); audit_log_end(ab); break; case AUDIT_MAKE_EQUIV: { void *bufp = data; u32 sizes[2]; size_t msglen = data_len; char *old, *new; err = -EINVAL; if (msglen < 2 * sizeof(u32)) break; memcpy(sizes, bufp, 2 * sizeof(u32)); bufp += 2 * sizeof(u32); msglen -= 2 * sizeof(u32); old = audit_unpack_string(&bufp, &msglen, sizes[0]); if (IS_ERR(old)) { err = PTR_ERR(old); break; } new = audit_unpack_string(&bufp, &msglen, sizes[1]); if (IS_ERR(new)) { err = PTR_ERR(new); kfree(old); break; } /* OK, here comes... */ err = audit_tag_tree(old, new); audit_log_common_recv_msg(audit_context(), &ab, AUDIT_CONFIG_CHANGE); audit_log_format(ab, " op=make_equiv old="); audit_log_untrustedstring(ab, old); audit_log_format(ab, " new="); audit_log_untrustedstring(ab, new); audit_log_format(ab, " res=%d", !err); audit_log_end(ab); kfree(old); kfree(new); break; } case AUDIT_SIGNAL_INFO: if (lsmprop_is_set(&audit_sig_lsm)) { err = security_lsmprop_to_secctx(&audit_sig_lsm, &lsmctx, LSM_ID_UNDEF); if (err < 0) return err; } sig_data = kmalloc(struct_size(sig_data, ctx, lsmctx.len), GFP_KERNEL); if (!sig_data) { if (lsmprop_is_set(&audit_sig_lsm)) security_release_secctx(&lsmctx); return -ENOMEM; } sig_data->uid = from_kuid(&init_user_ns, audit_sig_uid); sig_data->pid = audit_sig_pid; if (lsmprop_is_set(&audit_sig_lsm)) { memcpy(sig_data->ctx, lsmctx.context, lsmctx.len); security_release_secctx(&lsmctx); } audit_send_reply(skb, seq, AUDIT_SIGNAL_INFO, 0, 0, sig_data, struct_size(sig_data, ctx, lsmctx.len)); kfree(sig_data); break; case AUDIT_TTY_GET: { struct audit_tty_status s; unsigned int t; t = READ_ONCE(current->signal->audit_tty); s.enabled = t & AUDIT_TTY_ENABLE; s.log_passwd = !!(t & AUDIT_TTY_LOG_PASSWD); audit_send_reply(skb, seq, AUDIT_TTY_GET, 0, 0, &s, sizeof(s)); break; } case AUDIT_TTY_SET: { struct audit_tty_status s, old; struct audit_buffer *ab; unsigned int t; memset(&s, 0, sizeof(s)); /* guard against past and future API changes */ memcpy(&s, data, min_t(size_t, sizeof(s), data_len)); /* check if new data is valid */ if ((s.enabled != 0 && s.enabled != 1) || (s.log_passwd != 0 && s.log_passwd != 1)) err = -EINVAL; if (err) t = READ_ONCE(current->signal->audit_tty); else { t = s.enabled | (-s.log_passwd & AUDIT_TTY_LOG_PASSWD); t = xchg(&current->signal->audit_tty, t); } old.enabled = t & AUDIT_TTY_ENABLE; old.log_passwd = !!(t & AUDIT_TTY_LOG_PASSWD); audit_log_common_recv_msg(audit_context(), &ab, AUDIT_CONFIG_CHANGE); audit_log_format(ab, " op=tty_set old-enabled=%d new-enabled=%d" " old-log_passwd=%d new-log_passwd=%d res=%d", old.enabled, s.enabled, old.log_passwd, s.log_passwd, !err); audit_log_end(ab); break; } default: err = -EINVAL; break; } return err < 0 ? err : 0; } /** * audit_receive - receive messages from a netlink control socket * @skb: the message buffer * * Parse the provided skb and deal with any messages that may be present, * malformed skbs are discarded. */ static void audit_receive(struct sk_buff *skb) { struct nlmsghdr *nlh; bool ack; /* * len MUST be signed for nlmsg_next to be able to dec it below 0 * if the nlmsg_len was not aligned */ int len; int err; nlh = nlmsg_hdr(skb); len = skb->len; audit_ctl_lock(); while (nlmsg_ok(nlh, len)) { ack = nlh->nlmsg_flags & NLM_F_ACK; err = audit_receive_msg(skb, nlh, &ack); /* send an ack if the user asked for one and audit_receive_msg * didn't already do it, or if there was an error. */ if (ack || err) netlink_ack(skb, nlh, err, NULL); nlh = nlmsg_next(nlh, &len); } audit_ctl_unlock(); /* can't block with the ctrl lock, so penalize the sender now */ if (audit_backlog_limit && (skb_queue_len(&audit_queue) > audit_backlog_limit)) { DECLARE_WAITQUEUE(wait, current); /* wake kauditd to try and flush the queue */ wake_up_interruptible(&kauditd_wait); add_wait_queue_exclusive(&audit_backlog_wait, &wait); set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(audit_backlog_wait_time); remove_wait_queue(&audit_backlog_wait, &wait); } } /* Log information about who is connecting to the audit multicast socket */ static void audit_log_multicast(int group, const char *op, int err) { const struct cred *cred; struct tty_struct *tty; char comm[sizeof(current->comm)]; struct audit_buffer *ab; if (!audit_enabled) return; ab = audit_log_start(audit_context(), GFP_KERNEL, AUDIT_EVENT_LISTENER); if (!ab) return; cred = current_cred(); tty = audit_get_tty(); audit_log_format(ab, "pid=%u uid=%u auid=%u tty=%s ses=%u", task_tgid_nr(current), from_kuid(&init_user_ns, cred->uid), from_kuid(&init_user_ns, audit_get_loginuid(current)), tty ? tty_name(tty) : "(none)", audit_get_sessionid(current)); audit_put_tty(tty); audit_log_task_context(ab); /* subj= */ audit_log_format(ab, " comm="); audit_log_untrustedstring(ab, get_task_comm(comm, current)); audit_log_d_path_exe(ab, current->mm); /* exe= */ audit_log_format(ab, " nl-mcgrp=%d op=%s res=%d", group, op, !err); audit_log_end(ab); } /* Run custom bind function on netlink socket group connect or bind requests. */ static int audit_multicast_bind(struct net *net, int group) { int err = 0; if (!capable(CAP_AUDIT_READ)) err = -EPERM; audit_log_multicast(group, "connect", err); return err; } static void audit_multicast_unbind(struct net *net, int group) { audit_log_multicast(group, "disconnect", 0); } static int __net_init audit_net_init(struct net *net) { struct netlink_kernel_cfg cfg = { .input = audit_receive, .bind = audit_multicast_bind, .unbind = audit_multicast_unbind, .flags = NL_CFG_F_NONROOT_RECV, .groups = AUDIT_NLGRP_MAX, }; struct audit_net *aunet = net_generic(net, audit_net_id); aunet->sk = netlink_kernel_create(net, NETLINK_AUDIT, &cfg); if (aunet->sk == NULL) { audit_panic("cannot initialize netlink socket in namespace"); return -ENOMEM; } /* limit the timeout in case auditd is blocked/stopped */ aunet->sk->sk_sndtimeo = HZ / 10; return 0; } static void __net_exit audit_net_exit(struct net *net) { struct audit_net *aunet = net_generic(net, audit_net_id); /* NOTE: you would think that we would want to check the auditd * connection and potentially reset it here if it lives in this * namespace, but since the auditd connection tracking struct holds a * reference to this namespace (see auditd_set()) we are only ever * going to get here after that connection has been released */ netlink_kernel_release(aunet->sk); } static struct pernet_operations audit_net_ops __net_initdata = { .init = audit_net_init, .exit = audit_net_exit, .id = &audit_net_id, .size = sizeof(struct audit_net), }; /* Initialize audit support at boot time. */ static int __init audit_init(void) { int i; if (audit_initialized == AUDIT_DISABLED) return 0; audit_buffer_cache = KMEM_CACHE(audit_buffer, SLAB_PANIC); skb_queue_head_init(&audit_queue); skb_queue_head_init(&audit_retry_queue); skb_queue_head_init(&audit_hold_queue); for (i = 0; i < AUDIT_INODE_BUCKETS; i++) INIT_LIST_HEAD(&audit_inode_hash[i]); mutex_init(&audit_cmd_mutex.lock); audit_cmd_mutex.owner = NULL; pr_info("initializing netlink subsys (%s)\n", str_enabled_disabled(audit_default)); register_pernet_subsys(&audit_net_ops); audit_initialized = AUDIT_INITIALIZED; kauditd_task = kthread_run(kauditd_thread, NULL, "kauditd"); if (IS_ERR(kauditd_task)) { int err = PTR_ERR(kauditd_task); panic("audit: failed to start the kauditd thread (%d)\n", err); } audit_log(NULL, GFP_KERNEL, AUDIT_KERNEL, "state=initialized audit_enabled=%u res=1", audit_enabled); return 0; } postcore_initcall(audit_init); /* * Process kernel command-line parameter at boot time. * audit={0|off} or audit={1|on}. */ static int __init audit_enable(char *str) { if (!strcasecmp(str, "off") || !strcmp(str, "0")) audit_default = AUDIT_OFF; else if (!strcasecmp(str, "on") || !strcmp(str, "1")) audit_default = AUDIT_ON; else { pr_err("audit: invalid 'audit' parameter value (%s)\n", str); audit_default = AUDIT_ON; } if (audit_default == AUDIT_OFF) audit_initialized = AUDIT_DISABLED; if (audit_set_enabled(audit_default)) pr_err("audit: error setting audit state (%d)\n", audit_default); pr_info("%s\n", audit_default ? "enabled (after initialization)" : "disabled (until reboot)"); return 1; } __setup("audit=", audit_enable); /* Process kernel command-line parameter at boot time. * audit_backlog_limit=<n> */ static int __init audit_backlog_limit_set(char *str) { u32 audit_backlog_limit_arg; pr_info("audit_backlog_limit: "); if (kstrtouint(str, 0, &audit_backlog_limit_arg)) { pr_cont("using default of %u, unable to parse %s\n", audit_backlog_limit, str); return 1; } audit_backlog_limit = audit_backlog_limit_arg; pr_cont("%d\n", audit_backlog_limit); return 1; } __setup("audit_backlog_limit=", audit_backlog_limit_set); static void audit_buffer_free(struct audit_buffer *ab) { struct sk_buff *skb; if (!ab) return; while ((skb = skb_dequeue(&ab->skb_list))) kfree_skb(skb); kmem_cache_free(audit_buffer_cache, ab); } static struct audit_buffer *audit_buffer_alloc(struct audit_context *ctx, gfp_t gfp_mask, int type) { struct audit_buffer *ab; ab = kmem_cache_alloc(audit_buffer_cache, gfp_mask); if (!ab) return NULL; skb_queue_head_init(&ab->skb_list); ab->skb = nlmsg_new(AUDIT_BUFSIZ, gfp_mask); if (!ab->skb) goto err; skb_queue_tail(&ab->skb_list, ab->skb); if (!nlmsg_put(ab->skb, 0, 0, type, 0, 0)) goto err; ab->ctx = ctx; ab->gfp_mask = gfp_mask; return ab; err: audit_buffer_free(ab); return NULL; } /** * audit_serial - compute a serial number for the audit record * * Compute a serial number for the audit record. Audit records are * written to user-space as soon as they are generated, so a complete * audit record may be written in several pieces. The timestamp of the * record and this serial number are used by the user-space tools to * determine which pieces belong to the same audit record. The * (timestamp,serial) tuple is unique for each syscall and is live from * syscall entry to syscall exit. * * NOTE: Another possibility is to store the formatted records off the * audit context (for those records that have a context), and emit them * all at syscall exit. However, this could delay the reporting of * significant errors until syscall exit (or never, if the system * halts). */ unsigned int audit_serial(void) { static atomic_t serial = ATOMIC_INIT(0); return atomic_inc_return(&serial); } static inline void audit_get_stamp(struct audit_context *ctx, struct audit_stamp *stamp) { if (!ctx || !auditsc_get_stamp(ctx, stamp)) { ktime_get_coarse_real_ts64(&stamp->ctime); stamp->serial = audit_serial(); } } /** * audit_log_start - obtain an audit buffer * @ctx: audit_context (may be NULL) * @gfp_mask: type of allocation * @type: audit message type * * Returns audit_buffer pointer on success or NULL on error. * * Obtain an audit buffer. This routine does locking to obtain the * audit buffer, but then no locking is required for calls to * audit_log_*format. If the task (ctx) is a task that is currently in a * syscall, then the syscall is marked as auditable and an audit record * will be written at syscall exit. If there is no associated task, then * task context (ctx) should be NULL. */ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, int type) { struct audit_buffer *ab; if (audit_initialized != AUDIT_INITIALIZED) return NULL; if (unlikely(!audit_filter(type, AUDIT_FILTER_EXCLUDE))) return NULL; /* NOTE: don't ever fail/sleep on these two conditions: * 1. auditd generated record - since we need auditd to drain the * queue; also, when we are checking for auditd, compare PIDs using * task_tgid_vnr() since auditd_pid is set in audit_receive_msg() * using a PID anchored in the caller's namespace * 2. generator holding the audit_cmd_mutex - we don't want to block * while holding the mutex, although we do penalize the sender * later in audit_receive() when it is safe to block */ if (!(auditd_test_task(current) || audit_ctl_owner_current())) { long stime = audit_backlog_wait_time; while (audit_backlog_limit && (skb_queue_len(&audit_queue) > audit_backlog_limit)) { /* wake kauditd to try and flush the queue */ wake_up_interruptible(&kauditd_wait); /* sleep if we are allowed and we haven't exhausted our * backlog wait limit */ if (gfpflags_allow_blocking(gfp_mask) && (stime > 0)) { long rtime = stime; DECLARE_WAITQUEUE(wait, current); add_wait_queue_exclusive(&audit_backlog_wait, &wait); set_current_state(TASK_UNINTERRUPTIBLE); stime = schedule_timeout(rtime); atomic_add(rtime - stime, &audit_backlog_wait_time_actual); remove_wait_queue(&audit_backlog_wait, &wait); } else { if (audit_rate_check() && printk_ratelimit()) pr_warn("audit_backlog=%d > audit_backlog_limit=%d\n", skb_queue_len(&audit_queue), audit_backlog_limit); audit_log_lost("backlog limit exceeded"); return NULL; } } } ab = audit_buffer_alloc(ctx, gfp_mask, type); if (!ab) { audit_log_lost("out of memory in audit_log_start"); return NULL; } audit_get_stamp(ab->ctx, &ab->stamp); /* cancel dummy context to enable supporting records */ if (ctx) ctx->dummy = 0; audit_log_format(ab, "audit(%llu.%03lu:%u): ", (unsigned long long)ab->stamp.ctime.tv_sec, ab->stamp.ctime.tv_nsec/1000000, ab->stamp.serial); return ab; } /** * audit_expand - expand skb in the audit buffer * @ab: audit_buffer * @extra: space to add at tail of the skb * * Returns 0 (no space) on failed expansion, or available space if * successful. */ static inline int audit_expand(struct audit_buffer *ab, int extra) { struct sk_buff *skb = ab->skb; int oldtail = skb_tailroom(skb); int ret = pskb_expand_head(skb, 0, extra, ab->gfp_mask); int newtail = skb_tailroom(skb); if (ret < 0) { audit_log_lost("out of memory in audit_expand"); return 0; } skb->truesize += newtail - oldtail; return newtail; } /* * Format an audit message into the audit buffer. If there isn't enough * room in the audit buffer, more room will be allocated and vsnprint * will be called a second time. Currently, we assume that a printk * can't format message larger than 1024 bytes, so we don't either. */ static __printf(2, 0) void audit_log_vformat(struct audit_buffer *ab, const char *fmt, va_list args) { int len, avail; struct sk_buff *skb; va_list args2; if (!ab) return; BUG_ON(!ab->skb); skb = ab->skb; avail = skb_tailroom(skb); if (avail == 0) { avail = audit_expand(ab, AUDIT_BUFSIZ); if (!avail) goto out; } va_copy(args2, args); len = vsnprintf(skb_tail_pointer(skb), avail, fmt, args); if (len >= avail) { /* The printk buffer is 1024 bytes long, so if we get * here and AUDIT_BUFSIZ is at least 1024, then we can * log everything that printk could have logged. */ avail = audit_expand(ab, max_t(unsigned, AUDIT_BUFSIZ, 1+len-avail)); if (!avail) goto out_va_end; len = vsnprintf(skb_tail_pointer(skb), avail, fmt, args2); } if (len > 0) skb_put(skb, len); out_va_end: va_end(args2); out: return; } /** * audit_log_format - format a message into the audit buffer. * @ab: audit_buffer * @fmt: format string * @...: optional parameters matching @fmt string * * All the work is done in audit_log_vformat. */ void audit_log_format(struct audit_buffer *ab, const char *fmt, ...) { va_list args; if (!ab) return; va_start(args, fmt); audit_log_vformat(ab, fmt, args); va_end(args); } /** * audit_log_n_hex - convert a buffer to hex and append it to the audit skb * @ab: the audit_buffer * @buf: buffer to convert to hex * @len: length of @buf to be converted * * No return value; failure to expand is silently ignored. * * This function will take the passed buf and convert it into a string of * ascii hex digits. The new string is placed onto the skb. */ void audit_log_n_hex(struct audit_buffer *ab, const unsigned char *buf, size_t len) { int i, avail, new_len; unsigned char *ptr; struct sk_buff *skb; if (!ab) return; BUG_ON(!ab->skb); skb = ab->skb; avail = skb_tailroom(skb); new_len = len<<1; if (new_len >= avail) { /* Round the buffer request up to the next multiple */ new_len = AUDIT_BUFSIZ*(((new_len-avail)/AUDIT_BUFSIZ) + 1); avail = audit_expand(ab, new_len); if (!avail) return; } ptr = skb_tail_pointer(skb); for (i = 0; i < len; i++) ptr = hex_byte_pack_upper(ptr, buf[i]); *ptr = 0; skb_put(skb, len << 1); /* new string is twice the old string */ } /* * Format a string of no more than slen characters into the audit buffer, * enclosed in quote marks. */ void audit_log_n_string(struct audit_buffer *ab, const char *string, size_t slen) { int avail, new_len; unsigned char *ptr; struct sk_buff *skb; if (!ab) return; BUG_ON(!ab->skb); skb = ab->skb; avail = skb_tailroom(skb); new_len = slen + 3; /* enclosing quotes + null terminator */ if (new_len > avail) { avail = audit_expand(ab, new_len); if (!avail) return; } ptr = skb_tail_pointer(skb); *ptr++ = '"'; memcpy(ptr, string, slen); ptr += slen; *ptr++ = '"'; *ptr = 0; skb_put(skb, slen + 2); /* don't include null terminator */ } /** * audit_string_contains_control - does a string need to be logged in hex * @string: string to be checked * @len: max length of the string to check */ bool audit_string_contains_control(const char *string, size_t len) { const unsigned char *p; for (p = string; p < (const unsigned char *)string + len; p++) { if (*p == '"' || *p < 0x21 || *p > 0x7e) return true; } return false; } /** * audit_log_n_untrustedstring - log a string that may contain random characters * @ab: audit_buffer * @string: string to be logged * @len: length of string (not including trailing null) * * This code will escape a string that is passed to it if the string * contains a control character, unprintable character, double quote mark, * or a space. Unescaped strings will start and end with a double quote mark. * Strings that are escaped are printed in hex (2 digits per char). * * The caller specifies the number of characters in the string to log, which may * or may not be the entire string. */ void audit_log_n_untrustedstring(struct audit_buffer *ab, const char *string, size_t len) { if (audit_string_contains_control(string, len)) audit_log_n_hex(ab, string, len); else audit_log_n_string(ab, string, len); } /** * audit_log_untrustedstring - log a string that may contain random characters * @ab: audit_buffer * @string: string to be logged * * Same as audit_log_n_untrustedstring(), except that strlen is used to * determine string length. */ void audit_log_untrustedstring(struct audit_buffer *ab, const char *string) { audit_log_n_untrustedstring(ab, string, strlen(string)); } /* This is a helper-function to print the escaped d_path */ void audit_log_d_path(struct audit_buffer *ab, const char *prefix, const struct path *path) { char *p, *pathname; if (prefix) audit_log_format(ab, "%s", prefix); /* We will allow 11 spaces for ' (deleted)' to be appended */ pathname = kmalloc(PATH_MAX+11, ab->gfp_mask); if (!pathname) { audit_log_format(ab, "\"<no_memory>\""); return; } p = d_path(path, pathname, PATH_MAX+11); if (IS_ERR(p)) { /* Should never happen since we send PATH_MAX */ /* FIXME: can we save some information here? */ audit_log_format(ab, "\"<too_long>\""); } else audit_log_untrustedstring(ab, p); kfree(pathname); } void audit_log_session_info(struct audit_buffer *ab) { unsigned int sessionid = audit_get_sessionid(current); uid_t auid = from_kuid(&init_user_ns, audit_get_loginuid(current)); audit_log_format(ab, "auid=%u ses=%u", auid, sessionid); } void audit_log_key(struct audit_buffer *ab, char *key) { audit_log_format(ab, " key="); if (key) audit_log_untrustedstring(ab, key); else audit_log_format(ab, "(null)"); } /** * audit_buffer_aux_new - Add an aux record buffer to the skb list * @ab: audit_buffer * @type: message type * * Aux records are allocated and added to the skb list of * the "main" record. The ab->skb is reset to point to the * aux record on its creation. When the aux record in complete * ab->skb has to be reset to point to the "main" record. * This allows the audit_log_ functions to be ignorant of * which kind of record it is logging to. It also avoids adding * special data for aux records. * * On success ab->skb will point to the new aux record. * Returns 0 on success, -ENOMEM should allocation fail. */ static int audit_buffer_aux_new(struct audit_buffer *ab, int type) { WARN_ON(ab->skb != skb_peek(&ab->skb_list)); ab->skb = nlmsg_new(AUDIT_BUFSIZ, ab->gfp_mask); if (!ab->skb) goto err; if (!nlmsg_put(ab->skb, 0, 0, type, 0, 0)) goto err; skb_queue_tail(&ab->skb_list, ab->skb); audit_log_format(ab, "audit(%llu.%03lu:%u): ", (unsigned long long)ab->stamp.ctime.tv_sec, ab->stamp.ctime.tv_nsec/1000000, ab->stamp.serial); return 0; err: kfree_skb(ab->skb); ab->skb = skb_peek(&ab->skb_list); return -ENOMEM; } /** * audit_buffer_aux_end - Switch back to the "main" record from an aux record * @ab: audit_buffer * * Restores the "main" audit record to ab->skb. */ static void audit_buffer_aux_end(struct audit_buffer *ab) { ab->skb = skb_peek(&ab->skb_list); } /** * audit_log_subj_ctx - Add LSM subject information * @ab: audit_buffer * @prop: LSM subject properties. * * Add a subj= field and, if necessary, a AUDIT_MAC_TASK_CONTEXTS record. */ int audit_log_subj_ctx(struct audit_buffer *ab, struct lsm_prop *prop) { struct lsm_context ctx; char *space = ""; int error; int i; security_current_getlsmprop_subj(prop); if (!lsmprop_is_set(prop)) return 0; if (audit_subj_secctx_cnt < 2) { error = security_lsmprop_to_secctx(prop, &ctx, LSM_ID_UNDEF); if (error < 0) { if (error != -EINVAL) goto error_path; return 0; } audit_log_format(ab, " subj=%s", ctx.context); security_release_secctx(&ctx); return 0; } /* Multiple LSMs provide contexts. Include an aux record. */ audit_log_format(ab, " subj=?"); error = audit_buffer_aux_new(ab, AUDIT_MAC_TASK_CONTEXTS); if (error) goto error_path; for (i = 0; i < audit_subj_secctx_cnt; i++) { error = security_lsmprop_to_secctx(prop, &ctx, audit_subj_lsms[i]->id); if (error < 0) { /* * Don't print anything. An LSM like BPF could * claim to support contexts, but only do so under * certain conditions. */ if (error == -EOPNOTSUPP) continue; if (error != -EINVAL) audit_panic("error in audit_log_subj_ctx"); } else { audit_log_format(ab, "%ssubj_%s=%s", space, audit_subj_lsms[i]->name, ctx.context); space = " "; security_release_secctx(&ctx); } } audit_buffer_aux_end(ab); return 0; error_path: audit_panic("error in audit_log_subj_ctx"); return error; } EXPORT_SYMBOL(audit_log_subj_ctx); int audit_log_task_context(struct audit_buffer *ab) { struct lsm_prop prop; security_current_getlsmprop_subj(&prop); return audit_log_subj_ctx(ab, &prop); } EXPORT_SYMBOL(audit_log_task_context); int audit_log_obj_ctx(struct audit_buffer *ab, struct lsm_prop *prop) { int i; int rc; int error = 0; char *space = ""; struct lsm_context ctx; if (audit_obj_secctx_cnt < 2) { error = security_lsmprop_to_secctx(prop, &ctx, LSM_ID_UNDEF); if (error < 0) { if (error != -EINVAL) goto error_path; return error; } audit_log_format(ab, " obj=%s", ctx.context); security_release_secctx(&ctx); return 0; } audit_log_format(ab, " obj=?"); error = audit_buffer_aux_new(ab, AUDIT_MAC_OBJ_CONTEXTS); if (error) goto error_path; for (i = 0; i < audit_obj_secctx_cnt; i++) { rc = security_lsmprop_to_secctx(prop, &ctx, audit_obj_lsms[i]->id); if (rc < 0) { audit_log_format(ab, "%sobj_%s=?", space, audit_obj_lsms[i]->name); if (rc != -EINVAL) audit_panic("error in audit_log_obj_ctx"); error = rc; } else { audit_log_format(ab, "%sobj_%s=%s", space, audit_obj_lsms[i]->name, ctx.context); security_release_secctx(&ctx); } space = " "; } audit_buffer_aux_end(ab); return error; error_path: audit_panic("error in audit_log_obj_ctx"); return error; } void audit_log_d_path_exe(struct audit_buffer *ab, struct mm_struct *mm) { struct file *exe_file; if (!mm) goto out_null; exe_file = get_mm_exe_file(mm); if (!exe_file) goto out_null; audit_log_d_path(ab, " exe=", &exe_file->f_path); fput(exe_file); return; out_null: audit_log_format(ab, " exe=(null)"); } struct tty_struct *audit_get_tty(void) { struct tty_struct *tty = NULL; unsigned long flags; spin_lock_irqsave(&current->sighand->siglock, flags); if (current->signal) tty = tty_kref_get(current->signal->tty); spin_unlock_irqrestore(&current->sighand->siglock, flags); return tty; } void audit_put_tty(struct tty_struct *tty) { tty_kref_put(tty); } void audit_log_task_info(struct audit_buffer *ab) { const struct cred *cred; char comm[sizeof(current->comm)]; struct tty_struct *tty; if (!ab) return; cred = current_cred(); tty = audit_get_tty(); audit_log_format(ab, " ppid=%d pid=%d auid=%u uid=%u gid=%u" " euid=%u suid=%u fsuid=%u" " egid=%u sgid=%u fsgid=%u tty=%s ses=%u", task_ppid_nr(current), task_tgid_nr(current), from_kuid(&init_user_ns, audit_get_loginuid(current)), from_kuid(&init_user_ns, cred->uid), from_kgid(&init_user_ns, cred->gid), from_kuid(&init_user_ns, cred->euid), from_kuid(&init_user_ns, cred->suid), from_kuid(&init_user_ns, cred->fsuid), from_kgid(&init_user_ns, cred->egid), from_kgid(&init_user_ns, cred->sgid), from_kgid(&init_user_ns, cred->fsgid), tty ? tty_name(tty) : "(none)", audit_get_sessionid(current)); audit_put_tty(tty); audit_log_format(ab, " comm="); audit_log_untrustedstring(ab, get_task_comm(comm, current)); audit_log_d_path_exe(ab, current->mm); audit_log_task_context(ab); } EXPORT_SYMBOL(audit_log_task_info); /** * audit_log_path_denied - report a path restriction denial * @type: audit message type (AUDIT_ANOM_LINK, AUDIT_ANOM_CREAT, etc) * @operation: specific operation name */ void audit_log_path_denied(int type, const char *operation) { struct audit_buffer *ab; if (!audit_enabled) return; /* Generate log with subject, operation, outcome. */ ab = audit_log_start(audit_context(), GFP_KERNEL, type); if (!ab) return; audit_log_format(ab, "op=%s", operation); audit_log_task_info(ab); audit_log_format(ab, " res=0"); audit_log_end(ab); } /* global counter which is incremented every time something logs in */ static atomic_t session_id = ATOMIC_INIT(0); static int audit_set_loginuid_perm(kuid_t loginuid) { /* if we are unset, we don't need privs */ if (!audit_loginuid_set(current)) return 0; /* if AUDIT_FEATURE_LOGINUID_IMMUTABLE means never ever allow a change*/ if (is_audit_feature_set(AUDIT_FEATURE_LOGINUID_IMMUTABLE)) return -EPERM; /* it is set, you need permission */ if (!capable(CAP_AUDIT_CONTROL)) return -EPERM; /* reject if this is not an unset and we don't allow that */ if (is_audit_feature_set(AUDIT_FEATURE_ONLY_UNSET_LOGINUID) && uid_valid(loginuid)) return -EPERM; return 0; } static void audit_log_set_loginuid(kuid_t koldloginuid, kuid_t kloginuid, unsigned int oldsessionid, unsigned int sessionid, int rc) { struct audit_buffer *ab; uid_t uid, oldloginuid, loginuid; struct tty_struct *tty; if (!audit_enabled) return; ab = audit_log_start(audit_context(), GFP_KERNEL, AUDIT_LOGIN); if (!ab) return; uid = from_kuid(&init_user_ns, task_uid(current)); oldloginuid = from_kuid(&init_user_ns, koldloginuid); loginuid = from_kuid(&init_user_ns, kloginuid); tty = audit_get_tty(); audit_log_format(ab, "pid=%d uid=%u", task_tgid_nr(current), uid); audit_log_task_context(ab); audit_log_format(ab, " old-auid=%u auid=%u tty=%s old-ses=%u ses=%u res=%d", oldloginuid, loginuid, tty ? tty_name(tty) : "(none)", oldsessionid, sessionid, !rc); audit_put_tty(tty); audit_log_end(ab); } /** * audit_set_loginuid - set current task's loginuid * @loginuid: loginuid value * * Returns 0. * * Called (set) from fs/proc/base.c::proc_loginuid_write(). */ int audit_set_loginuid(kuid_t loginuid) { unsigned int oldsessionid, sessionid = AUDIT_SID_UNSET; kuid_t oldloginuid; int rc; oldloginuid = audit_get_loginuid(current); oldsessionid = audit_get_sessionid(current); rc = audit_set_loginuid_perm(loginuid); if (rc) goto out; /* are we setting or clearing? */ if (uid_valid(loginuid)) { sessionid = (unsigned int)atomic_inc_return(&session_id); if (unlikely(sessionid == AUDIT_SID_UNSET)) sessionid = (unsigned int)atomic_inc_return(&session_id); } current->sessionid = sessionid; current->loginuid = loginuid; out: audit_log_set_loginuid(oldloginuid, loginuid, oldsessionid, sessionid, rc); return rc; } /** * audit_signal_info - record signal info for shutting down audit subsystem * @sig: signal value * @t: task being signaled * * If the audit subsystem is being terminated, record the task (pid) * and uid that is doing that. */ int audit_signal_info(int sig, struct task_struct *t) { kuid_t uid = current_uid(), auid; if (auditd_test_task(t) && (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1 || sig == SIGUSR2)) { audit_sig_pid = task_tgid_nr(current); auid = audit_get_loginuid(current); if (uid_valid(auid)) audit_sig_uid = auid; else audit_sig_uid = uid; security_current_getlsmprop_subj(&audit_sig_lsm); } return audit_signal_info_syscall(t); } /** * __audit_log_end - enqueue one audit record * @skb: the buffer to send */ static void __audit_log_end(struct sk_buff *skb) { struct nlmsghdr *nlh; if (audit_rate_check()) { /* setup the netlink header, see the comments in * kauditd_send_multicast_skb() for length quirks */ nlh = nlmsg_hdr(skb); nlh->nlmsg_len = skb->len - NLMSG_HDRLEN; /* queue the netlink packet */ skb_queue_tail(&audit_queue, skb); } else { audit_log_lost("rate limit exceeded"); kfree_skb(skb); } } /** * audit_log_end - end one audit record * @ab: the audit_buffer * * We can not do a netlink send inside an irq context because it blocks (last * arg, flags, is not set to MSG_DONTWAIT), so the audit buffer is placed on a * queue and a kthread is scheduled to remove them from the queue outside the * irq context. May be called in any context. */ void audit_log_end(struct audit_buffer *ab) { struct sk_buff *skb; if (!ab) return; while ((skb = skb_dequeue(&ab->skb_list))) __audit_log_end(skb); /* poke the kauditd thread */ wake_up_interruptible(&kauditd_wait); audit_buffer_free(ab); } /** * audit_log - Log an audit record * @ctx: audit context * @gfp_mask: type of allocation * @type: audit message type * @fmt: format string to use * @...: variable parameters matching the format string * * This is a convenience function that calls audit_log_start, * audit_log_vformat, and audit_log_end. It may be called * in any context. */ void audit_log(struct audit_context *ctx, gfp_t gfp_mask, int type, const char *fmt, ...) { struct audit_buffer *ab; va_list args; ab = audit_log_start(ctx, gfp_mask, type); if (ab) { va_start(args, fmt); audit_log_vformat(ab, fmt, args); va_end(args); audit_log_end(ab); } } EXPORT_SYMBOL(audit_log_start); EXPORT_SYMBOL(audit_log_end); EXPORT_SYMBOL(audit_log_format); EXPORT_SYMBOL(audit_log);
8 8 8 8 8 8 8 8 8 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 // SPDX-License-Identifier: GPL-2.0-only // Copyright (c) 2020 Facebook Inc. #include <linux/debugfs.h> #include <linux/netdevice.h> #include <linux/slab.h> #include <net/udp_tunnel.h> #include "netdevsim.h" static int nsim_udp_tunnel_set_port(struct net_device *dev, unsigned int table, unsigned int entry, struct udp_tunnel_info *ti) { struct netdevsim *ns = netdev_priv(dev); int ret; ret = -ns->udp_ports.inject_error; ns->udp_ports.inject_error = 0; if (!ret) { if (ns->udp_ports.ports[table][entry]) { WARN(1, "entry already in use\n"); ret = -EBUSY; } else { ns->udp_ports.ports[table][entry] = be16_to_cpu(ti->port) << 16 | ti->type; } } netdev_info(dev, "set [%d, %d] type %d family %d port %d - %d\n", table, entry, ti->type, ti->sa_family, ntohs(ti->port), ret); return ret; } static int nsim_udp_tunnel_unset_port(struct net_device *dev, unsigned int table, unsigned int entry, struct udp_tunnel_info *ti) { struct netdevsim *ns = netdev_priv(dev); int ret; ret = -ns->udp_ports.inject_error; ns->udp_ports.inject_error = 0; if (!ret) { u32 val = be16_to_cpu(ti->port) << 16 | ti->type; if (val == ns->udp_ports.ports[table][entry]) { ns->udp_ports.ports[table][entry] = 0; } else { WARN(1, "entry not installed %x vs %x\n", val, ns->udp_ports.ports[table][entry]); ret = -ENOENT; } } netdev_info(dev, "unset [%d, %d] type %d family %d port %d - %d\n", table, entry, ti->type, ti->sa_family, ntohs(ti->port), ret); return ret; } static int nsim_udp_tunnel_sync_table(struct net_device *dev, unsigned int table) { struct netdevsim *ns = netdev_priv(dev); struct udp_tunnel_info ti; unsigned int i; int ret; ret = -ns->udp_ports.inject_error; ns->udp_ports.inject_error = 0; for (i = 0; i < NSIM_UDP_TUNNEL_N_PORTS; i++) { udp_tunnel_nic_get_port(dev, table, i, &ti); ns->udp_ports.ports[table][i] = be16_to_cpu(ti.port) << 16 | ti.type; } return ret; } static const struct udp_tunnel_nic_info nsim_udp_tunnel_info = { .set_port = nsim_udp_tunnel_set_port, .unset_port = nsim_udp_tunnel_unset_port, .sync_table = nsim_udp_tunnel_sync_table, .tables = { { .n_entries = NSIM_UDP_TUNNEL_N_PORTS, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, }, { .n_entries = NSIM_UDP_TUNNEL_N_PORTS, .tunnel_types = UDP_TUNNEL_TYPE_GENEVE | UDP_TUNNEL_TYPE_VXLAN_GPE, }, }, }; static ssize_t nsim_udp_tunnels_info_reset_write(struct file *file, const char __user *data, size_t count, loff_t *ppos) { struct net_device *dev = file->private_data; struct netdevsim *ns = netdev_priv(dev); if (dev->reg_state == NETREG_REGISTERED) { memset(ns->udp_ports.ports, 0, sizeof(ns->udp_ports.__ports)); udp_tunnel_nic_reset_ntf(dev); } return count; } static const struct file_operations nsim_udp_tunnels_info_reset_fops = { .open = simple_open, .write = nsim_udp_tunnels_info_reset_write, .llseek = generic_file_llseek, .owner = THIS_MODULE, }; int nsim_udp_tunnels_info_create(struct nsim_dev *nsim_dev, struct net_device *dev) { struct netdevsim *ns = netdev_priv(dev); struct udp_tunnel_nic_info *info; if (nsim_dev->udp_ports.shared && nsim_dev->udp_ports.open_only) { dev_err(&nsim_dev->nsim_bus_dev->dev, "shared can't be used in conjunction with open_only\n"); return -EINVAL; } if (!nsim_dev->udp_ports.shared) ns->udp_ports.ports = ns->udp_ports.__ports; else ns->udp_ports.ports = nsim_dev->udp_ports.__ports; ns->udp_ports.ddir = debugfs_create_dir("udp_ports", ns->nsim_dev_port->ddir); debugfs_create_u32("inject_error", 0600, ns->udp_ports.ddir, &ns->udp_ports.inject_error); ns->udp_ports.dfs_ports[0].array = ns->udp_ports.ports[0]; ns->udp_ports.dfs_ports[0].n_elements = NSIM_UDP_TUNNEL_N_PORTS; debugfs_create_u32_array("table0", 0400, ns->udp_ports.ddir, &ns->udp_ports.dfs_ports[0]); ns->udp_ports.dfs_ports[1].array = ns->udp_ports.ports[1]; ns->udp_ports.dfs_ports[1].n_elements = NSIM_UDP_TUNNEL_N_PORTS; debugfs_create_u32_array("table1", 0400, ns->udp_ports.ddir, &ns->udp_ports.dfs_ports[1]); debugfs_create_file("reset", 0200, ns->udp_ports.ddir, dev, &nsim_udp_tunnels_info_reset_fops); /* Note: it's not normal to allocate the info struct like this! * Drivers are expected to use a static const one, here we're testing. */ info = kmemdup(&nsim_udp_tunnel_info, sizeof(nsim_udp_tunnel_info), GFP_KERNEL); if (!info) return -ENOMEM; if (nsim_dev->udp_ports.sync_all) { info->set_port = NULL; info->unset_port = NULL; } else { info->sync_table = NULL; } if (nsim_dev->udp_ports.open_only) info->flags |= UDP_TUNNEL_NIC_INFO_OPEN_ONLY; if (nsim_dev->udp_ports.ipv4_only) info->flags |= UDP_TUNNEL_NIC_INFO_IPV4_ONLY; if (nsim_dev->udp_ports.shared) info->shared = &nsim_dev->udp_ports.utn_shared; if (nsim_dev->udp_ports.static_iana_vxlan) info->flags |= UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN; dev->udp_tunnel_nic_info = info; return 0; } void nsim_udp_tunnels_info_destroy(struct net_device *dev) { struct netdevsim *ns = netdev_priv(dev); debugfs_remove_recursive(ns->udp_ports.ddir); kfree(dev->udp_tunnel_nic_info); dev->udp_tunnel_nic_info = NULL; } void nsim_udp_tunnels_debugfs_create(struct nsim_dev *nsim_dev) { debugfs_create_bool("udp_ports_sync_all", 0600, nsim_dev->ddir, &nsim_dev->udp_ports.sync_all); debugfs_create_bool("udp_ports_open_only", 0600, nsim_dev->ddir, &nsim_dev->udp_ports.open_only); debugfs_create_bool("udp_ports_ipv4_only", 0600, nsim_dev->ddir, &nsim_dev->udp_ports.ipv4_only); debugfs_create_bool("udp_ports_shared", 0600, nsim_dev->ddir, &nsim_dev->udp_ports.shared); debugfs_create_bool("udp_ports_static_iana_vxlan", 0600, nsim_dev->ddir, &nsim_dev->udp_ports.static_iana_vxlan); }
2 12 12 2 2 2 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 /* * Utility functions for x86 operand and address decoding * * Copyright (C) Intel Corporation 2017 */ #include <linux/kernel.h> #include <linux/string.h> #include <linux/ratelimit.h> #include <linux/mmu_context.h> #include <asm/desc_defs.h> #include <asm/desc.h> #include <asm/inat.h> #include <asm/insn.h> #include <asm/insn-eval.h> #include <asm/ldt.h> #include <asm/msr.h> #include <asm/vm86.h> #undef pr_fmt #define pr_fmt(fmt) "insn: " fmt enum reg_type { REG_TYPE_RM = 0, REG_TYPE_REG, REG_TYPE_INDEX, REG_TYPE_BASE, }; /** * is_string_insn() - Determine if instruction is a string instruction * @insn: Instruction containing the opcode to inspect * * Returns: * * true if the instruction, determined by the opcode, is any of the * string instructions as defined in the Intel Software Development manual. * False otherwise. */ static bool is_string_insn(struct insn *insn) { /* All string instructions have a 1-byte opcode. */ if (insn->opcode.nbytes != 1) return false; switch (insn->opcode.bytes[0]) { case 0x6c ... 0x6f: /* INS, OUTS */ case 0xa4 ... 0xa7: /* MOVS, CMPS */ case 0xaa ... 0xaf: /* STOS, LODS, SCAS */ return true; default: return false; } } /** * insn_has_rep_prefix() - Determine if instruction has a REP prefix * @insn: Instruction containing the prefix to inspect * * Returns: * * true if the instruction has a REP prefix, false if not. */ bool insn_has_rep_prefix(struct insn *insn) { insn_byte_t p; int i; insn_get_prefixes(insn); for_each_insn_prefix(insn, i, p) { if (p == 0xf2 || p == 0xf3) return true; } return false; } /** * get_seg_reg_override_idx() - obtain segment register override index * @insn: Valid instruction with segment override prefixes * * Inspect the instruction prefixes in @insn and find segment overrides, if any. * * Returns: * * A constant identifying the segment register to use, among CS, SS, DS, * ES, FS, or GS. INAT_SEG_REG_DEFAULT is returned if no segment override * prefixes were found. * * -EINVAL in case of error. */ static int get_seg_reg_override_idx(struct insn *insn) { int idx = INAT_SEG_REG_DEFAULT; int num_overrides = 0, i; insn_byte_t p; insn_get_prefixes(insn); /* Look for any segment override prefixes. */ for_each_insn_prefix(insn, i, p) { insn_attr_t attr; attr = inat_get_opcode_attribute(p); switch (attr) { case INAT_MAKE_PREFIX(INAT_PFX_CS): idx = INAT_SEG_REG_CS; num_overrides++; break; case INAT_MAKE_PREFIX(INAT_PFX_SS): idx = INAT_SEG_REG_SS; num_overrides++; break; case INAT_MAKE_PREFIX(INAT_PFX_DS): idx = INAT_SEG_REG_DS; num_overrides++; break; case INAT_MAKE_PREFIX(INAT_PFX_ES): idx = INAT_SEG_REG_ES; num_overrides++; break; case INAT_MAKE_PREFIX(INAT_PFX_FS): idx = INAT_SEG_REG_FS; num_overrides++; break; case INAT_MAKE_PREFIX(INAT_PFX_GS): idx = INAT_SEG_REG_GS; num_overrides++; break; /* No default action needed. */ } } /* More than one segment override prefix leads to undefined behavior. */ if (num_overrides > 1) return -EINVAL; return idx; } /** * check_seg_overrides() - check if segment override prefixes are allowed * @insn: Valid instruction with segment override prefixes * @regoff: Operand offset, in pt_regs, for which the check is performed * * For a particular register used in register-indirect addressing, determine if * segment override prefixes can be used. Specifically, no overrides are allowed * for rDI if used with a string instruction. * * Returns: * * True if segment override prefixes can be used with the register indicated * in @regoff. False if otherwise. */ static bool check_seg_overrides(struct insn *insn, int regoff) { if (regoff == offsetof(struct pt_regs, di) && is_string_insn(insn)) return false; return true; } /** * resolve_default_seg() - resolve default segment register index for an operand * @insn: Instruction with opcode and address size. Must be valid. * @regs: Register values as seen when entering kernel mode * @off: Operand offset, in pt_regs, for which resolution is needed * * Resolve the default segment register index associated with the instruction * operand register indicated by @off. Such index is resolved based on defaults * described in the Intel Software Development Manual. * * Returns: * * If in protected mode, a constant identifying the segment register to use, * among CS, SS, ES or DS. If in long mode, INAT_SEG_REG_IGNORE. * * -EINVAL in case of error. */ static int resolve_default_seg(struct insn *insn, struct pt_regs *regs, int off) { if (any_64bit_mode(regs)) return INAT_SEG_REG_IGNORE; /* * Resolve the default segment register as described in Section 3.7.4 * of the Intel Software Development Manual Vol. 1: * * + DS for all references involving r[ABCD]X, and rSI. * + If used in a string instruction, ES for rDI. Otherwise, DS. * + AX, CX and DX are not valid register operands in 16-bit address * encodings but are valid for 32-bit and 64-bit encodings. * + -EDOM is reserved to identify for cases in which no register * is used (i.e., displacement-only addressing). Use DS. * + SS for rSP or rBP. * + CS for rIP. */ switch (off) { case offsetof(struct pt_regs, ax): case offsetof(struct pt_regs, cx): case offsetof(struct pt_regs, dx): /* Need insn to verify address size. */ if (insn->addr_bytes == 2) return -EINVAL; fallthrough; case -EDOM: case offsetof(struct pt_regs, bx): case offsetof(struct pt_regs, si): return INAT_SEG_REG_DS; case offsetof(struct pt_regs, di): if (is_string_insn(insn)) return INAT_SEG_REG_ES; return INAT_SEG_REG_DS; case offsetof(struct pt_regs, bp): case offsetof(struct pt_regs, sp): return INAT_SEG_REG_SS; case offsetof(struct pt_regs, ip): return INAT_SEG_REG_CS; default: return -EINVAL; } } /** * resolve_seg_reg() - obtain segment register index * @insn: Instruction with operands * @regs: Register values as seen when entering kernel mode * @regoff: Operand offset, in pt_regs, used to determine segment register * * Determine the segment register associated with the operands and, if * applicable, prefixes and the instruction pointed by @insn. * * The segment register associated to an operand used in register-indirect * addressing depends on: * * a) Whether running in long mode (in such a case segments are ignored, except * if FS or GS are used). * * b) Whether segment override prefixes can be used. Certain instructions and * registers do not allow override prefixes. * * c) Whether segment overrides prefixes are found in the instruction prefixes. * * d) If there are not segment override prefixes or they cannot be used, the * default segment register associated with the operand register is used. * * The function checks first if segment override prefixes can be used with the * operand indicated by @regoff. If allowed, obtain such overridden segment * register index. Lastly, if not prefixes were found or cannot be used, resolve * the segment register index to use based on the defaults described in the * Intel documentation. In long mode, all segment register indexes will be * ignored, except if overrides were found for FS or GS. All these operations * are done using helper functions. * * The operand register, @regoff, is represented as the offset from the base of * pt_regs. * * As stated, the main use of this function is to determine the segment register * index based on the instruction, its operands and prefixes. Hence, @insn * must be valid. However, if @regoff indicates rIP, we don't need to inspect * @insn at all as in this case CS is used in all cases. This case is checked * before proceeding further. * * Please note that this function does not return the value in the segment * register (i.e., the segment selector) but our defined index. The segment * selector needs to be obtained using get_segment_selector() and passing the * segment register index resolved by this function. * * Returns: * * An index identifying the segment register to use, among CS, SS, DS, * ES, FS, or GS. INAT_SEG_REG_IGNORE is returned if running in long mode. * * -EINVAL in case of error. */ static int resolve_seg_reg(struct insn *insn, struct pt_regs *regs, int regoff) { int idx; /* * In the unlikely event of having to resolve the segment register * index for rIP, do it first. Segment override prefixes should not * be used. Hence, it is not necessary to inspect the instruction, * which may be invalid at this point. */ if (regoff == offsetof(struct pt_regs, ip)) { if (any_64bit_mode(regs)) return INAT_SEG_REG_IGNORE; else return INAT_SEG_REG_CS; } if (!insn) return -EINVAL; if (!check_seg_overrides(insn, regoff)) return resolve_default_seg(insn, regs, regoff); idx = get_seg_reg_override_idx(insn); if (idx < 0) return idx; if (idx == INAT_SEG_REG_DEFAULT) return resolve_default_seg(insn, regs, regoff); /* * In long mode, segment override prefixes are ignored, except for * overrides for FS and GS. */ if (any_64bit_mode(regs)) { if (idx != INAT_SEG_REG_FS && idx != INAT_SEG_REG_GS) idx = INAT_SEG_REG_IGNORE; } return idx; } /** * get_segment_selector() - obtain segment selector * @regs: Register values as seen when entering kernel mode * @seg_reg_idx: Segment register index to use * * Obtain the segment selector from any of the CS, SS, DS, ES, FS, GS segment * registers. In CONFIG_X86_32, the segment is obtained from either pt_regs or * kernel_vm86_regs as applicable. In CONFIG_X86_64, CS and SS are obtained * from pt_regs. DS, ES, FS and GS are obtained by reading the actual CPU * registers. This done for only for completeness as in CONFIG_X86_64 segment * registers are ignored. * * Returns: * * Value of the segment selector, including null when running in * long mode. * * -EINVAL on error. */ static short get_segment_selector(struct pt_regs *regs, int seg_reg_idx) { unsigned short sel; #ifdef CONFIG_X86_64 switch (seg_reg_idx) { case INAT_SEG_REG_IGNORE: return 0; case INAT_SEG_REG_CS: return (unsigned short)(regs->cs & 0xffff); case INAT_SEG_REG_SS: return (unsigned short)(regs->ss & 0xffff); case INAT_SEG_REG_DS: savesegment(ds, sel); return sel; case INAT_SEG_REG_ES: savesegment(es, sel); return sel; case INAT_SEG_REG_FS: savesegment(fs, sel); return sel; case INAT_SEG_REG_GS: savesegment(gs, sel); return sel; default: return -EINVAL; } #else /* CONFIG_X86_32 */ struct kernel_vm86_regs *vm86regs = (struct kernel_vm86_regs *)regs; if (v8086_mode(regs)) { switch (seg_reg_idx) { case INAT_SEG_REG_CS: return (unsigned short)(regs->cs & 0xffff); case INAT_SEG_REG_SS: return (unsigned short)(regs->ss & 0xffff); case INAT_SEG_REG_DS: return vm86regs->ds; case INAT_SEG_REG_ES: return vm86regs->es; case INAT_SEG_REG_FS: return vm86regs->fs; case INAT_SEG_REG_GS: return vm86regs->gs; case INAT_SEG_REG_IGNORE: default: return -EINVAL; } } switch (seg_reg_idx) { case INAT_SEG_REG_CS: return (unsigned short)(regs->cs & 0xffff); case INAT_SEG_REG_SS: return (unsigned short)(regs->ss & 0xffff); case INAT_SEG_REG_DS: return (unsigned short)(regs->ds & 0xffff); case INAT_SEG_REG_ES: return (unsigned short)(regs->es & 0xffff); case INAT_SEG_REG_FS: return (unsigned short)(regs->fs & 0xffff); case INAT_SEG_REG_GS: savesegment(gs, sel); return sel; case INAT_SEG_REG_IGNORE: default: return -EINVAL; } #endif /* CONFIG_X86_64 */ } static const int pt_regoff[] = { offsetof(struct pt_regs, ax), offsetof(struct pt_regs, cx), offsetof(struct pt_regs, dx), offsetof(struct pt_regs, bx), offsetof(struct pt_regs, sp), offsetof(struct pt_regs, bp), offsetof(struct pt_regs, si), offsetof(struct pt_regs, di), #ifdef CONFIG_X86_64 offsetof(struct pt_regs, r8), offsetof(struct pt_regs, r9), offsetof(struct pt_regs, r10), offsetof(struct pt_regs, r11), offsetof(struct pt_regs, r12), offsetof(struct pt_regs, r13), offsetof(struct pt_regs, r14), offsetof(struct pt_regs, r15), #else offsetof(struct pt_regs, ds), offsetof(struct pt_regs, es), offsetof(struct pt_regs, fs), offsetof(struct pt_regs, gs), #endif }; int pt_regs_offset(struct pt_regs *regs, int regno) { if ((unsigned)regno < ARRAY_SIZE(pt_regoff)) return pt_regoff[regno]; return -EDOM; } static int get_regno(struct insn *insn, enum reg_type type) { int nr_registers = ARRAY_SIZE(pt_regoff); int regno = 0; /* * Don't possibly decode a 32-bit instructions as * reading a 64-bit-only register. */ if (IS_ENABLED(CONFIG_X86_64) && !insn->x86_64) nr_registers -= 8; switch (type) { case REG_TYPE_RM: regno = X86_MODRM_RM(insn->modrm.value); /* * ModRM.mod == 0 and ModRM.rm == 5 means a 32-bit displacement * follows the ModRM byte. */ if (!X86_MODRM_MOD(insn->modrm.value) && regno == 5) return -EDOM; if (X86_REX_B(insn->rex_prefix.value)) regno += 8; break; case REG_TYPE_REG: regno = X86_MODRM_REG(insn->modrm.value); if (X86_REX_R(insn->rex_prefix.value)) regno += 8; break; case REG_TYPE_INDEX: regno = X86_SIB_INDEX(insn->sib.value); if (X86_REX_X(insn->rex_prefix.value)) regno += 8; /* * If ModRM.mod != 3 and SIB.index = 4 the scale*index * portion of the address computation is null. This is * true only if REX.X is 0. In such a case, the SIB index * is used in the address computation. */ if (X86_MODRM_MOD(insn->modrm.value) != 3 && regno == 4) return -EDOM; break; case REG_TYPE_BASE: regno = X86_SIB_BASE(insn->sib.value); /* * If ModRM.mod is 0 and SIB.base == 5, the base of the * register-indirect addressing is 0. In this case, a * 32-bit displacement follows the SIB byte. */ if (!X86_MODRM_MOD(insn->modrm.value) && regno == 5) return -EDOM; if (X86_REX_B(insn->rex_prefix.value)) regno += 8; break; default: pr_err_ratelimited("invalid register type: %d\n", type); return -EINVAL; } if (regno >= nr_registers) { WARN_ONCE(1, "decoded an instruction with an invalid register"); return -EINVAL; } return regno; } static int get_reg_offset(struct insn *insn, struct pt_regs *regs, enum reg_type type) { int regno = get_regno(insn, type); if (regno < 0) return regno; return pt_regs_offset(regs, regno); } /** * get_reg_offset_16() - Obtain offset of register indicated by instruction * @insn: Instruction containing ModRM byte * @regs: Register values as seen when entering kernel mode * @offs1: Offset of the first operand register * @offs2: Offset of the second operand register, if applicable * * Obtain the offset, in pt_regs, of the registers indicated by the ModRM byte * in @insn. This function is to be used with 16-bit address encodings. The * @offs1 and @offs2 will be written with the offset of the two registers * indicated by the instruction. In cases where any of the registers is not * referenced by the instruction, the value will be set to -EDOM. * * Returns: * * 0 on success, -EINVAL on error. */ static int get_reg_offset_16(struct insn *insn, struct pt_regs *regs, int *offs1, int *offs2) { /* * 16-bit addressing can use one or two registers. Specifics of * encodings are given in Table 2-1. "16-Bit Addressing Forms with the * ModR/M Byte" of the Intel Software Development Manual. */ static const int regoff1[] = { offsetof(struct pt_regs, bx), offsetof(struct pt_regs, bx), offsetof(struct pt_regs, bp), offsetof(struct pt_regs, bp), offsetof(struct pt_regs, si), offsetof(struct pt_regs, di), offsetof(struct pt_regs, bp), offsetof(struct pt_regs, bx), }; static const int regoff2[] = { offsetof(struct pt_regs, si), offsetof(struct pt_regs, di), offsetof(struct pt_regs, si), offsetof(struct pt_regs, di), -EDOM, -EDOM, -EDOM, -EDOM, }; if (!offs1 || !offs2) return -EINVAL; /* Operand is a register, use the generic function. */ if (X86_MODRM_MOD(insn->modrm.value) == 3) { *offs1 = insn_get_modrm_rm_off(insn, regs); *offs2 = -EDOM; return 0; } *offs1 = regoff1[X86_MODRM_RM(insn->modrm.value)]; *offs2 = regoff2[X86_MODRM_RM(insn->modrm.value)]; /* * If ModRM.mod is 0 and ModRM.rm is 110b, then we use displacement- * only addressing. This means that no registers are involved in * computing the effective address. Thus, ensure that the first * register offset is invalid. The second register offset is already * invalid under the aforementioned conditions. */ if ((X86_MODRM_MOD(insn->modrm.value) == 0) && (X86_MODRM_RM(insn->modrm.value) == 6)) *offs1 = -EDOM; return 0; } /** * get_desc() - Obtain contents of a segment descriptor * @out: Segment descriptor contents on success * @sel: Segment selector * * Given a segment selector, obtain a pointer to the segment descriptor. * Both global and local descriptor tables are supported. * * Returns: * * True on success, false on failure. * * NULL on error. */ static bool get_desc(struct desc_struct *out, unsigned short sel) { struct desc_ptr gdt_desc = {0, 0}; unsigned long desc_base; #ifdef CONFIG_MODIFY_LDT_SYSCALL if ((sel & SEGMENT_TI_MASK) == SEGMENT_LDT) { bool success = false; struct ldt_struct *ldt; /* Bits [15:3] contain the index of the desired entry. */ sel >>= 3; /* * If we're not in a valid context with a real (not just lazy) * user mm, then don't even try. */ if (!nmi_uaccess_okay()) return false; mutex_lock(&current->mm->context.lock); ldt = current->mm->context.ldt; if (ldt && sel < ldt->nr_entries) { *out = ldt->entries[sel]; success = true; } mutex_unlock(&current->mm->context.lock); return success; } #endif native_store_gdt(&gdt_desc); /* * Segment descriptors have a size of 8 bytes. Thus, the index is * multiplied by 8 to obtain the memory offset of the desired descriptor * from the base of the GDT. As bits [15:3] of the segment selector * contain the index, it can be regarded as multiplied by 8 already. * All that remains is to clear bits [2:0]. */ desc_base = sel & ~(SEGMENT_RPL_MASK | SEGMENT_TI_MASK); if (desc_base > gdt_desc.size) return false; *out = *(struct desc_struct *)(gdt_desc.address + desc_base); return true; } /** * insn_get_seg_base() - Obtain base address of segment descriptor. * @regs: Register values as seen when entering kernel mode * @seg_reg_idx: Index of the segment register pointing to seg descriptor * * Obtain the base address of the segment as indicated by the segment descriptor * pointed by the segment selector. The segment selector is obtained from the * input segment register index @seg_reg_idx. * * Returns: * * In protected mode, base address of the segment. Zero in long mode, * except when FS or GS are used. In virtual-8086 mode, the segment * selector shifted 4 bits to the right. * * -1L in case of error. */ unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx) { struct desc_struct desc; short sel; sel = get_segment_selector(regs, seg_reg_idx); if (sel < 0) return -1L; if (v8086_mode(regs)) /* * Base is simply the segment selector shifted 4 * bits to the right. */ return (unsigned long)(sel << 4); if (any_64bit_mode(regs)) { /* * Only FS or GS will have a base address, the rest of * the segments' bases are forced to 0. */ unsigned long base; if (seg_reg_idx == INAT_SEG_REG_FS) { rdmsrq(MSR_FS_BASE, base); } else if (seg_reg_idx == INAT_SEG_REG_GS) { /* * swapgs was called at the kernel entry point. Thus, * MSR_KERNEL_GS_BASE will have the user-space GS base. */ if (user_mode(regs)) rdmsrq(MSR_KERNEL_GS_BASE, base); else rdmsrq(MSR_GS_BASE, base); } else { base = 0; } return base; } /* In protected mode the segment selector cannot be null. */ if (!sel) return -1L; if (!get_desc(&desc, sel)) return -1L; return get_desc_base(&desc); } /** * get_seg_limit() - Obtain the limit of a segment descriptor * @regs: Register values as seen when entering kernel mode * @seg_reg_idx: Index of the segment register pointing to seg descriptor * * Obtain the limit of the segment as indicated by the segment descriptor * pointed by the segment selector. The segment selector is obtained from the * input segment register index @seg_reg_idx. * * Returns: * * In protected mode, the limit of the segment descriptor in bytes. * In long mode and virtual-8086 mode, segment limits are not enforced. Thus, * limit is returned as -1L to imply a limit-less segment. * * Zero is returned on error. */ static unsigned long get_seg_limit(struct pt_regs *regs, int seg_reg_idx) { struct desc_struct desc; unsigned long limit; short sel; sel = get_segment_selector(regs, seg_reg_idx); if (sel < 0) return 0; if (any_64bit_mode(regs) || v8086_mode(regs)) return -1L; if (!sel) return 0; if (!get_desc(&desc, sel)) return 0; /* * If the granularity bit is set, the limit is given in multiples * of 4096. This also means that the 12 least significant bits are * not tested when checking the segment limits. In practice, * this means that the segment ends in (limit << 12) + 0xfff. */ limit = get_desc_limit(&desc); if (desc.g) limit = (limit << 12) + 0xfff; return limit; } /** * insn_get_code_seg_params() - Obtain code segment parameters * @regs: Structure with register values as seen when entering kernel mode * * Obtain address and operand sizes of the code segment. It is obtained from the * selector contained in the CS register in regs. In protected mode, the default * address is determined by inspecting the L and D bits of the segment * descriptor. In virtual-8086 mode, the default is always two bytes for both * address and operand sizes. * * Returns: * * An int containing ORed-in default parameters on success. * * -EINVAL on error. */ int insn_get_code_seg_params(struct pt_regs *regs) { struct desc_struct desc; short sel; if (v8086_mode(regs)) /* Address and operand size are both 16-bit. */ return INSN_CODE_SEG_PARAMS(2, 2); sel = get_segment_selector(regs, INAT_SEG_REG_CS); if (sel < 0) return sel; if (!get_desc(&desc, sel)) return -EINVAL; /* * The most significant byte of the Type field of the segment descriptor * determines whether a segment contains data or code. If this is a data * segment, return error. */ if (!(desc.type & BIT(3))) return -EINVAL; switch ((desc.l << 1) | desc.d) { case 0: /* * Legacy mode. CS.L=0, CS.D=0. Address and operand size are * both 16-bit. */ return INSN_CODE_SEG_PARAMS(2, 2); case 1: /* * Legacy mode. CS.L=0, CS.D=1. Address and operand size are * both 32-bit. */ return INSN_CODE_SEG_PARAMS(4, 4); case 2: /* * IA-32e 64-bit mode. CS.L=1, CS.D=0. Address size is 64-bit; * operand size is 32-bit. */ return INSN_CODE_SEG_PARAMS(4, 8); case 3: /* Invalid setting. CS.L=1, CS.D=1 */ fallthrough; default: return -EINVAL; } } /** * insn_get_modrm_rm_off() - Obtain register in r/m part of the ModRM byte * @insn: Instruction containing the ModRM byte * @regs: Register values as seen when entering kernel mode * * Returns: * * The register indicated by the r/m part of the ModRM byte. The * register is obtained as an offset from the base of pt_regs. In specific * cases, the returned value can be -EDOM to indicate that the particular value * of ModRM does not refer to a register and shall be ignored. */ int insn_get_modrm_rm_off(struct insn *insn, struct pt_regs *regs) { return get_reg_offset(insn, regs, REG_TYPE_RM); } /** * insn_get_modrm_reg_off() - Obtain register in reg part of the ModRM byte * @insn: Instruction containing the ModRM byte * @regs: Register values as seen when entering kernel mode * * Returns: * * The register indicated by the reg part of the ModRM byte. The * register is obtained as an offset from the base of pt_regs. */ int insn_get_modrm_reg_off(struct insn *insn, struct pt_regs *regs) { return get_reg_offset(insn, regs, REG_TYPE_REG); } /** * insn_get_modrm_reg_ptr() - Obtain register pointer based on ModRM byte * @insn: Instruction containing the ModRM byte * @regs: Register values as seen when entering kernel mode * * Returns: * * The register indicated by the reg part of the ModRM byte. * The register is obtained as a pointer within pt_regs. */ unsigned long *insn_get_modrm_reg_ptr(struct insn *insn, struct pt_regs *regs) { int offset; offset = insn_get_modrm_reg_off(insn, regs); if (offset < 0) return NULL; return (void *)regs + offset; } /** * get_seg_base_limit() - obtain base address and limit of a segment * @insn: Instruction. Must be valid. * @regs: Register values as seen when entering kernel mode * @regoff: Operand offset, in pt_regs, used to resolve segment descriptor * @base: Obtained segment base * @limit: Obtained segment limit * * Obtain the base address and limit of the segment associated with the operand * @regoff and, if any or allowed, override prefixes in @insn. This function is * different from insn_get_seg_base() as the latter does not resolve the segment * associated with the instruction operand. If a limit is not needed (e.g., * when running in long mode), @limit can be NULL. * * Returns: * * 0 on success. @base and @limit will contain the base address and of the * resolved segment, respectively. * * -EINVAL on error. */ static int get_seg_base_limit(struct insn *insn, struct pt_regs *regs, int regoff, unsigned long *base, unsigned long *limit) { int seg_reg_idx; if (!base) return -EINVAL; seg_reg_idx = resolve_seg_reg(insn, regs, regoff); if (seg_reg_idx < 0) return seg_reg_idx; *base = insn_get_seg_base(regs, seg_reg_idx); if (*base == -1L) return -EINVAL; if (!limit) return 0; *limit = get_seg_limit(regs, seg_reg_idx); if (!(*limit)) return -EINVAL; return 0; } /** * get_eff_addr_reg() - Obtain effective address from register operand * @insn: Instruction. Must be valid. * @regs: Register values as seen when entering kernel mode * @regoff: Obtained operand offset, in pt_regs, with the effective address * @eff_addr: Obtained effective address * * Obtain the effective address stored in the register operand as indicated by * the ModRM byte. This function is to be used only with register addressing * (i.e., ModRM.mod is 3). The effective address is saved in @eff_addr. The * register operand, as an offset from the base of pt_regs, is saved in @regoff; * such offset can then be used to resolve the segment associated with the * operand. This function can be used with any of the supported address sizes * in x86. * * Returns: * * 0 on success. @eff_addr will have the effective address stored in the * operand indicated by ModRM. @regoff will have such operand as an offset from * the base of pt_regs. * * -EINVAL on error. */ static int get_eff_addr_reg(struct insn *insn, struct pt_regs *regs, int *regoff, long *eff_addr) { int ret; ret = insn_get_modrm(insn); if (ret) return ret; if (X86_MODRM_MOD(insn->modrm.value) != 3) return -EINVAL; *regoff = get_reg_offset(insn, regs, REG_TYPE_RM); if (*regoff < 0) return -EINVAL; /* Ignore bytes that are outside the address size. */ if (insn->addr_bytes == 2) *eff_addr = regs_get_register(regs, *regoff) & 0xffff; else if (insn->addr_bytes == 4) *eff_addr = regs_get_register(regs, *regoff) & 0xffffffff; else /* 64-bit address */ *eff_addr = regs_get_register(regs, *regoff); return 0; } /** * get_eff_addr_modrm() - Obtain referenced effective address via ModRM * @insn: Instruction. Must be valid. * @regs: Register values as seen when entering kernel mode * @regoff: Obtained operand offset, in pt_regs, associated with segment * @eff_addr: Obtained effective address * * Obtain the effective address referenced by the ModRM byte of @insn. After * identifying the registers involved in the register-indirect memory reference, * its value is obtained from the operands in @regs. The computed address is * stored @eff_addr. Also, the register operand that indicates the associated * segment is stored in @regoff, this parameter can later be used to determine * such segment. * * Returns: * * 0 on success. @eff_addr will have the referenced effective address. @regoff * will have a register, as an offset from the base of pt_regs, that can be used * to resolve the associated segment. * * -EINVAL on error. */ static int get_eff_addr_modrm(struct insn *insn, struct pt_regs *regs, int *regoff, long *eff_addr) { long tmp; int ret; if (insn->addr_bytes != 8 && insn->addr_bytes != 4) return -EINVAL; ret = insn_get_modrm(insn); if (ret) return ret; if (X86_MODRM_MOD(insn->modrm.value) > 2) return -EINVAL; *regoff = get_reg_offset(insn, regs, REG_TYPE_RM); /* * -EDOM means that we must ignore the address_offset. In such a case, * in 64-bit mode the effective address relative to the rIP of the * following instruction. */ if (*regoff == -EDOM) { if (any_64bit_mode(regs)) tmp = regs->ip + insn->length; else tmp = 0; } else if (*regoff < 0) { return -EINVAL; } else { tmp = regs_get_register(regs, *regoff); } if (insn->addr_bytes == 4) { int addr32 = (int)(tmp & 0xffffffff) + insn->displacement.value; *eff_addr = addr32 & 0xffffffff; } else { *eff_addr = tmp + insn->displacement.value; } return 0; } /** * get_eff_addr_modrm_16() - Obtain referenced effective address via ModRM * @insn: Instruction. Must be valid. * @regs: Register values as seen when entering kernel mode * @regoff: Obtained operand offset, in pt_regs, associated with segment * @eff_addr: Obtained effective address * * Obtain the 16-bit effective address referenced by the ModRM byte of @insn. * After identifying the registers involved in the register-indirect memory * reference, its value is obtained from the operands in @regs. The computed * address is stored @eff_addr. Also, the register operand that indicates * the associated segment is stored in @regoff, this parameter can later be used * to determine such segment. * * Returns: * * 0 on success. @eff_addr will have the referenced effective address. @regoff * will have a register, as an offset from the base of pt_regs, that can be used * to resolve the associated segment. * * -EINVAL on error. */ static int get_eff_addr_modrm_16(struct insn *insn, struct pt_regs *regs, int *regoff, short *eff_addr) { int addr_offset1, addr_offset2, ret; short addr1 = 0, addr2 = 0, displacement; if (insn->addr_bytes != 2) return -EINVAL; insn_get_modrm(insn); if (!insn->modrm.nbytes) return -EINVAL; if (X86_MODRM_MOD(insn->modrm.value) > 2) return -EINVAL; ret = get_reg_offset_16(insn, regs, &addr_offset1, &addr_offset2); if (ret < 0) return -EINVAL; /* * Don't fail on invalid offset values. They might be invalid because * they cannot be used for this particular value of ModRM. Instead, use * them in the computation only if they contain a valid value. */ if (addr_offset1 != -EDOM) addr1 = regs_get_register(regs, addr_offset1) & 0xffff; if (addr_offset2 != -EDOM) addr2 = regs_get_register(regs, addr_offset2) & 0xffff; displacement = insn->displacement.value & 0xffff; *eff_addr = addr1 + addr2 + displacement; /* * The first operand register could indicate to use of either SS or DS * registers to obtain the segment selector. The second operand * register can only indicate the use of DS. Thus, the first operand * will be used to obtain the segment selector. */ *regoff = addr_offset1; return 0; } /** * get_eff_addr_sib() - Obtain referenced effective address via SIB * @insn: Instruction. Must be valid. * @regs: Register values as seen when entering kernel mode * @base_offset: Obtained operand offset, in pt_regs, associated with segment * @eff_addr: Obtained effective address * * Obtain the effective address referenced by the SIB byte of @insn. After * identifying the registers involved in the indexed, register-indirect memory * reference, its value is obtained from the operands in @regs. The computed * address is stored @eff_addr. Also, the register operand that indicates the * associated segment is stored in @base_offset; this parameter can later be * used to determine such segment. * * Returns: * * 0 on success. @eff_addr will have the referenced effective address. * @base_offset will have a register, as an offset from the base of pt_regs, * that can be used to resolve the associated segment. * * Negative value on error. */ static int get_eff_addr_sib(struct insn *insn, struct pt_regs *regs, int *base_offset, long *eff_addr) { long base, indx; int indx_offset; int ret; if (insn->addr_bytes != 8 && insn->addr_bytes != 4) return -EINVAL; ret = insn_get_modrm(insn); if (ret) return ret; if (!insn->modrm.nbytes) return -EINVAL; if (X86_MODRM_MOD(insn->modrm.value) > 2) return -EINVAL; ret = insn_get_sib(insn); if (ret) return ret; if (!insn->sib.nbytes) return -EINVAL; *base_offset = get_reg_offset(insn, regs, REG_TYPE_BASE); indx_offset = get_reg_offset(insn, regs, REG_TYPE_INDEX); /* * Negative values in the base and index offset means an error when * decoding the SIB byte. Except -EDOM, which means that the registers * should not be used in the address computation. */ if (*base_offset == -EDOM) base = 0; else if (*base_offset < 0) return -EINVAL; else base = regs_get_register(regs, *base_offset); if (indx_offset == -EDOM) indx = 0; else if (indx_offset < 0) return -EINVAL; else indx = regs_get_register(regs, indx_offset); if (insn->addr_bytes == 4) { int addr32, base32, idx32; base32 = base & 0xffffffff; idx32 = indx & 0xffffffff; addr32 = base32 + idx32 * (1 << X86_SIB_SCALE(insn->sib.value)); addr32 += insn->displacement.value; *eff_addr = addr32 & 0xffffffff; } else { *eff_addr = base + indx * (1 << X86_SIB_SCALE(insn->sib.value)); *eff_addr += insn->displacement.value; } return 0; } /** * get_addr_ref_16() - Obtain the 16-bit address referred by instruction * @insn: Instruction containing ModRM byte and displacement * @regs: Register values as seen when entering kernel mode * * This function is to be used with 16-bit address encodings. Obtain the memory * address referred by the instruction's ModRM and displacement bytes. Also, the * segment used as base is determined by either any segment override prefixes in * @insn or the default segment of the registers involved in the address * computation. In protected mode, segment limits are enforced. * * Returns: * * Linear address referenced by the instruction operands on success. * * -1L on error. */ static void __user *get_addr_ref_16(struct insn *insn, struct pt_regs *regs) { unsigned long linear_addr = -1L, seg_base, seg_limit; int ret, regoff; short eff_addr; long tmp; if (insn_get_displacement(insn)) goto out; if (insn->addr_bytes != 2) goto out; if (X86_MODRM_MOD(insn->modrm.value) == 3) { ret = get_eff_addr_reg(insn, regs, &regoff, &tmp); if (ret) goto out; eff_addr = tmp; } else { ret = get_eff_addr_modrm_16(insn, regs, &regoff, &eff_addr); if (ret) goto out; } ret = get_seg_base_limit(insn, regs, regoff, &seg_base, &seg_limit); if (ret) goto out; /* * Before computing the linear address, make sure the effective address * is within the limits of the segment. In virtual-8086 mode, segment * limits are not enforced. In such a case, the segment limit is -1L to * reflect this fact. */ if ((unsigned long)(eff_addr & 0xffff) > seg_limit) goto out; linear_addr = (unsigned long)(eff_addr & 0xffff) + seg_base; /* Limit linear address to 20 bits */ if (v8086_mode(regs)) linear_addr &= 0xfffff; out: return (void __user *)linear_addr; } /** * get_addr_ref_32() - Obtain a 32-bit linear address * @insn: Instruction with ModRM, SIB bytes and displacement * @regs: Register values as seen when entering kernel mode * * This function is to be used with 32-bit address encodings to obtain the * linear memory address referred by the instruction's ModRM, SIB, * displacement bytes and segment base address, as applicable. If in protected * mode, segment limits are enforced. * * Returns: * * Linear address referenced by instruction and registers on success. * * -1L on error. */ static void __user *get_addr_ref_32(struct insn *insn, struct pt_regs *regs) { unsigned long linear_addr = -1L, seg_base, seg_limit; int eff_addr, regoff; long tmp; int ret; if (insn->addr_bytes != 4) goto out; if (X86_MODRM_MOD(insn->modrm.value) == 3) { ret = get_eff_addr_reg(insn, regs, &regoff, &tmp); if (ret) goto out; eff_addr = tmp; } else { if (insn->sib.nbytes) { ret = get_eff_addr_sib(insn, regs, &regoff, &tmp); if (ret) goto out; eff_addr = tmp; } else { ret = get_eff_addr_modrm(insn, regs, &regoff, &tmp); if (ret) goto out; eff_addr = tmp; } } ret = get_seg_base_limit(insn, regs, regoff, &seg_base, &seg_limit); if (ret) goto out; /* * In protected mode, before computing the linear address, make sure * the effective address is within the limits of the segment. * 32-bit addresses can be used in long and virtual-8086 modes if an * address override prefix is used. In such cases, segment limits are * not enforced. When in virtual-8086 mode, the segment limit is -1L * to reflect this situation. * * After computed, the effective address is treated as an unsigned * quantity. */ if (!any_64bit_mode(regs) && ((unsigned int)eff_addr > seg_limit)) goto out; /* * Even though 32-bit address encodings are allowed in virtual-8086 * mode, the address range is still limited to [0x-0xffff]. */ if (v8086_mode(regs) && (eff_addr & ~0xffff)) goto out; /* * Data type long could be 64 bits in size. Ensure that our 32-bit * effective address is not sign-extended when computing the linear * address. */ linear_addr = (unsigned long)(eff_addr & 0xffffffff) + seg_base; /* Limit linear address to 20 bits */ if (v8086_mode(regs)) linear_addr &= 0xfffff; out: return (void __user *)linear_addr; } /** * get_addr_ref_64() - Obtain a 64-bit linear address * @insn: Instruction struct with ModRM and SIB bytes and displacement * @regs: Structure with register values as seen when entering kernel mode * * This function is to be used with 64-bit address encodings to obtain the * linear memory address referred by the instruction's ModRM, SIB, * displacement bytes and segment base address, as applicable. * * Returns: * * Linear address referenced by instruction and registers on success. * * -1L on error. */ #ifndef CONFIG_X86_64 static void __user *get_addr_ref_64(struct insn *insn, struct pt_regs *regs) { return (void __user *)-1L; } #else static void __user *get_addr_ref_64(struct insn *insn, struct pt_regs *regs) { unsigned long linear_addr = -1L, seg_base; int regoff, ret; long eff_addr; if (insn->addr_bytes != 8) goto out; if (X86_MODRM_MOD(insn->modrm.value) == 3) { ret = get_eff_addr_reg(insn, regs, &regoff, &eff_addr); if (ret) goto out; } else { if (insn->sib.nbytes) { ret = get_eff_addr_sib(insn, regs, &regoff, &eff_addr); if (ret) goto out; } else { ret = get_eff_addr_modrm(insn, regs, &regoff, &eff_addr); if (ret) goto out; } } ret = get_seg_base_limit(insn, regs, regoff, &seg_base, NULL); if (ret) goto out; linear_addr = (unsigned long)eff_addr + seg_base; out: return (void __user *)linear_addr; } #endif /* CONFIG_X86_64 */ /** * insn_get_addr_ref() - Obtain the linear address referred by instruction * @insn: Instruction structure containing ModRM byte and displacement * @regs: Structure with register values as seen when entering kernel mode * * Obtain the linear address referred by the instruction's ModRM, SIB and * displacement bytes, and segment base, as applicable. In protected mode, * segment limits are enforced. * * Returns: * * Linear address referenced by instruction and registers on success. * * -1L on error. */ void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs) { if (!insn || !regs) return (void __user *)-1L; if (insn_get_opcode(insn)) return (void __user *)-1L; switch (insn->addr_bytes) { case 2: return get_addr_ref_16(insn, regs); case 4: return get_addr_ref_32(insn, regs); case 8: return get_addr_ref_64(insn, regs); default: return (void __user *)-1L; } } int insn_get_effective_ip(struct pt_regs *regs, unsigned long *ip) { unsigned long seg_base = 0; /* * If not in user-space long mode, a custom code segment could be in * use. This is true in protected mode (if the process defined a local * descriptor table), or virtual-8086 mode. In most of the cases * seg_base will be zero as in USER_CS. */ if (!user_64bit_mode(regs)) { seg_base = insn_get_seg_base(regs, INAT_SEG_REG_CS); if (seg_base == -1L) return -EINVAL; } *ip = seg_base + regs->ip; return 0; } /** * insn_fetch_from_user() - Copy instruction bytes from user-space memory * @regs: Structure with register values as seen when entering kernel mode * @buf: Array to store the fetched instruction * * Gets the linear address of the instruction and copies the instruction bytes * to the buf. * * Returns: * * - number of instruction bytes copied. * - 0 if nothing was copied. * - -EINVAL if the linear address of the instruction could not be calculated */ int insn_fetch_from_user(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE]) { unsigned long ip; int not_copied; if (insn_get_effective_ip(regs, &ip)) return -EINVAL; not_copied = copy_from_user(buf, (void __user *)ip, MAX_INSN_SIZE); return MAX_INSN_SIZE - not_copied; } /** * insn_fetch_from_user_inatomic() - Copy instruction bytes from user-space memory * while in atomic code * @regs: Structure with register values as seen when entering kernel mode * @buf: Array to store the fetched instruction * * Gets the linear address of the instruction and copies the instruction bytes * to the buf. This function must be used in atomic context. * * Returns: * * - number of instruction bytes copied. * - 0 if nothing was copied. * - -EINVAL if the linear address of the instruction could not be calculated. */ int insn_fetch_from_user_inatomic(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE]) { unsigned long ip; int not_copied; if (insn_get_effective_ip(regs, &ip)) return -EINVAL; not_copied = __copy_from_user_inatomic(buf, (void __user *)ip, MAX_INSN_SIZE); return MAX_INSN_SIZE - not_copied; } /** * insn_decode_from_regs() - Decode an instruction * @insn: Structure to store decoded instruction * @regs: Structure with register values as seen when entering kernel mode * @buf: Buffer containing the instruction bytes * @buf_size: Number of instruction bytes available in buf * * Decodes the instruction provided in buf and stores the decoding results in * insn. Also determines the correct address and operand sizes. * * Returns: * * True if instruction was decoded, False otherwise. */ bool insn_decode_from_regs(struct insn *insn, struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE], int buf_size) { int seg_defs; insn_init(insn, buf, buf_size, user_64bit_mode(regs)); /* * Override the default operand and address sizes with what is specified * in the code segment descriptor. The instruction decoder only sets * the address size it to either 4 or 8 address bytes and does nothing * for the operand bytes. This OK for most of the cases, but we could * have special cases where, for instance, a 16-bit code segment * descriptor is used. * If there is an address override prefix, the instruction decoder * correctly updates these values, even for 16-bit defaults. */ seg_defs = insn_get_code_seg_params(regs); if (seg_defs == -EINVAL) return false; insn->addr_bytes = INSN_CODE_SEG_ADDR_SZ(seg_defs); insn->opnd_bytes = INSN_CODE_SEG_OPND_SZ(seg_defs); if (insn_get_length(insn)) return false; if (buf_size < insn->length) return false; return true; } /** * insn_decode_mmio() - Decode a MMIO instruction * @insn: Structure to store decoded instruction * @bytes: Returns size of memory operand * * Decodes instruction that used for Memory-mapped I/O. * * Returns: * * Type of the instruction. Size of the memory operand is stored in * @bytes. If decode failed, INSN_MMIO_DECODE_FAILED returned. */ enum insn_mmio_type insn_decode_mmio(struct insn *insn, int *bytes) { enum insn_mmio_type type = INSN_MMIO_DECODE_FAILED; *bytes = 0; if (insn_get_opcode(insn)) return INSN_MMIO_DECODE_FAILED; switch (insn->opcode.bytes[0]) { case 0x88: /* MOV m8,r8 */ *bytes = 1; fallthrough; case 0x89: /* MOV m16/m32/m64, r16/m32/m64 */ if (!*bytes) *bytes = insn->opnd_bytes; type = INSN_MMIO_WRITE; break; case 0xc6: /* MOV m8, imm8 */ *bytes = 1; fallthrough; case 0xc7: /* MOV m16/m32/m64, imm16/imm32/imm64 */ if (!*bytes) *bytes = insn->opnd_bytes; type = INSN_MMIO_WRITE_IMM; break; case 0x8a: /* MOV r8, m8 */ *bytes = 1; fallthrough; case 0x8b: /* MOV r16/r32/r64, m16/m32/m64 */ if (!*bytes) *bytes = insn->opnd_bytes; type = INSN_MMIO_READ; break; case 0xa4: /* MOVS m8, m8 */ *bytes = 1; fallthrough; case 0xa5: /* MOVS m16/m32/m64, m16/m32/m64 */ if (!*bytes) *bytes = insn->opnd_bytes; type = INSN_MMIO_MOVS; break; case 0x0f: /* Two-byte instruction */ switch (insn->opcode.bytes[1]) { case 0xb6: /* MOVZX r16/r32/r64, m8 */ *bytes = 1; fallthrough; case 0xb7: /* MOVZX r32/r64, m16 */ if (!*bytes) *bytes = 2; type = INSN_MMIO_READ_ZERO_EXTEND; break; case 0xbe: /* MOVSX r16/r32/r64, m8 */ *bytes = 1; fallthrough; case 0xbf: /* MOVSX r32/r64, m16 */ if (!*bytes) *bytes = 2; type = INSN_MMIO_READ_SIGN_EXTEND; break; } break; } return type; }
5 1 3 5 5 1 1 16 2 1 2 3 3 12 4 5 1 4 6 2 4 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 /* * Copyright (c) 2010-2011 Atheros Communications Inc. * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include "htc.h" static int htc_issue_send(struct htc_target *target, struct sk_buff* skb, u16 len, u8 flags, u8 epid) { struct htc_frame_hdr *hdr; struct htc_endpoint *endpoint = &target->endpoint[epid]; int status; hdr = skb_push(skb, sizeof(struct htc_frame_hdr)); hdr->endpoint_id = epid; hdr->flags = flags; hdr->payload_len = cpu_to_be16(len); memset(hdr->control, 0, sizeof(hdr->control)); status = target->hif->send(target->hif_dev, endpoint->ul_pipeid, skb); return status; } static struct htc_endpoint *get_next_avail_ep(struct htc_endpoint *endpoint) { enum htc_endpoint_id avail_epid; for (avail_epid = (ENDPOINT_MAX - 1); avail_epid > ENDPOINT0; avail_epid--) if (endpoint[avail_epid].service_id == 0) return &endpoint[avail_epid]; return NULL; } static u8 service_to_ulpipe(u16 service_id) { switch (service_id) { case WMI_CONTROL_SVC: return 4; case WMI_BEACON_SVC: case WMI_CAB_SVC: case WMI_UAPSD_SVC: case WMI_MGMT_SVC: case WMI_DATA_VO_SVC: case WMI_DATA_VI_SVC: case WMI_DATA_BE_SVC: case WMI_DATA_BK_SVC: return 1; default: return 0; } } static u8 service_to_dlpipe(u16 service_id) { switch (service_id) { case WMI_CONTROL_SVC: return 3; case WMI_BEACON_SVC: case WMI_CAB_SVC: case WMI_UAPSD_SVC: case WMI_MGMT_SVC: case WMI_DATA_VO_SVC: case WMI_DATA_VI_SVC: case WMI_DATA_BE_SVC: case WMI_DATA_BK_SVC: return 2; default: return 0; } } static void htc_process_target_rdy(struct htc_target *target, void *buf) { struct htc_endpoint *endpoint; struct htc_ready_msg *htc_ready_msg = buf; target->credit_size = be16_to_cpu(htc_ready_msg->credit_size); endpoint = &target->endpoint[ENDPOINT0]; endpoint->service_id = HTC_CTRL_RSVD_SVC; endpoint->max_msglen = HTC_MAX_CONTROL_MESSAGE_LENGTH; atomic_inc(&target->tgt_ready); complete(&target->target_wait); } static void htc_process_conn_rsp(struct htc_target *target, struct htc_frame_hdr *htc_hdr) { struct htc_conn_svc_rspmsg *svc_rspmsg; struct htc_endpoint *endpoint, *tmp_endpoint = NULL; u16 service_id; u16 max_msglen; enum htc_endpoint_id epid, tepid; svc_rspmsg = (struct htc_conn_svc_rspmsg *) ((void *) htc_hdr + sizeof(struct htc_frame_hdr)); if (svc_rspmsg->status == HTC_SERVICE_SUCCESS) { epid = svc_rspmsg->endpoint_id; /* Check that the received epid for the endpoint to attach * a new service is valid. ENDPOINT0 can't be used here as it * is already reserved for HTC_CTRL_RSVD_SVC service and thus * should not be modified. */ if (epid <= ENDPOINT0 || epid >= ENDPOINT_MAX) return; service_id = be16_to_cpu(svc_rspmsg->service_id); max_msglen = be16_to_cpu(svc_rspmsg->max_msg_len); endpoint = &target->endpoint[epid]; for (tepid = (ENDPOINT_MAX - 1); tepid > ENDPOINT0; tepid--) { tmp_endpoint = &target->endpoint[tepid]; if (tmp_endpoint->service_id == service_id) { tmp_endpoint->service_id = 0; break; } } if (tepid == ENDPOINT0) return; endpoint->service_id = service_id; endpoint->max_txqdepth = tmp_endpoint->max_txqdepth; endpoint->ep_callbacks = tmp_endpoint->ep_callbacks; endpoint->ul_pipeid = tmp_endpoint->ul_pipeid; endpoint->dl_pipeid = tmp_endpoint->dl_pipeid; endpoint->max_msglen = max_msglen; target->conn_rsp_epid = epid; complete(&target->cmd_wait); } else { target->conn_rsp_epid = ENDPOINT_UNUSED; } } static int htc_config_pipe_credits(struct htc_target *target) { struct sk_buff *skb; struct htc_config_pipe_msg *cp_msg; int ret; unsigned long time_left; skb = alloc_skb(50 + sizeof(struct htc_frame_hdr), GFP_ATOMIC); if (!skb) { dev_err(target->dev, "failed to allocate send buffer\n"); return -ENOMEM; } skb_reserve(skb, sizeof(struct htc_frame_hdr)); cp_msg = skb_put(skb, sizeof(struct htc_config_pipe_msg)); cp_msg->message_id = cpu_to_be16(HTC_MSG_CONFIG_PIPE_ID); cp_msg->pipe_id = USB_WLAN_TX_PIPE; cp_msg->credits = target->credits; target->htc_flags |= HTC_OP_CONFIG_PIPE_CREDITS; ret = htc_issue_send(target, skb, skb->len, 0, ENDPOINT0); if (ret) goto err; time_left = wait_for_completion_timeout(&target->cmd_wait, HZ); if (!time_left) { dev_err(target->dev, "HTC credit config timeout\n"); return -ETIMEDOUT; } return 0; err: kfree_skb(skb); return -EINVAL; } static int htc_setup_complete(struct htc_target *target) { struct sk_buff *skb; struct htc_comp_msg *comp_msg; int ret = 0; unsigned long time_left; skb = alloc_skb(50 + sizeof(struct htc_frame_hdr), GFP_ATOMIC); if (!skb) { dev_err(target->dev, "failed to allocate send buffer\n"); return -ENOMEM; } skb_reserve(skb, sizeof(struct htc_frame_hdr)); comp_msg = skb_put(skb, sizeof(struct htc_comp_msg)); comp_msg->msg_id = cpu_to_be16(HTC_MSG_SETUP_COMPLETE_ID); target->htc_flags |= HTC_OP_START_WAIT; ret = htc_issue_send(target, skb, skb->len, 0, ENDPOINT0); if (ret) goto err; time_left = wait_for_completion_timeout(&target->cmd_wait, HZ); if (!time_left) { dev_err(target->dev, "HTC start timeout\n"); return -ETIMEDOUT; } return 0; err: kfree_skb(skb); return -EINVAL; } /* HTC APIs */ int htc_init(struct htc_target *target) { int ret; ret = htc_config_pipe_credits(target); if (ret) return ret; return htc_setup_complete(target); } int htc_connect_service(struct htc_target *target, struct htc_service_connreq *service_connreq, enum htc_endpoint_id *conn_rsp_epid) { struct sk_buff *skb; struct htc_endpoint *endpoint; struct htc_conn_svc_msg *conn_msg; int ret; unsigned long time_left; /* Find an available endpoint */ endpoint = get_next_avail_ep(target->endpoint); if (!endpoint) { dev_err(target->dev, "Endpoint is not available for service %d\n", service_connreq->service_id); return -EINVAL; } endpoint->service_id = service_connreq->service_id; endpoint->max_txqdepth = service_connreq->max_send_qdepth; endpoint->ul_pipeid = service_to_ulpipe(service_connreq->service_id); endpoint->dl_pipeid = service_to_dlpipe(service_connreq->service_id); endpoint->ep_callbacks = service_connreq->ep_callbacks; skb = alloc_skb(sizeof(struct htc_conn_svc_msg) + sizeof(struct htc_frame_hdr), GFP_ATOMIC); if (!skb) { dev_err(target->dev, "Failed to allocate buf to send" "service connect req\n"); return -ENOMEM; } skb_reserve(skb, sizeof(struct htc_frame_hdr)); conn_msg = skb_put(skb, sizeof(struct htc_conn_svc_msg)); conn_msg->service_id = cpu_to_be16(service_connreq->service_id); conn_msg->msg_id = cpu_to_be16(HTC_MSG_CONNECT_SERVICE_ID); conn_msg->con_flags = cpu_to_be16(service_connreq->con_flags); conn_msg->dl_pipeid = endpoint->dl_pipeid; conn_msg->ul_pipeid = endpoint->ul_pipeid; /* To prevent infoleak */ conn_msg->svc_meta_len = 0; conn_msg->pad = 0; ret = htc_issue_send(target, skb, skb->len, 0, ENDPOINT0); if (ret) goto err; time_left = wait_for_completion_timeout(&target->cmd_wait, HZ); if (!time_left) { dev_err(target->dev, "Service connection timeout for: %d\n", service_connreq->service_id); return -ETIMEDOUT; } if (target->conn_rsp_epid < 0 || target->conn_rsp_epid >= ENDPOINT_MAX) return -EINVAL; *conn_rsp_epid = target->conn_rsp_epid; return 0; err: kfree_skb(skb); return ret; } int htc_send(struct htc_target *target, struct sk_buff *skb) { struct ath9k_htc_tx_ctl *tx_ctl; tx_ctl = HTC_SKB_CB(skb); return htc_issue_send(target, skb, skb->len, 0, tx_ctl->epid); } int htc_send_epid(struct htc_target *target, struct sk_buff *skb, enum htc_endpoint_id epid) { return htc_issue_send(target, skb, skb->len, 0, epid); } void htc_stop(struct htc_target *target) { target->hif->stop(target->hif_dev); } void htc_start(struct htc_target *target) { target->hif->start(target->hif_dev); } void htc_sta_drain(struct htc_target *target, u8 idx) { target->hif->sta_drain(target->hif_dev, idx); } void ath9k_htc_txcompletion_cb(struct htc_target *htc_handle, struct sk_buff *skb, bool txok) { struct htc_endpoint *endpoint; struct htc_frame_hdr *htc_hdr = NULL; if (htc_handle->htc_flags & HTC_OP_CONFIG_PIPE_CREDITS) { complete(&htc_handle->cmd_wait); htc_handle->htc_flags &= ~HTC_OP_CONFIG_PIPE_CREDITS; goto ret; } if (htc_handle->htc_flags & HTC_OP_START_WAIT) { complete(&htc_handle->cmd_wait); htc_handle->htc_flags &= ~HTC_OP_START_WAIT; goto ret; } if (skb) { htc_hdr = (struct htc_frame_hdr *) skb->data; if (htc_hdr->endpoint_id >= ARRAY_SIZE(htc_handle->endpoint)) goto ret; endpoint = &htc_handle->endpoint[htc_hdr->endpoint_id]; skb_pull(skb, sizeof(struct htc_frame_hdr)); if (endpoint->ep_callbacks.tx) { endpoint->ep_callbacks.tx(endpoint->ep_callbacks.priv, skb, htc_hdr->endpoint_id, txok); } else { kfree_skb(skb); } } return; ret: kfree_skb(skb); } static void ath9k_htc_fw_panic_report(struct htc_target *htc_handle, struct sk_buff *skb, u32 len) { uint32_t *pattern = (uint32_t *)skb->data; if (*pattern == 0x33221199 && len >= sizeof(struct htc_panic_bad_vaddr)) { struct htc_panic_bad_vaddr *htc_panic; htc_panic = (struct htc_panic_bad_vaddr *) skb->data; dev_err(htc_handle->dev, "ath: firmware panic! " "exccause: 0x%08x; pc: 0x%08x; badvaddr: 0x%08x.\n", htc_panic->exccause, htc_panic->pc, htc_panic->badvaddr); return; } if (*pattern == 0x33221299) { struct htc_panic_bad_epid *htc_panic; htc_panic = (struct htc_panic_bad_epid *) skb->data; dev_err(htc_handle->dev, "ath: firmware panic! " "bad epid: 0x%08x\n", htc_panic->epid); return; } dev_err(htc_handle->dev, "ath: unknown panic pattern!\n"); } /* * HTC Messages are handled directly here and the obtained SKB * is freed. * * Service messages (Data, WMI) are passed to the corresponding * endpoint RX handlers, which have to free the SKB. */ void ath9k_htc_rx_msg(struct htc_target *htc_handle, struct sk_buff *skb, u32 len, u8 pipe_id) { struct htc_frame_hdr *htc_hdr; enum htc_endpoint_id epid; struct htc_endpoint *endpoint; __be16 *msg_id; if (!htc_handle || !skb) return; /* A valid message requires len >= 8. * * sizeof(struct htc_frame_hdr) == 8 * sizeof(struct htc_ready_msg) == 8 * sizeof(struct htc_panic_bad_vaddr) == 16 * sizeof(struct htc_panic_bad_epid) == 8 */ if (unlikely(len < sizeof(struct htc_frame_hdr))) goto invalid; htc_hdr = (struct htc_frame_hdr *) skb->data; epid = htc_hdr->endpoint_id; if (epid == 0x99) { ath9k_htc_fw_panic_report(htc_handle, skb, len); kfree_skb(skb); return; } if (epid < 0 || epid >= ENDPOINT_MAX) { invalid: if (pipe_id != USB_REG_IN_PIPE) dev_kfree_skb_any(skb); else kfree_skb(skb); return; } if (epid == ENDPOINT0) { /* Handle trailer */ if (htc_hdr->flags & HTC_FLAGS_RECV_TRAILER) { if (be32_to_cpu(*(__be32 *) skb->data) == 0x00C60000) { /* Move past the Watchdog pattern */ htc_hdr = (struct htc_frame_hdr *)(skb->data + 4); len -= 4; } } /* Get the message ID */ if (unlikely(len < sizeof(struct htc_frame_hdr) + sizeof(__be16))) goto invalid; msg_id = (__be16 *) ((void *) htc_hdr + sizeof(struct htc_frame_hdr)); /* Now process HTC messages */ switch (be16_to_cpu(*msg_id)) { case HTC_MSG_READY_ID: if (unlikely(len < sizeof(struct htc_ready_msg))) goto invalid; htc_process_target_rdy(htc_handle, htc_hdr); break; case HTC_MSG_CONNECT_SERVICE_RESPONSE_ID: if (unlikely(len < sizeof(struct htc_frame_hdr) + sizeof(struct htc_conn_svc_rspmsg))) goto invalid; htc_process_conn_rsp(htc_handle, htc_hdr); break; default: break; } kfree_skb(skb); } else { if (htc_hdr->flags & HTC_FLAGS_RECV_TRAILER) skb_trim(skb, len - htc_hdr->control[0]); skb_pull(skb, sizeof(struct htc_frame_hdr)); endpoint = &htc_handle->endpoint[epid]; if (endpoint->ep_callbacks.rx) endpoint->ep_callbacks.rx(endpoint->ep_callbacks.priv, skb, epid); else goto invalid; } } struct htc_target *ath9k_htc_hw_alloc(void *hif_handle, struct ath9k_htc_hif *hif, struct device *dev) { struct htc_endpoint *endpoint; struct htc_target *target; target = kzalloc(sizeof(struct htc_target), GFP_KERNEL); if (!target) return NULL; init_completion(&target->target_wait); init_completion(&target->cmd_wait); target->hif = hif; target->hif_dev = hif_handle; target->dev = dev; /* Assign control endpoint pipe IDs */ endpoint = &target->endpoint[ENDPOINT0]; endpoint->ul_pipeid = hif->control_ul_pipe; endpoint->dl_pipeid = hif->control_dl_pipe; atomic_set(&target->tgt_ready, 0); return target; } void ath9k_htc_hw_free(struct htc_target *htc) { kfree(htc); } int ath9k_htc_hw_init(struct htc_target *target, struct device *dev, u16 devid, char *product, u32 drv_info) { if (ath9k_htc_probe_device(target, dev, devid, product, drv_info)) { pr_err("Failed to initialize the device\n"); return -ENODEV; } return 0; } void ath9k_htc_hw_deinit(struct htc_target *target, bool hot_unplug) { if (target) ath9k_htc_disconnect_device(target, hot_unplug); }
96 4 43 12 16 36 10 1 1 15 2 27 1 18 1 12 3 35 33 30 12 6 27 21 3 2 2 1 2 1 1 2 2 2 3 1 3 1 3 5 1 1 2 7 3 2 1 1 1 1 1 1 3 2 1 11 4 4 3 6 4 15 14 1 31 30 1 2 21 1 20 75 70 1 1 16 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 // SPDX-License-Identifier: GPL-2.0-or-later /* * OSS compatible sequencer driver * * Copyright (C) 1998,99 Takashi Iwai <tiwai@suse.de> */ #include "seq_oss_device.h" #include "seq_oss_synth.h" #include "seq_oss_midi.h" #include "seq_oss_event.h" #include "seq_oss_timer.h" #include <sound/seq_oss_legacy.h> #include "seq_oss_readq.h" #include "seq_oss_writeq.h" #include <linux/nospec.h> /* * prototypes */ static int extended_event(struct seq_oss_devinfo *dp, union evrec *q, struct snd_seq_event *ev); static int chn_voice_event(struct seq_oss_devinfo *dp, union evrec *event_rec, struct snd_seq_event *ev); static int chn_common_event(struct seq_oss_devinfo *dp, union evrec *event_rec, struct snd_seq_event *ev); static int timing_event(struct seq_oss_devinfo *dp, union evrec *event_rec, struct snd_seq_event *ev); static int local_event(struct seq_oss_devinfo *dp, union evrec *event_rec, struct snd_seq_event *ev); static int old_event(struct seq_oss_devinfo *dp, union evrec *q, struct snd_seq_event *ev); static int note_on_event(struct seq_oss_devinfo *dp, int dev, int ch, int note, int vel, struct snd_seq_event *ev); static int note_off_event(struct seq_oss_devinfo *dp, int dev, int ch, int note, int vel, struct snd_seq_event *ev); static int set_note_event(struct seq_oss_devinfo *dp, int dev, int type, int ch, int note, int vel, struct snd_seq_event *ev); static int set_control_event(struct seq_oss_devinfo *dp, int dev, int type, int ch, int param, int val, struct snd_seq_event *ev); static int set_echo_event(struct seq_oss_devinfo *dp, union evrec *rec, struct snd_seq_event *ev); /* * convert an OSS event to ALSA event * return 0 : enqueued * non-zero : invalid - ignored */ int snd_seq_oss_process_event(struct seq_oss_devinfo *dp, union evrec *q, struct snd_seq_event *ev) { switch (q->s.code) { case SEQ_EXTENDED: return extended_event(dp, q, ev); case EV_CHN_VOICE: return chn_voice_event(dp, q, ev); case EV_CHN_COMMON: return chn_common_event(dp, q, ev); case EV_TIMING: return timing_event(dp, q, ev); case EV_SEQ_LOCAL: return local_event(dp, q, ev); case EV_SYSEX: return snd_seq_oss_synth_sysex(dp, q->x.dev, q->x.buf, ev); case SEQ_MIDIPUTC: if (dp->seq_mode == SNDRV_SEQ_OSS_MODE_MUSIC) return -EINVAL; /* put a midi byte */ if (! is_write_mode(dp->file_mode)) break; if (snd_seq_oss_midi_open(dp, q->s.dev, SNDRV_SEQ_OSS_FILE_WRITE)) break; if (snd_seq_oss_midi_filemode(dp, q->s.dev) & SNDRV_SEQ_OSS_FILE_WRITE) return snd_seq_oss_midi_putc(dp, q->s.dev, q->s.parm1, ev); break; case SEQ_ECHO: if (dp->seq_mode == SNDRV_SEQ_OSS_MODE_MUSIC) return -EINVAL; return set_echo_event(dp, q, ev); case SEQ_PRIVATE: if (dp->seq_mode == SNDRV_SEQ_OSS_MODE_MUSIC) return -EINVAL; return snd_seq_oss_synth_raw_event(dp, q->c[1], q->c, ev); default: if (dp->seq_mode == SNDRV_SEQ_OSS_MODE_MUSIC) return -EINVAL; return old_event(dp, q, ev); } return -EINVAL; } /* old type events: mode1 only */ static int old_event(struct seq_oss_devinfo *dp, union evrec *q, struct snd_seq_event *ev) { switch (q->s.code) { case SEQ_NOTEOFF: return note_off_event(dp, 0, q->n.chn, q->n.note, q->n.vel, ev); case SEQ_NOTEON: return note_on_event(dp, 0, q->n.chn, q->n.note, q->n.vel, ev); case SEQ_WAIT: /* skip */ break; case SEQ_PGMCHANGE: return set_control_event(dp, 0, SNDRV_SEQ_EVENT_PGMCHANGE, q->n.chn, 0, q->n.note, ev); case SEQ_SYNCTIMER: return snd_seq_oss_timer_reset(dp->timer); } return -EINVAL; } /* 8bytes extended event: mode1 only */ static int extended_event(struct seq_oss_devinfo *dp, union evrec *q, struct snd_seq_event *ev) { int val; switch (q->e.cmd) { case SEQ_NOTEOFF: return note_off_event(dp, q->e.dev, q->e.chn, q->e.p1, q->e.p2, ev); case SEQ_NOTEON: return note_on_event(dp, q->e.dev, q->e.chn, q->e.p1, q->e.p2, ev); case SEQ_PGMCHANGE: return set_control_event(dp, q->e.dev, SNDRV_SEQ_EVENT_PGMCHANGE, q->e.chn, 0, q->e.p1, ev); case SEQ_AFTERTOUCH: return set_control_event(dp, q->e.dev, SNDRV_SEQ_EVENT_CHANPRESS, q->e.chn, 0, q->e.p1, ev); case SEQ_BALANCE: /* convert -128:127 to 0:127 */ val = (char)q->e.p1; val = (val + 128) / 2; return set_control_event(dp, q->e.dev, SNDRV_SEQ_EVENT_CONTROLLER, q->e.chn, CTL_PAN, val, ev); case SEQ_CONTROLLER: val = ((short)q->e.p3 << 8) | (short)q->e.p2; switch (q->e.p1) { case CTRL_PITCH_BENDER: /* SEQ1 V2 control */ /* -0x2000:0x1fff */ return set_control_event(dp, q->e.dev, SNDRV_SEQ_EVENT_PITCHBEND, q->e.chn, 0, val, ev); case CTRL_PITCH_BENDER_RANGE: /* conversion: 100/semitone -> 128/semitone */ return set_control_event(dp, q->e.dev, SNDRV_SEQ_EVENT_REGPARAM, q->e.chn, 0, val*128/100, ev); default: return set_control_event(dp, q->e.dev, SNDRV_SEQ_EVENT_CONTROL14, q->e.chn, q->e.p1, val, ev); } case SEQ_VOLMODE: return snd_seq_oss_synth_raw_event(dp, q->e.dev, q->c, ev); } return -EINVAL; } /* channel voice events: mode1 and 2 */ static int chn_voice_event(struct seq_oss_devinfo *dp, union evrec *q, struct snd_seq_event *ev) { if (q->v.chn >= 32) return -EINVAL; switch (q->v.cmd) { case MIDI_NOTEON: return note_on_event(dp, q->v.dev, q->v.chn, q->v.note, q->v.parm, ev); case MIDI_NOTEOFF: return note_off_event(dp, q->v.dev, q->v.chn, q->v.note, q->v.parm, ev); case MIDI_KEY_PRESSURE: return set_note_event(dp, q->v.dev, SNDRV_SEQ_EVENT_KEYPRESS, q->v.chn, q->v.note, q->v.parm, ev); } return -EINVAL; } /* channel common events: mode1 and 2 */ static int chn_common_event(struct seq_oss_devinfo *dp, union evrec *q, struct snd_seq_event *ev) { if (q->l.chn >= 32) return -EINVAL; switch (q->l.cmd) { case MIDI_PGM_CHANGE: return set_control_event(dp, q->l.dev, SNDRV_SEQ_EVENT_PGMCHANGE, q->l.chn, 0, q->l.p1, ev); case MIDI_CTL_CHANGE: return set_control_event(dp, q->l.dev, SNDRV_SEQ_EVENT_CONTROLLER, q->l.chn, q->l.p1, q->l.val, ev); case MIDI_PITCH_BEND: /* conversion: 0:0x3fff -> -0x2000:0x1fff */ return set_control_event(dp, q->l.dev, SNDRV_SEQ_EVENT_PITCHBEND, q->l.chn, 0, q->l.val - 8192, ev); case MIDI_CHN_PRESSURE: return set_control_event(dp, q->l.dev, SNDRV_SEQ_EVENT_CHANPRESS, q->l.chn, 0, q->l.val, ev); } return -EINVAL; } /* timer events: mode1 and mode2 */ static int timing_event(struct seq_oss_devinfo *dp, union evrec *q, struct snd_seq_event *ev) { switch (q->t.cmd) { case TMR_ECHO: if (dp->seq_mode == SNDRV_SEQ_OSS_MODE_MUSIC) return set_echo_event(dp, q, ev); else { union evrec tmp; memset(&tmp, 0, sizeof(tmp)); /* XXX: only for little-endian! */ tmp.echo = (q->t.time << 8) | SEQ_ECHO; return set_echo_event(dp, &tmp, ev); } case TMR_STOP: if (dp->seq_mode) return snd_seq_oss_timer_stop(dp->timer); return 0; case TMR_CONTINUE: if (dp->seq_mode) return snd_seq_oss_timer_continue(dp->timer); return 0; case TMR_TEMPO: if (dp->seq_mode) return snd_seq_oss_timer_tempo(dp->timer, q->t.time); return 0; } return -EINVAL; } /* local events: mode1 and 2 */ static int local_event(struct seq_oss_devinfo *dp, union evrec *q, struct snd_seq_event *ev) { return -EINVAL; } /* * process note-on event for OSS synth * three different modes are available: * - SNDRV_SEQ_OSS_PROCESS_EVENTS (for one-voice per channel mode) * Accept note 255 as volume change. * - SNDRV_SEQ_OSS_PASS_EVENTS * Pass all events to lowlevel driver anyway * - SNDRV_SEQ_OSS_PROCESS_KEYPRESS (mostly for Emu8000) * Use key-pressure if note >= 128 */ static int note_on_event(struct seq_oss_devinfo *dp, int dev, int ch, int note, int vel, struct snd_seq_event *ev) { struct seq_oss_synthinfo *info; info = snd_seq_oss_synth_info(dp, dev); if (!info) return -ENXIO; switch (info->arg.event_passing) { case SNDRV_SEQ_OSS_PROCESS_EVENTS: if (! info->ch || ch < 0 || ch >= info->nr_voices) { /* pass directly */ return set_note_event(dp, dev, SNDRV_SEQ_EVENT_NOTEON, ch, note, vel, ev); } ch = array_index_nospec(ch, info->nr_voices); if (note == 255 && info->ch[ch].note >= 0) { /* volume control */ int type; if (info->ch[ch].vel) /* sample already started -- volume change */ type = SNDRV_SEQ_EVENT_KEYPRESS; else /* sample not started -- start now */ type = SNDRV_SEQ_EVENT_NOTEON; info->ch[ch].vel = vel; return set_note_event(dp, dev, type, ch, info->ch[ch].note, vel, ev); } else if (note >= 128) return -EINVAL; /* invalid */ if (note != info->ch[ch].note && info->ch[ch].note >= 0) /* note changed - note off at beginning */ set_note_event(dp, dev, SNDRV_SEQ_EVENT_NOTEOFF, ch, info->ch[ch].note, 0, ev); /* set current status */ info->ch[ch].note = note; info->ch[ch].vel = vel; if (vel) /* non-zero velocity - start the note now */ return set_note_event(dp, dev, SNDRV_SEQ_EVENT_NOTEON, ch, note, vel, ev); return -EINVAL; case SNDRV_SEQ_OSS_PASS_EVENTS: /* pass the event anyway */ return set_note_event(dp, dev, SNDRV_SEQ_EVENT_NOTEON, ch, note, vel, ev); case SNDRV_SEQ_OSS_PROCESS_KEYPRESS: if (note >= 128) /* key pressure: shifted by 128 */ return set_note_event(dp, dev, SNDRV_SEQ_EVENT_KEYPRESS, ch, note - 128, vel, ev); else /* normal note-on event */ return set_note_event(dp, dev, SNDRV_SEQ_EVENT_NOTEON, ch, note, vel, ev); } return -EINVAL; } /* * process note-off event for OSS synth */ static int note_off_event(struct seq_oss_devinfo *dp, int dev, int ch, int note, int vel, struct snd_seq_event *ev) { struct seq_oss_synthinfo *info; info = snd_seq_oss_synth_info(dp, dev); if (!info) return -ENXIO; switch (info->arg.event_passing) { case SNDRV_SEQ_OSS_PROCESS_EVENTS: if (! info->ch || ch < 0 || ch >= info->nr_voices) { /* pass directly */ return set_note_event(dp, dev, SNDRV_SEQ_EVENT_NOTEON, ch, note, vel, ev); } ch = array_index_nospec(ch, info->nr_voices); if (info->ch[ch].note >= 0) { note = info->ch[ch].note; info->ch[ch].vel = 0; info->ch[ch].note = -1; return set_note_event(dp, dev, SNDRV_SEQ_EVENT_NOTEOFF, ch, note, vel, ev); } return -EINVAL; /* invalid */ case SNDRV_SEQ_OSS_PASS_EVENTS: case SNDRV_SEQ_OSS_PROCESS_KEYPRESS: /* pass the event anyway */ return set_note_event(dp, dev, SNDRV_SEQ_EVENT_NOTEOFF, ch, note, vel, ev); } return -EINVAL; } /* * create a note event */ static int set_note_event(struct seq_oss_devinfo *dp, int dev, int type, int ch, int note, int vel, struct snd_seq_event *ev) { if (!snd_seq_oss_synth_info(dp, dev)) return -ENXIO; ev->type = type; snd_seq_oss_synth_addr(dp, dev, ev); ev->data.note.channel = ch; ev->data.note.note = note; ev->data.note.velocity = vel; return 0; } /* * create a control event */ static int set_control_event(struct seq_oss_devinfo *dp, int dev, int type, int ch, int param, int val, struct snd_seq_event *ev) { if (!snd_seq_oss_synth_info(dp, dev)) return -ENXIO; ev->type = type; snd_seq_oss_synth_addr(dp, dev, ev); ev->data.control.channel = ch; ev->data.control.param = param; ev->data.control.value = val; return 0; } /* * create an echo event */ static int set_echo_event(struct seq_oss_devinfo *dp, union evrec *rec, struct snd_seq_event *ev) { ev->type = SNDRV_SEQ_EVENT_ECHO; /* echo back to itself */ snd_seq_oss_fill_addr(dp, ev, dp->addr.client, dp->addr.port); memcpy(&ev->data, rec, LONG_EVENT_SIZE); return 0; } /* * event input callback from ALSA sequencer: * the echo event is processed here. */ int snd_seq_oss_event_input(struct snd_seq_event *ev, int direct, void *private_data, int atomic, int hop) { struct seq_oss_devinfo *dp = (struct seq_oss_devinfo *)private_data; union evrec *rec; if (ev->type != SNDRV_SEQ_EVENT_ECHO) return snd_seq_oss_midi_input(ev, direct, private_data); if (ev->source.client != dp->cseq) return 0; /* ignored */ rec = (union evrec*)&ev->data; if (rec->s.code == SEQ_SYNCTIMER) { /* sync echo back */ snd_seq_oss_writeq_wakeup(dp->writeq, rec->t.time); } else { /* echo back event */ if (dp->readq == NULL) return 0; snd_seq_oss_readq_put_event(dp->readq, rec); } return 0; }
9 9 4 3 3 3 3 7 7 4 3 7 7 6 7 7 7 9 9 8 9 9 100 100 100 100 100 100 100 134 135 134 9 113 53 58 55 157 1 1 1 158 157 158 155 158 157 156 157 58 99 158 113 44 1 1 1 1 1 158 157 1 157 156 158 157 158 158 157 158 43 1 157 28 157 158 8 9 9 9 9 9 8 9 9 9 9 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 1999 Eric Youngdale * Copyright (C) 2014 Christoph Hellwig * * SCSI queueing library. * Initial versions: Eric Youngdale (eric@andante.org). * Based upon conversations with large numbers * of people at Linux Expo. */ #include <linux/bio.h> #include <linux/bitops.h> #include <linux/blkdev.h> #include <linux/completion.h> #include <linux/kernel.h> #include <linux/export.h> #include <linux/init.h> #include <linux/pci.h> #include <linux/delay.h> #include <linux/hardirq.h> #include <linux/scatterlist.h> #include <linux/blk-mq.h> #include <linux/blk-integrity.h> #include <linux/ratelimit.h> #include <linux/unaligned.h> #include <scsi/scsi.h> #include <scsi/scsi_cmnd.h> #include <scsi/scsi_dbg.h> #include <scsi/scsi_device.h> #include <scsi/scsi_driver.h> #include <scsi/scsi_eh.h> #include <scsi/scsi_host.h> #include <scsi/scsi_transport.h> /* scsi_init_limits() */ #include <scsi/scsi_dh.h> #include <trace/events/scsi.h> #include "scsi_debugfs.h" #include "scsi_priv.h" #include "scsi_logging.h" /* * Size of integrity metadata is usually small, 1 inline sg should * cover normal cases. */ #ifdef CONFIG_ARCH_NO_SG_CHAIN #define SCSI_INLINE_PROT_SG_CNT 0 #define SCSI_INLINE_SG_CNT 0 #else #define SCSI_INLINE_PROT_SG_CNT 1 #define SCSI_INLINE_SG_CNT 2 #endif static struct kmem_cache *scsi_sense_cache; static DEFINE_MUTEX(scsi_sense_cache_mutex); static void scsi_mq_uninit_cmd(struct scsi_cmnd *cmd); int scsi_init_sense_cache(struct Scsi_Host *shost) { int ret = 0; mutex_lock(&scsi_sense_cache_mutex); if (!scsi_sense_cache) { scsi_sense_cache = kmem_cache_create_usercopy("scsi_sense_cache", SCSI_SENSE_BUFFERSIZE, 0, SLAB_HWCACHE_ALIGN, 0, SCSI_SENSE_BUFFERSIZE, NULL); if (!scsi_sense_cache) ret = -ENOMEM; } mutex_unlock(&scsi_sense_cache_mutex); return ret; } static void scsi_set_blocked(struct scsi_cmnd *cmd, int reason) { struct Scsi_Host *host = cmd->device->host; struct scsi_device *device = cmd->device; struct scsi_target *starget = scsi_target(device); /* * Set the appropriate busy bit for the device/host. * * If the host/device isn't busy, assume that something actually * completed, and that we should be able to queue a command now. * * Note that the prior mid-layer assumption that any host could * always queue at least one command is now broken. The mid-layer * will implement a user specifiable stall (see * scsi_host.max_host_blocked and scsi_device.max_device_blocked) * if a command is requeued with no other commands outstanding * either for the device or for the host. */ switch (reason) { case SCSI_MLQUEUE_HOST_BUSY: atomic_set(&host->host_blocked, host->max_host_blocked); break; case SCSI_MLQUEUE_DEVICE_BUSY: case SCSI_MLQUEUE_EH_RETRY: atomic_set(&device->device_blocked, device->max_device_blocked); break; case SCSI_MLQUEUE_TARGET_BUSY: atomic_set(&starget->target_blocked, starget->max_target_blocked); break; } } static void scsi_mq_requeue_cmd(struct scsi_cmnd *cmd, unsigned long msecs) { struct request *rq = scsi_cmd_to_rq(cmd); if (rq->rq_flags & RQF_DONTPREP) { rq->rq_flags &= ~RQF_DONTPREP; scsi_mq_uninit_cmd(cmd); } else { WARN_ON_ONCE(true); } blk_mq_requeue_request(rq, false); if (!scsi_host_in_recovery(cmd->device->host)) blk_mq_delay_kick_requeue_list(rq->q, msecs); } /** * __scsi_queue_insert - private queue insertion * @cmd: The SCSI command being requeued * @reason: The reason for the requeue * @unbusy: Whether the queue should be unbusied * * This is a private queue insertion. The public interface * scsi_queue_insert() always assumes the queue should be unbusied * because it's always called before the completion. This function is * for a requeue after completion, which should only occur in this * file. */ static void __scsi_queue_insert(struct scsi_cmnd *cmd, int reason, bool unbusy) { struct scsi_device *device = cmd->device; SCSI_LOG_MLQUEUE(1, scmd_printk(KERN_INFO, cmd, "Inserting command %p into mlqueue\n", cmd)); scsi_set_blocked(cmd, reason); /* * Decrement the counters, since these commands are no longer * active on the host/device. */ if (unbusy) scsi_device_unbusy(device, cmd); /* * Requeue this command. It will go before all other commands * that are already in the queue. Schedule requeue work under * lock such that the kblockd_schedule_work() call happens * before blk_mq_destroy_queue() finishes. */ cmd->result = 0; blk_mq_requeue_request(scsi_cmd_to_rq(cmd), !scsi_host_in_recovery(cmd->device->host)); } /** * scsi_queue_insert - Reinsert a command in the queue. * @cmd: command that we are adding to queue. * @reason: why we are inserting command to queue. * * We do this for one of two cases. Either the host is busy and it cannot accept * any more commands for the time being, or the device returned QUEUE_FULL and * can accept no more commands. * * Context: This could be called either from an interrupt context or a normal * process context. */ void scsi_queue_insert(struct scsi_cmnd *cmd, int reason) { __scsi_queue_insert(cmd, reason, true); } /** * scsi_failures_reset_retries - reset all failures to zero * @failures: &struct scsi_failures with specific failure modes set */ void scsi_failures_reset_retries(struct scsi_failures *failures) { struct scsi_failure *failure; failures->total_retries = 0; for (failure = failures->failure_definitions; failure->result; failure++) failure->retries = 0; } EXPORT_SYMBOL_GPL(scsi_failures_reset_retries); /** * scsi_check_passthrough - Determine if passthrough scsi_cmnd needs a retry. * @scmd: scsi_cmnd to check. * @failures: scsi_failures struct that lists failures to check for. * * Returns -EAGAIN if the caller should retry else 0. */ static int scsi_check_passthrough(struct scsi_cmnd *scmd, struct scsi_failures *failures) { struct scsi_failure *failure; struct scsi_sense_hdr sshdr; enum sam_status status; if (!scmd->result) return 0; if (!failures) return 0; for (failure = failures->failure_definitions; failure->result; failure++) { if (failure->result == SCMD_FAILURE_RESULT_ANY) goto maybe_retry; if (host_byte(scmd->result) && host_byte(scmd->result) == host_byte(failure->result)) goto maybe_retry; status = status_byte(scmd->result); if (!status) continue; if (failure->result == SCMD_FAILURE_STAT_ANY && !scsi_status_is_good(scmd->result)) goto maybe_retry; if (status != status_byte(failure->result)) continue; if (status_byte(failure->result) != SAM_STAT_CHECK_CONDITION || failure->sense == SCMD_FAILURE_SENSE_ANY) goto maybe_retry; if (!scsi_command_normalize_sense(scmd, &sshdr)) return 0; if (failure->sense != sshdr.sense_key) continue; if (failure->asc == SCMD_FAILURE_ASC_ANY) goto maybe_retry; if (failure->asc != sshdr.asc) continue; if (failure->ascq == SCMD_FAILURE_ASCQ_ANY || failure->ascq == sshdr.ascq) goto maybe_retry; } return 0; maybe_retry: if (failure->allowed) { if (failure->allowed == SCMD_FAILURE_NO_LIMIT || ++failure->retries <= failure->allowed) return -EAGAIN; } else { if (failures->total_allowed == SCMD_FAILURE_NO_LIMIT || ++failures->total_retries <= failures->total_allowed) return -EAGAIN; } return 0; } /** * scsi_execute_cmd - insert request and wait for the result * @sdev: scsi_device * @cmd: scsi command * @opf: block layer request cmd_flags * @buffer: data buffer * @bufflen: len of buffer * @timeout: request timeout in HZ * @ml_retries: number of times SCSI midlayer will retry request * @args: Optional args. See struct definition for field descriptions * * Returns the scsi_cmnd result field if a command was executed, or a negative * Linux error code if we didn't get that far. */ int scsi_execute_cmd(struct scsi_device *sdev, const unsigned char *cmd, blk_opf_t opf, void *buffer, unsigned int bufflen, int timeout, int ml_retries, const struct scsi_exec_args *args) { static const struct scsi_exec_args default_args; struct request *req; struct scsi_cmnd *scmd; int ret; if (!args) args = &default_args; else if (WARN_ON_ONCE(args->sense && args->sense_len != SCSI_SENSE_BUFFERSIZE)) return -EINVAL; retry: req = scsi_alloc_request(sdev->request_queue, opf, args->req_flags); if (IS_ERR(req)) return PTR_ERR(req); if (bufflen) { ret = blk_rq_map_kern(req, buffer, bufflen, GFP_NOIO); if (ret) goto out; } scmd = blk_mq_rq_to_pdu(req); scmd->cmd_len = COMMAND_SIZE(cmd[0]); memcpy(scmd->cmnd, cmd, scmd->cmd_len); scmd->allowed = ml_retries; scmd->flags |= args->scmd_flags; req->timeout = timeout; req->rq_flags |= RQF_QUIET; /* * head injection *required* here otherwise quiesce won't work */ blk_execute_rq(req, true); if (scsi_check_passthrough(scmd, args->failures) == -EAGAIN) { blk_mq_free_request(req); goto retry; } /* * Some devices (USB mass-storage in particular) may transfer * garbage data together with a residue indicating that the data * is invalid. Prevent the garbage from being misinterpreted * and prevent security leaks by zeroing out the excess data. */ if (unlikely(scmd->resid_len > 0 && scmd->resid_len <= bufflen)) memset(buffer + bufflen - scmd->resid_len, 0, scmd->resid_len); if (args->resid) *args->resid = scmd->resid_len; if (args->sense) memcpy(args->sense, scmd->sense_buffer, SCSI_SENSE_BUFFERSIZE); if (args->sshdr) scsi_normalize_sense(scmd->sense_buffer, scmd->sense_len, args->sshdr); ret = scmd->result; out: blk_mq_free_request(req); return ret; } EXPORT_SYMBOL(scsi_execute_cmd); /* * Wake up the error handler if necessary. Avoid as follows that the error * handler is not woken up if host in-flight requests number == * shost->host_failed: use call_rcu() in scsi_eh_scmd_add() in combination * with an RCU read lock in this function to ensure that this function in * its entirety either finishes before scsi_eh_scmd_add() increases the * host_failed counter or that it notices the shost state change made by * scsi_eh_scmd_add(). */ static void scsi_dec_host_busy(struct Scsi_Host *shost, struct scsi_cmnd *cmd) { unsigned long flags; rcu_read_lock(); __clear_bit(SCMD_STATE_INFLIGHT, &cmd->state); if (unlikely(scsi_host_in_recovery(shost))) { unsigned int busy = scsi_host_busy(shost); spin_lock_irqsave(shost->host_lock, flags); if (shost->host_failed || shost->host_eh_scheduled) scsi_eh_wakeup(shost, busy); spin_unlock_irqrestore(shost->host_lock, flags); } rcu_read_unlock(); } void scsi_device_unbusy(struct scsi_device *sdev, struct scsi_cmnd *cmd) { struct Scsi_Host *shost = sdev->host; struct scsi_target *starget = scsi_target(sdev); scsi_dec_host_busy(shost, cmd); if (starget->can_queue > 0) atomic_dec(&starget->target_busy); sbitmap_put(&sdev->budget_map, cmd->budget_token); cmd->budget_token = -1; } /* * Kick the queue of SCSI device @sdev if @sdev != current_sdev. Called with * interrupts disabled. */ static void scsi_kick_sdev_queue(struct scsi_device *sdev, void *data) { struct scsi_device *current_sdev = data; if (sdev != current_sdev) blk_mq_run_hw_queues(sdev->request_queue, true); } /* * Called for single_lun devices on IO completion. Clear starget_sdev_user, * and call blk_run_queue for all the scsi_devices on the target - * including current_sdev first. * * Called with *no* scsi locks held. */ static void scsi_single_lun_run(struct scsi_device *current_sdev) { struct Scsi_Host *shost = current_sdev->host; struct scsi_target *starget = scsi_target(current_sdev); unsigned long flags; spin_lock_irqsave(shost->host_lock, flags); starget->starget_sdev_user = NULL; spin_unlock_irqrestore(shost->host_lock, flags); /* * Call blk_run_queue for all LUNs on the target, starting with * current_sdev. We race with others (to set starget_sdev_user), * but in most cases, we will be first. Ideally, each LU on the * target would get some limited time or requests on the target. */ blk_mq_run_hw_queues(current_sdev->request_queue, shost->queuecommand_may_block); spin_lock_irqsave(shost->host_lock, flags); if (!starget->starget_sdev_user) __starget_for_each_device(starget, current_sdev, scsi_kick_sdev_queue); spin_unlock_irqrestore(shost->host_lock, flags); } static inline bool scsi_device_is_busy(struct scsi_device *sdev) { if (scsi_device_busy(sdev) >= sdev->queue_depth) return true; if (atomic_read(&sdev->device_blocked) > 0) return true; return false; } static inline bool scsi_target_is_busy(struct scsi_target *starget) { if (starget->can_queue > 0) { if (atomic_read(&starget->target_busy) >= starget->can_queue) return true; if (atomic_read(&starget->target_blocked) > 0) return true; } return false; } static inline bool scsi_host_is_busy(struct Scsi_Host *shost) { if (atomic_read(&shost->host_blocked) > 0) return true; if (shost->host_self_blocked) return true; return false; } static void scsi_starved_list_run(struct Scsi_Host *shost) { LIST_HEAD(starved_list); struct scsi_device *sdev; unsigned long flags; spin_lock_irqsave(shost->host_lock, flags); list_splice_init(&shost->starved_list, &starved_list); while (!list_empty(&starved_list)) { struct request_queue *slq; /* * As long as shost is accepting commands and we have * starved queues, call blk_run_queue. scsi_request_fn * drops the queue_lock and can add us back to the * starved_list. * * host_lock protects the starved_list and starved_entry. * scsi_request_fn must get the host_lock before checking * or modifying starved_list or starved_entry. */ if (scsi_host_is_busy(shost)) break; sdev = list_entry(starved_list.next, struct scsi_device, starved_entry); list_del_init(&sdev->starved_entry); if (scsi_target_is_busy(scsi_target(sdev))) { list_move_tail(&sdev->starved_entry, &shost->starved_list); continue; } /* * Once we drop the host lock, a racing scsi_remove_device() * call may remove the sdev from the starved list and destroy * it and the queue. Mitigate by taking a reference to the * queue and never touching the sdev again after we drop the * host lock. Note: if __scsi_remove_device() invokes * blk_mq_destroy_queue() before the queue is run from this * function then blk_run_queue() will return immediately since * blk_mq_destroy_queue() marks the queue with QUEUE_FLAG_DYING. */ slq = sdev->request_queue; if (!blk_get_queue(slq)) continue; spin_unlock_irqrestore(shost->host_lock, flags); blk_mq_run_hw_queues(slq, false); blk_put_queue(slq); spin_lock_irqsave(shost->host_lock, flags); } /* put any unprocessed entries back */ list_splice(&starved_list, &shost->starved_list); spin_unlock_irqrestore(shost->host_lock, flags); } /** * scsi_run_queue - Select a proper request queue to serve next. * @q: last request's queue * * The previous command was completely finished, start a new one if possible. */ static void scsi_run_queue(struct request_queue *q) { struct scsi_device *sdev = q->queuedata; if (scsi_target(sdev)->single_lun) scsi_single_lun_run(sdev); if (!list_empty(&sdev->host->starved_list)) scsi_starved_list_run(sdev->host); /* Note: blk_mq_kick_requeue_list() runs the queue asynchronously. */ blk_mq_kick_requeue_list(q); } void scsi_requeue_run_queue(struct work_struct *work) { struct scsi_device *sdev; struct request_queue *q; sdev = container_of(work, struct scsi_device, requeue_work); q = sdev->request_queue; scsi_run_queue(q); } void scsi_run_host_queues(struct Scsi_Host *shost) { struct scsi_device *sdev; shost_for_each_device(sdev, shost) scsi_run_queue(sdev->request_queue); } static void scsi_uninit_cmd(struct scsi_cmnd *cmd) { if (!blk_rq_is_passthrough(scsi_cmd_to_rq(cmd))) { struct scsi_driver *drv = scsi_cmd_to_driver(cmd); if (drv->uninit_command) drv->uninit_command(cmd); } } void scsi_free_sgtables(struct scsi_cmnd *cmd) { if (cmd->sdb.table.nents) sg_free_table_chained(&cmd->sdb.table, SCSI_INLINE_SG_CNT); if (scsi_prot_sg_count(cmd)) sg_free_table_chained(&cmd->prot_sdb->table, SCSI_INLINE_PROT_SG_CNT); } EXPORT_SYMBOL_GPL(scsi_free_sgtables); static void scsi_mq_uninit_cmd(struct scsi_cmnd *cmd) { scsi_free_sgtables(cmd); scsi_uninit_cmd(cmd); } static void scsi_run_queue_async(struct scsi_device *sdev) { if (scsi_host_in_recovery(sdev->host)) return; if (scsi_target(sdev)->single_lun || !list_empty(&sdev->host->starved_list)) { kblockd_schedule_work(&sdev->requeue_work); } else { /* * smp_mb() present in sbitmap_queue_clear() or implied in * .end_io is for ordering writing .device_busy in * scsi_device_unbusy() and reading sdev->restarts. */ int old = atomic_read(&sdev->restarts); /* * ->restarts has to be kept as non-zero if new budget * contention occurs. * * No need to run queue when either another re-run * queue wins in updating ->restarts or a new budget * contention occurs. */ if (old && atomic_cmpxchg(&sdev->restarts, old, 0) == old) blk_mq_run_hw_queues(sdev->request_queue, true); } } /* Returns false when no more bytes to process, true if there are more */ static bool scsi_end_request(struct request *req, blk_status_t error, unsigned int bytes) { struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req); struct scsi_device *sdev = cmd->device; struct request_queue *q = sdev->request_queue; if (blk_update_request(req, error, bytes)) return true; if (q->limits.features & BLK_FEAT_ADD_RANDOM) add_disk_randomness(req->q->disk); WARN_ON_ONCE(!blk_rq_is_passthrough(req) && !(cmd->flags & SCMD_INITIALIZED)); cmd->flags = 0; /* * Calling rcu_barrier() is not necessary here because the * SCSI error handler guarantees that the function called by * call_rcu() has been called before scsi_end_request() is * called. */ destroy_rcu_head(&cmd->rcu); /* * In the MQ case the command gets freed by __blk_mq_end_request, * so we have to do all cleanup that depends on it earlier. * * We also can't kick the queues from irq context, so we * will have to defer it to a workqueue. */ scsi_mq_uninit_cmd(cmd); /* * queue is still alive, so grab the ref for preventing it * from being cleaned up during running queue. */ percpu_ref_get(&q->q_usage_counter); __blk_mq_end_request(req, error); scsi_run_queue_async(sdev); percpu_ref_put(&q->q_usage_counter); return false; } /** * scsi_result_to_blk_status - translate a SCSI result code into blk_status_t * @result: scsi error code * * Translate a SCSI result code into a blk_status_t value. */ static blk_status_t scsi_result_to_blk_status(int result) { /* * Check the scsi-ml byte first in case we converted a host or status * byte. */ switch (scsi_ml_byte(result)) { case SCSIML_STAT_OK: break; case SCSIML_STAT_RESV_CONFLICT: return BLK_STS_RESV_CONFLICT; case SCSIML_STAT_NOSPC: return BLK_STS_NOSPC; case SCSIML_STAT_MED_ERROR: return BLK_STS_MEDIUM; case SCSIML_STAT_TGT_FAILURE: return BLK_STS_TARGET; case SCSIML_STAT_DL_TIMEOUT: return BLK_STS_DURATION_LIMIT; } switch (host_byte(result)) { case DID_OK: if (scsi_status_is_good(result)) return BLK_STS_OK; return BLK_STS_IOERR; case DID_TRANSPORT_FAILFAST: case DID_TRANSPORT_MARGINAL: return BLK_STS_TRANSPORT; default: return BLK_STS_IOERR; } } /** * scsi_rq_err_bytes - determine number of bytes till the next failure boundary * @rq: request to examine * * Description: * A request could be merge of IOs which require different failure * handling. This function determines the number of bytes which * can be failed from the beginning of the request without * crossing into area which need to be retried further. * * Return: * The number of bytes to fail. */ static unsigned int scsi_rq_err_bytes(const struct request *rq) { blk_opf_t ff = rq->cmd_flags & REQ_FAILFAST_MASK; unsigned int bytes = 0; struct bio *bio; if (!(rq->rq_flags & RQF_MIXED_MERGE)) return blk_rq_bytes(rq); /* * Currently the only 'mixing' which can happen is between * different fastfail types. We can safely fail portions * which have all the failfast bits that the first one has - * the ones which are at least as eager to fail as the first * one. */ for (bio = rq->bio; bio; bio = bio->bi_next) { if ((bio->bi_opf & ff) != ff) break; bytes += bio->bi_iter.bi_size; } /* this could lead to infinite loop */ BUG_ON(blk_rq_bytes(rq) && !bytes); return bytes; } static bool scsi_cmd_runtime_exceeced(struct scsi_cmnd *cmd) { struct request *req = scsi_cmd_to_rq(cmd); unsigned long wait_for; if (cmd->allowed == SCSI_CMD_RETRIES_NO_LIMIT) return false; wait_for = (cmd->allowed + 1) * req->timeout; if (time_before(cmd->jiffies_at_alloc + wait_for, jiffies)) { scmd_printk(KERN_ERR, cmd, "timing out command, waited %lus\n", wait_for/HZ); return true; } return false; } /* * When ALUA transition state is returned, reprep the cmd to * use the ALUA handler's transition timeout. Delay the reprep * 1 sec to avoid aggressive retries of the target in that * state. */ #define ALUA_TRANSITION_REPREP_DELAY 1000 /* Helper for scsi_io_completion() when special action required. */ static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result) { struct request *req = scsi_cmd_to_rq(cmd); int level = 0; enum {ACTION_FAIL, ACTION_REPREP, ACTION_DELAYED_REPREP, ACTION_RETRY, ACTION_DELAYED_RETRY} action; struct scsi_sense_hdr sshdr; bool sense_valid; bool sense_current = true; /* false implies "deferred sense" */ blk_status_t blk_stat; sense_valid = scsi_command_normalize_sense(cmd, &sshdr); if (sense_valid) sense_current = !scsi_sense_is_deferred(&sshdr); blk_stat = scsi_result_to_blk_status(result); if (host_byte(result) == DID_RESET) { /* Third party bus reset or reset for error recovery * reasons. Just retry the command and see what * happens. */ action = ACTION_RETRY; } else if (sense_valid && sense_current) { switch (sshdr.sense_key) { case UNIT_ATTENTION: if (cmd->device->removable) { /* Detected disc change. Set a bit * and quietly refuse further access. */ cmd->device->changed = 1; action = ACTION_FAIL; } else { /* Must have been a power glitch, or a * bus reset. Could not have been a * media change, so we just retry the * command and see what happens. */ action = ACTION_RETRY; } break; case ILLEGAL_REQUEST: /* If we had an ILLEGAL REQUEST returned, then * we may have performed an unsupported * command. The only thing this should be * would be a ten byte read where only a six * byte read was supported. Also, on a system * where READ CAPACITY failed, we may have * read past the end of the disk. */ if ((cmd->device->use_10_for_rw && sshdr.asc == 0x20 && sshdr.ascq == 0x00) && (cmd->cmnd[0] == READ_10 || cmd->cmnd[0] == WRITE_10)) { /* This will issue a new 6-byte command. */ cmd->device->use_10_for_rw = 0; action = ACTION_REPREP; } else if (sshdr.asc == 0x10) /* DIX */ { action = ACTION_FAIL; blk_stat = BLK_STS_PROTECTION; /* INVALID COMMAND OPCODE or INVALID FIELD IN CDB */ } else if (sshdr.asc == 0x20 || sshdr.asc == 0x24) { action = ACTION_FAIL; blk_stat = BLK_STS_TARGET; } else action = ACTION_FAIL; break; case ABORTED_COMMAND: action = ACTION_FAIL; if (sshdr.asc == 0x10) /* DIF */ blk_stat = BLK_STS_PROTECTION; break; case NOT_READY: /* If the device is in the process of becoming * ready, or has a temporary blockage, retry. */ if (sshdr.asc == 0x04) { switch (sshdr.ascq) { case 0x01: /* becoming ready */ case 0x04: /* format in progress */ case 0x05: /* rebuild in progress */ case 0x06: /* recalculation in progress */ case 0x07: /* operation in progress */ case 0x08: /* Long write in progress */ case 0x09: /* self test in progress */ case 0x11: /* notify (enable spinup) required */ case 0x14: /* space allocation in progress */ case 0x1a: /* start stop unit in progress */ case 0x1b: /* sanitize in progress */ case 0x1d: /* configuration in progress */ action = ACTION_DELAYED_RETRY; break; case 0x0a: /* ALUA state transition */ action = ACTION_DELAYED_REPREP; break; /* * Depopulation might take many hours, * thus it is not worthwhile to retry. */ case 0x24: /* depopulation in progress */ case 0x25: /* depopulation restore in progress */ fallthrough; default: action = ACTION_FAIL; break; } } else action = ACTION_FAIL; break; case VOLUME_OVERFLOW: /* See SSC3rXX or current. */ action = ACTION_FAIL; break; case DATA_PROTECT: action = ACTION_FAIL; if ((sshdr.asc == 0x0C && sshdr.ascq == 0x12) || (sshdr.asc == 0x55 && (sshdr.ascq == 0x0E || sshdr.ascq == 0x0F))) { /* Insufficient zone resources */ blk_stat = BLK_STS_ZONE_OPEN_RESOURCE; } break; case COMPLETED: fallthrough; default: action = ACTION_FAIL; break; } } else action = ACTION_FAIL; if (action != ACTION_FAIL && scsi_cmd_runtime_exceeced(cmd)) action = ACTION_FAIL; switch (action) { case ACTION_FAIL: /* Give up and fail the remainder of the request */ if (!(req->rq_flags & RQF_QUIET)) { static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); if (unlikely(scsi_logging_level)) level = SCSI_LOG_LEVEL(SCSI_LOG_MLCOMPLETE_SHIFT, SCSI_LOG_MLCOMPLETE_BITS); /* * if logging is enabled the failure will be printed * in scsi_log_completion(), so avoid duplicate messages */ if (!level && __ratelimit(&_rs)) { scsi_print_result(cmd, NULL, FAILED); if (sense_valid) scsi_print_sense(cmd); scsi_print_command(cmd); } } if (!scsi_end_request(req, blk_stat, scsi_rq_err_bytes(req))) return; fallthrough; case ACTION_REPREP: scsi_mq_requeue_cmd(cmd, 0); break; case ACTION_DELAYED_REPREP: scsi_mq_requeue_cmd(cmd, ALUA_TRANSITION_REPREP_DELAY); break; case ACTION_RETRY: /* Retry the same command immediately */ __scsi_queue_insert(cmd, SCSI_MLQUEUE_EH_RETRY, false); break; case ACTION_DELAYED_RETRY: /* Retry the same command after a delay */ __scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY, false); break; } } /* * Helper for scsi_io_completion() when cmd->result is non-zero. Returns a * new result that may suppress further error checking. Also modifies * *blk_statp in some cases. */ static int scsi_io_completion_nz_result(struct scsi_cmnd *cmd, int result, blk_status_t *blk_statp) { bool sense_valid; bool sense_current = true; /* false implies "deferred sense" */ struct request *req = scsi_cmd_to_rq(cmd); struct scsi_sense_hdr sshdr; sense_valid = scsi_command_normalize_sense(cmd, &sshdr); if (sense_valid) sense_current = !scsi_sense_is_deferred(&sshdr); if (blk_rq_is_passthrough(req)) { if (sense_valid) { /* * SG_IO wants current and deferred errors */ cmd->sense_len = min(8 + cmd->sense_buffer[7], SCSI_SENSE_BUFFERSIZE); } if (sense_current) *blk_statp = scsi_result_to_blk_status(result); } else if (blk_rq_bytes(req) == 0 && sense_current) { /* * Flush commands do not transfers any data, and thus cannot use * good_bytes != blk_rq_bytes(req) as the signal for an error. * This sets *blk_statp explicitly for the problem case. */ *blk_statp = scsi_result_to_blk_status(result); } /* * Recovered errors need reporting, but they're always treated as * success, so fiddle the result code here. For passthrough requests * we already took a copy of the original into sreq->result which * is what gets returned to the user */ if (sense_valid && (sshdr.sense_key == RECOVERED_ERROR)) { bool do_print = true; /* * if ATA PASS-THROUGH INFORMATION AVAILABLE [0x0, 0x1d] * skip print since caller wants ATA registers. Only occurs * on SCSI ATA PASS_THROUGH commands when CK_COND=1 */ if ((sshdr.asc == 0x0) && (sshdr.ascq == 0x1d)) do_print = false; else if (req->rq_flags & RQF_QUIET) do_print = false; if (do_print) scsi_print_sense(cmd); result = 0; /* for passthrough, *blk_statp may be set */ *blk_statp = BLK_STS_OK; } /* * Another corner case: the SCSI status byte is non-zero but 'good'. * Example: PRE-FETCH command returns SAM_STAT_CONDITION_MET when * it is able to fit nominated LBs in its cache (and SAM_STAT_GOOD * if it can't fit). Treat SAM_STAT_CONDITION_MET and the related * intermediate statuses (both obsolete in SAM-4) as good. */ if ((result & 0xff) && scsi_status_is_good(result)) { result = 0; *blk_statp = BLK_STS_OK; } return result; } /** * scsi_io_completion - Completion processing for SCSI commands. * @cmd: command that is finished. * @good_bytes: number of processed bytes. * * We will finish off the specified number of sectors. If we are done, the * command block will be released and the queue function will be goosed. If we * are not done then we have to figure out what to do next: * * a) We can call scsi_mq_requeue_cmd(). The request will be * unprepared and put back on the queue. Then a new command will * be created for it. This should be used if we made forward * progress, or if we want to switch from READ(10) to READ(6) for * example. * * b) We can call scsi_io_completion_action(). The request will be * put back on the queue and retried using the same command as * before, possibly after a delay. * * c) We can call scsi_end_request() with blk_stat other than * BLK_STS_OK, to fail the remainder of the request. */ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) { int result = cmd->result; struct request *req = scsi_cmd_to_rq(cmd); blk_status_t blk_stat = BLK_STS_OK; if (unlikely(result)) /* a nz result may or may not be an error */ result = scsi_io_completion_nz_result(cmd, result, &blk_stat); /* * Next deal with any sectors which we were able to correctly * handle. */ SCSI_LOG_HLCOMPLETE(1, scmd_printk(KERN_INFO, cmd, "%u sectors total, %d bytes done.\n", blk_rq_sectors(req), good_bytes)); /* * Failed, zero length commands always need to drop down * to retry code. Fast path should return in this block. */ if (likely(blk_rq_bytes(req) > 0 || blk_stat == BLK_STS_OK)) { if (likely(!scsi_end_request(req, blk_stat, good_bytes))) return; /* no bytes remaining */ } /* Kill remainder if no retries. */ if (unlikely(blk_stat && scsi_noretry_cmd(cmd))) { if (scsi_end_request(req, blk_stat, blk_rq_bytes(req))) WARN_ONCE(true, "Bytes remaining after failed, no-retry command"); return; } /* * If there had been no error, but we have leftover bytes in the * request just queue the command up again. */ if (likely(result == 0)) scsi_mq_requeue_cmd(cmd, 0); else scsi_io_completion_action(cmd, result); } static inline bool scsi_cmd_needs_dma_drain(struct scsi_device *sdev, struct request *rq) { return sdev->dma_drain_len && blk_rq_is_passthrough(rq) && !op_is_write(req_op(rq)) && sdev->host->hostt->dma_need_drain(rq); } /** * scsi_alloc_sgtables - Allocate and initialize data and integrity scatterlists * @cmd: SCSI command data structure to initialize. * * Initializes @cmd->sdb and also @cmd->prot_sdb if data integrity is enabled * for @cmd. * * Returns: * * BLK_STS_OK - on success * * BLK_STS_RESOURCE - if the failure is retryable * * BLK_STS_IOERR - if the failure is fatal */ blk_status_t scsi_alloc_sgtables(struct scsi_cmnd *cmd) { struct scsi_device *sdev = cmd->device; struct request *rq = scsi_cmd_to_rq(cmd); unsigned short nr_segs = blk_rq_nr_phys_segments(rq); struct scatterlist *last_sg = NULL; blk_status_t ret; bool need_drain = scsi_cmd_needs_dma_drain(sdev, rq); int count; if (WARN_ON_ONCE(!nr_segs)) return BLK_STS_IOERR; /* * Make sure there is space for the drain. The driver must adjust * max_hw_segments to be prepared for this. */ if (need_drain) nr_segs++; /* * If sg table allocation fails, requeue request later. */ if (unlikely(sg_alloc_table_chained(&cmd->sdb.table, nr_segs, cmd->sdb.table.sgl, SCSI_INLINE_SG_CNT))) return BLK_STS_RESOURCE; /* * Next, walk the list, and fill in the addresses and sizes of * each segment. */ count = __blk_rq_map_sg(rq, cmd->sdb.table.sgl, &last_sg); if (blk_rq_bytes(rq) & rq->q->limits.dma_pad_mask) { unsigned int pad_len = (rq->q->limits.dma_pad_mask & ~blk_rq_bytes(rq)) + 1; last_sg->length += pad_len; cmd->extra_len += pad_len; } if (need_drain) { sg_unmark_end(last_sg); last_sg = sg_next(last_sg); sg_set_buf(last_sg, sdev->dma_drain_buf, sdev->dma_drain_len); sg_mark_end(last_sg); cmd->extra_len += sdev->dma_drain_len; count++; } BUG_ON(count > cmd->sdb.table.nents); cmd->sdb.table.nents = count; cmd->sdb.length = blk_rq_payload_bytes(rq); if (blk_integrity_rq(rq)) { struct scsi_data_buffer *prot_sdb = cmd->prot_sdb; if (WARN_ON_ONCE(!prot_sdb)) { /* * This can happen if someone (e.g. multipath) * queues a command to a device on an adapter * that does not support DIX. */ ret = BLK_STS_IOERR; goto out_free_sgtables; } if (sg_alloc_table_chained(&prot_sdb->table, rq->nr_integrity_segments, prot_sdb->table.sgl, SCSI_INLINE_PROT_SG_CNT)) { ret = BLK_STS_RESOURCE; goto out_free_sgtables; } count = blk_rq_map_integrity_sg(rq, prot_sdb->table.sgl); cmd->prot_sdb = prot_sdb; cmd->prot_sdb->table.nents = count; } return BLK_STS_OK; out_free_sgtables: scsi_free_sgtables(cmd); return ret; } EXPORT_SYMBOL(scsi_alloc_sgtables); /** * scsi_initialize_rq - initialize struct scsi_cmnd partially * @rq: Request associated with the SCSI command to be initialized. * * This function initializes the members of struct scsi_cmnd that must be * initialized before request processing starts and that won't be * reinitialized if a SCSI command is requeued. */ static void scsi_initialize_rq(struct request *rq) { struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq); memset(cmd->cmnd, 0, sizeof(cmd->cmnd)); cmd->cmd_len = MAX_COMMAND_SIZE; cmd->sense_len = 0; init_rcu_head(&cmd->rcu); cmd->jiffies_at_alloc = jiffies; cmd->retries = 0; } /** * scsi_alloc_request - allocate a block request and partially * initialize its &scsi_cmnd * @q: the device's request queue * @opf: the request operation code * @flags: block layer allocation flags * * Return: &struct request pointer on success or %NULL on failure */ struct request *scsi_alloc_request(struct request_queue *q, blk_opf_t opf, blk_mq_req_flags_t flags) { struct request *rq; rq = blk_mq_alloc_request(q, opf, flags); if (!IS_ERR(rq)) scsi_initialize_rq(rq); return rq; } EXPORT_SYMBOL_GPL(scsi_alloc_request); /* * Only called when the request isn't completed by SCSI, and not freed by * SCSI */ static void scsi_cleanup_rq(struct request *rq) { struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq); cmd->flags = 0; if (rq->rq_flags & RQF_DONTPREP) { scsi_mq_uninit_cmd(cmd); rq->rq_flags &= ~RQF_DONTPREP; } } /* Called before a request is prepared. See also scsi_mq_prep_fn(). */ void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd) { struct request *rq = scsi_cmd_to_rq(cmd); if (!blk_rq_is_passthrough(rq) && !(cmd->flags & SCMD_INITIALIZED)) { cmd->flags |= SCMD_INITIALIZED; scsi_initialize_rq(rq); } cmd->device = dev; INIT_LIST_HEAD(&cmd->eh_entry); INIT_DELAYED_WORK(&cmd->abort_work, scmd_eh_abort_handler); } static blk_status_t scsi_setup_scsi_cmnd(struct scsi_device *sdev, struct request *req) { struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req); /* * Passthrough requests may transfer data, in which case they must * a bio attached to them. Or they might contain a SCSI command * that does not transfer data, in which case they may optionally * submit a request without an attached bio. */ if (req->bio) { blk_status_t ret = scsi_alloc_sgtables(cmd); if (unlikely(ret != BLK_STS_OK)) return ret; } else { BUG_ON(blk_rq_bytes(req)); memset(&cmd->sdb, 0, sizeof(cmd->sdb)); } cmd->transfersize = blk_rq_bytes(req); return BLK_STS_OK; } static blk_status_t scsi_device_state_check(struct scsi_device *sdev, struct request *req) { switch (sdev->sdev_state) { case SDEV_CREATED: return BLK_STS_OK; case SDEV_OFFLINE: case SDEV_TRANSPORT_OFFLINE: /* * If the device is offline we refuse to process any * commands. The device must be brought online * before trying any recovery commands. */ if (!sdev->offline_already) { sdev->offline_already = true; sdev_printk(KERN_ERR, sdev, "rejecting I/O to offline device\n"); } return BLK_STS_IOERR; case SDEV_DEL: /* * If the device is fully deleted, we refuse to * process any commands as well. */ sdev_printk(KERN_ERR, sdev, "rejecting I/O to dead device\n"); return BLK_STS_IOERR; case SDEV_BLOCK: case SDEV_CREATED_BLOCK: return BLK_STS_RESOURCE; case SDEV_QUIESCE: /* * If the device is blocked we only accept power management * commands. */ if (req && WARN_ON_ONCE(!(req->rq_flags & RQF_PM))) return BLK_STS_RESOURCE; return BLK_STS_OK; default: /* * For any other not fully online state we only allow * power management commands. */ if (req && !(req->rq_flags & RQF_PM)) return BLK_STS_OFFLINE; return BLK_STS_OK; } } /* * scsi_dev_queue_ready: if we can send requests to sdev, assign one token * and return the token else return -1. */ static inline int scsi_dev_queue_ready(struct request_queue *q, struct scsi_device *sdev) { int token; token = sbitmap_get(&sdev->budget_map); if (token < 0) return -1; if (!atomic_read(&sdev->device_blocked)) return token; /* * Only unblock if no other commands are pending and * if device_blocked has decreased to zero */ if (scsi_device_busy(sdev) > 1 || atomic_dec_return(&sdev->device_blocked) > 0) { sbitmap_put(&sdev->budget_map, token); return -1; } SCSI_LOG_MLQUEUE(3, sdev_printk(KERN_INFO, sdev, "unblocking device at zero depth\n")); return token; } /* * scsi_target_queue_ready: checks if there we can send commands to target * @sdev: scsi device on starget to check. */ static inline int scsi_target_queue_ready(struct Scsi_Host *shost, struct scsi_device *sdev) { struct scsi_target *starget = scsi_target(sdev); unsigned int busy; if (starget->single_lun) { spin_lock_irq(shost->host_lock); if (starget->starget_sdev_user && starget->starget_sdev_user != sdev) { spin_unlock_irq(shost->host_lock); return 0; } starget->starget_sdev_user = sdev; spin_unlock_irq(shost->host_lock); } if (starget->can_queue <= 0) return 1; busy = atomic_inc_return(&starget->target_busy) - 1; if (atomic_read(&starget->target_blocked) > 0) { if (busy) goto starved; /* * unblock after target_blocked iterates to zero */ if (atomic_dec_return(&starget->target_blocked) > 0) goto out_dec; SCSI_LOG_MLQUEUE(3, starget_printk(KERN_INFO, starget, "unblocking target at zero depth\n")); } if (busy >= starget->can_queue) goto starved; return 1; starved: spin_lock_irq(shost->host_lock); list_move_tail(&sdev->starved_entry, &shost->starved_list); spin_unlock_irq(shost->host_lock); out_dec: if (starget->can_queue > 0) atomic_dec(&starget->target_busy); return 0; } /* * scsi_host_queue_ready: if we can send requests to shost, return 1 else * return 0. We must end up running the queue again whenever 0 is * returned, else IO can hang. */ static inline int scsi_host_queue_ready(struct request_queue *q, struct Scsi_Host *shost, struct scsi_device *sdev, struct scsi_cmnd *cmd) { if (atomic_read(&shost->host_blocked) > 0) { if (scsi_host_busy(shost) > 0) goto starved; /* * unblock after host_blocked iterates to zero */ if (atomic_dec_return(&shost->host_blocked) > 0) goto out_dec; SCSI_LOG_MLQUEUE(3, shost_printk(KERN_INFO, shost, "unblocking host at zero depth\n")); } if (shost->host_self_blocked) goto starved; /* We're OK to process the command, so we can't be starved */ if (!list_empty(&sdev->starved_entry)) { spin_lock_irq(shost->host_lock); if (!list_empty(&sdev->starved_entry)) list_del_init(&sdev->starved_entry); spin_unlock_irq(shost->host_lock); } __set_bit(SCMD_STATE_INFLIGHT, &cmd->state); return 1; starved: spin_lock_irq(shost->host_lock); if (list_empty(&sdev->starved_entry)) list_add_tail(&sdev->starved_entry, &shost->starved_list); spin_unlock_irq(shost->host_lock); out_dec: scsi_dec_host_busy(shost, cmd); return 0; } /* * Busy state exporting function for request stacking drivers. * * For efficiency, no lock is taken to check the busy state of * shost/starget/sdev, since the returned value is not guaranteed and * may be changed after request stacking drivers call the function, * regardless of taking lock or not. * * When scsi can't dispatch I/Os anymore and needs to kill I/Os scsi * needs to return 'not busy'. Otherwise, request stacking drivers * may hold requests forever. */ static bool scsi_mq_lld_busy(struct request_queue *q) { struct scsi_device *sdev = q->queuedata; struct Scsi_Host *shost; if (blk_queue_dying(q)) return false; shost = sdev->host; /* * Ignore host/starget busy state. * Since block layer does not have a concept of fairness across * multiple queues, congestion of host/starget needs to be handled * in SCSI layer. */ if (scsi_host_in_recovery(shost) || scsi_device_is_busy(sdev)) return true; return false; } /* * Block layer request completion callback. May be called from interrupt * context. */ static void scsi_complete(struct request *rq) { struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq); enum scsi_disposition disposition; INIT_LIST_HEAD(&cmd->eh_entry); atomic_inc(&cmd->device->iodone_cnt); if (cmd->result) atomic_inc(&cmd->device->ioerr_cnt); disposition = scsi_decide_disposition(cmd); if (disposition != SUCCESS && scsi_cmd_runtime_exceeced(cmd)) disposition = SUCCESS; scsi_log_completion(cmd, disposition); switch (disposition) { case SUCCESS: scsi_finish_command(cmd); break; case NEEDS_RETRY: scsi_queue_insert(cmd, SCSI_MLQUEUE_EH_RETRY); break; case ADD_TO_MLQUEUE: scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY); break; default: scsi_eh_scmd_add(cmd); break; } } /** * scsi_dispatch_cmd - Dispatch a command to the low-level driver. * @cmd: command block we are dispatching. * * Return: nonzero return request was rejected and device's queue needs to be * plugged. */ static int scsi_dispatch_cmd(struct scsi_cmnd *cmd) { struct Scsi_Host *host = cmd->device->host; int rtn = 0; atomic_inc(&cmd->device->iorequest_cnt); /* check if the device is still usable */ if (unlikely(cmd->device->sdev_state == SDEV_DEL)) { /* in SDEV_DEL we error all commands. DID_NO_CONNECT * returns an immediate error upwards, and signals * that the device is no longer present */ cmd->result = DID_NO_CONNECT << 16; goto done; } /* Check to see if the scsi lld made this device blocked. */ if (unlikely(scsi_device_blocked(cmd->device))) { /* * in blocked state, the command is just put back on * the device queue. The suspend state has already * blocked the queue so future requests should not * occur until the device transitions out of the * suspend state. */ SCSI_LOG_MLQUEUE(3, scmd_printk(KERN_INFO, cmd, "queuecommand : device blocked\n")); atomic_dec(&cmd->device->iorequest_cnt); return SCSI_MLQUEUE_DEVICE_BUSY; } /* Store the LUN value in cmnd, if needed. */ if (cmd->device->lun_in_cdb) cmd->cmnd[1] = (cmd->cmnd[1] & 0x1f) | (cmd->device->lun << 5 & 0xe0); scsi_log_send(cmd); /* * Before we queue this command, check if the command * length exceeds what the host adapter can handle. */ if (cmd->cmd_len > cmd->device->host->max_cmd_len) { SCSI_LOG_MLQUEUE(3, scmd_printk(KERN_INFO, cmd, "queuecommand : command too long. " "cdb_size=%d host->max_cmd_len=%d\n", cmd->cmd_len, cmd->device->host->max_cmd_len)); cmd->result = (DID_ABORT << 16); goto done; } if (unlikely(host->shost_state == SHOST_DEL)) { cmd->result = (DID_NO_CONNECT << 16); goto done; } trace_scsi_dispatch_cmd_start(cmd); rtn = host->hostt->queuecommand(host, cmd); if (rtn) { atomic_dec(&cmd->device->iorequest_cnt); trace_scsi_dispatch_cmd_error(cmd, rtn); if (rtn != SCSI_MLQUEUE_DEVICE_BUSY && rtn != SCSI_MLQUEUE_TARGET_BUSY) rtn = SCSI_MLQUEUE_HOST_BUSY; SCSI_LOG_MLQUEUE(3, scmd_printk(KERN_INFO, cmd, "queuecommand : request rejected\n")); } return rtn; done: scsi_done(cmd); return 0; } /* Size in bytes of the sg-list stored in the scsi-mq command-private data. */ static unsigned int scsi_mq_inline_sgl_size(struct Scsi_Host *shost) { return min_t(unsigned int, shost->sg_tablesize, SCSI_INLINE_SG_CNT) * sizeof(struct scatterlist); } static blk_status_t scsi_prepare_cmd(struct request *req) { struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req); struct scsi_device *sdev = req->q->queuedata; struct Scsi_Host *shost = sdev->host; bool in_flight = test_bit(SCMD_STATE_INFLIGHT, &cmd->state); struct scatterlist *sg; scsi_init_command(sdev, cmd); cmd->eh_eflags = 0; cmd->prot_type = 0; cmd->prot_flags = 0; cmd->submitter = 0; memset(&cmd->sdb, 0, sizeof(cmd->sdb)); cmd->underflow = 0; cmd->transfersize = 0; cmd->host_scribble = NULL; cmd->result = 0; cmd->extra_len = 0; cmd->state = 0; if (in_flight) __set_bit(SCMD_STATE_INFLIGHT, &cmd->state); cmd->prot_op = SCSI_PROT_NORMAL; if (blk_rq_bytes(req)) cmd->sc_data_direction = rq_dma_dir(req); else cmd->sc_data_direction = DMA_NONE; sg = (void *)cmd + sizeof(struct scsi_cmnd) + shost->hostt->cmd_size; cmd->sdb.table.sgl = sg; if (scsi_host_get_prot(shost)) { memset(cmd->prot_sdb, 0, sizeof(struct scsi_data_buffer)); cmd->prot_sdb->table.sgl = (struct scatterlist *)(cmd->prot_sdb + 1); } /* * Special handling for passthrough commands, which don't go to the ULP * at all: */ if (blk_rq_is_passthrough(req)) return scsi_setup_scsi_cmnd(sdev, req); if (sdev->handler && sdev->handler->prep_fn) { blk_status_t ret = sdev->handler->prep_fn(sdev, req); if (ret != BLK_STS_OK) return ret; } /* Usually overridden by the ULP */ cmd->allowed = 0; memset(cmd->cmnd, 0, sizeof(cmd->cmnd)); return scsi_cmd_to_driver(cmd)->init_command(cmd); } static void scsi_done_internal(struct scsi_cmnd *cmd, bool complete_directly) { struct request *req = scsi_cmd_to_rq(cmd); switch (cmd->submitter) { case SUBMITTED_BY_BLOCK_LAYER: break; case SUBMITTED_BY_SCSI_ERROR_HANDLER: return scsi_eh_done(cmd); case SUBMITTED_BY_SCSI_RESET_IOCTL: return; } if (unlikely(blk_should_fake_timeout(scsi_cmd_to_rq(cmd)->q))) return; if (unlikely(test_and_set_bit(SCMD_STATE_COMPLETE, &cmd->state))) return; trace_scsi_dispatch_cmd_done(cmd); if (complete_directly) blk_mq_complete_request_direct(req, scsi_complete); else blk_mq_complete_request(req); } void scsi_done(struct scsi_cmnd *cmd) { scsi_done_internal(cmd, false); } EXPORT_SYMBOL(scsi_done); void scsi_done_direct(struct scsi_cmnd *cmd) { scsi_done_internal(cmd, true); } EXPORT_SYMBOL(scsi_done_direct); static void scsi_mq_put_budget(struct request_queue *q, int budget_token) { struct scsi_device *sdev = q->queuedata; sbitmap_put(&sdev->budget_map, budget_token); } /* * When to reinvoke queueing after a resource shortage. It's 3 msecs to * not change behaviour from the previous unplug mechanism, experimentation * may prove this needs changing. */ #define SCSI_QUEUE_DELAY 3 static int scsi_mq_get_budget(struct request_queue *q) { struct scsi_device *sdev = q->queuedata; int token = scsi_dev_queue_ready(q, sdev); if (token >= 0) return token; atomic_inc(&sdev->restarts); /* * Orders atomic_inc(&sdev->restarts) and atomic_read(&sdev->device_busy). * .restarts must be incremented before .device_busy is read because the * code in scsi_run_queue_async() depends on the order of these operations. */ smp_mb__after_atomic(); /* * If all in-flight requests originated from this LUN are completed * before reading .device_busy, sdev->device_busy will be observed as * zero, then blk_mq_delay_run_hw_queues() will dispatch this request * soon. Otherwise, completion of one of these requests will observe * the .restarts flag, and the request queue will be run for handling * this request, see scsi_end_request(). */ if (unlikely(scsi_device_busy(sdev) == 0 && !scsi_device_blocked(sdev))) blk_mq_delay_run_hw_queues(sdev->request_queue, SCSI_QUEUE_DELAY); return -1; } static void scsi_mq_set_rq_budget_token(struct request *req, int token) { struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req); cmd->budget_token = token; } static int scsi_mq_get_rq_budget_token(struct request *req) { struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req); return cmd->budget_token; } static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { struct request *req = bd->rq; struct request_queue *q = req->q; struct scsi_device *sdev = q->queuedata; struct Scsi_Host *shost = sdev->host; struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req); blk_status_t ret; int reason; WARN_ON_ONCE(cmd->budget_token < 0); /* * If the device is not in running state we will reject some or all * commands. */ if (unlikely(sdev->sdev_state != SDEV_RUNNING)) { ret = scsi_device_state_check(sdev, req); if (ret != BLK_STS_OK) goto out_put_budget; } ret = BLK_STS_RESOURCE; if (!scsi_target_queue_ready(shost, sdev)) goto out_put_budget; if (unlikely(scsi_host_in_recovery(shost))) { if (cmd->flags & SCMD_FAIL_IF_RECOVERING) ret = BLK_STS_OFFLINE; goto out_dec_target_busy; } if (!scsi_host_queue_ready(q, shost, sdev, cmd)) goto out_dec_target_busy; /* * Only clear the driver-private command data if the LLD does not supply * a function to initialize that data. */ if (shost->hostt->cmd_size && !shost->hostt->init_cmd_priv) memset(scsi_cmd_priv(cmd), 0, shost->hostt->cmd_size); if (!(req->rq_flags & RQF_DONTPREP)) { ret = scsi_prepare_cmd(req); if (ret != BLK_STS_OK) goto out_dec_host_busy; req->rq_flags |= RQF_DONTPREP; } else { clear_bit(SCMD_STATE_COMPLETE, &cmd->state); } cmd->flags &= SCMD_PRESERVED_FLAGS; if (sdev->simple_tags) cmd->flags |= SCMD_TAGGED; if (bd->last) cmd->flags |= SCMD_LAST; scsi_set_resid(cmd, 0); memset(cmd->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE); cmd->submitter = SUBMITTED_BY_BLOCK_LAYER; blk_mq_start_request(req); reason = scsi_dispatch_cmd(cmd); if (reason) { scsi_set_blocked(cmd, reason); ret = BLK_STS_RESOURCE; goto out_dec_host_busy; } return BLK_STS_OK; out_dec_host_busy: scsi_dec_host_busy(shost, cmd); out_dec_target_busy: if (scsi_target(sdev)->can_queue > 0) atomic_dec(&scsi_target(sdev)->target_busy); out_put_budget: scsi_mq_put_budget(q, cmd->budget_token); cmd->budget_token = -1; switch (ret) { case BLK_STS_OK: break; case BLK_STS_RESOURCE: if (scsi_device_blocked(sdev)) ret = BLK_STS_DEV_RESOURCE; break; case BLK_STS_AGAIN: cmd->result = DID_BUS_BUSY << 16; if (req->rq_flags & RQF_DONTPREP) scsi_mq_uninit_cmd(cmd); break; default: if (unlikely(!scsi_device_online(sdev))) cmd->result = DID_NO_CONNECT << 16; else cmd->result = DID_ERROR << 16; /* * Make sure to release all allocated resources when * we hit an error, as we will never see this command * again. */ if (req->rq_flags & RQF_DONTPREP) scsi_mq_uninit_cmd(cmd); scsi_run_queue_async(sdev); break; } return ret; } static int scsi_mq_init_request(struct blk_mq_tag_set *set, struct request *rq, unsigned int hctx_idx, unsigned int numa_node) { struct Scsi_Host *shost = set->driver_data; struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq); struct scatterlist *sg; int ret = 0; cmd->sense_buffer = kmem_cache_alloc_node(scsi_sense_cache, GFP_KERNEL, numa_node); if (!cmd->sense_buffer) return -ENOMEM; if (scsi_host_get_prot(shost)) { sg = (void *)cmd + sizeof(struct scsi_cmnd) + shost->hostt->cmd_size; cmd->prot_sdb = (void *)sg + scsi_mq_inline_sgl_size(shost); } if (shost->hostt->init_cmd_priv) { ret = shost->hostt->init_cmd_priv(shost, cmd); if (ret < 0) kmem_cache_free(scsi_sense_cache, cmd->sense_buffer); } return ret; } static void scsi_mq_exit_request(struct blk_mq_tag_set *set, struct request *rq, unsigned int hctx_idx) { struct Scsi_Host *shost = set->driver_data; struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq); if (shost->hostt->exit_cmd_priv) shost->hostt->exit_cmd_priv(shost, cmd); kmem_cache_free(scsi_sense_cache, cmd->sense_buffer); } static int scsi_mq_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) { struct Scsi_Host *shost = hctx->driver_data; if (shost->hostt->mq_poll) return shost->hostt->mq_poll(shost, hctx->queue_num); return 0; } static int scsi_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { struct Scsi_Host *shost = data; hctx->driver_data = shost; return 0; } static void scsi_map_queues(struct blk_mq_tag_set *set) { struct Scsi_Host *shost = container_of(set, struct Scsi_Host, tag_set); if (shost->hostt->map_queues) return shost->hostt->map_queues(shost); blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]); } void scsi_init_limits(struct Scsi_Host *shost, struct queue_limits *lim) { struct device *dev = shost->dma_dev; memset(lim, 0, sizeof(*lim)); lim->max_segments = min_t(unsigned short, shost->sg_tablesize, SG_MAX_SEGMENTS); if (scsi_host_prot_dma(shost)) { shost->sg_prot_tablesize = min_not_zero(shost->sg_prot_tablesize, (unsigned short)SCSI_MAX_PROT_SG_SEGMENTS); BUG_ON(shost->sg_prot_tablesize < shost->sg_tablesize); lim->max_integrity_segments = shost->sg_prot_tablesize; } lim->max_hw_sectors = shost->max_sectors; lim->seg_boundary_mask = shost->dma_boundary; lim->max_segment_size = shost->max_segment_size; lim->virt_boundary_mask = shost->virt_boundary_mask; lim->dma_alignment = max_t(unsigned int, shost->dma_alignment, dma_get_cache_alignment() - 1); /* * Propagate the DMA formation properties to the dma-mapping layer as * a courtesy service to the LLDDs. This needs to check that the buses * actually support the DMA API first, though. */ if (dev->dma_parms) { dma_set_seg_boundary(dev, shost->dma_boundary); dma_set_max_seg_size(dev, shost->max_segment_size); } } EXPORT_SYMBOL_GPL(scsi_init_limits); static const struct blk_mq_ops scsi_mq_ops_no_commit = { .get_budget = scsi_mq_get_budget, .put_budget = scsi_mq_put_budget, .queue_rq = scsi_queue_rq, .complete = scsi_complete, .timeout = scsi_timeout, #ifdef CONFIG_BLK_DEBUG_FS .show_rq = scsi_show_rq, #endif .init_request = scsi_mq_init_request, .exit_request = scsi_mq_exit_request, .cleanup_rq = scsi_cleanup_rq, .busy = scsi_mq_lld_busy, .map_queues = scsi_map_queues, .init_hctx = scsi_init_hctx, .poll = scsi_mq_poll, .set_rq_budget_token = scsi_mq_set_rq_budget_token, .get_rq_budget_token = scsi_mq_get_rq_budget_token, }; static void scsi_commit_rqs(struct blk_mq_hw_ctx *hctx) { struct Scsi_Host *shost = hctx->driver_data; shost->hostt->commit_rqs(shost, hctx->queue_num); } static const struct blk_mq_ops scsi_mq_ops = { .get_budget = scsi_mq_get_budget, .put_budget = scsi_mq_put_budget, .queue_rq = scsi_queue_rq, .commit_rqs = scsi_commit_rqs, .complete = scsi_complete, .timeout = scsi_timeout, #ifdef CONFIG_BLK_DEBUG_FS .show_rq = scsi_show_rq, #endif .init_request = scsi_mq_init_request, .exit_request = scsi_mq_exit_request, .cleanup_rq = scsi_cleanup_rq, .busy = scsi_mq_lld_busy, .map_queues = scsi_map_queues, .init_hctx = scsi_init_hctx, .poll = scsi_mq_poll, .set_rq_budget_token = scsi_mq_set_rq_budget_token, .get_rq_budget_token = scsi_mq_get_rq_budget_token, }; int scsi_mq_setup_tags(struct Scsi_Host *shost) { unsigned int cmd_size, sgl_size; struct blk_mq_tag_set *tag_set = &shost->tag_set; sgl_size = max_t(unsigned int, sizeof(struct scatterlist), scsi_mq_inline_sgl_size(shost)); cmd_size = sizeof(struct scsi_cmnd) + shost->hostt->cmd_size + sgl_size; if (scsi_host_get_prot(shost)) cmd_size += sizeof(struct scsi_data_buffer) + sizeof(struct scatterlist) * SCSI_INLINE_PROT_SG_CNT; memset(tag_set, 0, sizeof(*tag_set)); if (shost->hostt->commit_rqs) tag_set->ops = &scsi_mq_ops; else tag_set->ops = &scsi_mq_ops_no_commit; tag_set->nr_hw_queues = shost->nr_hw_queues ? : 1; tag_set->nr_maps = shost->nr_maps ? : 1; tag_set->queue_depth = shost->can_queue; tag_set->cmd_size = cmd_size; tag_set->numa_node = dev_to_node(shost->dma_dev); if (shost->hostt->tag_alloc_policy_rr) tag_set->flags |= BLK_MQ_F_TAG_RR; if (shost->queuecommand_may_block) tag_set->flags |= BLK_MQ_F_BLOCKING; tag_set->driver_data = shost; if (shost->host_tagset) tag_set->flags |= BLK_MQ_F_TAG_HCTX_SHARED; return blk_mq_alloc_tag_set(tag_set); } void scsi_mq_free_tags(struct kref *kref) { struct Scsi_Host *shost = container_of(kref, typeof(*shost), tagset_refcnt); blk_mq_free_tag_set(&shost->tag_set); complete(&shost->tagset_freed); } /** * scsi_device_from_queue - return sdev associated with a request_queue * @q: The request queue to return the sdev from * * Return the sdev associated with a request queue or NULL if the * request_queue does not reference a SCSI device. */ struct scsi_device *scsi_device_from_queue(struct request_queue *q) { struct scsi_device *sdev = NULL; if (q->mq_ops == &scsi_mq_ops_no_commit || q->mq_ops == &scsi_mq_ops) sdev = q->queuedata; if (!sdev || !get_device(&sdev->sdev_gendev)) sdev = NULL; return sdev; } /* * pktcdvd should have been integrated into the SCSI layers, but for historical * reasons like the old IDE driver it isn't. This export allows it to safely * probe if a given device is a SCSI one and only attach to that. */ #ifdef CONFIG_CDROM_PKTCDVD_MODULE EXPORT_SYMBOL_GPL(scsi_device_from_queue); #endif /** * scsi_block_requests - Utility function used by low-level drivers to prevent * further commands from being queued to the device. * @shost: host in question * * There is no timer nor any other means by which the requests get unblocked * other than the low-level driver calling scsi_unblock_requests(). */ void scsi_block_requests(struct Scsi_Host *shost) { shost->host_self_blocked = 1; } EXPORT_SYMBOL(scsi_block_requests); /** * scsi_unblock_requests - Utility function used by low-level drivers to allow * further commands to be queued to the device. * @shost: host in question * * There is no timer nor any other means by which the requests get unblocked * other than the low-level driver calling scsi_unblock_requests(). This is done * as an API function so that changes to the internals of the scsi mid-layer * won't require wholesale changes to drivers that use this feature. */ void scsi_unblock_requests(struct Scsi_Host *shost) { shost->host_self_blocked = 0; scsi_run_host_queues(shost); } EXPORT_SYMBOL(scsi_unblock_requests); void scsi_exit_queue(void) { kmem_cache_destroy(scsi_sense_cache); } /** * scsi_mode_select - issue a mode select * @sdev: SCSI device to be queried * @pf: Page format bit (1 == standard, 0 == vendor specific) * @sp: Save page bit (0 == don't save, 1 == save) * @buffer: request buffer (may not be smaller than eight bytes) * @len: length of request buffer. * @timeout: command timeout * @retries: number of retries before failing * @data: returns a structure abstracting the mode header data * @sshdr: place to put sense data (or NULL if no sense to be collected). * must be SCSI_SENSE_BUFFERSIZE big. * * Returns zero if successful; negative error number or scsi * status on error * */ int scsi_mode_select(struct scsi_device *sdev, int pf, int sp, unsigned char *buffer, int len, int timeout, int retries, struct scsi_mode_data *data, struct scsi_sense_hdr *sshdr) { unsigned char cmd[10]; unsigned char *real_buffer; const struct scsi_exec_args exec_args = { .sshdr = sshdr, }; int ret; memset(cmd, 0, sizeof(cmd)); cmd[1] = (pf ? 0x10 : 0) | (sp ? 0x01 : 0); /* * Use MODE SELECT(10) if the device asked for it or if the mode page * and the mode select header cannot fit within the maximumm 255 bytes * of the MODE SELECT(6) command. */ if (sdev->use_10_for_ms || len + 4 > 255 || data->block_descriptor_length > 255) { if (len > 65535 - 8) return -EINVAL; real_buffer = kmalloc(8 + len, GFP_KERNEL); if (!real_buffer) return -ENOMEM; memcpy(real_buffer + 8, buffer, len); len += 8; real_buffer[0] = 0; real_buffer[1] = 0; real_buffer[2] = data->medium_type; real_buffer[3] = data->device_specific; real_buffer[4] = data->longlba ? 0x01 : 0; real_buffer[5] = 0; put_unaligned_be16(data->block_descriptor_length, &real_buffer[6]); cmd[0] = MODE_SELECT_10; put_unaligned_be16(len, &cmd[7]); } else { if (data->longlba) return -EINVAL; real_buffer = kmalloc(4 + len, GFP_KERNEL); if (!real_buffer) return -ENOMEM; memcpy(real_buffer + 4, buffer, len); len += 4; real_buffer[0] = 0; real_buffer[1] = data->medium_type; real_buffer[2] = data->device_specific; real_buffer[3] = data->block_descriptor_length; cmd[0] = MODE_SELECT; cmd[4] = len; } ret = scsi_execute_cmd(sdev, cmd, REQ_OP_DRV_OUT, real_buffer, len, timeout, retries, &exec_args); kfree(real_buffer); return ret; } EXPORT_SYMBOL_GPL(scsi_mode_select); /** * scsi_mode_sense - issue a mode sense, falling back from 10 to six bytes if necessary. * @sdev: SCSI device to be queried * @dbd: set to prevent mode sense from returning block descriptors * @modepage: mode page being requested * @subpage: sub-page of the mode page being requested * @buffer: request buffer (may not be smaller than eight bytes) * @len: length of request buffer. * @timeout: command timeout * @retries: number of retries before failing * @data: returns a structure abstracting the mode header data * @sshdr: place to put sense data (or NULL if no sense to be collected). * must be SCSI_SENSE_BUFFERSIZE big. * * Returns zero if successful, or a negative error number on failure */ int scsi_mode_sense(struct scsi_device *sdev, int dbd, int modepage, int subpage, unsigned char *buffer, int len, int timeout, int retries, struct scsi_mode_data *data, struct scsi_sense_hdr *sshdr) { unsigned char cmd[12]; int use_10_for_ms; int header_length; int result; struct scsi_sense_hdr my_sshdr; struct scsi_failure failure_defs[] = { { .sense = UNIT_ATTENTION, .asc = SCMD_FAILURE_ASC_ANY, .ascq = SCMD_FAILURE_ASCQ_ANY, .allowed = retries, .result = SAM_STAT_CHECK_CONDITION, }, {} }; struct scsi_failures failures = { .failure_definitions = failure_defs, }; const struct scsi_exec_args exec_args = { /* caller might not be interested in sense, but we need it */ .sshdr = sshdr ? : &my_sshdr, .failures = &failures, }; memset(data, 0, sizeof(*data)); memset(&cmd[0], 0, 12); dbd = sdev->set_dbd_for_ms ? 8 : dbd; cmd[1] = dbd & 0x18; /* allows DBD and LLBA bits */ cmd[2] = modepage; cmd[3] = subpage; sshdr = exec_args.sshdr; retry: use_10_for_ms = sdev->use_10_for_ms || len > 255; if (use_10_for_ms) { if (len < 8 || len > 65535) return -EINVAL; cmd[0] = MODE_SENSE_10; put_unaligned_be16(len, &cmd[7]); header_length = 8; } else { if (len < 4) return -EINVAL; cmd[0] = MODE_SENSE; cmd[4] = len; header_length = 4; } memset(buffer, 0, len); result = scsi_execute_cmd(sdev, cmd, REQ_OP_DRV_IN, buffer, len, timeout, retries, &exec_args); if (result < 0) return result; /* This code looks awful: what it's doing is making sure an * ILLEGAL REQUEST sense return identifies the actual command * byte as the problem. MODE_SENSE commands can return * ILLEGAL REQUEST if the code page isn't supported */ if (!scsi_status_is_good(result)) { if (scsi_sense_valid(sshdr)) { if ((sshdr->sense_key == ILLEGAL_REQUEST) && (sshdr->asc == 0x20) && (sshdr->ascq == 0)) { /* * Invalid command operation code: retry using * MODE SENSE(6) if this was a MODE SENSE(10) * request, except if the request mode page is * too large for MODE SENSE single byte * allocation length field. */ if (use_10_for_ms) { if (len > 255) return -EIO; sdev->use_10_for_ms = 0; goto retry; } } } return -EIO; } if (unlikely(buffer[0] == 0x86 && buffer[1] == 0x0b && (modepage == 6 || modepage == 8))) { /* Initio breakage? */ header_length = 0; data->length = 13; data->medium_type = 0; data->device_specific = 0; data->longlba = 0; data->block_descriptor_length = 0; } else if (use_10_for_ms) { data->length = get_unaligned_be16(&buffer[0]) + 2; data->medium_type = buffer[2]; data->device_specific = buffer[3]; data->longlba = buffer[4] & 0x01; data->block_descriptor_length = get_unaligned_be16(&buffer[6]); } else { data->length = buffer[0] + 1; data->medium_type = buffer[1]; data->device_specific = buffer[2]; data->block_descriptor_length = buffer[3]; } data->header_length = header_length; return 0; } EXPORT_SYMBOL(scsi_mode_sense); /** * scsi_test_unit_ready - test if unit is ready * @sdev: scsi device to change the state of. * @timeout: command timeout * @retries: number of retries before failing * @sshdr: outpout pointer for decoded sense information. * * Returns zero if unsuccessful or an error if TUR failed. For * removable media, UNIT_ATTENTION sets ->changed flag. **/ int scsi_test_unit_ready(struct scsi_device *sdev, int timeout, int retries, struct scsi_sense_hdr *sshdr) { char cmd[] = { TEST_UNIT_READY, 0, 0, 0, 0, 0, }; const struct scsi_exec_args exec_args = { .sshdr = sshdr, }; int result; /* try to eat the UNIT_ATTENTION if there are enough retries */ do { result = scsi_execute_cmd(sdev, cmd, REQ_OP_DRV_IN, NULL, 0, timeout, 1, &exec_args); if (sdev->removable && result > 0 && scsi_sense_valid(sshdr) && sshdr->sense_key == UNIT_ATTENTION) sdev->changed = 1; } while (result > 0 && scsi_sense_valid(sshdr) && sshdr->sense_key == UNIT_ATTENTION && --retries); return result; } EXPORT_SYMBOL(scsi_test_unit_ready); /** * scsi_device_set_state - Take the given device through the device state model. * @sdev: scsi device to change the state of. * @state: state to change to. * * Returns zero if successful or an error if the requested * transition is illegal. */ int scsi_device_set_state(struct scsi_device *sdev, enum scsi_device_state state) { enum scsi_device_state oldstate = sdev->sdev_state; if (state == oldstate) return 0; switch (state) { case SDEV_CREATED: switch (oldstate) { case SDEV_CREATED_BLOCK: break; default: goto illegal; } break; case SDEV_RUNNING: switch (oldstate) { case SDEV_CREATED: case SDEV_OFFLINE: case SDEV_TRANSPORT_OFFLINE: case SDEV_QUIESCE: case SDEV_BLOCK: break; default: goto illegal; } break; case SDEV_QUIESCE: switch (oldstate) { case SDEV_RUNNING: case SDEV_OFFLINE: case SDEV_TRANSPORT_OFFLINE: break; default: goto illegal; } break; case SDEV_OFFLINE: case SDEV_TRANSPORT_OFFLINE: switch (oldstate) { case SDEV_CREATED: case SDEV_RUNNING: case SDEV_QUIESCE: case SDEV_BLOCK: break; default: goto illegal; } break; case SDEV_BLOCK: switch (oldstate) { case SDEV_RUNNING: case SDEV_CREATED_BLOCK: case SDEV_QUIESCE: case SDEV_OFFLINE: break; default: goto illegal; } break; case SDEV_CREATED_BLOCK: switch (oldstate) { case SDEV_CREATED: break; default: goto illegal; } break; case SDEV_CANCEL: switch (oldstate) { case SDEV_CREATED: case SDEV_RUNNING: case SDEV_QUIESCE: case SDEV_OFFLINE: case SDEV_TRANSPORT_OFFLINE: break; default: goto illegal; } break; case SDEV_DEL: switch (oldstate) { case SDEV_CREATED: case SDEV_RUNNING: case SDEV_OFFLINE: case SDEV_TRANSPORT_OFFLINE: case SDEV_CANCEL: case SDEV_BLOCK: case SDEV_CREATED_BLOCK: break; default: goto illegal; } break; } sdev->offline_already = false; sdev->sdev_state = state; return 0; illegal: SCSI_LOG_ERROR_RECOVERY(1, sdev_printk(KERN_ERR, sdev, "Illegal state transition %s->%s", scsi_device_state_name(oldstate), scsi_device_state_name(state)) ); return -EINVAL; } EXPORT_SYMBOL(scsi_device_set_state); /** * scsi_evt_emit - emit a single SCSI device uevent * @sdev: associated SCSI device * @evt: event to emit * * Send a single uevent (scsi_event) to the associated scsi_device. */ static void scsi_evt_emit(struct scsi_device *sdev, struct scsi_event *evt) { int idx = 0; char *envp[3]; switch (evt->evt_type) { case SDEV_EVT_MEDIA_CHANGE: envp[idx++] = "SDEV_MEDIA_CHANGE=1"; break; case SDEV_EVT_INQUIRY_CHANGE_REPORTED: scsi_rescan_device(sdev); envp[idx++] = "SDEV_UA=INQUIRY_DATA_HAS_CHANGED"; break; case SDEV_EVT_CAPACITY_CHANGE_REPORTED: envp[idx++] = "SDEV_UA=CAPACITY_DATA_HAS_CHANGED"; break; case SDEV_EVT_SOFT_THRESHOLD_REACHED_REPORTED: envp[idx++] = "SDEV_UA=THIN_PROVISIONING_SOFT_THRESHOLD_REACHED"; break; case SDEV_EVT_MODE_PARAMETER_CHANGE_REPORTED: envp[idx++] = "SDEV_UA=MODE_PARAMETERS_CHANGED"; break; case SDEV_EVT_LUN_CHANGE_REPORTED: envp[idx++] = "SDEV_UA=REPORTED_LUNS_DATA_HAS_CHANGED"; break; case SDEV_EVT_ALUA_STATE_CHANGE_REPORTED: envp[idx++] = "SDEV_UA=ASYMMETRIC_ACCESS_STATE_CHANGED"; break; case SDEV_EVT_POWER_ON_RESET_OCCURRED: envp[idx++] = "SDEV_UA=POWER_ON_RESET_OCCURRED"; break; default: /* do nothing */ break; } envp[idx++] = NULL; kobject_uevent_env(&sdev->sdev_gendev.kobj, KOBJ_CHANGE, envp); } /** * scsi_evt_thread - send a uevent for each scsi event * @work: work struct for scsi_device * * Dispatch queued events to their associated scsi_device kobjects * as uevents. */ void scsi_evt_thread(struct work_struct *work) { struct scsi_device *sdev; enum scsi_device_event evt_type; LIST_HEAD(event_list); sdev = container_of(work, struct scsi_device, event_work); for (evt_type = SDEV_EVT_FIRST; evt_type <= SDEV_EVT_LAST; evt_type++) if (test_and_clear_bit(evt_type, sdev->pending_events)) sdev_evt_send_simple(sdev, evt_type, GFP_KERNEL); while (1) { struct scsi_event *evt; struct list_head *this, *tmp; unsigned long flags; spin_lock_irqsave(&sdev->list_lock, flags); list_splice_init(&sdev->event_list, &event_list); spin_unlock_irqrestore(&sdev->list_lock, flags); if (list_empty(&event_list)) break; list_for_each_safe(this, tmp, &event_list) { evt = list_entry(this, struct scsi_event, node); list_del(&evt->node); scsi_evt_emit(sdev, evt); kfree(evt); } } } /** * sdev_evt_send - send asserted event to uevent thread * @sdev: scsi_device event occurred on * @evt: event to send * * Assert scsi device event asynchronously. */ void sdev_evt_send(struct scsi_device *sdev, struct scsi_event *evt) { unsigned long flags; #if 0 /* FIXME: currently this check eliminates all media change events * for polled devices. Need to update to discriminate between AN * and polled events */ if (!test_bit(evt->evt_type, sdev->supported_events)) { kfree(evt); return; } #endif spin_lock_irqsave(&sdev->list_lock, flags); list_add_tail(&evt->node, &sdev->event_list); schedule_work(&sdev->event_work); spin_unlock_irqrestore(&sdev->list_lock, flags); } EXPORT_SYMBOL_GPL(sdev_evt_send); /** * sdev_evt_alloc - allocate a new scsi event * @evt_type: type of event to allocate * @gfpflags: GFP flags for allocation * * Allocates and returns a new scsi_event. */ struct scsi_event *sdev_evt_alloc(enum scsi_device_event evt_type, gfp_t gfpflags) { struct scsi_event *evt = kzalloc(sizeof(struct scsi_event), gfpflags); if (!evt) return NULL; evt->evt_type = evt_type; INIT_LIST_HEAD(&evt->node); /* evt_type-specific initialization, if any */ switch (evt_type) { case SDEV_EVT_MEDIA_CHANGE: case SDEV_EVT_INQUIRY_CHANGE_REPORTED: case SDEV_EVT_CAPACITY_CHANGE_REPORTED: case SDEV_EVT_SOFT_THRESHOLD_REACHED_REPORTED: case SDEV_EVT_MODE_PARAMETER_CHANGE_REPORTED: case SDEV_EVT_LUN_CHANGE_REPORTED: case SDEV_EVT_ALUA_STATE_CHANGE_REPORTED: case SDEV_EVT_POWER_ON_RESET_OCCURRED: default: /* do nothing */ break; } return evt; } EXPORT_SYMBOL_GPL(sdev_evt_alloc); /** * sdev_evt_send_simple - send asserted event to uevent thread * @sdev: scsi_device event occurred on * @evt_type: type of event to send * @gfpflags: GFP flags for allocation * * Assert scsi device event asynchronously, given an event type. */ void sdev_evt_send_simple(struct scsi_device *sdev, enum scsi_device_event evt_type, gfp_t gfpflags) { struct scsi_event *evt = sdev_evt_alloc(evt_type, gfpflags); if (!evt) { sdev_printk(KERN_ERR, sdev, "event %d eaten due to OOM\n", evt_type); return; } sdev_evt_send(sdev, evt); } EXPORT_SYMBOL_GPL(sdev_evt_send_simple); /** * scsi_device_quiesce - Block all commands except power management. * @sdev: scsi device to quiesce. * * This works by trying to transition to the SDEV_QUIESCE state * (which must be a legal transition). When the device is in this * state, only power management requests will be accepted, all others will * be deferred. * * Must be called with user context, may sleep. * * Returns zero if unsuccessful or an error if not. */ int scsi_device_quiesce(struct scsi_device *sdev) { struct request_queue *q = sdev->request_queue; unsigned int memflags; int err; /* * It is allowed to call scsi_device_quiesce() multiple times from * the same context but concurrent scsi_device_quiesce() calls are * not allowed. */ WARN_ON_ONCE(sdev->quiesced_by && sdev->quiesced_by != current); if (sdev->quiesced_by == current) return 0; blk_set_pm_only(q); memflags = blk_mq_freeze_queue(q); /* * Ensure that the effect of blk_set_pm_only() will be visible * for percpu_ref_tryget() callers that occur after the queue * unfreeze even if the queue was already frozen before this function * was called. See also https://lwn.net/Articles/573497/. */ synchronize_rcu(); blk_mq_unfreeze_queue(q, memflags); mutex_lock(&sdev->state_mutex); err = scsi_device_set_state(sdev, SDEV_QUIESCE); if (err == 0) sdev->quiesced_by = current; else blk_clear_pm_only(q); mutex_unlock(&sdev->state_mutex); return err; } EXPORT_SYMBOL(scsi_device_quiesce); /** * scsi_device_resume - Restart user issued commands to a quiesced device. * @sdev: scsi device to resume. * * Moves the device from quiesced back to running and restarts the * queues. * * Must be called with user context, may sleep. */ void scsi_device_resume(struct scsi_device *sdev) { /* check if the device state was mutated prior to resume, and if * so assume the state is being managed elsewhere (for example * device deleted during suspend) */ mutex_lock(&sdev->state_mutex); if (sdev->sdev_state == SDEV_QUIESCE) scsi_device_set_state(sdev, SDEV_RUNNING); if (sdev->quiesced_by) { sdev->quiesced_by = NULL; blk_clear_pm_only(sdev->request_queue); } mutex_unlock(&sdev->state_mutex); } EXPORT_SYMBOL(scsi_device_resume); static void device_quiesce_fn(struct scsi_device *sdev, void *data) { scsi_device_quiesce(sdev); } void scsi_target_quiesce(struct scsi_target *starget) { starget_for_each_device(starget, NULL, device_quiesce_fn); } EXPORT_SYMBOL(scsi_target_quiesce); static void device_resume_fn(struct scsi_device *sdev, void *data) { scsi_device_resume(sdev); } void scsi_target_resume(struct scsi_target *starget) { starget_for_each_device(starget, NULL, device_resume_fn); } EXPORT_SYMBOL(scsi_target_resume); static int __scsi_internal_device_block_nowait(struct scsi_device *sdev) { if (scsi_device_set_state(sdev, SDEV_BLOCK)) return scsi_device_set_state(sdev, SDEV_CREATED_BLOCK); return 0; } void scsi_start_queue(struct scsi_device *sdev) { if (cmpxchg(&sdev->queue_stopped, 1, 0)) blk_mq_unquiesce_queue(sdev->request_queue); } static void scsi_stop_queue(struct scsi_device *sdev) { /* * The atomic variable of ->queue_stopped covers that * blk_mq_quiesce_queue* is balanced with blk_mq_unquiesce_queue. * * The caller needs to wait until quiesce is done. */ if (!cmpxchg(&sdev->queue_stopped, 0, 1)) blk_mq_quiesce_queue_nowait(sdev->request_queue); } /** * scsi_internal_device_block_nowait - try to transition to the SDEV_BLOCK state * @sdev: device to block * * Pause SCSI command processing on the specified device. Does not sleep. * * Returns zero if successful or a negative error code upon failure. * * Notes: * This routine transitions the device to the SDEV_BLOCK state (which must be * a legal transition). When the device is in this state, command processing * is paused until the device leaves the SDEV_BLOCK state. See also * scsi_internal_device_unblock_nowait(). */ int scsi_internal_device_block_nowait(struct scsi_device *sdev) { int ret = __scsi_internal_device_block_nowait(sdev); /* * The device has transitioned to SDEV_BLOCK. Stop the * block layer from calling the midlayer with this device's * request queue. */ if (!ret) scsi_stop_queue(sdev); return ret; } EXPORT_SYMBOL_GPL(scsi_internal_device_block_nowait); /** * scsi_device_block - try to transition to the SDEV_BLOCK state * @sdev: device to block * @data: dummy argument, ignored * * Pause SCSI command processing on the specified device. Callers must wait * until all ongoing scsi_queue_rq() calls have finished after this function * returns. * * Note: * This routine transitions the device to the SDEV_BLOCK state (which must be * a legal transition). When the device is in this state, command processing * is paused until the device leaves the SDEV_BLOCK state. See also * scsi_internal_device_unblock(). */ static void scsi_device_block(struct scsi_device *sdev, void *data) { int err; enum scsi_device_state state; mutex_lock(&sdev->state_mutex); err = __scsi_internal_device_block_nowait(sdev); state = sdev->sdev_state; if (err == 0) /* * scsi_stop_queue() must be called with the state_mutex * held. Otherwise a simultaneous scsi_start_queue() call * might unquiesce the queue before we quiesce it. */ scsi_stop_queue(sdev); mutex_unlock(&sdev->state_mutex); WARN_ONCE(err, "%s: failed to block %s in state %d\n", __func__, dev_name(&sdev->sdev_gendev), state); } /** * scsi_internal_device_unblock_nowait - resume a device after a block request * @sdev: device to resume * @new_state: state to set the device to after unblocking * * Restart the device queue for a previously suspended SCSI device. Does not * sleep. * * Returns zero if successful or a negative error code upon failure. * * Notes: * This routine transitions the device to the SDEV_RUNNING state or to one of * the offline states (which must be a legal transition) allowing the midlayer * to goose the queue for this device. */ int scsi_internal_device_unblock_nowait(struct scsi_device *sdev, enum scsi_device_state new_state) { switch (new_state) { case SDEV_RUNNING: case SDEV_TRANSPORT_OFFLINE: break; default: return -EINVAL; } /* * Try to transition the scsi device to SDEV_RUNNING or one of the * offlined states and goose the device queue if successful. */ switch (sdev->sdev_state) { case SDEV_BLOCK: case SDEV_TRANSPORT_OFFLINE: sdev->sdev_state = new_state; break; case SDEV_CREATED_BLOCK: if (new_state == SDEV_TRANSPORT_OFFLINE || new_state == SDEV_OFFLINE) sdev->sdev_state = new_state; else sdev->sdev_state = SDEV_CREATED; break; case SDEV_CANCEL: case SDEV_OFFLINE: break; default: return -EINVAL; } scsi_start_queue(sdev); return 0; } EXPORT_SYMBOL_GPL(scsi_internal_device_unblock_nowait); /** * scsi_internal_device_unblock - resume a device after a block request * @sdev: device to resume * @new_state: state to set the device to after unblocking * * Restart the device queue for a previously suspended SCSI device. May sleep. * * Returns zero if successful or a negative error code upon failure. * * Notes: * This routine transitions the device to the SDEV_RUNNING state or to one of * the offline states (which must be a legal transition) allowing the midlayer * to goose the queue for this device. */ static int scsi_internal_device_unblock(struct scsi_device *sdev, enum scsi_device_state new_state) { int ret; mutex_lock(&sdev->state_mutex); ret = scsi_internal_device_unblock_nowait(sdev, new_state); mutex_unlock(&sdev->state_mutex); return ret; } static int target_block(struct device *dev, void *data) { if (scsi_is_target_device(dev)) starget_for_each_device(to_scsi_target(dev), NULL, scsi_device_block); return 0; } /** * scsi_block_targets - transition all SCSI child devices to SDEV_BLOCK state * @dev: a parent device of one or more scsi_target devices * @shost: the Scsi_Host to which this device belongs * * Iterate over all children of @dev, which should be scsi_target devices, * and switch all subordinate scsi devices to SDEV_BLOCK state. Wait for * ongoing scsi_queue_rq() calls to finish. May sleep. * * Note: * @dev must not itself be a scsi_target device. */ void scsi_block_targets(struct Scsi_Host *shost, struct device *dev) { WARN_ON_ONCE(scsi_is_target_device(dev)); device_for_each_child(dev, NULL, target_block); blk_mq_wait_quiesce_done(&shost->tag_set); } EXPORT_SYMBOL_GPL(scsi_block_targets); static void device_unblock(struct scsi_device *sdev, void *data) { scsi_internal_device_unblock(sdev, *(enum scsi_device_state *)data); } static int target_unblock(struct device *dev, void *data) { if (scsi_is_target_device(dev)) starget_for_each_device(to_scsi_target(dev), data, device_unblock); return 0; } void scsi_target_unblock(struct device *dev, enum scsi_device_state new_state) { if (scsi_is_target_device(dev)) starget_for_each_device(to_scsi_target(dev), &new_state, device_unblock); else device_for_each_child(dev, &new_state, target_unblock); } EXPORT_SYMBOL_GPL(scsi_target_unblock); /** * scsi_host_block - Try to transition all logical units to the SDEV_BLOCK state * @shost: device to block * * Pause SCSI command processing for all logical units associated with the SCSI * host and wait until pending scsi_queue_rq() calls have finished. * * Returns zero if successful or a negative error code upon failure. */ int scsi_host_block(struct Scsi_Host *shost) { struct scsi_device *sdev; int ret; /* * Call scsi_internal_device_block_nowait so we can avoid * calling synchronize_rcu() for each LUN. */ shost_for_each_device(sdev, shost) { mutex_lock(&sdev->state_mutex); ret = scsi_internal_device_block_nowait(sdev); mutex_unlock(&sdev->state_mutex); if (ret) { scsi_device_put(sdev); return ret; } } /* Wait for ongoing scsi_queue_rq() calls to finish. */ blk_mq_wait_quiesce_done(&shost->tag_set); return 0; } EXPORT_SYMBOL_GPL(scsi_host_block); int scsi_host_unblock(struct Scsi_Host *shost, int new_state) { struct scsi_device *sdev; int ret = 0; shost_for_each_device(sdev, shost) { ret = scsi_internal_device_unblock(sdev, new_state); if (ret) { scsi_device_put(sdev); break; } } return ret; } EXPORT_SYMBOL_GPL(scsi_host_unblock); /** * scsi_kmap_atomic_sg - find and atomically map an sg-elemnt * @sgl: scatter-gather list * @sg_count: number of segments in sg * @offset: offset in bytes into sg, on return offset into the mapped area * @len: bytes to map, on return number of bytes mapped * * Returns virtual address of the start of the mapped page */ void *scsi_kmap_atomic_sg(struct scatterlist *sgl, int sg_count, size_t *offset, size_t *len) { int i; size_t sg_len = 0, len_complete = 0; struct scatterlist *sg; struct page *page; WARN_ON(!irqs_disabled()); for_each_sg(sgl, sg, sg_count, i) { len_complete = sg_len; /* Complete sg-entries */ sg_len += sg->length; if (sg_len > *offset) break; } if (unlikely(i == sg_count)) { printk(KERN_ERR "%s: Bytes in sg: %zu, requested offset %zu, " "elements %d\n", __func__, sg_len, *offset, sg_count); WARN_ON(1); return NULL; } /* Offset starting from the beginning of first page in this sg-entry */ *offset = *offset - len_complete + sg->offset; page = sg_page(sg) + (*offset >> PAGE_SHIFT); *offset &= ~PAGE_MASK; /* Bytes in this sg-entry from *offset to the end of the page */ sg_len = PAGE_SIZE - *offset; if (*len > sg_len) *len = sg_len; return kmap_atomic(page); } EXPORT_SYMBOL(scsi_kmap_atomic_sg); /** * scsi_kunmap_atomic_sg - atomically unmap a virtual address, previously mapped with scsi_kmap_atomic_sg * @virt: virtual address to be unmapped */ void scsi_kunmap_atomic_sg(void *virt) { kunmap_atomic(virt); } EXPORT_SYMBOL(scsi_kunmap_atomic_sg); void sdev_disable_disk_events(struct scsi_device *sdev) { atomic_inc(&sdev->disk_events_disable_depth); } EXPORT_SYMBOL(sdev_disable_disk_events); void sdev_enable_disk_events(struct scsi_device *sdev) { if (WARN_ON_ONCE(atomic_read(&sdev->disk_events_disable_depth) <= 0)) return; atomic_dec(&sdev->disk_events_disable_depth); } EXPORT_SYMBOL(sdev_enable_disk_events); static unsigned char designator_prio(const unsigned char *d) { if (d[1] & 0x30) /* not associated with LUN */ return 0; if (d[3] == 0) /* invalid length */ return 0; /* * Order of preference for lun descriptor: * - SCSI name string * - NAA IEEE Registered Extended * - EUI-64 based 16-byte * - EUI-64 based 12-byte * - NAA IEEE Registered * - NAA IEEE Extended * - EUI-64 based 8-byte * - SCSI name string (truncated) * - T10 Vendor ID * as longer descriptors reduce the likelyhood * of identification clashes. */ switch (d[1] & 0xf) { case 8: /* SCSI name string, variable-length UTF-8 */ return 9; case 3: switch (d[4] >> 4) { case 6: /* NAA registered extended */ return 8; case 5: /* NAA registered */ return 5; case 4: /* NAA extended */ return 4; case 3: /* NAA locally assigned */ return 1; default: break; } break; case 2: switch (d[3]) { case 16: /* EUI64-based, 16 byte */ return 7; case 12: /* EUI64-based, 12 byte */ return 6; case 8: /* EUI64-based, 8 byte */ return 3; default: break; } break; case 1: /* T10 vendor ID */ return 1; default: break; } return 0; } /** * scsi_vpd_lun_id - return a unique device identification * @sdev: SCSI device * @id: buffer for the identification * @id_len: length of the buffer * * Copies a unique device identification into @id based * on the information in the VPD page 0x83 of the device. * The string will be formatted as a SCSI name string. * * Returns the length of the identification or error on failure. * If the identifier is longer than the supplied buffer the actual * identifier length is returned and the buffer is not zero-padded. */ int scsi_vpd_lun_id(struct scsi_device *sdev, char *id, size_t id_len) { u8 cur_id_prio = 0; u8 cur_id_size = 0; const unsigned char *d, *cur_id_str; const struct scsi_vpd *vpd_pg83; int id_size = -EINVAL; rcu_read_lock(); vpd_pg83 = rcu_dereference(sdev->vpd_pg83); if (!vpd_pg83) { rcu_read_unlock(); return -ENXIO; } /* The id string must be at least 20 bytes + terminating NULL byte */ if (id_len < 21) { rcu_read_unlock(); return -EINVAL; } memset(id, 0, id_len); for (d = vpd_pg83->data + 4; d < vpd_pg83->data + vpd_pg83->len; d += d[3] + 4) { u8 prio = designator_prio(d); if (prio == 0 || cur_id_prio > prio) continue; switch (d[1] & 0xf) { case 0x1: /* T10 Vendor ID */ if (cur_id_size > d[3]) break; cur_id_prio = prio; cur_id_size = d[3]; if (cur_id_size + 4 > id_len) cur_id_size = id_len - 4; cur_id_str = d + 4; id_size = snprintf(id, id_len, "t10.%*pE", cur_id_size, cur_id_str); break; case 0x2: /* EUI-64 */ cur_id_prio = prio; cur_id_size = d[3]; cur_id_str = d + 4; switch (cur_id_size) { case 8: id_size = snprintf(id, id_len, "eui.%8phN", cur_id_str); break; case 12: id_size = snprintf(id, id_len, "eui.%12phN", cur_id_str); break; case 16: id_size = snprintf(id, id_len, "eui.%16phN", cur_id_str); break; default: break; } break; case 0x3: /* NAA */ cur_id_prio = prio; cur_id_size = d[3]; cur_id_str = d + 4; switch (cur_id_size) { case 8: id_size = snprintf(id, id_len, "naa.%8phN", cur_id_str); break; case 16: id_size = snprintf(id, id_len, "naa.%16phN", cur_id_str); break; default: break; } break; case 0x8: /* SCSI name string */ if (cur_id_size > d[3]) break; /* Prefer others for truncated descriptor */ if (d[3] > id_len) { prio = 2; if (cur_id_prio > prio) break; } cur_id_prio = prio; cur_id_size = id_size = d[3]; cur_id_str = d + 4; if (cur_id_size >= id_len) cur_id_size = id_len - 1; memcpy(id, cur_id_str, cur_id_size); break; default: break; } } rcu_read_unlock(); return id_size; } EXPORT_SYMBOL(scsi_vpd_lun_id); /** * scsi_vpd_tpg_id - return a target port group identifier * @sdev: SCSI device * @rel_id: pointer to return relative target port in if not %NULL * * Returns the Target Port Group identifier from the information * from VPD page 0x83 of the device. * Optionally sets @rel_id to the relative target port on success. * * Return: the identifier or error on failure. */ int scsi_vpd_tpg_id(struct scsi_device *sdev, int *rel_id) { const unsigned char *d; const struct scsi_vpd *vpd_pg83; int group_id = -EAGAIN, rel_port = -1; rcu_read_lock(); vpd_pg83 = rcu_dereference(sdev->vpd_pg83); if (!vpd_pg83) { rcu_read_unlock(); return -ENXIO; } d = vpd_pg83->data + 4; while (d < vpd_pg83->data + vpd_pg83->len) { switch (d[1] & 0xf) { case 0x4: /* Relative target port */ rel_port = get_unaligned_be16(&d[6]); break; case 0x5: /* Target port group */ group_id = get_unaligned_be16(&d[6]); break; default: break; } d += d[3] + 4; } rcu_read_unlock(); if (group_id >= 0 && rel_id && rel_port != -1) *rel_id = rel_port; return group_id; } EXPORT_SYMBOL(scsi_vpd_tpg_id); /** * scsi_build_sense - build sense data for a command * @scmd: scsi command for which the sense should be formatted * @desc: Sense format (non-zero == descriptor format, * 0 == fixed format) * @key: Sense key * @asc: Additional sense code * @ascq: Additional sense code qualifier * **/ void scsi_build_sense(struct scsi_cmnd *scmd, int desc, u8 key, u8 asc, u8 ascq) { scsi_build_sense_buffer(desc, scmd->sense_buffer, key, asc, ascq); scmd->result = SAM_STAT_CHECK_CONDITION; } EXPORT_SYMBOL_GPL(scsi_build_sense); #ifdef CONFIG_SCSI_LIB_KUNIT_TEST #include "scsi_lib_test.c" #endif
402 205 10 269 270 206 72 208 206 20 188 4 1 3 209 208 10 141 70 210 208 10 2 10 10 19 189 205 10 76 7 194 396 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 // SPDX-License-Identifier: GPL-2.0-or-later /* SCTP kernel implementation * Copyright (c) 1999-2000 Cisco, Inc. * Copyright (c) 1999-2001 Motorola, Inc. * Copyright (c) 2002 International Business Machines, Corp. * * This file is part of the SCTP kernel implementation * * These functions are the methods for accessing the SCTP inqueue. * * An SCTP inqueue is a queue into which you push SCTP packets * (which might be bundles or fragments of chunks) and out of which you * pop SCTP whole chunks. * * Please send any bug reports or fixes you make to the * email address(es): * lksctp developers <linux-sctp@vger.kernel.org> * * Written or modified by: * La Monte H.P. Yarroll <piggy@acm.org> * Karl Knutson <karl@athena.chicago.il.us> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <net/sctp/sctp.h> #include <net/sctp/sm.h> #include <linux/interrupt.h> #include <linux/slab.h> /* Initialize an SCTP inqueue. */ void sctp_inq_init(struct sctp_inq *queue) { INIT_LIST_HEAD(&queue->in_chunk_list); queue->in_progress = NULL; /* Create a task for delivering data. */ INIT_WORK(&queue->immediate, NULL); } /* Properly release the chunk which is being worked on. */ static inline void sctp_inq_chunk_free(struct sctp_chunk *chunk) { if (chunk->head_skb) chunk->skb = chunk->head_skb; sctp_chunk_free(chunk); } /* Release the memory associated with an SCTP inqueue. */ void sctp_inq_free(struct sctp_inq *queue) { struct sctp_chunk *chunk, *tmp; /* Empty the queue. */ list_for_each_entry_safe(chunk, tmp, &queue->in_chunk_list, list) { list_del_init(&chunk->list); sctp_chunk_free(chunk); } /* If there is a packet which is currently being worked on, * free it as well. */ if (queue->in_progress) { sctp_inq_chunk_free(queue->in_progress); queue->in_progress = NULL; } } /* Put a new packet in an SCTP inqueue. * We assume that packet->sctp_hdr is set and in host byte order. */ void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *chunk) { /* Directly call the packet handling routine. */ if (chunk->rcvr->dead) { sctp_chunk_free(chunk); return; } /* We are now calling this either from the soft interrupt * or from the backlog processing. * Eventually, we should clean up inqueue to not rely * on the BH related data structures. */ list_add_tail(&chunk->list, &q->in_chunk_list); if (chunk->asoc) chunk->asoc->stats.ipackets++; q->immediate.func(&q->immediate); } /* Peek at the next chunk on the inqeue. */ struct sctp_chunkhdr *sctp_inq_peek(struct sctp_inq *queue) { struct sctp_chunk *chunk; struct sctp_chunkhdr *ch = NULL; chunk = queue->in_progress; /* If there is no more chunks in this packet, say so */ if (chunk->singleton || chunk->end_of_packet || chunk->pdiscard) return NULL; ch = (struct sctp_chunkhdr *)chunk->chunk_end; return ch; } /* Extract a chunk from an SCTP inqueue. * * WARNING: If you need to put the chunk on another queue, you need to * make a shallow copy (clone) of it. */ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue) { struct sctp_chunk *chunk; struct sctp_chunkhdr *ch = NULL; /* The assumption is that we are safe to process the chunks * at this time. */ chunk = queue->in_progress; if (chunk) { /* There is a packet that we have been working on. * Any post processing work to do before we move on? */ if (chunk->singleton || chunk->end_of_packet || chunk->pdiscard) { if (chunk->head_skb == chunk->skb) { chunk->skb = skb_shinfo(chunk->skb)->frag_list; goto new_skb; } if (chunk->skb->next) { chunk->skb = chunk->skb->next; goto new_skb; } sctp_inq_chunk_free(chunk); chunk = queue->in_progress = NULL; } else { /* Nothing to do. Next chunk in the packet, please. */ ch = (struct sctp_chunkhdr *)chunk->chunk_end; /* Force chunk->skb->data to chunk->chunk_end. */ skb_pull(chunk->skb, chunk->chunk_end - chunk->skb->data); /* We are guaranteed to pull a SCTP header. */ } } /* Do we need to take the next packet out of the queue to process? */ if (!chunk) { struct list_head *entry; next_chunk: /* Is the queue empty? */ entry = sctp_list_dequeue(&queue->in_chunk_list); if (!entry) return NULL; chunk = list_entry(entry, struct sctp_chunk, list); if (skb_is_gso(chunk->skb) && skb_is_gso_sctp(chunk->skb)) { /* GSO-marked skbs but without frags, handle * them normally */ if (skb_shinfo(chunk->skb)->frag_list) chunk->head_skb = chunk->skb; /* skbs with "cover letter" */ if (chunk->head_skb && chunk->skb->data_len == chunk->skb->len) chunk->skb = skb_shinfo(chunk->skb)->frag_list; if (WARN_ON(!chunk->skb)) { __SCTP_INC_STATS(dev_net(chunk->skb->dev), SCTP_MIB_IN_PKT_DISCARDS); sctp_chunk_free(chunk); goto next_chunk; } } if (chunk->asoc) sock_rps_save_rxhash(chunk->asoc->base.sk, chunk->skb); queue->in_progress = chunk; new_skb: /* This is the first chunk in the packet. */ ch = (struct sctp_chunkhdr *)chunk->skb->data; chunk->singleton = 1; chunk->data_accepted = 0; chunk->pdiscard = 0; chunk->auth = 0; chunk->has_asconf = 0; chunk->end_of_packet = 0; if (chunk->head_skb) { struct sctp_input_cb *cb = SCTP_INPUT_CB(chunk->skb), *head_cb = SCTP_INPUT_CB(chunk->head_skb); cb->chunk = head_cb->chunk; cb->af = head_cb->af; } } chunk->chunk_hdr = ch; chunk->chunk_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length)); skb_pull(chunk->skb, sizeof(*ch)); chunk->subh.v = NULL; /* Subheader is no longer valid. */ if (chunk->chunk_end + sizeof(*ch) <= skb_tail_pointer(chunk->skb)) { /* This is not a singleton */ chunk->singleton = 0; } else if (chunk->chunk_end > skb_tail_pointer(chunk->skb)) { /* Discard inside state machine. */ chunk->pdiscard = 1; chunk->chunk_end = skb_tail_pointer(chunk->skb); } else { /* We are at the end of the packet, so mark the chunk * in case we need to send a SACK. */ chunk->end_of_packet = 1; } pr_debug("+++sctp_inq_pop+++ chunk:%p[%s], length:%d, skb->len:%d\n", chunk, sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)), ntohs(chunk->chunk_hdr->length), chunk->skb->len); return chunk; } /* Set a top-half handler. * * Originally, we the top-half handler was scheduled as a BH. We now * call the handler directly in sctp_inq_push() at a time that * we know we are lock safe. * The intent is that this routine will pull stuff out of the * inqueue and process it. */ void sctp_inq_set_th_handler(struct sctp_inq *q, work_func_t callback) { INIT_WORK(&q->immediate, callback); }
14 13 13 1 13 13 13 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/ufs/util.c * * Copyright (C) 1998 * Daniel Pirkl <daniel.pirkl@email.cz> * Charles University, Faculty of Mathematics and Physics */ #include <linux/string.h> #include <linux/slab.h> #include <linux/buffer_head.h> #include "ufs_fs.h" #include "ufs.h" #include "swab.h" #include "util.h" struct ufs_buffer_head * _ubh_bread_ (struct ufs_sb_private_info * uspi, struct super_block *sb, u64 fragment, u64 size) { struct ufs_buffer_head * ubh; unsigned i, j ; u64 count = 0; if (size & ~uspi->s_fmask) return NULL; count = size >> uspi->s_fshift; if (count > UFS_MAXFRAG) return NULL; ubh = kmalloc (sizeof (struct ufs_buffer_head), GFP_NOFS); if (!ubh) return NULL; ubh->fragment = fragment; ubh->count = count; for (i = 0; i < count; i++) if (!(ubh->bh[i] = sb_bread(sb, fragment + i))) goto failed; for (; i < UFS_MAXFRAG; i++) ubh->bh[i] = NULL; return ubh; failed: for (j = 0; j < i; j++) brelse (ubh->bh[j]); kfree(ubh); return NULL; } struct ufs_buffer_head * ubh_bread_uspi (struct ufs_sb_private_info * uspi, struct super_block *sb, u64 fragment, u64 size) { unsigned i, j; u64 count = 0; if (size & ~uspi->s_fmask) return NULL; count = size >> uspi->s_fshift; if (count <= 0 || count > UFS_MAXFRAG) return NULL; USPI_UBH(uspi)->fragment = fragment; USPI_UBH(uspi)->count = count; for (i = 0; i < count; i++) if (!(USPI_UBH(uspi)->bh[i] = sb_bread(sb, fragment + i))) goto failed; for (; i < UFS_MAXFRAG; i++) USPI_UBH(uspi)->bh[i] = NULL; return USPI_UBH(uspi); failed: for (j = 0; j < i; j++) brelse (USPI_UBH(uspi)->bh[j]); return NULL; } void ubh_brelse (struct ufs_buffer_head * ubh) { unsigned i; if (!ubh) return; for (i = 0; i < ubh->count; i++) brelse (ubh->bh[i]); kfree (ubh); } void ubh_brelse_uspi (struct ufs_sb_private_info * uspi) { unsigned i; if (!USPI_UBH(uspi)) return; for ( i = 0; i < USPI_UBH(uspi)->count; i++ ) { brelse (USPI_UBH(uspi)->bh[i]); USPI_UBH(uspi)->bh[i] = NULL; } } void ubh_mark_buffer_dirty (struct ufs_buffer_head * ubh) { unsigned i; if (!ubh) return; for ( i = 0; i < ubh->count; i++ ) mark_buffer_dirty (ubh->bh[i]); } void ubh_sync_block(struct ufs_buffer_head *ubh) { if (ubh) { unsigned i; for (i = 0; i < ubh->count; i++) write_dirty_buffer(ubh->bh[i], 0); for (i = 0; i < ubh->count; i++) wait_on_buffer(ubh->bh[i]); } } void ubh_bforget (struct ufs_buffer_head * ubh) { unsigned i; if (!ubh) return; for ( i = 0; i < ubh->count; i++ ) if ( ubh->bh[i] ) bforget (ubh->bh[i]); } int ubh_buffer_dirty (struct ufs_buffer_head * ubh) { unsigned i; unsigned result = 0; if (!ubh) return 0; for ( i = 0; i < ubh->count; i++ ) result |= buffer_dirty(ubh->bh[i]); return result; } dev_t ufs_get_inode_dev(struct super_block *sb, struct ufs_inode_info *ufsi) { __u32 fs32; dev_t dev; if ((UFS_SB(sb)->s_flags & UFS_ST_MASK) == UFS_ST_SUNx86) fs32 = fs32_to_cpu(sb, ufsi->i_u1.i_data[1]); else fs32 = fs32_to_cpu(sb, ufsi->i_u1.i_data[0]); switch (UFS_SB(sb)->s_flags & UFS_ST_MASK) { case UFS_ST_SUNx86: case UFS_ST_SUN: if ((fs32 & 0xffff0000) == 0 || (fs32 & 0xffff0000) == 0xffff0000) dev = old_decode_dev(fs32 & 0x7fff); else dev = MKDEV(sysv_major(fs32), sysv_minor(fs32)); break; default: dev = old_decode_dev(fs32); break; } return dev; } void ufs_set_inode_dev(struct super_block *sb, struct ufs_inode_info *ufsi, dev_t dev) { __u32 fs32; switch (UFS_SB(sb)->s_flags & UFS_ST_MASK) { case UFS_ST_SUNx86: case UFS_ST_SUN: fs32 = sysv_encode_dev(dev); if ((fs32 & 0xffff8000) == 0) { fs32 = old_encode_dev(dev); } break; default: fs32 = old_encode_dev(dev); break; } if ((UFS_SB(sb)->s_flags & UFS_ST_MASK) == UFS_ST_SUNx86) ufsi->i_u1.i_data[1] = cpu_to_fs32(sb, fs32); else ufsi->i_u1.i_data[0] = cpu_to_fs32(sb, fs32); } /** * ufs_get_locked_folio() - locate, pin and lock a pagecache folio, if not exist * read it from disk. * @mapping: the address_space to search * @index: the page index * * Locates the desired pagecache folio, if not exist we'll read it, * locks it, increments its reference * count and returns its address. * */ struct folio *ufs_get_locked_folio(struct address_space *mapping, pgoff_t index) { struct inode *inode = mapping->host; struct folio *folio = filemap_lock_folio(mapping, index); if (IS_ERR(folio)) { folio = read_mapping_folio(mapping, index, NULL); if (IS_ERR(folio)) { printk(KERN_ERR "ufs_change_blocknr: read_mapping_folio error: ino %lu, index: %lu\n", mapping->host->i_ino, index); return folio; } folio_lock(folio); if (unlikely(folio->mapping == NULL)) { /* Truncate got there first */ folio_unlock(folio); folio_put(folio); return NULL; } } if (!folio_buffers(folio)) create_empty_buffers(folio, 1 << inode->i_blkbits, 0); return folio; }
23 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 /* * Copyright (c) 2007-2011 Atheros Communications Inc. * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include "core.h" #include "debug.h" #include "hif-ops.h" #define HTC_PACKET_CONTAINER_ALLOCATION 32 #define HTC_CONTROL_BUFFER_SIZE (HTC_MAX_CTRL_MSG_LEN + HTC_HDR_LENGTH) static int ath6kl_htc_pipe_tx(struct htc_target *handle, struct htc_packet *packet); static void ath6kl_htc_pipe_cleanup(struct htc_target *handle); /* htc pipe tx path */ static inline void restore_tx_packet(struct htc_packet *packet) { if (packet->info.tx.flags & HTC_FLAGS_TX_FIXUP_NETBUF) { skb_pull(packet->skb, sizeof(struct htc_frame_hdr)); packet->info.tx.flags &= ~HTC_FLAGS_TX_FIXUP_NETBUF; } } static void do_send_completion(struct htc_endpoint *ep, struct list_head *queue_to_indicate) { struct htc_packet *packet; if (list_empty(queue_to_indicate)) { /* nothing to indicate */ return; } if (ep->ep_cb.tx_comp_multi != NULL) { ath6kl_dbg(ATH6KL_DBG_HTC, "%s: calling ep %d, send complete multiple callback (%d pkts)\n", __func__, ep->eid, get_queue_depth(queue_to_indicate)); /* * a multiple send complete handler is being used, * pass the queue to the handler */ ep->ep_cb.tx_comp_multi(ep->target, queue_to_indicate); /* * all packets are now owned by the callback, * reset queue to be safe */ INIT_LIST_HEAD(queue_to_indicate); } else { /* using legacy EpTxComplete */ do { packet = list_first_entry(queue_to_indicate, struct htc_packet, list); list_del(&packet->list); ath6kl_dbg(ATH6KL_DBG_HTC, "%s: calling ep %d send complete callback on packet 0x%p\n", __func__, ep->eid, packet); ep->ep_cb.tx_complete(ep->target, packet); } while (!list_empty(queue_to_indicate)); } } static void send_packet_completion(struct htc_target *target, struct htc_packet *packet) { struct htc_endpoint *ep = &target->endpoint[packet->endpoint]; struct list_head container; restore_tx_packet(packet); INIT_LIST_HEAD(&container); list_add_tail(&packet->list, &container); /* do completion */ do_send_completion(ep, &container); } static void get_htc_packet_credit_based(struct htc_target *target, struct htc_endpoint *ep, struct list_head *queue) { int credits_required; int remainder; u8 send_flags; struct htc_packet *packet; unsigned int transfer_len; /* NOTE : the TX lock is held when this function is called */ /* loop until we can grab as many packets out of the queue as we can */ while (true) { send_flags = 0; if (list_empty(&ep->txq)) break; /* get packet at head, but don't remove it */ packet = list_first_entry(&ep->txq, struct htc_packet, list); ath6kl_dbg(ATH6KL_DBG_HTC, "%s: got head packet:0x%p , queue depth: %d\n", __func__, packet, get_queue_depth(&ep->txq)); transfer_len = packet->act_len + HTC_HDR_LENGTH; if (transfer_len <= target->tgt_cred_sz) { credits_required = 1; } else { /* figure out how many credits this message requires */ credits_required = transfer_len / target->tgt_cred_sz; remainder = transfer_len % target->tgt_cred_sz; if (remainder) credits_required++; } ath6kl_dbg(ATH6KL_DBG_HTC, "%s: creds required:%d got:%d\n", __func__, credits_required, ep->cred_dist.credits); if (ep->eid == ENDPOINT_0) { /* * endpoint 0 is special, it always has a credit and * does not require credit based flow control */ credits_required = 0; } else { if (ep->cred_dist.credits < credits_required) break; ep->cred_dist.credits -= credits_required; ep->ep_st.cred_cosumd += credits_required; /* check if we need credits back from the target */ if (ep->cred_dist.credits < ep->cred_dist.cred_per_msg) { /* tell the target we need credits ASAP! */ send_flags |= HTC_FLAGS_NEED_CREDIT_UPDATE; ep->ep_st.cred_low_indicate += 1; ath6kl_dbg(ATH6KL_DBG_HTC, "%s: host needs credits\n", __func__); } } /* now we can fully dequeue */ packet = list_first_entry(&ep->txq, struct htc_packet, list); list_del(&packet->list); /* save the number of credits this packet consumed */ packet->info.tx.cred_used = credits_required; /* save send flags */ packet->info.tx.flags = send_flags; packet->info.tx.seqno = ep->seqno; ep->seqno++; /* queue this packet into the caller's queue */ list_add_tail(&packet->list, queue); } } static void get_htc_packet(struct htc_target *target, struct htc_endpoint *ep, struct list_head *queue, int resources) { struct htc_packet *packet; /* NOTE : the TX lock is held when this function is called */ /* loop until we can grab as many packets out of the queue as we can */ while (resources) { if (list_empty(&ep->txq)) break; packet = list_first_entry(&ep->txq, struct htc_packet, list); list_del(&packet->list); ath6kl_dbg(ATH6KL_DBG_HTC, "%s: got packet:0x%p , new queue depth: %d\n", __func__, packet, get_queue_depth(&ep->txq)); packet->info.tx.seqno = ep->seqno; packet->info.tx.flags = 0; packet->info.tx.cred_used = 0; ep->seqno++; /* queue this packet into the caller's queue */ list_add_tail(&packet->list, queue); resources--; } } static int htc_issue_packets(struct htc_target *target, struct htc_endpoint *ep, struct list_head *pkt_queue) { int status = 0; u16 payload_len; struct sk_buff *skb; struct htc_frame_hdr *htc_hdr; struct htc_packet *packet; ath6kl_dbg(ATH6KL_DBG_HTC, "%s: queue: 0x%p, pkts %d\n", __func__, pkt_queue, get_queue_depth(pkt_queue)); while (!list_empty(pkt_queue)) { packet = list_first_entry(pkt_queue, struct htc_packet, list); list_del(&packet->list); skb = packet->skb; if (!skb) { WARN_ON_ONCE(1); status = -EINVAL; break; } payload_len = packet->act_len; /* setup HTC frame header */ htc_hdr = skb_push(skb, sizeof(*htc_hdr)); if (!htc_hdr) { WARN_ON_ONCE(1); status = -EINVAL; break; } packet->info.tx.flags |= HTC_FLAGS_TX_FIXUP_NETBUF; put_unaligned_le16(payload_len, &htc_hdr->payld_len); htc_hdr->flags = packet->info.tx.flags; htc_hdr->eid = (u8) packet->endpoint; htc_hdr->ctrl[0] = 0; htc_hdr->ctrl[1] = (u8) packet->info.tx.seqno; spin_lock_bh(&target->tx_lock); /* store in look up queue to match completions */ list_add_tail(&packet->list, &ep->pipe.tx_lookup_queue); ep->ep_st.tx_issued += 1; spin_unlock_bh(&target->tx_lock); status = ath6kl_hif_pipe_send(target->dev->ar, ep->pipe.pipeid_ul, NULL, skb); if (status != 0) { if (status != -ENOMEM) { /* TODO: if more than 1 endpoint maps to the * same PipeID, it is possible to run out of * resources in the HIF layer. * Don't emit the error */ ath6kl_dbg(ATH6KL_DBG_HTC, "%s: failed status:%d\n", __func__, status); } spin_lock_bh(&target->tx_lock); list_del(&packet->list); /* reclaim credits */ ep->cred_dist.credits += packet->info.tx.cred_used; spin_unlock_bh(&target->tx_lock); /* put it back into the callers queue */ list_add(&packet->list, pkt_queue); break; } } if (status != 0) { while (!list_empty(pkt_queue)) { if (status != -ENOMEM) { ath6kl_dbg(ATH6KL_DBG_HTC, "%s: failed pkt:0x%p status:%d\n", __func__, packet, status); } packet = list_first_entry(pkt_queue, struct htc_packet, list); list_del(&packet->list); packet->status = status; send_packet_completion(target, packet); } } return status; } static enum htc_send_queue_result htc_try_send(struct htc_target *target, struct htc_endpoint *ep, struct list_head *txq) { struct list_head send_queue; /* temp queue to hold packets */ struct htc_packet *packet, *tmp_pkt; struct ath6kl *ar = target->dev->ar; enum htc_send_full_action action; int tx_resources, overflow, txqueue_depth, i, good_pkts; u8 pipeid; ath6kl_dbg(ATH6KL_DBG_HTC, "%s: (queue:0x%p depth:%d)\n", __func__, txq, (txq == NULL) ? 0 : get_queue_depth(txq)); /* init the local send queue */ INIT_LIST_HEAD(&send_queue); /* * txq equals to NULL means * caller didn't provide a queue, just wants us to * check queues and send */ if (txq != NULL) { if (list_empty(txq)) { /* empty queue */ return HTC_SEND_QUEUE_DROP; } spin_lock_bh(&target->tx_lock); txqueue_depth = get_queue_depth(&ep->txq); spin_unlock_bh(&target->tx_lock); if (txqueue_depth >= ep->max_txq_depth) { /* we've already overflowed */ overflow = get_queue_depth(txq); } else { /* get how much we will overflow by */ overflow = txqueue_depth; overflow += get_queue_depth(txq); /* get how much we will overflow the TX queue by */ overflow -= ep->max_txq_depth; } /* if overflow is negative or zero, we are okay */ if (overflow > 0) { ath6kl_dbg(ATH6KL_DBG_HTC, "%s: Endpoint %d, TX queue will overflow :%d, Tx Depth:%d, Max:%d\n", __func__, ep->eid, overflow, txqueue_depth, ep->max_txq_depth); } if ((overflow <= 0) || (ep->ep_cb.tx_full == NULL)) { /* * all packets will fit or caller did not provide send * full indication handler -- just move all of them * to the local send_queue object */ list_splice_tail_init(txq, &send_queue); } else { good_pkts = get_queue_depth(txq) - overflow; if (good_pkts < 0) { WARN_ON_ONCE(1); return HTC_SEND_QUEUE_DROP; } /* we have overflowed, and a callback is provided */ /* dequeue all non-overflow packets to the sendqueue */ for (i = 0; i < good_pkts; i++) { /* pop off caller's queue */ packet = list_first_entry(txq, struct htc_packet, list); /* move to local queue */ list_move_tail(&packet->list, &send_queue); } /* * the caller's queue has all the packets that won't fit * walk through the caller's queue and indicate each to * the send full handler */ list_for_each_entry_safe(packet, tmp_pkt, txq, list) { ath6kl_dbg(ATH6KL_DBG_HTC, "%s: Indicate overflowed TX pkts: %p\n", __func__, packet); action = ep->ep_cb.tx_full(ep->target, packet); if (action == HTC_SEND_FULL_DROP) { /* callback wants the packet dropped */ ep->ep_st.tx_dropped += 1; /* leave this one in the caller's queue * for cleanup */ } else { /* callback wants to keep this packet, * move from caller's queue to the send * queue */ list_move_tail(&packet->list, &send_queue); } } if (list_empty(&send_queue)) { /* no packets made it in, caller will cleanup */ return HTC_SEND_QUEUE_DROP; } } } if (!ep->pipe.tx_credit_flow_enabled) { tx_resources = ath6kl_hif_pipe_get_free_queue_number(ar, ep->pipe.pipeid_ul); } else { tx_resources = 0; } spin_lock_bh(&target->tx_lock); if (!list_empty(&send_queue)) { /* transfer packets to tail */ list_splice_tail_init(&send_queue, &ep->txq); if (!list_empty(&send_queue)) { WARN_ON_ONCE(1); spin_unlock_bh(&target->tx_lock); return HTC_SEND_QUEUE_DROP; } INIT_LIST_HEAD(&send_queue); } /* increment tx processing count on entry */ ep->tx_proc_cnt++; if (ep->tx_proc_cnt > 1) { /* * Another thread or task is draining the TX queues on this * endpoint that thread will reset the tx processing count * when the queue is drained. */ ep->tx_proc_cnt--; spin_unlock_bh(&target->tx_lock); return HTC_SEND_QUEUE_OK; } /***** beyond this point only 1 thread may enter ******/ /* * Now drain the endpoint TX queue for transmission as long as we have * enough transmit resources. */ while (true) { if (get_queue_depth(&ep->txq) == 0) break; if (ep->pipe.tx_credit_flow_enabled) { /* * Credit based mechanism provides flow control * based on target transmit resource availability, * we assume that the HIF layer will always have * bus resources greater than target transmit * resources. */ get_htc_packet_credit_based(target, ep, &send_queue); } else { /* * Get all packets for this endpoint that we can * for this pass. */ get_htc_packet(target, ep, &send_queue, tx_resources); } if (get_queue_depth(&send_queue) == 0) { /* * Didn't get packets due to out of resources or TX * queue was drained. */ break; } spin_unlock_bh(&target->tx_lock); /* send what we can */ htc_issue_packets(target, ep, &send_queue); if (!ep->pipe.tx_credit_flow_enabled) { pipeid = ep->pipe.pipeid_ul; tx_resources = ath6kl_hif_pipe_get_free_queue_number(ar, pipeid); } spin_lock_bh(&target->tx_lock); } /* done with this endpoint, we can clear the count */ ep->tx_proc_cnt = 0; spin_unlock_bh(&target->tx_lock); return HTC_SEND_QUEUE_OK; } /* htc control packet manipulation */ static void destroy_htc_txctrl_packet(struct htc_packet *packet) { struct sk_buff *skb; skb = packet->skb; dev_kfree_skb(skb); kfree(packet); } static struct htc_packet *build_htc_txctrl_packet(void) { struct htc_packet *packet = NULL; struct sk_buff *skb; packet = kzalloc(sizeof(struct htc_packet), GFP_KERNEL); if (packet == NULL) return NULL; skb = __dev_alloc_skb(HTC_CONTROL_BUFFER_SIZE, GFP_KERNEL); if (skb == NULL) { kfree(packet); return NULL; } packet->skb = skb; return packet; } static void htc_free_txctrl_packet(struct htc_target *target, struct htc_packet *packet) { destroy_htc_txctrl_packet(packet); } static struct htc_packet *htc_alloc_txctrl_packet(struct htc_target *target) { return build_htc_txctrl_packet(); } static void htc_txctrl_complete(struct htc_target *target, struct htc_packet *packet) { htc_free_txctrl_packet(target, packet); } #define MAX_MESSAGE_SIZE 1536 static int htc_setup_target_buffer_assignments(struct htc_target *target) { int status, credits, credit_per_maxmsg, i; struct htc_pipe_txcredit_alloc *entry; unsigned int hif_usbaudioclass = 0; credit_per_maxmsg = MAX_MESSAGE_SIZE / target->tgt_cred_sz; if (MAX_MESSAGE_SIZE % target->tgt_cred_sz) credit_per_maxmsg++; /* TODO, this should be configured by the caller! */ credits = target->tgt_creds; entry = &target->pipe.txcredit_alloc[0]; status = -ENOMEM; /* FIXME: hif_usbaudioclass is always zero */ if (hif_usbaudioclass) { ath6kl_dbg(ATH6KL_DBG_HTC, "%s: For USB Audio Class- Total:%d\n", __func__, credits); entry++; entry++; /* Setup VO Service To have Max Credits */ entry->service_id = WMI_DATA_VO_SVC; entry->credit_alloc = (credits - 6); if (entry->credit_alloc == 0) entry->credit_alloc++; credits -= (int) entry->credit_alloc; if (credits <= 0) return status; entry++; entry->service_id = WMI_CONTROL_SVC; entry->credit_alloc = credit_per_maxmsg; credits -= (int) entry->credit_alloc; if (credits <= 0) return status; /* leftovers go to best effort */ entry++; entry++; entry->service_id = WMI_DATA_BE_SVC; entry->credit_alloc = (u8) credits; status = 0; } else { entry++; entry->service_id = WMI_DATA_VI_SVC; entry->credit_alloc = credits / 4; if (entry->credit_alloc == 0) entry->credit_alloc++; credits -= (int) entry->credit_alloc; if (credits <= 0) return status; entry++; entry->service_id = WMI_DATA_VO_SVC; entry->credit_alloc = credits / 4; if (entry->credit_alloc == 0) entry->credit_alloc++; credits -= (int) entry->credit_alloc; if (credits <= 0) return status; entry++; entry->service_id = WMI_CONTROL_SVC; entry->credit_alloc = credit_per_maxmsg; credits -= (int) entry->credit_alloc; if (credits <= 0) return status; entry++; entry->service_id = WMI_DATA_BK_SVC; entry->credit_alloc = credit_per_maxmsg; credits -= (int) entry->credit_alloc; if (credits <= 0) return status; /* leftovers go to best effort */ entry++; entry->service_id = WMI_DATA_BE_SVC; entry->credit_alloc = (u8) credits; status = 0; } if (status == 0) { for (i = 0; i < ENDPOINT_MAX; i++) { if (target->pipe.txcredit_alloc[i].service_id != 0) { ath6kl_dbg(ATH6KL_DBG_HTC, "HTC Service Index : %d TX : 0x%2.2X : alloc:%d\n", i, target->pipe.txcredit_alloc[i]. service_id, target->pipe.txcredit_alloc[i]. credit_alloc); } } } return status; } /* process credit reports and call distribution function */ static void htc_process_credit_report(struct htc_target *target, struct htc_credit_report *rpt, int num_entries, enum htc_endpoint_id from_ep) { int total_credits = 0, i; struct htc_endpoint *ep; /* lock out TX while we update credits */ spin_lock_bh(&target->tx_lock); for (i = 0; i < num_entries; i++, rpt++) { if (rpt->eid >= ENDPOINT_MAX) { WARN_ON_ONCE(1); spin_unlock_bh(&target->tx_lock); return; } ep = &target->endpoint[rpt->eid]; ep->cred_dist.credits += rpt->credits; if (ep->cred_dist.credits && get_queue_depth(&ep->txq)) { spin_unlock_bh(&target->tx_lock); htc_try_send(target, ep, NULL); spin_lock_bh(&target->tx_lock); } total_credits += rpt->credits; } ath6kl_dbg(ATH6KL_DBG_HTC, "Report indicated %d credits to distribute\n", total_credits); spin_unlock_bh(&target->tx_lock); } /* flush endpoint TX queue */ static void htc_flush_tx_endpoint(struct htc_target *target, struct htc_endpoint *ep, u16 tag) { struct htc_packet *packet; spin_lock_bh(&target->tx_lock); while (get_queue_depth(&ep->txq)) { packet = list_first_entry(&ep->txq, struct htc_packet, list); list_del(&packet->list); packet->status = 0; send_packet_completion(target, packet); } spin_unlock_bh(&target->tx_lock); } /* * In the adapted HIF layer, struct sk_buff * are passed between HIF and HTC, * since upper layers expects struct htc_packet containers we use the completed * skb and lookup it's corresponding HTC packet buffer from a lookup list. * This is extra overhead that can be fixed by re-aligning HIF interfaces with * HTC. */ static struct htc_packet *htc_lookup_tx_packet(struct htc_target *target, struct htc_endpoint *ep, struct sk_buff *skb) { struct htc_packet *packet, *tmp_pkt, *found_packet = NULL; spin_lock_bh(&target->tx_lock); /* * iterate from the front of tx lookup queue * this lookup should be fast since lower layers completes in-order and * so the completed packet should be at the head of the list generally */ list_for_each_entry_safe(packet, tmp_pkt, &ep->pipe.tx_lookup_queue, list) { /* check for removal */ if (skb == packet->skb) { /* found it */ list_del(&packet->list); found_packet = packet; break; } } spin_unlock_bh(&target->tx_lock); return found_packet; } static int ath6kl_htc_pipe_tx_complete(struct ath6kl *ar, struct sk_buff *skb) { struct htc_target *target = ar->htc_target; struct htc_frame_hdr *htc_hdr; struct htc_endpoint *ep; struct htc_packet *packet; u8 ep_id, *netdata; netdata = skb->data; htc_hdr = (struct htc_frame_hdr *) netdata; ep_id = htc_hdr->eid; ep = &target->endpoint[ep_id]; packet = htc_lookup_tx_packet(target, ep, skb); if (packet == NULL) { /* may have already been flushed and freed */ ath6kl_err("HTC TX lookup failed!\n"); } else { /* will be giving this buffer back to upper layers */ packet->status = 0; send_packet_completion(target, packet); } skb = NULL; if (!ep->pipe.tx_credit_flow_enabled) { /* * note: when using TX credit flow, the re-checking of queues * happens when credits flow back from the target. in the * non-TX credit case, we recheck after the packet completes */ htc_try_send(target, ep, NULL); } return 0; } static int htc_send_packets_multiple(struct htc_target *target, struct list_head *pkt_queue) { struct htc_endpoint *ep; struct htc_packet *packet, *tmp_pkt; if (list_empty(pkt_queue)) return -EINVAL; /* get first packet to find out which ep the packets will go into */ packet = list_first_entry(pkt_queue, struct htc_packet, list); if (packet->endpoint >= ENDPOINT_MAX) { WARN_ON_ONCE(1); return -EINVAL; } ep = &target->endpoint[packet->endpoint]; htc_try_send(target, ep, pkt_queue); /* do completion on any packets that couldn't get in */ if (!list_empty(pkt_queue)) { list_for_each_entry_safe(packet, tmp_pkt, pkt_queue, list) { packet->status = -ENOMEM; } do_send_completion(ep, pkt_queue); } return 0; } /* htc pipe rx path */ static struct htc_packet *alloc_htc_packet_container(struct htc_target *target) { struct htc_packet *packet; spin_lock_bh(&target->rx_lock); if (target->pipe.htc_packet_pool == NULL) { spin_unlock_bh(&target->rx_lock); return NULL; } packet = target->pipe.htc_packet_pool; target->pipe.htc_packet_pool = (struct htc_packet *) packet->list.next; spin_unlock_bh(&target->rx_lock); packet->list.next = NULL; return packet; } static void free_htc_packet_container(struct htc_target *target, struct htc_packet *packet) { struct list_head *lh; spin_lock_bh(&target->rx_lock); if (target->pipe.htc_packet_pool == NULL) { target->pipe.htc_packet_pool = packet; packet->list.next = NULL; } else { lh = (struct list_head *) target->pipe.htc_packet_pool; packet->list.next = lh; target->pipe.htc_packet_pool = packet; } spin_unlock_bh(&target->rx_lock); } static int htc_process_trailer(struct htc_target *target, u8 *buffer, int len, enum htc_endpoint_id from_ep) { struct htc_credit_report *report; struct htc_record_hdr *record; u8 *record_buf; int status = 0; while (len > 0) { if (len < sizeof(struct htc_record_hdr)) { status = -EINVAL; break; } /* these are byte aligned structs */ record = (struct htc_record_hdr *) buffer; len -= sizeof(struct htc_record_hdr); buffer += sizeof(struct htc_record_hdr); if (record->len > len) { /* no room left in buffer for record */ ath6kl_dbg(ATH6KL_DBG_HTC, "invalid length: %d (id:%d) buffer has: %d bytes left\n", record->len, record->rec_id, len); status = -EINVAL; break; } /* start of record follows the header */ record_buf = buffer; switch (record->rec_id) { case HTC_RECORD_CREDITS: if (record->len < sizeof(struct htc_credit_report)) { WARN_ON_ONCE(1); return -EINVAL; } report = (struct htc_credit_report *) record_buf; htc_process_credit_report(target, report, record->len / sizeof(*report), from_ep); break; default: ath6kl_dbg(ATH6KL_DBG_HTC, "unhandled record: id:%d length:%d\n", record->rec_id, record->len); break; } /* advance buffer past this record for next time around */ buffer += record->len; len -= record->len; } return status; } static void do_recv_completion(struct htc_endpoint *ep, struct list_head *queue_to_indicate) { struct htc_packet *packet; if (list_empty(queue_to_indicate)) { /* nothing to indicate */ return; } /* using legacy EpRecv */ while (!list_empty(queue_to_indicate)) { packet = list_first_entry(queue_to_indicate, struct htc_packet, list); list_del(&packet->list); ep->ep_cb.rx(ep->target, packet); } return; } static void recv_packet_completion(struct htc_target *target, struct htc_endpoint *ep, struct htc_packet *packet) { struct list_head container; INIT_LIST_HEAD(&container); list_add_tail(&packet->list, &container); /* do completion */ do_recv_completion(ep, &container); } static int ath6kl_htc_pipe_rx_complete(struct ath6kl *ar, struct sk_buff *skb, u8 pipeid) { struct htc_target *target = ar->htc_target; u8 *netdata, *trailer, hdr_info; struct htc_frame_hdr *htc_hdr; u32 netlen, trailerlen = 0; struct htc_packet *packet; struct htc_endpoint *ep; u16 payload_len; int status = 0; /* * ar->htc_target can be NULL due to a race condition that can occur * during driver initialization(we do 'ath6kl_hif_power_on' before * initializing 'ar->htc_target' via 'ath6kl_htc_create'). * 'ath6kl_hif_power_on' assigns 'ath6kl_recv_complete' as * usb_complete_t/callback function for 'usb_fill_bulk_urb'. * Thus the possibility of ar->htc_target being NULL * via ath6kl_recv_complete -> ath6kl_usb_io_comp_work. */ if (!target) { ath6kl_dbg(ATH6KL_DBG_HTC, "Target not yet initialized\n"); status = -EINVAL; goto free_skb; } netdata = skb->data; netlen = skb->len; htc_hdr = (struct htc_frame_hdr *) netdata; if (htc_hdr->eid >= ENDPOINT_MAX) { ath6kl_dbg(ATH6KL_DBG_HTC, "HTC Rx: invalid EndpointID=%d\n", htc_hdr->eid); status = -EINVAL; goto free_skb; } ep = &target->endpoint[htc_hdr->eid]; payload_len = le16_to_cpu(get_unaligned(&htc_hdr->payld_len)); if (netlen < (payload_len + HTC_HDR_LENGTH)) { ath6kl_dbg(ATH6KL_DBG_HTC, "HTC Rx: insufficient length, got:%d expected =%zu\n", netlen, payload_len + HTC_HDR_LENGTH); status = -EINVAL; goto free_skb; } /* get flags to check for trailer */ hdr_info = htc_hdr->flags; if (hdr_info & HTC_FLG_RX_TRAILER) { /* extract the trailer length */ hdr_info = htc_hdr->ctrl[0]; if ((hdr_info < sizeof(struct htc_record_hdr)) || (hdr_info > payload_len)) { ath6kl_dbg(ATH6KL_DBG_HTC, "invalid header: payloadlen should be %d, CB[0]: %d\n", payload_len, hdr_info); status = -EINVAL; goto free_skb; } trailerlen = hdr_info; /* process trailer after hdr/apps payload */ trailer = (u8 *) htc_hdr + HTC_HDR_LENGTH + payload_len - hdr_info; status = htc_process_trailer(target, trailer, hdr_info, htc_hdr->eid); if (status != 0) goto free_skb; } if (((int) payload_len - (int) trailerlen) <= 0) { /* zero length packet with trailer, just drop these */ goto free_skb; } if (htc_hdr->eid == ENDPOINT_0) { /* handle HTC control message */ if (target->htc_flags & HTC_OP_STATE_SETUP_COMPLETE) { /* * fatal: target should not send unsolicited * messageson the endpoint 0 */ ath6kl_dbg(ATH6KL_DBG_HTC, "HTC ignores Rx Ctrl after setup complete\n"); status = -EINVAL; goto free_skb; } /* remove HTC header */ skb_pull(skb, HTC_HDR_LENGTH); netdata = skb->data; netlen = skb->len; spin_lock_bh(&target->rx_lock); target->pipe.ctrl_response_valid = true; target->pipe.ctrl_response_len = min_t(int, netlen, HTC_MAX_CTRL_MSG_LEN); memcpy(target->pipe.ctrl_response_buf, netdata, target->pipe.ctrl_response_len); spin_unlock_bh(&target->rx_lock); dev_kfree_skb(skb); skb = NULL; goto free_skb; } /* * TODO: the message based HIF architecture allocates net bufs * for recv packets since it bridges that HIF to upper layers, * which expects HTC packets, we form the packets here */ packet = alloc_htc_packet_container(target); if (packet == NULL) { status = -ENOMEM; goto free_skb; } packet->status = 0; packet->endpoint = htc_hdr->eid; packet->pkt_cntxt = skb; /* TODO: for backwards compatibility */ packet->buf = skb_push(skb, 0) + HTC_HDR_LENGTH; packet->act_len = netlen - HTC_HDR_LENGTH - trailerlen; /* * TODO: this is a hack because the driver layer will set the * actual len of the skb again which will just double the len */ skb_trim(skb, 0); recv_packet_completion(target, ep, packet); /* recover the packet container */ free_htc_packet_container(target, packet); skb = NULL; free_skb: dev_kfree_skb(skb); return status; } static void htc_flush_rx_queue(struct htc_target *target, struct htc_endpoint *ep) { struct list_head container; struct htc_packet *packet; spin_lock_bh(&target->rx_lock); while (1) { if (list_empty(&ep->rx_bufq)) break; packet = list_first_entry(&ep->rx_bufq, struct htc_packet, list); list_del(&packet->list); spin_unlock_bh(&target->rx_lock); packet->status = -ECANCELED; packet->act_len = 0; ath6kl_dbg(ATH6KL_DBG_HTC, "Flushing RX packet:0x%p, length:%d, ep:%d\n", packet, packet->buf_len, packet->endpoint); INIT_LIST_HEAD(&container); list_add_tail(&packet->list, &container); /* give the packet back */ do_recv_completion(ep, &container); spin_lock_bh(&target->rx_lock); } spin_unlock_bh(&target->rx_lock); } /* polling routine to wait for a control packet to be received */ static int htc_wait_recv_ctrl_message(struct htc_target *target) { int count = HTC_TARGET_RESPONSE_POLL_COUNT; while (count > 0) { spin_lock_bh(&target->rx_lock); if (target->pipe.ctrl_response_valid) { target->pipe.ctrl_response_valid = false; spin_unlock_bh(&target->rx_lock); break; } spin_unlock_bh(&target->rx_lock); count--; msleep_interruptible(HTC_TARGET_RESPONSE_POLL_WAIT); } if (count <= 0) { ath6kl_warn("htc pipe control receive timeout!\n"); return -ETIMEDOUT; } return 0; } static void htc_rxctrl_complete(struct htc_target *context, struct htc_packet *packet) { struct sk_buff *skb = packet->skb; if (packet->endpoint == ENDPOINT_0 && packet->status == -ECANCELED && skb != NULL) dev_kfree_skb(skb); } /* htc pipe initialization */ static void reset_endpoint_states(struct htc_target *target) { struct htc_endpoint *ep; int i; for (i = ENDPOINT_0; i < ENDPOINT_MAX; i++) { ep = &target->endpoint[i]; ep->svc_id = 0; ep->len_max = 0; ep->max_txq_depth = 0; ep->eid = i; INIT_LIST_HEAD(&ep->txq); INIT_LIST_HEAD(&ep->pipe.tx_lookup_queue); INIT_LIST_HEAD(&ep->rx_bufq); ep->target = target; ep->pipe.tx_credit_flow_enabled = true; } } /* start HTC, this is called after all services are connected */ static int htc_config_target_hif_pipe(struct htc_target *target) { return 0; } /* htc service functions */ static u8 htc_get_credit_alloc(struct htc_target *target, u16 service_id) { u8 allocation = 0; int i; for (i = 0; i < ENDPOINT_MAX; i++) { if (target->pipe.txcredit_alloc[i].service_id == service_id) allocation = target->pipe.txcredit_alloc[i].credit_alloc; } if (allocation == 0) { ath6kl_dbg(ATH6KL_DBG_HTC, "HTC Service TX : 0x%2.2X : allocation is zero!\n", service_id); } return allocation; } static int ath6kl_htc_pipe_conn_service(struct htc_target *target, struct htc_service_connect_req *conn_req, struct htc_service_connect_resp *conn_resp) { struct ath6kl *ar = target->dev->ar; struct htc_packet *packet = NULL; struct htc_conn_service_resp *resp_msg; struct htc_conn_service_msg *conn_msg; enum htc_endpoint_id assigned_epid = ENDPOINT_MAX; bool disable_credit_flowctrl = false; unsigned int max_msg_size = 0; struct htc_endpoint *ep; int length, status = 0; struct sk_buff *skb; u8 tx_alloc; u16 flags; if (conn_req->svc_id == 0) { WARN_ON_ONCE(1); status = -EINVAL; goto free_packet; } if (conn_req->svc_id == HTC_CTRL_RSVD_SVC) { /* special case for pseudo control service */ assigned_epid = ENDPOINT_0; max_msg_size = HTC_MAX_CTRL_MSG_LEN; tx_alloc = 0; } else { tx_alloc = htc_get_credit_alloc(target, conn_req->svc_id); if (tx_alloc == 0) { status = -ENOMEM; goto free_packet; } /* allocate a packet to send to the target */ packet = htc_alloc_txctrl_packet(target); if (packet == NULL) { WARN_ON_ONCE(1); status = -ENOMEM; goto free_packet; } skb = packet->skb; length = sizeof(struct htc_conn_service_msg); /* assemble connect service message */ conn_msg = skb_put(skb, length); if (conn_msg == NULL) { WARN_ON_ONCE(1); status = -EINVAL; goto free_packet; } memset(conn_msg, 0, sizeof(struct htc_conn_service_msg)); conn_msg->msg_id = cpu_to_le16(HTC_MSG_CONN_SVC_ID); conn_msg->svc_id = cpu_to_le16(conn_req->svc_id); conn_msg->conn_flags = cpu_to_le16(conn_req->conn_flags & ~HTC_CONN_FLGS_SET_RECV_ALLOC_MASK); /* tell target desired recv alloc for this ep */ flags = tx_alloc << HTC_CONN_FLGS_SET_RECV_ALLOC_SHIFT; conn_msg->conn_flags |= cpu_to_le16(flags); if (conn_req->conn_flags & HTC_CONN_FLGS_DISABLE_CRED_FLOW_CTRL) { disable_credit_flowctrl = true; } set_htc_pkt_info(packet, NULL, (u8 *) conn_msg, length, ENDPOINT_0, HTC_SERVICE_TX_PACKET_TAG); status = ath6kl_htc_pipe_tx(target, packet); /* we don't own it anymore */ packet = NULL; if (status != 0) goto free_packet; /* wait for response */ status = htc_wait_recv_ctrl_message(target); if (status != 0) goto free_packet; /* we controlled the buffer creation so it has to be * properly aligned */ resp_msg = (struct htc_conn_service_resp *) target->pipe.ctrl_response_buf; if (resp_msg->msg_id != cpu_to_le16(HTC_MSG_CONN_SVC_RESP_ID) || (target->pipe.ctrl_response_len < sizeof(*resp_msg))) { /* this message is not valid */ WARN_ON_ONCE(1); status = -EINVAL; goto free_packet; } ath6kl_dbg(ATH6KL_DBG_TRC, "%s: service 0x%X conn resp: status: %d ep: %d\n", __func__, resp_msg->svc_id, resp_msg->status, resp_msg->eid); conn_resp->resp_code = resp_msg->status; /* check response status */ if (resp_msg->status != HTC_SERVICE_SUCCESS) { ath6kl_dbg(ATH6KL_DBG_HTC, "Target failed service 0x%X connect request (status:%d)\n", resp_msg->svc_id, resp_msg->status); status = -EINVAL; goto free_packet; } assigned_epid = (enum htc_endpoint_id) resp_msg->eid; max_msg_size = le16_to_cpu(resp_msg->max_msg_sz); } /* the rest are parameter checks so set the error status */ status = -EINVAL; if (assigned_epid >= ENDPOINT_MAX) { WARN_ON_ONCE(1); goto free_packet; } if (max_msg_size == 0) { WARN_ON_ONCE(1); goto free_packet; } ep = &target->endpoint[assigned_epid]; ep->eid = assigned_epid; if (ep->svc_id != 0) { /* endpoint already in use! */ WARN_ON_ONCE(1); goto free_packet; } /* return assigned endpoint to caller */ conn_resp->endpoint = assigned_epid; conn_resp->len_max = max_msg_size; /* setup the endpoint */ ep->svc_id = conn_req->svc_id; /* this marks ep in use */ ep->max_txq_depth = conn_req->max_txq_depth; ep->len_max = max_msg_size; ep->cred_dist.credits = tx_alloc; ep->cred_dist.cred_sz = target->tgt_cred_sz; ep->cred_dist.cred_per_msg = max_msg_size / target->tgt_cred_sz; if (max_msg_size % target->tgt_cred_sz) ep->cred_dist.cred_per_msg++; /* copy all the callbacks */ ep->ep_cb = conn_req->ep_cb; /* initialize tx_drop_packet_threshold */ ep->tx_drop_packet_threshold = MAX_HI_COOKIE_NUM; status = ath6kl_hif_pipe_map_service(ar, ep->svc_id, &ep->pipe.pipeid_ul, &ep->pipe.pipeid_dl); if (status != 0) goto free_packet; ath6kl_dbg(ATH6KL_DBG_HTC, "SVC Ready: 0x%4.4X: ULpipe:%d DLpipe:%d id:%d\n", ep->svc_id, ep->pipe.pipeid_ul, ep->pipe.pipeid_dl, ep->eid); if (disable_credit_flowctrl && ep->pipe.tx_credit_flow_enabled) { ep->pipe.tx_credit_flow_enabled = false; ath6kl_dbg(ATH6KL_DBG_HTC, "SVC: 0x%4.4X ep:%d TX flow control off\n", ep->svc_id, assigned_epid); } free_packet: if (packet != NULL) htc_free_txctrl_packet(target, packet); return status; } /* htc export functions */ static void *ath6kl_htc_pipe_create(struct ath6kl *ar) { int status = 0; struct htc_endpoint *ep = NULL; struct htc_target *target = NULL; struct htc_packet *packet; int i; target = kzalloc(sizeof(struct htc_target), GFP_KERNEL); if (target == NULL) { ath6kl_err("htc create unable to allocate memory\n"); status = -ENOMEM; goto fail_htc_create; } spin_lock_init(&target->htc_lock); spin_lock_init(&target->rx_lock); spin_lock_init(&target->tx_lock); reset_endpoint_states(target); for (i = 0; i < HTC_PACKET_CONTAINER_ALLOCATION; i++) { packet = kzalloc(sizeof(struct htc_packet), GFP_KERNEL); if (packet != NULL) free_htc_packet_container(target, packet); } target->dev = kzalloc(sizeof(*target->dev), GFP_KERNEL); if (!target->dev) { ath6kl_err("unable to allocate memory\n"); status = -ENOMEM; goto fail_htc_create; } target->dev->ar = ar; target->dev->htc_cnxt = target; /* Get HIF default pipe for HTC message exchange */ ep = &target->endpoint[ENDPOINT_0]; ath6kl_hif_pipe_get_default(ar, &ep->pipe.pipeid_ul, &ep->pipe.pipeid_dl); return target; fail_htc_create: if (status != 0) { if (target != NULL) ath6kl_htc_pipe_cleanup(target); target = NULL; } return target; } /* cleanup the HTC instance */ static void ath6kl_htc_pipe_cleanup(struct htc_target *target) { struct htc_packet *packet; while (true) { packet = alloc_htc_packet_container(target); if (packet == NULL) break; kfree(packet); } kfree(target->dev); /* kfree our instance */ kfree(target); } static int ath6kl_htc_pipe_start(struct htc_target *target) { struct sk_buff *skb; struct htc_setup_comp_ext_msg *setup; struct htc_packet *packet; htc_config_target_hif_pipe(target); /* allocate a buffer to send */ packet = htc_alloc_txctrl_packet(target); if (packet == NULL) { WARN_ON_ONCE(1); return -ENOMEM; } skb = packet->skb; /* assemble setup complete message */ setup = skb_put(skb, sizeof(*setup)); memset(setup, 0, sizeof(struct htc_setup_comp_ext_msg)); setup->msg_id = cpu_to_le16(HTC_MSG_SETUP_COMPLETE_EX_ID); ath6kl_dbg(ATH6KL_DBG_HTC, "HTC using TX credit flow control\n"); set_htc_pkt_info(packet, NULL, (u8 *) setup, sizeof(struct htc_setup_comp_ext_msg), ENDPOINT_0, HTC_SERVICE_TX_PACKET_TAG); target->htc_flags |= HTC_OP_STATE_SETUP_COMPLETE; return ath6kl_htc_pipe_tx(target, packet); } static void ath6kl_htc_pipe_stop(struct htc_target *target) { int i; struct htc_endpoint *ep; /* cleanup endpoints */ for (i = 0; i < ENDPOINT_MAX; i++) { ep = &target->endpoint[i]; htc_flush_rx_queue(target, ep); htc_flush_tx_endpoint(target, ep, HTC_TX_PACKET_TAG_ALL); } reset_endpoint_states(target); target->htc_flags &= ~HTC_OP_STATE_SETUP_COMPLETE; } static int ath6kl_htc_pipe_get_rxbuf_num(struct htc_target *target, enum htc_endpoint_id endpoint) { int num; spin_lock_bh(&target->rx_lock); num = get_queue_depth(&(target->endpoint[endpoint].rx_bufq)); spin_unlock_bh(&target->rx_lock); return num; } static int ath6kl_htc_pipe_tx(struct htc_target *target, struct htc_packet *packet) { struct list_head queue; ath6kl_dbg(ATH6KL_DBG_HTC, "%s: endPointId: %d, buffer: 0x%p, length: %d\n", __func__, packet->endpoint, packet->buf, packet->act_len); INIT_LIST_HEAD(&queue); list_add_tail(&packet->list, &queue); return htc_send_packets_multiple(target, &queue); } static int ath6kl_htc_pipe_wait_target(struct htc_target *target) { struct htc_ready_ext_msg *ready_msg; struct htc_service_connect_req connect; struct htc_service_connect_resp resp; int status = 0; status = htc_wait_recv_ctrl_message(target); if (status != 0) return status; if (target->pipe.ctrl_response_len < sizeof(*ready_msg)) { ath6kl_warn("invalid htc pipe ready msg len: %d\n", target->pipe.ctrl_response_len); return -ECOMM; } ready_msg = (struct htc_ready_ext_msg *) target->pipe.ctrl_response_buf; if (ready_msg->ver2_0_info.msg_id != cpu_to_le16(HTC_MSG_READY_ID)) { ath6kl_warn("invalid htc pipe ready msg: 0x%x\n", ready_msg->ver2_0_info.msg_id); return -ECOMM; } ath6kl_dbg(ATH6KL_DBG_HTC, "Target Ready! : transmit resources : %d size:%d\n", ready_msg->ver2_0_info.cred_cnt, ready_msg->ver2_0_info.cred_sz); target->tgt_creds = le16_to_cpu(ready_msg->ver2_0_info.cred_cnt); target->tgt_cred_sz = le16_to_cpu(ready_msg->ver2_0_info.cred_sz); if ((target->tgt_creds == 0) || (target->tgt_cred_sz == 0)) return -ECOMM; htc_setup_target_buffer_assignments(target); /* setup our pseudo HTC control endpoint connection */ memset(&connect, 0, sizeof(connect)); memset(&resp, 0, sizeof(resp)); connect.ep_cb.tx_complete = htc_txctrl_complete; connect.ep_cb.rx = htc_rxctrl_complete; connect.max_txq_depth = NUM_CONTROL_TX_BUFFERS; connect.svc_id = HTC_CTRL_RSVD_SVC; /* connect fake service */ status = ath6kl_htc_pipe_conn_service(target, &connect, &resp); return status; } static void ath6kl_htc_pipe_flush_txep(struct htc_target *target, enum htc_endpoint_id endpoint, u16 tag) { struct htc_endpoint *ep = &target->endpoint[endpoint]; if (ep->svc_id == 0) { WARN_ON_ONCE(1); /* not in use.. */ return; } htc_flush_tx_endpoint(target, ep, tag); } static int ath6kl_htc_pipe_add_rxbuf_multiple(struct htc_target *target, struct list_head *pkt_queue) { struct htc_packet *packet, *tmp_pkt, *first; struct htc_endpoint *ep; int status = 0; if (list_empty(pkt_queue)) return -EINVAL; first = list_first_entry(pkt_queue, struct htc_packet, list); if (first->endpoint >= ENDPOINT_MAX) { WARN_ON_ONCE(1); return -EINVAL; } ath6kl_dbg(ATH6KL_DBG_HTC, "%s: epid: %d, cnt:%d, len: %d\n", __func__, first->endpoint, get_queue_depth(pkt_queue), first->buf_len); ep = &target->endpoint[first->endpoint]; spin_lock_bh(&target->rx_lock); /* store receive packets */ list_splice_tail_init(pkt_queue, &ep->rx_bufq); spin_unlock_bh(&target->rx_lock); if (status != 0) { /* walk through queue and mark each one canceled */ list_for_each_entry_safe(packet, tmp_pkt, pkt_queue, list) { packet->status = -ECANCELED; } do_recv_completion(ep, pkt_queue); } return status; } static void ath6kl_htc_pipe_activity_changed(struct htc_target *target, enum htc_endpoint_id ep, bool active) { /* TODO */ } static void ath6kl_htc_pipe_flush_rx_buf(struct htc_target *target) { struct htc_endpoint *endpoint; struct htc_packet *packet, *tmp_pkt; int i; for (i = ENDPOINT_0; i < ENDPOINT_MAX; i++) { endpoint = &target->endpoint[i]; spin_lock_bh(&target->rx_lock); list_for_each_entry_safe(packet, tmp_pkt, &endpoint->rx_bufq, list) { list_del(&packet->list); spin_unlock_bh(&target->rx_lock); ath6kl_dbg(ATH6KL_DBG_HTC, "htc rx flush pkt 0x%p len %d ep %d\n", packet, packet->buf_len, packet->endpoint); dev_kfree_skb(packet->pkt_cntxt); spin_lock_bh(&target->rx_lock); } spin_unlock_bh(&target->rx_lock); } } static int ath6kl_htc_pipe_credit_setup(struct htc_target *target, struct ath6kl_htc_credit_info *info) { return 0; } static const struct ath6kl_htc_ops ath6kl_htc_pipe_ops = { .create = ath6kl_htc_pipe_create, .wait_target = ath6kl_htc_pipe_wait_target, .start = ath6kl_htc_pipe_start, .conn_service = ath6kl_htc_pipe_conn_service, .tx = ath6kl_htc_pipe_tx, .stop = ath6kl_htc_pipe_stop, .cleanup = ath6kl_htc_pipe_cleanup, .flush_txep = ath6kl_htc_pipe_flush_txep, .flush_rx_buf = ath6kl_htc_pipe_flush_rx_buf, .activity_changed = ath6kl_htc_pipe_activity_changed, .get_rxbuf_num = ath6kl_htc_pipe_get_rxbuf_num, .add_rxbuf_multiple = ath6kl_htc_pipe_add_rxbuf_multiple, .credit_setup = ath6kl_htc_pipe_credit_setup, .tx_complete = ath6kl_htc_pipe_tx_complete, .rx_complete = ath6kl_htc_pipe_rx_complete, }; void ath6kl_htc_pipe_attach(struct ath6kl *ar) { ar->htc_ops = &ath6kl_htc_pipe_ops; }
4 4 4 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 // SPDX-License-Identifier: GPL-2.0 /* * thermal_hwmon.c - Generic Thermal Management hwmon support. * * Code based on Intel thermal_core.c. Copyrights of the original code: * Copyright (C) 2008 Intel Corp * Copyright (C) 2008 Zhang Rui <rui.zhang@intel.com> * Copyright (C) 2008 Sujith Thomas <sujith.thomas@intel.com> * * Copyright (C) 2013 Texas Instruments * Copyright (C) 2013 Eduardo Valentin <eduardo.valentin@ti.com> */ #include <linux/err.h> #include <linux/export.h> #include <linux/hwmon.h> #include <linux/slab.h> #include <linux/thermal.h> #include "thermal_hwmon.h" #include "thermal_core.h" /* hwmon sys I/F */ /* thermal zone devices with the same type share one hwmon device */ struct thermal_hwmon_device { char type[THERMAL_NAME_LENGTH]; struct device *device; int count; struct list_head tz_list; struct list_head node; }; struct thermal_hwmon_attr { struct device_attribute attr; char name[16]; }; /* one temperature input for each thermal zone */ struct thermal_hwmon_temp { struct list_head hwmon_node; struct thermal_zone_device *tz; struct thermal_hwmon_attr temp_input; /* hwmon sys attr */ struct thermal_hwmon_attr temp_crit; /* hwmon sys attr */ }; static LIST_HEAD(thermal_hwmon_list); static DEFINE_MUTEX(thermal_hwmon_list_lock); static ssize_t temp_input_show(struct device *dev, struct device_attribute *attr, char *buf) { int temperature; int ret; struct thermal_hwmon_attr *hwmon_attr = container_of(attr, struct thermal_hwmon_attr, attr); struct thermal_hwmon_temp *temp = container_of(hwmon_attr, struct thermal_hwmon_temp, temp_input); struct thermal_zone_device *tz = temp->tz; ret = thermal_zone_get_temp(tz, &temperature); if (ret) return ret; return sprintf(buf, "%d\n", temperature); } static ssize_t temp_crit_show(struct device *dev, struct device_attribute *attr, char *buf) { struct thermal_hwmon_attr *hwmon_attr = container_of(attr, struct thermal_hwmon_attr, attr); struct thermal_hwmon_temp *temp = container_of(hwmon_attr, struct thermal_hwmon_temp, temp_crit); struct thermal_zone_device *tz = temp->tz; int temperature; int ret; guard(thermal_zone)(tz); ret = tz->ops.get_crit_temp(tz, &temperature); if (ret) return ret; return sprintf(buf, "%d\n", temperature); } static struct thermal_hwmon_device * thermal_hwmon_lookup_by_type(const struct thermal_zone_device *tz) { struct thermal_hwmon_device *hwmon; char type[THERMAL_NAME_LENGTH]; mutex_lock(&thermal_hwmon_list_lock); list_for_each_entry(hwmon, &thermal_hwmon_list, node) { strscpy(type, tz->type); strreplace(type, '-', '_'); if (!strcmp(hwmon->type, type)) { mutex_unlock(&thermal_hwmon_list_lock); return hwmon; } } mutex_unlock(&thermal_hwmon_list_lock); return NULL; } /* Find the temperature input matching a given thermal zone */ static struct thermal_hwmon_temp * thermal_hwmon_lookup_temp(const struct thermal_hwmon_device *hwmon, const struct thermal_zone_device *tz) { struct thermal_hwmon_temp *temp; mutex_lock(&thermal_hwmon_list_lock); list_for_each_entry(temp, &hwmon->tz_list, hwmon_node) if (temp->tz == tz) { mutex_unlock(&thermal_hwmon_list_lock); return temp; } mutex_unlock(&thermal_hwmon_list_lock); return NULL; } static bool thermal_zone_crit_temp_valid(struct thermal_zone_device *tz) { int temp; return tz->ops.get_crit_temp && !tz->ops.get_crit_temp(tz, &temp); } int thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) { struct thermal_hwmon_device *hwmon; struct thermal_hwmon_temp *temp; int new_hwmon_device = 1; int result; hwmon = thermal_hwmon_lookup_by_type(tz); if (hwmon) { new_hwmon_device = 0; goto register_sys_interface; } hwmon = kzalloc(sizeof(*hwmon), GFP_KERNEL); if (!hwmon) return -ENOMEM; INIT_LIST_HEAD(&hwmon->tz_list); strscpy(hwmon->type, tz->type, THERMAL_NAME_LENGTH); strreplace(hwmon->type, '-', '_'); hwmon->device = hwmon_device_register_for_thermal(&tz->device, hwmon->type, hwmon); if (IS_ERR(hwmon->device)) { result = PTR_ERR(hwmon->device); goto free_mem; } register_sys_interface: temp = kzalloc(sizeof(*temp), GFP_KERNEL); if (!temp) { result = -ENOMEM; goto unregister_name; } temp->tz = tz; hwmon->count++; snprintf(temp->temp_input.name, sizeof(temp->temp_input.name), "temp%d_input", hwmon->count); temp->temp_input.attr.attr.name = temp->temp_input.name; temp->temp_input.attr.attr.mode = 0444; temp->temp_input.attr.show = temp_input_show; sysfs_attr_init(&temp->temp_input.attr.attr); result = device_create_file(hwmon->device, &temp->temp_input.attr); if (result) goto free_temp_mem; if (thermal_zone_crit_temp_valid(tz)) { snprintf(temp->temp_crit.name, sizeof(temp->temp_crit.name), "temp%d_crit", hwmon->count); temp->temp_crit.attr.attr.name = temp->temp_crit.name; temp->temp_crit.attr.attr.mode = 0444; temp->temp_crit.attr.show = temp_crit_show; sysfs_attr_init(&temp->temp_crit.attr.attr); result = device_create_file(hwmon->device, &temp->temp_crit.attr); if (result) goto unregister_input; } mutex_lock(&thermal_hwmon_list_lock); if (new_hwmon_device) list_add_tail(&hwmon->node, &thermal_hwmon_list); list_add_tail(&temp->hwmon_node, &hwmon->tz_list); mutex_unlock(&thermal_hwmon_list_lock); return 0; unregister_input: device_remove_file(hwmon->device, &temp->temp_input.attr); free_temp_mem: kfree(temp); unregister_name: if (new_hwmon_device) hwmon_device_unregister(hwmon->device); free_mem: kfree(hwmon); return result; } EXPORT_SYMBOL_GPL(thermal_add_hwmon_sysfs); void thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz) { struct thermal_hwmon_device *hwmon; struct thermal_hwmon_temp *temp; hwmon = thermal_hwmon_lookup_by_type(tz); if (unlikely(!hwmon)) { /* Should never happen... */ dev_dbg(&tz->device, "hwmon device lookup failed!\n"); return; } temp = thermal_hwmon_lookup_temp(hwmon, tz); if (unlikely(!temp)) { /* Should never happen... */ dev_dbg(&tz->device, "temperature input lookup failed!\n"); return; } device_remove_file(hwmon->device, &temp->temp_input.attr); if (thermal_zone_crit_temp_valid(tz)) device_remove_file(hwmon->device, &temp->temp_crit.attr); mutex_lock(&thermal_hwmon_list_lock); list_del(&temp->hwmon_node); kfree(temp); if (!list_empty(&hwmon->tz_list)) { mutex_unlock(&thermal_hwmon_list_lock); return; } list_del(&hwmon->node); mutex_unlock(&thermal_hwmon_list_lock); hwmon_device_unregister(hwmon->device); kfree(hwmon); } EXPORT_SYMBOL_GPL(thermal_remove_hwmon_sysfs); static void devm_thermal_hwmon_release(struct device *dev, void *res) { thermal_remove_hwmon_sysfs(*(struct thermal_zone_device **)res); } int devm_thermal_add_hwmon_sysfs(struct device *dev, struct thermal_zone_device *tz) { struct thermal_zone_device **ptr; int ret; ptr = devres_alloc(devm_thermal_hwmon_release, sizeof(*ptr), GFP_KERNEL); if (!ptr) { dev_warn(dev, "Failed to allocate device resource data\n"); return -ENOMEM; } ret = thermal_add_hwmon_sysfs(tz); if (ret) { dev_warn(dev, "Failed to add hwmon sysfs attributes\n"); devres_free(ptr); return ret; } *ptr = tz; devres_add(dev, ptr); return ret; } EXPORT_SYMBOL_GPL(devm_thermal_add_hwmon_sysfs); MODULE_IMPORT_NS("HWMON_THERMAL");
13 7 9 13 13 6 6 12 7 5 5 1 4 5 1 4 3 3 3 7 1 6 6 6 10 6 4 4 1 4 2 3 4 2 2 3 4 8 4 4 5 7 4 4 7 7 7 5 1 4 3 7 7 8 3 5 1 1 4 6 6 1 4 2 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 // SPDX-License-Identifier: GPL-2.0 /* * RTC subsystem, interface functions * * Copyright (C) 2005 Tower Technologies * Author: Alessandro Zummo <a.zummo@towertech.it> * * based on arch/arm/common/rtctime.c */ #include <linux/rtc.h> #include <linux/sched.h> #include <linux/module.h> #include <linux/log2.h> #include <linux/workqueue.h> #define CREATE_TRACE_POINTS #include <trace/events/rtc.h> static int rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer); static void rtc_timer_remove(struct rtc_device *rtc, struct rtc_timer *timer); static void rtc_add_offset(struct rtc_device *rtc, struct rtc_time *tm) { time64_t secs; if (!rtc->offset_secs) return; secs = rtc_tm_to_time64(tm); /* * Since the reading time values from RTC device are always in the RTC * original valid range, but we need to skip the overlapped region * between expanded range and original range, which is no need to add * the offset. */ if ((rtc->start_secs > rtc->range_min && secs >= rtc->start_secs) || (rtc->start_secs < rtc->range_min && secs <= (rtc->start_secs + rtc->range_max - rtc->range_min))) return; rtc_time64_to_tm(secs + rtc->offset_secs, tm); } static void rtc_subtract_offset(struct rtc_device *rtc, struct rtc_time *tm) { time64_t secs; if (!rtc->offset_secs) return; secs = rtc_tm_to_time64(tm); /* * If the setting time values are in the valid range of RTC hardware * device, then no need to subtract the offset when setting time to RTC * device. Otherwise we need to subtract the offset to make the time * values are valid for RTC hardware device. */ if (secs >= rtc->range_min && secs <= rtc->range_max) return; rtc_time64_to_tm(secs - rtc->offset_secs, tm); } static int rtc_valid_range(struct rtc_device *rtc, struct rtc_time *tm) { if (rtc->range_min != rtc->range_max) { time64_t time = rtc_tm_to_time64(tm); time64_t range_min = rtc->set_start_time ? rtc->start_secs : rtc->range_min; timeu64_t range_max = rtc->set_start_time ? (rtc->start_secs + rtc->range_max - rtc->range_min) : rtc->range_max; if (time < range_min || time > range_max) return -ERANGE; } return 0; } static int __rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm) { int err; if (!rtc->ops) { err = -ENODEV; } else if (!rtc->ops->read_time) { err = -EINVAL; } else { memset(tm, 0, sizeof(struct rtc_time)); err = rtc->ops->read_time(rtc->dev.parent, tm); if (err < 0) { dev_dbg(&rtc->dev, "read_time: fail to read: %d\n", err); return err; } rtc_add_offset(rtc, tm); err = rtc_valid_tm(tm); if (err < 0) dev_dbg(&rtc->dev, "read_time: rtc_time isn't valid\n"); } return err; } int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm) { int err; err = mutex_lock_interruptible(&rtc->ops_lock); if (err) return err; err = __rtc_read_time(rtc, tm); mutex_unlock(&rtc->ops_lock); trace_rtc_read_time(rtc_tm_to_time64(tm), err); return err; } EXPORT_SYMBOL_GPL(rtc_read_time); int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm) { int err, uie; err = rtc_valid_tm(tm); if (err != 0) return err; err = rtc_valid_range(rtc, tm); if (err) return err; rtc_subtract_offset(rtc, tm); #ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL uie = rtc->uie_rtctimer.enabled || rtc->uie_irq_active; #else uie = rtc->uie_rtctimer.enabled; #endif if (uie) { err = rtc_update_irq_enable(rtc, 0); if (err) return err; } err = mutex_lock_interruptible(&rtc->ops_lock); if (err) return err; if (!rtc->ops) err = -ENODEV; else if (rtc->ops->set_time) err = rtc->ops->set_time(rtc->dev.parent, tm); else err = -EINVAL; pm_stay_awake(rtc->dev.parent); mutex_unlock(&rtc->ops_lock); /* A timer might have just expired */ schedule_work(&rtc->irqwork); if (uie) { err = rtc_update_irq_enable(rtc, 1); if (err) return err; } trace_rtc_set_time(rtc_tm_to_time64(tm), err); return err; } EXPORT_SYMBOL_GPL(rtc_set_time); static int rtc_read_alarm_internal(struct rtc_device *rtc, struct rtc_wkalrm *alarm) { int err; err = mutex_lock_interruptible(&rtc->ops_lock); if (err) return err; if (!rtc->ops) { err = -ENODEV; } else if (!test_bit(RTC_FEATURE_ALARM, rtc->features) || !rtc->ops->read_alarm) { err = -EINVAL; } else { alarm->enabled = 0; alarm->pending = 0; alarm->time.tm_sec = -1; alarm->time.tm_min = -1; alarm->time.tm_hour = -1; alarm->time.tm_mday = -1; alarm->time.tm_mon = -1; alarm->time.tm_year = -1; alarm->time.tm_wday = -1; alarm->time.tm_yday = -1; alarm->time.tm_isdst = -1; err = rtc->ops->read_alarm(rtc->dev.parent, alarm); } mutex_unlock(&rtc->ops_lock); trace_rtc_read_alarm(err?0:rtc_tm_to_time64(&alarm->time), err); return err; } int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) { int err; struct rtc_time before, now; int first_time = 1; time64_t t_now, t_alm; enum { none, day, month, year } missing = none; unsigned int days; /* The lower level RTC driver may return -1 in some fields, * creating invalid alarm->time values, for reasons like: * * - The hardware may not be capable of filling them in; * many alarms match only on time-of-day fields, not * day/month/year calendar data. * * - Some hardware uses illegal values as "wildcard" match * values, which non-Linux firmware (like a BIOS) may try * to set up as e.g. "alarm 15 minutes after each hour". * Linux uses only oneshot alarms. * * When we see that here, we deal with it by using values from * a current RTC timestamp for any missing (-1) values. The * RTC driver prevents "periodic alarm" modes. * * But this can be racey, because some fields of the RTC timestamp * may have wrapped in the interval since we read the RTC alarm, * which would lead to us inserting inconsistent values in place * of the -1 fields. * * Reading the alarm and timestamp in the reverse sequence * would have the same race condition, and not solve the issue. * * So, we must first read the RTC timestamp, * then read the RTC alarm value, * and then read a second RTC timestamp. * * If any fields of the second timestamp have changed * when compared with the first timestamp, then we know * our timestamp may be inconsistent with that used by * the low-level rtc_read_alarm_internal() function. * * So, when the two timestamps disagree, we just loop and do * the process again to get a fully consistent set of values. * * This could all instead be done in the lower level driver, * but since more than one lower level RTC implementation needs it, * then it's probably best to do it here instead of there.. */ /* Get the "before" timestamp */ err = rtc_read_time(rtc, &before); if (err < 0) return err; do { if (!first_time) memcpy(&before, &now, sizeof(struct rtc_time)); first_time = 0; /* get the RTC alarm values, which may be incomplete */ err = rtc_read_alarm_internal(rtc, alarm); if (err) return err; /* full-function RTCs won't have such missing fields */ err = rtc_valid_tm(&alarm->time); if (!err) goto done; /* get the "after" timestamp, to detect wrapped fields */ err = rtc_read_time(rtc, &now); if (err < 0) return err; /* note that tm_sec is a "don't care" value here: */ } while (before.tm_min != now.tm_min || before.tm_hour != now.tm_hour || before.tm_mon != now.tm_mon || before.tm_year != now.tm_year); /* Fill in the missing alarm fields using the timestamp; we * know there's at least one since alarm->time is invalid. */ if (alarm->time.tm_sec == -1) alarm->time.tm_sec = now.tm_sec; if (alarm->time.tm_min == -1) alarm->time.tm_min = now.tm_min; if (alarm->time.tm_hour == -1) alarm->time.tm_hour = now.tm_hour; /* For simplicity, only support date rollover for now */ if (alarm->time.tm_mday < 1 || alarm->time.tm_mday > 31) { alarm->time.tm_mday = now.tm_mday; missing = day; } if ((unsigned int)alarm->time.tm_mon >= 12) { alarm->time.tm_mon = now.tm_mon; if (missing == none) missing = month; } if (alarm->time.tm_year == -1) { alarm->time.tm_year = now.tm_year; if (missing == none) missing = year; } /* Can't proceed if alarm is still invalid after replacing * missing fields. */ err = rtc_valid_tm(&alarm->time); if (err) goto done; /* with luck, no rollover is needed */ t_now = rtc_tm_to_time64(&now); t_alm = rtc_tm_to_time64(&alarm->time); if (t_now < t_alm) goto done; switch (missing) { /* 24 hour rollover ... if it's now 10am Monday, an alarm that * that will trigger at 5am will do so at 5am Tuesday, which * could also be in the next month or year. This is a common * case, especially for PCs. */ case day: dev_dbg(&rtc->dev, "alarm rollover: %s\n", "day"); t_alm += 24 * 60 * 60; rtc_time64_to_tm(t_alm, &alarm->time); break; /* Month rollover ... if it's the 31th, an alarm on the 3rd will * be next month. An alarm matching on the 30th, 29th, or 28th * may end up in the month after that! Many newer PCs support * this type of alarm. */ case month: dev_dbg(&rtc->dev, "alarm rollover: %s\n", "month"); do { if (alarm->time.tm_mon < 11) { alarm->time.tm_mon++; } else { alarm->time.tm_mon = 0; alarm->time.tm_year++; } days = rtc_month_days(alarm->time.tm_mon, alarm->time.tm_year); } while (days < alarm->time.tm_mday); break; /* Year rollover ... easy except for leap years! */ case year: dev_dbg(&rtc->dev, "alarm rollover: %s\n", "year"); do { alarm->time.tm_year++; } while (!is_leap_year(alarm->time.tm_year + 1900) && rtc_valid_tm(&alarm->time) != 0); break; default: dev_warn(&rtc->dev, "alarm rollover not handled\n"); } err = rtc_valid_tm(&alarm->time); done: if (err && alarm->enabled) dev_warn(&rtc->dev, "invalid alarm value: %ptR\n", &alarm->time); else rtc_add_offset(rtc, &alarm->time); return err; } int rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) { int err; err = mutex_lock_interruptible(&rtc->ops_lock); if (err) return err; if (!rtc->ops) { err = -ENODEV; } else if (!test_bit(RTC_FEATURE_ALARM, rtc->features)) { err = -EINVAL; } else { memset(alarm, 0, sizeof(struct rtc_wkalrm)); alarm->enabled = rtc->aie_timer.enabled; alarm->time = rtc_ktime_to_tm(rtc->aie_timer.node.expires); } mutex_unlock(&rtc->ops_lock); trace_rtc_read_alarm(rtc_tm_to_time64(&alarm->time), err); return err; } EXPORT_SYMBOL_GPL(rtc_read_alarm); static int __rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) { struct rtc_time tm; time64_t now, scheduled; int err; err = rtc_valid_tm(&alarm->time); if (err) return err; scheduled = rtc_tm_to_time64(&alarm->time); /* Make sure we're not setting alarms in the past */ err = __rtc_read_time(rtc, &tm); if (err) return err; now = rtc_tm_to_time64(&tm); if (scheduled <= now) return -ETIME; /* * XXX - We just checked to make sure the alarm time is not * in the past, but there is still a race window where if * the is alarm set for the next second and the second ticks * over right here, before we set the alarm. */ rtc_subtract_offset(rtc, &alarm->time); if (!rtc->ops) err = -ENODEV; else if (!test_bit(RTC_FEATURE_ALARM, rtc->features)) err = -EINVAL; else err = rtc->ops->set_alarm(rtc->dev.parent, alarm); trace_rtc_set_alarm(rtc_tm_to_time64(&alarm->time), err); return err; } int rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) { ktime_t alarm_time; int err; if (!rtc->ops) return -ENODEV; else if (!test_bit(RTC_FEATURE_ALARM, rtc->features)) return -EINVAL; err = rtc_valid_tm(&alarm->time); if (err != 0) return err; err = rtc_valid_range(rtc, &alarm->time); if (err) return err; err = mutex_lock_interruptible(&rtc->ops_lock); if (err) return err; if (rtc->aie_timer.enabled) rtc_timer_remove(rtc, &rtc->aie_timer); alarm_time = rtc_tm_to_ktime(alarm->time); /* * Round down so we never miss a deadline, checking for past deadline is * done in __rtc_set_alarm */ if (test_bit(RTC_FEATURE_ALARM_RES_MINUTE, rtc->features)) alarm_time = ktime_sub_ns(alarm_time, (u64)alarm->time.tm_sec * NSEC_PER_SEC); rtc->aie_timer.node.expires = alarm_time; rtc->aie_timer.period = 0; if (alarm->enabled) err = rtc_timer_enqueue(rtc, &rtc->aie_timer); mutex_unlock(&rtc->ops_lock); return err; } EXPORT_SYMBOL_GPL(rtc_set_alarm); /* Called once per device from rtc_device_register */ int rtc_initialize_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) { int err; struct rtc_time now; err = rtc_valid_tm(&alarm->time); if (err != 0) return err; err = rtc_read_time(rtc, &now); if (err) return err; err = mutex_lock_interruptible(&rtc->ops_lock); if (err) return err; rtc->aie_timer.node.expires = rtc_tm_to_ktime(alarm->time); rtc->aie_timer.period = 0; /* Alarm has to be enabled & in the future for us to enqueue it */ if (alarm->enabled && (rtc_tm_to_ktime(now) < rtc->aie_timer.node.expires)) { rtc->aie_timer.enabled = 1; timerqueue_add(&rtc->timerqueue, &rtc->aie_timer.node); trace_rtc_timer_enqueue(&rtc->aie_timer); } mutex_unlock(&rtc->ops_lock); return err; } EXPORT_SYMBOL_GPL(rtc_initialize_alarm); int rtc_alarm_irq_enable(struct rtc_device *rtc, unsigned int enabled) { int err; err = mutex_lock_interruptible(&rtc->ops_lock); if (err) return err; if (rtc->aie_timer.enabled != enabled) { if (enabled) err = rtc_timer_enqueue(rtc, &rtc->aie_timer); else rtc_timer_remove(rtc, &rtc->aie_timer); } if (err) /* nothing */; else if (!rtc->ops) err = -ENODEV; else if (!test_bit(RTC_FEATURE_ALARM, rtc->features) || !rtc->ops->alarm_irq_enable) err = -EINVAL; else err = rtc->ops->alarm_irq_enable(rtc->dev.parent, enabled); mutex_unlock(&rtc->ops_lock); trace_rtc_alarm_irq_enable(enabled, err); return err; } EXPORT_SYMBOL_GPL(rtc_alarm_irq_enable); int rtc_update_irq_enable(struct rtc_device *rtc, unsigned int enabled) { int err; err = mutex_lock_interruptible(&rtc->ops_lock); if (err) return err; #ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL if (enabled == 0 && rtc->uie_irq_active) { mutex_unlock(&rtc->ops_lock); return rtc_dev_update_irq_enable_emul(rtc, 0); } #endif /* make sure we're changing state */ if (rtc->uie_rtctimer.enabled == enabled) goto out; if (!test_bit(RTC_FEATURE_UPDATE_INTERRUPT, rtc->features) || !test_bit(RTC_FEATURE_ALARM, rtc->features)) { mutex_unlock(&rtc->ops_lock); #ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL return rtc_dev_update_irq_enable_emul(rtc, enabled); #else return -EINVAL; #endif } if (enabled) { struct rtc_time tm; ktime_t now, onesec; err = __rtc_read_time(rtc, &tm); if (err) goto out; onesec = ktime_set(1, 0); now = rtc_tm_to_ktime(tm); rtc->uie_rtctimer.node.expires = ktime_add(now, onesec); rtc->uie_rtctimer.period = ktime_set(1, 0); err = rtc_timer_enqueue(rtc, &rtc->uie_rtctimer); } else { rtc_timer_remove(rtc, &rtc->uie_rtctimer); } out: mutex_unlock(&rtc->ops_lock); return err; } EXPORT_SYMBOL_GPL(rtc_update_irq_enable); /** * rtc_handle_legacy_irq - AIE, UIE and PIE event hook * @rtc: pointer to the rtc device * @num: number of occurence of the event * @mode: type of the event, RTC_AF, RTC_UF of RTC_PF * * This function is called when an AIE, UIE or PIE mode interrupt * has occurred (or been emulated). * */ void rtc_handle_legacy_irq(struct rtc_device *rtc, int num, int mode) { unsigned long flags; /* mark one irq of the appropriate mode */ spin_lock_irqsave(&rtc->irq_lock, flags); rtc->irq_data = (rtc->irq_data + (num << 8)) | (RTC_IRQF | mode); spin_unlock_irqrestore(&rtc->irq_lock, flags); wake_up_interruptible(&rtc->irq_queue); kill_fasync(&rtc->async_queue, SIGIO, POLL_IN); } /** * rtc_aie_update_irq - AIE mode rtctimer hook * @rtc: pointer to the rtc_device * * This functions is called when the aie_timer expires. */ void rtc_aie_update_irq(struct rtc_device *rtc) { rtc_handle_legacy_irq(rtc, 1, RTC_AF); } /** * rtc_uie_update_irq - UIE mode rtctimer hook * @rtc: pointer to the rtc_device * * This functions is called when the uie_timer expires. */ void rtc_uie_update_irq(struct rtc_device *rtc) { rtc_handle_legacy_irq(rtc, 1, RTC_UF); } /** * rtc_pie_update_irq - PIE mode hrtimer hook * @timer: pointer to the pie mode hrtimer * * This function is used to emulate PIE mode interrupts * using an hrtimer. This function is called when the periodic * hrtimer expires. */ enum hrtimer_restart rtc_pie_update_irq(struct hrtimer *timer) { struct rtc_device *rtc; ktime_t period; u64 count; rtc = container_of(timer, struct rtc_device, pie_timer); period = NSEC_PER_SEC / rtc->irq_freq; count = hrtimer_forward_now(timer, period); rtc_handle_legacy_irq(rtc, count, RTC_PF); return HRTIMER_RESTART; } /** * rtc_update_irq - Triggered when a RTC interrupt occurs. * @rtc: the rtc device * @num: how many irqs are being reported (usually one) * @events: mask of RTC_IRQF with one or more of RTC_PF, RTC_AF, RTC_UF * Context: any */ void rtc_update_irq(struct rtc_device *rtc, unsigned long num, unsigned long events) { if (IS_ERR_OR_NULL(rtc)) return; pm_stay_awake(rtc->dev.parent); schedule_work(&rtc->irqwork); } EXPORT_SYMBOL_GPL(rtc_update_irq); struct rtc_device *rtc_class_open(const char *name) { struct device *dev; struct rtc_device *rtc = NULL; dev = class_find_device_by_name(&rtc_class, name); if (dev) rtc = to_rtc_device(dev); if (rtc) { if (!try_module_get(rtc->owner)) { put_device(dev); rtc = NULL; } } return rtc; } EXPORT_SYMBOL_GPL(rtc_class_open); void rtc_class_close(struct rtc_device *rtc) { module_put(rtc->owner); put_device(&rtc->dev); } EXPORT_SYMBOL_GPL(rtc_class_close); static int rtc_update_hrtimer(struct rtc_device *rtc, int enabled) { /* * We always cancel the timer here first, because otherwise * we could run into BUG_ON(timer->state != HRTIMER_STATE_CALLBACK); * when we manage to start the timer before the callback * returns HRTIMER_RESTART. * * We cannot use hrtimer_cancel() here as a running callback * could be blocked on rtc->irq_task_lock and hrtimer_cancel() * would spin forever. */ if (hrtimer_try_to_cancel(&rtc->pie_timer) < 0) return -1; if (enabled) { ktime_t period = NSEC_PER_SEC / rtc->irq_freq; hrtimer_start(&rtc->pie_timer, period, HRTIMER_MODE_REL); } return 0; } /** * rtc_irq_set_state - enable/disable 2^N Hz periodic IRQs * @rtc: the rtc device * @enabled: true to enable periodic IRQs * Context: any * * Note that rtc_irq_set_freq() should previously have been used to * specify the desired frequency of periodic IRQ. */ int rtc_irq_set_state(struct rtc_device *rtc, int enabled) { int err = 0; while (rtc_update_hrtimer(rtc, enabled) < 0) cpu_relax(); rtc->pie_enabled = enabled; trace_rtc_irq_set_state(enabled, err); return err; } /** * rtc_irq_set_freq - set 2^N Hz periodic IRQ frequency for IRQ * @rtc: the rtc device * @freq: positive frequency * Context: any * * Note that rtc_irq_set_state() is used to enable or disable the * periodic IRQs. */ int rtc_irq_set_freq(struct rtc_device *rtc, int freq) { int err = 0; if (freq <= 0 || freq > RTC_MAX_FREQ) return -EINVAL; rtc->irq_freq = freq; while (rtc->pie_enabled && rtc_update_hrtimer(rtc, 1) < 0) cpu_relax(); trace_rtc_irq_set_freq(freq, err); return err; } /** * rtc_timer_enqueue - Adds a rtc_timer to the rtc_device timerqueue * @rtc: rtc device * @timer: timer being added. * * Enqueues a timer onto the rtc devices timerqueue and sets * the next alarm event appropriately. * * Sets the enabled bit on the added timer. * * Must hold ops_lock for proper serialization of timerqueue */ static int rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer) { struct timerqueue_node *next = timerqueue_getnext(&rtc->timerqueue); struct rtc_time tm; ktime_t now; int err; err = __rtc_read_time(rtc, &tm); if (err) return err; timer->enabled = 1; now = rtc_tm_to_ktime(tm); /* Skip over expired timers */ while (next) { if (next->expires >= now) break; next = timerqueue_iterate_next(next); } timerqueue_add(&rtc->timerqueue, &timer->node); trace_rtc_timer_enqueue(timer); if (!next || ktime_before(timer->node.expires, next->expires)) { struct rtc_wkalrm alarm; alarm.time = rtc_ktime_to_tm(timer->node.expires); alarm.enabled = 1; err = __rtc_set_alarm(rtc, &alarm); if (err == -ETIME) { pm_stay_awake(rtc->dev.parent); schedule_work(&rtc->irqwork); } else if (err) { timerqueue_del(&rtc->timerqueue, &timer->node); trace_rtc_timer_dequeue(timer); timer->enabled = 0; return err; } } return 0; } static void rtc_alarm_disable(struct rtc_device *rtc) { if (!rtc->ops || !test_bit(RTC_FEATURE_ALARM, rtc->features) || !rtc->ops->alarm_irq_enable) return; rtc->ops->alarm_irq_enable(rtc->dev.parent, false); trace_rtc_alarm_irq_enable(0, 0); } /** * rtc_timer_remove - Removes a rtc_timer from the rtc_device timerqueue * @rtc: rtc device * @timer: timer being removed. * * Removes a timer onto the rtc devices timerqueue and sets * the next alarm event appropriately. * * Clears the enabled bit on the removed timer. * * Must hold ops_lock for proper serialization of timerqueue */ static void rtc_timer_remove(struct rtc_device *rtc, struct rtc_timer *timer) { struct timerqueue_node *next = timerqueue_getnext(&rtc->timerqueue); timerqueue_del(&rtc->timerqueue, &timer->node); trace_rtc_timer_dequeue(timer); timer->enabled = 0; if (next == &timer->node) { struct rtc_wkalrm alarm; int err; next = timerqueue_getnext(&rtc->timerqueue); if (!next) { rtc_alarm_disable(rtc); return; } alarm.time = rtc_ktime_to_tm(next->expires); alarm.enabled = 1; err = __rtc_set_alarm(rtc, &alarm); if (err == -ETIME) { pm_stay_awake(rtc->dev.parent); schedule_work(&rtc->irqwork); } } } /** * rtc_timer_do_work - Expires rtc timers * @work: work item * * Expires rtc timers. Reprograms next alarm event if needed. * Called via worktask. * * Serializes access to timerqueue via ops_lock mutex */ void rtc_timer_do_work(struct work_struct *work) { struct rtc_timer *timer; struct timerqueue_node *next; ktime_t now; struct rtc_time tm; int err; struct rtc_device *rtc = container_of(work, struct rtc_device, irqwork); mutex_lock(&rtc->ops_lock); again: err = __rtc_read_time(rtc, &tm); if (err) { mutex_unlock(&rtc->ops_lock); return; } now = rtc_tm_to_ktime(tm); while ((next = timerqueue_getnext(&rtc->timerqueue))) { if (next->expires > now) break; /* expire timer */ timer = container_of(next, struct rtc_timer, node); timerqueue_del(&rtc->timerqueue, &timer->node); trace_rtc_timer_dequeue(timer); timer->enabled = 0; if (timer->func) timer->func(timer->rtc); trace_rtc_timer_fired(timer); /* Re-add/fwd periodic timers */ if (ktime_to_ns(timer->period)) { timer->node.expires = ktime_add(timer->node.expires, timer->period); timer->enabled = 1; timerqueue_add(&rtc->timerqueue, &timer->node); trace_rtc_timer_enqueue(timer); } } /* Set next alarm */ if (next) { struct rtc_wkalrm alarm; int err; int retry = 3; alarm.time = rtc_ktime_to_tm(next->expires); alarm.enabled = 1; reprogram: err = __rtc_set_alarm(rtc, &alarm); if (err == -ETIME) { goto again; } else if (err) { if (retry-- > 0) goto reprogram; timer = container_of(next, struct rtc_timer, node); timerqueue_del(&rtc->timerqueue, &timer->node); trace_rtc_timer_dequeue(timer); timer->enabled = 0; dev_err(&rtc->dev, "__rtc_set_alarm: err=%d\n", err); goto again; } } else { rtc_alarm_disable(rtc); } pm_relax(rtc->dev.parent); mutex_unlock(&rtc->ops_lock); } /* rtc_timer_init - Initializes an rtc_timer * @timer: timer to be intiialized * @f: function pointer to be called when timer fires * @rtc: pointer to the rtc_device * * Kernel interface to initializing an rtc_timer. */ void rtc_timer_init(struct rtc_timer *timer, void (*f)(struct rtc_device *r), struct rtc_device *rtc) { timerqueue_init(&timer->node); timer->enabled = 0; timer->func = f; timer->rtc = rtc; } /* rtc_timer_start - Sets an rtc_timer to fire in the future * @ rtc: rtc device to be used * @ timer: timer being set * @ expires: time at which to expire the timer * @ period: period that the timer will recur * * Kernel interface to set an rtc_timer */ int rtc_timer_start(struct rtc_device *rtc, struct rtc_timer *timer, ktime_t expires, ktime_t period) { int ret = 0; mutex_lock(&rtc->ops_lock); if (timer->enabled) rtc_timer_remove(rtc, timer); timer->node.expires = expires; timer->period = period; ret = rtc_timer_enqueue(rtc, timer); mutex_unlock(&rtc->ops_lock); return ret; } /* rtc_timer_cancel - Stops an rtc_timer * @ rtc: rtc device to be used * @ timer: timer being set * * Kernel interface to cancel an rtc_timer */ void rtc_timer_cancel(struct rtc_device *rtc, struct rtc_timer *timer) { mutex_lock(&rtc->ops_lock); if (timer->enabled) rtc_timer_remove(rtc, timer); mutex_unlock(&rtc->ops_lock); } /** * rtc_read_offset - Read the amount of rtc offset in parts per billion * @rtc: rtc device to be used * @offset: the offset in parts per billion * * see below for details. * * Kernel interface to read rtc clock offset * Returns 0 on success, or a negative number on error. * If read_offset() is not implemented for the rtc, return -EINVAL */ int rtc_read_offset(struct rtc_device *rtc, long *offset) { int ret; if (!rtc->ops) return -ENODEV; if (!rtc->ops->read_offset) return -EINVAL; mutex_lock(&rtc->ops_lock); ret = rtc->ops->read_offset(rtc->dev.parent, offset); mutex_unlock(&rtc->ops_lock); trace_rtc_read_offset(*offset, ret); return ret; } /** * rtc_set_offset - Adjusts the duration of the average second * @rtc: rtc device to be used * @offset: the offset in parts per billion * * Some rtc's allow an adjustment to the average duration of a second * to compensate for differences in the actual clock rate due to temperature, * the crystal, capacitor, etc. * * The adjustment applied is as follows: * t = t0 * (1 + offset * 1e-9) * where t0 is the measured length of 1 RTC second with offset = 0 * * Kernel interface to adjust an rtc clock offset. * Return 0 on success, or a negative number on error. * If the rtc offset is not setable (or not implemented), return -EINVAL */ int rtc_set_offset(struct rtc_device *rtc, long offset) { int ret; if (!rtc->ops) return -ENODEV; if (!rtc->ops->set_offset) return -EINVAL; mutex_lock(&rtc->ops_lock); ret = rtc->ops->set_offset(rtc->dev.parent, offset); mutex_unlock(&rtc->ops_lock); trace_rtc_set_offset(offset, ret); return ret; }
7 1 7 7 7 5 1 7 7 7 7 7 7 7 7 7 7 7 7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 // SPDX-License-Identifier: GPL-2.0-or-later /* * SPCA508 chip based cameras subdriver * * Copyright (C) 2009 Jean-Francois Moine <http://moinejf.free.fr> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #define MODULE_NAME "spca508" #include "gspca.h" MODULE_AUTHOR("Michel Xhaard <mxhaard@users.sourceforge.net>"); MODULE_DESCRIPTION("GSPCA/SPCA508 USB Camera Driver"); MODULE_LICENSE("GPL"); /* specific webcam descriptor */ struct sd { struct gspca_dev gspca_dev; /* !! must be the first item */ u8 subtype; #define CreativeVista 0 #define HamaUSBSightcam 1 #define HamaUSBSightcam2 2 #define IntelEasyPCCamera 3 #define MicroInnovationIC200 4 #define ViewQuestVQ110 5 }; static const struct v4l2_pix_format sif_mode[] = { {160, 120, V4L2_PIX_FMT_SPCA508, V4L2_FIELD_NONE, .bytesperline = 160, .sizeimage = 160 * 120 * 3 / 2, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 3}, {176, 144, V4L2_PIX_FMT_SPCA508, V4L2_FIELD_NONE, .bytesperline = 176, .sizeimage = 176 * 144 * 3 / 2, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 2}, {320, 240, V4L2_PIX_FMT_SPCA508, V4L2_FIELD_NONE, .bytesperline = 320, .sizeimage = 320 * 240 * 3 / 2, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 1}, {352, 288, V4L2_PIX_FMT_SPCA508, V4L2_FIELD_NONE, .bytesperline = 352, .sizeimage = 352 * 288 * 3 / 2, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 0}, }; /* Frame packet header offsets for the spca508 */ #define SPCA508_OFFSET_DATA 37 /* * Initialization data: this is the first set-up data written to the * device (before the open data). */ static const u16 spca508_init_data[][2] = { {0x0000, 0x870b}, {0x0020, 0x8112}, /* Video drop enable, ISO streaming disable */ {0x0003, 0x8111}, /* Reset compression & memory */ {0x0000, 0x8110}, /* Disable all outputs */ /* READ {0x0000, 0x8114} -> 0000: 00 */ {0x0000, 0x8114}, /* SW GPIO data */ {0x0008, 0x8110}, /* Enable charge pump output */ {0x0002, 0x8116}, /* 200 kHz pump clock */ /* UNKNOWN DIRECTION (URB_FUNCTION_SELECT_INTERFACE:) */ {0x0003, 0x8111}, /* Reset compression & memory */ {0x0000, 0x8111}, /* Normal mode (not reset) */ {0x0098, 0x8110}, /* Enable charge pump output, sync.serial,external 2x clock */ {0x000d, 0x8114}, /* SW GPIO data */ {0x0002, 0x8116}, /* 200 kHz pump clock */ {0x0020, 0x8112}, /* Video drop enable, ISO streaming disable */ /* --------------------------------------- */ {0x000f, 0x8402}, /* memory bank */ {0x0000, 0x8403}, /* ... address */ /* --------------------------------------- */ /* 0x88__ is Synchronous Serial Interface. */ /* TBD: This table could be expressed more compactly */ /* using spca508_write_i2c_vector(). */ /* TBD: Should see if the values in spca50x_i2c_data */ /* would work with the VQ110 instead of the values */ /* below. */ {0x00c0, 0x8804}, /* SSI slave addr */ {0x0008, 0x8802}, /* 375 Khz SSI clock */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, /* 375 Khz SSI clock */ {0x0012, 0x8801}, /* SSI reg addr */ {0x0080, 0x8800}, /* SSI data to write */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, /* 375 Khz SSI clock */ {0x0012, 0x8801}, /* SSI reg addr */ {0x0000, 0x8800}, /* SSI data to write */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, /* 375 Khz SSI clock */ {0x0011, 0x8801}, /* SSI reg addr */ {0x0040, 0x8800}, /* SSI data to write */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0013, 0x8801}, {0x0000, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0014, 0x8801}, {0x0000, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0015, 0x8801}, {0x0001, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0016, 0x8801}, {0x0003, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0017, 0x8801}, {0x0036, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0018, 0x8801}, {0x00ec, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x001a, 0x8801}, {0x0094, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x001b, 0x8801}, {0x0000, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0027, 0x8801}, {0x00a2, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0028, 0x8801}, {0x0040, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x002a, 0x8801}, {0x0084, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x002b, 0x8801}, {0x00a8, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x002c, 0x8801}, {0x00fe, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x002d, 0x8801}, {0x0003, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0038, 0x8801}, {0x0083, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0033, 0x8801}, {0x0081, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0034, 0x8801}, {0x004a, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0039, 0x8801}, {0x0000, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0010, 0x8801}, {0x00a8, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0006, 0x8801}, {0x0058, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0000, 0x8801}, {0x0004, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0040, 0x8801}, {0x0080, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0041, 0x8801}, {0x000c, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0042, 0x8801}, {0x000c, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0043, 0x8801}, {0x0028, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0044, 0x8801}, {0x0080, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0045, 0x8801}, {0x0020, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0046, 0x8801}, {0x0020, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0047, 0x8801}, {0x0080, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0048, 0x8801}, {0x004c, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0049, 0x8801}, {0x0084, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x004a, 0x8801}, {0x0084, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x004b, 0x8801}, {0x0084, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* --------------------------------------- */ {0x0012, 0x8700}, /* Clock speed 48Mhz/(2+2)/2= 6 Mhz */ {0x0000, 0x8701}, /* CKx1 clock delay adj */ {0x0000, 0x8701}, /* CKx1 clock delay adj */ {0x0001, 0x870c}, /* CKOx2 output */ /* --------------------------------------- */ {0x0080, 0x8600}, /* Line memory read counter (L) */ {0x0001, 0x8606}, /* reserved */ {0x0064, 0x8607}, /* Line memory read counter (H) 0x6480=25,728 */ {0x002a, 0x8601}, /* CDSP sharp interpolation mode, * line sel for color sep, edge enhance enab */ {0x0000, 0x8602}, /* optical black level for user settng = 0 */ {0x0080, 0x8600}, /* Line memory read counter (L) */ {0x000a, 0x8603}, /* optical black level calc mode: * auto; optical black offset = 10 */ {0x00df, 0x865b}, /* Horiz offset for valid pixels (L)=0xdf */ {0x0012, 0x865c}, /* Vert offset for valid lines (L)=0x12 */ /* The following two lines seem to be the "wrong" resolution. */ /* But perhaps these indicate the actual size of the sensor */ /* rather than the size of the current video mode. */ {0x0058, 0x865d}, /* Horiz valid pixels (*4) (L) = 352 */ {0x0048, 0x865e}, /* Vert valid lines (*4) (L) = 288 */ {0x0015, 0x8608}, /* A11 Coef ... */ {0x0030, 0x8609}, {0x00fb, 0x860a}, {0x003e, 0x860b}, {0x00ce, 0x860c}, {0x00f4, 0x860d}, {0x00eb, 0x860e}, {0x00dc, 0x860f}, {0x0039, 0x8610}, {0x0001, 0x8611}, /* R offset for white balance ... */ {0x0000, 0x8612}, {0x0001, 0x8613}, {0x0000, 0x8614}, {0x005b, 0x8651}, /* R gain for white balance ... */ {0x0040, 0x8652}, {0x0060, 0x8653}, {0x0040, 0x8654}, {0x0000, 0x8655}, {0x0001, 0x863f}, /* Fixed gamma correction enable, USB control, * lum filter disable, lum noise clip disable */ {0x00a1, 0x8656}, /* Window1 size 256x256, Windows2 size 64x64, * gamma look-up disable, * new edge enhancement enable */ {0x0018, 0x8657}, /* Edge gain high thresh */ {0x0020, 0x8658}, /* Edge gain low thresh */ {0x000a, 0x8659}, /* Edge bandwidth high threshold */ {0x0005, 0x865a}, /* Edge bandwidth low threshold */ /* -------------------------------- */ {0x0030, 0x8112}, /* Video drop enable, ISO streaming enable */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0xa908, 0x8802}, {0x0034, 0x8801}, /* SSI reg addr */ {0x00ca, 0x8800}, /* SSI data to write */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x1f08, 0x8802}, {0x0006, 0x8801}, {0x0080, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* ----- Read back coefs we wrote earlier. */ /* READ { 0x0000, 0x8608 } -> 0000: 15 */ /* READ { 0x0000, 0x8609 } -> 0000: 30 */ /* READ { 0x0000, 0x860a } -> 0000: fb */ /* READ { 0x0000, 0x860b } -> 0000: 3e */ /* READ { 0x0000, 0x860c } -> 0000: ce */ /* READ { 0x0000, 0x860d } -> 0000: f4 */ /* READ { 0x0000, 0x860e } -> 0000: eb */ /* READ { 0x0000, 0x860f } -> 0000: dc */ /* READ { 0x0000, 0x8610 } -> 0000: 39 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0xb008, 0x8802}, {0x0006, 0x8801}, {0x007d, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* This chunk is seemingly redundant with */ /* earlier commands (A11 Coef...), but if I disable it, */ /* the image appears too dark. Maybe there was some kind of */ /* reset since the earlier commands, so this is necessary again. */ {0x0015, 0x8608}, {0x0030, 0x8609}, {0xfffb, 0x860a}, {0x003e, 0x860b}, {0xffce, 0x860c}, {0xfff4, 0x860d}, {0xffeb, 0x860e}, {0xffdc, 0x860f}, {0x0039, 0x8610}, {0x0018, 0x8657}, {0x0000, 0x8508}, /* Disable compression. */ /* Previous line was: {0x0021, 0x8508}, * Enable compression. */ {0x0032, 0x850b}, /* compression stuff */ {0x0003, 0x8509}, /* compression stuff */ {0x0011, 0x850a}, /* compression stuff */ {0x0021, 0x850d}, /* compression stuff */ {0x0010, 0x850c}, /* compression stuff */ {0x0003, 0x8500}, /* *** Video mode: 160x120 */ {0x0001, 0x8501}, /* Hardware-dominated snap control */ {0x0061, 0x8656}, /* Window1 size 128x128, Windows2 size 128x128, * gamma look-up disable, * new edge enhancement enable */ {0x0018, 0x8617}, /* Window1 start X (*2) */ {0x0008, 0x8618}, /* Window1 start Y (*2) */ {0x0061, 0x8656}, /* Window1 size 128x128, Windows2 size 128x128, * gamma look-up disable, * new edge enhancement enable */ {0x0058, 0x8619}, /* Window2 start X (*2) */ {0x0008, 0x861a}, /* Window2 start Y (*2) */ {0x00ff, 0x8615}, /* High lum thresh for white balance */ {0x0000, 0x8616}, /* Low lum thresh for white balance */ {0x0012, 0x8700}, /* Clock speed 48Mhz/(2+2)/2= 6 Mhz */ {0x0012, 0x8700}, /* Clock speed 48Mhz/(2+2)/2= 6 Mhz */ /* READ { 0x0000, 0x8656 } -> 0000: 61 */ {0x0028, 0x8802}, /* 375 Khz SSI clock, SSI r/w sync with VSYNC */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 28 */ {0x1f28, 0x8802}, /* 375 Khz SSI clock, SSI r/w sync with VSYNC */ {0x0010, 0x8801}, /* SSI reg addr */ {0x003e, 0x8800}, /* SSI data to write */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ {0x0028, 0x8802}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 28 */ {0x1f28, 0x8802}, {0x0000, 0x8801}, {0x001f, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ {0x0001, 0x8602}, /* optical black level for user settning = 1 */ /* Original: */ {0x0023, 0x8700}, /* Clock speed 48Mhz/(3+2)/4= 2.4 Mhz */ {0x000f, 0x8602}, /* optical black level for user settning = 15 */ {0x0028, 0x8802}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 28 */ {0x1f28, 0x8802}, {0x0010, 0x8801}, {0x007b, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ {0x002f, 0x8651}, /* R gain for white balance ... */ {0x0080, 0x8653}, /* READ { 0x0000, 0x8655 } -> 0000: 00 */ {0x0000, 0x8655}, {0x0030, 0x8112}, /* Video drop enable, ISO streaming enable */ {0x0020, 0x8112}, /* Video drop enable, ISO streaming disable */ /* UNKNOWN DIRECTION (URB_FUNCTION_SELECT_INTERFACE: (ALT=0) ) */ {} }; /* * Initialization data for Intel EasyPC Camera CS110 */ static const u16 spca508cs110_init_data[][2] = { {0x0000, 0x870b}, /* Reset CTL3 */ {0x0003, 0x8111}, /* Soft Reset compression, memory, TG & CDSP */ {0x0000, 0x8111}, /* Normal operation on reset */ {0x0090, 0x8110}, /* External Clock 2x & Synchronous Serial Interface Output */ {0x0020, 0x8112}, /* Video Drop packet enable */ {0x0000, 0x8114}, /* Software GPIO output data */ {0x0001, 0x8114}, {0x0001, 0x8114}, {0x0001, 0x8114}, {0x0003, 0x8114}, /* Initial sequence Synchronous Serial Interface */ {0x000f, 0x8402}, /* Memory bank Address */ {0x0000, 0x8403}, /* Memory bank Address */ {0x00ba, 0x8804}, /* SSI Slave address */ {0x0010, 0x8802}, /* 93.75kHz SSI Clock Two DataByte */ {0x0010, 0x8802}, /* 93.75kHz SSI Clock two DataByte */ {0x0001, 0x8801}, {0x000a, 0x8805}, /* a - NWG: Dunno what this is about */ {0x0000, 0x8800}, {0x0010, 0x8802}, {0x0002, 0x8801}, {0x0000, 0x8805}, {0x0000, 0x8800}, {0x0010, 0x8802}, {0x0003, 0x8801}, {0x0027, 0x8805}, {0x0001, 0x8800}, {0x0010, 0x8802}, {0x0004, 0x8801}, {0x0065, 0x8805}, {0x0001, 0x8800}, {0x0010, 0x8802}, {0x0005, 0x8801}, {0x0003, 0x8805}, {0x0000, 0x8800}, {0x0010, 0x8802}, {0x0006, 0x8801}, {0x001c, 0x8805}, {0x0000, 0x8800}, {0x0010, 0x8802}, {0x0007, 0x8801}, {0x002a, 0x8805}, {0x0000, 0x8800}, {0x0010, 0x8802}, {0x0002, 0x8704}, /* External input CKIx1 */ {0x0001, 0x8606}, /* 1 Line memory Read Counter (H) Result: (d)410 */ {0x009a, 0x8600}, /* Line memory Read Counter (L) */ {0x0001, 0x865b}, /* 1 Horizontal Offset for Valid Pixel(L) */ {0x0003, 0x865c}, /* 3 Vertical Offset for Valid Lines(L) */ {0x0058, 0x865d}, /* 58 Horizontal Valid Pixel Window(L) */ {0x0006, 0x8660}, /* Nibble data + input order */ {0x000a, 0x8602}, /* Optical black level set to 0x0a */ {0x0000, 0x8603}, /* Optical black level Offset */ /* {0x0000, 0x8611}, * 0 R Offset for white Balance */ /* {0x0000, 0x8612}, * 1 Gr Offset for white Balance */ /* {0x0000, 0x8613}, * 1f B Offset for white Balance */ /* {0x0000, 0x8614}, * f0 Gb Offset for white Balance */ {0x0040, 0x8651}, /* 2b BLUE gain for white balance good at all 60 */ {0x0030, 0x8652}, /* 41 Gr Gain for white Balance (L) */ {0x0035, 0x8653}, /* 26 RED gain for white balance */ {0x0035, 0x8654}, /* 40Gb Gain for white Balance (L) */ {0x0041, 0x863f}, /* Fixed Gamma correction enabled (makes colours look better) */ {0x0000, 0x8655}, /* High bits for white balance*****brightness control*** */ {} }; static const u16 spca508_sightcam_init_data[][2] = { /* This line seems to setup the frame/canvas */ {0x000f, 0x8402}, /* These 6 lines are needed to startup the webcam */ {0x0090, 0x8110}, {0x0001, 0x8114}, {0x0001, 0x8114}, {0x0001, 0x8114}, {0x0003, 0x8114}, {0x0080, 0x8804}, /* This part seems to make the pictures darker? (autobrightness?) */ {0x0001, 0x8801}, {0x0004, 0x8800}, {0x0003, 0x8801}, {0x00e0, 0x8800}, {0x0004, 0x8801}, {0x00b4, 0x8800}, {0x0005, 0x8801}, {0x0000, 0x8800}, {0x0006, 0x8801}, {0x00e0, 0x8800}, {0x0007, 0x8801}, {0x000c, 0x8800}, /* This section is just needed, it probably * does something like the previous section, * but the cam won't start if it's not included. */ {0x0014, 0x8801}, {0x0008, 0x8800}, {0x0015, 0x8801}, {0x0067, 0x8800}, {0x0016, 0x8801}, {0x0000, 0x8800}, {0x0017, 0x8801}, {0x0020, 0x8800}, {0x0018, 0x8801}, {0x0044, 0x8800}, /* Makes the picture darker - and the * cam won't start if not included */ {0x001e, 0x8801}, {0x00ea, 0x8800}, {0x001f, 0x8801}, {0x0001, 0x8800}, {0x0003, 0x8801}, {0x00e0, 0x8800}, /* seems to place the colors ontop of each other #1 */ {0x0006, 0x8704}, {0x0001, 0x870c}, {0x0016, 0x8600}, {0x0002, 0x8606}, /* if not included the pictures becomes _very_ dark */ {0x0064, 0x8607}, {0x003a, 0x8601}, {0x0000, 0x8602}, /* seems to place the colors ontop of each other #2 */ {0x0016, 0x8600}, {0x0018, 0x8617}, {0x0008, 0x8618}, {0x00a1, 0x8656}, /* webcam won't start if not included */ {0x0007, 0x865b}, {0x0001, 0x865c}, {0x0058, 0x865d}, {0x0048, 0x865e}, /* adjusts the colors */ {0x0049, 0x8651}, {0x0040, 0x8652}, {0x004c, 0x8653}, {0x0040, 0x8654}, {} }; static const u16 spca508_sightcam2_init_data[][2] = { {0x0020, 0x8112}, {0x000f, 0x8402}, {0x0000, 0x8403}, {0x0008, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x0009, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x000a, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x000b, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x000c, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x000d, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x000e, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x0007, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x000f, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x0018, 0x8660}, {0x0010, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x0011, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x0000, 0x86b0}, {0x0034, 0x86b1}, {0x0000, 0x86b2}, {0x0049, 0x86b3}, {0x0000, 0x86b4}, {0x0000, 0x86b4}, {0x0012, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x0013, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x0001, 0x86b0}, {0x00aa, 0x86b1}, {0x0000, 0x86b2}, {0x00e4, 0x86b3}, {0x0000, 0x86b4}, {0x0000, 0x86b4}, {0x0018, 0x8660}, {0x0090, 0x8110}, {0x0001, 0x8114}, {0x0001, 0x8114}, {0x0001, 0x8114}, {0x0003, 0x8114}, {0x0080, 0x8804}, {0x0003, 0x8801}, {0x0012, 0x8800}, {0x0004, 0x8801}, {0x0005, 0x8800}, {0x0005, 0x8801}, {0x0000, 0x8800}, {0x0006, 0x8801}, {0x0000, 0x8800}, {0x0007, 0x8801}, {0x0000, 0x8800}, {0x0008, 0x8801}, {0x0005, 0x8800}, {0x000a, 0x8700}, {0x000e, 0x8801}, {0x0004, 0x8800}, {0x0005, 0x8801}, {0x0047, 0x8800}, {0x0006, 0x8801}, {0x0000, 0x8800}, {0x0007, 0x8801}, {0x00c0, 0x8800}, {0x0008, 0x8801}, {0x0003, 0x8800}, {0x0013, 0x8801}, {0x0001, 0x8800}, {0x0009, 0x8801}, {0x0000, 0x8800}, {0x000a, 0x8801}, {0x0000, 0x8800}, {0x000b, 0x8801}, {0x0000, 0x8800}, {0x000c, 0x8801}, {0x0000, 0x8800}, {0x000e, 0x8801}, {0x0004, 0x8800}, {0x000f, 0x8801}, {0x0000, 0x8800}, {0x0010, 0x8801}, {0x0006, 0x8800}, {0x0011, 0x8801}, {0x0006, 0x8800}, {0x0012, 0x8801}, {0x0000, 0x8800}, {0x0013, 0x8801}, {0x0001, 0x8800}, {0x000a, 0x8700}, {0x0000, 0x8702}, {0x0000, 0x8703}, {0x00c2, 0x8704}, {0x0001, 0x870c}, {0x0044, 0x8600}, {0x0002, 0x8606}, {0x0064, 0x8607}, {0x003a, 0x8601}, {0x0008, 0x8602}, {0x0044, 0x8600}, {0x0018, 0x8617}, {0x0008, 0x8618}, {0x00a1, 0x8656}, {0x0004, 0x865b}, {0x0002, 0x865c}, {0x0058, 0x865d}, {0x0048, 0x865e}, {0x0012, 0x8608}, {0x002c, 0x8609}, {0x0002, 0x860a}, {0x002c, 0x860b}, {0x00db, 0x860c}, {0x00f9, 0x860d}, {0x00f1, 0x860e}, {0x00e3, 0x860f}, {0x002c, 0x8610}, {0x006c, 0x8651}, {0x0041, 0x8652}, {0x0059, 0x8653}, {0x0040, 0x8654}, {0x00fa, 0x8611}, {0x00ff, 0x8612}, {0x00f8, 0x8613}, {0x0000, 0x8614}, {0x0001, 0x863f}, {0x0000, 0x8640}, {0x0026, 0x8641}, {0x0045, 0x8642}, {0x0060, 0x8643}, {0x0075, 0x8644}, {0x0088, 0x8645}, {0x009b, 0x8646}, {0x00b0, 0x8647}, {0x00c5, 0x8648}, {0x00d2, 0x8649}, {0x00dc, 0x864a}, {0x00e5, 0x864b}, {0x00eb, 0x864c}, {0x00f0, 0x864d}, {0x00f6, 0x864e}, {0x00fa, 0x864f}, {0x00ff, 0x8650}, {0x0060, 0x8657}, {0x0010, 0x8658}, {0x0018, 0x8659}, {0x0005, 0x865a}, {0x0018, 0x8660}, {0x0003, 0x8509}, {0x0011, 0x850a}, {0x0032, 0x850b}, {0x0010, 0x850c}, {0x0021, 0x850d}, {0x0001, 0x8500}, {0x0000, 0x8508}, {0x0012, 0x8608}, {0x002c, 0x8609}, {0x0002, 0x860a}, {0x0039, 0x860b}, {0x00d0, 0x860c}, {0x00f7, 0x860d}, {0x00ed, 0x860e}, {0x00db, 0x860f}, {0x0039, 0x8610}, {0x0012, 0x8657}, {0x000c, 0x8619}, {0x0004, 0x861a}, {0x00a1, 0x8656}, {0x00c8, 0x8615}, {0x0032, 0x8616}, {0x0030, 0x8112}, {0x0020, 0x8112}, {0x0020, 0x8112}, {0x000f, 0x8402}, {0x0000, 0x8403}, {0x0090, 0x8110}, {0x0001, 0x8114}, {0x0001, 0x8114}, {0x0001, 0x8114}, {0x0003, 0x8114}, {0x0080, 0x8804}, {0x0003, 0x8801}, {0x0012, 0x8800}, {0x0004, 0x8801}, {0x0005, 0x8800}, {0x0005, 0x8801}, {0x0047, 0x8800}, {0x0006, 0x8801}, {0x0000, 0x8800}, {0x0007, 0x8801}, {0x00c0, 0x8800}, {0x0008, 0x8801}, {0x0003, 0x8800}, {0x000a, 0x8700}, {0x000e, 0x8801}, {0x0004, 0x8800}, {0x0005, 0x8801}, {0x0047, 0x8800}, {0x0006, 0x8801}, {0x0000, 0x8800}, {0x0007, 0x8801}, {0x00c0, 0x8800}, {0x0008, 0x8801}, {0x0003, 0x8800}, {0x0013, 0x8801}, {0x0001, 0x8800}, {0x0009, 0x8801}, {0x0000, 0x8800}, {0x000a, 0x8801}, {0x0000, 0x8800}, {0x000b, 0x8801}, {0x0000, 0x8800}, {0x000c, 0x8801}, {0x0000, 0x8800}, {0x000e, 0x8801}, {0x0004, 0x8800}, {0x000f, 0x8801}, {0x0000, 0x8800}, {0x0010, 0x8801}, {0x0006, 0x8800}, {0x0011, 0x8801}, {0x0006, 0x8800}, {0x0012, 0x8801}, {0x0000, 0x8800}, {0x0013, 0x8801}, {0x0001, 0x8800}, {0x000a, 0x8700}, {0x0000, 0x8702}, {0x0000, 0x8703}, {0x00c2, 0x8704}, {0x0001, 0x870c}, {0x0044, 0x8600}, {0x0002, 0x8606}, {0x0064, 0x8607}, {0x003a, 0x8601}, {0x0008, 0x8602}, {0x0044, 0x8600}, {0x0018, 0x8617}, {0x0008, 0x8618}, {0x00a1, 0x8656}, {0x0004, 0x865b}, {0x0002, 0x865c}, {0x0058, 0x865d}, {0x0048, 0x865e}, {0x0012, 0x8608}, {0x002c, 0x8609}, {0x0002, 0x860a}, {0x002c, 0x860b}, {0x00db, 0x860c}, {0x00f9, 0x860d}, {0x00f1, 0x860e}, {0x00e3, 0x860f}, {0x002c, 0x8610}, {0x006c, 0x8651}, {0x0041, 0x8652}, {0x0059, 0x8653}, {0x0040, 0x8654}, {0x00fa, 0x8611}, {0x00ff, 0x8612}, {0x00f8, 0x8613}, {0x0000, 0x8614}, {0x0001, 0x863f}, {0x0000, 0x8640}, {0x0026, 0x8641}, {0x0045, 0x8642}, {0x0060, 0x8643}, {0x0075, 0x8644}, {0x0088, 0x8645}, {0x009b, 0x8646}, {0x00b0, 0x8647}, {0x00c5, 0x8648}, {0x00d2, 0x8649}, {0x00dc, 0x864a}, {0x00e5, 0x864b}, {0x00eb, 0x864c}, {0x00f0, 0x864d}, {0x00f6, 0x864e}, {0x00fa, 0x864f}, {0x00ff, 0x8650}, {0x0060, 0x8657}, {0x0010, 0x8658}, {0x0018, 0x8659}, {0x0005, 0x865a}, {0x0018, 0x8660}, {0x0003, 0x8509}, {0x0011, 0x850a}, {0x0032, 0x850b}, {0x0010, 0x850c}, {0x0021, 0x850d}, {0x0001, 0x8500}, {0x0000, 0x8508}, {0x0012, 0x8608}, {0x002c, 0x8609}, {0x0002, 0x860a}, {0x0039, 0x860b}, {0x00d0, 0x860c}, {0x00f7, 0x860d}, {0x00ed, 0x860e}, {0x00db, 0x860f}, {0x0039, 0x8610}, {0x0012, 0x8657}, {0x0064, 0x8619}, /* This line starts it all, it is not needed here */ /* since it has been build into the driver */ /* jfm: don't start now */ /* {0x0030, 0x8112}, */ {} }; /* * Initialization data for Creative Webcam Vista */ static const u16 spca508_vista_init_data[][2] = { {0x0008, 0x8200}, /* Clear register */ {0x0000, 0x870b}, /* Reset CTL3 */ {0x0020, 0x8112}, /* Video Drop packet enable */ {0x0003, 0x8111}, /* Soft Reset compression, memory, TG & CDSP */ {0x0000, 0x8110}, /* Disable everything */ {0x0000, 0x8114}, /* Software GPIO output data */ {0x0000, 0x8114}, {0x0003, 0x8111}, {0x0000, 0x8111}, {0x0090, 0x8110}, /* Enable: SSI output, External 2X clock output */ {0x0020, 0x8112}, {0x0000, 0x8114}, {0x0001, 0x8114}, {0x0001, 0x8114}, {0x0001, 0x8114}, {0x0003, 0x8114}, {0x000f, 0x8402}, /* Memory bank Address */ {0x0000, 0x8403}, /* Memory bank Address */ {0x00ba, 0x8804}, /* SSI Slave address */ {0x0010, 0x8802}, /* 93.75kHz SSI Clock Two DataByte */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 10 */ {0x0010, 0x8802}, /* Will write 2 bytes (DATA1+DATA2) */ {0x0020, 0x8801}, /* Register address for SSI read/write */ {0x0044, 0x8805}, /* DATA2 */ {0x0004, 0x8800}, /* DATA1 -> write triggered */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 10 */ {0x0010, 0x8802}, {0x0009, 0x8801}, {0x0042, 0x8805}, {0x0001, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 10 */ {0x0010, 0x8802}, {0x003c, 0x8801}, {0x0001, 0x8805}, {0x0000, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 10 */ {0x0010, 0x8802}, {0x0001, 0x8801}, {0x000a, 0x8805}, {0x0000, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 10 */ {0x0010, 0x8802}, {0x0002, 0x8801}, {0x0000, 0x8805}, {0x0000, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 10 */ {0x0010, 0x8802}, {0x0003, 0x8801}, {0x0027, 0x8805}, {0x0001, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 10 */ {0x0010, 0x8802}, {0x0004, 0x8801}, {0x0065, 0x8805}, {0x0001, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 10 */ {0x0010, 0x8802}, {0x0005, 0x8801}, {0x0003, 0x8805}, {0x0000, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 10 */ {0x0010, 0x8802}, {0x0006, 0x8801}, {0x001c, 0x8805}, {0x0000, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 10 */ {0x0010, 0x8802}, {0x0007, 0x8801}, {0x002a, 0x8805}, {0x0000, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 10 */ {0x0010, 0x8802}, {0x000e, 0x8801}, {0x0000, 0x8805}, {0x0000, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 10 */ {0x0010, 0x8802}, {0x0028, 0x8801}, {0x002e, 0x8805}, {0x0000, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 10 */ {0x0010, 0x8802}, {0x0039, 0x8801}, {0x0013, 0x8805}, {0x0000, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 10 */ {0x0010, 0x8802}, {0x003b, 0x8801}, {0x000c, 0x8805}, {0x0000, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 10 */ {0x0010, 0x8802}, {0x0035, 0x8801}, {0x0028, 0x8805}, {0x0000, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 10 */ {0x0010, 0x8802}, {0x0009, 0x8801}, {0x0042, 0x8805}, {0x0001, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ {0x0050, 0x8703}, {0x0002, 0x8704}, /* External input CKIx1 */ {0x0001, 0x870c}, /* Select CKOx2 output */ {0x009a, 0x8600}, /* Line memory Read Counter (L) */ {0x0001, 0x8606}, /* 1 Line memory Read Counter (H) Result: (d)410 */ {0x0023, 0x8601}, {0x0010, 0x8602}, {0x000a, 0x8603}, {0x009a, 0x8600}, {0x0001, 0x865b}, /* 1 Horizontal Offset for Valid Pixel(L) */ {0x0003, 0x865c}, /* Vertical offset for valid lines (L) */ {0x0058, 0x865d}, /* Horizontal valid pixels window (L) */ {0x0048, 0x865e}, /* Vertical valid lines window (L) */ {0x0000, 0x865f}, {0x0006, 0x8660}, /* Enable nibble data input, select nibble input order */ {0x0013, 0x8608}, /* A11 Coeficients for color correction */ {0x0028, 0x8609}, /* Note: these values are confirmed at the end of array */ {0x0005, 0x860a}, /* ... */ {0x0025, 0x860b}, {0x00e1, 0x860c}, {0x00fa, 0x860d}, {0x00f4, 0x860e}, {0x00e8, 0x860f}, {0x0025, 0x8610}, /* A33 Coef. */ {0x00fc, 0x8611}, /* White balance offset: R */ {0x0001, 0x8612}, /* White balance offset: Gr */ {0x00fe, 0x8613}, /* White balance offset: B */ {0x0000, 0x8614}, /* White balance offset: Gb */ {0x0064, 0x8651}, /* R gain for white balance (L) */ {0x0040, 0x8652}, /* Gr gain for white balance (L) */ {0x0066, 0x8653}, /* B gain for white balance (L) */ {0x0040, 0x8654}, /* Gb gain for white balance (L) */ {0x0001, 0x863f}, /* Enable fixed gamma correction */ {0x00a1, 0x8656}, /* Size - Window1: 256x256, Window2: 128x128, * UV division: UV no change, * Enable New edge enhancement */ {0x0018, 0x8657}, /* Edge gain high threshold */ {0x0020, 0x8658}, /* Edge gain low threshold */ {0x000a, 0x8659}, /* Edge bandwidth high threshold */ {0x0005, 0x865a}, /* Edge bandwidth low threshold */ {0x0064, 0x8607}, /* UV filter enable */ {0x0016, 0x8660}, {0x0000, 0x86b0}, /* Bad pixels compensation address */ {0x00dc, 0x86b1}, /* X coord for bad pixels compensation (L) */ {0x0000, 0x86b2}, {0x0009, 0x86b3}, /* Y coord for bad pixels compensation (L) */ {0x0000, 0x86b4}, {0x0001, 0x86b0}, {0x00f5, 0x86b1}, {0x0000, 0x86b2}, {0x00c6, 0x86b3}, {0x0000, 0x86b4}, {0x0002, 0x86b0}, {0x001c, 0x86b1}, {0x0001, 0x86b2}, {0x00d7, 0x86b3}, {0x0000, 0x86b4}, {0x0003, 0x86b0}, {0x001c, 0x86b1}, {0x0001, 0x86b2}, {0x00d8, 0x86b3}, {0x0000, 0x86b4}, {0x0004, 0x86b0}, {0x001d, 0x86b1}, {0x0001, 0x86b2}, {0x00d8, 0x86b3}, {0x0000, 0x86b4}, {0x001e, 0x8660}, /* READ { 0x0000, 0x8608 } -> 0000: 13 */ /* READ { 0x0000, 0x8609 } -> 0000: 28 */ /* READ { 0x0000, 0x8610 } -> 0000: 05 */ /* READ { 0x0000, 0x8611 } -> 0000: 25 */ /* READ { 0x0000, 0x8612 } -> 0000: e1 */ /* READ { 0x0000, 0x8613 } -> 0000: fa */ /* READ { 0x0000, 0x8614 } -> 0000: f4 */ /* READ { 0x0000, 0x8615 } -> 0000: e8 */ /* READ { 0x0000, 0x8616 } -> 0000: 25 */ {} }; static int reg_write(struct gspca_dev *gspca_dev, u16 index, u16 value) { int ret; struct usb_device *dev = gspca_dev->dev; ret = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), 0, /* request */ USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, index, NULL, 0, 500); gspca_dbg(gspca_dev, D_USBO, "reg write i:0x%04x = 0x%02x\n", index, value); if (ret < 0) pr_err("reg write: error %d\n", ret); return ret; } /* read 1 byte */ /* returns: negative is error, pos or zero is data */ static int reg_read(struct gspca_dev *gspca_dev, u16 index) /* wIndex */ { int ret; ret = usb_control_msg(gspca_dev->dev, usb_rcvctrlpipe(gspca_dev->dev, 0), 0, /* register */ USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, /* value */ index, gspca_dev->usb_buf, 1, 500); /* timeout */ gspca_dbg(gspca_dev, D_USBI, "reg read i:%04x --> %02x\n", index, gspca_dev->usb_buf[0]); if (ret < 0) { pr_err("reg_read err %d\n", ret); return ret; } return gspca_dev->usb_buf[0]; } /* send 1 or 2 bytes to the sensor via the Synchronous Serial Interface */ static int ssi_w(struct gspca_dev *gspca_dev, u16 reg, u16 val) { int ret, retry; ret = reg_write(gspca_dev, 0x8802, reg >> 8); if (ret < 0) goto out; ret = reg_write(gspca_dev, 0x8801, reg & 0x00ff); if (ret < 0) goto out; if ((reg & 0xff00) == 0x1000) { /* if 2 bytes */ ret = reg_write(gspca_dev, 0x8805, val & 0x00ff); if (ret < 0) goto out; val >>= 8; } ret = reg_write(gspca_dev, 0x8800, val); if (ret < 0) goto out; /* poll until not busy */ retry = 10; for (;;) { ret = reg_read(gspca_dev, 0x8803); if (ret < 0) break; if (gspca_dev->usb_buf[0] == 0) break; if (--retry <= 0) { gspca_err(gspca_dev, "ssi_w busy %02x\n", gspca_dev->usb_buf[0]); ret = -1; break; } msleep(8); } out: return ret; } static int write_vector(struct gspca_dev *gspca_dev, const u16 (*data)[2]) { int ret = 0; while ((*data)[1] != 0) { if ((*data)[1] & 0x8000) { if ((*data)[1] == 0xdd00) /* delay */ msleep((*data)[0]); else ret = reg_write(gspca_dev, (*data)[1], (*data)[0]); } else { ret = ssi_w(gspca_dev, (*data)[1], (*data)[0]); } if (ret < 0) break; data++; } return ret; } /* this function is called at probe time */ static int sd_config(struct gspca_dev *gspca_dev, const struct usb_device_id *id) { struct sd *sd = (struct sd *) gspca_dev; struct cam *cam; const u16 (*init_data)[2]; static const u16 (*(init_data_tb[]))[2] = { spca508_vista_init_data, /* CreativeVista 0 */ spca508_sightcam_init_data, /* HamaUSBSightcam 1 */ spca508_sightcam2_init_data, /* HamaUSBSightcam2 2 */ spca508cs110_init_data, /* IntelEasyPCCamera 3 */ spca508cs110_init_data, /* MicroInnovationIC200 4 */ spca508_init_data, /* ViewQuestVQ110 5 */ }; int data1, data2; /* Read from global register the USB product and vendor IDs, just to * prove that we can communicate with the device. This works, which * confirms at we are communicating properly and that the device * is a 508. */ data1 = reg_read(gspca_dev, 0x8104); data2 = reg_read(gspca_dev, 0x8105); gspca_dbg(gspca_dev, D_PROBE, "Webcam Vendor ID: 0x%02x%02x\n", data2, data1); data1 = reg_read(gspca_dev, 0x8106); data2 = reg_read(gspca_dev, 0x8107); gspca_dbg(gspca_dev, D_PROBE, "Webcam Product ID: 0x%02x%02x\n", data2, data1); data1 = reg_read(gspca_dev, 0x8621); gspca_dbg(gspca_dev, D_PROBE, "Window 1 average luminance: %d\n", data1); cam = &gspca_dev->cam; cam->cam_mode = sif_mode; cam->nmodes = ARRAY_SIZE(sif_mode); sd->subtype = id->driver_info; init_data = init_data_tb[sd->subtype]; return write_vector(gspca_dev, init_data); } /* this function is called at probe and resume time */ static int sd_init(struct gspca_dev *gspca_dev) { return 0; } static int sd_start(struct gspca_dev *gspca_dev) { int mode; mode = gspca_dev->cam.cam_mode[gspca_dev->curr_mode].priv; reg_write(gspca_dev, 0x8500, mode); switch (mode) { case 0: case 1: reg_write(gspca_dev, 0x8700, 0x28); /* clock */ break; default: /* case 2: */ /* case 3: */ reg_write(gspca_dev, 0x8700, 0x23); /* clock */ break; } reg_write(gspca_dev, 0x8112, 0x10 | 0x20); return 0; } static void sd_stopN(struct gspca_dev *gspca_dev) { /* Video ISO disable, Video Drop Packet enable: */ reg_write(gspca_dev, 0x8112, 0x20); } static void sd_pkt_scan(struct gspca_dev *gspca_dev, u8 *data, /* isoc packet */ int len) /* iso packet length */ { switch (data[0]) { case 0: /* start of frame */ gspca_frame_add(gspca_dev, LAST_PACKET, NULL, 0); data += SPCA508_OFFSET_DATA; len -= SPCA508_OFFSET_DATA; gspca_frame_add(gspca_dev, FIRST_PACKET, data, len); break; case 0xff: /* drop */ break; default: data += 1; len -= 1; gspca_frame_add(gspca_dev, INTER_PACKET, data, len); break; } } static void setbrightness(struct gspca_dev *gspca_dev, s32 brightness) { /* MX seem contrast */ reg_write(gspca_dev, 0x8651, brightness); reg_write(gspca_dev, 0x8652, brightness); reg_write(gspca_dev, 0x8653, brightness); reg_write(gspca_dev, 0x8654, brightness); } static int sd_s_ctrl(struct v4l2_ctrl *ctrl) { struct gspca_dev *gspca_dev = container_of(ctrl->handler, struct gspca_dev, ctrl_handler); gspca_dev->usb_err = 0; if (!gspca_dev->streaming) return 0; switch (ctrl->id) { case V4L2_CID_BRIGHTNESS: setbrightness(gspca_dev, ctrl->val); break; } return gspca_dev->usb_err; } static const struct v4l2_ctrl_ops sd_ctrl_ops = { .s_ctrl = sd_s_ctrl, }; static int sd_init_controls(struct gspca_dev *gspca_dev) { struct v4l2_ctrl_handler *hdl = &gspca_dev->ctrl_handler; gspca_dev->vdev.ctrl_handler = hdl; v4l2_ctrl_handler_init(hdl, 5); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_BRIGHTNESS, 0, 255, 1, 128); if (hdl->error) { pr_err("Could not initialize controls\n"); return hdl->error; } return 0; } /* sub-driver description */ static const struct sd_desc sd_desc = { .name = MODULE_NAME, .config = sd_config, .init = sd_init, .init_controls = sd_init_controls, .start = sd_start, .stopN = sd_stopN, .pkt_scan = sd_pkt_scan, }; /* -- module initialisation -- */ static const struct usb_device_id device_table[] = { {USB_DEVICE(0x0130, 0x0130), .driver_info = HamaUSBSightcam}, {USB_DEVICE(0x041e, 0x4018), .driver_info = CreativeVista}, {USB_DEVICE(0x0733, 0x0110), .driver_info = ViewQuestVQ110}, {USB_DEVICE(0x0af9, 0x0010), .driver_info = HamaUSBSightcam}, {USB_DEVICE(0x0af9, 0x0011), .driver_info = HamaUSBSightcam2}, {USB_DEVICE(0x8086, 0x0110), .driver_info = IntelEasyPCCamera}, {} }; MODULE_DEVICE_TABLE(usb, device_table); /* -- device connect -- */ static int sd_probe(struct usb_interface *intf, const struct usb_device_id *id) { return gspca_dev_probe(intf, id, &sd_desc, sizeof(struct sd), THIS_MODULE); } static struct usb_driver sd_driver = { .name = MODULE_NAME, .id_table = device_table, .probe = sd_probe, .disconnect = gspca_disconnect, #ifdef CONFIG_PM .suspend = gspca_suspend, .resume = gspca_resume, .reset_resume = gspca_resume, #endif }; module_usb_driver(sd_driver);
2 1 8 5 3 3 3 3 3 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 // SPDX-License-Identifier: GPL-2.0 /* USB Driver layer for GSM modems Copyright (C) 2005 Matthias Urlichs <smurf@smurf.noris.de> Portions copied from the Keyspan driver by Hugh Blemings <hugh@blemings.org> History: see the git log. Work sponsored by: Sigos GmbH, Germany <info@sigos.de> This driver exists because the "normal" serial driver doesn't work too well with GSM modems. Issues: - data loss -- one single Receive URB is not nearly enough - controlling the baud rate doesn't make sense */ #define DRIVER_AUTHOR "Matthias Urlichs <smurf@smurf.noris.de>" #define DRIVER_DESC "USB Driver for GSM modems" #include <linux/kernel.h> #include <linux/jiffies.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/tty.h> #include <linux/tty_flip.h> #include <linux/module.h> #include <linux/bitops.h> #include <linux/uaccess.h> #include <linux/usb.h> #include <linux/usb/cdc.h> #include <linux/usb/serial.h> #include <linux/serial.h> #include "usb-wwan.h" /* * Generate DTR/RTS signals on the port using the SET_CONTROL_LINE_STATE request * in CDC ACM. */ static int usb_wwan_send_setup(struct usb_serial_port *port) { struct usb_serial *serial = port->serial; struct usb_wwan_port_private *portdata; int val = 0; int ifnum; int res; portdata = usb_get_serial_port_data(port); if (portdata->dtr_state) val |= USB_CDC_CTRL_DTR; if (portdata->rts_state) val |= USB_CDC_CTRL_RTS; ifnum = serial->interface->cur_altsetting->desc.bInterfaceNumber; res = usb_autopm_get_interface(serial->interface); if (res) return res; res = usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0), USB_CDC_REQ_SET_CONTROL_LINE_STATE, USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE, val, ifnum, NULL, 0, USB_CTRL_SET_TIMEOUT); usb_autopm_put_interface(port->serial->interface); return res; } void usb_wwan_dtr_rts(struct usb_serial_port *port, int on) { struct usb_wwan_port_private *portdata; struct usb_wwan_intf_private *intfdata; intfdata = usb_get_serial_data(port->serial); if (!intfdata->use_send_setup) return; portdata = usb_get_serial_port_data(port); /* FIXME: locking */ portdata->rts_state = on; portdata->dtr_state = on; usb_wwan_send_setup(port); } EXPORT_SYMBOL(usb_wwan_dtr_rts); int usb_wwan_tiocmget(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; unsigned int value; struct usb_wwan_port_private *portdata; portdata = usb_get_serial_port_data(port); value = ((portdata->rts_state) ? TIOCM_RTS : 0) | ((portdata->dtr_state) ? TIOCM_DTR : 0) | ((portdata->cts_state) ? TIOCM_CTS : 0) | ((portdata->dsr_state) ? TIOCM_DSR : 0) | ((portdata->dcd_state) ? TIOCM_CAR : 0) | ((portdata->ri_state) ? TIOCM_RNG : 0); return value; } EXPORT_SYMBOL(usb_wwan_tiocmget); int usb_wwan_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear) { struct usb_serial_port *port = tty->driver_data; struct usb_wwan_port_private *portdata; struct usb_wwan_intf_private *intfdata; portdata = usb_get_serial_port_data(port); intfdata = usb_get_serial_data(port->serial); if (!intfdata->use_send_setup) return -EINVAL; /* FIXME: what locks portdata fields ? */ if (set & TIOCM_RTS) portdata->rts_state = 1; if (set & TIOCM_DTR) portdata->dtr_state = 1; if (clear & TIOCM_RTS) portdata->rts_state = 0; if (clear & TIOCM_DTR) portdata->dtr_state = 0; return usb_wwan_send_setup(port); } EXPORT_SYMBOL(usb_wwan_tiocmset); int usb_wwan_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count) { struct usb_wwan_port_private *portdata; struct usb_wwan_intf_private *intfdata; int i; int left, todo; struct urb *this_urb = NULL; /* spurious */ int err; unsigned long flags; portdata = usb_get_serial_port_data(port); intfdata = usb_get_serial_data(port->serial); dev_dbg(&port->dev, "%s: write (%d chars)\n", __func__, count); left = count; for (i = 0; left > 0 && i < N_OUT_URB; i++) { todo = left; if (todo > OUT_BUFLEN) todo = OUT_BUFLEN; this_urb = portdata->out_urbs[i]; if (test_and_set_bit(i, &portdata->out_busy)) { if (time_before(jiffies, portdata->tx_start_time[i] + 10 * HZ)) continue; usb_unlink_urb(this_urb); continue; } dev_dbg(&port->dev, "%s: endpoint %d buf %d\n", __func__, usb_pipeendpoint(this_urb->pipe), i); err = usb_autopm_get_interface_async(port->serial->interface); if (err < 0) { clear_bit(i, &portdata->out_busy); break; } /* send the data */ memcpy(this_urb->transfer_buffer, buf, todo); this_urb->transfer_buffer_length = todo; spin_lock_irqsave(&intfdata->susp_lock, flags); if (intfdata->suspended) { usb_anchor_urb(this_urb, &portdata->delayed); spin_unlock_irqrestore(&intfdata->susp_lock, flags); } else { intfdata->in_flight++; spin_unlock_irqrestore(&intfdata->susp_lock, flags); err = usb_submit_urb(this_urb, GFP_ATOMIC); if (err) { dev_err(&port->dev, "%s: submit urb %d failed: %d\n", __func__, i, err); clear_bit(i, &portdata->out_busy); spin_lock_irqsave(&intfdata->susp_lock, flags); intfdata->in_flight--; spin_unlock_irqrestore(&intfdata->susp_lock, flags); usb_autopm_put_interface_async(port->serial->interface); break; } } portdata->tx_start_time[i] = jiffies; buf += todo; left -= todo; } count -= left; dev_dbg(&port->dev, "%s: wrote (did %d)\n", __func__, count); return count; } EXPORT_SYMBOL(usb_wwan_write); static void usb_wwan_indat_callback(struct urb *urb) { int err; int endpoint; struct usb_serial_port *port; struct device *dev; unsigned char *data = urb->transfer_buffer; int status = urb->status; endpoint = usb_pipeendpoint(urb->pipe); port = urb->context; dev = &port->dev; if (status) { dev_dbg(dev, "%s: nonzero status: %d on endpoint %02x.\n", __func__, status, endpoint); /* don't resubmit on fatal errors */ if (status == -ESHUTDOWN || status == -ENOENT) return; } else { if (urb->actual_length) { tty_insert_flip_string(&port->port, data, urb->actual_length); tty_flip_buffer_push(&port->port); } else dev_dbg(dev, "%s: empty read urb received\n", __func__); } /* Resubmit urb so we continue receiving */ err = usb_submit_urb(urb, GFP_ATOMIC); if (err) { if (err != -EPERM && err != -ENODEV) { dev_err(dev, "%s: resubmit read urb failed. (%d)\n", __func__, err); /* busy also in error unless we are killed */ usb_mark_last_busy(port->serial->dev); } } else { usb_mark_last_busy(port->serial->dev); } } static void usb_wwan_outdat_callback(struct urb *urb) { struct usb_serial_port *port; struct usb_wwan_port_private *portdata; struct usb_wwan_intf_private *intfdata; unsigned long flags; int i; port = urb->context; intfdata = usb_get_serial_data(port->serial); usb_serial_port_softint(port); usb_autopm_put_interface_async(port->serial->interface); portdata = usb_get_serial_port_data(port); spin_lock_irqsave(&intfdata->susp_lock, flags); intfdata->in_flight--; spin_unlock_irqrestore(&intfdata->susp_lock, flags); for (i = 0; i < N_OUT_URB; ++i) { if (portdata->out_urbs[i] == urb) { smp_mb__before_atomic(); clear_bit(i, &portdata->out_busy); break; } } } unsigned int usb_wwan_write_room(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct usb_wwan_port_private *portdata; int i; unsigned int data_len = 0; struct urb *this_urb; portdata = usb_get_serial_port_data(port); for (i = 0; i < N_OUT_URB; i++) { this_urb = portdata->out_urbs[i]; if (this_urb && !test_bit(i, &portdata->out_busy)) data_len += OUT_BUFLEN; } dev_dbg(&port->dev, "%s: %u\n", __func__, data_len); return data_len; } EXPORT_SYMBOL(usb_wwan_write_room); unsigned int usb_wwan_chars_in_buffer(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct usb_wwan_port_private *portdata; int i; unsigned int data_len = 0; struct urb *this_urb; portdata = usb_get_serial_port_data(port); for (i = 0; i < N_OUT_URB; i++) { this_urb = portdata->out_urbs[i]; /* FIXME: This locking is insufficient as this_urb may go unused during the test */ if (this_urb && test_bit(i, &portdata->out_busy)) data_len += this_urb->transfer_buffer_length; } dev_dbg(&port->dev, "%s: %u\n", __func__, data_len); return data_len; } EXPORT_SYMBOL(usb_wwan_chars_in_buffer); int usb_wwan_open(struct tty_struct *tty, struct usb_serial_port *port) { struct usb_wwan_port_private *portdata; struct usb_wwan_intf_private *intfdata; struct usb_serial *serial = port->serial; int i, err; struct urb *urb; portdata = usb_get_serial_port_data(port); intfdata = usb_get_serial_data(serial); if (port->interrupt_in_urb) { err = usb_submit_urb(port->interrupt_in_urb, GFP_KERNEL); if (err) { dev_err(&port->dev, "%s: submit int urb failed: %d\n", __func__, err); } } /* Start reading from the IN endpoint */ for (i = 0; i < N_IN_URB; i++) { urb = portdata->in_urbs[i]; if (!urb) continue; err = usb_submit_urb(urb, GFP_KERNEL); if (err) { dev_err(&port->dev, "%s: submit read urb %d failed: %d\n", __func__, i, err); } } spin_lock_irq(&intfdata->susp_lock); if (++intfdata->open_ports == 1) serial->interface->needs_remote_wakeup = 1; spin_unlock_irq(&intfdata->susp_lock); /* this balances a get in the generic USB serial code */ usb_autopm_put_interface(serial->interface); return 0; } EXPORT_SYMBOL(usb_wwan_open); static void unbusy_queued_urb(struct urb *urb, struct usb_wwan_port_private *portdata) { int i; for (i = 0; i < N_OUT_URB; i++) { if (urb == portdata->out_urbs[i]) { clear_bit(i, &portdata->out_busy); break; } } } void usb_wwan_close(struct usb_serial_port *port) { int i; struct usb_serial *serial = port->serial; struct usb_wwan_port_private *portdata; struct usb_wwan_intf_private *intfdata = usb_get_serial_data(serial); struct urb *urb; portdata = usb_get_serial_port_data(port); /* * Need to take susp_lock to make sure port is not already being * resumed, but no need to hold it due to the tty-port initialized * flag. */ spin_lock_irq(&intfdata->susp_lock); if (--intfdata->open_ports == 0) serial->interface->needs_remote_wakeup = 0; spin_unlock_irq(&intfdata->susp_lock); for (;;) { urb = usb_get_from_anchor(&portdata->delayed); if (!urb) break; unbusy_queued_urb(urb, portdata); usb_autopm_put_interface_async(serial->interface); } for (i = 0; i < N_IN_URB; i++) usb_kill_urb(portdata->in_urbs[i]); for (i = 0; i < N_OUT_URB; i++) usb_kill_urb(portdata->out_urbs[i]); usb_kill_urb(port->interrupt_in_urb); usb_autopm_get_interface_no_resume(serial->interface); } EXPORT_SYMBOL(usb_wwan_close); static struct urb *usb_wwan_setup_urb(struct usb_serial_port *port, int endpoint, int dir, void *ctx, char *buf, int len, void (*callback) (struct urb *)) { struct usb_serial *serial = port->serial; struct usb_wwan_intf_private *intfdata = usb_get_serial_data(serial); struct urb *urb; urb = usb_alloc_urb(0, GFP_KERNEL); /* No ISO */ if (!urb) return NULL; usb_fill_bulk_urb(urb, serial->dev, usb_sndbulkpipe(serial->dev, endpoint) | dir, buf, len, callback, ctx); if (intfdata->use_zlp && dir == USB_DIR_OUT) urb->transfer_flags |= URB_ZERO_PACKET; return urb; } int usb_wwan_port_probe(struct usb_serial_port *port) { struct usb_wwan_port_private *portdata; struct urb *urb; u8 *buffer; int i; if (!port->bulk_in_size || !port->bulk_out_size) return -ENODEV; portdata = kzalloc(sizeof(*portdata), GFP_KERNEL); if (!portdata) return -ENOMEM; init_usb_anchor(&portdata->delayed); for (i = 0; i < N_IN_URB; i++) { buffer = (u8 *)__get_free_page(GFP_KERNEL); if (!buffer) goto bail_out_error; portdata->in_buffer[i] = buffer; urb = usb_wwan_setup_urb(port, port->bulk_in_endpointAddress, USB_DIR_IN, port, buffer, IN_BUFLEN, usb_wwan_indat_callback); portdata->in_urbs[i] = urb; } for (i = 0; i < N_OUT_URB; i++) { buffer = kmalloc(OUT_BUFLEN, GFP_KERNEL); if (!buffer) goto bail_out_error2; portdata->out_buffer[i] = buffer; urb = usb_wwan_setup_urb(port, port->bulk_out_endpointAddress, USB_DIR_OUT, port, buffer, OUT_BUFLEN, usb_wwan_outdat_callback); portdata->out_urbs[i] = urb; } usb_set_serial_port_data(port, portdata); return 0; bail_out_error2: for (i = 0; i < N_OUT_URB; i++) { usb_free_urb(portdata->out_urbs[i]); kfree(portdata->out_buffer[i]); } bail_out_error: for (i = 0; i < N_IN_URB; i++) { usb_free_urb(portdata->in_urbs[i]); free_page((unsigned long)portdata->in_buffer[i]); } kfree(portdata); return -ENOMEM; } EXPORT_SYMBOL_GPL(usb_wwan_port_probe); void usb_wwan_port_remove(struct usb_serial_port *port) { int i; struct usb_wwan_port_private *portdata; portdata = usb_get_serial_port_data(port); usb_set_serial_port_data(port, NULL); for (i = 0; i < N_IN_URB; i++) { usb_free_urb(portdata->in_urbs[i]); free_page((unsigned long)portdata->in_buffer[i]); } for (i = 0; i < N_OUT_URB; i++) { usb_free_urb(portdata->out_urbs[i]); kfree(portdata->out_buffer[i]); } kfree(portdata); } EXPORT_SYMBOL(usb_wwan_port_remove); #ifdef CONFIG_PM static void stop_urbs(struct usb_serial *serial) { int i, j; struct usb_serial_port *port; struct usb_wwan_port_private *portdata; for (i = 0; i < serial->num_ports; ++i) { port = serial->port[i]; portdata = usb_get_serial_port_data(port); if (!portdata) continue; for (j = 0; j < N_IN_URB; j++) usb_kill_urb(portdata->in_urbs[j]); for (j = 0; j < N_OUT_URB; j++) usb_kill_urb(portdata->out_urbs[j]); usb_kill_urb(port->interrupt_in_urb); } } int usb_wwan_suspend(struct usb_serial *serial, pm_message_t message) { struct usb_wwan_intf_private *intfdata = usb_get_serial_data(serial); spin_lock_irq(&intfdata->susp_lock); if (PMSG_IS_AUTO(message)) { if (intfdata->in_flight) { spin_unlock_irq(&intfdata->susp_lock); return -EBUSY; } } intfdata->suspended = 1; spin_unlock_irq(&intfdata->susp_lock); stop_urbs(serial); return 0; } EXPORT_SYMBOL(usb_wwan_suspend); /* Caller must hold susp_lock. */ static int usb_wwan_submit_delayed_urbs(struct usb_serial_port *port) { struct usb_serial *serial = port->serial; struct usb_wwan_intf_private *data = usb_get_serial_data(serial); struct usb_wwan_port_private *portdata; struct urb *urb; int err_count = 0; int err; portdata = usb_get_serial_port_data(port); for (;;) { urb = usb_get_from_anchor(&portdata->delayed); if (!urb) break; err = usb_submit_urb(urb, GFP_ATOMIC); if (err) { dev_err(&port->dev, "%s: submit urb failed: %d\n", __func__, err); err_count++; unbusy_queued_urb(urb, portdata); usb_autopm_put_interface_async(serial->interface); continue; } data->in_flight++; } if (err_count) return -EIO; return 0; } int usb_wwan_resume(struct usb_serial *serial) { int i, j; struct usb_serial_port *port; struct usb_wwan_intf_private *intfdata = usb_get_serial_data(serial); struct usb_wwan_port_private *portdata; struct urb *urb; int err; int err_count = 0; spin_lock_irq(&intfdata->susp_lock); for (i = 0; i < serial->num_ports; i++) { port = serial->port[i]; if (!tty_port_initialized(&port->port)) continue; portdata = usb_get_serial_port_data(port); if (port->interrupt_in_urb) { err = usb_submit_urb(port->interrupt_in_urb, GFP_ATOMIC); if (err) { dev_err(&port->dev, "%s: submit int urb failed: %d\n", __func__, err); err_count++; } } err = usb_wwan_submit_delayed_urbs(port); if (err) err_count++; for (j = 0; j < N_IN_URB; j++) { urb = portdata->in_urbs[j]; err = usb_submit_urb(urb, GFP_ATOMIC); if (err < 0) { dev_err(&port->dev, "%s: submit read urb %d failed: %d\n", __func__, i, err); err_count++; } } } intfdata->suspended = 0; spin_unlock_irq(&intfdata->susp_lock); if (err_count) return -EIO; return 0; } EXPORT_SYMBOL(usb_wwan_resume); #endif MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL v2");
2 2 1 1 1 2 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 // SPDX-License-Identifier: GPL-2.0-only /****************************************************************************** ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. ** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. ** ** ******************************************************************************* ******************************************************************************/ #include <linux/module.h> #include "dlm_internal.h" #include "lockspace.h" #include "member.h" #include "recoverd.h" #include "dir.h" #include "midcomms.h" #include "config.h" #include "memory.h" #include "lock.h" #include "recover.h" #include "requestqueue.h" #include "user.h" #include "ast.h" static int ls_count; static struct mutex ls_lock; static struct list_head lslist; static spinlock_t lslist_lock; static ssize_t dlm_control_store(struct dlm_ls *ls, const char *buf, size_t len) { ssize_t ret = len; int n; int rc = kstrtoint(buf, 0, &n); if (rc) return rc; ls = dlm_find_lockspace_local(ls); if (!ls) return -EINVAL; switch (n) { case 0: dlm_ls_stop(ls); break; case 1: dlm_ls_start(ls); break; default: ret = -EINVAL; } dlm_put_lockspace(ls); return ret; } static ssize_t dlm_event_store(struct dlm_ls *ls, const char *buf, size_t len) { int rc = kstrtoint(buf, 0, &ls->ls_uevent_result); if (rc) return rc; set_bit(LSFL_UEVENT_WAIT, &ls->ls_flags); wake_up(&ls->ls_uevent_wait); return len; } static ssize_t dlm_id_show(struct dlm_ls *ls, char *buf) { return snprintf(buf, PAGE_SIZE, "%u\n", ls->ls_global_id); } static ssize_t dlm_id_store(struct dlm_ls *ls, const char *buf, size_t len) { int rc = kstrtouint(buf, 0, &ls->ls_global_id); if (rc) return rc; return len; } static ssize_t dlm_nodir_show(struct dlm_ls *ls, char *buf) { return snprintf(buf, PAGE_SIZE, "%u\n", dlm_no_directory(ls)); } static ssize_t dlm_nodir_store(struct dlm_ls *ls, const char *buf, size_t len) { int val; int rc = kstrtoint(buf, 0, &val); if (rc) return rc; if (val == 1) set_bit(LSFL_NODIR, &ls->ls_flags); return len; } static ssize_t dlm_recover_status_show(struct dlm_ls *ls, char *buf) { uint32_t status = dlm_recover_status(ls); return snprintf(buf, PAGE_SIZE, "%x\n", status); } static ssize_t dlm_recover_nodeid_show(struct dlm_ls *ls, char *buf) { return snprintf(buf, PAGE_SIZE, "%d\n", ls->ls_recover_nodeid); } struct dlm_attr { struct attribute attr; ssize_t (*show)(struct dlm_ls *, char *); ssize_t (*store)(struct dlm_ls *, const char *, size_t); }; static struct dlm_attr dlm_attr_control = { .attr = {.name = "control", .mode = S_IWUSR}, .store = dlm_control_store }; static struct dlm_attr dlm_attr_event = { .attr = {.name = "event_done", .mode = S_IWUSR}, .store = dlm_event_store }; static struct dlm_attr dlm_attr_id = { .attr = {.name = "id", .mode = S_IRUGO | S_IWUSR}, .show = dlm_id_show, .store = dlm_id_store }; static struct dlm_attr dlm_attr_nodir = { .attr = {.name = "nodir", .mode = S_IRUGO | S_IWUSR}, .show = dlm_nodir_show, .store = dlm_nodir_store }; static struct dlm_attr dlm_attr_recover_status = { .attr = {.name = "recover_status", .mode = S_IRUGO}, .show = dlm_recover_status_show }; static struct dlm_attr dlm_attr_recover_nodeid = { .attr = {.name = "recover_nodeid", .mode = S_IRUGO}, .show = dlm_recover_nodeid_show }; static struct attribute *dlm_attrs[] = { &dlm_attr_control.attr, &dlm_attr_event.attr, &dlm_attr_id.attr, &dlm_attr_nodir.attr, &dlm_attr_recover_status.attr, &dlm_attr_recover_nodeid.attr, NULL, }; ATTRIBUTE_GROUPS(dlm); static ssize_t dlm_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { struct dlm_ls *ls = container_of(kobj, struct dlm_ls, ls_kobj); struct dlm_attr *a = container_of(attr, struct dlm_attr, attr); return a->show ? a->show(ls, buf) : 0; } static ssize_t dlm_attr_store(struct kobject *kobj, struct attribute *attr, const char *buf, size_t len) { struct dlm_ls *ls = container_of(kobj, struct dlm_ls, ls_kobj); struct dlm_attr *a = container_of(attr, struct dlm_attr, attr); return a->store ? a->store(ls, buf, len) : len; } static const struct sysfs_ops dlm_attr_ops = { .show = dlm_attr_show, .store = dlm_attr_store, }; static struct kobj_type dlm_ktype = { .default_groups = dlm_groups, .sysfs_ops = &dlm_attr_ops, }; static struct kset *dlm_kset; static int do_uevent(struct dlm_ls *ls, int in, unsigned int release_recover) { char message[512] = {}; char *envp[] = { message, NULL }; if (in) { kobject_uevent(&ls->ls_kobj, KOBJ_ONLINE); } else { snprintf(message, 511, "RELEASE_RECOVER=%u", release_recover); kobject_uevent_env(&ls->ls_kobj, KOBJ_OFFLINE, envp); } log_rinfo(ls, "%s the lockspace group...", in ? "joining" : "leaving"); /* dlm_controld will see the uevent, do the necessary group management and then write to sysfs to wake us */ wait_event(ls->ls_uevent_wait, test_and_clear_bit(LSFL_UEVENT_WAIT, &ls->ls_flags)); log_rinfo(ls, "group event done %d", ls->ls_uevent_result); return ls->ls_uevent_result; } static int dlm_uevent(const struct kobject *kobj, struct kobj_uevent_env *env) { const struct dlm_ls *ls = container_of(kobj, struct dlm_ls, ls_kobj); add_uevent_var(env, "LOCKSPACE=%s", ls->ls_name); return 0; } static const struct kset_uevent_ops dlm_uevent_ops = { .uevent = dlm_uevent, }; int __init dlm_lockspace_init(void) { ls_count = 0; mutex_init(&ls_lock); INIT_LIST_HEAD(&lslist); spin_lock_init(&lslist_lock); dlm_kset = kset_create_and_add("dlm", &dlm_uevent_ops, kernel_kobj); if (!dlm_kset) { printk(KERN_WARNING "%s: can not create kset\n", __func__); return -ENOMEM; } return 0; } void dlm_lockspace_exit(void) { kset_unregister(dlm_kset); } struct dlm_ls *dlm_find_lockspace_global(uint32_t id) { struct dlm_ls *ls; spin_lock_bh(&lslist_lock); list_for_each_entry(ls, &lslist, ls_list) { if (ls->ls_global_id == id) { atomic_inc(&ls->ls_count); goto out; } } ls = NULL; out: spin_unlock_bh(&lslist_lock); return ls; } struct dlm_ls *dlm_find_lockspace_local(dlm_lockspace_t *lockspace) { struct dlm_ls *ls = lockspace; atomic_inc(&ls->ls_count); return ls; } struct dlm_ls *dlm_find_lockspace_device(int minor) { struct dlm_ls *ls; spin_lock_bh(&lslist_lock); list_for_each_entry(ls, &lslist, ls_list) { if (ls->ls_device.minor == minor) { atomic_inc(&ls->ls_count); goto out; } } ls = NULL; out: spin_unlock_bh(&lslist_lock); return ls; } void dlm_put_lockspace(struct dlm_ls *ls) { if (atomic_dec_and_test(&ls->ls_count)) wake_up(&ls->ls_count_wait); } static void remove_lockspace(struct dlm_ls *ls) { retry: wait_event(ls->ls_count_wait, atomic_read(&ls->ls_count) == 0); spin_lock_bh(&lslist_lock); if (atomic_read(&ls->ls_count) != 0) { spin_unlock_bh(&lslist_lock); goto retry; } WARN_ON(ls->ls_create_count != 0); list_del(&ls->ls_list); spin_unlock_bh(&lslist_lock); } static int threads_start(void) { int error; /* Thread for sending/receiving messages for all lockspace's */ error = dlm_midcomms_start(); if (error) log_print("cannot start dlm midcomms %d", error); return error; } static int lkb_idr_free(struct dlm_lkb *lkb) { if (lkb->lkb_lvbptr && test_bit(DLM_IFL_MSTCPY_BIT, &lkb->lkb_iflags)) dlm_free_lvb(lkb->lkb_lvbptr); dlm_free_lkb(lkb); return 0; } static void rhash_free_rsb(void *ptr, void *arg) { struct dlm_rsb *rsb = ptr; dlm_free_rsb(rsb); } static void free_lockspace(struct work_struct *work) { struct dlm_ls *ls = container_of(work, struct dlm_ls, ls_free_work); struct dlm_lkb *lkb; unsigned long id; /* * Free all lkb's in xa */ xa_for_each(&ls->ls_lkbxa, id, lkb) { lkb_idr_free(lkb); } xa_destroy(&ls->ls_lkbxa); /* * Free all rsb's on rsbtbl */ rhashtable_free_and_destroy(&ls->ls_rsbtbl, rhash_free_rsb, NULL); kfree(ls); } static int new_lockspace(const char *name, const char *cluster, uint32_t flags, int lvblen, const struct dlm_lockspace_ops *ops, void *ops_arg, int *ops_result, dlm_lockspace_t **lockspace) { struct dlm_ls *ls; int namelen = strlen(name); int error; if (namelen > DLM_LOCKSPACE_LEN || namelen == 0) return -EINVAL; if (lvblen % 8) return -EINVAL; if (!try_module_get(THIS_MODULE)) return -EINVAL; if (!dlm_user_daemon_available()) { log_print("dlm user daemon not available"); error = -EUNATCH; goto out; } if (ops && ops_result) { if (!dlm_config.ci_recover_callbacks) *ops_result = -EOPNOTSUPP; else *ops_result = 0; } if (!cluster) log_print("dlm cluster name '%s' is being used without an application provided cluster name", dlm_config.ci_cluster_name); if (dlm_config.ci_recover_callbacks && cluster && strncmp(cluster, dlm_config.ci_cluster_name, DLM_LOCKSPACE_LEN)) { log_print("dlm cluster name '%s' does not match " "the application cluster name '%s'", dlm_config.ci_cluster_name, cluster); error = -EBADR; goto out; } error = 0; spin_lock_bh(&lslist_lock); list_for_each_entry(ls, &lslist, ls_list) { WARN_ON(ls->ls_create_count <= 0); if (ls->ls_namelen != namelen) continue; if (memcmp(ls->ls_name, name, namelen)) continue; if (flags & DLM_LSFL_NEWEXCL) { error = -EEXIST; break; } ls->ls_create_count++; *lockspace = ls; error = 1; break; } spin_unlock_bh(&lslist_lock); if (error) goto out; error = -ENOMEM; ls = kzalloc(sizeof(*ls), GFP_NOFS); if (!ls) goto out; memcpy(ls->ls_name, name, namelen); ls->ls_namelen = namelen; ls->ls_lvblen = lvblen; atomic_set(&ls->ls_count, 0); init_waitqueue_head(&ls->ls_count_wait); ls->ls_flags = 0; if (ops && dlm_config.ci_recover_callbacks) { ls->ls_ops = ops; ls->ls_ops_arg = ops_arg; } if (flags & DLM_LSFL_SOFTIRQ) set_bit(LSFL_SOFTIRQ, &ls->ls_flags); /* ls_exflags are forced to match among nodes, and we don't * need to require all nodes to have some flags set */ ls->ls_exflags = (flags & ~(DLM_LSFL_FS | DLM_LSFL_NEWEXCL | DLM_LSFL_SOFTIRQ)); INIT_LIST_HEAD(&ls->ls_slow_inactive); INIT_LIST_HEAD(&ls->ls_slow_active); rwlock_init(&ls->ls_rsbtbl_lock); error = rhashtable_init(&ls->ls_rsbtbl, &dlm_rhash_rsb_params); if (error) goto out_lsfree; xa_init_flags(&ls->ls_lkbxa, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_BH); rwlock_init(&ls->ls_lkbxa_lock); INIT_LIST_HEAD(&ls->ls_waiters); spin_lock_init(&ls->ls_waiters_lock); INIT_LIST_HEAD(&ls->ls_orphans); spin_lock_init(&ls->ls_orphans_lock); INIT_LIST_HEAD(&ls->ls_nodes); INIT_LIST_HEAD(&ls->ls_nodes_gone); ls->ls_num_nodes = 0; ls->ls_low_nodeid = 0; ls->ls_total_weight = 0; ls->ls_node_array = NULL; memset(&ls->ls_local_rsb, 0, sizeof(struct dlm_rsb)); ls->ls_local_rsb.res_ls = ls; ls->ls_debug_rsb_dentry = NULL; ls->ls_debug_waiters_dentry = NULL; init_waitqueue_head(&ls->ls_uevent_wait); ls->ls_uevent_result = 0; init_completion(&ls->ls_recovery_done); ls->ls_recovery_result = -1; spin_lock_init(&ls->ls_cb_lock); INIT_LIST_HEAD(&ls->ls_cb_delay); INIT_WORK(&ls->ls_free_work, free_lockspace); ls->ls_recoverd_task = NULL; mutex_init(&ls->ls_recoverd_active); spin_lock_init(&ls->ls_recover_lock); spin_lock_init(&ls->ls_rcom_spin); get_random_bytes(&ls->ls_rcom_seq, sizeof(uint64_t)); ls->ls_recover_status = 0; ls->ls_recover_seq = get_random_u64(); ls->ls_recover_args = NULL; init_rwsem(&ls->ls_in_recovery); rwlock_init(&ls->ls_recv_active); INIT_LIST_HEAD(&ls->ls_requestqueue); rwlock_init(&ls->ls_requestqueue_lock); spin_lock_init(&ls->ls_clear_proc_locks); /* Due backwards compatibility with 3.1 we need to use maximum * possible dlm message size to be sure the message will fit and * not having out of bounds issues. However on sending side 3.2 * might send less. */ ls->ls_recover_buf = kmalloc(DLM_MAX_SOCKET_BUFSIZE, GFP_NOFS); if (!ls->ls_recover_buf) { error = -ENOMEM; goto out_lkbxa; } ls->ls_slot = 0; ls->ls_num_slots = 0; ls->ls_slots_size = 0; ls->ls_slots = NULL; INIT_LIST_HEAD(&ls->ls_recover_list); spin_lock_init(&ls->ls_recover_list_lock); xa_init_flags(&ls->ls_recover_xa, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_BH); spin_lock_init(&ls->ls_recover_xa_lock); ls->ls_recover_list_count = 0; init_waitqueue_head(&ls->ls_wait_general); INIT_LIST_HEAD(&ls->ls_masters_list); rwlock_init(&ls->ls_masters_lock); INIT_LIST_HEAD(&ls->ls_dir_dump_list); rwlock_init(&ls->ls_dir_dump_lock); INIT_LIST_HEAD(&ls->ls_scan_list); spin_lock_init(&ls->ls_scan_lock); timer_setup(&ls->ls_scan_timer, dlm_rsb_scan, TIMER_DEFERRABLE); spin_lock_bh(&lslist_lock); ls->ls_create_count = 1; list_add(&ls->ls_list, &lslist); spin_unlock_bh(&lslist_lock); if (flags & DLM_LSFL_FS) set_bit(LSFL_FS, &ls->ls_flags); error = dlm_callback_start(ls); if (error) { log_error(ls, "can't start dlm_callback %d", error); goto out_delist; } init_waitqueue_head(&ls->ls_recover_lock_wait); /* * Once started, dlm_recoverd first looks for ls in lslist, then * initializes ls_in_recovery as locked in "down" mode. We need * to wait for the wakeup from dlm_recoverd because in_recovery * has to start out in down mode. */ error = dlm_recoverd_start(ls); if (error) { log_error(ls, "can't start dlm_recoverd %d", error); goto out_callback; } wait_event(ls->ls_recover_lock_wait, test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags)); ls->ls_kobj.kset = dlm_kset; error = kobject_init_and_add(&ls->ls_kobj, &dlm_ktype, NULL, "%s", ls->ls_name); if (error) goto out_recoverd; kobject_uevent(&ls->ls_kobj, KOBJ_ADD); /* This uevent triggers dlm_controld in userspace to add us to the group of nodes that are members of this lockspace (managed by the cluster infrastructure.) Once it's done that, it tells us who the current lockspace members are (via configfs) and then tells the lockspace to start running (via sysfs) in dlm_ls_start(). */ error = do_uevent(ls, 1, 0); if (error < 0) goto out_recoverd; /* wait until recovery is successful or failed */ wait_for_completion(&ls->ls_recovery_done); error = ls->ls_recovery_result; if (error) goto out_members; dlm_create_debug_file(ls); log_rinfo(ls, "join complete"); *lockspace = ls; return 0; out_members: do_uevent(ls, 0, 0); dlm_clear_members(ls); kfree(ls->ls_node_array); out_recoverd: dlm_recoverd_stop(ls); out_callback: dlm_callback_stop(ls); out_delist: spin_lock_bh(&lslist_lock); list_del(&ls->ls_list); spin_unlock_bh(&lslist_lock); xa_destroy(&ls->ls_recover_xa); kfree(ls->ls_recover_buf); out_lkbxa: xa_destroy(&ls->ls_lkbxa); rhashtable_destroy(&ls->ls_rsbtbl); out_lsfree: kobject_put(&ls->ls_kobj); kfree(ls); out: module_put(THIS_MODULE); return error; } static int __dlm_new_lockspace(const char *name, const char *cluster, uint32_t flags, int lvblen, const struct dlm_lockspace_ops *ops, void *ops_arg, int *ops_result, dlm_lockspace_t **lockspace) { int error = 0; mutex_lock(&ls_lock); if (!ls_count) error = threads_start(); if (error) goto out; error = new_lockspace(name, cluster, flags, lvblen, ops, ops_arg, ops_result, lockspace); if (!error) ls_count++; if (error > 0) error = 0; if (!ls_count) { dlm_midcomms_shutdown(); dlm_midcomms_stop(); } out: mutex_unlock(&ls_lock); return error; } int dlm_new_lockspace(const char *name, const char *cluster, uint32_t flags, int lvblen, const struct dlm_lockspace_ops *ops, void *ops_arg, int *ops_result, dlm_lockspace_t **lockspace) { return __dlm_new_lockspace(name, cluster, flags | DLM_LSFL_FS, lvblen, ops, ops_arg, ops_result, lockspace); } int dlm_new_user_lockspace(const char *name, const char *cluster, uint32_t flags, int lvblen, const struct dlm_lockspace_ops *ops, void *ops_arg, int *ops_result, dlm_lockspace_t **lockspace) { if (flags & DLM_LSFL_SOFTIRQ) return -EINVAL; return __dlm_new_lockspace(name, cluster, flags, lvblen, ops, ops_arg, ops_result, lockspace); } /* NOTE: We check the lkbxa here rather than the resource table. This is because there may be LKBs queued as ASTs that have been unlinked from their RSBs and are pending deletion once the AST has been delivered */ static int lockspace_busy(struct dlm_ls *ls, unsigned int release_option) { struct dlm_lkb *lkb; unsigned long id; int rv = 0; read_lock_bh(&ls->ls_lkbxa_lock); if (release_option == DLM_RELEASE_NO_LOCKS) { xa_for_each(&ls->ls_lkbxa, id, lkb) { rv = 1; break; } } else if (release_option == DLM_RELEASE_UNUSED) { /* TODO: handle this UNUSED option as NO_LOCKS in later patch */ xa_for_each(&ls->ls_lkbxa, id, lkb) { if (lkb->lkb_nodeid == 0 && lkb->lkb_grmode != DLM_LOCK_IV) { rv = 1; break; } } } else { rv = 0; } read_unlock_bh(&ls->ls_lkbxa_lock); return rv; } static int release_lockspace(struct dlm_ls *ls, unsigned int release_option) { int busy, rv; busy = lockspace_busy(ls, release_option); spin_lock_bh(&lslist_lock); if (ls->ls_create_count == 1) { if (busy) { rv = -EBUSY; } else { /* remove_lockspace takes ls off lslist */ ls->ls_create_count = 0; rv = 0; } } else if (ls->ls_create_count > 1) { rv = --ls->ls_create_count; } else { rv = -EINVAL; } spin_unlock_bh(&lslist_lock); if (rv) { log_debug(ls, "release_lockspace no remove %d", rv); return rv; } if (ls_count == 1) dlm_midcomms_version_wait(); dlm_device_deregister(ls); if (release_option != DLM_RELEASE_NO_EVENT && dlm_user_daemon_available()) do_uevent(ls, 0, (release_option == DLM_RELEASE_RECOVER)); dlm_recoverd_stop(ls); /* clear the LSFL_RUNNING flag to fast up * time_shutdown_sync(), we don't care anymore */ clear_bit(LSFL_RUNNING, &ls->ls_flags); timer_shutdown_sync(&ls->ls_scan_timer); if (ls_count == 1) { dlm_clear_members(ls); dlm_midcomms_shutdown(); } dlm_callback_stop(ls); remove_lockspace(ls); dlm_delete_debug_file(ls); kobject_put(&ls->ls_kobj); xa_destroy(&ls->ls_recover_xa); kfree(ls->ls_recover_buf); /* * Free structures on any other lists */ dlm_purge_requestqueue(ls); kfree(ls->ls_recover_args); dlm_clear_members(ls); dlm_clear_members_gone(ls); kfree(ls->ls_node_array); log_rinfo(ls, "%s final free", __func__); /* delayed free of data structures see free_lockspace() */ queue_work(dlm_wq, &ls->ls_free_work); module_put(THIS_MODULE); return 0; } /* * Called when a system has released all its locks and is not going to use the * lockspace any longer. We free everything we're managing for this lockspace. * Remaining nodes will go through the recovery process as if we'd died. The * lockspace must continue to function as usual, participating in recoveries, * until this returns. * * See DLM_RELEASE defines for release_option values and their meaning. */ int dlm_release_lockspace(void *lockspace, unsigned int release_option) { struct dlm_ls *ls; int error; if (release_option > __DLM_RELEASE_MAX) return -EINVAL; ls = dlm_find_lockspace_local(lockspace); if (!ls) return -EINVAL; dlm_put_lockspace(ls); mutex_lock(&ls_lock); error = release_lockspace(ls, release_option); if (!error) ls_count--; if (!ls_count) dlm_midcomms_stop(); mutex_unlock(&ls_lock); return error; } void dlm_stop_lockspaces(void) { struct dlm_ls *ls; int count; restart: count = 0; spin_lock_bh(&lslist_lock); list_for_each_entry(ls, &lslist, ls_list) { if (!test_bit(LSFL_RUNNING, &ls->ls_flags)) { count++; continue; } spin_unlock_bh(&lslist_lock); log_error(ls, "no userland control daemon, stopping lockspace"); dlm_ls_stop(ls); goto restart; } spin_unlock_bh(&lslist_lock); if (count) log_print("dlm user daemon left %d lockspaces", count); }
1 1 1 1 2 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 // SPDX-License-Identifier: GPL-2.0 /* * When connected to the machine, the Thrustmaster wheels appear as * a «generic» hid gamepad called "Thrustmaster FFB Wheel". * * When in this mode not every functionality of the wheel, like the force feedback, * are available. To enable all functionalities of a Thrustmaster wheel we have to send * to it a specific USB CONTROL request with a code different for each wheel. * * This driver tries to understand which model of Thrustmaster wheel the generic * "Thrustmaster FFB Wheel" really is and then sends the appropriate control code. * * Copyright (c) 2020-2021 Dario Pagani <dario.pagani.146+linuxk@gmail.com> * Copyright (c) 2020-2021 Kim Kuparinen <kimi.h.kuparinen@gmail.com> */ #include <linux/hid.h> #include <linux/usb.h> #include <linux/input.h> #include <linux/slab.h> #include <linux/module.h> /* * These interrupts are used to prevent a nasty crash when initializing the * T300RS. Used in thrustmaster_interrupts(). */ static const u8 setup_0[] = { 0x42, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; static const u8 setup_1[] = { 0x0a, 0x04, 0x90, 0x03, 0x00, 0x00, 0x00, 0x00 }; static const u8 setup_2[] = { 0x0a, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00 }; static const u8 setup_3[] = { 0x0a, 0x04, 0x12, 0x10, 0x00, 0x00, 0x00, 0x00 }; static const u8 setup_4[] = { 0x0a, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00 }; static const u8 *const setup_arr[] = { setup_0, setup_1, setup_2, setup_3, setup_4 }; static const unsigned int setup_arr_sizes[] = { ARRAY_SIZE(setup_0), ARRAY_SIZE(setup_1), ARRAY_SIZE(setup_2), ARRAY_SIZE(setup_3), ARRAY_SIZE(setup_4) }; /* * This struct contains for each type of * Thrustmaster wheel * * Note: The values are stored in the CPU * endianness, the USB protocols always use * little endian; the macro cpu_to_le[BIT]() * must be used when preparing USB packets * and vice-versa */ struct tm_wheel_info { uint16_t wheel_type; /* * See when the USB control out packet is prepared... * @TODO The TMX seems to require multiple control codes to switch. */ uint16_t switch_value; char const *const wheel_name; }; /* * Known wheels. * Note: TMX does not work as it requires 2 control packets */ static const struct tm_wheel_info tm_wheels_infos[] = { {0x0306, 0x0006, "Thrustmaster T150RS"}, {0x0200, 0x0005, "Thrustmaster T300RS (Missing Attachment)"}, {0x0206, 0x0005, "Thrustmaster T300RS"}, {0x0209, 0x0005, "Thrustmaster T300RS (Open Wheel Attachment)"}, {0x020a, 0x0005, "Thrustmaster T300RS (Sparco R383 Mod)"}, {0x0204, 0x0005, "Thrustmaster T300 Ferrari Alcantara Edition"}, {0x0002, 0x0002, "Thrustmaster T500RS"} //{0x0407, 0x0001, "Thrustmaster TMX"} }; static const uint8_t tm_wheels_infos_length = 7; /* * This structs contains (in little endian) the response data * of the wheel to the request 73 * * A sufficient research to understand what each field does is not * beign conducted yet. The position and meaning of fields are a * just a very optimistic guess based on instinct.... */ struct __packed tm_wheel_response { /* * Seems to be the type of packet * - 0x0049 if is data.a (15 bytes) * - 0x0047 if is data.b (7 bytes) */ uint16_t type; union { struct __packed { uint16_t field0; uint16_t field1; /* * Seems to be the model code of the wheel * Read table thrustmaster_wheels to values */ uint16_t model; uint16_t field2; uint16_t field3; uint16_t field4; uint16_t field5; } a; struct __packed { uint16_t field0; uint16_t field1; uint16_t model; } b; } data; }; struct tm_wheel { struct usb_device *usb_dev; struct urb *urb; struct usb_ctrlrequest *model_request; struct tm_wheel_response *response; struct usb_ctrlrequest *change_request; }; /* The control packet to send to wheel */ static const struct usb_ctrlrequest model_request = { .bRequestType = 0xc1, .bRequest = 73, .wValue = 0, .wIndex = 0, .wLength = cpu_to_le16(0x0010) }; static const struct usb_ctrlrequest change_request = { .bRequestType = 0x41, .bRequest = 83, .wValue = 0, // Will be filled by the driver .wIndex = 0, .wLength = 0 }; /* * On some setups initializing the T300RS crashes the kernel, * these interrupts fix that particular issue. So far they haven't caused any * adverse effects in other wheels. */ static void thrustmaster_interrupts(struct hid_device *hdev) { int ret, trans, i, b_ep; u8 *send_buf = kmalloc(256, GFP_KERNEL); struct usb_host_endpoint *ep; struct device *dev = &hdev->dev; struct usb_interface *usbif = to_usb_interface(dev->parent); struct usb_device *usbdev = interface_to_usbdev(usbif); if (!send_buf) { hid_err(hdev, "failed allocating send buffer\n"); return; } if (usbif->cur_altsetting->desc.bNumEndpoints < 2) { kfree(send_buf); hid_err(hdev, "Wrong number of endpoints?\n"); return; } ep = &usbif->cur_altsetting->endpoint[1]; b_ep = ep->desc.bEndpointAddress; /* Are the expected endpoints present? */ u8 ep_addr[2] = {b_ep, 0}; if (!usb_check_int_endpoints(usbif, ep_addr)) { kfree(send_buf); hid_err(hdev, "Unexpected non-int endpoint\n"); return; } for (i = 0; i < ARRAY_SIZE(setup_arr); ++i) { memcpy(send_buf, setup_arr[i], setup_arr_sizes[i]); ret = usb_interrupt_msg(usbdev, usb_sndintpipe(usbdev, b_ep), send_buf, setup_arr_sizes[i], &trans, USB_CTRL_SET_TIMEOUT); if (ret) { hid_err(hdev, "setup data couldn't be sent\n"); kfree(send_buf); return; } } kfree(send_buf); } static void thrustmaster_change_handler(struct urb *urb) { struct hid_device *hdev = urb->context; // The wheel seems to kill himself before answering the host and therefore is violating the USB protocol... if (urb->status == 0 || urb->status == -EPROTO || urb->status == -EPIPE) hid_info(hdev, "Success?! The wheel should have been initialized!\n"); else hid_warn(hdev, "URB to change wheel mode seems to have failed with error %d\n", urb->status); } /* * Called by the USB subsystem when the wheel responses to our request * to get [what it seems to be] the wheel's model. * * If the model id is recognized then we send an opportune USB CONTROL REQUEST * to switch the wheel to its full capabilities */ static void thrustmaster_model_handler(struct urb *urb) { struct hid_device *hdev = urb->context; struct tm_wheel *tm_wheel = hid_get_drvdata(hdev); uint16_t model = 0; int i, ret; const struct tm_wheel_info *twi = NULL; if (urb->status) { hid_err(hdev, "URB to get model id failed with error %d\n", urb->status); return; } if (tm_wheel->response->type == cpu_to_le16(0x49)) model = le16_to_cpu(tm_wheel->response->data.a.model); else if (tm_wheel->response->type == cpu_to_le16(0x47)) model = le16_to_cpu(tm_wheel->response->data.b.model); else { hid_err(hdev, "Unknown packet type 0x%x, unable to proceed further with wheel init\n", tm_wheel->response->type); return; } for (i = 0; i < tm_wheels_infos_length && !twi; i++) if (tm_wheels_infos[i].wheel_type == model) twi = tm_wheels_infos + i; if (twi) hid_info(hdev, "Wheel with model id 0x%x is a %s\n", model, twi->wheel_name); else { hid_err(hdev, "Unknown wheel's model id 0x%x, unable to proceed further with wheel init\n", model); return; } tm_wheel->change_request->wValue = cpu_to_le16(twi->switch_value); usb_fill_control_urb( tm_wheel->urb, tm_wheel->usb_dev, usb_sndctrlpipe(tm_wheel->usb_dev, 0), (char *)tm_wheel->change_request, NULL, 0, // We do not expect any response from the wheel thrustmaster_change_handler, hdev ); ret = usb_submit_urb(tm_wheel->urb, GFP_ATOMIC); if (ret) hid_err(hdev, "Error %d while submitting the change URB. I am unable to initialize this wheel...\n", ret); } static void thrustmaster_remove(struct hid_device *hdev) { struct tm_wheel *tm_wheel = hid_get_drvdata(hdev); usb_kill_urb(tm_wheel->urb); kfree(tm_wheel->change_request); kfree(tm_wheel->response); kfree(tm_wheel->model_request); usb_free_urb(tm_wheel->urb); kfree(tm_wheel); hid_hw_stop(hdev); } /* * Function called by HID when a hid Thrustmaster FFB wheel is connected to the host. * This function starts the hid dev, tries to allocate the tm_wheel data structure and * finally send an USB CONTROL REQUEST to the wheel to get [what it seems to be] its * model type. */ static int thrustmaster_probe(struct hid_device *hdev, const struct hid_device_id *id) { int ret = 0; struct tm_wheel *tm_wheel = NULL; if (!hid_is_usb(hdev)) return -EINVAL; ret = hid_parse(hdev); if (ret) { hid_err(hdev, "parse failed with error %d\n", ret); goto error0; } ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT & ~HID_CONNECT_FF); if (ret) { hid_err(hdev, "hw start failed with error %d\n", ret); goto error0; } // Now we allocate the tm_wheel tm_wheel = kzalloc(sizeof(struct tm_wheel), GFP_KERNEL); if (!tm_wheel) { ret = -ENOMEM; goto error1; } tm_wheel->urb = usb_alloc_urb(0, GFP_ATOMIC); if (!tm_wheel->urb) { ret = -ENOMEM; goto error2; } tm_wheel->model_request = kmemdup(&model_request, sizeof(struct usb_ctrlrequest), GFP_KERNEL); if (!tm_wheel->model_request) { ret = -ENOMEM; goto error3; } tm_wheel->response = kzalloc(sizeof(struct tm_wheel_response), GFP_KERNEL); if (!tm_wheel->response) { ret = -ENOMEM; goto error4; } tm_wheel->change_request = kmemdup(&change_request, sizeof(struct usb_ctrlrequest), GFP_KERNEL); if (!tm_wheel->change_request) { ret = -ENOMEM; goto error5; } tm_wheel->usb_dev = interface_to_usbdev(to_usb_interface(hdev->dev.parent)); hid_set_drvdata(hdev, tm_wheel); thrustmaster_interrupts(hdev); usb_fill_control_urb( tm_wheel->urb, tm_wheel->usb_dev, usb_rcvctrlpipe(tm_wheel->usb_dev, 0), (char *)tm_wheel->model_request, tm_wheel->response, sizeof(struct tm_wheel_response), thrustmaster_model_handler, hdev ); ret = usb_submit_urb(tm_wheel->urb, GFP_ATOMIC); if (ret) { hid_err(hdev, "Error %d while submitting the URB. I am unable to initialize this wheel...\n", ret); goto error6; } return ret; error6: kfree(tm_wheel->change_request); error5: kfree(tm_wheel->response); error4: kfree(tm_wheel->model_request); error3: usb_free_urb(tm_wheel->urb); error2: kfree(tm_wheel); error1: hid_hw_stop(hdev); error0: return ret; } static const struct hid_device_id thrustmaster_devices[] = { { HID_USB_DEVICE(0x044f, 0xb65d)}, {} }; MODULE_DEVICE_TABLE(hid, thrustmaster_devices); static struct hid_driver thrustmaster_driver = { .name = "hid-thrustmaster", .id_table = thrustmaster_devices, .probe = thrustmaster_probe, .remove = thrustmaster_remove, }; module_hid_driver(thrustmaster_driver); MODULE_AUTHOR("Dario Pagani <dario.pagani.146+linuxk@gmail.com>"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Driver to initialize some steering wheel joysticks from Thrustmaster");
7 58 2 1 5 12 12 107 119 105 8 2 12 18 18 17 1 1 1 1 111 26 26 34 34 54 53 8 8 8 8 56 57 8 49 8 109 111 92 2 57 3 1 2 94 93 5 93 94 2 112 111 112 9 9 9 9 9 9 9 9 9 7 4 63 63 63 63 63 4 4 1 3 4 4 107 107 106 106 94 1 92 93 13 5 5 5 5 5 5 5 13 13 5 5 3 3 2 109 110 111 57 1 110 93 58 58 58 3 57 110 111 92 57 109 112 11 109 112 112 3 1 2 1 3 3 3 3 2 2 2 1 4 7 52 52 52 52 52 7 7 53 45 7 7 7 59 51 7 2 2 59 59 2 59 59 58 46 4 3 53 58 58 59 44 46 46 13 59 105 107 107 107 5 63 63 1 1 1 1 1 1 1 1 11 11 111 107 2 13 109 1 96 54 6 32 32 32 31 4 4 4 4 4 4 31 31 25 25 23 23 23 23 1 1 1 1 1 5 1 4 1 35 34 35 6 6 5 6 6 107 123 123 23 22 1 5 2 2 3 4 1 1 5 7 3 5 5 5 2 4 5 4 1 32 26 6 6 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580 5581 5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643 5644 5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 5672 5673 5674 5675 5676 5677 5678 5679 5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706 5707 5708 5709 5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729 5730 5731 5732 5733 5734 5735 5736 5737 5738 5739 5740 5741 5742 5743 5744 5745 5746 5747 5748 5749 5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807 5808 5809 5810 5811 5812 5813 5814 5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833 5834 5835 5836 5837 5838 5839 5840 5841 5842 5843 5844 5845 5846 5847 5848 5849 5850 5851 5852 5853 5854 5855 5856 5857 5858 5859 5860 5861 5862 5863 5864 5865 5866 5867 5868 5869 5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897 5898 5899 5900 5901 5902 5903 5904 5905 5906 5907 5908 5909 5910 5911 5912 5913 5914 5915 5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042 6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 6104 6105 6106 6107 6108 6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 6122 6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145 6146 6147 6148 6149 6150 6151 6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 6180 6181 6182 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 6199 6200 6201 6202 6203 6204 6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271 6272 6273 6274 6275 6276 6277 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287 6288 6289 6290 6291 6292 6293 6294 6295 6296 6297 6298 6299 6300 6301 6302 6303 6304 6305 6306 6307 6308 6309 6310 6311 6312 6313 6314 6315 6316 6317 6318 6319 6320 6321 6322 6323 6324 6325 6326 6327 6328 6329 6330 6331 6332 6333 6334 6335 6336 6337 6338 6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366 6367 6368 6369 6370 6371 6372 6373 6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400 6401 6402 6403 6404 6405 6406 6407 6408 6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 6419 6420 6421 6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 6438 6439 6440 6441 6442 6443 6444 6445 6446 6447 6448 6449 6450 6451 6452 6453 6454 6455 6456 6457 6458 6459 6460 6461 6462 6463 6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 6536 6537 6538 6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549 6550 6551 6552 6553 6554 6555 6556 6557 6558 6559 6560 6561 6562 6563 6564 6565 6566 6567 6568 6569 6570 6571 6572 6573 6574 6575 6576 6577 6578 6579 6580 6581 6582 6583 6584 6585 6586 6587 6588 6589 6590 6591 6592 6593 6594 6595 6596 6597 6598 6599 6600 6601 6602 6603 6604 6605 6606 6607 6608 6609 6610 6611 6612 6613 6614 6615 6616 6617 6618 6619 6620 6621 6622 6623 6624 6625 6626 6627 6628 6629 6630 6631 6632 6633 6634 6635 6636 6637 6638 6639 6640 6641 6642 6643 6644 6645 6646 6647 6648 6649 6650 6651 6652 6653 6654 6655 6656 6657 6658 6659 6660 6661 6662 6663 6664 6665 6666 6667 6668 6669 6670 6671 6672 6673 6674 6675 6676 6677 6678 6679 6680 6681 6682 6683 6684 6685 6686 6687 6688 6689 6690 6691 6692 6693 6694 6695 6696 6697 6698 6699 6700 6701 6702 6703 6704 6705 6706 6707 6708 6709 6710 6711 6712 6713 6714 6715 6716 6717 6718 6719 6720 6721 6722 6723 6724 6725 6726 6727 6728 6729 6730 6731 6732 6733 6734 6735 6736 6737 6738 6739 6740 6741 6742 6743 6744 6745 6746 6747 6748 6749 6750 6751 6752 6753 6754 6755 6756 6757 6758 6759 6760 6761 6762 6763 6764 6765 6766 6767 6768 6769 6770 6771 6772 6773 6774 6775 6776 6777 6778 6779 6780 6781 6782 6783 6784 6785 6786 6787 6788 6789 6790 6791 6792 6793 6794 6795 6796 6797 6798 6799 6800 6801 6802 6803 6804 6805 6806 6807 6808 6809 6810 6811 6812 6813 6814 6815 6816 6817 6818 6819 6820 6821 6822 6823 6824 6825 6826 6827 6828 6829 6830 6831 6832 6833 6834 6835 6836 6837 6838 6839 6840 6841 6842 6843 6844 6845 6846 6847 6848 6849 6850 6851 6852 6853 6854 6855 6856 6857 6858 6859 6860 6861 6862 6863 6864 6865 6866 6867 6868 6869 6870 6871 6872 6873 6874 6875 6876 6877 6878 6879 6880 6881 6882 6883 6884 6885 6886 6887 6888 6889 6890 6891 6892 6893 6894 6895 6896 6897 6898 6899 6900 6901 6902 6903 6904 6905 6906 6907 6908 6909 6910 6911 6912 6913 6914 6915 6916 6917 6918 6919 6920 6921 6922 6923 6924 6925 6926 6927 6928 6929 6930 6931 6932 6933 6934 6935 6936 6937 6938 6939 6940 6941 6942 6943 6944 6945 6946 6947 6948 6949 6950 6951 6952 6953 6954 6955 6956 6957 6958 6959 6960 6961 6962 6963 6964 6965 6966 6967 6968 6969 6970 6971 6972 6973 6974 6975 6976 6977 6978 6979 6980 6981 6982 6983 6984 6985 6986 6987 6988 6989 6990 6991 6992 6993 6994 6995 6996 6997 6998 6999 7000 7001 7002 7003 7004 7005 7006 7007 7008 7009 7010 7011 7012 7013 7014 7015 7016 7017 7018 7019 7020 7021 7022 7023 7024 7025 7026 7027 7028 7029 7030 7031 7032 7033 7034 7035 7036 7037 7038 7039 7040 7041 7042 7043 7044 7045 7046 7047 7048 7049 7050 7051 7052 7053 7054 7055 7056 7057 7058 7059 7060 7061 7062 7063 7064 7065 7066 7067 7068 7069 7070 7071 7072 7073 7074 7075 7076 7077 7078 7079 7080 7081 7082 7083 7084 7085 7086 7087 7088 7089 7090 7091 7092 7093 7094 7095 7096 7097 7098 7099 7100 7101 7102 7103 7104 7105 7106 7107 7108 7109 7110 7111 7112 7113 7114 7115 7116 7117 7118 7119 7120 7121 7122 7123 7124 7125 7126 7127 7128 7129 7130 7131 7132 7133 7134 7135 7136 7137 7138 7139 7140 7141 7142 7143 7144 7145 7146 7147 7148 7149 7150 7151 7152 7153 7154 7155 7156 7157 7158 7159 7160 7161 7162 7163 7164 7165 7166 7167 7168 7169 7170 7171 7172 7173 7174 7175 7176 7177 7178 7179 7180 7181 7182 7183 7184 7185 7186 7187 7188 7189 7190 7191 7192 7193 7194 7195 7196 7197 7198 7199 7200 7201 7202 7203 7204 7205 7206 7207 7208 7209 7210 7211 7212 7213 7214 7215 7216 7217 7218 7219 7220 7221 7222 7223 7224 7225 7226 7227 7228 7229 7230 7231 7232 7233 7234 7235 7236 7237 7238 7239 7240 7241 7242 7243 7244 7245 7246 7247 7248 7249 7250 7251 7252 7253 7254 7255 7256 7257 7258 7259 7260 7261 7262 7263 7264 7265 7266 7267 7268 7269 7270 7271 7272 7273 7274 7275 7276 7277 7278 7279 7280 7281 7282 7283 7284 7285 7286 7287 7288 7289 7290 7291 7292 7293 7294 7295 7296 7297 7298 7299 7300 7301 7302 7303 7304 7305 7306 7307 7308 7309 7310 7311 7312 7313 7314 7315 7316 7317 7318 7319 7320 7321 7322 7323 7324 7325 7326 7327 7328 7329 7330 7331 7332 7333 7334 7335 7336 7337 7338 7339 7340 7341 7342 7343 7344 7345 7346 7347 7348 7349 7350 7351 7352 7353 7354 7355 7356 7357 7358 7359 7360 7361 7362 7363 7364 7365 7366 7367 7368 7369 7370 7371 7372 7373 7374 7375 7376 7377 7378 7379 7380 7381 7382 7383 7384 7385 7386 7387 7388 7389 7390 7391 7392 7393 7394 7395 7396 7397 7398 7399 7400 7401 7402 7403 7404 7405 7406 7407 7408 7409 7410 7411 7412 7413 7414 7415 7416 7417 7418 7419 7420 7421 7422 7423 7424 7425 7426 7427 7428 7429 7430 7431 7432 7433 7434 7435 7436 7437 7438 7439 7440 7441 7442 7443 7444 7445 7446 7447 7448 7449 7450 7451 7452 7453 7454 7455 7456 7457 7458 7459 7460 7461 7462 7463 7464 7465 7466 7467 7468 7469 7470 7471 7472 7473 7474 7475 7476 7477 7478 7479 7480 7481 7482 7483 7484 7485 7486 7487 7488 7489 7490 7491 7492 7493 7494 7495 7496 7497 7498 7499 7500 7501 7502 7503 7504 7505 7506 7507 7508 7509 7510 7511 7512 7513 7514 7515 7516 7517 7518 7519 7520 7521 7522 7523 7524 7525 7526 7527 7528 7529 7530 7531 7532 7533 7534 7535 7536 7537 7538 7539 7540 7541 7542 7543 7544 7545 7546 7547 7548 7549 7550 7551 7552 7553 7554 7555 7556 7557 7558 7559 7560 7561 7562 7563 7564 7565 7566 7567 7568 7569 7570 7571 7572 7573 7574 7575 7576 7577 7578 7579 7580 7581 7582 7583 7584 7585 7586 7587 7588 7589 7590 7591 7592 7593 7594 7595 7596 7597 7598 7599 7600 7601 7602 7603 7604 7605 7606 7607 7608 7609 7610 7611 7612 7613 7614 7615 7616 7617 7618 7619 7620 7621 7622 7623 7624 7625 7626 7627 7628 7629 7630 7631 7632 7633 7634 7635 7636 7637 7638 7639 7640 7641 7642 7643 7644 7645 7646 7647 7648 7649 7650 7651 7652 7653 7654 7655 7656 7657 7658 7659 7660 7661 7662 7663 7664 7665 7666 7667 7668 7669 7670 7671 7672 7673 7674 7675 7676 7677 7678 7679 7680 7681 7682 7683 7684 7685 7686 7687 7688 7689 7690 7691 7692 7693 7694 7695 7696 7697 7698 7699 7700 7701 7702 7703 7704 7705 7706 7707 7708 7709 7710 7711 7712 7713 7714 7715 7716 7717 7718 7719 7720 7721 7722 7723 7724 7725 7726 7727 7728 7729 7730 7731 7732 7733 7734 7735 7736 7737 7738 7739 7740 7741 7742 7743 7744 7745 7746 7747 7748 7749 7750 7751 7752 7753 7754 7755 7756 7757 7758 7759 7760 7761 7762 7763 7764 7765 7766 7767 7768 7769 7770 7771 7772 7773 7774 7775 7776 7777 7778 7779 7780 7781 7782 7783 7784 7785 7786 7787 7788 7789 7790 7791 7792 7793 7794 7795 7796 7797 7798 7799 7800 7801 7802 7803 7804 7805 7806 7807 7808 7809 7810 7811 7812 7813 7814 7815 7816 7817 7818 7819 7820 7821 7822 7823 7824 7825 7826 7827 7828 7829 7830 7831 7832 7833 7834 7835 7836 7837 7838 7839 7840 7841 7842 7843 7844 7845 7846 7847 7848 7849 7850 7851 7852 7853 7854 7855 7856 7857 7858 7859 7860 7861 7862 7863 7864 7865 7866 7867 7868 7869 7870 7871 7872 7873 7874 7875 7876 7877 7878 7879 7880 7881 7882 7883 7884 7885 7886 7887 7888 7889 7890 7891 7892 7893 7894 7895 7896 7897 7898 7899 7900 7901 7902 7903 7904 7905 7906 7907 7908 7909 7910 7911 7912 7913 7914 7915 7916 7917 7918 7919 7920 7921 7922 7923 7924 7925 7926 7927 7928 7929 7930 7931 7932 7933 7934 7935 7936 7937 7938 7939 7940 7941 7942 7943 7944 7945 7946 7947 7948 7949 7950 7951 7952 7953 7954 7955 7956 7957 7958 7959 7960 7961 7962 7963 7964 7965 7966 7967 7968 7969 7970 7971 7972 7973 7974 7975 7976 7977 7978 7979 7980 7981 7982 7983 7984 7985 7986 7987 7988 7989 7990 7991 7992 7993 7994 7995 7996 7997 7998 7999 8000 8001 8002 8003 8004 8005 8006 8007 8008 8009 8010 8011 8012 8013 // SPDX-License-Identifier: GPL-2.0-only /* * Kernel-based Virtual Machine driver for Linux * * This module enables machines with Intel VT-x extensions to run virtual * machines without emulation or binary translation. * * MMU support * * Copyright (C) 2006 Qumranet, Inc. * Copyright 2010 Red Hat, Inc. and/or its affiliates. * * Authors: * Yaniv Kamay <yaniv@qumranet.com> * Avi Kivity <avi@qumranet.com> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include "irq.h" #include "ioapic.h" #include "mmu.h" #include "mmu_internal.h" #include "tdp_mmu.h" #include "x86.h" #include "kvm_cache_regs.h" #include "smm.h" #include "kvm_emulate.h" #include "page_track.h" #include "cpuid.h" #include "spte.h" #include <linux/kvm_host.h> #include <linux/types.h> #include <linux/string.h> #include <linux/mm.h> #include <linux/highmem.h> #include <linux/moduleparam.h> #include <linux/export.h> #include <linux/swap.h> #include <linux/hugetlb.h> #include <linux/compiler.h> #include <linux/srcu.h> #include <linux/slab.h> #include <linux/sched/signal.h> #include <linux/uaccess.h> #include <linux/hash.h> #include <linux/kern_levels.h> #include <linux/kstrtox.h> #include <linux/kthread.h> #include <linux/wordpart.h> #include <asm/page.h> #include <asm/memtype.h> #include <asm/cmpxchg.h> #include <asm/io.h> #include <asm/set_memory.h> #include <asm/spec-ctrl.h> #include <asm/vmx.h> #include "trace.h" static bool nx_hugepage_mitigation_hard_disabled; int __read_mostly nx_huge_pages = -1; static uint __read_mostly nx_huge_pages_recovery_period_ms; #ifdef CONFIG_PREEMPT_RT /* Recovery can cause latency spikes, disable it for PREEMPT_RT. */ static uint __read_mostly nx_huge_pages_recovery_ratio = 0; #else static uint __read_mostly nx_huge_pages_recovery_ratio = 60; #endif static int get_nx_huge_pages(char *buffer, const struct kernel_param *kp); static int set_nx_huge_pages(const char *val, const struct kernel_param *kp); static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel_param *kp); static const struct kernel_param_ops nx_huge_pages_ops = { .set = set_nx_huge_pages, .get = get_nx_huge_pages, }; static const struct kernel_param_ops nx_huge_pages_recovery_param_ops = { .set = set_nx_huge_pages_recovery_param, .get = param_get_uint, }; module_param_cb(nx_huge_pages, &nx_huge_pages_ops, &nx_huge_pages, 0644); __MODULE_PARM_TYPE(nx_huge_pages, "bool"); module_param_cb(nx_huge_pages_recovery_ratio, &nx_huge_pages_recovery_param_ops, &nx_huge_pages_recovery_ratio, 0644); __MODULE_PARM_TYPE(nx_huge_pages_recovery_ratio, "uint"); module_param_cb(nx_huge_pages_recovery_period_ms, &nx_huge_pages_recovery_param_ops, &nx_huge_pages_recovery_period_ms, 0644); __MODULE_PARM_TYPE(nx_huge_pages_recovery_period_ms, "uint"); static bool __read_mostly force_flush_and_sync_on_reuse; module_param_named(flush_on_reuse, force_flush_and_sync_on_reuse, bool, 0644); /* * When setting this variable to true it enables Two-Dimensional-Paging * where the hardware walks 2 page tables: * 1. the guest-virtual to guest-physical * 2. while doing 1. it walks guest-physical to host-physical * If the hardware supports that we don't need to do shadow paging. */ bool tdp_enabled = false; static bool __ro_after_init tdp_mmu_allowed; #ifdef CONFIG_X86_64 bool __read_mostly tdp_mmu_enabled = true; module_param_named(tdp_mmu, tdp_mmu_enabled, bool, 0444); EXPORT_SYMBOL_FOR_KVM_INTERNAL(tdp_mmu_enabled); #endif static int max_huge_page_level __read_mostly; static int tdp_root_level __read_mostly; static int max_tdp_level __read_mostly; #define PTE_PREFETCH_NUM 8 #include <trace/events/kvm.h> /* make pte_list_desc fit well in cache lines */ #define PTE_LIST_EXT 14 /* * struct pte_list_desc is the core data structure used to implement a custom * list for tracking a set of related SPTEs, e.g. all the SPTEs that map a * given GFN when used in the context of rmaps. Using a custom list allows KVM * to optimize for the common case where many GFNs will have at most a handful * of SPTEs pointing at them, i.e. allows packing multiple SPTEs into a small * memory footprint, which in turn improves runtime performance by exploiting * cache locality. * * A list is comprised of one or more pte_list_desc objects (descriptors). * Each individual descriptor stores up to PTE_LIST_EXT SPTEs. If a descriptor * is full and a new SPTEs needs to be added, a new descriptor is allocated and * becomes the head of the list. This means that by definitions, all tail * descriptors are full. * * Note, the meta data fields are deliberately placed at the start of the * structure to optimize the cacheline layout; accessing the descriptor will * touch only a single cacheline so long as @spte_count<=6 (or if only the * descriptors metadata is accessed). */ struct pte_list_desc { struct pte_list_desc *more; /* The number of PTEs stored in _this_ descriptor. */ u32 spte_count; /* The number of PTEs stored in all tails of this descriptor. */ u32 tail_count; u64 *sptes[PTE_LIST_EXT]; }; struct kvm_shadow_walk_iterator { u64 addr; hpa_t shadow_addr; u64 *sptep; int level; unsigned index; }; #define for_each_shadow_entry_using_root(_vcpu, _root, _addr, _walker) \ for (shadow_walk_init_using_root(&(_walker), (_vcpu), \ (_root), (_addr)); \ shadow_walk_okay(&(_walker)); \ shadow_walk_next(&(_walker))) #define for_each_shadow_entry(_vcpu, _addr, _walker) \ for (shadow_walk_init(&(_walker), _vcpu, _addr); \ shadow_walk_okay(&(_walker)); \ shadow_walk_next(&(_walker))) #define for_each_shadow_entry_lockless(_vcpu, _addr, _walker, spte) \ for (shadow_walk_init(&(_walker), _vcpu, _addr); \ shadow_walk_okay(&(_walker)) && \ ({ spte = mmu_spte_get_lockless(_walker.sptep); 1; }); \ __shadow_walk_next(&(_walker), spte)) static struct kmem_cache *pte_list_desc_cache; struct kmem_cache *mmu_page_header_cache; static void mmu_spte_set(u64 *sptep, u64 spte); struct kvm_mmu_role_regs { const unsigned long cr0; const unsigned long cr4; const u64 efer; }; #define CREATE_TRACE_POINTS #include "mmutrace.h" /* * Yes, lot's of underscores. They're a hint that you probably shouldn't be * reading from the role_regs. Once the root_role is constructed, it becomes * the single source of truth for the MMU's state. */ #define BUILD_MMU_ROLE_REGS_ACCESSOR(reg, name, flag) \ static inline bool __maybe_unused \ ____is_##reg##_##name(const struct kvm_mmu_role_regs *regs) \ { \ return !!(regs->reg & flag); \ } BUILD_MMU_ROLE_REGS_ACCESSOR(cr0, pg, X86_CR0_PG); BUILD_MMU_ROLE_REGS_ACCESSOR(cr0, wp, X86_CR0_WP); BUILD_MMU_ROLE_REGS_ACCESSOR(cr4, pse, X86_CR4_PSE); BUILD_MMU_ROLE_REGS_ACCESSOR(cr4, pae, X86_CR4_PAE); BUILD_MMU_ROLE_REGS_ACCESSOR(cr4, smep, X86_CR4_SMEP); BUILD_MMU_ROLE_REGS_ACCESSOR(cr4, smap, X86_CR4_SMAP); BUILD_MMU_ROLE_REGS_ACCESSOR(cr4, pke, X86_CR4_PKE); BUILD_MMU_ROLE_REGS_ACCESSOR(cr4, la57, X86_CR4_LA57); BUILD_MMU_ROLE_REGS_ACCESSOR(efer, nx, EFER_NX); BUILD_MMU_ROLE_REGS_ACCESSOR(efer, lma, EFER_LMA); /* * The MMU itself (with a valid role) is the single source of truth for the * MMU. Do not use the regs used to build the MMU/role, nor the vCPU. The * regs don't account for dependencies, e.g. clearing CR4 bits if CR0.PG=1, * and the vCPU may be incorrect/irrelevant. */ #define BUILD_MMU_ROLE_ACCESSOR(base_or_ext, reg, name) \ static inline bool __maybe_unused is_##reg##_##name(struct kvm_mmu *mmu) \ { \ return !!(mmu->cpu_role. base_or_ext . reg##_##name); \ } BUILD_MMU_ROLE_ACCESSOR(base, cr0, wp); BUILD_MMU_ROLE_ACCESSOR(ext, cr4, pse); BUILD_MMU_ROLE_ACCESSOR(ext, cr4, smep); BUILD_MMU_ROLE_ACCESSOR(ext, cr4, smap); BUILD_MMU_ROLE_ACCESSOR(ext, cr4, pke); BUILD_MMU_ROLE_ACCESSOR(ext, cr4, la57); BUILD_MMU_ROLE_ACCESSOR(base, efer, nx); BUILD_MMU_ROLE_ACCESSOR(ext, efer, lma); static inline bool is_cr0_pg(struct kvm_mmu *mmu) { return mmu->cpu_role.base.level > 0; } static inline bool is_cr4_pae(struct kvm_mmu *mmu) { return !mmu->cpu_role.base.has_4_byte_gpte; } static struct kvm_mmu_role_regs vcpu_to_role_regs(struct kvm_vcpu *vcpu) { struct kvm_mmu_role_regs regs = { .cr0 = kvm_read_cr0_bits(vcpu, KVM_MMU_CR0_ROLE_BITS), .cr4 = kvm_read_cr4_bits(vcpu, KVM_MMU_CR4_ROLE_BITS), .efer = vcpu->arch.efer, }; return regs; } static unsigned long get_guest_cr3(struct kvm_vcpu *vcpu) { return kvm_read_cr3(vcpu); } static inline unsigned long kvm_mmu_get_guest_pgd(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu) { if (IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) && mmu->get_guest_pgd == get_guest_cr3) return kvm_read_cr3(vcpu); return mmu->get_guest_pgd(vcpu); } static inline bool kvm_available_flush_remote_tlbs_range(void) { #if IS_ENABLED(CONFIG_HYPERV) return kvm_x86_ops.flush_remote_tlbs_range; #else return false; #endif } static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index); /* Flush the range of guest memory mapped by the given SPTE. */ static void kvm_flush_remote_tlbs_sptep(struct kvm *kvm, u64 *sptep) { struct kvm_mmu_page *sp = sptep_to_sp(sptep); gfn_t gfn = kvm_mmu_page_get_gfn(sp, spte_index(sptep)); kvm_flush_remote_tlbs_gfn(kvm, gfn, sp->role.level); } static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn, unsigned int access) { u64 spte = make_mmio_spte(vcpu, gfn, access); trace_mark_mmio_spte(sptep, gfn, spte); mmu_spte_set(sptep, spte); } static gfn_t get_mmio_spte_gfn(u64 spte) { u64 gpa = spte & shadow_nonpresent_or_rsvd_lower_gfn_mask; gpa |= (spte >> SHADOW_NONPRESENT_OR_RSVD_MASK_LEN) & shadow_nonpresent_or_rsvd_mask; return gpa >> PAGE_SHIFT; } static unsigned get_mmio_spte_access(u64 spte) { return spte & shadow_mmio_access_mask; } static bool check_mmio_spte(struct kvm_vcpu *vcpu, u64 spte) { u64 kvm_gen, spte_gen, gen; gen = kvm_vcpu_memslots(vcpu)->generation; if (unlikely(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS)) return false; kvm_gen = gen & MMIO_SPTE_GEN_MASK; spte_gen = get_mmio_spte_generation(spte); trace_check_mmio_spte(spte, kvm_gen, spte_gen); return likely(kvm_gen == spte_gen); } static int is_cpuid_PSE36(void) { return 1; } #ifdef CONFIG_X86_64 static void __set_spte(u64 *sptep, u64 spte) { KVM_MMU_WARN_ON(is_ept_ve_possible(spte)); WRITE_ONCE(*sptep, spte); } static void __update_clear_spte_fast(u64 *sptep, u64 spte) { KVM_MMU_WARN_ON(is_ept_ve_possible(spte)); WRITE_ONCE(*sptep, spte); } static u64 __update_clear_spte_slow(u64 *sptep, u64 spte) { KVM_MMU_WARN_ON(is_ept_ve_possible(spte)); return xchg(sptep, spte); } static u64 __get_spte_lockless(u64 *sptep) { return READ_ONCE(*sptep); } #else union split_spte { struct { u32 spte_low; u32 spte_high; }; u64 spte; }; static void count_spte_clear(u64 *sptep, u64 spte) { struct kvm_mmu_page *sp = sptep_to_sp(sptep); if (is_shadow_present_pte(spte)) return; /* Ensure the spte is completely set before we increase the count */ smp_wmb(); sp->clear_spte_count++; } static void __set_spte(u64 *sptep, u64 spte) { union split_spte *ssptep, sspte; ssptep = (union split_spte *)sptep; sspte = (union split_spte)spte; ssptep->spte_high = sspte.spte_high; /* * If we map the spte from nonpresent to present, We should store * the high bits firstly, then set present bit, so cpu can not * fetch this spte while we are setting the spte. */ smp_wmb(); WRITE_ONCE(ssptep->spte_low, sspte.spte_low); } static void __update_clear_spte_fast(u64 *sptep, u64 spte) { union split_spte *ssptep, sspte; ssptep = (union split_spte *)sptep; sspte = (union split_spte)spte; WRITE_ONCE(ssptep->spte_low, sspte.spte_low); /* * If we map the spte from present to nonpresent, we should clear * present bit firstly to avoid vcpu fetch the old high bits. */ smp_wmb(); ssptep->spte_high = sspte.spte_high; count_spte_clear(sptep, spte); } static u64 __update_clear_spte_slow(u64 *sptep, u64 spte) { union split_spte *ssptep, sspte, orig; ssptep = (union split_spte *)sptep; sspte = (union split_spte)spte; /* xchg acts as a barrier before the setting of the high bits */ orig.spte_low = xchg(&ssptep->spte_low, sspte.spte_low); orig.spte_high = ssptep->spte_high; ssptep->spte_high = sspte.spte_high; count_spte_clear(sptep, spte); return orig.spte; } /* * The idea using the light way get the spte on x86_32 guest is from * gup_get_pte (mm/gup.c). * * An spte tlb flush may be pending, because they are coalesced and * we are running out of the MMU lock. Therefore * we need to protect against in-progress updates of the spte. * * Reading the spte while an update is in progress may get the old value * for the high part of the spte. The race is fine for a present->non-present * change (because the high part of the spte is ignored for non-present spte), * but for a present->present change we must reread the spte. * * All such changes are done in two steps (present->non-present and * non-present->present), hence it is enough to count the number of * present->non-present updates: if it changed while reading the spte, * we might have hit the race. This is done using clear_spte_count. */ static u64 __get_spte_lockless(u64 *sptep) { struct kvm_mmu_page *sp = sptep_to_sp(sptep); union split_spte spte, *orig = (union split_spte *)sptep; int count; retry: count = sp->clear_spte_count; smp_rmb(); spte.spte_low = orig->spte_low; smp_rmb(); spte.spte_high = orig->spte_high; smp_rmb(); if (unlikely(spte.spte_low != orig->spte_low || count != sp->clear_spte_count)) goto retry; return spte.spte; } #endif /* Rules for using mmu_spte_set: * Set the sptep from nonpresent to present. * Note: the sptep being assigned *must* be either not present * or in a state where the hardware will not attempt to update * the spte. */ static void mmu_spte_set(u64 *sptep, u64 new_spte) { WARN_ON_ONCE(is_shadow_present_pte(*sptep)); __set_spte(sptep, new_spte); } /* Rules for using mmu_spte_update: * Update the state bits, it means the mapped pfn is not changed. * * Returns true if the TLB needs to be flushed */ static bool mmu_spte_update(u64 *sptep, u64 new_spte) { u64 old_spte = *sptep; WARN_ON_ONCE(!is_shadow_present_pte(new_spte)); check_spte_writable_invariants(new_spte); if (!is_shadow_present_pte(old_spte)) { mmu_spte_set(sptep, new_spte); return false; } if (!spte_needs_atomic_update(old_spte)) __update_clear_spte_fast(sptep, new_spte); else old_spte = __update_clear_spte_slow(sptep, new_spte); WARN_ON_ONCE(!is_shadow_present_pte(old_spte) || spte_to_pfn(old_spte) != spte_to_pfn(new_spte)); return leaf_spte_change_needs_tlb_flush(old_spte, new_spte); } /* * Rules for using mmu_spte_clear_track_bits: * It sets the sptep from present to nonpresent, and track the * state bits, it is used to clear the last level sptep. * Returns the old PTE. */ static u64 mmu_spte_clear_track_bits(struct kvm *kvm, u64 *sptep) { u64 old_spte = *sptep; int level = sptep_to_sp(sptep)->role.level; if (!is_shadow_present_pte(old_spte) || !spte_needs_atomic_update(old_spte)) __update_clear_spte_fast(sptep, SHADOW_NONPRESENT_VALUE); else old_spte = __update_clear_spte_slow(sptep, SHADOW_NONPRESENT_VALUE); if (!is_shadow_present_pte(old_spte)) return old_spte; kvm_update_page_stats(kvm, level, -1); return old_spte; } /* * Rules for using mmu_spte_clear_no_track: * Directly clear spte without caring the state bits of sptep, * it is used to set the upper level spte. */ static void mmu_spte_clear_no_track(u64 *sptep) { __update_clear_spte_fast(sptep, SHADOW_NONPRESENT_VALUE); } static u64 mmu_spte_get_lockless(u64 *sptep) { return __get_spte_lockless(sptep); } static inline bool is_tdp_mmu_active(struct kvm_vcpu *vcpu) { return tdp_mmu_enabled && vcpu->arch.mmu->root_role.direct; } static void walk_shadow_page_lockless_begin(struct kvm_vcpu *vcpu) { if (is_tdp_mmu_active(vcpu)) { kvm_tdp_mmu_walk_lockless_begin(); } else { /* * Prevent page table teardown by making any free-er wait during * kvm_flush_remote_tlbs() IPI to all active vcpus. */ local_irq_disable(); /* * Make sure a following spte read is not reordered ahead of the write * to vcpu->mode. */ smp_store_mb(vcpu->mode, READING_SHADOW_PAGE_TABLES); } } static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu) { if (is_tdp_mmu_active(vcpu)) { kvm_tdp_mmu_walk_lockless_end(); } else { /* * Make sure the write to vcpu->mode is not reordered in front of * reads to sptes. If it does, kvm_mmu_commit_zap_page() can see us * OUTSIDE_GUEST_MODE and proceed to free the shadow page table. */ smp_store_release(&vcpu->mode, OUTSIDE_GUEST_MODE); local_irq_enable(); } } static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu, bool maybe_indirect) { int r; /* 1 rmap, 1 parent PTE per level, and the prefetched rmaps. */ r = kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_pte_list_desc_cache, 1 + PT64_ROOT_MAX_LEVEL + PTE_PREFETCH_NUM); if (r) return r; if (kvm_has_mirrored_tdp(vcpu->kvm)) { r = kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_external_spt_cache, PT64_ROOT_MAX_LEVEL); if (r) return r; } r = kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_shadow_page_cache, PT64_ROOT_MAX_LEVEL); if (r) return r; if (maybe_indirect) { r = kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_shadowed_info_cache, PT64_ROOT_MAX_LEVEL); if (r) return r; } return kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_page_header_cache, PT64_ROOT_MAX_LEVEL); } static void mmu_free_memory_caches(struct kvm_vcpu *vcpu) { kvm_mmu_free_memory_cache(&vcpu->arch.mmu_pte_list_desc_cache); kvm_mmu_free_memory_cache(&vcpu->arch.mmu_shadow_page_cache); kvm_mmu_free_memory_cache(&vcpu->arch.mmu_shadowed_info_cache); kvm_mmu_free_memory_cache(&vcpu->arch.mmu_external_spt_cache); kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache); } static void mmu_free_pte_list_desc(struct pte_list_desc *pte_list_desc) { kmem_cache_free(pte_list_desc_cache, pte_list_desc); } static bool sp_has_gptes(struct kvm_mmu_page *sp); static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index) { if (sp->role.passthrough) return sp->gfn; if (sp->shadowed_translation) return sp->shadowed_translation[index] >> PAGE_SHIFT; return sp->gfn + (index << ((sp->role.level - 1) * SPTE_LEVEL_BITS)); } /* * For leaf SPTEs, fetch the *guest* access permissions being shadowed. Note * that the SPTE itself may have a more constrained access permissions that * what the guest enforces. For example, a guest may create an executable * huge PTE but KVM may disallow execution to mitigate iTLB multihit. */ static u32 kvm_mmu_page_get_access(struct kvm_mmu_page *sp, int index) { if (sp->shadowed_translation) return sp->shadowed_translation[index] & ACC_ALL; /* * For direct MMUs (e.g. TDP or non-paging guests) or passthrough SPs, * KVM is not shadowing any guest page tables, so the "guest access * permissions" are just ACC_ALL. * * For direct SPs in indirect MMUs (shadow paging), i.e. when KVM * is shadowing a guest huge page with small pages, the guest access * permissions being shadowed are the access permissions of the huge * page. * * In both cases, sp->role.access contains the correct access bits. */ return sp->role.access; } static void kvm_mmu_page_set_translation(struct kvm_mmu_page *sp, int index, gfn_t gfn, unsigned int access) { if (sp->shadowed_translation) { sp->shadowed_translation[index] = (gfn << PAGE_SHIFT) | access; return; } WARN_ONCE(access != kvm_mmu_page_get_access(sp, index), "access mismatch under %s page %llx (expected %u, got %u)\n", sp->role.passthrough ? "passthrough" : "direct", sp->gfn, kvm_mmu_page_get_access(sp, index), access); WARN_ONCE(gfn != kvm_mmu_page_get_gfn(sp, index), "gfn mismatch under %s page %llx (expected %llx, got %llx)\n", sp->role.passthrough ? "passthrough" : "direct", sp->gfn, kvm_mmu_page_get_gfn(sp, index), gfn); } static void kvm_mmu_page_set_access(struct kvm_mmu_page *sp, int index, unsigned int access) { gfn_t gfn = kvm_mmu_page_get_gfn(sp, index); kvm_mmu_page_set_translation(sp, index, gfn, access); } /* * Return the pointer to the large page information for a given gfn, * handling slots that are not large page aligned. */ static struct kvm_lpage_info *lpage_info_slot(gfn_t gfn, const struct kvm_memory_slot *slot, int level) { unsigned long idx; idx = gfn_to_index(gfn, slot->base_gfn, level); return &slot->arch.lpage_info[level - 2][idx]; } /* * The most significant bit in disallow_lpage tracks whether or not memory * attributes are mixed, i.e. not identical for all gfns at the current level. * The lower order bits are used to refcount other cases where a hugepage is * disallowed, e.g. if KVM has shadow a page table at the gfn. */ #define KVM_LPAGE_MIXED_FLAG BIT(31) static void update_gfn_disallow_lpage_count(const struct kvm_memory_slot *slot, gfn_t gfn, int count) { struct kvm_lpage_info *linfo; int old, i; for (i = PG_LEVEL_2M; i <= KVM_MAX_HUGEPAGE_LEVEL; ++i) { linfo = lpage_info_slot(gfn, slot, i); old = linfo->disallow_lpage; linfo->disallow_lpage += count; WARN_ON_ONCE((old ^ linfo->disallow_lpage) & KVM_LPAGE_MIXED_FLAG); } } void kvm_mmu_gfn_disallow_lpage(const struct kvm_memory_slot *slot, gfn_t gfn) { update_gfn_disallow_lpage_count(slot, gfn, 1); } void kvm_mmu_gfn_allow_lpage(const struct kvm_memory_slot *slot, gfn_t gfn) { update_gfn_disallow_lpage_count(slot, gfn, -1); } static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) { struct kvm_memslots *slots; struct kvm_memory_slot *slot; gfn_t gfn; kvm->arch.indirect_shadow_pages++; /* * Ensure indirect_shadow_pages is elevated prior to re-reading guest * child PTEs in FNAME(gpte_changed), i.e. guarantee either in-flight * emulated writes are visible before re-reading guest PTEs, or that * an emulated write will see the elevated count and acquire mmu_lock * to update SPTEs. Pairs with the smp_mb() in kvm_mmu_track_write(). */ smp_mb(); gfn = sp->gfn; slots = kvm_memslots_for_spte_role(kvm, sp->role); slot = __gfn_to_memslot(slots, gfn); /* the non-leaf shadow pages are keeping readonly. */ if (sp->role.level > PG_LEVEL_4K) return __kvm_write_track_add_gfn(kvm, slot, gfn); kvm_mmu_gfn_disallow_lpage(slot, gfn); if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn, PG_LEVEL_4K)) kvm_flush_remote_tlbs_gfn(kvm, gfn, PG_LEVEL_4K); } void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp, enum kvm_mmu_type mmu_type) { /* * If it's possible to replace the shadow page with an NX huge page, * i.e. if the shadow page is the only thing currently preventing KVM * from using a huge page, add the shadow page to the list of "to be * zapped for NX recovery" pages. Note, the shadow page can already be * on the list if KVM is reusing an existing shadow page, i.e. if KVM * links a shadow page at multiple points. */ if (!list_empty(&sp->possible_nx_huge_page_link)) return; ++kvm->stat.nx_lpage_splits; ++kvm->arch.possible_nx_huge_pages[mmu_type].nr_pages; list_add_tail(&sp->possible_nx_huge_page_link, &kvm->arch.possible_nx_huge_pages[mmu_type].pages); } static void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp, bool nx_huge_page_possible) { sp->nx_huge_page_disallowed = true; if (nx_huge_page_possible) track_possible_nx_huge_page(kvm, sp, KVM_SHADOW_MMU); } static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) { struct kvm_memslots *slots; struct kvm_memory_slot *slot; gfn_t gfn; kvm->arch.indirect_shadow_pages--; gfn = sp->gfn; slots = kvm_memslots_for_spte_role(kvm, sp->role); slot = __gfn_to_memslot(slots, gfn); if (sp->role.level > PG_LEVEL_4K) return __kvm_write_track_remove_gfn(kvm, slot, gfn); kvm_mmu_gfn_allow_lpage(slot, gfn); } void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp, enum kvm_mmu_type mmu_type) { if (list_empty(&sp->possible_nx_huge_page_link)) return; --kvm->stat.nx_lpage_splits; --kvm->arch.possible_nx_huge_pages[mmu_type].nr_pages; list_del_init(&sp->possible_nx_huge_page_link); } static void unaccount_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp) { sp->nx_huge_page_disallowed = false; untrack_possible_nx_huge_page(kvm, sp, KVM_SHADOW_MMU); } static struct kvm_memory_slot *gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn, bool no_dirty_log) { struct kvm_memory_slot *slot; slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); if (!slot || slot->flags & KVM_MEMSLOT_INVALID) return NULL; if (no_dirty_log && kvm_slot_dirty_track_enabled(slot)) return NULL; return slot; } /* * About rmap_head encoding: * * If the bit zero of rmap_head->val is clear, then it points to the only spte * in this rmap chain. Otherwise, (rmap_head->val & ~3) points to a struct * pte_list_desc containing more mappings. */ #define KVM_RMAP_MANY BIT(0) /* * rmaps and PTE lists are mostly protected by mmu_lock (the shadow MMU always * operates with mmu_lock held for write), but rmaps can be walked without * holding mmu_lock so long as the caller can tolerate SPTEs in the rmap chain * being zapped/dropped _while the rmap is locked_. * * Other than the KVM_RMAP_LOCKED flag, modifications to rmap entries must be * done while holding mmu_lock for write. This allows a task walking rmaps * without holding mmu_lock to concurrently walk the same entries as a task * that is holding mmu_lock but _not_ the rmap lock. Neither task will modify * the rmaps, thus the walks are stable. * * As alluded to above, SPTEs in rmaps are _not_ protected by KVM_RMAP_LOCKED, * only the rmap chains themselves are protected. E.g. holding an rmap's lock * ensures all "struct pte_list_desc" fields are stable. */ #define KVM_RMAP_LOCKED BIT(1) static unsigned long __kvm_rmap_lock(struct kvm_rmap_head *rmap_head) { unsigned long old_val, new_val; lockdep_assert_preemption_disabled(); /* * Elide the lock if the rmap is empty, as lockless walkers (read-only * mode) don't need to (and can't) walk an empty rmap, nor can they add * entries to the rmap. I.e. the only paths that process empty rmaps * do so while holding mmu_lock for write, and are mutually exclusive. */ old_val = atomic_long_read(&rmap_head->val); if (!old_val) return 0; do { /* * If the rmap is locked, wait for it to be unlocked before * trying acquire the lock, e.g. to avoid bouncing the cache * line. */ while (old_val & KVM_RMAP_LOCKED) { cpu_relax(); old_val = atomic_long_read(&rmap_head->val); } /* * Recheck for an empty rmap, it may have been purged by the * task that held the lock. */ if (!old_val) return 0; new_val = old_val | KVM_RMAP_LOCKED; /* * Use try_cmpxchg_acquire() to prevent reads and writes to the rmap * from being reordered outside of the critical section created by * __kvm_rmap_lock(). * * Pairs with the atomic_long_set_release() in kvm_rmap_unlock(). * * For the !old_val case, no ordering is needed, as there is no rmap * to walk. */ } while (!atomic_long_try_cmpxchg_acquire(&rmap_head->val, &old_val, new_val)); /* * Return the old value, i.e. _without_ the LOCKED bit set. It's * impossible for the return value to be 0 (see above), i.e. the read- * only unlock flow can't get a false positive and fail to unlock. */ return old_val; } static unsigned long kvm_rmap_lock(struct kvm *kvm, struct kvm_rmap_head *rmap_head) { lockdep_assert_held_write(&kvm->mmu_lock); return __kvm_rmap_lock(rmap_head); } static void __kvm_rmap_unlock(struct kvm_rmap_head *rmap_head, unsigned long val) { KVM_MMU_WARN_ON(val & KVM_RMAP_LOCKED); /* * Ensure that all accesses to the rmap have completed before unlocking * the rmap. * * Pairs with the atomic_long_try_cmpxchg_acquire() in __kvm_rmap_lock(). */ atomic_long_set_release(&rmap_head->val, val); } static void kvm_rmap_unlock(struct kvm *kvm, struct kvm_rmap_head *rmap_head, unsigned long new_val) { lockdep_assert_held_write(&kvm->mmu_lock); __kvm_rmap_unlock(rmap_head, new_val); } static unsigned long kvm_rmap_get(struct kvm_rmap_head *rmap_head) { return atomic_long_read(&rmap_head->val) & ~KVM_RMAP_LOCKED; } /* * If mmu_lock isn't held, rmaps can only be locked in read-only mode. The * actual locking is the same, but the caller is disallowed from modifying the * rmap, and so the unlock flow is a nop if the rmap is/was empty. */ static unsigned long kvm_rmap_lock_readonly(struct kvm_rmap_head *rmap_head) { unsigned long rmap_val; preempt_disable(); rmap_val = __kvm_rmap_lock(rmap_head); if (!rmap_val) preempt_enable(); return rmap_val; } static void kvm_rmap_unlock_readonly(struct kvm_rmap_head *rmap_head, unsigned long old_val) { if (!old_val) return; KVM_MMU_WARN_ON(old_val != kvm_rmap_get(rmap_head)); __kvm_rmap_unlock(rmap_head, old_val); preempt_enable(); } /* * Returns the number of pointers in the rmap chain, not counting the new one. */ static int pte_list_add(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, u64 *spte, struct kvm_rmap_head *rmap_head) { unsigned long old_val, new_val; struct pte_list_desc *desc; int count = 0; old_val = kvm_rmap_lock(kvm, rmap_head); if (!old_val) { new_val = (unsigned long)spte; } else if (!(old_val & KVM_RMAP_MANY)) { desc = kvm_mmu_memory_cache_alloc(cache); desc->sptes[0] = (u64 *)old_val; desc->sptes[1] = spte; desc->spte_count = 2; desc->tail_count = 0; new_val = (unsigned long)desc | KVM_RMAP_MANY; ++count; } else { desc = (struct pte_list_desc *)(old_val & ~KVM_RMAP_MANY); count = desc->tail_count + desc->spte_count; /* * If the previous head is full, allocate a new head descriptor * as tail descriptors are always kept full. */ if (desc->spte_count == PTE_LIST_EXT) { desc = kvm_mmu_memory_cache_alloc(cache); desc->more = (struct pte_list_desc *)(old_val & ~KVM_RMAP_MANY); desc->spte_count = 0; desc->tail_count = count; new_val = (unsigned long)desc | KVM_RMAP_MANY; } else { new_val = old_val; } desc->sptes[desc->spte_count++] = spte; } kvm_rmap_unlock(kvm, rmap_head, new_val); return count; } static void pte_list_desc_remove_entry(struct kvm *kvm, unsigned long *rmap_val, struct pte_list_desc *desc, int i) { struct pte_list_desc *head_desc = (struct pte_list_desc *)(*rmap_val & ~KVM_RMAP_MANY); int j = head_desc->spte_count - 1; /* * The head descriptor should never be empty. A new head is added only * when adding an entry and the previous head is full, and heads are * removed (this flow) when they become empty. */ KVM_BUG_ON_DATA_CORRUPTION(j < 0, kvm); /* * Replace the to-be-freed SPTE with the last valid entry from the head * descriptor to ensure that tail descriptors are full at all times. * Note, this also means that tail_count is stable for each descriptor. */ desc->sptes[i] = head_desc->sptes[j]; head_desc->sptes[j] = NULL; head_desc->spte_count--; if (head_desc->spte_count) return; /* * The head descriptor is empty. If there are no tail descriptors, * nullify the rmap head to mark the list as empty, else point the rmap * head at the next descriptor, i.e. the new head. */ if (!head_desc->more) *rmap_val = 0; else *rmap_val = (unsigned long)head_desc->more | KVM_RMAP_MANY; mmu_free_pte_list_desc(head_desc); } static void pte_list_remove(struct kvm *kvm, u64 *spte, struct kvm_rmap_head *rmap_head) { struct pte_list_desc *desc; unsigned long rmap_val; int i; rmap_val = kvm_rmap_lock(kvm, rmap_head); if (KVM_BUG_ON_DATA_CORRUPTION(!rmap_val, kvm)) goto out; if (!(rmap_val & KVM_RMAP_MANY)) { if (KVM_BUG_ON_DATA_CORRUPTION((u64 *)rmap_val != spte, kvm)) goto out; rmap_val = 0; } else { desc = (struct pte_list_desc *)(rmap_val & ~KVM_RMAP_MANY); while (desc) { for (i = 0; i < desc->spte_count; ++i) { if (desc->sptes[i] == spte) { pte_list_desc_remove_entry(kvm, &rmap_val, desc, i); goto out; } } desc = desc->more; } KVM_BUG_ON_DATA_CORRUPTION(true, kvm); } out: kvm_rmap_unlock(kvm, rmap_head, rmap_val); } static void kvm_zap_one_rmap_spte(struct kvm *kvm, struct kvm_rmap_head *rmap_head, u64 *sptep) { mmu_spte_clear_track_bits(kvm, sptep); pte_list_remove(kvm, sptep, rmap_head); } /* Return true if at least one SPTE was zapped, false otherwise */ static bool kvm_zap_all_rmap_sptes(struct kvm *kvm, struct kvm_rmap_head *rmap_head) { struct pte_list_desc *desc, *next; unsigned long rmap_val; int i; rmap_val = kvm_rmap_lock(kvm, rmap_head); if (!rmap_val) return false; if (!(rmap_val & KVM_RMAP_MANY)) { mmu_spte_clear_track_bits(kvm, (u64 *)rmap_val); goto out; } desc = (struct pte_list_desc *)(rmap_val & ~KVM_RMAP_MANY); for (; desc; desc = next) { for (i = 0; i < desc->spte_count; i++) mmu_spte_clear_track_bits(kvm, desc->sptes[i]); next = desc->more; mmu_free_pte_list_desc(desc); } out: /* rmap_head is meaningless now, remember to reset it */ kvm_rmap_unlock(kvm, rmap_head, 0); return true; } unsigned int pte_list_count(struct kvm_rmap_head *rmap_head) { unsigned long rmap_val = kvm_rmap_get(rmap_head); struct pte_list_desc *desc; if (!rmap_val) return 0; else if (!(rmap_val & KVM_RMAP_MANY)) return 1; desc = (struct pte_list_desc *)(rmap_val & ~KVM_RMAP_MANY); return desc->tail_count + desc->spte_count; } static struct kvm_rmap_head *gfn_to_rmap(gfn_t gfn, int level, const struct kvm_memory_slot *slot) { unsigned long idx; idx = gfn_to_index(gfn, slot->base_gfn, level); return &slot->arch.rmap[level - PG_LEVEL_4K][idx]; } static void rmap_remove(struct kvm *kvm, u64 *spte) { struct kvm_memslots *slots; struct kvm_memory_slot *slot; struct kvm_mmu_page *sp; gfn_t gfn; struct kvm_rmap_head *rmap_head; sp = sptep_to_sp(spte); gfn = kvm_mmu_page_get_gfn(sp, spte_index(spte)); /* * Unlike rmap_add, rmap_remove does not run in the context of a vCPU * so we have to determine which memslots to use based on context * information in sp->role. */ slots = kvm_memslots_for_spte_role(kvm, sp->role); slot = __gfn_to_memslot(slots, gfn); rmap_head = gfn_to_rmap(gfn, sp->role.level, slot); pte_list_remove(kvm, spte, rmap_head); } /* * Used by the following functions to iterate through the sptes linked by a * rmap. All fields are private and not assumed to be used outside. */ struct rmap_iterator { /* private fields */ struct rmap_head *head; struct pte_list_desc *desc; /* holds the sptep if not NULL */ int pos; /* index of the sptep */ }; /* * Iteration must be started by this function. This should also be used after * removing/dropping sptes from the rmap link because in such cases the * information in the iterator may not be valid. * * Returns sptep if found, NULL otherwise. */ static u64 *rmap_get_first(struct kvm_rmap_head *rmap_head, struct rmap_iterator *iter) { unsigned long rmap_val = kvm_rmap_get(rmap_head); if (!rmap_val) return NULL; if (!(rmap_val & KVM_RMAP_MANY)) { iter->desc = NULL; return (u64 *)rmap_val; } iter->desc = (struct pte_list_desc *)(rmap_val & ~KVM_RMAP_MANY); iter->pos = 0; return iter->desc->sptes[iter->pos]; } /* * Must be used with a valid iterator: e.g. after rmap_get_first(). * * Returns sptep if found, NULL otherwise. */ static u64 *rmap_get_next(struct rmap_iterator *iter) { if (iter->desc) { if (iter->pos < PTE_LIST_EXT - 1) { ++iter->pos; if (iter->desc->sptes[iter->pos]) return iter->desc->sptes[iter->pos]; } iter->desc = iter->desc->more; if (iter->desc) { iter->pos = 0; /* desc->sptes[0] cannot be NULL */ return iter->desc->sptes[iter->pos]; } } return NULL; } #define __for_each_rmap_spte(_rmap_head_, _iter_, _sptep_) \ for (_sptep_ = rmap_get_first(_rmap_head_, _iter_); \ _sptep_; _sptep_ = rmap_get_next(_iter_)) #define for_each_rmap_spte(_rmap_head_, _iter_, _sptep_) \ __for_each_rmap_spte(_rmap_head_, _iter_, _sptep_) \ if (!WARN_ON_ONCE(!is_shadow_present_pte(*(_sptep_)))) \ #define for_each_rmap_spte_lockless(_rmap_head_, _iter_, _sptep_, _spte_) \ __for_each_rmap_spte(_rmap_head_, _iter_, _sptep_) \ if (is_shadow_present_pte(_spte_ = mmu_spte_get_lockless(sptep))) static void drop_spte(struct kvm *kvm, u64 *sptep) { u64 old_spte = mmu_spte_clear_track_bits(kvm, sptep); if (is_shadow_present_pte(old_spte)) rmap_remove(kvm, sptep); } static void drop_large_spte(struct kvm *kvm, u64 *sptep, bool flush) { struct kvm_mmu_page *sp; sp = sptep_to_sp(sptep); WARN_ON_ONCE(sp->role.level == PG_LEVEL_4K); drop_spte(kvm, sptep); if (flush) kvm_flush_remote_tlbs_sptep(kvm, sptep); } /* * Write-protect on the specified @sptep, @pt_protect indicates whether * spte write-protection is caused by protecting shadow page table. * * Note: write protection is difference between dirty logging and spte * protection: * - for dirty logging, the spte can be set to writable at anytime if * its dirty bitmap is properly set. * - for spte protection, the spte can be writable only after unsync-ing * shadow page. * * Return true if tlb need be flushed. */ static bool spte_write_protect(u64 *sptep, bool pt_protect) { u64 spte = *sptep; if (!is_writable_pte(spte) && !(pt_protect && is_mmu_writable_spte(spte))) return false; if (pt_protect) spte &= ~shadow_mmu_writable_mask; spte = spte & ~PT_WRITABLE_MASK; return mmu_spte_update(sptep, spte); } static bool rmap_write_protect(struct kvm_rmap_head *rmap_head, bool pt_protect) { u64 *sptep; struct rmap_iterator iter; bool flush = false; for_each_rmap_spte(rmap_head, &iter, sptep) flush |= spte_write_protect(sptep, pt_protect); return flush; } static bool spte_clear_dirty(u64 *sptep) { u64 spte = *sptep; KVM_MMU_WARN_ON(!spte_ad_enabled(spte)); spte &= ~shadow_dirty_mask; return mmu_spte_update(sptep, spte); } /* * Gets the GFN ready for another round of dirty logging by clearing the * - D bit on ad-enabled SPTEs, and * - W bit on ad-disabled SPTEs. * Returns true iff any D or W bits were cleared. */ static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head, const struct kvm_memory_slot *slot) { u64 *sptep; struct rmap_iterator iter; bool flush = false; for_each_rmap_spte(rmap_head, &iter, sptep) { if (spte_ad_need_write_protect(*sptep)) flush |= test_and_clear_bit(PT_WRITABLE_SHIFT, (unsigned long *)sptep); else flush |= spte_clear_dirty(sptep); } return flush; } static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn_offset, unsigned long mask) { struct kvm_rmap_head *rmap_head; if (tdp_mmu_enabled) kvm_tdp_mmu_clear_dirty_pt_masked(kvm, slot, slot->base_gfn + gfn_offset, mask, true); if (!kvm_memslots_have_rmaps(kvm)) return; while (mask) { rmap_head = gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), PG_LEVEL_4K, slot); rmap_write_protect(rmap_head, false); /* clear the first set bit */ mask &= mask - 1; } } static void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn_offset, unsigned long mask) { struct kvm_rmap_head *rmap_head; if (tdp_mmu_enabled) kvm_tdp_mmu_clear_dirty_pt_masked(kvm, slot, slot->base_gfn + gfn_offset, mask, false); if (!kvm_memslots_have_rmaps(kvm)) return; while (mask) { rmap_head = gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), PG_LEVEL_4K, slot); __rmap_clear_dirty(kvm, rmap_head, slot); /* clear the first set bit */ mask &= mask - 1; } } void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn_offset, unsigned long mask) { /* * If the slot was assumed to be "initially all dirty", write-protect * huge pages to ensure they are split to 4KiB on the first write (KVM * dirty logs at 4KiB granularity). If eager page splitting is enabled, * immediately try to split huge pages, e.g. so that vCPUs don't get * saddled with the cost of splitting. * * The gfn_offset is guaranteed to be aligned to 64, but the base_gfn * of memslot has no such restriction, so the range can cross two large * pages. */ if (kvm_dirty_log_manual_protect_and_init_set(kvm)) { gfn_t start = slot->base_gfn + gfn_offset + __ffs(mask); gfn_t end = slot->base_gfn + gfn_offset + __fls(mask); if (READ_ONCE(eager_page_split)) kvm_mmu_try_split_huge_pages(kvm, slot, start, end + 1, PG_LEVEL_4K); kvm_mmu_slot_gfn_write_protect(kvm, slot, start, PG_LEVEL_2M); /* Cross two large pages? */ if (ALIGN(start << PAGE_SHIFT, PMD_SIZE) != ALIGN(end << PAGE_SHIFT, PMD_SIZE)) kvm_mmu_slot_gfn_write_protect(kvm, slot, end, PG_LEVEL_2M); } /* * (Re)Enable dirty logging for all 4KiB SPTEs that map the GFNs in * mask. If PML is enabled and the GFN doesn't need to be write- * protected for other reasons, e.g. shadow paging, clear the Dirty bit. * Otherwise clear the Writable bit. * * Note that kvm_mmu_clear_dirty_pt_masked() is called whenever PML is * enabled but it chooses between clearing the Dirty bit and Writeable * bit based on the context. */ if (kvm->arch.cpu_dirty_log_size) kvm_mmu_clear_dirty_pt_masked(kvm, slot, gfn_offset, mask); else kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask); } int kvm_cpu_dirty_log_size(struct kvm *kvm) { return kvm->arch.cpu_dirty_log_size; } bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, struct kvm_memory_slot *slot, u64 gfn, int min_level) { struct kvm_rmap_head *rmap_head; int i; bool write_protected = false; if (kvm_memslots_have_rmaps(kvm)) { for (i = min_level; i <= KVM_MAX_HUGEPAGE_LEVEL; ++i) { rmap_head = gfn_to_rmap(gfn, i, slot); write_protected |= rmap_write_protect(rmap_head, true); } } if (tdp_mmu_enabled) write_protected |= kvm_tdp_mmu_write_protect_gfn(kvm, slot, gfn, min_level); return write_protected; } static bool kvm_vcpu_write_protect_gfn(struct kvm_vcpu *vcpu, u64 gfn) { struct kvm_memory_slot *slot; slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); return kvm_mmu_slot_gfn_write_protect(vcpu->kvm, slot, gfn, PG_LEVEL_4K); } static bool kvm_zap_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head, const struct kvm_memory_slot *slot) { return kvm_zap_all_rmap_sptes(kvm, rmap_head); } struct slot_rmap_walk_iterator { /* input fields. */ const struct kvm_memory_slot *slot; gfn_t start_gfn; gfn_t end_gfn; int start_level; int end_level; /* output fields. */ gfn_t gfn; struct kvm_rmap_head *rmap; int level; /* private field. */ struct kvm_rmap_head *end_rmap; }; static void rmap_walk_init_level(struct slot_rmap_walk_iterator *iterator, int level) { iterator->level = level; iterator->gfn = iterator->start_gfn; iterator->rmap = gfn_to_rmap(iterator->gfn, level, iterator->slot); iterator->end_rmap = gfn_to_rmap(iterator->end_gfn, level, iterator->slot); } static void slot_rmap_walk_init(struct slot_rmap_walk_iterator *iterator, const struct kvm_memory_slot *slot, int start_level, int end_level, gfn_t start_gfn, gfn_t end_gfn) { iterator->slot = slot; iterator->start_level = start_level; iterator->end_level = end_level; iterator->start_gfn = start_gfn; iterator->end_gfn = end_gfn; rmap_walk_init_level(iterator, iterator->start_level); } static bool slot_rmap_walk_okay(struct slot_rmap_walk_iterator *iterator) { return !!iterator->rmap; } static void slot_rmap_walk_next(struct slot_rmap_walk_iterator *iterator) { while (++iterator->rmap <= iterator->end_rmap) { iterator->gfn += KVM_PAGES_PER_HPAGE(iterator->level); if (atomic_long_read(&iterator->rmap->val)) return; } if (++iterator->level > iterator->end_level) { iterator->rmap = NULL; return; } rmap_walk_init_level(iterator, iterator->level); } #define for_each_slot_rmap_range(_slot_, _start_level_, _end_level_, \ _start_gfn, _end_gfn, _iter_) \ for (slot_rmap_walk_init(_iter_, _slot_, _start_level_, \ _end_level_, _start_gfn, _end_gfn); \ slot_rmap_walk_okay(_iter_); \ slot_rmap_walk_next(_iter_)) /* The return value indicates if tlb flush on all vcpus is needed. */ typedef bool (*slot_rmaps_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head, const struct kvm_memory_slot *slot); static __always_inline bool __walk_slot_rmaps(struct kvm *kvm, const struct kvm_memory_slot *slot, slot_rmaps_handler fn, int start_level, int end_level, gfn_t start_gfn, gfn_t end_gfn, bool can_yield, bool flush_on_yield, bool flush) { struct slot_rmap_walk_iterator iterator; lockdep_assert_held_write(&kvm->mmu_lock); for_each_slot_rmap_range(slot, start_level, end_level, start_gfn, end_gfn, &iterator) { if (iterator.rmap) flush |= fn(kvm, iterator.rmap, slot); if (!can_yield) continue; if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) { if (flush && flush_on_yield) { kvm_flush_remote_tlbs_range(kvm, start_gfn, iterator.gfn - start_gfn + 1); flush = false; } cond_resched_rwlock_write(&kvm->mmu_lock); } } return flush; } static __always_inline bool walk_slot_rmaps(struct kvm *kvm, const struct kvm_memory_slot *slot, slot_rmaps_handler fn, int start_level, int end_level, bool flush_on_yield) { return __walk_slot_rmaps(kvm, slot, fn, start_level, end_level, slot->base_gfn, slot->base_gfn + slot->npages - 1, true, flush_on_yield, false); } static __always_inline bool walk_slot_rmaps_4k(struct kvm *kvm, const struct kvm_memory_slot *slot, slot_rmaps_handler fn, bool flush_on_yield) { return walk_slot_rmaps(kvm, slot, fn, PG_LEVEL_4K, PG_LEVEL_4K, flush_on_yield); } static bool __kvm_rmap_zap_gfn_range(struct kvm *kvm, const struct kvm_memory_slot *slot, gfn_t start, gfn_t end, bool can_yield, bool flush) { return __walk_slot_rmaps(kvm, slot, kvm_zap_rmap, PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL, start, end - 1, can_yield, true, flush); } bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) { bool flush = false; /* * To prevent races with vCPUs faulting in a gfn using stale data, * zapping a gfn range must be protected by mmu_invalidate_in_progress * (and mmu_invalidate_seq). The only exception is memslot deletion; * in that case, SRCU synchronization ensures that SPTEs are zapped * after all vCPUs have unlocked SRCU, guaranteeing that vCPUs see the * invalid slot. */ lockdep_assert_once(kvm->mmu_invalidate_in_progress || lockdep_is_held(&kvm->slots_lock)); if (kvm_memslots_have_rmaps(kvm)) flush = __kvm_rmap_zap_gfn_range(kvm, range->slot, range->start, range->end, range->may_block, flush); if (tdp_mmu_enabled) flush = kvm_tdp_mmu_unmap_gfn_range(kvm, range, flush); if (kvm_x86_ops.set_apic_access_page_addr && range->slot->id == APIC_ACCESS_PAGE_PRIVATE_MEMSLOT) kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD); return flush; } #define RMAP_RECYCLE_THRESHOLD 1000 static void __rmap_add(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, const struct kvm_memory_slot *slot, u64 *spte, gfn_t gfn, unsigned int access) { struct kvm_mmu_page *sp; struct kvm_rmap_head *rmap_head; int rmap_count; sp = sptep_to_sp(spte); kvm_mmu_page_set_translation(sp, spte_index(spte), gfn, access); kvm_update_page_stats(kvm, sp->role.level, 1); rmap_head = gfn_to_rmap(gfn, sp->role.level, slot); rmap_count = pte_list_add(kvm, cache, spte, rmap_head); if (rmap_count > kvm->stat.max_mmu_rmap_size) kvm->stat.max_mmu_rmap_size = rmap_count; if (rmap_count > RMAP_RECYCLE_THRESHOLD) { kvm_zap_all_rmap_sptes(kvm, rmap_head); kvm_flush_remote_tlbs_gfn(kvm, gfn, sp->role.level); } } static void rmap_add(struct kvm_vcpu *vcpu, const struct kvm_memory_slot *slot, u64 *spte, gfn_t gfn, unsigned int access) { struct kvm_mmu_memory_cache *cache = &vcpu->arch.mmu_pte_list_desc_cache; __rmap_add(vcpu->kvm, cache, slot, spte, gfn, access); } static bool kvm_rmap_age_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range, bool test_only) { struct kvm_rmap_head *rmap_head; struct rmap_iterator iter; unsigned long rmap_val; bool young = false; u64 *sptep; gfn_t gfn; int level; u64 spte; for (level = PG_LEVEL_4K; level <= KVM_MAX_HUGEPAGE_LEVEL; level++) { for (gfn = range->start; gfn < range->end; gfn += KVM_PAGES_PER_HPAGE(level)) { rmap_head = gfn_to_rmap(gfn, level, range->slot); rmap_val = kvm_rmap_lock_readonly(rmap_head); for_each_rmap_spte_lockless(rmap_head, &iter, sptep, spte) { if (!is_accessed_spte(spte)) continue; if (test_only) { kvm_rmap_unlock_readonly(rmap_head, rmap_val); return true; } if (spte_ad_enabled(spte)) clear_bit((ffs(shadow_accessed_mask) - 1), (unsigned long *)sptep); else /* * If the following cmpxchg fails, the * spte is being concurrently modified * and should most likely stay young. */ cmpxchg64(sptep, spte, mark_spte_for_access_track(spte)); young = true; } kvm_rmap_unlock_readonly(rmap_head, rmap_val); } } return young; } static bool kvm_may_have_shadow_mmu_sptes(struct kvm *kvm) { return !tdp_mmu_enabled || READ_ONCE(kvm->arch.indirect_shadow_pages); } bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { bool young = false; if (tdp_mmu_enabled) young = kvm_tdp_mmu_age_gfn_range(kvm, range); if (kvm_may_have_shadow_mmu_sptes(kvm)) young |= kvm_rmap_age_gfn_range(kvm, range, false); return young; } bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { bool young = false; if (tdp_mmu_enabled) young = kvm_tdp_mmu_test_age_gfn(kvm, range); if (young) return young; if (kvm_may_have_shadow_mmu_sptes(kvm)) young |= kvm_rmap_age_gfn_range(kvm, range, true); return young; } static void kvm_mmu_check_sptes_at_free(struct kvm_mmu_page *sp) { #ifdef CONFIG_KVM_PROVE_MMU int i; for (i = 0; i < SPTE_ENT_PER_PAGE; i++) { if (KVM_MMU_WARN_ON(is_shadow_present_pte(sp->spt[i]))) pr_err_ratelimited("SPTE %llx (@ %p) for gfn %llx shadow-present at free", sp->spt[i], &sp->spt[i], kvm_mmu_page_get_gfn(sp, i)); } #endif } static void kvm_account_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp) { kvm->arch.n_used_mmu_pages++; kvm_account_pgtable_pages((void *)sp->spt, +1); } static void kvm_unaccount_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp) { kvm->arch.n_used_mmu_pages--; kvm_account_pgtable_pages((void *)sp->spt, -1); } static void kvm_mmu_free_shadow_page(struct kvm_mmu_page *sp) { kvm_mmu_check_sptes_at_free(sp); hlist_del(&sp->hash_link); list_del(&sp->link); free_page((unsigned long)sp->spt); free_page((unsigned long)sp->shadowed_translation); kmem_cache_free(mmu_page_header_cache, sp); } static unsigned kvm_page_table_hashfn(gfn_t gfn) { return hash_64(gfn, KVM_MMU_HASH_SHIFT); } static void mmu_page_add_parent_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, struct kvm_mmu_page *sp, u64 *parent_pte) { if (!parent_pte) return; pte_list_add(kvm, cache, parent_pte, &sp->parent_ptes); } static void mmu_page_remove_parent_pte(struct kvm *kvm, struct kvm_mmu_page *sp, u64 *parent_pte) { pte_list_remove(kvm, parent_pte, &sp->parent_ptes); } static void drop_parent_pte(struct kvm *kvm, struct kvm_mmu_page *sp, u64 *parent_pte) { mmu_page_remove_parent_pte(kvm, sp, parent_pte); mmu_spte_clear_no_track(parent_pte); } static void mark_unsync(u64 *spte); static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp) { u64 *sptep; struct rmap_iterator iter; for_each_rmap_spte(&sp->parent_ptes, &iter, sptep) { mark_unsync(sptep); } } static void mark_unsync(u64 *spte) { struct kvm_mmu_page *sp; sp = sptep_to_sp(spte); if (__test_and_set_bit(spte_index(spte), sp->unsync_child_bitmap)) return; if (sp->unsync_children++) return; kvm_mmu_mark_parents_unsync(sp); } #define KVM_PAGE_ARRAY_NR 16 struct kvm_mmu_pages { struct mmu_page_and_offset { struct kvm_mmu_page *sp; unsigned int idx; } page[KVM_PAGE_ARRAY_NR]; unsigned int nr; }; static int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp, int idx) { int i; if (sp->unsync) for (i=0; i < pvec->nr; i++) if (pvec->page[i].sp == sp) return 0; pvec->page[pvec->nr].sp = sp; pvec->page[pvec->nr].idx = idx; pvec->nr++; return (pvec->nr == KVM_PAGE_ARRAY_NR); } static inline void clear_unsync_child_bit(struct kvm_mmu_page *sp, int idx) { --sp->unsync_children; WARN_ON_ONCE((int)sp->unsync_children < 0); __clear_bit(idx, sp->unsync_child_bitmap); } static int __mmu_unsync_walk(struct kvm_mmu_page *sp, struct kvm_mmu_pages *pvec) { int i, ret, nr_unsync_leaf = 0; for_each_set_bit(i, sp->unsync_child_bitmap, 512) { struct kvm_mmu_page *child; u64 ent = sp->spt[i]; if (!is_shadow_present_pte(ent) || is_large_pte(ent)) { clear_unsync_child_bit(sp, i); continue; } child = spte_to_child_sp(ent); if (child->unsync_children) { if (mmu_pages_add(pvec, child, i)) return -ENOSPC; ret = __mmu_unsync_walk(child, pvec); if (!ret) { clear_unsync_child_bit(sp, i); continue; } else if (ret > 0) { nr_unsync_leaf += ret; } else return ret; } else if (child->unsync) { nr_unsync_leaf++; if (mmu_pages_add(pvec, child, i)) return -ENOSPC; } else clear_unsync_child_bit(sp, i); } return nr_unsync_leaf; } #define INVALID_INDEX (-1) static int mmu_unsync_walk(struct kvm_mmu_page *sp, struct kvm_mmu_pages *pvec) { pvec->nr = 0; if (!sp->unsync_children) return 0; mmu_pages_add(pvec, sp, INVALID_INDEX); return __mmu_unsync_walk(sp, pvec); } static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) { WARN_ON_ONCE(!sp->unsync); trace_kvm_mmu_sync_page(sp); sp->unsync = 0; --kvm->stat.mmu_unsync; } static bool kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, struct list_head *invalid_list); static void kvm_mmu_commit_zap_page(struct kvm *kvm, struct list_head *invalid_list); static bool sp_has_gptes(struct kvm_mmu_page *sp) { if (sp->role.direct) return false; if (sp->role.passthrough) return false; return true; } static __ro_after_init HLIST_HEAD(empty_page_hash); static struct hlist_head *kvm_get_mmu_page_hash(struct kvm *kvm, gfn_t gfn) { /* * Ensure the load of the hash table pointer itself is ordered before * loads to walk the table. The pointer is set at runtime outside of * mmu_lock when the TDP MMU is enabled, i.e. when the hash table of * shadow pages becomes necessary only when KVM needs to shadow L1's * TDP for an L2 guest. Pairs with the smp_store_release() in * kvm_mmu_alloc_page_hash(). */ struct hlist_head *page_hash = smp_load_acquire(&kvm->arch.mmu_page_hash); lockdep_assert_held(&kvm->mmu_lock); if (!page_hash) return &empty_page_hash; return &page_hash[kvm_page_table_hashfn(gfn)]; } #define for_each_valid_sp(_kvm, _sp, _list) \ hlist_for_each_entry(_sp, _list, hash_link) \ if (is_obsolete_sp((_kvm), (_sp))) { \ } else #define for_each_gfn_valid_sp_with_gptes(_kvm, _sp, _gfn) \ for_each_valid_sp(_kvm, _sp, kvm_get_mmu_page_hash(_kvm, _gfn)) \ if ((_sp)->gfn != (_gfn) || !sp_has_gptes(_sp)) {} else static bool kvm_sync_page_check(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) { union kvm_mmu_page_role root_role = vcpu->arch.mmu->root_role; /* * Ignore various flags when verifying that it's safe to sync a shadow * page using the current MMU context. * * - level: not part of the overall MMU role and will never match as the MMU's * level tracks the root level * - access: updated based on the new guest PTE * - quadrant: not part of the overall MMU role (similar to level) */ const union kvm_mmu_page_role sync_role_ign = { .level = 0xf, .access = 0x7, .quadrant = 0x3, .passthrough = 0x1, }; /* * Direct pages can never be unsync, and KVM should never attempt to * sync a shadow page for a different MMU context, e.g. if the role * differs then the memslot lookup (SMM vs. non-SMM) will be bogus, the * reserved bits checks will be wrong, etc... */ if (WARN_ON_ONCE(sp->role.direct || !vcpu->arch.mmu->sync_spte || (sp->role.word ^ root_role.word) & ~sync_role_ign.word)) return false; return true; } static int kvm_sync_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, int i) { /* sp->spt[i] has initial value of shadow page table allocation */ if (sp->spt[i] == SHADOW_NONPRESENT_VALUE) return 0; return vcpu->arch.mmu->sync_spte(vcpu, sp, i); } static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) { int flush = 0; int i; if (!kvm_sync_page_check(vcpu, sp)) return -1; for (i = 0; i < SPTE_ENT_PER_PAGE; i++) { int ret = kvm_sync_spte(vcpu, sp, i); if (ret < -1) return -1; flush |= ret; } /* * Note, any flush is purely for KVM's correctness, e.g. when dropping * an existing SPTE or clearing W/A/D bits to ensure an mmu_notifier * unmap or dirty logging event doesn't fail to flush. The guest is * responsible for flushing the TLB to ensure any changes in protection * bits are recognized, i.e. until the guest flushes or page faults on * a relevant address, KVM is architecturally allowed to let vCPUs use * cached translations with the old protection bits. */ return flush; } static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, struct list_head *invalid_list) { int ret = __kvm_sync_page(vcpu, sp); if (ret < 0) kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list); return ret; } static bool kvm_mmu_remote_flush_or_zap(struct kvm *kvm, struct list_head *invalid_list, bool remote_flush) { if (!remote_flush && list_empty(invalid_list)) return false; if (!list_empty(invalid_list)) kvm_mmu_commit_zap_page(kvm, invalid_list); else kvm_flush_remote_tlbs(kvm); return true; } static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp) { if (sp->role.invalid) return true; /* TDP MMU pages do not use the MMU generation. */ return !is_tdp_mmu_page(sp) && unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen); } struct mmu_page_path { struct kvm_mmu_page *parent[PT64_ROOT_MAX_LEVEL]; unsigned int idx[PT64_ROOT_MAX_LEVEL]; }; #define for_each_sp(pvec, sp, parents, i) \ for (i = mmu_pages_first(&pvec, &parents); \ i < pvec.nr && ({ sp = pvec.page[i].sp; 1;}); \ i = mmu_pages_next(&pvec, &parents, i)) static int mmu_pages_next(struct kvm_mmu_pages *pvec, struct mmu_page_path *parents, int i) { int n; for (n = i+1; n < pvec->nr; n++) { struct kvm_mmu_page *sp = pvec->page[n].sp; unsigned idx = pvec->page[n].idx; int level = sp->role.level; parents->idx[level-1] = idx; if (level == PG_LEVEL_4K) break; parents->parent[level-2] = sp; } return n; } static int mmu_pages_first(struct kvm_mmu_pages *pvec, struct mmu_page_path *parents) { struct kvm_mmu_page *sp; int level; if (pvec->nr == 0) return 0; WARN_ON_ONCE(pvec->page[0].idx != INVALID_INDEX); sp = pvec->page[0].sp; level = sp->role.level; WARN_ON_ONCE(level == PG_LEVEL_4K); parents->parent[level-2] = sp; /* Also set up a sentinel. Further entries in pvec are all * children of sp, so this element is never overwritten. */ parents->parent[level-1] = NULL; return mmu_pages_next(pvec, parents, 0); } static void mmu_pages_clear_parents(struct mmu_page_path *parents) { struct kvm_mmu_page *sp; unsigned int level = 0; do { unsigned int idx = parents->idx[level]; sp = parents->parent[level]; if (!sp) return; WARN_ON_ONCE(idx == INVALID_INDEX); clear_unsync_child_bit(sp, idx); level++; } while (!sp->unsync_children); } static int mmu_sync_children(struct kvm_vcpu *vcpu, struct kvm_mmu_page *parent, bool can_yield) { int i; struct kvm_mmu_page *sp; struct mmu_page_path parents; struct kvm_mmu_pages pages; LIST_HEAD(invalid_list); bool flush = false; while (mmu_unsync_walk(parent, &pages)) { bool protected = false; for_each_sp(pages, sp, parents, i) protected |= kvm_vcpu_write_protect_gfn(vcpu, sp->gfn); if (protected) { kvm_mmu_remote_flush_or_zap(vcpu->kvm, &invalid_list, true); flush = false; } for_each_sp(pages, sp, parents, i) { kvm_unlink_unsync_page(vcpu->kvm, sp); flush |= kvm_sync_page(vcpu, sp, &invalid_list) > 0; mmu_pages_clear_parents(&parents); } if (need_resched() || rwlock_needbreak(&vcpu->kvm->mmu_lock)) { kvm_mmu_remote_flush_or_zap(vcpu->kvm, &invalid_list, flush); if (!can_yield) { kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); return -EINTR; } cond_resched_rwlock_write(&vcpu->kvm->mmu_lock); flush = false; } } kvm_mmu_remote_flush_or_zap(vcpu->kvm, &invalid_list, flush); return 0; } static void __clear_sp_write_flooding_count(struct kvm_mmu_page *sp) { atomic_set(&sp->write_flooding_count, 0); } static void clear_sp_write_flooding_count(u64 *spte) { __clear_sp_write_flooding_count(sptep_to_sp(spte)); } /* * The vCPU is required when finding indirect shadow pages; the shadow * page may already exist and syncing it needs the vCPU pointer in * order to read guest page tables. Direct shadow pages are never * unsync, thus @vcpu can be NULL if @role.direct is true. */ static struct kvm_mmu_page *kvm_mmu_find_shadow_page(struct kvm *kvm, struct kvm_vcpu *vcpu, gfn_t gfn, struct hlist_head *sp_list, union kvm_mmu_page_role role) { struct kvm_mmu_page *sp; int ret; int collisions = 0; LIST_HEAD(invalid_list); for_each_valid_sp(kvm, sp, sp_list) { if (sp->gfn != gfn) { collisions++; continue; } if (sp->role.word != role.word) { /* * If the guest is creating an upper-level page, zap * unsync pages for the same gfn. While it's possible * the guest is using recursive page tables, in all * likelihood the guest has stopped using the unsync * page and is installing a completely unrelated page. * Unsync pages must not be left as is, because the new * upper-level page will be write-protected. */ if (role.level > PG_LEVEL_4K && sp->unsync) kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); continue; } /* unsync and write-flooding only apply to indirect SPs. */ if (sp->role.direct) goto out; if (sp->unsync) { if (KVM_BUG_ON(!vcpu, kvm)) break; /* * The page is good, but is stale. kvm_sync_page does * get the latest guest state, but (unlike mmu_unsync_children) * it doesn't write-protect the page or mark it synchronized! * This way the validity of the mapping is ensured, but the * overhead of write protection is not incurred until the * guest invalidates the TLB mapping. This allows multiple * SPs for a single gfn to be unsync. * * If the sync fails, the page is zapped. If so, break * in order to rebuild it. */ ret = kvm_sync_page(vcpu, sp, &invalid_list); if (ret < 0) break; WARN_ON_ONCE(!list_empty(&invalid_list)); if (ret > 0) kvm_flush_remote_tlbs(kvm); } __clear_sp_write_flooding_count(sp); goto out; } sp = NULL; ++kvm->stat.mmu_cache_miss; out: kvm_mmu_commit_zap_page(kvm, &invalid_list); if (collisions > kvm->stat.max_mmu_page_hash_collisions) kvm->stat.max_mmu_page_hash_collisions = collisions; return sp; } /* Caches used when allocating a new shadow page. */ struct shadow_page_caches { struct kvm_mmu_memory_cache *page_header_cache; struct kvm_mmu_memory_cache *shadow_page_cache; struct kvm_mmu_memory_cache *shadowed_info_cache; }; static struct kvm_mmu_page *kvm_mmu_alloc_shadow_page(struct kvm *kvm, struct shadow_page_caches *caches, gfn_t gfn, struct hlist_head *sp_list, union kvm_mmu_page_role role) { struct kvm_mmu_page *sp; sp = kvm_mmu_memory_cache_alloc(caches->page_header_cache); sp->spt = kvm_mmu_memory_cache_alloc(caches->shadow_page_cache); if (!role.direct && role.level <= KVM_MAX_HUGEPAGE_LEVEL) sp->shadowed_translation = kvm_mmu_memory_cache_alloc(caches->shadowed_info_cache); set_page_private(virt_to_page(sp->spt), (unsigned long)sp); INIT_LIST_HEAD(&sp->possible_nx_huge_page_link); /* * active_mmu_pages must be a FIFO list, as kvm_zap_obsolete_pages() * depends on valid pages being added to the head of the list. See * comments in kvm_zap_obsolete_pages(). */ sp->mmu_valid_gen = kvm->arch.mmu_valid_gen; list_add(&sp->link, &kvm->arch.active_mmu_pages); kvm_account_mmu_page(kvm, sp); sp->gfn = gfn; sp->role = role; hlist_add_head(&sp->hash_link, sp_list); if (sp_has_gptes(sp)) account_shadowed(kvm, sp); return sp; } /* Note, @vcpu may be NULL if @role.direct is true; see kvm_mmu_find_shadow_page. */ static struct kvm_mmu_page *__kvm_mmu_get_shadow_page(struct kvm *kvm, struct kvm_vcpu *vcpu, struct shadow_page_caches *caches, gfn_t gfn, union kvm_mmu_page_role role) { struct hlist_head *sp_list; struct kvm_mmu_page *sp; bool created = false; /* * No need for memory barriers, unlike in kvm_get_mmu_page_hash(), as * mmu_page_hash must be set prior to creating the first shadow root, * i.e. reaching this point is fully serialized by slots_arch_lock. */ BUG_ON(!kvm->arch.mmu_page_hash); sp_list = &kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)]; sp = kvm_mmu_find_shadow_page(kvm, vcpu, gfn, sp_list, role); if (!sp) { created = true; sp = kvm_mmu_alloc_shadow_page(kvm, caches, gfn, sp_list, role); } trace_kvm_mmu_get_page(sp, created); return sp; } static struct kvm_mmu_page *kvm_mmu_get_shadow_page(struct kvm_vcpu *vcpu, gfn_t gfn, union kvm_mmu_page_role role) { struct shadow_page_caches caches = { .page_header_cache = &vcpu->arch.mmu_page_header_cache, .shadow_page_cache = &vcpu->arch.mmu_shadow_page_cache, .shadowed_info_cache = &vcpu->arch.mmu_shadowed_info_cache, }; return __kvm_mmu_get_shadow_page(vcpu->kvm, vcpu, &caches, gfn, role); } static union kvm_mmu_page_role kvm_mmu_child_role(u64 *sptep, bool direct, unsigned int access) { struct kvm_mmu_page *parent_sp = sptep_to_sp(sptep); union kvm_mmu_page_role role; role = parent_sp->role; role.level--; role.access = access; role.direct = direct; role.passthrough = 0; /* * If the guest has 4-byte PTEs then that means it's using 32-bit, * 2-level, non-PAE paging. KVM shadows such guests with PAE paging * (i.e. 8-byte PTEs). The difference in PTE size means that KVM must * shadow each guest page table with multiple shadow page tables, which * requires extra bookkeeping in the role. * * Specifically, to shadow the guest's page directory (which covers a * 4GiB address space), KVM uses 4 PAE page directories, each mapping * 1GiB of the address space. @role.quadrant encodes which quarter of * the address space each maps. * * To shadow the guest's page tables (which each map a 4MiB region), KVM * uses 2 PAE page tables, each mapping a 2MiB region. For these, * @role.quadrant encodes which half of the region they map. * * Concretely, a 4-byte PDE consumes bits 31:22, while an 8-byte PDE * consumes bits 29:21. To consume bits 31:30, KVM's uses 4 shadow * PDPTEs; those 4 PAE page directories are pre-allocated and their * quadrant is assigned in mmu_alloc_root(). A 4-byte PTE consumes * bits 21:12, while an 8-byte PTE consumes bits 20:12. To consume * bit 21 in the PTE (the child here), KVM propagates that bit to the * quadrant, i.e. sets quadrant to '0' or '1'. The parent 8-byte PDE * covers bit 21 (see above), thus the quadrant is calculated from the * _least_ significant bit of the PDE index. */ if (role.has_4_byte_gpte) { WARN_ON_ONCE(role.level != PG_LEVEL_4K); role.quadrant = spte_index(sptep) & 1; } return role; } static struct kvm_mmu_page *kvm_mmu_get_child_sp(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn, bool direct, unsigned int access) { union kvm_mmu_page_role role; if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep)) return ERR_PTR(-EEXIST); role = kvm_mmu_child_role(sptep, direct, access); return kvm_mmu_get_shadow_page(vcpu, gfn, role); } static void shadow_walk_init_using_root(struct kvm_shadow_walk_iterator *iterator, struct kvm_vcpu *vcpu, hpa_t root, u64 addr) { iterator->addr = addr; iterator->shadow_addr = root; iterator->level = vcpu->arch.mmu->root_role.level; if (iterator->level >= PT64_ROOT_4LEVEL && vcpu->arch.mmu->cpu_role.base.level < PT64_ROOT_4LEVEL && !vcpu->arch.mmu->root_role.direct) iterator->level = PT32E_ROOT_LEVEL; if (iterator->level == PT32E_ROOT_LEVEL) { /* * prev_root is currently only used for 64-bit hosts. So only * the active root_hpa is valid here. */ BUG_ON(root != vcpu->arch.mmu->root.hpa); iterator->shadow_addr = vcpu->arch.mmu->pae_root[(addr >> 30) & 3]; iterator->shadow_addr &= SPTE_BASE_ADDR_MASK; --iterator->level; if (!iterator->shadow_addr) iterator->level = 0; } } static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator, struct kvm_vcpu *vcpu, u64 addr) { shadow_walk_init_using_root(iterator, vcpu, vcpu->arch.mmu->root.hpa, addr); } static bool shadow_walk_okay(struct kvm_shadow_walk_iterator *iterator) { if (iterator->level < PG_LEVEL_4K) return false; iterator->index = SPTE_INDEX(iterator->addr, iterator->level); iterator->sptep = ((u64 *)__va(iterator->shadow_addr)) + iterator->index; return true; } static void __shadow_walk_next(struct kvm_shadow_walk_iterator *iterator, u64 spte) { if (!is_shadow_present_pte(spte) || is_last_spte(spte, iterator->level)) { iterator->level = 0; return; } iterator->shadow_addr = spte & SPTE_BASE_ADDR_MASK; --iterator->level; } static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator) { __shadow_walk_next(iterator, *iterator->sptep); } static void __link_shadow_page(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, u64 *sptep, struct kvm_mmu_page *sp, bool flush) { u64 spte; BUILD_BUG_ON(VMX_EPT_WRITABLE_MASK != PT_WRITABLE_MASK); /* * If an SPTE is present already, it must be a leaf and therefore * a large one. Drop it, and flush the TLB if needed, before * installing sp. */ if (is_shadow_present_pte(*sptep)) drop_large_spte(kvm, sptep, flush); spte = make_nonleaf_spte(sp->spt, sp_ad_disabled(sp)); mmu_spte_set(sptep, spte); mmu_page_add_parent_pte(kvm, cache, sp, sptep); /* * The non-direct sub-pagetable must be updated before linking. For * L1 sp, the pagetable is updated via kvm_sync_page() in * kvm_mmu_find_shadow_page() without write-protecting the gfn, * so sp->unsync can be true or false. For higher level non-direct * sp, the pagetable is updated/synced via mmu_sync_children() in * FNAME(fetch)(), so sp->unsync_children can only be false. * WARN_ON_ONCE() if anything happens unexpectedly. */ if (WARN_ON_ONCE(sp->unsync_children) || sp->unsync) mark_unsync(sptep); } static void link_shadow_page(struct kvm_vcpu *vcpu, u64 *sptep, struct kvm_mmu_page *sp) { __link_shadow_page(vcpu->kvm, &vcpu->arch.mmu_pte_list_desc_cache, sptep, sp, true); } static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned direct_access) { if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep)) { struct kvm_mmu_page *child; /* * For the direct sp, if the guest pte's dirty bit * changed form clean to dirty, it will corrupt the * sp's access: allow writable in the read-only sp, * so we should update the spte at this point to get * a new sp with the correct access. */ child = spte_to_child_sp(*sptep); if (child->role.access == direct_access) return; drop_parent_pte(vcpu->kvm, child, sptep); kvm_flush_remote_tlbs_sptep(vcpu->kvm, sptep); } } /* Returns the number of zapped non-leaf child shadow pages. */ static int mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp, u64 *spte, struct list_head *invalid_list) { u64 pte; struct kvm_mmu_page *child; pte = *spte; if (is_shadow_present_pte(pte)) { if (is_last_spte(pte, sp->role.level)) { drop_spte(kvm, spte); } else { child = spte_to_child_sp(pte); drop_parent_pte(kvm, child, spte); /* * Recursively zap nested TDP SPs, parentless SPs are * unlikely to be used again in the near future. This * avoids retaining a large number of stale nested SPs. */ if (tdp_enabled && invalid_list && child->role.guest_mode && !atomic_long_read(&child->parent_ptes.val)) return kvm_mmu_prepare_zap_page(kvm, child, invalid_list); } } else if (is_mmio_spte(kvm, pte)) { mmu_spte_clear_no_track(spte); } return 0; } static int kvm_mmu_page_unlink_children(struct kvm *kvm, struct kvm_mmu_page *sp, struct list_head *invalid_list) { int zapped = 0; unsigned i; for (i = 0; i < SPTE_ENT_PER_PAGE; ++i) zapped += mmu_page_zap_pte(kvm, sp, sp->spt + i, invalid_list); return zapped; } static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp) { u64 *sptep; struct rmap_iterator iter; while ((sptep = rmap_get_first(&sp->parent_ptes, &iter))) drop_parent_pte(kvm, sp, sptep); } static int mmu_zap_unsync_children(struct kvm *kvm, struct kvm_mmu_page *parent, struct list_head *invalid_list) { int i, zapped = 0; struct mmu_page_path parents; struct kvm_mmu_pages pages; if (parent->role.level == PG_LEVEL_4K) return 0; while (mmu_unsync_walk(parent, &pages)) { struct kvm_mmu_page *sp; for_each_sp(pages, sp, parents, i) { kvm_mmu_prepare_zap_page(kvm, sp, invalid_list); mmu_pages_clear_parents(&parents); zapped++; } } return zapped; } static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, struct list_head *invalid_list, int *nr_zapped) { bool list_unstable, zapped_root = false; lockdep_assert_held_write(&kvm->mmu_lock); trace_kvm_mmu_prepare_zap_page(sp); ++kvm->stat.mmu_shadow_zapped; *nr_zapped = mmu_zap_unsync_children(kvm, sp, invalid_list); *nr_zapped += kvm_mmu_page_unlink_children(kvm, sp, invalid_list); kvm_mmu_unlink_parents(kvm, sp); /* Zapping children means active_mmu_pages has become unstable. */ list_unstable = *nr_zapped; if (!sp->role.invalid && sp_has_gptes(sp)) unaccount_shadowed(kvm, sp); if (sp->unsync) kvm_unlink_unsync_page(kvm, sp); if (!sp->root_count) { /* Count self */ (*nr_zapped)++; /* * Already invalid pages (previously active roots) are not on * the active page list. See list_del() in the "else" case of * !sp->root_count. */ if (sp->role.invalid) list_add(&sp->link, invalid_list); else list_move(&sp->link, invalid_list); kvm_unaccount_mmu_page(kvm, sp); } else { /* * Remove the active root from the active page list, the root * will be explicitly freed when the root_count hits zero. */ list_del(&sp->link); /* * Obsolete pages cannot be used on any vCPUs, see the comment * in kvm_mmu_zap_all_fast(). Note, is_obsolete_sp() also * treats invalid shadow pages as being obsolete. */ zapped_root = !is_obsolete_sp(kvm, sp); } if (sp->nx_huge_page_disallowed) unaccount_nx_huge_page(kvm, sp); sp->role.invalid = 1; /* * Make the request to free obsolete roots after marking the root * invalid, otherwise other vCPUs may not see it as invalid. */ if (zapped_root) kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_FREE_OBSOLETE_ROOTS); return list_unstable; } static bool kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, struct list_head *invalid_list) { int nr_zapped; __kvm_mmu_prepare_zap_page(kvm, sp, invalid_list, &nr_zapped); return nr_zapped; } static void kvm_mmu_commit_zap_page(struct kvm *kvm, struct list_head *invalid_list) { struct kvm_mmu_page *sp, *nsp; if (list_empty(invalid_list)) return; /* * We need to make sure everyone sees our modifications to * the page tables and see changes to vcpu->mode here. The barrier * in the kvm_flush_remote_tlbs() achieves this. This pairs * with vcpu_enter_guest and walk_shadow_page_lockless_begin/end. * * In addition, kvm_flush_remote_tlbs waits for all vcpus to exit * guest mode and/or lockless shadow page table walks. */ kvm_flush_remote_tlbs(kvm); list_for_each_entry_safe(sp, nsp, invalid_list, link) { WARN_ON_ONCE(!sp->role.invalid || sp->root_count); kvm_mmu_free_shadow_page(sp); } } static unsigned long kvm_mmu_zap_oldest_mmu_pages(struct kvm *kvm, unsigned long nr_to_zap) { unsigned long total_zapped = 0; struct kvm_mmu_page *sp, *tmp; LIST_HEAD(invalid_list); bool unstable; int nr_zapped; if (list_empty(&kvm->arch.active_mmu_pages)) return 0; restart: list_for_each_entry_safe_reverse(sp, tmp, &kvm->arch.active_mmu_pages, link) { /* * Don't zap active root pages, the page itself can't be freed * and zapping it will just force vCPUs to realloc and reload. */ if (sp->root_count) continue; unstable = __kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &nr_zapped); total_zapped += nr_zapped; if (total_zapped >= nr_to_zap) break; if (unstable) goto restart; } kvm_mmu_commit_zap_page(kvm, &invalid_list); kvm->stat.mmu_recycled += total_zapped; return total_zapped; } static inline unsigned long kvm_mmu_available_pages(struct kvm *kvm) { if (kvm->arch.n_max_mmu_pages > kvm->arch.n_used_mmu_pages) return kvm->arch.n_max_mmu_pages - kvm->arch.n_used_mmu_pages; return 0; } static int make_mmu_pages_available(struct kvm_vcpu *vcpu) { unsigned long avail = kvm_mmu_available_pages(vcpu->kvm); if (likely(avail >= KVM_MIN_FREE_MMU_PAGES)) return 0; kvm_mmu_zap_oldest_mmu_pages(vcpu->kvm, KVM_REFILL_PAGES - avail); /* * Note, this check is intentionally soft, it only guarantees that one * page is available, while the caller may end up allocating as many as * four pages, e.g. for PAE roots or for 5-level paging. Temporarily * exceeding the (arbitrary by default) limit will not harm the host, * being too aggressive may unnecessarily kill the guest, and getting an * exact count is far more trouble than it's worth, especially in the * page fault paths. */ if (!kvm_mmu_available_pages(vcpu->kvm)) return -ENOSPC; return 0; } /* * Changing the number of mmu pages allocated to the vm * Note: if goal_nr_mmu_pages is too small, you will get dead lock */ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long goal_nr_mmu_pages) { write_lock(&kvm->mmu_lock); if (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages) { kvm_mmu_zap_oldest_mmu_pages(kvm, kvm->arch.n_used_mmu_pages - goal_nr_mmu_pages); goal_nr_mmu_pages = kvm->arch.n_used_mmu_pages; } kvm->arch.n_max_mmu_pages = goal_nr_mmu_pages; write_unlock(&kvm->mmu_lock); } bool __kvm_mmu_unprotect_gfn_and_retry(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, bool always_retry) { struct kvm *kvm = vcpu->kvm; LIST_HEAD(invalid_list); struct kvm_mmu_page *sp; gpa_t gpa = cr2_or_gpa; bool r = false; /* * Bail early if there aren't any write-protected shadow pages to avoid * unnecessarily taking mmu_lock lock, e.g. if the gfn is write-tracked * by a third party. Reading indirect_shadow_pages without holding * mmu_lock is safe, as this is purely an optimization, i.e. a false * positive is benign, and a false negative will simply result in KVM * skipping the unprotect+retry path, which is also an optimization. */ if (!READ_ONCE(kvm->arch.indirect_shadow_pages)) goto out; if (!vcpu->arch.mmu->root_role.direct) { gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL); if (gpa == INVALID_GPA) goto out; } write_lock(&kvm->mmu_lock); for_each_gfn_valid_sp_with_gptes(kvm, sp, gpa_to_gfn(gpa)) kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); /* * Snapshot the result before zapping, as zapping will remove all list * entries, i.e. checking the list later would yield a false negative. */ r = !list_empty(&invalid_list); kvm_mmu_commit_zap_page(kvm, &invalid_list); write_unlock(&kvm->mmu_lock); out: if (r || always_retry) { vcpu->arch.last_retry_eip = kvm_rip_read(vcpu); vcpu->arch.last_retry_addr = cr2_or_gpa; } return r; } static void kvm_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) { trace_kvm_mmu_unsync_page(sp); ++kvm->stat.mmu_unsync; sp->unsync = 1; kvm_mmu_mark_parents_unsync(sp); } /* * Attempt to unsync any shadow pages that can be reached by the specified gfn, * KVM is creating a writable mapping for said gfn. Returns 0 if all pages * were marked unsync (or if there is no shadow page), -EPERM if the SPTE must * be write-protected. */ int mmu_try_to_unsync_pages(struct kvm *kvm, const struct kvm_memory_slot *slot, gfn_t gfn, bool synchronizing, bool prefetch) { struct kvm_mmu_page *sp; bool locked = false; /* * Force write-protection if the page is being tracked. Note, the page * track machinery is used to write-protect upper-level shadow pages, * i.e. this guards the role.level == 4K assertion below! */ if (kvm_gfn_is_write_tracked(kvm, slot, gfn)) return -EPERM; /* * The page is not write-tracked, mark existing shadow pages unsync * unless KVM is synchronizing an unsync SP. In that case, KVM must * complete emulation of the guest TLB flush before allowing shadow * pages to become unsync (writable by the guest). */ for_each_gfn_valid_sp_with_gptes(kvm, sp, gfn) { if (synchronizing) return -EPERM; if (sp->unsync) continue; if (prefetch) return -EEXIST; /* * TDP MMU page faults require an additional spinlock as they * run with mmu_lock held for read, not write, and the unsync * logic is not thread safe. Take the spinklock regardless of * the MMU type to avoid extra conditionals/parameters, there's * no meaningful penalty if mmu_lock is held for write. */ if (!locked) { locked = true; spin_lock(&kvm->arch.mmu_unsync_pages_lock); /* * Recheck after taking the spinlock, a different vCPU * may have since marked the page unsync. A false * negative on the unprotected check above is not * possible as clearing sp->unsync _must_ hold mmu_lock * for write, i.e. unsync cannot transition from 1->0 * while this CPU holds mmu_lock for read (or write). */ if (READ_ONCE(sp->unsync)) continue; } WARN_ON_ONCE(sp->role.level != PG_LEVEL_4K); kvm_unsync_page(kvm, sp); } if (locked) spin_unlock(&kvm->arch.mmu_unsync_pages_lock); /* * We need to ensure that the marking of unsync pages is visible * before the SPTE is updated to allow writes because * kvm_mmu_sync_roots() checks the unsync flags without holding * the MMU lock and so can race with this. If the SPTE was updated * before the page had been marked as unsync-ed, something like the * following could happen: * * CPU 1 CPU 2 * --------------------------------------------------------------------- * 1.2 Host updates SPTE * to be writable * 2.1 Guest writes a GPTE for GVA X. * (GPTE being in the guest page table shadowed * by the SP from CPU 1.) * This reads SPTE during the page table walk. * Since SPTE.W is read as 1, there is no * fault. * * 2.2 Guest issues TLB flush. * That causes a VM Exit. * * 2.3 Walking of unsync pages sees sp->unsync is * false and skips the page. * * 2.4 Guest accesses GVA X. * Since the mapping in the SP was not updated, * so the old mapping for GVA X incorrectly * gets used. * 1.1 Host marks SP * as unsync * (sp->unsync = true) * * The write barrier below ensures that 1.1 happens before 1.2 and thus * the situation in 2.4 does not arise. It pairs with the read barrier * in is_unsync_root(), placed between 2.1's load of SPTE.W and 2.3. */ smp_wmb(); return 0; } static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot, u64 *sptep, unsigned int pte_access, gfn_t gfn, kvm_pfn_t pfn, struct kvm_page_fault *fault) { struct kvm_mmu_page *sp = sptep_to_sp(sptep); int level = sp->role.level; int was_rmapped = 0; int ret = RET_PF_FIXED; bool flush = false; bool wrprot; u64 spte; /* Prefetching always gets a writable pfn. */ bool host_writable = !fault || fault->map_writable; bool prefetch = !fault || fault->prefetch; bool write_fault = fault && fault->write; if (unlikely(is_noslot_pfn(pfn))) { vcpu->stat.pf_mmio_spte_created++; mark_mmio_spte(vcpu, sptep, gfn, pte_access); return RET_PF_EMULATE; } if (is_shadow_present_pte(*sptep)) { if (prefetch && is_last_spte(*sptep, level) && pfn == spte_to_pfn(*sptep)) return RET_PF_SPURIOUS; /* * If we overwrite a PTE page pointer with a 2MB PMD, unlink * the parent of the now unreachable PTE. */ if (level > PG_LEVEL_4K && !is_large_pte(*sptep)) { struct kvm_mmu_page *child; u64 pte = *sptep; child = spte_to_child_sp(pte); drop_parent_pte(vcpu->kvm, child, sptep); flush = true; } else if (WARN_ON_ONCE(pfn != spte_to_pfn(*sptep))) { drop_spte(vcpu->kvm, sptep); flush = true; } else was_rmapped = 1; } wrprot = make_spte(vcpu, sp, slot, pte_access, gfn, pfn, *sptep, prefetch, false, host_writable, &spte); if (*sptep == spte) { ret = RET_PF_SPURIOUS; } else { flush |= mmu_spte_update(sptep, spte); trace_kvm_mmu_set_spte(level, gfn, sptep); } if (wrprot && write_fault) ret = RET_PF_WRITE_PROTECTED; if (flush) kvm_flush_remote_tlbs_gfn(vcpu->kvm, gfn, level); if (!was_rmapped) { WARN_ON_ONCE(ret == RET_PF_SPURIOUS); rmap_add(vcpu, slot, sptep, gfn, pte_access); } else { /* Already rmapped but the pte_access bits may have changed. */ kvm_mmu_page_set_access(sp, spte_index(sptep), pte_access); } return ret; } static bool kvm_mmu_prefetch_sptes(struct kvm_vcpu *vcpu, gfn_t gfn, u64 *sptep, int nr_pages, unsigned int access) { struct page *pages[PTE_PREFETCH_NUM]; struct kvm_memory_slot *slot; int i; if (WARN_ON_ONCE(nr_pages > PTE_PREFETCH_NUM)) return false; slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, access & ACC_WRITE_MASK); if (!slot) return false; nr_pages = kvm_prefetch_pages(slot, gfn, pages, nr_pages); if (nr_pages <= 0) return false; for (i = 0; i < nr_pages; i++, gfn++, sptep++) { mmu_set_spte(vcpu, slot, sptep, access, gfn, page_to_pfn(pages[i]), NULL); /* * KVM always prefetches writable pages from the primary MMU, * and KVM can make its SPTE writable in the fast page handler, * without notifying the primary MMU. Mark pages/folios dirty * now to ensure file data is written back if it ends up being * written by the guest. Because KVM's prefetching GUPs * writable PTEs, the probability of unnecessary writeback is * extremely low. */ kvm_release_page_dirty(pages[i]); } return true; } static bool direct_pte_prefetch_many(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, u64 *start, u64 *end) { gfn_t gfn = kvm_mmu_page_get_gfn(sp, spte_index(start)); unsigned int access = sp->role.access; return kvm_mmu_prefetch_sptes(vcpu, gfn, start, end - start, access); } static void __direct_pte_prefetch(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, u64 *sptep) { u64 *spte, *start = NULL; int i; WARN_ON_ONCE(!sp->role.direct); i = spte_index(sptep) & ~(PTE_PREFETCH_NUM - 1); spte = sp->spt + i; for (i = 0; i < PTE_PREFETCH_NUM; i++, spte++) { if (is_shadow_present_pte(*spte) || spte == sptep) { if (!start) continue; if (!direct_pte_prefetch_many(vcpu, sp, start, spte)) return; start = NULL; } else if (!start) start = spte; } if (start) direct_pte_prefetch_many(vcpu, sp, start, spte); } static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep) { struct kvm_mmu_page *sp; sp = sptep_to_sp(sptep); /* * Without accessed bits, there's no way to distinguish between * actually accessed translations and prefetched, so disable pte * prefetch if accessed bits aren't available. */ if (sp_ad_disabled(sp)) return; if (sp->role.level > PG_LEVEL_4K) return; /* * If addresses are being invalidated, skip prefetching to avoid * accidentally prefetching those addresses. */ if (unlikely(vcpu->kvm->mmu_invalidate_in_progress)) return; __direct_pte_prefetch(vcpu, sp, sptep); } /* * Lookup the mapping level for @gfn in the current mm. * * WARNING! Use of host_pfn_mapping_level() requires the caller and the end * consumer to be tied into KVM's handlers for MMU notifier events! * * There are several ways to safely use this helper: * * - Check mmu_invalidate_retry_gfn() after grabbing the mapping level, before * consuming it. In this case, mmu_lock doesn't need to be held during the * lookup, but it does need to be held while checking the MMU notifier. * * - Hold mmu_lock AND ensure there is no in-progress MMU notifier invalidation * event for the hva. This can be done by explicit checking the MMU notifier * or by ensuring that KVM already has a valid mapping that covers the hva. * * - Do not use the result to install new mappings, e.g. use the host mapping * level only to decide whether or not to zap an entry. In this case, it's * not required to hold mmu_lock (though it's highly likely the caller will * want to hold mmu_lock anyways, e.g. to modify SPTEs). * * Note! The lookup can still race with modifications to host page tables, but * the above "rules" ensure KVM will not _consume_ the result of the walk if a * race with the primary MMU occurs. */ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn, const struct kvm_memory_slot *slot) { int level = PG_LEVEL_4K; unsigned long hva; unsigned long flags; pgd_t pgd; p4d_t p4d; pud_t pud; pmd_t pmd; /* * Note, using the already-retrieved memslot and __gfn_to_hva_memslot() * is not solely for performance, it's also necessary to avoid the * "writable" check in __gfn_to_hva_many(), which will always fail on * read-only memslots due to gfn_to_hva() assuming writes. Earlier * page fault steps have already verified the guest isn't writing a * read-only memslot. */ hva = __gfn_to_hva_memslot(slot, gfn); /* * Disable IRQs to prevent concurrent tear down of host page tables, * e.g. if the primary MMU promotes a P*D to a huge page and then frees * the original page table. */ local_irq_save(flags); /* * Read each entry once. As above, a non-leaf entry can be promoted to * a huge page _during_ this walk. Re-reading the entry could send the * walk into the weeks, e.g. p*d_leaf() returns false (sees the old * value) and then p*d_offset() walks into the target huge page instead * of the old page table (sees the new value). */ pgd = READ_ONCE(*pgd_offset(kvm->mm, hva)); if (pgd_none(pgd)) goto out; p4d = READ_ONCE(*p4d_offset(&pgd, hva)); if (p4d_none(p4d) || !p4d_present(p4d)) goto out; pud = READ_ONCE(*pud_offset(&p4d, hva)); if (pud_none(pud) || !pud_present(pud)) goto out; if (pud_leaf(pud)) { level = PG_LEVEL_1G; goto out; } pmd = READ_ONCE(*pmd_offset(&pud, hva)); if (pmd_none(pmd) || !pmd_present(pmd)) goto out; if (pmd_leaf(pmd)) level = PG_LEVEL_2M; out: local_irq_restore(flags); return level; } static u8 kvm_max_level_for_order(int order) { BUILD_BUG_ON(KVM_MAX_HUGEPAGE_LEVEL > PG_LEVEL_1G); KVM_MMU_WARN_ON(order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G) && order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M) && order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_4K)); if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G)) return PG_LEVEL_1G; if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M)) return PG_LEVEL_2M; return PG_LEVEL_4K; } static u8 kvm_gmem_max_mapping_level(struct kvm *kvm, struct kvm_page_fault *fault, const struct kvm_memory_slot *slot, gfn_t gfn, bool is_private) { u8 max_level, coco_level; kvm_pfn_t pfn; /* For faults, use the gmem information that was resolved earlier. */ if (fault) { pfn = fault->pfn; max_level = fault->max_level; } else { /* TODO: Call into guest_memfd once hugepages are supported. */ WARN_ONCE(1, "Get pfn+order from guest_memfd"); pfn = KVM_PFN_ERR_FAULT; max_level = PG_LEVEL_4K; } if (max_level == PG_LEVEL_4K) return max_level; /* * CoCo may influence the max mapping level, e.g. due to RMP or S-EPT * restrictions. A return of '0' means "no additional restrictions", to * allow for using an optional "ret0" static call. */ coco_level = kvm_x86_call(gmem_max_mapping_level)(kvm, pfn, is_private); if (coco_level) max_level = min(max_level, coco_level); return max_level; } int kvm_mmu_max_mapping_level(struct kvm *kvm, struct kvm_page_fault *fault, const struct kvm_memory_slot *slot, gfn_t gfn) { struct kvm_lpage_info *linfo; int host_level, max_level; bool is_private; lockdep_assert_held(&kvm->mmu_lock); if (fault) { max_level = fault->max_level; is_private = fault->is_private; } else { max_level = PG_LEVEL_NUM; is_private = kvm_mem_is_private(kvm, gfn); } max_level = min(max_level, max_huge_page_level); for ( ; max_level > PG_LEVEL_4K; max_level--) { linfo = lpage_info_slot(gfn, slot, max_level); if (!linfo->disallow_lpage) break; } if (max_level == PG_LEVEL_4K) return PG_LEVEL_4K; if (is_private || kvm_memslot_is_gmem_only(slot)) host_level = kvm_gmem_max_mapping_level(kvm, fault, slot, gfn, is_private); else host_level = host_pfn_mapping_level(kvm, gfn, slot); return min(host_level, max_level); } void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { struct kvm_memory_slot *slot = fault->slot; kvm_pfn_t mask; fault->huge_page_disallowed = fault->exec && fault->nx_huge_page_workaround_enabled; if (unlikely(fault->max_level == PG_LEVEL_4K)) return; if (is_error_noslot_pfn(fault->pfn)) return; if (kvm_slot_dirty_track_enabled(slot)) return; /* * Enforce the iTLB multihit workaround after capturing the requested * level, which will be used to do precise, accurate accounting. */ fault->req_level = kvm_mmu_max_mapping_level(vcpu->kvm, fault, fault->slot, fault->gfn); if (fault->req_level == PG_LEVEL_4K || fault->huge_page_disallowed) return; /* * mmu_invalidate_retry() was successful and mmu_lock is held, so * the pmd can't be split from under us. */ fault->goal_level = fault->req_level; mask = KVM_PAGES_PER_HPAGE(fault->goal_level) - 1; VM_BUG_ON((fault->gfn & mask) != (fault->pfn & mask)); fault->pfn &= ~mask; } void disallowed_hugepage_adjust(struct kvm_page_fault *fault, u64 spte, int cur_level) { if (cur_level > PG_LEVEL_4K && cur_level == fault->goal_level && is_shadow_present_pte(spte) && !is_large_pte(spte) && spte_to_child_sp(spte)->nx_huge_page_disallowed) { /* * A small SPTE exists for this pfn, but FNAME(fetch), * direct_map(), or kvm_tdp_mmu_map() would like to create a * large PTE instead: just force them to go down another level, * patching back for them into pfn the next 9 bits of the * address. */ u64 page_mask = KVM_PAGES_PER_HPAGE(cur_level) - KVM_PAGES_PER_HPAGE(cur_level - 1); fault->pfn |= fault->gfn & page_mask; fault->goal_level--; } } static int direct_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { struct kvm_shadow_walk_iterator it; struct kvm_mmu_page *sp; int ret; gfn_t base_gfn = fault->gfn; kvm_mmu_hugepage_adjust(vcpu, fault); trace_kvm_mmu_spte_requested(fault); for_each_shadow_entry(vcpu, fault->addr, it) { /* * We cannot overwrite existing page tables with an NX * large page, as the leaf could be executable. */ if (fault->nx_huge_page_workaround_enabled) disallowed_hugepage_adjust(fault, *it.sptep, it.level); base_gfn = gfn_round_for_level(fault->gfn, it.level); if (it.level == fault->goal_level) break; sp = kvm_mmu_get_child_sp(vcpu, it.sptep, base_gfn, true, ACC_ALL); if (sp == ERR_PTR(-EEXIST)) continue; link_shadow_page(vcpu, it.sptep, sp); if (fault->huge_page_disallowed) account_nx_huge_page(vcpu->kvm, sp, fault->req_level >= it.level); } if (WARN_ON_ONCE(it.level != fault->goal_level)) return -EFAULT; ret = mmu_set_spte(vcpu, fault->slot, it.sptep, ACC_ALL, base_gfn, fault->pfn, fault); if (ret == RET_PF_SPURIOUS) return ret; direct_pte_prefetch(vcpu, it.sptep); return ret; } static void kvm_send_hwpoison_signal(struct kvm_memory_slot *slot, gfn_t gfn) { unsigned long hva = gfn_to_hva_memslot(slot, gfn); send_sig_mceerr(BUS_MCEERR_AR, (void __user *)hva, PAGE_SHIFT, current); } static int kvm_handle_error_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { if (is_sigpending_pfn(fault->pfn)) { kvm_handle_signal_exit(vcpu); return -EINTR; } /* * Do not cache the mmio info caused by writing the readonly gfn * into the spte otherwise read access on readonly gfn also can * caused mmio page fault and treat it as mmio access. */ if (fault->pfn == KVM_PFN_ERR_RO_FAULT) return RET_PF_EMULATE; if (fault->pfn == KVM_PFN_ERR_HWPOISON) { kvm_send_hwpoison_signal(fault->slot, fault->gfn); return RET_PF_RETRY; } return -EFAULT; } static int kvm_handle_noslot_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, unsigned int access) { gva_t gva = fault->is_tdp ? 0 : fault->addr; if (fault->is_private) { kvm_mmu_prepare_memory_fault_exit(vcpu, fault); return -EFAULT; } vcpu_cache_mmio_info(vcpu, gva, fault->gfn, access & shadow_mmio_access_mask); fault->slot = NULL; fault->pfn = KVM_PFN_NOSLOT; fault->map_writable = false; /* * If MMIO caching is disabled, emulate immediately without * touching the shadow page tables as attempting to install an * MMIO SPTE will just be an expensive nop. */ if (unlikely(!enable_mmio_caching)) return RET_PF_EMULATE; /* * Do not create an MMIO SPTE for a gfn greater than host.MAXPHYADDR, * any guest that generates such gfns is running nested and is being * tricked by L0 userspace (you can observe gfn > L1.MAXPHYADDR if and * only if L1's MAXPHYADDR is inaccurate with respect to the * hardware's). */ if (unlikely(fault->gfn > kvm_mmu_max_gfn())) return RET_PF_EMULATE; return RET_PF_CONTINUE; } static bool page_fault_can_be_fast(struct kvm *kvm, struct kvm_page_fault *fault) { /* * Page faults with reserved bits set, i.e. faults on MMIO SPTEs, only * reach the common page fault handler if the SPTE has an invalid MMIO * generation number. Refreshing the MMIO generation needs to go down * the slow path. Note, EPT Misconfigs do NOT set the PRESENT flag! */ if (fault->rsvd) return false; /* * For hardware-protected VMs, certain conditions like attempting to * perform a write to a page which is not in the state that the guest * expects it to be in can result in a nested/extended #PF. In this * case, the below code might misconstrue this situation as being the * result of a write-protected access, and treat it as a spurious case * rather than taking any action to satisfy the real source of the #PF * such as generating a KVM_EXIT_MEMORY_FAULT. This can lead to the * guest spinning on a #PF indefinitely, so don't attempt the fast path * in this case. * * Note that the kvm_mem_is_private() check might race with an * attribute update, but this will either result in the guest spinning * on RET_PF_SPURIOUS until the update completes, or an actual spurious * case might go down the slow path. Either case will resolve itself. */ if (kvm->arch.has_private_mem && fault->is_private != kvm_mem_is_private(kvm, fault->gfn)) return false; /* * #PF can be fast if: * * 1. The shadow page table entry is not present and A/D bits are * disabled _by KVM_, which could mean that the fault is potentially * caused by access tracking (if enabled). If A/D bits are enabled * by KVM, but disabled by L1 for L2, KVM is forced to disable A/D * bits for L2 and employ access tracking, but the fast page fault * mechanism only supports direct MMUs. * 2. The shadow page table entry is present, the access is a write, * and no reserved bits are set (MMIO SPTEs cannot be "fixed"), i.e. * the fault was caused by a write-protection violation. If the * SPTE is MMU-writable (determined later), the fault can be fixed * by setting the Writable bit, which can be done out of mmu_lock. */ if (!fault->present) return !kvm_ad_enabled; /* * Note, instruction fetches and writes are mutually exclusive, ignore * the "exec" flag. */ return fault->write; } /* * Returns true if the SPTE was fixed successfully. Otherwise, * someone else modified the SPTE from its original value. */ static bool fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, u64 *sptep, u64 old_spte, u64 new_spte) { /* * Theoretically we could also set dirty bit (and flush TLB) here in * order to eliminate unnecessary PML logging. See comments in * set_spte. But fast_page_fault is very unlikely to happen with PML * enabled, so we do not do this. This might result in the same GPA * to be logged in PML buffer again when the write really happens, and * eventually to be called by mark_page_dirty twice. But it's also no * harm. This also avoids the TLB flush needed after setting dirty bit * so non-PML cases won't be impacted. * * Compare with make_spte() where instead shadow_dirty_mask is set. */ if (!try_cmpxchg64(sptep, &old_spte, new_spte)) return false; if (is_writable_pte(new_spte) && !is_writable_pte(old_spte)) mark_page_dirty_in_slot(vcpu->kvm, fault->slot, fault->gfn); return true; } /* * Returns the last level spte pointer of the shadow page walk for the given * gpa, and sets *spte to the spte value. This spte may be non-preset. If no * walk could be performed, returns NULL and *spte does not contain valid data. * * Contract: * - Must be called between walk_shadow_page_lockless_{begin,end}. * - The returned sptep must not be used after walk_shadow_page_lockless_end. */ static u64 *fast_pf_get_last_sptep(struct kvm_vcpu *vcpu, gpa_t gpa, u64 *spte) { struct kvm_shadow_walk_iterator iterator; u64 old_spte; u64 *sptep = NULL; for_each_shadow_entry_lockless(vcpu, gpa, iterator, old_spte) { sptep = iterator.sptep; *spte = old_spte; } return sptep; } /* * Returns one of RET_PF_INVALID, RET_PF_FIXED or RET_PF_SPURIOUS. */ static int fast_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { struct kvm_mmu_page *sp; int ret = RET_PF_INVALID; u64 spte; u64 *sptep; uint retry_count = 0; if (!page_fault_can_be_fast(vcpu->kvm, fault)) return ret; walk_shadow_page_lockless_begin(vcpu); do { u64 new_spte; if (tdp_mmu_enabled) sptep = kvm_tdp_mmu_fast_pf_get_last_sptep(vcpu, fault->gfn, &spte); else sptep = fast_pf_get_last_sptep(vcpu, fault->addr, &spte); /* * It's entirely possible for the mapping to have been zapped * by a different task, but the root page should always be * available as the vCPU holds a reference to its root(s). */ if (WARN_ON_ONCE(!sptep)) spte = FROZEN_SPTE; if (!is_shadow_present_pte(spte)) break; sp = sptep_to_sp(sptep); if (!is_last_spte(spte, sp->role.level)) break; /* * Check whether the memory access that caused the fault would * still cause it if it were to be performed right now. If not, * then this is a spurious fault caused by TLB lazily flushed, * or some other CPU has already fixed the PTE after the * current CPU took the fault. * * Need not check the access of upper level table entries since * they are always ACC_ALL. */ if (is_access_allowed(fault, spte)) { ret = RET_PF_SPURIOUS; break; } new_spte = spte; /* * KVM only supports fixing page faults outside of MMU lock for * direct MMUs, nested MMUs are always indirect, and KVM always * uses A/D bits for non-nested MMUs. Thus, if A/D bits are * enabled, the SPTE can't be an access-tracked SPTE. */ if (unlikely(!kvm_ad_enabled) && is_access_track_spte(spte)) new_spte = restore_acc_track_spte(new_spte) | shadow_accessed_mask; /* * To keep things simple, only SPTEs that are MMU-writable can * be made fully writable outside of mmu_lock, e.g. only SPTEs * that were write-protected for dirty-logging or access * tracking are handled here. Don't bother checking if the * SPTE is writable to prioritize running with A/D bits enabled. * The is_access_allowed() check above handles the common case * of the fault being spurious, and the SPTE is known to be * shadow-present, i.e. except for access tracking restoration * making the new SPTE writable, the check is wasteful. */ if (fault->write && is_mmu_writable_spte(spte)) { new_spte |= PT_WRITABLE_MASK; /* * Do not fix write-permission on the large spte when * dirty logging is enabled. Since we only dirty the * first page into the dirty-bitmap in * fast_pf_fix_direct_spte(), other pages are missed * if its slot has dirty logging enabled. * * Instead, we let the slow page fault path create a * normal spte to fix the access. */ if (sp->role.level > PG_LEVEL_4K && kvm_slot_dirty_track_enabled(fault->slot)) break; } /* Verify that the fault can be handled in the fast path */ if (new_spte == spte || !is_access_allowed(fault, new_spte)) break; /* * Currently, fast page fault only works for direct mapping * since the gfn is not stable for indirect shadow page. See * Documentation/virt/kvm/locking.rst to get more detail. */ if (fast_pf_fix_direct_spte(vcpu, fault, sptep, spte, new_spte)) { ret = RET_PF_FIXED; break; } if (++retry_count > 4) { pr_warn_once("Fast #PF retrying more than 4 times.\n"); break; } } while (true); trace_fast_page_fault(vcpu, fault, sptep, spte, ret); walk_shadow_page_lockless_end(vcpu); if (ret != RET_PF_INVALID) vcpu->stat.pf_fast++; return ret; } static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa, struct list_head *invalid_list) { struct kvm_mmu_page *sp; if (!VALID_PAGE(*root_hpa)) return; sp = root_to_sp(*root_hpa); if (WARN_ON_ONCE(!sp)) return; if (is_tdp_mmu_page(sp)) { lockdep_assert_held_read(&kvm->mmu_lock); kvm_tdp_mmu_put_root(kvm, sp); } else { lockdep_assert_held_write(&kvm->mmu_lock); if (!--sp->root_count && sp->role.invalid) kvm_mmu_prepare_zap_page(kvm, sp, invalid_list); } *root_hpa = INVALID_PAGE; } /* roots_to_free must be some combination of the KVM_MMU_ROOT_* flags */ void kvm_mmu_free_roots(struct kvm *kvm, struct kvm_mmu *mmu, ulong roots_to_free) { bool is_tdp_mmu = tdp_mmu_enabled && mmu->root_role.direct; int i; LIST_HEAD(invalid_list); bool free_active_root; WARN_ON_ONCE(roots_to_free & ~KVM_MMU_ROOTS_ALL); BUILD_BUG_ON(KVM_MMU_NUM_PREV_ROOTS >= BITS_PER_LONG); /* Before acquiring the MMU lock, see if we need to do any real work. */ free_active_root = (roots_to_free & KVM_MMU_ROOT_CURRENT) && VALID_PAGE(mmu->root.hpa); if (!free_active_root) { for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) if ((roots_to_free & KVM_MMU_ROOT_PREVIOUS(i)) && VALID_PAGE(mmu->prev_roots[i].hpa)) break; if (i == KVM_MMU_NUM_PREV_ROOTS) return; } if (is_tdp_mmu) read_lock(&kvm->mmu_lock); else write_lock(&kvm->mmu_lock); for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) if (roots_to_free & KVM_MMU_ROOT_PREVIOUS(i)) mmu_free_root_page(kvm, &mmu->prev_roots[i].hpa, &invalid_list); if (free_active_root) { if (kvm_mmu_is_dummy_root(mmu->root.hpa)) { /* Nothing to cleanup for dummy roots. */ } else if (root_to_sp(mmu->root.hpa)) { mmu_free_root_page(kvm, &mmu->root.hpa, &invalid_list); } else if (mmu->pae_root) { for (i = 0; i < 4; ++i) { if (!IS_VALID_PAE_ROOT(mmu->pae_root[i])) continue; mmu_free_root_page(kvm, &mmu->pae_root[i], &invalid_list); mmu->pae_root[i] = INVALID_PAE_ROOT; } } mmu->root.hpa = INVALID_PAGE; mmu->root.pgd = 0; } if (is_tdp_mmu) { read_unlock(&kvm->mmu_lock); WARN_ON_ONCE(!list_empty(&invalid_list)); } else { kvm_mmu_commit_zap_page(kvm, &invalid_list); write_unlock(&kvm->mmu_lock); } } EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_free_roots); void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu) { unsigned long roots_to_free = 0; struct kvm_mmu_page *sp; hpa_t root_hpa; int i; /* * This should not be called while L2 is active, L2 can't invalidate * _only_ its own roots, e.g. INVVPID unconditionally exits. */ WARN_ON_ONCE(mmu->root_role.guest_mode); for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) { root_hpa = mmu->prev_roots[i].hpa; if (!VALID_PAGE(root_hpa)) continue; sp = root_to_sp(root_hpa); if (!sp || sp->role.guest_mode) roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i); } kvm_mmu_free_roots(kvm, mmu, roots_to_free); } EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_free_guest_mode_roots); static hpa_t mmu_alloc_root(struct kvm_vcpu *vcpu, gfn_t gfn, int quadrant, u8 level) { union kvm_mmu_page_role role = vcpu->arch.mmu->root_role; struct kvm_mmu_page *sp; role.level = level; role.quadrant = quadrant; WARN_ON_ONCE(quadrant && !role.has_4_byte_gpte); WARN_ON_ONCE(role.direct && role.has_4_byte_gpte); sp = kvm_mmu_get_shadow_page(vcpu, gfn, role); ++sp->root_count; return __pa(sp->spt); } static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) { struct kvm_mmu *mmu = vcpu->arch.mmu; u8 shadow_root_level = mmu->root_role.level; hpa_t root; unsigned i; int r; if (tdp_mmu_enabled) { if (kvm_has_mirrored_tdp(vcpu->kvm) && !VALID_PAGE(mmu->mirror_root_hpa)) kvm_tdp_mmu_alloc_root(vcpu, true); kvm_tdp_mmu_alloc_root(vcpu, false); return 0; } write_lock(&vcpu->kvm->mmu_lock); r = make_mmu_pages_available(vcpu); if (r < 0) goto out_unlock; if (shadow_root_level >= PT64_ROOT_4LEVEL) { root = mmu_alloc_root(vcpu, 0, 0, shadow_root_level); mmu->root.hpa = root; } else if (shadow_root_level == PT32E_ROOT_LEVEL) { if (WARN_ON_ONCE(!mmu->pae_root)) { r = -EIO; goto out_unlock; } for (i = 0; i < 4; ++i) { WARN_ON_ONCE(IS_VALID_PAE_ROOT(mmu->pae_root[i])); root = mmu_alloc_root(vcpu, i << (30 - PAGE_SHIFT), 0, PT32_ROOT_LEVEL); mmu->pae_root[i] = root | PT_PRESENT_MASK | shadow_me_value; } mmu->root.hpa = __pa(mmu->pae_root); } else { WARN_ONCE(1, "Bad TDP root level = %d\n", shadow_root_level); r = -EIO; goto out_unlock; } /* root.pgd is ignored for direct MMUs. */ mmu->root.pgd = 0; out_unlock: write_unlock(&vcpu->kvm->mmu_lock); return r; } static int kvm_mmu_alloc_page_hash(struct kvm *kvm) { struct hlist_head *h; if (kvm->arch.mmu_page_hash) return 0; h = kvcalloc(KVM_NUM_MMU_PAGES, sizeof(*h), GFP_KERNEL_ACCOUNT); if (!h) return -ENOMEM; /* * Ensure the hash table pointer is set only after all stores to zero * the memory are retired. Pairs with the smp_load_acquire() in * kvm_get_mmu_page_hash(). Note, mmu_lock must be held for write to * add (or remove) shadow pages, and so readers are guaranteed to see * an empty list for their current mmu_lock critical section. */ smp_store_release(&kvm->arch.mmu_page_hash, h); return 0; } static int mmu_first_shadow_root_alloc(struct kvm *kvm) { struct kvm_memslots *slots; struct kvm_memory_slot *slot; int r = 0, i, bkt; /* * Check if this is the first shadow root being allocated before * taking the lock. */ if (kvm_shadow_root_allocated(kvm)) return 0; mutex_lock(&kvm->slots_arch_lock); /* Recheck, under the lock, whether this is the first shadow root. */ if (kvm_shadow_root_allocated(kvm)) goto out_unlock; r = kvm_mmu_alloc_page_hash(kvm); if (r) goto out_unlock; /* * Check if memslot metadata actually needs to be allocated, e.g. all * metadata will be allocated upfront if TDP is disabled. */ if (kvm_memslots_have_rmaps(kvm) && kvm_page_track_write_tracking_enabled(kvm)) goto out_success; for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) { slots = __kvm_memslots(kvm, i); kvm_for_each_memslot(slot, bkt, slots) { /* * Both of these functions are no-ops if the target is * already allocated, so unconditionally calling both * is safe. Intentionally do NOT free allocations on * failure to avoid having to track which allocations * were made now versus when the memslot was created. * The metadata is guaranteed to be freed when the slot * is freed, and will be kept/used if userspace retries * KVM_RUN instead of killing the VM. */ r = memslot_rmap_alloc(slot, slot->npages); if (r) goto out_unlock; r = kvm_page_track_write_tracking_alloc(slot); if (r) goto out_unlock; } } /* * Ensure that shadow_root_allocated becomes true strictly after * all the related pointers are set. */ out_success: smp_store_release(&kvm->arch.shadow_root_allocated, true); out_unlock: mutex_unlock(&kvm->slots_arch_lock); return r; } static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) { struct kvm_mmu *mmu = vcpu->arch.mmu; u64 pdptrs[4], pm_mask; gfn_t root_gfn, root_pgd; int quadrant, i, r; hpa_t root; root_pgd = kvm_mmu_get_guest_pgd(vcpu, mmu); root_gfn = (root_pgd & __PT_BASE_ADDR_MASK) >> PAGE_SHIFT; if (!kvm_vcpu_is_visible_gfn(vcpu, root_gfn)) { mmu->root.hpa = kvm_mmu_get_dummy_root(); return 0; } /* * On SVM, reading PDPTRs might access guest memory, which might fault * and thus might sleep. Grab the PDPTRs before acquiring mmu_lock. */ if (mmu->cpu_role.base.level == PT32E_ROOT_LEVEL) { for (i = 0; i < 4; ++i) { pdptrs[i] = mmu->get_pdptr(vcpu, i); if (!(pdptrs[i] & PT_PRESENT_MASK)) continue; if (!kvm_vcpu_is_visible_gfn(vcpu, pdptrs[i] >> PAGE_SHIFT)) pdptrs[i] = 0; } } r = mmu_first_shadow_root_alloc(vcpu->kvm); if (r) return r; write_lock(&vcpu->kvm->mmu_lock); r = make_mmu_pages_available(vcpu); if (r < 0) goto out_unlock; /* * Do we shadow a long mode page table? If so we need to * write-protect the guests page table root. */ if (mmu->cpu_role.base.level >= PT64_ROOT_4LEVEL) { root = mmu_alloc_root(vcpu, root_gfn, 0, mmu->root_role.level); mmu->root.hpa = root; goto set_root_pgd; } if (WARN_ON_ONCE(!mmu->pae_root)) { r = -EIO; goto out_unlock; } /* * We shadow a 32 bit page table. This may be a legacy 2-level * or a PAE 3-level page table. In either case we need to be aware that * the shadow page table may be a PAE or a long mode page table. */ pm_mask = PT_PRESENT_MASK | shadow_me_value; if (mmu->root_role.level >= PT64_ROOT_4LEVEL) { pm_mask |= PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK; if (WARN_ON_ONCE(!mmu->pml4_root)) { r = -EIO; goto out_unlock; } mmu->pml4_root[0] = __pa(mmu->pae_root) | pm_mask; if (mmu->root_role.level == PT64_ROOT_5LEVEL) { if (WARN_ON_ONCE(!mmu->pml5_root)) { r = -EIO; goto out_unlock; } mmu->pml5_root[0] = __pa(mmu->pml4_root) | pm_mask; } } for (i = 0; i < 4; ++i) { WARN_ON_ONCE(IS_VALID_PAE_ROOT(mmu->pae_root[i])); if (mmu->cpu_role.base.level == PT32E_ROOT_LEVEL) { if (!(pdptrs[i] & PT_PRESENT_MASK)) { mmu->pae_root[i] = INVALID_PAE_ROOT; continue; } root_gfn = pdptrs[i] >> PAGE_SHIFT; } /* * If shadowing 32-bit non-PAE page tables, each PAE page * directory maps one quarter of the guest's non-PAE page * directory. Othwerise each PAE page direct shadows one guest * PAE page directory so that quadrant should be 0. */ quadrant = (mmu->cpu_role.base.level == PT32_ROOT_LEVEL) ? i : 0; root = mmu_alloc_root(vcpu, root_gfn, quadrant, PT32_ROOT_LEVEL); mmu->pae_root[i] = root | pm_mask; } if (mmu->root_role.level == PT64_ROOT_5LEVEL) mmu->root.hpa = __pa(mmu->pml5_root); else if (mmu->root_role.level == PT64_ROOT_4LEVEL) mmu->root.hpa = __pa(mmu->pml4_root); else mmu->root.hpa = __pa(mmu->pae_root); set_root_pgd: mmu->root.pgd = root_pgd; out_unlock: write_unlock(&vcpu->kvm->mmu_lock); return r; } static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu) { struct kvm_mmu *mmu = vcpu->arch.mmu; bool need_pml5 = mmu->root_role.level > PT64_ROOT_4LEVEL; u64 *pml5_root = NULL; u64 *pml4_root = NULL; u64 *pae_root; /* * When shadowing 32-bit or PAE NPT with 64-bit NPT, the PML4 and PDP * tables are allocated and initialized at root creation as there is no * equivalent level in the guest's NPT to shadow. Allocate the tables * on demand, as running a 32-bit L1 VMM on 64-bit KVM is very rare. */ if (mmu->root_role.direct || mmu->cpu_role.base.level >= PT64_ROOT_4LEVEL || mmu->root_role.level < PT64_ROOT_4LEVEL) return 0; /* * NPT, the only paging mode that uses this horror, uses a fixed number * of levels for the shadow page tables, e.g. all MMUs are 4-level or * all MMus are 5-level. Thus, this can safely require that pml5_root * is allocated if the other roots are valid and pml5 is needed, as any * prior MMU would also have required pml5. */ if (mmu->pae_root && mmu->pml4_root && (!need_pml5 || mmu->pml5_root)) return 0; /* * The special roots should always be allocated in concert. Yell and * bail if KVM ends up in a state where only one of the roots is valid. */ if (WARN_ON_ONCE(!tdp_enabled || mmu->pae_root || mmu->pml4_root || (need_pml5 && mmu->pml5_root))) return -EIO; /* * Unlike 32-bit NPT, the PDP table doesn't need to be in low mem, and * doesn't need to be decrypted. */ pae_root = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT); if (!pae_root) return -ENOMEM; #ifdef CONFIG_X86_64 pml4_root = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT); if (!pml4_root) goto err_pml4; if (need_pml5) { pml5_root = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT); if (!pml5_root) goto err_pml5; } #endif mmu->pae_root = pae_root; mmu->pml4_root = pml4_root; mmu->pml5_root = pml5_root; return 0; #ifdef CONFIG_X86_64 err_pml5: free_page((unsigned long)pml4_root); err_pml4: free_page((unsigned long)pae_root); return -ENOMEM; #endif } static bool is_unsync_root(hpa_t root) { struct kvm_mmu_page *sp; if (!VALID_PAGE(root) || kvm_mmu_is_dummy_root(root)) return false; /* * The read barrier orders the CPU's read of SPTE.W during the page table * walk before the reads of sp->unsync/sp->unsync_children here. * * Even if another CPU was marking the SP as unsync-ed simultaneously, * any guest page table changes are not guaranteed to be visible anyway * until this VCPU issues a TLB flush strictly after those changes are * made. We only need to ensure that the other CPU sets these flags * before any actual changes to the page tables are made. The comments * in mmu_try_to_unsync_pages() describe what could go wrong if this * requirement isn't satisfied. */ smp_rmb(); sp = root_to_sp(root); /* * PAE roots (somewhat arbitrarily) aren't backed by shadow pages, the * PDPTEs for a given PAE root need to be synchronized individually. */ if (WARN_ON_ONCE(!sp)) return false; if (sp->unsync || sp->unsync_children) return true; return false; } void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) { int i; struct kvm_mmu_page *sp; if (vcpu->arch.mmu->root_role.direct) return; if (!VALID_PAGE(vcpu->arch.mmu->root.hpa)) return; vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY); if (vcpu->arch.mmu->cpu_role.base.level >= PT64_ROOT_4LEVEL) { hpa_t root = vcpu->arch.mmu->root.hpa; if (!is_unsync_root(root)) return; sp = root_to_sp(root); write_lock(&vcpu->kvm->mmu_lock); mmu_sync_children(vcpu, sp, true); write_unlock(&vcpu->kvm->mmu_lock); return; } write_lock(&vcpu->kvm->mmu_lock); for (i = 0; i < 4; ++i) { hpa_t root = vcpu->arch.mmu->pae_root[i]; if (IS_VALID_PAE_ROOT(root)) { sp = spte_to_child_sp(root); mmu_sync_children(vcpu, sp, true); } } write_unlock(&vcpu->kvm->mmu_lock); } void kvm_mmu_sync_prev_roots(struct kvm_vcpu *vcpu) { unsigned long roots_to_free = 0; int i; for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) if (is_unsync_root(vcpu->arch.mmu->prev_roots[i].hpa)) roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i); /* sync prev_roots by simply freeing them */ kvm_mmu_free_roots(vcpu->kvm, vcpu->arch.mmu, roots_to_free); } static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, gpa_t vaddr, u64 access, struct x86_exception *exception) { if (exception) exception->error_code = 0; return kvm_translate_gpa(vcpu, mmu, vaddr, access, exception); } static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct) { /* * A nested guest cannot use the MMIO cache if it is using nested * page tables, because cr2 is a nGPA while the cache stores GPAs. */ if (mmu_is_nested(vcpu)) return false; if (direct) return vcpu_match_mmio_gpa(vcpu, addr); return vcpu_match_mmio_gva(vcpu, addr); } /* * Return the level of the lowest level SPTE added to sptes. * That SPTE may be non-present. * * Must be called between walk_shadow_page_lockless_{begin,end}. */ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level) { struct kvm_shadow_walk_iterator iterator; int leaf = -1; u64 spte; for (shadow_walk_init(&iterator, vcpu, addr), *root_level = iterator.level; shadow_walk_okay(&iterator); __shadow_walk_next(&iterator, spte)) { leaf = iterator.level; spte = mmu_spte_get_lockless(iterator.sptep); sptes[leaf] = spte; } return leaf; } static int get_sptes_lockless(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level) { int leaf; walk_shadow_page_lockless_begin(vcpu); if (is_tdp_mmu_active(vcpu)) leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes, root_level); else leaf = get_walk(vcpu, addr, sptes, root_level); walk_shadow_page_lockless_end(vcpu); return leaf; } /* return true if reserved bit(s) are detected on a valid, non-MMIO SPTE. */ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) { u64 sptes[PT64_ROOT_MAX_LEVEL + 1]; struct rsvd_bits_validate *rsvd_check; int root, leaf, level; bool reserved = false; leaf = get_sptes_lockless(vcpu, addr, sptes, &root); if (unlikely(leaf < 0)) { *sptep = 0ull; return reserved; } *sptep = sptes[leaf]; /* * Skip reserved bits checks on the terminal leaf if it's not a valid * SPTE. Note, this also (intentionally) skips MMIO SPTEs, which, by * design, always have reserved bits set. The purpose of the checks is * to detect reserved bits on non-MMIO SPTEs. i.e. buggy SPTEs. */ if (!is_shadow_present_pte(sptes[leaf])) leaf++; rsvd_check = &vcpu->arch.mmu->shadow_zero_check; for (level = root; level >= leaf; level--) reserved |= is_rsvd_spte(rsvd_check, sptes[level], level); if (reserved) { pr_err("%s: reserved bits set on MMU-present spte, addr 0x%llx, hierarchy:\n", __func__, addr); for (level = root; level >= leaf; level--) pr_err("------ spte = 0x%llx level = %d, rsvd bits = 0x%llx", sptes[level], level, get_rsvd_bits(rsvd_check, sptes[level], level)); } return reserved; } static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct) { u64 spte; bool reserved; if (mmio_info_in_cache(vcpu, addr, direct)) return RET_PF_EMULATE; reserved = get_mmio_spte(vcpu, addr, &spte); if (WARN_ON_ONCE(reserved)) return -EINVAL; if (is_mmio_spte(vcpu->kvm, spte)) { gfn_t gfn = get_mmio_spte_gfn(spte); unsigned int access = get_mmio_spte_access(spte); if (!check_mmio_spte(vcpu, spte)) return RET_PF_INVALID; if (direct) addr = 0; trace_handle_mmio_page_fault(addr, gfn, access); vcpu_cache_mmio_info(vcpu, addr, gfn, access); return RET_PF_EMULATE; } /* * If the page table is zapped by other cpus, let CPU fault again on * the address. */ return RET_PF_RETRY; } static bool page_fault_handle_page_track(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { if (unlikely(fault->rsvd)) return false; if (!fault->present || !fault->write) return false; /* * guest is writing the page which is write tracked which can * not be fixed by page fault handler. */ if (kvm_gfn_is_write_tracked(vcpu->kvm, fault->slot, fault->gfn)) return true; return false; } static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr) { struct kvm_shadow_walk_iterator iterator; u64 spte; walk_shadow_page_lockless_begin(vcpu); for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) clear_sp_write_flooding_count(iterator.sptep); walk_shadow_page_lockless_end(vcpu); } static u32 alloc_apf_token(struct kvm_vcpu *vcpu) { /* make sure the token value is not 0 */ u32 id = vcpu->arch.apf.id; if (id << 12 == 0) vcpu->arch.apf.id = 1; return (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id; } static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { struct kvm_arch_async_pf arch; arch.token = alloc_apf_token(vcpu); arch.gfn = fault->gfn; arch.error_code = fault->error_code; arch.direct_map = vcpu->arch.mmu->root_role.direct; arch.cr3 = kvm_mmu_get_guest_pgd(vcpu, vcpu->arch.mmu); return kvm_setup_async_pf(vcpu, fault->addr, kvm_vcpu_gfn_to_hva(vcpu, fault->gfn), &arch); } void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) { int r; if (WARN_ON_ONCE(work->arch.error_code & PFERR_PRIVATE_ACCESS)) return; if ((vcpu->arch.mmu->root_role.direct != work->arch.direct_map) || work->wakeup_all) return; r = kvm_mmu_reload(vcpu); if (unlikely(r)) return; if (!vcpu->arch.mmu->root_role.direct && work->arch.cr3 != kvm_mmu_get_guest_pgd(vcpu, vcpu->arch.mmu)) return; r = kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, work->arch.error_code, true, NULL, NULL); /* * Account fixed page faults, otherwise they'll never be counted, but * ignore stats for all other return times. Page-ready "faults" aren't * truly spurious and never trigger emulation */ if (r == RET_PF_FIXED) vcpu->stat.pf_fixed++; } static void kvm_mmu_finish_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, int r) { kvm_release_faultin_page(vcpu->kvm, fault->refcounted_page, r == RET_PF_RETRY, fault->map_writable); } static int kvm_mmu_faultin_pfn_gmem(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { int max_order, r; if (!kvm_slot_has_gmem(fault->slot)) { kvm_mmu_prepare_memory_fault_exit(vcpu, fault); return -EFAULT; } r = kvm_gmem_get_pfn(vcpu->kvm, fault->slot, fault->gfn, &fault->pfn, &fault->refcounted_page, &max_order); if (r) { kvm_mmu_prepare_memory_fault_exit(vcpu, fault); return r; } fault->map_writable = !(fault->slot->flags & KVM_MEM_READONLY); fault->max_level = kvm_max_level_for_order(max_order); return RET_PF_CONTINUE; } static int __kvm_mmu_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { unsigned int foll = fault->write ? FOLL_WRITE : 0; if (fault->is_private || kvm_memslot_is_gmem_only(fault->slot)) return kvm_mmu_faultin_pfn_gmem(vcpu, fault); foll |= FOLL_NOWAIT; fault->pfn = __kvm_faultin_pfn(fault->slot, fault->gfn, foll, &fault->map_writable, &fault->refcounted_page); /* * If resolving the page failed because I/O is needed to fault-in the * page, then either set up an asynchronous #PF to do the I/O, or if * doing an async #PF isn't possible, retry with I/O allowed. All * other failures are terminal, i.e. retrying won't help. */ if (fault->pfn != KVM_PFN_ERR_NEEDS_IO) return RET_PF_CONTINUE; if (!fault->prefetch && kvm_can_do_async_pf(vcpu)) { trace_kvm_try_async_get_page(fault->addr, fault->gfn); if (kvm_find_async_pf_gfn(vcpu, fault->gfn)) { trace_kvm_async_pf_repeated_fault(fault->addr, fault->gfn); kvm_make_request(KVM_REQ_APF_HALT, vcpu); return RET_PF_RETRY; } else if (kvm_arch_setup_async_pf(vcpu, fault)) { return RET_PF_RETRY; } } /* * Allow gup to bail on pending non-fatal signals when it's also allowed * to wait for IO. Note, gup always bails if it is unable to quickly * get a page and a fatal signal, i.e. SIGKILL, is pending. */ foll |= FOLL_INTERRUPTIBLE; foll &= ~FOLL_NOWAIT; fault->pfn = __kvm_faultin_pfn(fault->slot, fault->gfn, foll, &fault->map_writable, &fault->refcounted_page); return RET_PF_CONTINUE; } static int kvm_mmu_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, unsigned int access) { struct kvm_memory_slot *slot = fault->slot; struct kvm *kvm = vcpu->kvm; int ret; if (KVM_BUG_ON(kvm_is_gfn_alias(kvm, fault->gfn), kvm)) return -EFAULT; /* * Note that the mmu_invalidate_seq also serves to detect a concurrent * change in attributes. is_page_fault_stale() will detect an * invalidation relate to fault->fn and resume the guest without * installing a mapping in the page tables. */ fault->mmu_seq = vcpu->kvm->mmu_invalidate_seq; smp_rmb(); /* * Now that we have a snapshot of mmu_invalidate_seq we can check for a * private vs. shared mismatch. */ if (fault->is_private != kvm_mem_is_private(kvm, fault->gfn)) { kvm_mmu_prepare_memory_fault_exit(vcpu, fault); return -EFAULT; } if (unlikely(!slot)) return kvm_handle_noslot_fault(vcpu, fault, access); /* * Retry the page fault if the gfn hit a memslot that is being deleted * or moved. This ensures any existing SPTEs for the old memslot will * be zapped before KVM inserts a new MMIO SPTE for the gfn. Punt the * error to userspace if this is a prefault, as KVM's prefaulting ABI * doesn't provide the same forward progress guarantees as KVM_RUN. */ if (slot->flags & KVM_MEMSLOT_INVALID) { if (fault->prefetch) return -EAGAIN; return RET_PF_RETRY; } if (slot->id == APIC_ACCESS_PAGE_PRIVATE_MEMSLOT) { /* * Don't map L1's APIC access page into L2, KVM doesn't support * using APICv/AVIC to accelerate L2 accesses to L1's APIC, * i.e. the access needs to be emulated. Emulating access to * L1's APIC is also correct if L1 is accelerating L2's own * virtual APIC, but for some reason L1 also maps _L1's_ APIC * into L2. Note, vcpu_is_mmio_gpa() always treats access to * the APIC as MMIO. Allow an MMIO SPTE to be created, as KVM * uses different roots for L1 vs. L2, i.e. there is no danger * of breaking APICv/AVIC for L1. */ if (is_guest_mode(vcpu)) return kvm_handle_noslot_fault(vcpu, fault, access); /* * If the APIC access page exists but is disabled, go directly * to emulation without caching the MMIO access or creating a * MMIO SPTE. That way the cache doesn't need to be purged * when the AVIC is re-enabled. */ if (!kvm_apicv_activated(vcpu->kvm)) return RET_PF_EMULATE; } /* * Check for a relevant mmu_notifier invalidation event before getting * the pfn from the primary MMU, and before acquiring mmu_lock. * * For mmu_lock, if there is an in-progress invalidation and the kernel * allows preemption, the invalidation task may drop mmu_lock and yield * in response to mmu_lock being contended, which is *very* counter- * productive as this vCPU can't actually make forward progress until * the invalidation completes. * * Retrying now can also avoid unnessary lock contention in the primary * MMU, as the primary MMU doesn't necessarily hold a single lock for * the duration of the invalidation, i.e. faulting in a conflicting pfn * can cause the invalidation to take longer by holding locks that are * needed to complete the invalidation. * * Do the pre-check even for non-preemtible kernels, i.e. even if KVM * will never yield mmu_lock in response to contention, as this vCPU is * *guaranteed* to need to retry, i.e. waiting until mmu_lock is held * to detect retry guarantees the worst case latency for the vCPU. */ if (mmu_invalidate_retry_gfn_unsafe(kvm, fault->mmu_seq, fault->gfn)) return RET_PF_RETRY; ret = __kvm_mmu_faultin_pfn(vcpu, fault); if (ret != RET_PF_CONTINUE) return ret; if (unlikely(is_error_pfn(fault->pfn))) return kvm_handle_error_pfn(vcpu, fault); if (WARN_ON_ONCE(!fault->slot || is_noslot_pfn(fault->pfn))) return kvm_handle_noslot_fault(vcpu, fault, access); /* * Check again for a relevant mmu_notifier invalidation event purely to * avoid contending mmu_lock. Most invalidations will be detected by * the previous check, but checking is extremely cheap relative to the * overall cost of failing to detect the invalidation until after * mmu_lock is acquired. */ if (mmu_invalidate_retry_gfn_unsafe(kvm, fault->mmu_seq, fault->gfn)) { kvm_mmu_finish_page_fault(vcpu, fault, RET_PF_RETRY); return RET_PF_RETRY; } return RET_PF_CONTINUE; } /* * Returns true if the page fault is stale and needs to be retried, i.e. if the * root was invalidated by a memslot update or a relevant mmu_notifier fired. */ static bool is_page_fault_stale(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { struct kvm_mmu_page *sp = root_to_sp(vcpu->arch.mmu->root.hpa); /* Special roots, e.g. pae_root, are not backed by shadow pages. */ if (sp && is_obsolete_sp(vcpu->kvm, sp)) return true; /* * Roots without an associated shadow page are considered invalid if * there is a pending request to free obsolete roots. The request is * only a hint that the current root _may_ be obsolete and needs to be * reloaded, e.g. if the guest frees a PGD that KVM is tracking as a * previous root, then __kvm_mmu_prepare_zap_page() signals all vCPUs * to reload even if no vCPU is actively using the root. */ if (!sp && kvm_test_request(KVM_REQ_MMU_FREE_OBSOLETE_ROOTS, vcpu)) return true; /* * Check for a relevant mmu_notifier invalidation event one last time * now that mmu_lock is held, as the "unsafe" checks performed without * holding mmu_lock can get false negatives. */ return fault->slot && mmu_invalidate_retry_gfn(vcpu->kvm, fault->mmu_seq, fault->gfn); } static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { int r; /* Dummy roots are used only for shadowing bad guest roots. */ if (WARN_ON_ONCE(kvm_mmu_is_dummy_root(vcpu->arch.mmu->root.hpa))) return RET_PF_RETRY; if (page_fault_handle_page_track(vcpu, fault)) return RET_PF_WRITE_PROTECTED; r = fast_page_fault(vcpu, fault); if (r != RET_PF_INVALID) return r; r = mmu_topup_memory_caches(vcpu, false); if (r) return r; r = kvm_mmu_faultin_pfn(vcpu, fault, ACC_ALL); if (r != RET_PF_CONTINUE) return r; r = RET_PF_RETRY; write_lock(&vcpu->kvm->mmu_lock); if (is_page_fault_stale(vcpu, fault)) goto out_unlock; r = make_mmu_pages_available(vcpu); if (r) goto out_unlock; r = direct_map(vcpu, fault); out_unlock: kvm_mmu_finish_page_fault(vcpu, fault, r); write_unlock(&vcpu->kvm->mmu_lock); return r; } static int nonpaging_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { /* This path builds a PAE pagetable, we can map 2mb pages at maximum. */ fault->max_level = PG_LEVEL_2M; return direct_page_fault(vcpu, fault); } int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code, u64 fault_address, char *insn, int insn_len) { int r = 1; u32 flags = vcpu->arch.apf.host_apf_flags; #ifndef CONFIG_X86_64 /* A 64-bit CR2 should be impossible on 32-bit KVM. */ if (WARN_ON_ONCE(fault_address >> 32)) return -EFAULT; #endif /* * Legacy #PF exception only have a 32-bit error code. Simply drop the * upper bits as KVM doesn't use them for #PF (because they are never * set), and to ensure there are no collisions with KVM-defined bits. */ if (WARN_ON_ONCE(error_code >> 32)) error_code = lower_32_bits(error_code); /* * Restrict KVM-defined flags to bits 63:32 so that it's impossible for * them to conflict with #PF error codes, which are limited to 32 bits. */ BUILD_BUG_ON(lower_32_bits(PFERR_SYNTHETIC_MASK)); vcpu->arch.l1tf_flush_l1d = true; if (!flags) { trace_kvm_page_fault(vcpu, fault_address, error_code); r = kvm_mmu_page_fault(vcpu, fault_address, error_code, insn, insn_len); } else if (flags & KVM_PV_REASON_PAGE_NOT_PRESENT) { vcpu->arch.apf.host_apf_flags = 0; local_irq_disable(); kvm_async_pf_task_wait_schedule(fault_address); local_irq_enable(); } else { WARN_ONCE(1, "Unexpected host async PF flags: %x\n", flags); } return r; } EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_handle_page_fault); #ifdef CONFIG_X86_64 static int kvm_tdp_mmu_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { int r; if (page_fault_handle_page_track(vcpu, fault)) return RET_PF_WRITE_PROTECTED; r = fast_page_fault(vcpu, fault); if (r != RET_PF_INVALID) return r; r = mmu_topup_memory_caches(vcpu, false); if (r) return r; r = kvm_mmu_faultin_pfn(vcpu, fault, ACC_ALL); if (r != RET_PF_CONTINUE) return r; r = RET_PF_RETRY; read_lock(&vcpu->kvm->mmu_lock); if (is_page_fault_stale(vcpu, fault)) goto out_unlock; r = kvm_tdp_mmu_map(vcpu, fault); out_unlock: kvm_mmu_finish_page_fault(vcpu, fault, r); read_unlock(&vcpu->kvm->mmu_lock); return r; } #endif int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { #ifdef CONFIG_X86_64 if (tdp_mmu_enabled) return kvm_tdp_mmu_page_fault(vcpu, fault); #endif return direct_page_fault(vcpu, fault); } int kvm_tdp_map_page(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code, u8 *level) { int r; /* * Restrict to TDP page fault, since that's the only case where the MMU * is indexed by GPA. */ if (vcpu->arch.mmu->page_fault != kvm_tdp_page_fault) return -EOPNOTSUPP; do { if (signal_pending(current)) return -EINTR; if (kvm_check_request(KVM_REQ_VM_DEAD, vcpu)) return -EIO; cond_resched(); r = kvm_mmu_do_page_fault(vcpu, gpa, error_code, true, NULL, level); } while (r == RET_PF_RETRY); if (r < 0) return r; switch (r) { case RET_PF_FIXED: case RET_PF_SPURIOUS: case RET_PF_WRITE_PROTECTED: return 0; case RET_PF_EMULATE: return -ENOENT; case RET_PF_RETRY: case RET_PF_CONTINUE: case RET_PF_INVALID: default: WARN_ONCE(1, "could not fix page fault during prefault"); return -EIO; } } EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_tdp_map_page); long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu, struct kvm_pre_fault_memory *range) { u64 error_code = PFERR_GUEST_FINAL_MASK; u8 level = PG_LEVEL_4K; u64 direct_bits; u64 end; int r; if (!vcpu->kvm->arch.pre_fault_allowed) return -EOPNOTSUPP; if (kvm_is_gfn_alias(vcpu->kvm, gpa_to_gfn(range->gpa))) return -EINVAL; /* * reload is efficient when called repeatedly, so we can do it on * every iteration. */ r = kvm_mmu_reload(vcpu); if (r) return r; direct_bits = 0; if (kvm_arch_has_private_mem(vcpu->kvm) && kvm_mem_is_private(vcpu->kvm, gpa_to_gfn(range->gpa))) error_code |= PFERR_PRIVATE_ACCESS; else direct_bits = gfn_to_gpa(kvm_gfn_direct_bits(vcpu->kvm)); /* * Shadow paging uses GVA for kvm page fault, so restrict to * two-dimensional paging. */ r = kvm_tdp_map_page(vcpu, range->gpa | direct_bits, error_code, &level); if (r < 0) return r; /* * If the mapping that covers range->gpa can use a huge page, it * may start below it or end after range->gpa + range->size. */ end = (range->gpa & KVM_HPAGE_MASK(level)) + KVM_HPAGE_SIZE(level); return min(range->size, end - range->gpa); } static void nonpaging_init_context(struct kvm_mmu *context) { context->page_fault = nonpaging_page_fault; context->gva_to_gpa = nonpaging_gva_to_gpa; context->sync_spte = NULL; } static inline bool is_root_usable(struct kvm_mmu_root_info *root, gpa_t pgd, union kvm_mmu_page_role role) { struct kvm_mmu_page *sp; if (!VALID_PAGE(root->hpa)) return false; if (!role.direct && pgd != root->pgd) return false; sp = root_to_sp(root->hpa); if (WARN_ON_ONCE(!sp)) return false; return role.word == sp->role.word; } /* * Find out if a previously cached root matching the new pgd/role is available, * and insert the current root as the MRU in the cache. * If a matching root is found, it is assigned to kvm_mmu->root and * true is returned. * If no match is found, kvm_mmu->root is left invalid, the LRU root is * evicted to make room for the current root, and false is returned. */ static bool cached_root_find_and_keep_current(struct kvm *kvm, struct kvm_mmu *mmu, gpa_t new_pgd, union kvm_mmu_page_role new_role) { uint i; if (is_root_usable(&mmu->root, new_pgd, new_role)) return true; for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) { /* * The swaps end up rotating the cache like this: * C 0 1 2 3 (on entry to the function) * 0 C 1 2 3 * 1 C 0 2 3 * 2 C 0 1 3 * 3 C 0 1 2 (on exit from the loop) */ swap(mmu->root, mmu->prev_roots[i]); if (is_root_usable(&mmu->root, new_pgd, new_role)) return true; } kvm_mmu_free_roots(kvm, mmu, KVM_MMU_ROOT_CURRENT); return false; } /* * Find out if a previously cached root matching the new pgd/role is available. * On entry, mmu->root is invalid. * If a matching root is found, it is assigned to kvm_mmu->root, the LRU entry * of the cache becomes invalid, and true is returned. * If no match is found, kvm_mmu->root is left invalid and false is returned. */ static bool cached_root_find_without_current(struct kvm *kvm, struct kvm_mmu *mmu, gpa_t new_pgd, union kvm_mmu_page_role new_role) { uint i; for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) if (is_root_usable(&mmu->prev_roots[i], new_pgd, new_role)) goto hit; return false; hit: swap(mmu->root, mmu->prev_roots[i]); /* Bubble up the remaining roots. */ for (; i < KVM_MMU_NUM_PREV_ROOTS - 1; i++) mmu->prev_roots[i] = mmu->prev_roots[i + 1]; mmu->prev_roots[i].hpa = INVALID_PAGE; return true; } static bool fast_pgd_switch(struct kvm *kvm, struct kvm_mmu *mmu, gpa_t new_pgd, union kvm_mmu_page_role new_role) { /* * Limit reuse to 64-bit hosts+VMs without "special" roots in order to * avoid having to deal with PDPTEs and other complexities. */ if (VALID_PAGE(mmu->root.hpa) && !root_to_sp(mmu->root.hpa)) kvm_mmu_free_roots(kvm, mmu, KVM_MMU_ROOT_CURRENT); if (VALID_PAGE(mmu->root.hpa)) return cached_root_find_and_keep_current(kvm, mmu, new_pgd, new_role); else return cached_root_find_without_current(kvm, mmu, new_pgd, new_role); } void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd) { struct kvm_mmu *mmu = vcpu->arch.mmu; union kvm_mmu_page_role new_role = mmu->root_role; /* * Return immediately if no usable root was found, kvm_mmu_reload() * will establish a valid root prior to the next VM-Enter. */ if (!fast_pgd_switch(vcpu->kvm, mmu, new_pgd, new_role)) return; /* * It's possible that the cached previous root page is obsolete because * of a change in the MMU generation number. However, changing the * generation number is accompanied by KVM_REQ_MMU_FREE_OBSOLETE_ROOTS, * which will free the root set here and allocate a new one. */ kvm_make_request(KVM_REQ_LOAD_MMU_PGD, vcpu); if (force_flush_and_sync_on_reuse) { kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); } /* * The last MMIO access's GVA and GPA are cached in the VCPU. When * switching to a new CR3, that GVA->GPA mapping may no longer be * valid. So clear any cached MMIO info even when we don't need to sync * the shadow page tables. */ vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY); /* * If this is a direct root page, it doesn't have a write flooding * count. Otherwise, clear the write flooding count. */ if (!new_role.direct) { struct kvm_mmu_page *sp = root_to_sp(vcpu->arch.mmu->root.hpa); if (!WARN_ON_ONCE(!sp)) __clear_sp_write_flooding_count(sp); } } EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_new_pgd); static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn, unsigned int access) { if (unlikely(is_mmio_spte(vcpu->kvm, *sptep))) { if (gfn != get_mmio_spte_gfn(*sptep)) { mmu_spte_clear_no_track(sptep); return true; } mark_mmio_spte(vcpu, sptep, gfn, access); return true; } return false; } #define PTTYPE_EPT 18 /* arbitrary */ #define PTTYPE PTTYPE_EPT #include "paging_tmpl.h" #undef PTTYPE #define PTTYPE 64 #include "paging_tmpl.h" #undef PTTYPE #define PTTYPE 32 #include "paging_tmpl.h" #undef PTTYPE static void __reset_rsvds_bits_mask(struct rsvd_bits_validate *rsvd_check, u64 pa_bits_rsvd, int level, bool nx, bool gbpages, bool pse, bool amd) { u64 gbpages_bit_rsvd = 0; u64 nonleaf_bit8_rsvd = 0; u64 high_bits_rsvd; rsvd_check->bad_mt_xwr = 0; if (!gbpages) gbpages_bit_rsvd = rsvd_bits(7, 7); if (level == PT32E_ROOT_LEVEL) high_bits_rsvd = pa_bits_rsvd & rsvd_bits(0, 62); else high_bits_rsvd = pa_bits_rsvd & rsvd_bits(0, 51); /* Note, NX doesn't exist in PDPTEs, this is handled below. */ if (!nx) high_bits_rsvd |= rsvd_bits(63, 63); /* * Non-leaf PML4Es and PDPEs reserve bit 8 (which would be the G bit for * leaf entries) on AMD CPUs only. */ if (amd) nonleaf_bit8_rsvd = rsvd_bits(8, 8); switch (level) { case PT32_ROOT_LEVEL: /* no rsvd bits for 2 level 4K page table entries */ rsvd_check->rsvd_bits_mask[0][1] = 0; rsvd_check->rsvd_bits_mask[0][0] = 0; rsvd_check->rsvd_bits_mask[1][0] = rsvd_check->rsvd_bits_mask[0][0]; if (!pse) { rsvd_check->rsvd_bits_mask[1][1] = 0; break; } if (is_cpuid_PSE36()) /* 36bits PSE 4MB page */ rsvd_check->rsvd_bits_mask[1][1] = rsvd_bits(17, 21); else /* 32 bits PSE 4MB page */ rsvd_check->rsvd_bits_mask[1][1] = rsvd_bits(13, 21); break; case PT32E_ROOT_LEVEL: rsvd_check->rsvd_bits_mask[0][2] = rsvd_bits(63, 63) | high_bits_rsvd | rsvd_bits(5, 8) | rsvd_bits(1, 2); /* PDPTE */ rsvd_check->rsvd_bits_mask[0][1] = high_bits_rsvd; /* PDE */ rsvd_check->rsvd_bits_mask[0][0] = high_bits_rsvd; /* PTE */ rsvd_check->rsvd_bits_mask[1][1] = high_bits_rsvd | rsvd_bits(13, 20); /* large page */ rsvd_check->rsvd_bits_mask[1][0] = rsvd_check->rsvd_bits_mask[0][0]; break; case PT64_ROOT_5LEVEL: rsvd_check->rsvd_bits_mask[0][4] = high_bits_rsvd | nonleaf_bit8_rsvd | rsvd_bits(7, 7); rsvd_check->rsvd_bits_mask[1][4] = rsvd_check->rsvd_bits_mask[0][4]; fallthrough; case PT64_ROOT_4LEVEL: rsvd_check->rsvd_bits_mask[0][3] = high_bits_rsvd | nonleaf_bit8_rsvd | rsvd_bits(7, 7); rsvd_check->rsvd_bits_mask[0][2] = high_bits_rsvd | gbpages_bit_rsvd; rsvd_check->rsvd_bits_mask[0][1] = high_bits_rsvd; rsvd_check->rsvd_bits_mask[0][0] = high_bits_rsvd; rsvd_check->rsvd_bits_mask[1][3] = rsvd_check->rsvd_bits_mask[0][3]; rsvd_check->rsvd_bits_mask[1][2] = high_bits_rsvd | gbpages_bit_rsvd | rsvd_bits(13, 29); rsvd_check->rsvd_bits_mask[1][1] = high_bits_rsvd | rsvd_bits(13, 20); /* large page */ rsvd_check->rsvd_bits_mask[1][0] = rsvd_check->rsvd_bits_mask[0][0]; break; } } static void reset_guest_rsvds_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context) { __reset_rsvds_bits_mask(&context->guest_rsvd_check, vcpu->arch.reserved_gpa_bits, context->cpu_role.base.level, is_efer_nx(context), guest_cpu_cap_has(vcpu, X86_FEATURE_GBPAGES), is_cr4_pse(context), guest_cpuid_is_amd_compatible(vcpu)); } static void __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check, u64 pa_bits_rsvd, bool execonly, int huge_page_level) { u64 high_bits_rsvd = pa_bits_rsvd & rsvd_bits(0, 51); u64 large_1g_rsvd = 0, large_2m_rsvd = 0; u64 bad_mt_xwr; if (huge_page_level < PG_LEVEL_1G) large_1g_rsvd = rsvd_bits(7, 7); if (huge_page_level < PG_LEVEL_2M) large_2m_rsvd = rsvd_bits(7, 7); rsvd_check->rsvd_bits_mask[0][4] = high_bits_rsvd | rsvd_bits(3, 7); rsvd_check->rsvd_bits_mask[0][3] = high_bits_rsvd | rsvd_bits(3, 7); rsvd_check->rsvd_bits_mask[0][2] = high_bits_rsvd | rsvd_bits(3, 6) | large_1g_rsvd; rsvd_check->rsvd_bits_mask[0][1] = high_bits_rsvd | rsvd_bits(3, 6) | large_2m_rsvd; rsvd_check->rsvd_bits_mask[0][0] = high_bits_rsvd; /* large page */ rsvd_check->rsvd_bits_mask[1][4] = rsvd_check->rsvd_bits_mask[0][4]; rsvd_check->rsvd_bits_mask[1][3] = rsvd_check->rsvd_bits_mask[0][3]; rsvd_check->rsvd_bits_mask[1][2] = high_bits_rsvd | rsvd_bits(12, 29) | large_1g_rsvd; rsvd_check->rsvd_bits_mask[1][1] = high_bits_rsvd | rsvd_bits(12, 20) | large_2m_rsvd; rsvd_check->rsvd_bits_mask[1][0] = rsvd_check->rsvd_bits_mask[0][0]; bad_mt_xwr = 0xFFull << (2 * 8); /* bits 3..5 must not be 2 */ bad_mt_xwr |= 0xFFull << (3 * 8); /* bits 3..5 must not be 3 */ bad_mt_xwr |= 0xFFull << (7 * 8); /* bits 3..5 must not be 7 */ bad_mt_xwr |= REPEAT_BYTE(1ull << 2); /* bits 0..2 must not be 010 */ bad_mt_xwr |= REPEAT_BYTE(1ull << 6); /* bits 0..2 must not be 110 */ if (!execonly) { /* bits 0..2 must not be 100 unless VMX capabilities allow it */ bad_mt_xwr |= REPEAT_BYTE(1ull << 4); } rsvd_check->bad_mt_xwr = bad_mt_xwr; } static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu, struct kvm_mmu *context, bool execonly, int huge_page_level) { __reset_rsvds_bits_mask_ept(&context->guest_rsvd_check, vcpu->arch.reserved_gpa_bits, execonly, huge_page_level); } static inline u64 reserved_hpa_bits(void) { return rsvd_bits(kvm_host.maxphyaddr, 63); } /* * the page table on host is the shadow page table for the page * table in guest or amd nested guest, its mmu features completely * follow the features in guest. */ static void reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context) { /* @amd adds a check on bit of SPTEs, which KVM shouldn't use anyways. */ bool is_amd = true; /* KVM doesn't use 2-level page tables for the shadow MMU. */ bool is_pse = false; struct rsvd_bits_validate *shadow_zero_check; int i; WARN_ON_ONCE(context->root_role.level < PT32E_ROOT_LEVEL); shadow_zero_check = &context->shadow_zero_check; __reset_rsvds_bits_mask(shadow_zero_check, reserved_hpa_bits(), context->root_role.level, context->root_role.efer_nx, guest_cpu_cap_has(vcpu, X86_FEATURE_GBPAGES), is_pse, is_amd); if (!shadow_me_mask) return; for (i = context->root_role.level; --i >= 0;) { /* * So far shadow_me_value is a constant during KVM's life * time. Bits in shadow_me_value are allowed to be set. * Bits in shadow_me_mask but not in shadow_me_value are * not allowed to be set. */ shadow_zero_check->rsvd_bits_mask[0][i] |= shadow_me_mask; shadow_zero_check->rsvd_bits_mask[1][i] |= shadow_me_mask; shadow_zero_check->rsvd_bits_mask[0][i] &= ~shadow_me_value; shadow_zero_check->rsvd_bits_mask[1][i] &= ~shadow_me_value; } } static inline bool boot_cpu_is_amd(void) { WARN_ON_ONCE(!tdp_enabled); return shadow_x_mask == 0; } /* * the direct page table on host, use as much mmu features as * possible, however, kvm currently does not do execution-protection. */ static void reset_tdp_shadow_zero_bits_mask(struct kvm_mmu *context) { struct rsvd_bits_validate *shadow_zero_check; int i; shadow_zero_check = &context->shadow_zero_check; if (boot_cpu_is_amd()) __reset_rsvds_bits_mask(shadow_zero_check, reserved_hpa_bits(), context->root_role.level, true, boot_cpu_has(X86_FEATURE_GBPAGES), false, true); else __reset_rsvds_bits_mask_ept(shadow_zero_check, reserved_hpa_bits(), false, max_huge_page_level); if (!shadow_me_mask) return; for (i = context->root_role.level; --i >= 0;) { shadow_zero_check->rsvd_bits_mask[0][i] &= ~shadow_me_mask; shadow_zero_check->rsvd_bits_mask[1][i] &= ~shadow_me_mask; } } /* * as the comments in reset_shadow_zero_bits_mask() except it * is the shadow page table for intel nested guest. */ static void reset_ept_shadow_zero_bits_mask(struct kvm_mmu *context, bool execonly) { __reset_rsvds_bits_mask_ept(&context->shadow_zero_check, reserved_hpa_bits(), execonly, max_huge_page_level); } #define BYTE_MASK(access) \ ((1 & (access) ? 2 : 0) | \ (2 & (access) ? 4 : 0) | \ (3 & (access) ? 8 : 0) | \ (4 & (access) ? 16 : 0) | \ (5 & (access) ? 32 : 0) | \ (6 & (access) ? 64 : 0) | \ (7 & (access) ? 128 : 0)) static void update_permission_bitmask(struct kvm_mmu *mmu, bool ept) { unsigned byte; const u8 x = BYTE_MASK(ACC_EXEC_MASK); const u8 w = BYTE_MASK(ACC_WRITE_MASK); const u8 u = BYTE_MASK(ACC_USER_MASK); bool cr4_smep = is_cr4_smep(mmu); bool cr4_smap = is_cr4_smap(mmu); bool cr0_wp = is_cr0_wp(mmu); bool efer_nx = is_efer_nx(mmu); for (byte = 0; byte < ARRAY_SIZE(mmu->permissions); ++byte) { unsigned pfec = byte << 1; /* * Each "*f" variable has a 1 bit for each UWX value * that causes a fault with the given PFEC. */ /* Faults from writes to non-writable pages */ u8 wf = (pfec & PFERR_WRITE_MASK) ? (u8)~w : 0; /* Faults from user mode accesses to supervisor pages */ u8 uf = (pfec & PFERR_USER_MASK) ? (u8)~u : 0; /* Faults from fetches of non-executable pages*/ u8 ff = (pfec & PFERR_FETCH_MASK) ? (u8)~x : 0; /* Faults from kernel mode fetches of user pages */ u8 smepf = 0; /* Faults from kernel mode accesses of user pages */ u8 smapf = 0; if (!ept) { /* Faults from kernel mode accesses to user pages */ u8 kf = (pfec & PFERR_USER_MASK) ? 0 : u; /* Not really needed: !nx will cause pte.nx to fault */ if (!efer_nx) ff = 0; /* Allow supervisor writes if !cr0.wp */ if (!cr0_wp) wf = (pfec & PFERR_USER_MASK) ? wf : 0; /* Disallow supervisor fetches of user code if cr4.smep */ if (cr4_smep) smepf = (pfec & PFERR_FETCH_MASK) ? kf : 0; /* * SMAP:kernel-mode data accesses from user-mode * mappings should fault. A fault is considered * as a SMAP violation if all of the following * conditions are true: * - X86_CR4_SMAP is set in CR4 * - A user page is accessed * - The access is not a fetch * - The access is supervisor mode * - If implicit supervisor access or X86_EFLAGS_AC is clear * * Here, we cover the first four conditions. * The fifth is computed dynamically in permission_fault(); * PFERR_RSVD_MASK bit will be set in PFEC if the access is * *not* subject to SMAP restrictions. */ if (cr4_smap) smapf = (pfec & (PFERR_RSVD_MASK|PFERR_FETCH_MASK)) ? 0 : kf; } mmu->permissions[byte] = ff | uf | wf | smepf | smapf; } } /* * PKU is an additional mechanism by which the paging controls access to * user-mode addresses based on the value in the PKRU register. Protection * key violations are reported through a bit in the page fault error code. * Unlike other bits of the error code, the PK bit is not known at the * call site of e.g. gva_to_gpa; it must be computed directly in * permission_fault based on two bits of PKRU, on some machine state (CR4, * CR0, EFER, CPL), and on other bits of the error code and the page tables. * * In particular the following conditions come from the error code, the * page tables and the machine state: * - PK is always zero unless CR4.PKE=1 and EFER.LMA=1 * - PK is always zero if RSVD=1 (reserved bit set) or F=1 (instruction fetch) * - PK is always zero if U=0 in the page tables * - PKRU.WD is ignored if CR0.WP=0 and the access is a supervisor access. * * The PKRU bitmask caches the result of these four conditions. The error * code (minus the P bit) and the page table's U bit form an index into the * PKRU bitmask. Two bits of the PKRU bitmask are then extracted and ANDed * with the two bits of the PKRU register corresponding to the protection key. * For the first three conditions above the bits will be 00, thus masking * away both AD and WD. For all reads or if the last condition holds, WD * only will be masked away. */ static void update_pkru_bitmask(struct kvm_mmu *mmu) { unsigned bit; bool wp; mmu->pkru_mask = 0; if (!is_cr4_pke(mmu)) return; wp = is_cr0_wp(mmu); for (bit = 0; bit < ARRAY_SIZE(mmu->permissions); ++bit) { unsigned pfec, pkey_bits; bool check_pkey, check_write, ff, uf, wf, pte_user; pfec = bit << 1; ff = pfec & PFERR_FETCH_MASK; uf = pfec & PFERR_USER_MASK; wf = pfec & PFERR_WRITE_MASK; /* PFEC.RSVD is replaced by ACC_USER_MASK. */ pte_user = pfec & PFERR_RSVD_MASK; /* * Only need to check the access which is not an * instruction fetch and is to a user page. */ check_pkey = (!ff && pte_user); /* * write access is controlled by PKRU if it is a * user access or CR0.WP = 1. */ check_write = check_pkey && wf && (uf || wp); /* PKRU.AD stops both read and write access. */ pkey_bits = !!check_pkey; /* PKRU.WD stops write access. */ pkey_bits |= (!!check_write) << 1; mmu->pkru_mask |= (pkey_bits & 3) << pfec; } } static void reset_guest_paging_metadata(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu) { if (!is_cr0_pg(mmu)) return; reset_guest_rsvds_bits_mask(vcpu, mmu); update_permission_bitmask(mmu, false); update_pkru_bitmask(mmu); } static void paging64_init_context(struct kvm_mmu *context) { context->page_fault = paging64_page_fault; context->gva_to_gpa = paging64_gva_to_gpa; context->sync_spte = paging64_sync_spte; } static void paging32_init_context(struct kvm_mmu *context) { context->page_fault = paging32_page_fault; context->gva_to_gpa = paging32_gva_to_gpa; context->sync_spte = paging32_sync_spte; } static union kvm_cpu_role kvm_calc_cpu_role(struct kvm_vcpu *vcpu, const struct kvm_mmu_role_regs *regs) { union kvm_cpu_role role = {0}; role.base.access = ACC_ALL; role.base.smm = is_smm(vcpu); role.base.guest_mode = is_guest_mode(vcpu); role.ext.valid = 1; if (!____is_cr0_pg(regs)) { role.base.direct = 1; return role; } role.base.efer_nx = ____is_efer_nx(regs); role.base.cr0_wp = ____is_cr0_wp(regs); role.base.smep_andnot_wp = ____is_cr4_smep(regs) && !____is_cr0_wp(regs); role.base.smap_andnot_wp = ____is_cr4_smap(regs) && !____is_cr0_wp(regs); role.base.has_4_byte_gpte = !____is_cr4_pae(regs); if (____is_efer_lma(regs)) role.base.level = ____is_cr4_la57(regs) ? PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL; else if (____is_cr4_pae(regs)) role.base.level = PT32E_ROOT_LEVEL; else role.base.level = PT32_ROOT_LEVEL; role.ext.cr4_smep = ____is_cr4_smep(regs); role.ext.cr4_smap = ____is_cr4_smap(regs); role.ext.cr4_pse = ____is_cr4_pse(regs); /* PKEY and LA57 are active iff long mode is active. */ role.ext.cr4_pke = ____is_efer_lma(regs) && ____is_cr4_pke(regs); role.ext.cr4_la57 = ____is_efer_lma(regs) && ____is_cr4_la57(regs); role.ext.efer_lma = ____is_efer_lma(regs); return role; } void __kvm_mmu_refresh_passthrough_bits(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu) { const bool cr0_wp = kvm_is_cr0_bit_set(vcpu, X86_CR0_WP); BUILD_BUG_ON((KVM_MMU_CR0_ROLE_BITS & KVM_POSSIBLE_CR0_GUEST_BITS) != X86_CR0_WP); BUILD_BUG_ON((KVM_MMU_CR4_ROLE_BITS & KVM_POSSIBLE_CR4_GUEST_BITS)); if (is_cr0_wp(mmu) == cr0_wp) return; mmu->cpu_role.base.cr0_wp = cr0_wp; reset_guest_paging_metadata(vcpu, mmu); } static inline int kvm_mmu_get_tdp_level(struct kvm_vcpu *vcpu) { int maxpa; if (vcpu->kvm->arch.vm_type == KVM_X86_TDX_VM) maxpa = cpuid_query_maxguestphyaddr(vcpu); else maxpa = cpuid_maxphyaddr(vcpu); /* tdp_root_level is architecture forced level, use it if nonzero */ if (tdp_root_level) return tdp_root_level; /* Use 5-level TDP if and only if it's useful/necessary. */ if (max_tdp_level == 5 && maxpa <= 48) return 4; return max_tdp_level; } u8 kvm_mmu_get_max_tdp_level(void) { return tdp_root_level ? tdp_root_level : max_tdp_level; } static union kvm_mmu_page_role kvm_calc_tdp_mmu_root_page_role(struct kvm_vcpu *vcpu, union kvm_cpu_role cpu_role) { union kvm_mmu_page_role role = {0}; role.access = ACC_ALL; role.cr0_wp = true; role.efer_nx = true; role.smm = cpu_role.base.smm; role.guest_mode = cpu_role.base.guest_mode; role.ad_disabled = !kvm_ad_enabled; role.level = kvm_mmu_get_tdp_level(vcpu); role.direct = true; role.has_4_byte_gpte = false; return role; } static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu, union kvm_cpu_role cpu_role) { struct kvm_mmu *context = &vcpu->arch.root_mmu; union kvm_mmu_page_role root_role = kvm_calc_tdp_mmu_root_page_role(vcpu, cpu_role); if (cpu_role.as_u64 == context->cpu_role.as_u64 && root_role.word == context->root_role.word) return; context->cpu_role.as_u64 = cpu_role.as_u64; context->root_role.word = root_role.word; context->page_fault = kvm_tdp_page_fault; context->sync_spte = NULL; context->get_guest_pgd = get_guest_cr3; context->get_pdptr = kvm_pdptr_read; context->inject_page_fault = kvm_inject_page_fault; if (!is_cr0_pg(context)) context->gva_to_gpa = nonpaging_gva_to_gpa; else if (is_cr4_pae(context)) context->gva_to_gpa = paging64_gva_to_gpa; else context->gva_to_gpa = paging32_gva_to_gpa; reset_guest_paging_metadata(vcpu, context); reset_tdp_shadow_zero_bits_mask(context); } static void shadow_mmu_init_context(struct kvm_vcpu *vcpu, struct kvm_mmu *context, union kvm_cpu_role cpu_role, union kvm_mmu_page_role root_role) { if (cpu_role.as_u64 == context->cpu_role.as_u64 && root_role.word == context->root_role.word) return; context->cpu_role.as_u64 = cpu_role.as_u64; context->root_role.word = root_role.word; if (!is_cr0_pg(context)) nonpaging_init_context(context); else if (is_cr4_pae(context)) paging64_init_context(context); else paging32_init_context(context); reset_guest_paging_metadata(vcpu, context); reset_shadow_zero_bits_mask(vcpu, context); } static void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, union kvm_cpu_role cpu_role) { struct kvm_mmu *context = &vcpu->arch.root_mmu; union kvm_mmu_page_role root_role; root_role = cpu_role.base; /* KVM uses PAE paging whenever the guest isn't using 64-bit paging. */ root_role.level = max_t(u32, root_role.level, PT32E_ROOT_LEVEL); /* * KVM forces EFER.NX=1 when TDP is disabled, reflect it in the MMU role. * KVM uses NX when TDP is disabled to handle a variety of scenarios, * notably for huge SPTEs if iTLB multi-hit mitigation is enabled and * to generate correct permissions for CR0.WP=0/CR4.SMEP=1/EFER.NX=0. * The iTLB multi-hit workaround can be toggled at any time, so assume * NX can be used by any non-nested shadow MMU to avoid having to reset * MMU contexts. */ root_role.efer_nx = true; shadow_mmu_init_context(vcpu, context, cpu_role, root_role); } void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, unsigned long cr0, unsigned long cr4, u64 efer, gpa_t nested_cr3) { struct kvm_mmu *context = &vcpu->arch.guest_mmu; struct kvm_mmu_role_regs regs = { .cr0 = cr0, .cr4 = cr4 & ~X86_CR4_PKE, .efer = efer, }; union kvm_cpu_role cpu_role = kvm_calc_cpu_role(vcpu, &regs); union kvm_mmu_page_role root_role; /* NPT requires CR0.PG=1. */ WARN_ON_ONCE(cpu_role.base.direct || !cpu_role.base.guest_mode); root_role = cpu_role.base; root_role.level = kvm_mmu_get_tdp_level(vcpu); if (root_role.level == PT64_ROOT_5LEVEL && cpu_role.base.level == PT64_ROOT_4LEVEL) root_role.passthrough = 1; shadow_mmu_init_context(vcpu, context, cpu_role, root_role); kvm_mmu_new_pgd(vcpu, nested_cr3); } EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_init_shadow_npt_mmu); static union kvm_cpu_role kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty, bool execonly, u8 level) { union kvm_cpu_role role = {0}; /* * KVM does not support SMM transfer monitors, and consequently does not * support the "entry to SMM" control either. role.base.smm is always 0. */ WARN_ON_ONCE(is_smm(vcpu)); role.base.level = level; role.base.has_4_byte_gpte = false; role.base.direct = false; role.base.ad_disabled = !accessed_dirty; role.base.guest_mode = true; role.base.access = ACC_ALL; role.ext.word = 0; role.ext.execonly = execonly; role.ext.valid = 1; return role; } void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, int huge_page_level, bool accessed_dirty, gpa_t new_eptp) { struct kvm_mmu *context = &vcpu->arch.guest_mmu; u8 level = vmx_eptp_page_walk_level(new_eptp); union kvm_cpu_role new_mode = kvm_calc_shadow_ept_root_page_role(vcpu, accessed_dirty, execonly, level); if (new_mode.as_u64 != context->cpu_role.as_u64) { /* EPT, and thus nested EPT, does not consume CR0, CR4, nor EFER. */ context->cpu_role.as_u64 = new_mode.as_u64; context->root_role.word = new_mode.base.word; context->page_fault = ept_page_fault; context->gva_to_gpa = ept_gva_to_gpa; context->sync_spte = ept_sync_spte; update_permission_bitmask(context, true); context->pkru_mask = 0; reset_rsvds_bits_mask_ept(vcpu, context, execonly, huge_page_level); reset_ept_shadow_zero_bits_mask(context, execonly); } kvm_mmu_new_pgd(vcpu, new_eptp); } EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_init_shadow_ept_mmu); static void init_kvm_softmmu(struct kvm_vcpu *vcpu, union kvm_cpu_role cpu_role) { struct kvm_mmu *context = &vcpu->arch.root_mmu; kvm_init_shadow_mmu(vcpu, cpu_role); context->get_guest_pgd = get_guest_cr3; context->get_pdptr = kvm_pdptr_read; context->inject_page_fault = kvm_inject_page_fault; } static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu, union kvm_cpu_role new_mode) { struct kvm_mmu *g_context = &vcpu->arch.nested_mmu; if (new_mode.as_u64 == g_context->cpu_role.as_u64) return; g_context->cpu_role.as_u64 = new_mode.as_u64; g_context->get_guest_pgd = get_guest_cr3; g_context->get_pdptr = kvm_pdptr_read; g_context->inject_page_fault = kvm_inject_page_fault; /* * L2 page tables are never shadowed, so there is no need to sync * SPTEs. */ g_context->sync_spte = NULL; /* * Note that arch.mmu->gva_to_gpa translates l2_gpa to l1_gpa using * L1's nested page tables (e.g. EPT12). The nested translation * of l2_gva to l1_gpa is done by arch.nested_mmu.gva_to_gpa using * L2's page tables as the first level of translation and L1's * nested page tables as the second level of translation. Basically * the gva_to_gpa functions between mmu and nested_mmu are swapped. */ if (!is_paging(vcpu)) g_context->gva_to_gpa = nonpaging_gva_to_gpa; else if (is_long_mode(vcpu)) g_context->gva_to_gpa = paging64_gva_to_gpa; else if (is_pae(vcpu)) g_context->gva_to_gpa = paging64_gva_to_gpa; else g_context->gva_to_gpa = paging32_gva_to_gpa; reset_guest_paging_metadata(vcpu, g_context); } void kvm_init_mmu(struct kvm_vcpu *vcpu) { struct kvm_mmu_role_regs regs = vcpu_to_role_regs(vcpu); union kvm_cpu_role cpu_role = kvm_calc_cpu_role(vcpu, &regs); if (mmu_is_nested(vcpu)) init_kvm_nested_mmu(vcpu, cpu_role); else if (tdp_enabled) init_kvm_tdp_mmu(vcpu, cpu_role); else init_kvm_softmmu(vcpu, cpu_role); } EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_init_mmu); void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu) { /* * Invalidate all MMU roles to force them to reinitialize as CPUID * information is factored into reserved bit calculations. * * Correctly handling multiple vCPU models with respect to paging and * physical address properties) in a single VM would require tracking * all relevant CPUID information in kvm_mmu_page_role. That is very * undesirable as it would increase the memory requirements for * gfn_write_track (see struct kvm_mmu_page_role comments). For now * that problem is swept under the rug; KVM's CPUID API is horrific and * it's all but impossible to solve it without introducing a new API. */ vcpu->arch.root_mmu.root_role.invalid = 1; vcpu->arch.guest_mmu.root_role.invalid = 1; vcpu->arch.nested_mmu.root_role.invalid = 1; vcpu->arch.root_mmu.cpu_role.ext.valid = 0; vcpu->arch.guest_mmu.cpu_role.ext.valid = 0; vcpu->arch.nested_mmu.cpu_role.ext.valid = 0; kvm_mmu_reset_context(vcpu); /* * Changing guest CPUID after KVM_RUN is forbidden, see the comment in * kvm_arch_vcpu_ioctl(). */ KVM_BUG_ON(kvm_vcpu_has_run(vcpu), vcpu->kvm); } void kvm_mmu_reset_context(struct kvm_vcpu *vcpu) { kvm_mmu_unload(vcpu); kvm_init_mmu(vcpu); } EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_reset_context); int kvm_mmu_load(struct kvm_vcpu *vcpu) { int r; r = mmu_topup_memory_caches(vcpu, !vcpu->arch.mmu->root_role.direct); if (r) goto out; r = mmu_alloc_special_roots(vcpu); if (r) goto out; if (vcpu->arch.mmu->root_role.direct) r = mmu_alloc_direct_roots(vcpu); else r = mmu_alloc_shadow_roots(vcpu); if (r) goto out; kvm_mmu_sync_roots(vcpu); kvm_mmu_load_pgd(vcpu); /* * Flush any TLB entries for the new root, the provenance of the root * is unknown. Even if KVM ensures there are no stale TLB entries * for a freed root, in theory another hypervisor could have left * stale entries. Flushing on alloc also allows KVM to skip the TLB * flush when freeing a root (see kvm_tdp_mmu_put_root()). */ kvm_x86_call(flush_tlb_current)(vcpu); out: return r; } EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_load); void kvm_mmu_unload(struct kvm_vcpu *vcpu) { struct kvm *kvm = vcpu->kvm; kvm_mmu_free_roots(kvm, &vcpu->arch.root_mmu, KVM_MMU_ROOTS_ALL); WARN_ON_ONCE(VALID_PAGE(vcpu->arch.root_mmu.root.hpa)); kvm_mmu_free_roots(kvm, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL); WARN_ON_ONCE(VALID_PAGE(vcpu->arch.guest_mmu.root.hpa)); vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY); } static bool is_obsolete_root(struct kvm *kvm, hpa_t root_hpa) { struct kvm_mmu_page *sp; if (!VALID_PAGE(root_hpa)) return false; /* * When freeing obsolete roots, treat roots as obsolete if they don't * have an associated shadow page, as it's impossible to determine if * such roots are fresh or stale. This does mean KVM will get false * positives and free roots that don't strictly need to be freed, but * such false positives are relatively rare: * * (a) only PAE paging and nested NPT have roots without shadow pages * (or any shadow paging flavor with a dummy root, see note below) * (b) remote reloads due to a memslot update obsoletes _all_ roots * (c) KVM doesn't track previous roots for PAE paging, and the guest * is unlikely to zap an in-use PGD. * * Note! Dummy roots are unique in that they are obsoleted by memslot * _creation_! See also FNAME(fetch). */ sp = root_to_sp(root_hpa); return !sp || is_obsolete_sp(kvm, sp); } static void __kvm_mmu_free_obsolete_roots(struct kvm *kvm, struct kvm_mmu *mmu) { unsigned long roots_to_free = 0; int i; if (is_obsolete_root(kvm, mmu->root.hpa)) roots_to_free |= KVM_MMU_ROOT_CURRENT; for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) { if (is_obsolete_root(kvm, mmu->prev_roots[i].hpa)) roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i); } if (roots_to_free) kvm_mmu_free_roots(kvm, mmu, roots_to_free); } void kvm_mmu_free_obsolete_roots(struct kvm_vcpu *vcpu) { __kvm_mmu_free_obsolete_roots(vcpu->kvm, &vcpu->arch.root_mmu); __kvm_mmu_free_obsolete_roots(vcpu->kvm, &vcpu->arch.guest_mmu); } EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_free_obsolete_roots); static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, int *bytes) { u64 gentry = 0; int r; /* * Assume that the pte write on a page table of the same type * as the current vcpu paging mode since we update the sptes only * when they have the same mode. */ if (is_pae(vcpu) && *bytes == 4) { /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ *gpa &= ~(gpa_t)7; *bytes = 8; } if (*bytes == 4 || *bytes == 8) { r = kvm_vcpu_read_guest_atomic(vcpu, *gpa, &gentry, *bytes); if (r) gentry = 0; } return gentry; } /* * If we're seeing too many writes to a page, it may no longer be a page table, * or we may be forking, in which case it is better to unmap the page. */ static bool detect_write_flooding(struct kvm_mmu_page *sp) { /* * Skip write-flooding detected for the sp whose level is 1, because * it can become unsync, then the guest page is not write-protected. */ if (sp->role.level == PG_LEVEL_4K) return false; atomic_inc(&sp->write_flooding_count); return atomic_read(&sp->write_flooding_count) >= 3; } /* * Misaligned accesses are too much trouble to fix up; also, they usually * indicate a page is not used as a page table. */ static bool detect_write_misaligned(struct kvm_mmu_page *sp, gpa_t gpa, int bytes) { unsigned offset, pte_size, misaligned; offset = offset_in_page(gpa); pte_size = sp->role.has_4_byte_gpte ? 4 : 8; /* * Sometimes, the OS only writes the last one bytes to update status * bits, for example, in linux, andb instruction is used in clear_bit(). */ if (!(offset & (pte_size - 1)) && bytes == 1) return false; misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); misaligned |= bytes < 4; return misaligned; } static u64 *get_written_sptes(struct kvm_mmu_page *sp, gpa_t gpa, int *nspte) { unsigned page_offset, quadrant; u64 *spte; int level; page_offset = offset_in_page(gpa); level = sp->role.level; *nspte = 1; if (sp->role.has_4_byte_gpte) { page_offset <<= 1; /* 32->64 */ /* * A 32-bit pde maps 4MB while the shadow pdes map * only 2MB. So we need to double the offset again * and zap two pdes instead of one. */ if (level == PT32_ROOT_LEVEL) { page_offset &= ~7; /* kill rounding error */ page_offset <<= 1; *nspte = 2; } quadrant = page_offset >> PAGE_SHIFT; page_offset &= ~PAGE_MASK; if (quadrant != sp->role.quadrant) return NULL; } spte = &sp->spt[page_offset / sizeof(*spte)]; return spte; } void kvm_mmu_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, int bytes) { gfn_t gfn = gpa >> PAGE_SHIFT; struct kvm_mmu_page *sp; LIST_HEAD(invalid_list); u64 entry, gentry, *spte; int npte; bool flush = false; /* * When emulating guest writes, ensure the written value is visible to * any task that is handling page faults before checking whether or not * KVM is shadowing a guest PTE. This ensures either KVM will create * the correct SPTE in the page fault handler, or this task will see * a non-zero indirect_shadow_pages. Pairs with the smp_mb() in * account_shadowed(). */ smp_mb(); if (!vcpu->kvm->arch.indirect_shadow_pages) return; write_lock(&vcpu->kvm->mmu_lock); gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, &bytes); ++vcpu->kvm->stat.mmu_pte_write; for_each_gfn_valid_sp_with_gptes(vcpu->kvm, sp, gfn) { if (detect_write_misaligned(sp, gpa, bytes) || detect_write_flooding(sp)) { kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list); ++vcpu->kvm->stat.mmu_flooded; continue; } spte = get_written_sptes(sp, gpa, &npte); if (!spte) continue; while (npte--) { entry = *spte; mmu_page_zap_pte(vcpu->kvm, sp, spte, NULL); if (gentry && sp->role.level != PG_LEVEL_4K) ++vcpu->kvm->stat.mmu_pde_zapped; if (is_shadow_present_pte(entry)) flush = true; ++spte; } } kvm_mmu_remote_flush_or_zap(vcpu->kvm, &invalid_list, flush); write_unlock(&vcpu->kvm->mmu_lock); } static bool is_write_to_guest_page_table(u64 error_code) { const u64 mask = PFERR_GUEST_PAGE_MASK | PFERR_WRITE_MASK | PFERR_PRESENT_MASK; return (error_code & mask) == mask; } static int kvm_mmu_write_protect_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, int *emulation_type) { bool direct = vcpu->arch.mmu->root_role.direct; /* * Do not try to unprotect and retry if the vCPU re-faulted on the same * RIP with the same address that was previously unprotected, as doing * so will likely put the vCPU into an infinite. E.g. if the vCPU uses * a non-page-table modifying instruction on the PDE that points to the * instruction, then unprotecting the gfn will unmap the instruction's * code, i.e. make it impossible for the instruction to ever complete. */ if (vcpu->arch.last_retry_eip == kvm_rip_read(vcpu) && vcpu->arch.last_retry_addr == cr2_or_gpa) return RET_PF_EMULATE; /* * Reset the unprotect+retry values that guard against infinite loops. * The values will be refreshed if KVM explicitly unprotects a gfn and * retries, in all other cases it's safe to retry in the future even if * the next page fault happens on the same RIP+address. */ vcpu->arch.last_retry_eip = 0; vcpu->arch.last_retry_addr = 0; /* * It should be impossible to reach this point with an MMIO cache hit, * as RET_PF_WRITE_PROTECTED is returned if and only if there's a valid, * writable memslot, and creating a memslot should invalidate the MMIO * cache by way of changing the memslot generation. WARN and disallow * retry if MMIO is detected, as retrying MMIO emulation is pointless * and could put the vCPU into an infinite loop because the processor * will keep faulting on the non-existent MMIO address. */ if (WARN_ON_ONCE(mmio_info_in_cache(vcpu, cr2_or_gpa, direct))) return RET_PF_EMULATE; /* * Before emulating the instruction, check to see if the access was due * to a read-only violation while the CPU was walking non-nested NPT * page tables, i.e. for a direct MMU, for _guest_ page tables in L1. * If L1 is sharing (a subset of) its page tables with L2, e.g. by * having nCR3 share lower level page tables with hCR3, then when KVM * (L0) write-protects the nested NPTs, i.e. npt12 entries, KVM is also * unknowingly write-protecting L1's guest page tables, which KVM isn't * shadowing. * * Because the CPU (by default) walks NPT page tables using a write * access (to ensure the CPU can do A/D updates), page walks in L1 can * trigger write faults for the above case even when L1 isn't modifying * PTEs. As a result, KVM will unnecessarily emulate (or at least, try * to emulate) an excessive number of L1 instructions; because L1's MMU * isn't shadowed by KVM, there is no need to write-protect L1's gPTEs * and thus no need to emulate in order to guarantee forward progress. * * Try to unprotect the gfn, i.e. zap any shadow pages, so that L1 can * proceed without triggering emulation. If one or more shadow pages * was zapped, skip emulation and resume L1 to let it natively execute * the instruction. If no shadow pages were zapped, then the write- * fault is due to something else entirely, i.e. KVM needs to emulate, * as resuming the guest will put it into an infinite loop. * * Note, this code also applies to Intel CPUs, even though it is *very* * unlikely that an L1 will share its page tables (IA32/PAE/paging64 * format) with L2's page tables (EPT format). * * For indirect MMUs, i.e. if KVM is shadowing the current MMU, try to * unprotect the gfn and retry if an event is awaiting reinjection. If * KVM emulates multiple instructions before completing event injection, * the event could be delayed beyond what is architecturally allowed, * e.g. KVM could inject an IRQ after the TPR has been raised. */ if (((direct && is_write_to_guest_page_table(error_code)) || (!direct && kvm_event_needs_reinjection(vcpu))) && kvm_mmu_unprotect_gfn_and_retry(vcpu, cr2_or_gpa)) return RET_PF_RETRY; /* * The gfn is write-protected, but if KVM detects its emulating an * instruction that is unlikely to be used to modify page tables, or if * emulation fails, KVM can try to unprotect the gfn and let the CPU * re-execute the instruction that caused the page fault. Do not allow * retrying an instruction from a nested guest as KVM is only explicitly * shadowing L1's page tables, i.e. unprotecting something for L1 isn't * going to magically fix whatever issue caused L2 to fail. */ if (!is_guest_mode(vcpu)) *emulation_type |= EMULTYPE_ALLOW_RETRY_PF; return RET_PF_EMULATE; } int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, void *insn, int insn_len) { int r, emulation_type = EMULTYPE_PF; bool direct = vcpu->arch.mmu->root_role.direct; if (WARN_ON_ONCE(!VALID_PAGE(vcpu->arch.mmu->root.hpa))) return RET_PF_RETRY; /* * Except for reserved faults (emulated MMIO is shared-only), set the * PFERR_PRIVATE_ACCESS flag for software-protected VMs based on the gfn's * current attributes, which are the source of truth for such VMs. Note, * this wrong for nested MMUs as the GPA is an L2 GPA, but KVM doesn't * currently supported nested virtualization (among many other things) * for software-protected VMs. */ if (IS_ENABLED(CONFIG_KVM_SW_PROTECTED_VM) && !(error_code & PFERR_RSVD_MASK) && vcpu->kvm->arch.vm_type == KVM_X86_SW_PROTECTED_VM && kvm_mem_is_private(vcpu->kvm, gpa_to_gfn(cr2_or_gpa))) error_code |= PFERR_PRIVATE_ACCESS; r = RET_PF_INVALID; if (unlikely(error_code & PFERR_RSVD_MASK)) { if (WARN_ON_ONCE(error_code & PFERR_PRIVATE_ACCESS)) return -EFAULT; r = handle_mmio_page_fault(vcpu, cr2_or_gpa, direct); if (r == RET_PF_EMULATE) goto emulate; } if (r == RET_PF_INVALID) { vcpu->stat.pf_taken++; r = kvm_mmu_do_page_fault(vcpu, cr2_or_gpa, error_code, false, &emulation_type, NULL); if (KVM_BUG_ON(r == RET_PF_INVALID, vcpu->kvm)) return -EIO; } if (r < 0) return r; if (r == RET_PF_WRITE_PROTECTED) r = kvm_mmu_write_protect_fault(vcpu, cr2_or_gpa, error_code, &emulation_type); if (r == RET_PF_FIXED) vcpu->stat.pf_fixed++; else if (r == RET_PF_EMULATE) vcpu->stat.pf_emulate++; else if (r == RET_PF_SPURIOUS) vcpu->stat.pf_spurious++; /* * None of handle_mmio_page_fault(), kvm_mmu_do_page_fault(), or * kvm_mmu_write_protect_fault() return RET_PF_CONTINUE. * kvm_mmu_do_page_fault() only uses RET_PF_CONTINUE internally to * indicate continuing the page fault handling until to the final * page table mapping phase. */ WARN_ON_ONCE(r == RET_PF_CONTINUE); if (r != RET_PF_EMULATE) return r; emulate: return x86_emulate_instruction(vcpu, cr2_or_gpa, emulation_type, insn, insn_len); } EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_page_fault); void kvm_mmu_print_sptes(struct kvm_vcpu *vcpu, gpa_t gpa, const char *msg) { u64 sptes[PT64_ROOT_MAX_LEVEL + 1]; int root_level, leaf, level; leaf = get_sptes_lockless(vcpu, gpa, sptes, &root_level); if (unlikely(leaf < 0)) return; pr_err("%s %llx", msg, gpa); for (level = root_level; level >= leaf; level--) pr_cont(", spte[%d] = 0x%llx", level, sptes[level]); pr_cont("\n"); } EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_print_sptes); static void __kvm_mmu_invalidate_addr(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, u64 addr, hpa_t root_hpa) { struct kvm_shadow_walk_iterator iterator; vcpu_clear_mmio_info(vcpu, addr); /* * Walking and synchronizing SPTEs both assume they are operating in * the context of the current MMU, and would need to be reworked if * this is ever used to sync the guest_mmu, e.g. to emulate INVEPT. */ if (WARN_ON_ONCE(mmu != vcpu->arch.mmu)) return; if (!VALID_PAGE(root_hpa)) return; write_lock(&vcpu->kvm->mmu_lock); for_each_shadow_entry_using_root(vcpu, root_hpa, addr, iterator) { struct kvm_mmu_page *sp = sptep_to_sp(iterator.sptep); if (sp->unsync) { int ret = kvm_sync_spte(vcpu, sp, iterator.index); if (ret < 0) mmu_page_zap_pte(vcpu->kvm, sp, iterator.sptep, NULL); if (ret) kvm_flush_remote_tlbs_sptep(vcpu->kvm, iterator.sptep); } if (!sp->unsync_children) break; } write_unlock(&vcpu->kvm->mmu_lock); } void kvm_mmu_invalidate_addr(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, u64 addr, unsigned long roots) { int i; WARN_ON_ONCE(roots & ~KVM_MMU_ROOTS_ALL); /* It's actually a GPA for vcpu->arch.guest_mmu. */ if (mmu != &vcpu->arch.guest_mmu) { /* INVLPG on a non-canonical address is a NOP according to the SDM. */ if (is_noncanonical_invlpg_address(addr, vcpu)) return; kvm_x86_call(flush_tlb_gva)(vcpu, addr); } if (!mmu->sync_spte) return; if (roots & KVM_MMU_ROOT_CURRENT) __kvm_mmu_invalidate_addr(vcpu, mmu, addr, mmu->root.hpa); for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) { if (roots & KVM_MMU_ROOT_PREVIOUS(i)) __kvm_mmu_invalidate_addr(vcpu, mmu, addr, mmu->prev_roots[i].hpa); } } EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_invalidate_addr); void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva) { /* * INVLPG is required to invalidate any global mappings for the VA, * irrespective of PCID. Blindly sync all roots as it would take * roughly the same amount of work/time to determine whether any of the * previous roots have a global mapping. * * Mappings not reachable via the current or previous cached roots will * be synced when switching to that new cr3, so nothing needs to be * done here for them. */ kvm_mmu_invalidate_addr(vcpu, vcpu->arch.walk_mmu, gva, KVM_MMU_ROOTS_ALL); ++vcpu->stat.invlpg; } EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_invlpg); void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid) { struct kvm_mmu *mmu = vcpu->arch.mmu; unsigned long roots = 0; uint i; if (pcid == kvm_get_active_pcid(vcpu)) roots |= KVM_MMU_ROOT_CURRENT; for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) { if (VALID_PAGE(mmu->prev_roots[i].hpa) && pcid == kvm_get_pcid(vcpu, mmu->prev_roots[i].pgd)) roots |= KVM_MMU_ROOT_PREVIOUS(i); } if (roots) kvm_mmu_invalidate_addr(vcpu, mmu, gva, roots); ++vcpu->stat.invlpg; /* * Mappings not reachable via the current cr3 or the prev_roots will be * synced when switching to that cr3, so nothing needs to be done here * for them. */ } void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level, int tdp_max_root_level, int tdp_huge_page_level) { tdp_enabled = enable_tdp; tdp_root_level = tdp_forced_root_level; max_tdp_level = tdp_max_root_level; #ifdef CONFIG_X86_64 tdp_mmu_enabled = tdp_mmu_allowed && tdp_enabled; #endif /* * max_huge_page_level reflects KVM's MMU capabilities irrespective * of kernel support, e.g. KVM may be capable of using 1GB pages when * the kernel is not. But, KVM never creates a page size greater than * what is used by the kernel for any given HVA, i.e. the kernel's * capabilities are ultimately consulted by kvm_mmu_hugepage_adjust(). */ if (tdp_enabled) max_huge_page_level = tdp_huge_page_level; else if (boot_cpu_has(X86_FEATURE_GBPAGES)) max_huge_page_level = PG_LEVEL_1G; else max_huge_page_level = PG_LEVEL_2M; } EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_configure_mmu); static void free_mmu_pages(struct kvm_mmu *mmu) { if (!tdp_enabled && mmu->pae_root) set_memory_encrypted((unsigned long)mmu->pae_root, 1); free_page((unsigned long)mmu->pae_root); free_page((unsigned long)mmu->pml4_root); free_page((unsigned long)mmu->pml5_root); } static int __kvm_mmu_create(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu) { struct page *page; int i; mmu->root.hpa = INVALID_PAGE; mmu->root.pgd = 0; mmu->mirror_root_hpa = INVALID_PAGE; for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) mmu->prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID; /* vcpu->arch.guest_mmu isn't used when !tdp_enabled. */ if (!tdp_enabled && mmu == &vcpu->arch.guest_mmu) return 0; /* * When using PAE paging, the four PDPTEs are treated as 'root' pages, * while the PDP table is a per-vCPU construct that's allocated at MMU * creation. When emulating 32-bit mode, cr3 is only 32 bits even on * x86_64. Therefore we need to allocate the PDP table in the first * 4GB of memory, which happens to fit the DMA32 zone. TDP paging * generally doesn't use PAE paging and can skip allocating the PDP * table. The main exception, handled here, is SVM's 32-bit NPT. The * other exception is for shadowing L1's 32-bit or PAE NPT on 64-bit * KVM; that horror is handled on-demand by mmu_alloc_special_roots(). */ if (tdp_enabled && kvm_mmu_get_tdp_level(vcpu) > PT32E_ROOT_LEVEL) return 0; page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_DMA32); if (!page) return -ENOMEM; mmu->pae_root = page_address(page); /* * CR3 is only 32 bits when PAE paging is used, thus it's impossible to * get the CPU to treat the PDPTEs as encrypted. Decrypt the page so * that KVM's writes and the CPU's reads get along. Note, this is * only necessary when using shadow paging, as 64-bit NPT can get at * the C-bit even when shadowing 32-bit NPT, and SME isn't supported * by 32-bit kernels (when KVM itself uses 32-bit NPT). */ if (!tdp_enabled) set_memory_decrypted((unsigned long)mmu->pae_root, 1); else WARN_ON_ONCE(shadow_me_value); for (i = 0; i < 4; ++i) mmu->pae_root[i] = INVALID_PAE_ROOT; return 0; } int kvm_mmu_create(struct kvm_vcpu *vcpu) { int ret; vcpu->arch.mmu_pte_list_desc_cache.kmem_cache = pte_list_desc_cache; vcpu->arch.mmu_pte_list_desc_cache.gfp_zero = __GFP_ZERO; vcpu->arch.mmu_page_header_cache.kmem_cache = mmu_page_header_cache; vcpu->arch.mmu_page_header_cache.gfp_zero = __GFP_ZERO; vcpu->arch.mmu_shadow_page_cache.init_value = SHADOW_NONPRESENT_VALUE; if (!vcpu->arch.mmu_shadow_page_cache.init_value) vcpu->arch.mmu_shadow_page_cache.gfp_zero = __GFP_ZERO; vcpu->arch.mmu = &vcpu->arch.root_mmu; vcpu->arch.walk_mmu = &vcpu->arch.root_mmu; ret = __kvm_mmu_create(vcpu, &vcpu->arch.guest_mmu); if (ret) return ret; ret = __kvm_mmu_create(vcpu, &vcpu->arch.root_mmu); if (ret) goto fail_allocate_root; return ret; fail_allocate_root: free_mmu_pages(&vcpu->arch.guest_mmu); return ret; } #define BATCH_ZAP_PAGES 10 static void kvm_zap_obsolete_pages(struct kvm *kvm) { struct kvm_mmu_page *sp, *node; int nr_zapped, batch = 0; LIST_HEAD(invalid_list); bool unstable; lockdep_assert_held(&kvm->slots_lock); restart: list_for_each_entry_safe_reverse(sp, node, &kvm->arch.active_mmu_pages, link) { /* * No obsolete valid page exists before a newly created page * since active_mmu_pages is a FIFO list. */ if (!is_obsolete_sp(kvm, sp)) break; /* * Invalid pages should never land back on the list of active * pages. Skip the bogus page, otherwise we'll get stuck in an * infinite loop if the page gets put back on the list (again). */ if (WARN_ON_ONCE(sp->role.invalid)) continue; /* * No need to flush the TLB since we're only zapping shadow * pages with an obsolete generation number and all vCPUS have * loaded a new root, i.e. the shadow pages being zapped cannot * be in active use by the guest. */ if (batch >= BATCH_ZAP_PAGES && cond_resched_rwlock_write(&kvm->mmu_lock)) { batch = 0; goto restart; } unstable = __kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &nr_zapped); batch += nr_zapped; if (unstable) goto restart; } /* * Kick all vCPUs (via remote TLB flush) before freeing the page tables * to ensure KVM is not in the middle of a lockless shadow page table * walk, which may reference the pages. The remote TLB flush itself is * not required and is simply a convenient way to kick vCPUs as needed. * KVM performs a local TLB flush when allocating a new root (see * kvm_mmu_load()), and the reload in the caller ensure no vCPUs are * running with an obsolete MMU. */ kvm_mmu_commit_zap_page(kvm, &invalid_list); } /* * Fast invalidate all shadow pages and use lock-break technique * to zap obsolete pages. * * It's required when memslot is being deleted or VM is being * destroyed, in these cases, we should ensure that KVM MMU does * not use any resource of the being-deleted slot or all slots * after calling the function. */ static void kvm_mmu_zap_all_fast(struct kvm *kvm) { lockdep_assert_held(&kvm->slots_lock); write_lock(&kvm->mmu_lock); trace_kvm_mmu_zap_all_fast(kvm); /* * Toggle mmu_valid_gen between '0' and '1'. Because slots_lock is * held for the entire duration of zapping obsolete pages, it's * impossible for there to be multiple invalid generations associated * with *valid* shadow pages at any given time, i.e. there is exactly * one valid generation and (at most) one invalid generation. */ kvm->arch.mmu_valid_gen = kvm->arch.mmu_valid_gen ? 0 : 1; /* * In order to ensure all vCPUs drop their soon-to-be invalid roots, * invalidating TDP MMU roots must be done while holding mmu_lock for * write and in the same critical section as making the reload request, * e.g. before kvm_zap_obsolete_pages() could drop mmu_lock and yield. */ if (tdp_mmu_enabled) { /* * External page tables don't support fast zapping, therefore * their mirrors must be invalidated separately by the caller. */ kvm_tdp_mmu_invalidate_roots(kvm, KVM_DIRECT_ROOTS); } /* * Notify all vcpus to reload its shadow page table and flush TLB. * Then all vcpus will switch to new shadow page table with the new * mmu_valid_gen. * * Note: we need to do this under the protection of mmu_lock, * otherwise, vcpu would purge shadow page but miss tlb flush. */ kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_FREE_OBSOLETE_ROOTS); kvm_zap_obsolete_pages(kvm); write_unlock(&kvm->mmu_lock); /* * Zap the invalidated TDP MMU roots, all SPTEs must be dropped before * returning to the caller, e.g. if the zap is in response to a memslot * deletion, mmu_notifier callbacks will be unable to reach the SPTEs * associated with the deleted memslot once the update completes, and * Deferring the zap until the final reference to the root is put would * lead to use-after-free. */ if (tdp_mmu_enabled) kvm_tdp_mmu_zap_invalidated_roots(kvm, true); } int kvm_mmu_init_vm(struct kvm *kvm) { int r, i; kvm->arch.shadow_mmio_value = shadow_mmio_value; INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); for (i = 0; i < KVM_NR_MMU_TYPES; ++i) INIT_LIST_HEAD(&kvm->arch.possible_nx_huge_pages[i].pages); spin_lock_init(&kvm->arch.mmu_unsync_pages_lock); if (tdp_mmu_enabled) { kvm_mmu_init_tdp_mmu(kvm); } else { r = kvm_mmu_alloc_page_hash(kvm); if (r) return r; } kvm->arch.split_page_header_cache.kmem_cache = mmu_page_header_cache; kvm->arch.split_page_header_cache.gfp_zero = __GFP_ZERO; kvm->arch.split_shadow_page_cache.gfp_zero = __GFP_ZERO; kvm->arch.split_desc_cache.kmem_cache = pte_list_desc_cache; kvm->arch.split_desc_cache.gfp_zero = __GFP_ZERO; return 0; } static void mmu_free_vm_memory_caches(struct kvm *kvm) { kvm_mmu_free_memory_cache(&kvm->arch.split_desc_cache); kvm_mmu_free_memory_cache(&kvm->arch.split_page_header_cache); kvm_mmu_free_memory_cache(&kvm->arch.split_shadow_page_cache); } void kvm_mmu_uninit_vm(struct kvm *kvm) { kvfree(kvm->arch.mmu_page_hash); if (tdp_mmu_enabled) kvm_mmu_uninit_tdp_mmu(kvm); mmu_free_vm_memory_caches(kvm); } static bool kvm_rmap_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) { const struct kvm_memory_slot *memslot; struct kvm_memslots *slots; struct kvm_memslot_iter iter; bool flush = false; gfn_t start, end; int i; if (!kvm_memslots_have_rmaps(kvm)) return flush; for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) { slots = __kvm_memslots(kvm, i); kvm_for_each_memslot_in_gfn_range(&iter, slots, gfn_start, gfn_end) { memslot = iter.slot; start = max(gfn_start, memslot->base_gfn); end = min(gfn_end, memslot->base_gfn + memslot->npages); if (WARN_ON_ONCE(start >= end)) continue; flush = __kvm_rmap_zap_gfn_range(kvm, memslot, start, end, true, flush); } } return flush; } /* * Invalidate (zap) SPTEs that cover GFNs from gfn_start and up to gfn_end * (not including it) */ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) { bool flush; if (WARN_ON_ONCE(gfn_end <= gfn_start)) return; write_lock(&kvm->mmu_lock); kvm_mmu_invalidate_begin(kvm); kvm_mmu_invalidate_range_add(kvm, gfn_start, gfn_end); flush = kvm_rmap_zap_gfn_range(kvm, gfn_start, gfn_end); if (tdp_mmu_enabled) flush = kvm_tdp_mmu_zap_leafs(kvm, gfn_start, gfn_end, flush); if (flush) kvm_flush_remote_tlbs_range(kvm, gfn_start, gfn_end - gfn_start); kvm_mmu_invalidate_end(kvm); write_unlock(&kvm->mmu_lock); } static bool slot_rmap_write_protect(struct kvm *kvm, struct kvm_rmap_head *rmap_head, const struct kvm_memory_slot *slot) { return rmap_write_protect(rmap_head, false); } void kvm_mmu_slot_remove_write_access(struct kvm *kvm, const struct kvm_memory_slot *memslot, int start_level) { if (kvm_memslots_have_rmaps(kvm)) { write_lock(&kvm->mmu_lock); walk_slot_rmaps(kvm, memslot, slot_rmap_write_protect, start_level, KVM_MAX_HUGEPAGE_LEVEL, false); write_unlock(&kvm->mmu_lock); } if (tdp_mmu_enabled) { read_lock(&kvm->mmu_lock); kvm_tdp_mmu_wrprot_slot(kvm, memslot, start_level); read_unlock(&kvm->mmu_lock); } } static inline bool need_topup(struct kvm_mmu_memory_cache *cache, int min) { return kvm_mmu_memory_cache_nr_free_objects(cache) < min; } static bool need_topup_split_caches_or_resched(struct kvm *kvm) { if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) return true; /* * In the worst case, SPLIT_DESC_CACHE_MIN_NR_OBJECTS descriptors are needed * to split a single huge page. Calculating how many are actually needed * is possible but not worth the complexity. */ return need_topup(&kvm->arch.split_desc_cache, SPLIT_DESC_CACHE_MIN_NR_OBJECTS) || need_topup(&kvm->arch.split_page_header_cache, 1) || need_topup(&kvm->arch.split_shadow_page_cache, 1); } static int topup_split_caches(struct kvm *kvm) { /* * Allocating rmap list entries when splitting huge pages for nested * MMUs is uncommon as KVM needs to use a list if and only if there is * more than one rmap entry for a gfn, i.e. requires an L1 gfn to be * aliased by multiple L2 gfns and/or from multiple nested roots with * different roles. Aliasing gfns when using TDP is atypical for VMMs; * a few gfns are often aliased during boot, e.g. when remapping BIOS, * but aliasing rarely occurs post-boot or for many gfns. If there is * only one rmap entry, rmap->val points directly at that one entry and * doesn't need to allocate a list. Buffer the cache by the default * capacity so that KVM doesn't have to drop mmu_lock to topup if KVM * encounters an aliased gfn or two. */ const int capacity = SPLIT_DESC_CACHE_MIN_NR_OBJECTS + KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE; int r; lockdep_assert_held(&kvm->slots_lock); r = __kvm_mmu_topup_memory_cache(&kvm->arch.split_desc_cache, capacity, SPLIT_DESC_CACHE_MIN_NR_OBJECTS); if (r) return r; r = kvm_mmu_topup_memory_cache(&kvm->arch.split_page_header_cache, 1); if (r) return r; return kvm_mmu_topup_memory_cache(&kvm->arch.split_shadow_page_cache, 1); } static struct kvm_mmu_page *shadow_mmu_get_sp_for_split(struct kvm *kvm, u64 *huge_sptep) { struct kvm_mmu_page *huge_sp = sptep_to_sp(huge_sptep); struct shadow_page_caches caches = {}; union kvm_mmu_page_role role; unsigned int access; gfn_t gfn; gfn = kvm_mmu_page_get_gfn(huge_sp, spte_index(huge_sptep)); access = kvm_mmu_page_get_access(huge_sp, spte_index(huge_sptep)); /* * Note, huge page splitting always uses direct shadow pages, regardless * of whether the huge page itself is mapped by a direct or indirect * shadow page, since the huge page region itself is being directly * mapped with smaller pages. */ role = kvm_mmu_child_role(huge_sptep, /*direct=*/true, access); /* Direct SPs do not require a shadowed_info_cache. */ caches.page_header_cache = &kvm->arch.split_page_header_cache; caches.shadow_page_cache = &kvm->arch.split_shadow_page_cache; /* Safe to pass NULL for vCPU since requesting a direct SP. */ return __kvm_mmu_get_shadow_page(kvm, NULL, &caches, gfn, role); } static void shadow_mmu_split_huge_page(struct kvm *kvm, const struct kvm_memory_slot *slot, u64 *huge_sptep) { struct kvm_mmu_memory_cache *cache = &kvm->arch.split_desc_cache; u64 huge_spte = READ_ONCE(*huge_sptep); struct kvm_mmu_page *sp; bool flush = false; u64 *sptep, spte; gfn_t gfn; int index; sp = shadow_mmu_get_sp_for_split(kvm, huge_sptep); for (index = 0; index < SPTE_ENT_PER_PAGE; index++) { sptep = &sp->spt[index]; gfn = kvm_mmu_page_get_gfn(sp, index); /* * The SP may already have populated SPTEs, e.g. if this huge * page is aliased by multiple sptes with the same access * permissions. These entries are guaranteed to map the same * gfn-to-pfn translation since the SP is direct, so no need to * modify them. * * However, if a given SPTE points to a lower level page table, * that lower level page table may only be partially populated. * Installing such SPTEs would effectively unmap a potion of the * huge page. Unmapping guest memory always requires a TLB flush * since a subsequent operation on the unmapped regions would * fail to detect the need to flush. */ if (is_shadow_present_pte(*sptep)) { flush |= !is_last_spte(*sptep, sp->role.level); continue; } spte = make_small_spte(kvm, huge_spte, sp->role, index); mmu_spte_set(sptep, spte); __rmap_add(kvm, cache, slot, sptep, gfn, sp->role.access); } __link_shadow_page(kvm, cache, huge_sptep, sp, flush); } static int shadow_mmu_try_split_huge_page(struct kvm *kvm, const struct kvm_memory_slot *slot, u64 *huge_sptep) { struct kvm_mmu_page *huge_sp = sptep_to_sp(huge_sptep); int level, r = 0; gfn_t gfn; u64 spte; /* Grab information for the tracepoint before dropping the MMU lock. */ gfn = kvm_mmu_page_get_gfn(huge_sp, spte_index(huge_sptep)); level = huge_sp->role.level; spte = *huge_sptep; if (kvm_mmu_available_pages(kvm) <= KVM_MIN_FREE_MMU_PAGES) { r = -ENOSPC; goto out; } if (need_topup_split_caches_or_resched(kvm)) { write_unlock(&kvm->mmu_lock); cond_resched(); /* * If the topup succeeds, return -EAGAIN to indicate that the * rmap iterator should be restarted because the MMU lock was * dropped. */ r = topup_split_caches(kvm) ?: -EAGAIN; write_lock(&kvm->mmu_lock); goto out; } shadow_mmu_split_huge_page(kvm, slot, huge_sptep); out: trace_kvm_mmu_split_huge_page(gfn, spte, level, r); return r; } static bool shadow_mmu_try_split_huge_pages(struct kvm *kvm, struct kvm_rmap_head *rmap_head, const struct kvm_memory_slot *slot) { struct rmap_iterator iter; struct kvm_mmu_page *sp; u64 *huge_sptep; int r; restart: for_each_rmap_spte(rmap_head, &iter, huge_sptep) { sp = sptep_to_sp(huge_sptep); /* TDP MMU is enabled, so rmap only contains nested MMU SPs. */ if (WARN_ON_ONCE(!sp->role.guest_mode)) continue; /* The rmaps should never contain non-leaf SPTEs. */ if (WARN_ON_ONCE(!is_large_pte(*huge_sptep))) continue; /* SPs with level >PG_LEVEL_4K should never by unsync. */ if (WARN_ON_ONCE(sp->unsync)) continue; /* Don't bother splitting huge pages on invalid SPs. */ if (sp->role.invalid) continue; r = shadow_mmu_try_split_huge_page(kvm, slot, huge_sptep); /* * The split succeeded or needs to be retried because the MMU * lock was dropped. Either way, restart the iterator to get it * back into a consistent state. */ if (!r || r == -EAGAIN) goto restart; /* The split failed and shouldn't be retried (e.g. -ENOMEM). */ break; } return false; } static void kvm_shadow_mmu_try_split_huge_pages(struct kvm *kvm, const struct kvm_memory_slot *slot, gfn_t start, gfn_t end, int target_level) { int level; /* * Split huge pages starting with KVM_MAX_HUGEPAGE_LEVEL and working * down to the target level. This ensures pages are recursively split * all the way to the target level. There's no need to split pages * already at the target level. */ for (level = KVM_MAX_HUGEPAGE_LEVEL; level > target_level; level--) __walk_slot_rmaps(kvm, slot, shadow_mmu_try_split_huge_pages, level, level, start, end - 1, true, true, false); } /* Must be called with the mmu_lock held in write-mode. */ void kvm_mmu_try_split_huge_pages(struct kvm *kvm, const struct kvm_memory_slot *memslot, u64 start, u64 end, int target_level) { if (!tdp_mmu_enabled) return; if (kvm_memslots_have_rmaps(kvm)) kvm_shadow_mmu_try_split_huge_pages(kvm, memslot, start, end, target_level); kvm_tdp_mmu_try_split_huge_pages(kvm, memslot, start, end, target_level, false); /* * A TLB flush is unnecessary at this point for the same reasons as in * kvm_mmu_slot_try_split_huge_pages(). */ } void kvm_mmu_slot_try_split_huge_pages(struct kvm *kvm, const struct kvm_memory_slot *memslot, int target_level) { u64 start = memslot->base_gfn; u64 end = start + memslot->npages; if (!tdp_mmu_enabled) return; if (kvm_memslots_have_rmaps(kvm)) { write_lock(&kvm->mmu_lock); kvm_shadow_mmu_try_split_huge_pages(kvm, memslot, start, end, target_level); write_unlock(&kvm->mmu_lock); } read_lock(&kvm->mmu_lock); kvm_tdp_mmu_try_split_huge_pages(kvm, memslot, start, end, target_level, true); read_unlock(&kvm->mmu_lock); /* * No TLB flush is necessary here. KVM will flush TLBs after * write-protecting and/or clearing dirty on the newly split SPTEs to * ensure that guest writes are reflected in the dirty log before the * ioctl to enable dirty logging on this memslot completes. Since the * split SPTEs retain the write and dirty bits of the huge SPTE, it is * safe for KVM to decide if a TLB flush is necessary based on the split * SPTEs. */ } static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm, struct kvm_rmap_head *rmap_head, const struct kvm_memory_slot *slot) { u64 *sptep; struct rmap_iterator iter; int need_tlb_flush = 0; struct kvm_mmu_page *sp; restart: for_each_rmap_spte(rmap_head, &iter, sptep) { sp = sptep_to_sp(sptep); /* * We cannot do huge page mapping for indirect shadow pages, * which are found on the last rmap (level = 1) when not using * tdp; such shadow pages are synced with the page table in * the guest, and the guest page table is using 4K page size * mapping if the indirect sp has level = 1. */ if (sp->role.direct && sp->role.level < kvm_mmu_max_mapping_level(kvm, NULL, slot, sp->gfn)) { kvm_zap_one_rmap_spte(kvm, rmap_head, sptep); if (kvm_available_flush_remote_tlbs_range()) kvm_flush_remote_tlbs_sptep(kvm, sptep); else need_tlb_flush = 1; goto restart; } } return need_tlb_flush; } EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_zap_gfn_range); static void kvm_rmap_zap_collapsible_sptes(struct kvm *kvm, const struct kvm_memory_slot *slot) { /* * Note, use KVM_MAX_HUGEPAGE_LEVEL - 1 since there's no need to zap * pages that are already mapped at the maximum hugepage level. */ if (walk_slot_rmaps(kvm, slot, kvm_mmu_zap_collapsible_spte, PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL - 1, true)) kvm_flush_remote_tlbs_memslot(kvm, slot); } void kvm_mmu_recover_huge_pages(struct kvm *kvm, const struct kvm_memory_slot *slot) { if (kvm_memslots_have_rmaps(kvm)) { write_lock(&kvm->mmu_lock); kvm_rmap_zap_collapsible_sptes(kvm, slot); write_unlock(&kvm->mmu_lock); } if (tdp_mmu_enabled) { read_lock(&kvm->mmu_lock); kvm_tdp_mmu_recover_huge_pages(kvm, slot); read_unlock(&kvm->mmu_lock); } } void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, const struct kvm_memory_slot *memslot) { if (kvm_memslots_have_rmaps(kvm)) { write_lock(&kvm->mmu_lock); /* * Clear dirty bits only on 4k SPTEs since the legacy MMU only * support dirty logging at a 4k granularity. */ walk_slot_rmaps_4k(kvm, memslot, __rmap_clear_dirty, false); write_unlock(&kvm->mmu_lock); } if (tdp_mmu_enabled) { read_lock(&kvm->mmu_lock); kvm_tdp_mmu_clear_dirty_slot(kvm, memslot); read_unlock(&kvm->mmu_lock); } /* * The caller will flush the TLBs after this function returns. * * It's also safe to flush TLBs out of mmu lock here as currently this * function is only used for dirty logging, in which case flushing TLB * out of mmu lock also guarantees no dirty pages will be lost in * dirty_bitmap. */ } static void kvm_mmu_zap_all(struct kvm *kvm) { struct kvm_mmu_page *sp, *node; LIST_HEAD(invalid_list); int ign; write_lock(&kvm->mmu_lock); restart: list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) { if (WARN_ON_ONCE(sp->role.invalid)) continue; if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &ign)) goto restart; if (cond_resched_rwlock_write(&kvm->mmu_lock)) goto restart; } kvm_mmu_commit_zap_page(kvm, &invalid_list); if (tdp_mmu_enabled) kvm_tdp_mmu_zap_all(kvm); write_unlock(&kvm->mmu_lock); } void kvm_arch_flush_shadow_all(struct kvm *kvm) { kvm_mmu_zap_all(kvm); } static void kvm_mmu_zap_memslot_pages_and_flush(struct kvm *kvm, struct kvm_memory_slot *slot, bool flush) { LIST_HEAD(invalid_list); unsigned long i; if (list_empty(&kvm->arch.active_mmu_pages)) goto out_flush; /* * Since accounting information is stored in struct kvm_arch_memory_slot, * all MMU pages that are shadowing guest PTEs must be zapped before the * memslot is deleted, as freeing such pages after the memslot is freed * will result in use-after-free, e.g. in unaccount_shadowed(). */ for (i = 0; i < slot->npages; i++) { struct kvm_mmu_page *sp; gfn_t gfn = slot->base_gfn + i; for_each_gfn_valid_sp_with_gptes(kvm, sp, gfn) kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) { kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush); flush = false; cond_resched_rwlock_write(&kvm->mmu_lock); } } out_flush: kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush); } static void kvm_mmu_zap_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) { struct kvm_gfn_range range = { .slot = slot, .start = slot->base_gfn, .end = slot->base_gfn + slot->npages, .may_block = true, .attr_filter = KVM_FILTER_PRIVATE | KVM_FILTER_SHARED, }; bool flush; write_lock(&kvm->mmu_lock); flush = kvm_unmap_gfn_range(kvm, &range); kvm_mmu_zap_memslot_pages_and_flush(kvm, slot, flush); write_unlock(&kvm->mmu_lock); } static inline bool kvm_memslot_flush_zap_all(struct kvm *kvm) { return kvm->arch.vm_type == KVM_X86_DEFAULT_VM && kvm_check_has_quirk(kvm, KVM_X86_QUIRK_SLOT_ZAP_ALL); } void kvm_arch_flush_shadow_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) { if (kvm_memslot_flush_zap_all(kvm)) kvm_mmu_zap_all_fast(kvm); else kvm_mmu_zap_memslot(kvm, slot); } void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen) { WARN_ON_ONCE(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS); gen &= MMIO_SPTE_GEN_MASK; /* * Generation numbers are incremented in multiples of the number of * address spaces in order to provide unique generations across all * address spaces. Strip what is effectively the address space * modifier prior to checking for a wrap of the MMIO generation so * that a wrap in any address space is detected. */ gen &= ~((u64)kvm_arch_nr_memslot_as_ids(kvm) - 1); /* * The very rare case: if the MMIO generation number has wrapped, * zap all shadow pages. */ if (unlikely(gen == 0)) { kvm_debug_ratelimited("zapping shadow pages for mmio generation wraparound\n"); kvm_mmu_zap_all_fast(kvm); } } static void mmu_destroy_caches(void) { kmem_cache_destroy(pte_list_desc_cache); kmem_cache_destroy(mmu_page_header_cache); } static void kvm_wake_nx_recovery_thread(struct kvm *kvm) { /* * The NX recovery thread is spawned on-demand at the first KVM_RUN and * may not be valid even though the VM is globally visible. Do nothing, * as such a VM can't have any possible NX huge pages. */ struct vhost_task *nx_thread = READ_ONCE(kvm->arch.nx_huge_page_recovery_thread); if (nx_thread) vhost_task_wake(nx_thread); } static int get_nx_huge_pages(char *buffer, const struct kernel_param *kp) { if (nx_hugepage_mitigation_hard_disabled) return sysfs_emit(buffer, "never\n"); return param_get_bool(buffer, kp); } static bool get_nx_auto_mode(void) { /* Return true when CPU has the bug, and mitigations are ON */ return boot_cpu_has_bug(X86_BUG_ITLB_MULTIHIT) && !cpu_mitigations_off(); } static void __set_nx_huge_pages(bool val) { nx_huge_pages = itlb_multihit_kvm_mitigation = val; } static int set_nx_huge_pages(const char *val, const struct kernel_param *kp) { bool old_val = nx_huge_pages; bool new_val; if (nx_hugepage_mitigation_hard_disabled) return -EPERM; /* In "auto" mode deploy workaround only if CPU has the bug. */ if (sysfs_streq(val, "off")) { new_val = 0; } else if (sysfs_streq(val, "force")) { new_val = 1; } else if (sysfs_streq(val, "auto")) { new_val = get_nx_auto_mode(); } else if (sysfs_streq(val, "never")) { new_val = 0; mutex_lock(&kvm_lock); if (!list_empty(&vm_list)) { mutex_unlock(&kvm_lock); return -EBUSY; } nx_hugepage_mitigation_hard_disabled = true; mutex_unlock(&kvm_lock); } else if (kstrtobool(val, &new_val) < 0) { return -EINVAL; } __set_nx_huge_pages(new_val); if (new_val != old_val) { struct kvm *kvm; mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { mutex_lock(&kvm->slots_lock); kvm_mmu_zap_all_fast(kvm); mutex_unlock(&kvm->slots_lock); kvm_wake_nx_recovery_thread(kvm); } mutex_unlock(&kvm_lock); } return 0; } /* * nx_huge_pages needs to be resolved to true/false when kvm.ko is loaded, as * its default value of -1 is technically undefined behavior for a boolean. * Forward the module init call to SPTE code so that it too can handle module * params that need to be resolved/snapshot. */ void __init kvm_mmu_x86_module_init(void) { if (nx_huge_pages == -1) __set_nx_huge_pages(get_nx_auto_mode()); /* * Snapshot userspace's desire to enable the TDP MMU. Whether or not the * TDP MMU is actually enabled is determined in kvm_configure_mmu() * when the vendor module is loaded. */ tdp_mmu_allowed = tdp_mmu_enabled; kvm_mmu_spte_module_init(); } /* * The bulk of the MMU initialization is deferred until the vendor module is * loaded as many of the masks/values may be modified by VMX or SVM, i.e. need * to be reset when a potentially different vendor module is loaded. */ int kvm_mmu_vendor_module_init(void) { int ret = -ENOMEM; /* * MMU roles use union aliasing which is, generally speaking, an * undefined behavior. However, we supposedly know how compilers behave * and the current status quo is unlikely to change. Guardians below are * supposed to let us know if the assumption becomes false. */ BUILD_BUG_ON(sizeof(union kvm_mmu_page_role) != sizeof(u32)); BUILD_BUG_ON(sizeof(union kvm_mmu_extended_role) != sizeof(u32)); BUILD_BUG_ON(sizeof(union kvm_cpu_role) != sizeof(u64)); kvm_mmu_reset_all_pte_masks(); pte_list_desc_cache = KMEM_CACHE(pte_list_desc, SLAB_ACCOUNT); if (!pte_list_desc_cache) goto out; mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header", sizeof(struct kvm_mmu_page), 0, SLAB_ACCOUNT, NULL); if (!mmu_page_header_cache) goto out; return 0; out: mmu_destroy_caches(); return ret; } void kvm_mmu_destroy(struct kvm_vcpu *vcpu) { kvm_mmu_unload(vcpu); if (tdp_mmu_enabled) { read_lock(&vcpu->kvm->mmu_lock); mmu_free_root_page(vcpu->kvm, &vcpu->arch.mmu->mirror_root_hpa, NULL); read_unlock(&vcpu->kvm->mmu_lock); } free_mmu_pages(&vcpu->arch.root_mmu); free_mmu_pages(&vcpu->arch.guest_mmu); mmu_free_memory_caches(vcpu); } void kvm_mmu_vendor_module_exit(void) { mmu_destroy_caches(); } /* * Calculate the effective recovery period, accounting for '0' meaning "let KVM * select a halving time of 1 hour". Returns true if recovery is enabled. */ static bool calc_nx_huge_pages_recovery_period(uint *period) { /* * Use READ_ONCE to get the params, this may be called outside of the * param setters, e.g. by the kthread to compute its next timeout. */ bool enabled = READ_ONCE(nx_huge_pages); uint ratio = READ_ONCE(nx_huge_pages_recovery_ratio); if (!enabled || !ratio) return false; *period = READ_ONCE(nx_huge_pages_recovery_period_ms); if (!*period) { /* Make sure the period is not less than one second. */ ratio = min(ratio, 3600u); *period = 60 * 60 * 1000 / ratio; } return true; } static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel_param *kp) { bool was_recovery_enabled, is_recovery_enabled; uint old_period, new_period; int err; if (nx_hugepage_mitigation_hard_disabled) return -EPERM; was_recovery_enabled = calc_nx_huge_pages_recovery_period(&old_period); err = param_set_uint(val, kp); if (err) return err; is_recovery_enabled = calc_nx_huge_pages_recovery_period(&new_period); if (is_recovery_enabled && (!was_recovery_enabled || old_period > new_period)) { struct kvm *kvm; mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) kvm_wake_nx_recovery_thread(kvm); mutex_unlock(&kvm_lock); } return err; } static unsigned long nx_huge_pages_to_zap(struct kvm *kvm, enum kvm_mmu_type mmu_type) { unsigned long pages = READ_ONCE(kvm->arch.possible_nx_huge_pages[mmu_type].nr_pages); unsigned int ratio = READ_ONCE(nx_huge_pages_recovery_ratio); return ratio ? DIV_ROUND_UP(pages, ratio) : 0; } static bool kvm_mmu_sp_dirty_logging_enabled(struct kvm *kvm, struct kvm_mmu_page *sp) { struct kvm_memory_slot *slot; /* * Skip the memslot lookup if dirty tracking can't possibly be enabled, * as memslot lookups are relatively expensive. * * If a memslot update is in progress, reading an incorrect value of * kvm->nr_memslots_dirty_logging is not a problem: if it is becoming * zero, KVM will do an unnecessary memslot lookup; if it is becoming * nonzero, the page will be zapped unnecessarily. Either way, this * only affects efficiency in racy situations, and not correctness. */ if (!atomic_read(&kvm->nr_memslots_dirty_logging)) return false; slot = __gfn_to_memslot(kvm_memslots_for_spte_role(kvm, sp->role), sp->gfn); if (WARN_ON_ONCE(!slot)) return false; return kvm_slot_dirty_track_enabled(slot); } static void kvm_recover_nx_huge_pages(struct kvm *kvm, const enum kvm_mmu_type mmu_type) { #ifdef CONFIG_X86_64 const bool is_tdp_mmu = mmu_type == KVM_TDP_MMU; spinlock_t *tdp_mmu_pages_lock = &kvm->arch.tdp_mmu_pages_lock; #else const bool is_tdp_mmu = false; spinlock_t *tdp_mmu_pages_lock = NULL; #endif unsigned long to_zap = nx_huge_pages_to_zap(kvm, mmu_type); struct list_head *nx_huge_pages; struct kvm_mmu_page *sp; LIST_HEAD(invalid_list); bool flush = false; int rcu_idx; nx_huge_pages = &kvm->arch.possible_nx_huge_pages[mmu_type].pages; rcu_idx = srcu_read_lock(&kvm->srcu); if (is_tdp_mmu) read_lock(&kvm->mmu_lock); else write_lock(&kvm->mmu_lock); /* * Zapping TDP MMU shadow pages, including the remote TLB flush, must * be done under RCU protection, because the pages are freed via RCU * callback. */ rcu_read_lock(); for ( ; to_zap; --to_zap) { if (is_tdp_mmu) spin_lock(tdp_mmu_pages_lock); if (list_empty(nx_huge_pages)) { if (is_tdp_mmu) spin_unlock(tdp_mmu_pages_lock); break; } /* * We use a separate list instead of just using active_mmu_pages * because the number of shadow pages that be replaced with an * NX huge page is expected to be relatively small compared to * the total number of shadow pages. And because the TDP MMU * doesn't use active_mmu_pages. */ sp = list_first_entry(nx_huge_pages, struct kvm_mmu_page, possible_nx_huge_page_link); WARN_ON_ONCE(!sp->nx_huge_page_disallowed); WARN_ON_ONCE(!sp->role.direct); unaccount_nx_huge_page(kvm, sp); if (is_tdp_mmu) spin_unlock(tdp_mmu_pages_lock); /* * Do not attempt to recover any NX Huge Pages that are being * dirty tracked, as they would just be faulted back in as 4KiB * pages. The NX Huge Pages in this slot will be recovered, * along with all the other huge pages in the slot, when dirty * logging is disabled. */ if (!kvm_mmu_sp_dirty_logging_enabled(kvm, sp)) { if (is_tdp_mmu) flush |= kvm_tdp_mmu_zap_possible_nx_huge_page(kvm, sp); else kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); } WARN_ON_ONCE(sp->nx_huge_page_disallowed); if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) { kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush); rcu_read_unlock(); if (is_tdp_mmu) cond_resched_rwlock_read(&kvm->mmu_lock); else cond_resched_rwlock_write(&kvm->mmu_lock); flush = false; rcu_read_lock(); } } kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush); rcu_read_unlock(); if (is_tdp_mmu) read_unlock(&kvm->mmu_lock); else write_unlock(&kvm->mmu_lock); srcu_read_unlock(&kvm->srcu, rcu_idx); } static void kvm_nx_huge_page_recovery_worker_kill(void *data) { } static bool kvm_nx_huge_page_recovery_worker(void *data) { struct kvm *kvm = data; long remaining_time; bool enabled; uint period; int i; enabled = calc_nx_huge_pages_recovery_period(&period); if (!enabled) return false; remaining_time = kvm->arch.nx_huge_page_last + msecs_to_jiffies(period) - get_jiffies_64(); if (remaining_time > 0) { schedule_timeout(remaining_time); /* check for signals and come back */ return true; } __set_current_state(TASK_RUNNING); for (i = 0; i < KVM_NR_MMU_TYPES; ++i) kvm_recover_nx_huge_pages(kvm, i); kvm->arch.nx_huge_page_last = get_jiffies_64(); return true; } static int kvm_mmu_start_lpage_recovery(struct once *once) { struct kvm_arch *ka = container_of(once, struct kvm_arch, nx_once); struct kvm *kvm = container_of(ka, struct kvm, arch); struct vhost_task *nx_thread; kvm->arch.nx_huge_page_last = get_jiffies_64(); nx_thread = vhost_task_create(kvm_nx_huge_page_recovery_worker, kvm_nx_huge_page_recovery_worker_kill, kvm, "kvm-nx-lpage-recovery"); if (IS_ERR(nx_thread)) return PTR_ERR(nx_thread); vhost_task_start(nx_thread); /* Make the task visible only once it is fully started. */ WRITE_ONCE(kvm->arch.nx_huge_page_recovery_thread, nx_thread); return 0; } int kvm_mmu_post_init_vm(struct kvm *kvm) { if (nx_hugepage_mitigation_hard_disabled) return 0; return call_once(&kvm->arch.nx_once, kvm_mmu_start_lpage_recovery); } void kvm_mmu_pre_destroy_vm(struct kvm *kvm) { if (kvm->arch.nx_huge_page_recovery_thread) vhost_task_stop(kvm->arch.nx_huge_page_recovery_thread); } #ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES static bool hugepage_test_mixed(struct kvm_memory_slot *slot, gfn_t gfn, int level) { return lpage_info_slot(gfn, slot, level)->disallow_lpage & KVM_LPAGE_MIXED_FLAG; } static void hugepage_clear_mixed(struct kvm_memory_slot *slot, gfn_t gfn, int level) { lpage_info_slot(gfn, slot, level)->disallow_lpage &= ~KVM_LPAGE_MIXED_FLAG; } static void hugepage_set_mixed(struct kvm_memory_slot *slot, gfn_t gfn, int level) { lpage_info_slot(gfn, slot, level)->disallow_lpage |= KVM_LPAGE_MIXED_FLAG; } bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm, struct kvm_gfn_range *range) { struct kvm_memory_slot *slot = range->slot; int level; /* * Zap SPTEs even if the slot can't be mapped PRIVATE. KVM x86 only * supports KVM_MEMORY_ATTRIBUTE_PRIVATE, and so it *seems* like KVM * can simply ignore such slots. But if userspace is making memory * PRIVATE, then KVM must prevent the guest from accessing the memory * as shared. And if userspace is making memory SHARED and this point * is reached, then at least one page within the range was previously * PRIVATE, i.e. the slot's possible hugepage ranges are changing. * Zapping SPTEs in this case ensures KVM will reassess whether or not * a hugepage can be used for affected ranges. */ if (WARN_ON_ONCE(!kvm_arch_has_private_mem(kvm))) return false; if (WARN_ON_ONCE(range->end <= range->start)) return false; /* * If the head and tail pages of the range currently allow a hugepage, * i.e. reside fully in the slot and don't have mixed attributes, then * add each corresponding hugepage range to the ongoing invalidation, * e.g. to prevent KVM from creating a hugepage in response to a fault * for a gfn whose attributes aren't changing. Note, only the range * of gfns whose attributes are being modified needs to be explicitly * unmapped, as that will unmap any existing hugepages. */ for (level = PG_LEVEL_2M; level <= KVM_MAX_HUGEPAGE_LEVEL; level++) { gfn_t start = gfn_round_for_level(range->start, level); gfn_t end = gfn_round_for_level(range->end - 1, level); gfn_t nr_pages = KVM_PAGES_PER_HPAGE(level); if ((start != range->start || start + nr_pages > range->end) && start >= slot->base_gfn && start + nr_pages <= slot->base_gfn + slot->npages && !hugepage_test_mixed(slot, start, level)) kvm_mmu_invalidate_range_add(kvm, start, start + nr_pages); if (end == start) continue; if ((end + nr_pages) > range->end && (end + nr_pages) <= (slot->base_gfn + slot->npages) && !hugepage_test_mixed(slot, end, level)) kvm_mmu_invalidate_range_add(kvm, end, end + nr_pages); } /* Unmap the old attribute page. */ if (range->arg.attributes & KVM_MEMORY_ATTRIBUTE_PRIVATE) range->attr_filter = KVM_FILTER_SHARED; else range->attr_filter = KVM_FILTER_PRIVATE; return kvm_unmap_gfn_range(kvm, range); } static bool hugepage_has_attrs(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn, int level, unsigned long attrs) { const unsigned long start = gfn; const unsigned long end = start + KVM_PAGES_PER_HPAGE(level); if (level == PG_LEVEL_2M) return kvm_range_has_memory_attributes(kvm, start, end, ~0, attrs); for (gfn = start; gfn < end; gfn += KVM_PAGES_PER_HPAGE(level - 1)) { if (hugepage_test_mixed(slot, gfn, level - 1) || attrs != kvm_get_memory_attributes(kvm, gfn)) return false; } return true; } bool kvm_arch_post_set_memory_attributes(struct kvm *kvm, struct kvm_gfn_range *range) { unsigned long attrs = range->arg.attributes; struct kvm_memory_slot *slot = range->slot; int level; lockdep_assert_held_write(&kvm->mmu_lock); lockdep_assert_held(&kvm->slots_lock); /* * Calculate which ranges can be mapped with hugepages even if the slot * can't map memory PRIVATE. KVM mustn't create a SHARED hugepage over * a range that has PRIVATE GFNs, and conversely converting a range to * SHARED may now allow hugepages. */ if (WARN_ON_ONCE(!kvm_arch_has_private_mem(kvm))) return false; /* * The sequence matters here: upper levels consume the result of lower * level's scanning. */ for (level = PG_LEVEL_2M; level <= KVM_MAX_HUGEPAGE_LEVEL; level++) { gfn_t nr_pages = KVM_PAGES_PER_HPAGE(level); gfn_t gfn = gfn_round_for_level(range->start, level); /* Process the head page if it straddles the range. */ if (gfn != range->start || gfn + nr_pages > range->end) { /* * Skip mixed tracking if the aligned gfn isn't covered * by the memslot, KVM can't use a hugepage due to the * misaligned address regardless of memory attributes. */ if (gfn >= slot->base_gfn && gfn + nr_pages <= slot->base_gfn + slot->npages) { if (hugepage_has_attrs(kvm, slot, gfn, level, attrs)) hugepage_clear_mixed(slot, gfn, level); else hugepage_set_mixed(slot, gfn, level); } gfn += nr_pages; } /* * Pages entirely covered by the range are guaranteed to have * only the attributes which were just set. */ for ( ; gfn + nr_pages <= range->end; gfn += nr_pages) hugepage_clear_mixed(slot, gfn, level); /* * Process the last tail page if it straddles the range and is * contained by the memslot. Like the head page, KVM can't * create a hugepage if the slot size is misaligned. */ if (gfn < range->end && (gfn + nr_pages) <= (slot->base_gfn + slot->npages)) { if (hugepage_has_attrs(kvm, slot, gfn, level, attrs)) hugepage_clear_mixed(slot, gfn, level); else hugepage_set_mixed(slot, gfn, level); } } return false; } void kvm_mmu_init_memslot_memory_attributes(struct kvm *kvm, struct kvm_memory_slot *slot) { int level; if (!kvm_arch_has_private_mem(kvm)) return; for (level = PG_LEVEL_2M; level <= KVM_MAX_HUGEPAGE_LEVEL; level++) { /* * Don't bother tracking mixed attributes for pages that can't * be huge due to alignment, i.e. process only pages that are * entirely contained by the memslot. */ gfn_t end = gfn_round_for_level(slot->base_gfn + slot->npages, level); gfn_t start = gfn_round_for_level(slot->base_gfn, level); gfn_t nr_pages = KVM_PAGES_PER_HPAGE(level); gfn_t gfn; if (start < slot->base_gfn) start += nr_pages; /* * Unlike setting attributes, every potential hugepage needs to * be manually checked as the attributes may already be mixed. */ for (gfn = start; gfn < end; gfn += nr_pages) { unsigned long attrs = kvm_get_memory_attributes(kvm, gfn); if (hugepage_has_attrs(kvm, slot, gfn, level, attrs)) hugepage_clear_mixed(slot, gfn, level); else hugepage_set_mixed(slot, gfn, level); } } } #endif
2739 3926 52 52 1 1 52 217 1 16 7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 /* SPDX-License-Identifier: GPL-2.0-only */ /* * Fast and scalable bitmaps. * * Copyright (C) 2016 Facebook * Copyright (C) 2013-2014 Jens Axboe */ #ifndef __LINUX_SCALE_BITMAP_H #define __LINUX_SCALE_BITMAP_H #include <linux/atomic.h> #include <linux/bitops.h> #include <linux/cache.h> #include <linux/list.h> #include <linux/log2.h> #include <linux/minmax.h> #include <linux/percpu.h> #include <linux/slab.h> #include <linux/smp.h> #include <linux/types.h> #include <linux/wait.h> struct seq_file; /** * struct sbitmap_word - Word in a &struct sbitmap. */ struct sbitmap_word { /** * @word: word holding free bits */ unsigned long word; /** * @cleared: word holding cleared bits */ unsigned long cleared ____cacheline_aligned_in_smp; /** * @swap_lock: serializes simultaneous updates of ->word and ->cleared */ raw_spinlock_t swap_lock; } ____cacheline_aligned_in_smp; /** * struct sbitmap - Scalable bitmap. * * A &struct sbitmap is spread over multiple cachelines to avoid ping-pong. This * trades off higher memory usage for better scalability. */ struct sbitmap { /** * @depth: Number of bits used in the whole bitmap. */ unsigned int depth; /** * @shift: log2(number of bits used per word) */ unsigned int shift; /** * @map_nr: Number of words (cachelines) being used for the bitmap. */ unsigned int map_nr; /** * @round_robin: Allocate bits in strict round-robin order. */ bool round_robin; /** * @map: Allocated bitmap. */ struct sbitmap_word *map; /* * @alloc_hint: Cache of last successfully allocated or freed bit. * * This is per-cpu, which allows multiple users to stick to different * cachelines until the map is exhausted. */ unsigned int __percpu *alloc_hint; }; #define SBQ_WAIT_QUEUES 8 #define SBQ_WAKE_BATCH 8 /** * struct sbq_wait_state - Wait queue in a &struct sbitmap_queue. */ struct sbq_wait_state { /** * @wait: Wait queue. */ wait_queue_head_t wait; } ____cacheline_aligned_in_smp; /** * struct sbitmap_queue - Scalable bitmap with the added ability to wait on free * bits. * * A &struct sbitmap_queue uses multiple wait queues and rolling wakeups to * avoid contention on the wait queue spinlock. This ensures that we don't hit a * scalability wall when we run out of free bits and have to start putting tasks * to sleep. */ struct sbitmap_queue { /** * @sb: Scalable bitmap. */ struct sbitmap sb; /** * @wake_batch: Number of bits which must be freed before we wake up any * waiters. */ unsigned int wake_batch; /** * @wake_index: Next wait queue in @ws to wake up. */ atomic_t wake_index; /** * @ws: Wait queues. */ struct sbq_wait_state *ws; /* * @ws_active: count of currently active ws waitqueues */ atomic_t ws_active; /** * @min_shallow_depth: The minimum shallow depth which may be passed to * sbitmap_queue_get_shallow() */ unsigned int min_shallow_depth; /** * @completion_cnt: Number of bits cleared passed to the * wakeup function. */ atomic_t completion_cnt; /** * @wakeup_cnt: Number of thread wake ups issued. */ atomic_t wakeup_cnt; }; /** * sbitmap_init_node() - Initialize a &struct sbitmap on a specific memory node. * @sb: Bitmap to initialize. * @depth: Number of bits to allocate. * @shift: Use 2^@shift bits per word in the bitmap; if a negative number if * given, a good default is chosen. * @flags: Allocation flags. * @node: Memory node to allocate on. * @round_robin: If true, be stricter about allocation order; always allocate * starting from the last allocated bit. This is less efficient * than the default behavior (false). * @alloc_hint: If true, apply percpu hint for where to start searching for * a free bit. * * Return: Zero on success or negative errno on failure. */ int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, gfp_t flags, int node, bool round_robin, bool alloc_hint); /* sbitmap internal helper */ static inline unsigned int __map_depth(const struct sbitmap *sb, int index) { if (index == sb->map_nr - 1) return sb->depth - (index << sb->shift); return 1U << sb->shift; } /** * sbitmap_free() - Free memory used by a &struct sbitmap. * @sb: Bitmap to free. */ static inline void sbitmap_free(struct sbitmap *sb) { free_percpu(sb->alloc_hint); kvfree(sb->map); sb->map = NULL; } /** * sbitmap_resize() - Resize a &struct sbitmap. * @sb: Bitmap to resize. * @depth: New number of bits to resize to. * * Doesn't reallocate anything. It's up to the caller to ensure that the new * depth doesn't exceed the depth that the sb was initialized with. */ void sbitmap_resize(struct sbitmap *sb, unsigned int depth); /** * sbitmap_get() - Try to allocate a free bit from a &struct sbitmap. * @sb: Bitmap to allocate from. * * This operation provides acquire barrier semantics if it succeeds. * * Return: Non-negative allocated bit number if successful, -1 otherwise. */ int sbitmap_get(struct sbitmap *sb); /** * sbitmap_any_bit_set() - Check for a set bit in a &struct sbitmap. * @sb: Bitmap to check. * * Return: true if any bit in the bitmap is set, false otherwise. */ bool sbitmap_any_bit_set(const struct sbitmap *sb); #define SB_NR_TO_INDEX(sb, bitnr) ((bitnr) >> (sb)->shift) #define SB_NR_TO_BIT(sb, bitnr) ((bitnr) & ((1U << (sb)->shift) - 1U)) typedef bool (*sb_for_each_fn)(struct sbitmap *, unsigned int, void *); /** * __sbitmap_for_each_set() - Iterate over each set bit in a &struct sbitmap. * @start: Where to start the iteration. * @sb: Bitmap to iterate over. * @fn: Callback. Should return true to continue or false to break early. * @data: Pointer to pass to callback. * * This is inline even though it's non-trivial so that the function calls to the * callback will hopefully get optimized away. */ static inline void __sbitmap_for_each_set(struct sbitmap *sb, unsigned int start, sb_for_each_fn fn, void *data) { unsigned int index; unsigned int nr; unsigned int scanned = 0; if (start >= sb->depth) start = 0; index = SB_NR_TO_INDEX(sb, start); nr = SB_NR_TO_BIT(sb, start); while (scanned < sb->depth) { unsigned long word; unsigned int depth = min_t(unsigned int, __map_depth(sb, index) - nr, sb->depth - scanned); scanned += depth; word = sb->map[index].word & ~sb->map[index].cleared; if (!word) goto next; /* * On the first iteration of the outer loop, we need to add the * bit offset back to the size of the word for find_next_bit(). * On all other iterations, nr is zero, so this is a noop. */ depth += nr; while (1) { nr = find_next_bit(&word, depth, nr); if (nr >= depth) break; if (!fn(sb, (index << sb->shift) + nr, data)) return; nr++; } next: nr = 0; if (++index >= sb->map_nr) index = 0; } } /** * sbitmap_for_each_set() - Iterate over each set bit in a &struct sbitmap. * @sb: Bitmap to iterate over. * @fn: Callback. Should return true to continue or false to break early. * @data: Pointer to pass to callback. */ static inline void sbitmap_for_each_set(struct sbitmap *sb, sb_for_each_fn fn, void *data) { __sbitmap_for_each_set(sb, 0, fn, data); } static inline unsigned long *__sbitmap_word(struct sbitmap *sb, unsigned int bitnr) { return &sb->map[SB_NR_TO_INDEX(sb, bitnr)].word; } /* Helpers equivalent to the operations in asm/bitops.h and linux/bitmap.h */ static inline void sbitmap_set_bit(struct sbitmap *sb, unsigned int bitnr) { set_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr)); } static inline void sbitmap_clear_bit(struct sbitmap *sb, unsigned int bitnr) { clear_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr)); } /* * This one is special, since it doesn't actually clear the bit, rather it * sets the corresponding bit in the ->cleared mask instead. Paired with * the caller doing sbitmap_deferred_clear() if a given index is full, which * will clear the previously freed entries in the corresponding ->word. */ static inline void sbitmap_deferred_clear_bit(struct sbitmap *sb, unsigned int bitnr) { unsigned long *addr = &sb->map[SB_NR_TO_INDEX(sb, bitnr)].cleared; set_bit(SB_NR_TO_BIT(sb, bitnr), addr); } /* * Pair of sbitmap_get, and this one applies both cleared bit and * allocation hint. */ static inline void sbitmap_put(struct sbitmap *sb, unsigned int bitnr) { sbitmap_deferred_clear_bit(sb, bitnr); if (likely(sb->alloc_hint && !sb->round_robin && bitnr < sb->depth)) *raw_cpu_ptr(sb->alloc_hint) = bitnr; } static inline int sbitmap_test_bit(struct sbitmap *sb, unsigned int bitnr) { return test_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr)); } static inline int sbitmap_calculate_shift(unsigned int depth) { int shift = ilog2(BITS_PER_LONG); /* * If the bitmap is small, shrink the number of bits per word so * we spread over a few cachelines, at least. If less than 4 * bits, just forget about it, it's not going to work optimally * anyway. */ if (depth >= 4) { while ((4U << shift) > depth) shift--; } return shift; } /** * sbitmap_show() - Dump &struct sbitmap information to a &struct seq_file. * @sb: Bitmap to show. * @m: struct seq_file to write to. * * This is intended for debugging. The format may change at any time. */ void sbitmap_show(struct sbitmap *sb, struct seq_file *m); /** * sbitmap_weight() - Return how many set and not cleared bits in a &struct * sbitmap. * @sb: Bitmap to check. * * Return: How many set and not cleared bits set */ unsigned int sbitmap_weight(const struct sbitmap *sb); /** * sbitmap_bitmap_show() - Write a hex dump of a &struct sbitmap to a &struct * seq_file. * @sb: Bitmap to show. * @m: struct seq_file to write to. * * This is intended for debugging. The output isn't guaranteed to be internally * consistent. */ void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m); /** * sbitmap_queue_init_node() - Initialize a &struct sbitmap_queue on a specific * memory node. * @sbq: Bitmap queue to initialize. * @depth: See sbitmap_init_node(). * @shift: See sbitmap_init_node(). * @round_robin: See sbitmap_get(). * @flags: Allocation flags. * @node: Memory node to allocate on. * * Return: Zero on success or negative errno on failure. */ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth, int shift, bool round_robin, gfp_t flags, int node); /** * sbitmap_queue_free() - Free memory used by a &struct sbitmap_queue. * * @sbq: Bitmap queue to free. */ static inline void sbitmap_queue_free(struct sbitmap_queue *sbq) { kfree(sbq->ws); sbitmap_free(&sbq->sb); } /** * sbitmap_queue_recalculate_wake_batch() - Recalculate wake batch * @sbq: Bitmap queue to recalculate wake batch. * @users: Number of shares. * * Like sbitmap_queue_update_wake_batch(), this will calculate wake batch * by depth. This interface is for HCTX shared tags or queue shared tags. */ void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq, unsigned int users); /** * sbitmap_queue_resize() - Resize a &struct sbitmap_queue. * @sbq: Bitmap queue to resize. * @depth: New number of bits to resize to. * * Like sbitmap_resize(), this doesn't reallocate anything. It has to do * some extra work on the &struct sbitmap_queue, so it's not safe to just * resize the underlying &struct sbitmap. */ void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth); /** * __sbitmap_queue_get() - Try to allocate a free bit from a &struct * sbitmap_queue with preemption already disabled. * @sbq: Bitmap queue to allocate from. * * Return: Non-negative allocated bit number if successful, -1 otherwise. */ int __sbitmap_queue_get(struct sbitmap_queue *sbq); /** * __sbitmap_queue_get_batch() - Try to allocate a batch of free bits * @sbq: Bitmap queue to allocate from. * @nr_tags: number of tags requested * @offset: offset to add to returned bits * * Return: Mask of allocated tags, 0 if none are found. Each tag allocated is * a bit in the mask returned, and the caller must add @offset to the value to * get the absolute tag value. */ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags, unsigned int *offset); /** * sbitmap_queue_get_shallow() - Try to allocate a free bit from a &struct * sbitmap_queue, limiting the depth used from each word, with preemption * already disabled. * @sbq: Bitmap queue to allocate from. * @shallow_depth: The maximum number of bits to allocate from the queue. * See sbitmap_get_shallow(). * * If you call this, make sure to call sbitmap_queue_min_shallow_depth() after * initializing @sbq. * * Return: Non-negative allocated bit number if successful, -1 otherwise. */ int sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, unsigned int shallow_depth); /** * sbitmap_queue_get() - Try to allocate a free bit from a &struct * sbitmap_queue. * @sbq: Bitmap queue to allocate from. * @cpu: Output parameter; will contain the CPU we ran on (e.g., to be passed to * sbitmap_queue_clear()). * * Return: Non-negative allocated bit number if successful, -1 otherwise. */ static inline int sbitmap_queue_get(struct sbitmap_queue *sbq, unsigned int *cpu) { int nr; *cpu = get_cpu(); nr = __sbitmap_queue_get(sbq); put_cpu(); return nr; } /** * sbitmap_queue_min_shallow_depth() - Inform a &struct sbitmap_queue of the * minimum shallow depth that will be used. * @sbq: Bitmap queue in question. * @min_shallow_depth: The minimum shallow depth that will be passed to * sbitmap_queue_get_shallow() or __sbitmap_queue_get_shallow(). * * sbitmap_queue_clear() batches wakeups as an optimization. The batch size * depends on the depth of the bitmap. Since the shallow allocation functions * effectively operate with a different depth, the shallow depth must be taken * into account when calculating the batch size. This function must be called * with the minimum shallow depth that will be used. Failure to do so can result * in missed wakeups. */ void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq, unsigned int min_shallow_depth); /** * sbitmap_queue_clear() - Free an allocated bit and wake up waiters on a * &struct sbitmap_queue. * @sbq: Bitmap to free from. * @nr: Bit number to free. * @cpu: CPU the bit was allocated on. */ void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr, unsigned int cpu); /** * sbitmap_queue_clear_batch() - Free a batch of allocated bits * &struct sbitmap_queue. * @sbq: Bitmap to free from. * @offset: offset for each tag in array * @tags: array of tags * @nr_tags: number of tags in array */ void sbitmap_queue_clear_batch(struct sbitmap_queue *sbq, int offset, int *tags, int nr_tags); static inline int sbq_index_inc(int index) { return (index + 1) & (SBQ_WAIT_QUEUES - 1); } static inline void sbq_index_atomic_inc(atomic_t *index) { int old = atomic_read(index); int new = sbq_index_inc(old); atomic_cmpxchg(index, old, new); } /** * sbq_wait_ptr() - Get the next wait queue to use for a &struct * sbitmap_queue. * @sbq: Bitmap queue to wait on. * @wait_index: A counter per "user" of @sbq. */ static inline struct sbq_wait_state *sbq_wait_ptr(struct sbitmap_queue *sbq, atomic_t *wait_index) { struct sbq_wait_state *ws; ws = &sbq->ws[atomic_read(wait_index)]; sbq_index_atomic_inc(wait_index); return ws; } /** * sbitmap_queue_wake_all() - Wake up everything waiting on a &struct * sbitmap_queue. * @sbq: Bitmap queue to wake up. */ void sbitmap_queue_wake_all(struct sbitmap_queue *sbq); /** * sbitmap_queue_wake_up() - Wake up some of waiters in one waitqueue * on a &struct sbitmap_queue. * @sbq: Bitmap queue to wake up. * @nr: Number of bits cleared. */ void sbitmap_queue_wake_up(struct sbitmap_queue *sbq, int nr); /** * sbitmap_queue_show() - Dump &struct sbitmap_queue information to a &struct * seq_file. * @sbq: Bitmap queue to show. * @m: struct seq_file to write to. * * This is intended for debugging. The format may change at any time. */ void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m); struct sbq_wait { struct sbitmap_queue *sbq; /* if set, sbq_wait is accounted */ struct wait_queue_entry wait; }; #define DEFINE_SBQ_WAIT(name) \ struct sbq_wait name = { \ .sbq = NULL, \ .wait = { \ .private = current, \ .func = autoremove_wake_function, \ .entry = LIST_HEAD_INIT((name).wait.entry), \ } \ } /* * Wrapper around prepare_to_wait_exclusive(), which maintains some extra * internal state. */ void sbitmap_prepare_to_wait(struct sbitmap_queue *sbq, struct sbq_wait_state *ws, struct sbq_wait *sbq_wait, int state); /* * Must be paired with sbitmap_prepare_to_wait(). */ void sbitmap_finish_wait(struct sbitmap_queue *sbq, struct sbq_wait_state *ws, struct sbq_wait *sbq_wait); /* * Wrapper around add_wait_queue(), which maintains some extra internal state */ void sbitmap_add_wait_queue(struct sbitmap_queue *sbq, struct sbq_wait_state *ws, struct sbq_wait *sbq_wait); /* * Must be paired with sbitmap_add_wait_queue() */ void sbitmap_del_wait_queue(struct sbq_wait *sbq_wait); #endif /* __LINUX_SCALE_BITMAP_H */
75 36 45 101 43 1 100 75 74 75 21 21 113 1 93 43 28 27 22 1 1 3 1 17 13 1 3 11 11 1 4 6 20 16 7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/hfsplus/attributes.c * * Vyacheslav Dubeyko <slava@dubeyko.com> * * Handling of records in attributes tree */ #include "hfsplus_fs.h" #include "hfsplus_raw.h" static struct kmem_cache *hfsplus_attr_tree_cachep; int __init hfsplus_create_attr_tree_cache(void) { if (hfsplus_attr_tree_cachep) return -EEXIST; hfsplus_attr_tree_cachep = kmem_cache_create("hfsplus_attr_cache", sizeof(hfsplus_attr_entry), 0, SLAB_HWCACHE_ALIGN, NULL); if (!hfsplus_attr_tree_cachep) return -ENOMEM; return 0; } void hfsplus_destroy_attr_tree_cache(void) { kmem_cache_destroy(hfsplus_attr_tree_cachep); } int hfsplus_attr_bin_cmp_key(const hfsplus_btree_key *k1, const hfsplus_btree_key *k2) { __be32 k1_cnid, k2_cnid; k1_cnid = k1->attr.cnid; k2_cnid = k2->attr.cnid; if (k1_cnid != k2_cnid) return be32_to_cpu(k1_cnid) < be32_to_cpu(k2_cnid) ? -1 : 1; return hfsplus_strcmp( (const struct hfsplus_unistr *)&k1->attr.key_name, (const struct hfsplus_unistr *)&k2->attr.key_name); } int hfsplus_attr_build_key(struct super_block *sb, hfsplus_btree_key *key, u32 cnid, const char *name) { int len; memset(key, 0, sizeof(struct hfsplus_attr_key)); key->attr.cnid = cpu_to_be32(cnid); if (name) { int res = hfsplus_asc2uni(sb, (struct hfsplus_unistr *)&key->attr.key_name, HFSPLUS_ATTR_MAX_STRLEN, name, strlen(name)); if (res) return res; len = be16_to_cpu(key->attr.key_name.length); } else { key->attr.key_name.length = 0; len = 0; } /* The length of the key, as stored in key_len field, does not include * the size of the key_len field itself. * So, offsetof(hfsplus_attr_key, key_name) is a trick because * it takes into consideration key_len field (__be16) of * hfsplus_attr_key structure instead of length field (__be16) of * hfsplus_attr_unistr structure. */ key->key_len = cpu_to_be16(offsetof(struct hfsplus_attr_key, key_name) + 2 * len); return 0; } hfsplus_attr_entry *hfsplus_alloc_attr_entry(void) { return kmem_cache_alloc(hfsplus_attr_tree_cachep, GFP_KERNEL); } void hfsplus_destroy_attr_entry(hfsplus_attr_entry *entry) { if (entry) kmem_cache_free(hfsplus_attr_tree_cachep, entry); } #define HFSPLUS_INVALID_ATTR_RECORD -1 static int hfsplus_attr_build_record(hfsplus_attr_entry *entry, int record_type, u32 cnid, const void *value, size_t size) { if (record_type == HFSPLUS_ATTR_FORK_DATA) { /* * Mac OS X supports only inline data attributes. * Do nothing */ memset(entry, 0, sizeof(*entry)); return sizeof(struct hfsplus_attr_fork_data); } else if (record_type == HFSPLUS_ATTR_EXTENTS) { /* * Mac OS X supports only inline data attributes. * Do nothing. */ memset(entry, 0, sizeof(*entry)); return sizeof(struct hfsplus_attr_extents); } else if (record_type == HFSPLUS_ATTR_INLINE_DATA) { u16 len; memset(entry, 0, sizeof(struct hfsplus_attr_inline_data)); entry->inline_data.record_type = cpu_to_be32(record_type); if (size <= HFSPLUS_MAX_INLINE_DATA_SIZE) len = size; else return HFSPLUS_INVALID_ATTR_RECORD; entry->inline_data.length = cpu_to_be16(len); memcpy(entry->inline_data.raw_bytes, value, len); /* * Align len on two-byte boundary. * It needs to add pad byte if we have odd len. */ len = round_up(len, 2); return offsetof(struct hfsplus_attr_inline_data, raw_bytes) + len; } else /* invalid input */ memset(entry, 0, sizeof(*entry)); return HFSPLUS_INVALID_ATTR_RECORD; } int hfsplus_find_attr(struct super_block *sb, u32 cnid, const char *name, struct hfs_find_data *fd) { int err = 0; hfs_dbg("name %s, cnid %d\n", name ? name : NULL, cnid); if (!HFSPLUS_SB(sb)->attr_tree) { pr_err("attributes file doesn't exist\n"); return -EINVAL; } if (name) { err = hfsplus_attr_build_key(sb, fd->search_key, cnid, name); if (err) goto failed_find_attr; err = hfs_brec_find(fd, hfs_find_rec_by_key); if (err) goto failed_find_attr; } else { err = hfsplus_attr_build_key(sb, fd->search_key, cnid, NULL); if (err) goto failed_find_attr; err = hfs_brec_find(fd, hfs_find_1st_rec_by_cnid); if (err) goto failed_find_attr; } failed_find_attr: return err; } int hfsplus_attr_exists(struct inode *inode, const char *name) { int err = 0; struct super_block *sb = inode->i_sb; struct hfs_find_data fd; if (!HFSPLUS_SB(sb)->attr_tree) return 0; err = hfs_find_init(HFSPLUS_SB(sb)->attr_tree, &fd); if (err) return 0; err = hfsplus_find_attr(sb, inode->i_ino, name, &fd); if (err) goto attr_not_found; hfs_find_exit(&fd); return 1; attr_not_found: hfs_find_exit(&fd); return 0; } int hfsplus_create_attr(struct inode *inode, const char *name, const void *value, size_t size) { struct super_block *sb = inode->i_sb; struct hfs_find_data fd; hfsplus_attr_entry *entry_ptr; int entry_size; int err; hfs_dbg("name %s, ino %ld\n", name ? name : NULL, inode->i_ino); if (!HFSPLUS_SB(sb)->attr_tree) { pr_err("attributes file doesn't exist\n"); return -EINVAL; } entry_ptr = hfsplus_alloc_attr_entry(); if (!entry_ptr) return -ENOMEM; err = hfs_find_init(HFSPLUS_SB(sb)->attr_tree, &fd); if (err) goto failed_init_create_attr; /* Fail early and avoid ENOSPC during the btree operation */ err = hfs_bmap_reserve(fd.tree, fd.tree->depth + 1); if (err) goto failed_create_attr; if (name) { err = hfsplus_attr_build_key(sb, fd.search_key, inode->i_ino, name); if (err) goto failed_create_attr; } else { err = -EINVAL; goto failed_create_attr; } /* Mac OS X supports only inline data attributes. */ entry_size = hfsplus_attr_build_record(entry_ptr, HFSPLUS_ATTR_INLINE_DATA, inode->i_ino, value, size); if (entry_size == HFSPLUS_INVALID_ATTR_RECORD) { err = -EINVAL; goto failed_create_attr; } err = hfs_brec_find(&fd, hfs_find_rec_by_key); if (err != -ENOENT) { if (!err) err = -EEXIST; goto failed_create_attr; } err = hfs_brec_insert(&fd, entry_ptr, entry_size); if (err) goto failed_create_attr; hfsplus_mark_inode_dirty(inode, HFSPLUS_I_ATTR_DIRTY); failed_create_attr: hfs_find_exit(&fd); failed_init_create_attr: hfsplus_destroy_attr_entry(entry_ptr); return err; } static int __hfsplus_delete_attr(struct inode *inode, u32 cnid, struct hfs_find_data *fd) { int err = 0; __be32 found_cnid, record_type; hfs_bnode_read(fd->bnode, &found_cnid, fd->keyoffset + offsetof(struct hfsplus_attr_key, cnid), sizeof(__be32)); if (cnid != be32_to_cpu(found_cnid)) return -ENOENT; hfs_bnode_read(fd->bnode, &record_type, fd->entryoffset, sizeof(record_type)); switch (be32_to_cpu(record_type)) { case HFSPLUS_ATTR_INLINE_DATA: /* All is OK. Do nothing. */ break; case HFSPLUS_ATTR_FORK_DATA: case HFSPLUS_ATTR_EXTENTS: pr_err("only inline data xattr are supported\n"); return -EOPNOTSUPP; default: pr_err("invalid extended attribute record\n"); return -ENOENT; } /* Avoid btree corruption */ hfs_bnode_read(fd->bnode, fd->search_key, fd->keyoffset, fd->keylength); err = hfs_brec_remove(fd); if (err) return err; hfsplus_mark_inode_dirty(inode, HFSPLUS_I_ATTR_DIRTY); return err; } int hfsplus_delete_attr(struct inode *inode, const char *name) { int err = 0; struct super_block *sb = inode->i_sb; struct hfs_find_data fd; hfs_dbg("name %s, ino %ld\n", name ? name : NULL, inode->i_ino); if (!HFSPLUS_SB(sb)->attr_tree) { pr_err("attributes file doesn't exist\n"); return -EINVAL; } err = hfs_find_init(HFSPLUS_SB(sb)->attr_tree, &fd); if (err) return err; /* Fail early and avoid ENOSPC during the btree operation */ err = hfs_bmap_reserve(fd.tree, fd.tree->depth); if (err) goto out; if (name) { err = hfsplus_attr_build_key(sb, fd.search_key, inode->i_ino, name); if (err) goto out; } else { pr_err("invalid extended attribute name\n"); err = -EINVAL; goto out; } err = hfs_brec_find(&fd, hfs_find_rec_by_key); if (err) goto out; err = __hfsplus_delete_attr(inode, inode->i_ino, &fd); if (err) goto out; out: hfs_find_exit(&fd); return err; } int hfsplus_delete_all_attrs(struct inode *dir, u32 cnid) { int err = 0; struct hfs_find_data fd; hfs_dbg("cnid %d\n", cnid); if (!HFSPLUS_SB(dir->i_sb)->attr_tree) { pr_err("attributes file doesn't exist\n"); return -EINVAL; } err = hfs_find_init(HFSPLUS_SB(dir->i_sb)->attr_tree, &fd); if (err) return err; for (;;) { err = hfsplus_find_attr(dir->i_sb, cnid, NULL, &fd); if (err) { if (err != -ENOENT) pr_err("xattr search failed\n"); goto end_delete_all; } err = __hfsplus_delete_attr(dir, cnid, &fd); if (err) goto end_delete_all; } end_delete_all: hfs_find_exit(&fd); return err; }
2 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 // SPDX-License-Identifier: (GPL-2.0+ OR BSD-2-Clause) /* * Copyright (c) 2003, 2004 * Damien Bergamini <damien.bergamini@free.fr>. All rights reserved. * * Copyright (c) 2005-2007 Matthieu Castet <castet.matthieu@free.fr> * Copyright (c) 2005-2007 Stanislaw Gruszka <stf_xl@wp.pl> * * HISTORY : some part of the code was base on ueagle 1.3 BSD driver, * Damien Bergamini agree to put his code under a DUAL GPL/BSD license. * * The rest of the code was rewritten from scratch. */ #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/crc32.h> #include <linux/usb.h> #include <linux/firmware.h> #include <linux/ctype.h> #include <linux/sched.h> #include <linux/kthread.h> #include <linux/mutex.h> #include <linux/freezer.h> #include <linux/slab.h> #include <linux/kernel.h> #include <linux/unaligned.h> #include "usbatm.h" #define EAGLEUSBVERSION "ueagle 1.4" /* * Debug macros */ #define uea_dbg(usb_dev, format, args...) \ do { \ if (debug >= 1) \ dev_dbg(&(usb_dev)->dev, \ "[ueagle-atm dbg] %s: " format, \ __func__, ##args); \ } while (0) #define uea_vdbg(usb_dev, format, args...) \ do { \ if (debug >= 2) \ dev_dbg(&(usb_dev)->dev, \ "[ueagle-atm vdbg] " format, ##args); \ } while (0) #define uea_enters(usb_dev) \ uea_vdbg(usb_dev, "entering %s\n" , __func__) #define uea_leaves(usb_dev) \ uea_vdbg(usb_dev, "leaving %s\n" , __func__) #define uea_err(usb_dev, format, args...) \ dev_err(&(usb_dev)->dev , "[UEAGLE-ATM] " format , ##args) #define uea_warn(usb_dev, format, args...) \ dev_warn(&(usb_dev)->dev , "[Ueagle-atm] " format, ##args) #define uea_info(usb_dev, format, args...) \ dev_info(&(usb_dev)->dev , "[ueagle-atm] " format, ##args) struct intr_pkt; /* cmv's from firmware */ struct uea_cmvs_v1 { u32 address; u16 offset; u32 data; } __packed; struct uea_cmvs_v2 { u32 group; u32 address; u32 offset; u32 data; } __packed; /* information about currently processed cmv */ struct cmv_dsc_e1 { u8 function; u16 idx; u32 address; u16 offset; }; struct cmv_dsc_e4 { u16 function; u16 offset; u16 address; u16 group; }; union cmv_dsc { struct cmv_dsc_e1 e1; struct cmv_dsc_e4 e4; }; struct uea_softc { struct usb_device *usb_dev; struct usbatm_data *usbatm; int modem_index; unsigned int driver_info; int annex; #define ANNEXA 0 #define ANNEXB 1 int booting; int reset; wait_queue_head_t sync_q; struct task_struct *kthread; u32 data; u32 data1; int cmv_ack; union cmv_dsc cmv_dsc; struct work_struct task; u16 pageno; u16 ovl; const struct firmware *dsp_firm; struct urb *urb_int; void (*dispatch_cmv)(struct uea_softc *, struct intr_pkt *); void (*schedule_load_page)(struct uea_softc *, struct intr_pkt *); int (*stat)(struct uea_softc *); int (*send_cmvs)(struct uea_softc *); /* keep in sync with eaglectl */ struct uea_stats { struct { u32 state; u32 flags; u32 mflags; u32 vidcpe; u32 vidco; u32 dsrate; u32 usrate; u32 dsunc; u32 usunc; u32 dscorr; u32 uscorr; u32 txflow; u32 rxflow; u32 usattenuation; u32 dsattenuation; u32 dsmargin; u32 usmargin; u32 firmid; } phy; } stats; }; /* * Elsa IDs */ #define ELSA_VID 0x05CC #define ELSA_PID_PSTFIRM 0x3350 #define ELSA_PID_PREFIRM 0x3351 #define ELSA_PID_A_PREFIRM 0x3352 #define ELSA_PID_A_PSTFIRM 0x3353 #define ELSA_PID_B_PREFIRM 0x3362 #define ELSA_PID_B_PSTFIRM 0x3363 /* * Devolo IDs : pots if (pid & 0x10) */ #define DEVOLO_VID 0x1039 #define DEVOLO_EAGLE_I_A_PID_PSTFIRM 0x2110 #define DEVOLO_EAGLE_I_A_PID_PREFIRM 0x2111 #define DEVOLO_EAGLE_I_B_PID_PSTFIRM 0x2100 #define DEVOLO_EAGLE_I_B_PID_PREFIRM 0x2101 #define DEVOLO_EAGLE_II_A_PID_PSTFIRM 0x2130 #define DEVOLO_EAGLE_II_A_PID_PREFIRM 0x2131 #define DEVOLO_EAGLE_II_B_PID_PSTFIRM 0x2120 #define DEVOLO_EAGLE_II_B_PID_PREFIRM 0x2121 /* * Reference design USB IDs */ #define ANALOG_VID 0x1110 #define ADI930_PID_PREFIRM 0x9001 #define ADI930_PID_PSTFIRM 0x9000 #define EAGLE_I_PID_PREFIRM 0x9010 /* Eagle I */ #define EAGLE_I_PID_PSTFIRM 0x900F /* Eagle I */ #define EAGLE_IIC_PID_PREFIRM 0x9024