Total coverage: 257552 (17%)of 1558340
15 1 15 15 15 9 9 9 8 9 9 9 9 203 203 202 202 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 // SPDX-License-Identifier: GPL-2.0-or-later /* */ #include <linux/gfp.h> #include <linux/init.h> #include <linux/ratelimit.h> #include <linux/usb.h> #include <linux/usb/audio.h> #include <linux/slab.h> #include <sound/core.h> #include <sound/pcm.h> #include <sound/pcm_params.h> #include "usbaudio.h" #include "helper.h" #include "card.h" #include "endpoint.h" #include "pcm.h" #include "clock.h" #include "quirks.h" enum { EP_STATE_STOPPED, EP_STATE_RUNNING, EP_STATE_STOPPING, }; /* interface refcounting */ struct snd_usb_iface_ref { unsigned char iface; bool need_setup; int opened; int altset; struct list_head list; }; /* clock refcounting */ struct snd_usb_clock_ref { unsigned char clock; atomic_t locked; int opened; int rate; bool need_setup; struct list_head list; }; /* * snd_usb_endpoint is a model that abstracts everything related to an * USB endpoint and its streaming. * * There are functions to activate and deactivate the streaming URBs and * optional callbacks to let the pcm logic handle the actual content of the * packets for playback and record. Thus, the bus streaming and the audio * handlers are fully decoupled. * * There are two different types of endpoints in audio applications. * * SND_USB_ENDPOINT_TYPE_DATA handles full audio data payload for both * inbound and outbound traffic. * * SND_USB_ENDPOINT_TYPE_SYNC endpoints are for inbound traffic only and * expect the payload to carry Q10.14 / Q16.16 formatted sync information * (3 or 4 bytes). * * Each endpoint has to be configured prior to being used by calling * snd_usb_endpoint_set_params(). * * The model incorporates a reference counting, so that multiple users * can call snd_usb_endpoint_start() and snd_usb_endpoint_stop(), and * only the first user will effectively start the URBs, and only the last * one to stop it will tear the URBs down again. */ /* * convert a sampling rate into our full speed format (fs/1000 in Q16.16) * this will overflow at approx 524 kHz */ static inline unsigned get_usb_full_speed_rate(unsigned int rate) { return ((rate << 13) + 62) / 125; } /* * convert a sampling rate into USB high speed format (fs/8000 in Q16.16) * this will overflow at approx 4 MHz */ static inline unsigned get_usb_high_speed_rate(unsigned int rate) { return ((rate << 10) + 62) / 125; } /* * release a urb data */ static void release_urb_ctx(struct snd_urb_ctx *u) { if (u->urb && u->buffer_size) usb_free_coherent(u->ep->chip->dev, u->buffer_size, u->urb->transfer_buffer, u->urb->transfer_dma); usb_free_urb(u->urb); u->urb = NULL; u->buffer_size = 0; } static const char *usb_error_string(int err) { switch (err) { case -ENODEV: return "no device"; case -ENOENT: return "endpoint not enabled"; case -EPIPE: return "endpoint stalled"; case -ENOSPC: return "not enough bandwidth"; case -ESHUTDOWN: return "device disabled"; case -EHOSTUNREACH: return "device suspended"; case -EINVAL: case -EAGAIN: case -EFBIG: case -EMSGSIZE: return "internal error"; default: return "unknown error"; } } static inline bool ep_state_running(struct snd_usb_endpoint *ep) { return atomic_read(&ep->state) == EP_STATE_RUNNING; } static inline bool ep_state_update(struct snd_usb_endpoint *ep, int old, int new) { return atomic_try_cmpxchg(&ep->state, &old, new); } /** * snd_usb_endpoint_implicit_feedback_sink: Report endpoint usage type * * @ep: The snd_usb_endpoint * * Determine whether an endpoint is driven by an implicit feedback * data endpoint source. */ int snd_usb_endpoint_implicit_feedback_sink(struct snd_usb_endpoint *ep) { return ep->implicit_fb_sync && usb_pipeout(ep->pipe); } /* * Return the number of samples to be sent in the next packet * for streaming based on information derived from sync endpoints * * This won't be used for implicit feedback which takes the packet size * returned from the sync source */ static int slave_next_packet_size(struct snd_usb_endpoint *ep, unsigned int avail) { unsigned long flags; unsigned int phase; int ret; if (ep->fill_max) return ep->maxframesize; spin_lock_irqsave(&ep->lock, flags); phase = (ep->phase & 0xffff) + (ep->freqm << ep->datainterval); ret = min(phase >> 16, ep->maxframesize); if (avail && ret >= avail) ret = -EAGAIN; else ep->phase = phase; spin_unlock_irqrestore(&ep->lock, flags); return ret; } /* * Return the number of samples to be sent in the next packet * for adaptive and synchronous endpoints */ static int next_packet_size(struct snd_usb_endpoint *ep, unsigned int avail) { unsigned int sample_accum; int ret; if (ep->fill_max) return ep->maxframesize; sample_accum = ep->sample_accum + ep->sample_rem; if (sample_accum >= ep->pps) { sample_accum -= ep->pps; ret = ep->packsize[1]; } else { ret = ep->packsize[0]; } if (avail && ret >= avail) ret = -EAGAIN; else ep->sample_accum = sample_accum; return ret; } /* * snd_usb_endpoint_next_packet_size: Return the number of samples to be sent * in the next packet * * If the size is equal or exceeds @avail, don't proceed but return -EAGAIN * Exception: @avail = 0 for skipping the check. */ int snd_usb_endpoint_next_packet_size(struct snd_usb_endpoint *ep, struct snd_urb_ctx *ctx, int idx, unsigned int avail) { unsigned int packet; packet = ctx->packet_size[idx]; if (packet) { if (avail && packet >= avail) return -EAGAIN; return packet; } if (ep->sync_source) return slave_next_packet_size(ep, avail); else return next_packet_size(ep, avail); } static void call_retire_callback(struct snd_usb_endpoint *ep, struct urb *urb) { struct snd_usb_substream *data_subs; data_subs = READ_ONCE(ep->data_subs); if (data_subs && ep->retire_data_urb) ep->retire_data_urb(data_subs, urb); } static void retire_outbound_urb(struct snd_usb_endpoint *ep, struct snd_urb_ctx *urb_ctx) { call_retire_callback(ep, urb_ctx->urb); } static void snd_usb_handle_sync_urb(struct snd_usb_endpoint *ep, struct snd_usb_endpoint *sender, const struct urb *urb); static void retire_inbound_urb(struct snd_usb_endpoint *ep, struct snd_urb_ctx *urb_ctx) { struct urb *urb = urb_ctx->urb; struct snd_usb_endpoint *sync_sink; if (unlikely(ep->skip_packets > 0)) { ep->skip_packets--; return; } sync_sink = READ_ONCE(ep->sync_sink); if (sync_sink) snd_usb_handle_sync_urb(sync_sink, ep, urb); call_retire_callback(ep, urb); } static inline bool has_tx_length_quirk(struct snd_usb_audio *chip) { return chip->quirk_flags & QUIRK_FLAG_TX_LENGTH; } static void prepare_silent_urb(struct snd_usb_endpoint *ep, struct snd_urb_ctx *ctx) { struct urb *urb = ctx->urb; unsigned int offs = 0; unsigned int extra = 0; __le32 packet_length; int i; /* For tx_length_quirk, put packet length at start of packet */ if (has_tx_length_quirk(ep->chip)) extra = sizeof(packet_length); for (i = 0; i < ctx->packets; ++i) { unsigned int offset; unsigned int length; int counts; counts = snd_usb_endpoint_next_packet_size(ep, ctx, i, 0); length = counts * ep->stride; /* number of silent bytes */ offset = offs * ep->stride + extra * i; urb->iso_frame_desc[i].offset = offset; urb->iso_frame_desc[i].length = length + extra; if (extra) { packet_length = cpu_to_le32(length); memcpy(urb->transfer_buffer + offset, &packet_length, sizeof(packet_length)); } memset(urb->transfer_buffer + offset + extra, ep->silence_value, length); offs += counts; } urb->number_of_packets = ctx->packets; urb->transfer_buffer_length = offs * ep->stride + ctx->packets * extra; ctx->queued = 0; } /* * Prepare a PLAYBACK urb for submission to the bus. */ static int prepare_outbound_urb(struct snd_usb_endpoint *ep, struct snd_urb_ctx *ctx, bool in_stream_lock) { struct urb *urb = ctx->urb; unsigned char *cp = urb->transfer_buffer; struct snd_usb_substream *data_subs; urb->dev = ep->chip->dev; /* we need to set this at each time */ switch (ep->type) { case SND_USB_ENDPOINT_TYPE_DATA: data_subs = READ_ONCE(ep->data_subs); if (data_subs && ep->prepare_data_urb) return ep->prepare_data_urb(data_subs, urb, in_stream_lock); /* no data provider, so send silence */ prepare_silent_urb(ep, ctx); break; case SND_USB_ENDPOINT_TYPE_SYNC: if (snd_usb_get_speed(ep->chip->dev) >= USB_SPEED_HIGH) { /* * fill the length and offset of each urb descriptor. * the fixed 12.13 frequency is passed as 16.16 through the pipe. */ urb->iso_frame_desc[0].length = 4; urb->iso_frame_desc[0].offset = 0; cp[0] = ep->freqn; cp[1] = ep->freqn >> 8; cp[2] = ep->freqn >> 16; cp[3] = ep->freqn >> 24; } else { /* * fill the length and offset of each urb descriptor. * the fixed 10.14 frequency is passed through the pipe. */ urb->iso_frame_desc[0].length = 3; urb->iso_frame_desc[0].offset = 0; cp[0] = ep->freqn >> 2; cp[1] = ep->freqn >> 10; cp[2] = ep->freqn >> 18; } break; } return 0; } /* * Prepare a CAPTURE or SYNC urb for submission to the bus. */ static int prepare_inbound_urb(struct snd_usb_endpoint *ep, struct snd_urb_ctx *urb_ctx) { int i, offs; struct urb *urb = urb_ctx->urb; urb->dev = ep->chip->dev; /* we need to set this at each time */ switch (ep->type) { case SND_USB_ENDPOINT_TYPE_DATA: offs = 0; for (i = 0; i < urb_ctx->packets; i++) { urb->iso_frame_desc[i].offset = offs; urb->iso_frame_desc[i].length = ep->curpacksize; offs += ep->curpacksize; } urb->transfer_buffer_length = offs; urb->number_of_packets = urb_ctx->packets; break; case SND_USB_ENDPOINT_TYPE_SYNC: urb->iso_frame_desc[0].length = min(4u, ep->syncmaxsize); urb->iso_frame_desc[0].offset = 0; break; } return 0; } /* notify an error as XRUN to the assigned PCM data substream */ static void notify_xrun(struct snd_usb_endpoint *ep) { struct snd_usb_substream *data_subs; data_subs = READ_ONCE(ep->data_subs); if (data_subs && data_subs->pcm_substream) snd_pcm_stop_xrun(data_subs->pcm_substream); } static struct snd_usb_packet_info * next_packet_fifo_enqueue(struct snd_usb_endpoint *ep) { struct snd_usb_packet_info *p; p = ep->next_packet + (ep->next_packet_head + ep->next_packet_queued) % ARRAY_SIZE(ep->next_packet); ep->next_packet_queued++; return p; } static struct snd_usb_packet_info * next_packet_fifo_dequeue(struct snd_usb_endpoint *ep) { struct snd_usb_packet_info *p; p = ep->next_packet + ep->next_packet_head; ep->next_packet_head++; ep->next_packet_head %= ARRAY_SIZE(ep->next_packet); ep->next_packet_queued--; return p; } static void push_back_to_ready_list(struct snd_usb_endpoint *ep, struct snd_urb_ctx *ctx) { unsigned long flags; spin_lock_irqsave(&ep->lock, flags); list_add_tail(&ctx->ready_list, &ep->ready_playback_urbs); spin_unlock_irqrestore(&ep->lock, flags); } /* * Send output urbs that have been prepared previously. URBs are dequeued * from ep->ready_playback_urbs and in case there aren't any available * or there are no packets that have been prepared, this function does * nothing. * * The reason why the functionality of sending and preparing URBs is separated * is that host controllers don't guarantee the order in which they return * inbound and outbound packets to their submitters. * * This function is used both for implicit feedback endpoints and in low- * latency playback mode. */ int snd_usb_queue_pending_output_urbs(struct snd_usb_endpoint *ep, bool in_stream_lock) { bool implicit_fb = snd_usb_endpoint_implicit_feedback_sink(ep); while (ep_state_running(ep)) { unsigned long flags; struct snd_usb_packet_info *packet; struct snd_urb_ctx *ctx = NULL; int err, i; spin_lock_irqsave(&ep->lock, flags); if ((!implicit_fb || ep->next_packet_queued > 0) && !list_empty(&ep->ready_playback_urbs)) { /* take URB out of FIFO */ ctx = list_first_entry(&ep->ready_playback_urbs, struct snd_urb_ctx, ready_list); list_del_init(&ctx->ready_list); if (implicit_fb) packet = next_packet_fifo_dequeue(ep); } spin_unlock_irqrestore(&ep->lock, flags); if (ctx == NULL) break; /* copy over the length information */ if (implicit_fb) { for (i = 0; i < packet->packets; i++) ctx->packet_size[i] = packet->packet_size[i]; } /* call the data handler to fill in playback data */ err = prepare_outbound_urb(ep, ctx, in_stream_lock); /* can be stopped during prepare callback */ if (unlikely(!ep_state_running(ep))) break; if (err < 0) { /* push back to ready list again for -EAGAIN */ if (err == -EAGAIN) { push_back_to_ready_list(ep, ctx); break; } if (!in_stream_lock) notify_xrun(ep); return -EPIPE; } if (!atomic_read(&ep->chip->shutdown)) err = usb_submit_urb(ctx->urb, GFP_ATOMIC); else err = -ENODEV; if (err < 0) { if (!atomic_read(&ep->chip->shutdown)) { usb_audio_err(ep->chip, "Unable to submit urb #%d: %d at %s\n", ctx->index, err, __func__); if (!in_stream_lock) notify_xrun(ep); } return -EPIPE; } set_bit(ctx->index, &ep->active_mask); atomic_inc(&ep->submitted_urbs); } return 0; } /* * complete callback for urbs */ static void snd_complete_urb(struct urb *urb) { struct snd_urb_ctx *ctx = urb->context; struct snd_usb_endpoint *ep = ctx->ep; int err; if (unlikely(urb->status == -ENOENT || /* unlinked */ urb->status == -ENODEV || /* device removed */ urb->status == -ECONNRESET || /* unlinked */ urb->status == -ESHUTDOWN)) /* device disabled */ goto exit_clear; /* device disconnected */ if (unlikely(atomic_read(&ep->chip->shutdown))) goto exit_clear; if (unlikely(!ep_state_running(ep))) goto exit_clear; if (usb_pipeout(ep->pipe)) { retire_outbound_urb(ep, ctx); /* can be stopped during retire callback */ if (unlikely(!ep_state_running(ep))) goto exit_clear; /* in low-latency and implicit-feedback modes, push back the * URB to ready list at first, then process as much as possible */ if (ep->lowlatency_playback || snd_usb_endpoint_implicit_feedback_sink(ep)) { push_back_to_ready_list(ep, ctx); clear_bit(ctx->index, &ep->active_mask); snd_usb_queue_pending_output_urbs(ep, false); atomic_dec(&ep->submitted_urbs); /* decrement at last */ return; } /* in non-lowlatency mode, no error handling for prepare */ prepare_outbound_urb(ep, ctx, false); /* can be stopped during prepare callback */ if (unlikely(!ep_state_running(ep))) goto exit_clear; } else { retire_inbound_urb(ep, ctx); /* can be stopped during retire callback */ if (unlikely(!ep_state_running(ep))) goto exit_clear; prepare_inbound_urb(ep, ctx); } if (!atomic_read(&ep->chip->shutdown)) err = usb_submit_urb(urb, GFP_ATOMIC); else err = -ENODEV; if (err == 0) return; if (!atomic_read(&ep->chip->shutdown)) { usb_audio_err(ep->chip, "cannot submit urb (err = %d)\n", err); notify_xrun(ep); } exit_clear: clear_bit(ctx->index, &ep->active_mask); atomic_dec(&ep->submitted_urbs); } /* * Find or create a refcount object for the given interface * * The objects are released altogether in snd_usb_endpoint_free_all() */ static struct snd_usb_iface_ref * iface_ref_find(struct snd_usb_audio *chip, int iface) { struct snd_usb_iface_ref *ip; list_for_each_entry(ip, &chip->iface_ref_list, list) if (ip->iface == iface) return ip; ip = kzalloc(sizeof(*ip), GFP_KERNEL); if (!ip) return NULL; ip->iface = iface; list_add_tail(&ip->list, &chip->iface_ref_list); return ip; } /* Similarly, a refcount object for clock */ static struct snd_usb_clock_ref * clock_ref_find(struct snd_usb_audio *chip, int clock) { struct snd_usb_clock_ref *ref; list_for_each_entry(ref, &chip->clock_ref_list, list) if (ref->clock == clock) return ref; ref = kzalloc(sizeof(*ref), GFP_KERNEL); if (!ref) return NULL; ref->clock = clock; atomic_set(&ref->locked, 0); list_add_tail(&ref->list, &chip->clock_ref_list); return ref; } /* * Get the existing endpoint object corresponding EP * Returns NULL if not present. */ struct snd_usb_endpoint * snd_usb_get_endpoint(struct snd_usb_audio *chip, int ep_num) { struct snd_usb_endpoint *ep; list_for_each_entry(ep, &chip->ep_list, list) { if (ep->ep_num == ep_num) return ep; } return NULL; } #define ep_type_name(type) \ (type == SND_USB_ENDPOINT_TYPE_DATA ? "data" : "sync") /** * snd_usb_add_endpoint: Add an endpoint to an USB audio chip * * @chip: The chip * @ep_num: The number of the endpoint to use * @type: SND_USB_ENDPOINT_TYPE_DATA or SND_USB_ENDPOINT_TYPE_SYNC * * If the requested endpoint has not been added to the given chip before, * a new instance is created. * * Returns zero on success or a negative error code. * * New endpoints will be added to chip->ep_list and freed by * calling snd_usb_endpoint_free_all(). * * For SND_USB_ENDPOINT_TYPE_SYNC, the caller needs to guarantee that * bNumEndpoints > 1 beforehand. */ int snd_usb_add_endpoint(struct snd_usb_audio *chip, int ep_num, int type) { struct snd_usb_endpoint *ep; bool is_playback; ep = snd_usb_get_endpoint(chip, ep_num); if (ep) return 0; usb_audio_dbg(chip, "Creating new %s endpoint #%x\n", ep_type_name(type), ep_num); ep = kzalloc(sizeof(*ep), GFP_KERNEL); if (!ep) return -ENOMEM; ep->chip = chip; spin_lock_init(&ep->lock); ep->type = type; ep->ep_num = ep_num; INIT_LIST_HEAD(&ep->ready_playback_urbs); atomic_set(&ep->submitted_urbs, 0); is_playback = ((ep_num & USB_ENDPOINT_DIR_MASK) == USB_DIR_OUT); ep_num &= USB_ENDPOINT_NUMBER_MASK; if (is_playback) ep->pipe = usb_sndisocpipe(chip->dev, ep_num); else ep->pipe = usb_rcvisocpipe(chip->dev, ep_num); list_add_tail(&ep->list, &chip->ep_list); return 0; } /* Set up syncinterval and maxsyncsize for a sync EP */ static void endpoint_set_syncinterval(struct snd_usb_audio *chip, struct snd_usb_endpoint *ep) { struct usb_host_interface *alts; struct usb_endpoint_descriptor *desc; alts = snd_usb_get_host_interface(chip, ep->iface, ep->altsetting); if (!alts) return; desc = get_endpoint(alts, ep->ep_idx); if (desc->bLength >= USB_DT_ENDPOINT_AUDIO_SIZE && desc->bRefresh >= 1 && desc->bRefresh <= 9) ep->syncinterval = desc->bRefresh; else if (snd_usb_get_speed(chip->dev) == USB_SPEED_FULL) ep->syncinterval = 1; else if (desc->bInterval >= 1 && desc->bInterval <= 16) ep->syncinterval = desc->bInterval - 1; else ep->syncinterval = 3; ep->syncmaxsize = le16_to_cpu(desc->wMaxPacketSize); } static bool endpoint_compatible(struct snd_usb_endpoint *ep, const struct audioformat *fp, const struct snd_pcm_hw_params *params) { if (!ep->opened) return false; if (ep->cur_audiofmt != fp) return false; if (ep->cur_rate != params_rate(params) || ep->cur_format != params_format(params) || ep->cur_period_frames != params_period_size(params) || ep->cur_buffer_periods != params_periods(params)) return false; return true; } /* * Check whether the given fp and hw params are compatible with the current * setup of the target EP for implicit feedback sync */ bool snd_usb_endpoint_compatible(struct snd_usb_audio *chip, struct snd_usb_endpoint *ep, const struct audioformat *fp, const struct snd_pcm_hw_params *params) { bool ret; mutex_lock(&chip->mutex); ret = endpoint_compatible(ep, fp, params); mutex_unlock(&chip->mutex); return ret; } /* * snd_usb_endpoint_open: Open the endpoint * * Called from hw_params to assign the endpoint to the substream. * It's reference-counted, and only the first opener is allowed to set up * arbitrary parameters. The later opener must be compatible with the * former opened parameters. * The endpoint needs to be closed via snd_usb_endpoint_close() later. * * Note that this function doesn't configure the endpoint. The substream * needs to set it up later via snd_usb_endpoint_set_params() and * snd_usb_endpoint_prepare(). */ struct snd_usb_endpoint * snd_usb_endpoint_open(struct snd_usb_audio *chip, const struct audioformat *fp, const struct snd_pcm_hw_params *params, bool is_sync_ep, bool fixed_rate) { struct snd_usb_endpoint *ep; int ep_num = is_sync_ep ? fp->sync_ep : fp->endpoint; mutex_lock(&chip->mutex); ep = snd_usb_get_endpoint(chip, ep_num); if (!ep) { usb_audio_err(chip, "Cannot find EP 0x%x to open\n", ep_num); goto unlock; } if (!ep->opened) { if (is_sync_ep) { ep->iface = fp->sync_iface; ep->altsetting = fp->sync_altsetting; ep->ep_idx = fp->sync_ep_idx; } else { ep->iface = fp->iface; ep->altsetting = fp->altsetting; ep->ep_idx = fp->ep_idx; } usb_audio_dbg(chip, "Open EP 0x%x, iface=%d:%d, idx=%d\n", ep_num, ep->iface, ep->altsetting, ep->ep_idx); ep->iface_ref = iface_ref_find(chip, ep->iface); if (!ep->iface_ref) { ep = NULL; goto unlock; } if (fp->protocol != UAC_VERSION_1) { ep->clock_ref = clock_ref_find(chip, fp->clock); if (!ep->clock_ref) { ep = NULL; goto unlock; } ep->clock_ref->opened++; } ep->cur_audiofmt = fp; ep->cur_channels = fp->channels; ep->cur_rate = params_rate(params); ep->cur_format = params_format(params); ep->cur_frame_bytes = snd_pcm_format_physical_width(ep->cur_format) * ep->cur_channels / 8; ep->cur_period_frames = params_period_size(params); ep->cur_period_bytes = ep->cur_period_frames * ep->cur_frame_bytes; ep->cur_buffer_periods = params_periods(params); if (ep->type == SND_USB_ENDPOINT_TYPE_SYNC) endpoint_set_syncinterval(chip, ep); ep->implicit_fb_sync = fp->implicit_fb; ep->need_setup = true; ep->need_prepare = true; ep->fixed_rate = fixed_rate; usb_audio_dbg(chip, " channels=%d, rate=%d, format=%s, period_bytes=%d, periods=%d, implicit_fb=%d\n", ep->cur_channels, ep->cur_rate, snd_pcm_format_name(ep->cur_format), ep->cur_period_bytes, ep->cur_buffer_periods, ep->implicit_fb_sync); } else { if (WARN_ON(!ep->iface_ref)) { ep = NULL; goto unlock; } if (!endpoint_compatible(ep, fp, params)) { usb_audio_err(chip, "Incompatible EP setup for 0x%x\n", ep_num); ep = NULL; goto unlock; } usb_audio_dbg(chip, "Reopened EP 0x%x (count %d)\n", ep_num, ep->opened); } if (!ep->iface_ref->opened++) ep->iface_ref->need_setup = true; ep->opened++; unlock: mutex_unlock(&chip->mutex); return ep; } /* * snd_usb_endpoint_set_sync: Link data and sync endpoints * * Pass NULL to sync_ep to unlink again */ void snd_usb_endpoint_set_sync(struct snd_usb_audio *chip, struct snd_usb_endpoint *data_ep, struct snd_usb_endpoint *sync_ep) { data_ep->sync_source = sync_ep; } /* * Set data endpoint callbacks and the assigned data stream * * Called at PCM trigger and cleanups. * Pass NULL to deactivate each callback. */ void snd_usb_endpoint_set_callback(struct snd_usb_endpoint *ep, int (*prepare)(struct snd_usb_substream *subs, struct urb *urb, bool in_stream_lock), void (*retire)(struct snd_usb_substream *subs, struct urb *urb), struct snd_usb_substream *data_subs) { ep->prepare_data_urb = prepare; ep->retire_data_urb = retire; if (data_subs) ep->lowlatency_playback = data_subs->lowlatency_playback; else ep->lowlatency_playback = false; WRITE_ONCE(ep->data_subs, data_subs); } static int endpoint_set_interface(struct snd_usb_audio *chip, struct snd_usb_endpoint *ep, bool set) { int altset = set ? ep->altsetting : 0; int err; if (ep->iface_ref->altset == altset) return 0; /* already disconnected? */ if (unlikely(atomic_read(&chip->shutdown))) return -ENODEV; usb_audio_dbg(chip, "Setting usb interface %d:%d for EP 0x%x\n", ep->iface, altset, ep->ep_num); err = usb_set_interface(chip->dev, ep->iface, altset); if (err < 0) { usb_audio_err_ratelimited( chip, "%d:%d: usb_set_interface failed (%d)\n", ep->iface, altset, err); return err; } if (chip->quirk_flags & QUIRK_FLAG_IFACE_DELAY) msleep(50); ep->iface_ref->altset = altset; return 0; } /* * snd_usb_endpoint_close: Close the endpoint * * Unreference the already opened endpoint via snd_usb_endpoint_open(). */ void snd_usb_endpoint_close(struct snd_usb_audio *chip, struct snd_usb_endpoint *ep) { mutex_lock(&chip->mutex); usb_audio_dbg(chip, "Closing EP 0x%x (count %d)\n", ep->ep_num, ep->opened); if (!--ep->iface_ref->opened && !(chip->quirk_flags & QUIRK_FLAG_IFACE_SKIP_CLOSE)) endpoint_set_interface(chip, ep, false); if (!--ep->opened) { if (ep->clock_ref) { if (!--ep->clock_ref->opened) ep->clock_ref->rate = 0; } ep->iface = 0; ep->altsetting = 0; ep->cur_audiofmt = NULL; ep->cur_rate = 0; ep->iface_ref = NULL; ep->clock_ref = NULL; usb_audio_dbg(chip, "EP 0x%x closed\n", ep->ep_num); } mutex_unlock(&chip->mutex); } /* Prepare for suspening EP, called from the main suspend handler */ void snd_usb_endpoint_suspend(struct snd_usb_endpoint *ep) { ep->need_prepare = true; if (ep->iface_ref) ep->iface_ref->need_setup = true; if (ep->clock_ref) ep->clock_ref->rate = 0; } /* * wait until all urbs are processed. */ static int wait_clear_urbs(struct snd_usb_endpoint *ep) { unsigned long end_time = jiffies + msecs_to_jiffies(1000); int alive; if (atomic_read(&ep->state) != EP_STATE_STOPPING) return 0; do { alive = atomic_read(&ep->submitted_urbs); if (!alive) break; schedule_timeout_uninterruptible(1); } while (time_before(jiffies, end_time)); if (alive) usb_audio_err(ep->chip, "timeout: still %d active urbs on EP #%x\n", alive, ep->ep_num); if (ep_state_update(ep, EP_STATE_STOPPING, EP_STATE_STOPPED)) { ep->sync_sink = NULL; snd_usb_endpoint_set_callback(ep, NULL, NULL, NULL); } return 0; } /* sync the pending stop operation; * this function itself doesn't trigger the stop operation */ void snd_usb_endpoint_sync_pending_stop(struct snd_usb_endpoint *ep) { if (ep) wait_clear_urbs(ep); } /* * Stop active urbs * * This function moves the EP to STOPPING state if it's being RUNNING. */ static int stop_urbs(struct snd_usb_endpoint *ep, bool force, bool keep_pending) { unsigned int i; unsigned long flags; if (!force && atomic_read(&ep->running)) return -EBUSY; if (!ep_state_update(ep, EP_STATE_RUNNING, EP_STATE_STOPPING)) return 0; spin_lock_irqsave(&ep->lock, flags); INIT_LIST_HEAD(&ep->ready_playback_urbs); ep->next_packet_head = 0; ep->next_packet_queued = 0; spin_unlock_irqrestore(&ep->lock, flags); if (keep_pending) return 0; for (i = 0; i < ep->nurbs; i++) { if (test_bit(i, &ep->active_mask)) { if (!test_and_set_bit(i, &ep->unlink_mask)) { struct urb *u = ep->urb[i].urb; usb_unlink_urb(u); } } } return 0; } /* * release an endpoint's urbs */ static int release_urbs(struct snd_usb_endpoint *ep, bool force) { int i, err; /* route incoming urbs to nirvana */ snd_usb_endpoint_set_callback(ep, NULL, NULL, NULL); /* stop and unlink urbs */ err = stop_urbs(ep, force, false); if (err) return err; wait_clear_urbs(ep); for (i = 0; i < ep->nurbs; i++) release_urb_ctx(&ep->urb[i]); usb_free_coherent(ep->chip->dev, SYNC_URBS * 4, ep->syncbuf, ep->sync_dma); ep->syncbuf = NULL; ep->nurbs = 0; return 0; } /* * configure a data endpoint */ static int data_ep_set_params(struct snd_usb_endpoint *ep) { struct snd_usb_audio *chip = ep->chip; unsigned int maxsize, minsize, packs_per_ms, max_packs_per_urb; unsigned int max_packs_per_period, urbs_per_period, urb_packs; unsigned int max_urbs, i; const struct audioformat *fmt = ep->cur_audiofmt; int frame_bits = ep->cur_frame_bytes * 8; int tx_length_quirk = (has_tx_length_quirk(chip) && usb_pipeout(ep->pipe)); usb_audio_dbg(chip, "Setting params for data EP 0x%x, pipe 0x%x\n", ep->ep_num, ep->pipe); if (ep->cur_format == SNDRV_PCM_FORMAT_DSD_U16_LE && fmt->dsd_dop) { /* * When operating in DSD DOP mode, the size of a sample frame * in hardware differs from the actual physical format width * because we need to make room for the DOP markers. */ frame_bits += ep->cur_channels << 3; } ep->datainterval = fmt->datainterval; ep->stride = frame_bits >> 3; switch (ep->cur_format) { case SNDRV_PCM_FORMAT_U8: ep->silence_value = 0x80; break; case SNDRV_PCM_FORMAT_DSD_U8: case SNDRV_PCM_FORMAT_DSD_U16_LE: case SNDRV_PCM_FORMAT_DSD_U32_LE: case SNDRV_PCM_FORMAT_DSD_U16_BE: case SNDRV_PCM_FORMAT_DSD_U32_BE: ep->silence_value = 0x69; break; default: ep->silence_value = 0; } /* assume max. frequency is 50% higher than nominal */ ep->freqmax = ep->freqn + (ep->freqn >> 1); /* Round up freqmax to nearest integer in order to calculate maximum * packet size, which must represent a whole number of frames. * This is accomplished by adding 0x0.ffff before converting the * Q16.16 format into integer. * In order to accurately calculate the maximum packet size when * the data interval is more than 1 (i.e. ep->datainterval > 0), * multiply by the data interval prior to rounding. For instance, * a freqmax of 41 kHz will result in a max packet size of 6 (5.125) * frames with a data interval of 1, but 11 (10.25) frames with a * data interval of 2. * (ep->freqmax << ep->datainterval overflows at 8.192 MHz for the * maximum datainterval value of 3, at USB full speed, higher for * USB high speed, noting that ep->freqmax is in units of * frames per packet in Q16.16 format.) */ maxsize = (((ep->freqmax << ep->datainterval) + 0xffff) >> 16) * (frame_bits >> 3); if (tx_length_quirk) maxsize += sizeof(__le32); /* Space for length descriptor */ /* but wMaxPacketSize might reduce this */ if (ep->maxpacksize && ep->maxpacksize < maxsize) { /* whatever fits into a max. size packet */ unsigned int data_maxsize = maxsize = ep->maxpacksize; if (tx_length_quirk) /* Need to remove the length descriptor to calc freq */ data_maxsize -= sizeof(__le32); ep->freqmax = (data_maxsize / (frame_bits >> 3)) << (16 - ep->datainterval); } if (ep->fill_max) ep->curpacksize = ep->maxpacksize; else ep->curpacksize = maxsize; if (snd_usb_get_speed(chip->dev) != USB_SPEED_FULL) { packs_per_ms = 8 >> ep->datainterval; max_packs_per_urb = MAX_PACKS_HS; } else { packs_per_ms = 1; max_packs_per_urb = MAX_PACKS; } if (ep->sync_source && !ep->implicit_fb_sync) max_packs_per_urb = min(max_packs_per_urb, 1U << ep->sync_source->syncinterval); max_packs_per_urb = max(1u, max_packs_per_urb >> ep->datainterval); /* * Capture endpoints need to use small URBs because there's no way * to tell in advance where the next period will end, and we don't * want the next URB to complete much after the period ends. * * Playback endpoints with implicit sync much use the same parameters * as their corresponding capture endpoint. */ if (usb_pipein(ep->pipe) || ep->implicit_fb_sync) { /* make capture URBs <= 1 ms and smaller than a period */ urb_packs = min(max_packs_per_urb, packs_per_ms); while (urb_packs > 1 && urb_packs * maxsize >= ep->cur_period_bytes) urb_packs >>= 1; ep->nurbs = MAX_URBS; /* * Playback endpoints without implicit sync are adjusted so that * a period fits as evenly as possible in the smallest number of * URBs. The total number of URBs is adjusted to the size of the * ALSA buffer, subject to the MAX_URBS and MAX_QUEUE limits. */ } else { /* determine how small a packet can be */ minsize = (ep->freqn >> (16 - ep->datainterval)) * (frame_bits >> 3); /* with sync from device, assume it can be 12% lower */ if (ep->sync_source) minsize -= minsize >> 3; minsize = max(minsize, 1u); /* how many packets will contain an entire ALSA period? */ max_packs_per_period = DIV_ROUND_UP(ep->cur_period_bytes, minsize); /* how many URBs will contain a period? */ urbs_per_period = DIV_ROUND_UP(max_packs_per_period, max_packs_per_urb); /* how many packets are needed in each URB? */ urb_packs = DIV_ROUND_UP(max_packs_per_period, urbs_per_period); /* limit the number of frames in a single URB */ ep->max_urb_frames = DIV_ROUND_UP(ep->cur_period_frames, urbs_per_period); /* try to use enough URBs to contain an entire ALSA buffer */ max_urbs = min((unsigned) MAX_URBS, MAX_QUEUE * packs_per_ms / urb_packs); ep->nurbs = min(max_urbs, urbs_per_period * ep->cur_buffer_periods); } /* allocate and initialize data urbs */ for (i = 0; i < ep->nurbs; i++) { struct snd_urb_ctx *u = &ep->urb[i]; u->index = i; u->ep = ep; u->packets = urb_packs; u->buffer_size = maxsize * u->packets; if (fmt->fmt_type == UAC_FORMAT_TYPE_II) u->packets++; /* for transfer delimiter */ u->urb = usb_alloc_urb(u->packets, GFP_KERNEL); if (!u->urb) goto out_of_memory; u->urb->transfer_buffer = usb_alloc_coherent(chip->dev, u->buffer_size, GFP_KERNEL, &u->urb->transfer_dma); if (!u->urb->transfer_buffer) goto out_of_memory; u->urb->pipe = ep->pipe; u->urb->transfer_flags = URB_NO_TRANSFER_DMA_MAP; u->urb->interval = 1 << ep->datainterval; u->urb->context = u; u->urb->complete = snd_complete_urb; INIT_LIST_HEAD(&u->ready_list); } return 0; out_of_memory: release_urbs(ep, false); return -ENOMEM; } /* * configure a sync endpoint */ static int sync_ep_set_params(struct snd_usb_endpoint *ep) { struct snd_usb_audio *chip = ep->chip; int i; usb_audio_dbg(chip, "Setting params for sync EP 0x%x, pipe 0x%x\n", ep->ep_num, ep->pipe); ep->syncbuf = usb_alloc_coherent(chip->dev, SYNC_URBS * 4, GFP_KERNEL, &ep->sync_dma); if (!ep->syncbuf) return -ENOMEM; ep->nurbs = SYNC_URBS; for (i = 0; i < SYNC_URBS; i++) { struct snd_urb_ctx *u = &ep->urb[i]; u->index = i; u->ep = ep; u->packets = 1; u->urb = usb_alloc_urb(1, GFP_KERNEL); if (!u->urb) goto out_of_memory; u->urb->transfer_buffer = ep->syncbuf + i * 4; u->urb->transfer_dma = ep->sync_dma + i * 4; u->urb->transfer_buffer_length = 4; u->urb->pipe = ep->pipe; u->urb->transfer_flags = URB_NO_TRANSFER_DMA_MAP; u->urb->number_of_packets = 1; u->urb->interval = 1 << ep->syncinterval; u->urb->context = u; u->urb->complete = snd_complete_urb; } return 0; out_of_memory: release_urbs(ep, false); return -ENOMEM; } /* update the rate of the referred clock; return the actual rate */ static int update_clock_ref_rate(struct snd_usb_audio *chip, struct snd_usb_endpoint *ep) { struct snd_usb_clock_ref *clock = ep->clock_ref; int rate = ep->cur_rate; if (!clock || clock->rate == rate) return rate; if (clock->rate) { if (atomic_read(&clock->locked)) return clock->rate; if (clock->rate != rate) { usb_audio_err(chip, "Mismatched sample rate %d vs %d for EP 0x%x\n", clock->rate, rate, ep->ep_num); return clock->rate; } } clock->rate = rate; clock->need_setup = true; return rate; } /* * snd_usb_endpoint_set_params: configure an snd_usb_endpoint * * It's called either from hw_params callback. * Determine the number of URBs to be used on this endpoint. * An endpoint must be configured before it can be started. * An endpoint that is already running can not be reconfigured. */ int snd_usb_endpoint_set_params(struct snd_usb_audio *chip, struct snd_usb_endpoint *ep) { const struct audioformat *fmt = ep->cur_audiofmt; int err = 0; mutex_lock(&chip->mutex); if (!ep->need_setup) goto unlock; /* release old buffers, if any */ err = release_urbs(ep, false); if (err < 0) goto unlock; ep->datainterval = fmt->datainterval; ep->maxpacksize = fmt->maxpacksize; ep->fill_max = !!(fmt->attributes & UAC_EP_CS_ATTR_FILL_MAX); if (snd_usb_get_speed(chip->dev) == USB_SPEED_FULL) { ep->freqn = get_usb_full_speed_rate(ep->cur_rate); ep->pps = 1000 >> ep->datainterval; } else { ep->freqn = get_usb_high_speed_rate(ep->cur_rate); ep->pps = 8000 >> ep->datainterval; } ep->sample_rem = ep->cur_rate % ep->pps; ep->packsize[0] = ep->cur_rate / ep->pps; ep->packsize[1] = (ep->cur_rate + (ep->pps - 1)) / ep->pps; /* calculate the frequency in 16.16 format */ ep->freqm = ep->freqn; ep->freqshift = INT_MIN; ep->phase = 0; switch (ep->type) { case SND_USB_ENDPOINT_TYPE_DATA: err = data_ep_set_params(ep); break; case SND_USB_ENDPOINT_TYPE_SYNC: err = sync_ep_set_params(ep); break; default: err = -EINVAL; } usb_audio_dbg(chip, "Set up %d URBS, ret=%d\n", ep->nurbs, err); if (err < 0) goto unlock; /* some unit conversions in runtime */ ep->maxframesize = ep->maxpacksize / ep->cur_frame_bytes; ep->curframesize = ep->curpacksize / ep->cur_frame_bytes; err = update_clock_ref_rate(chip, ep); if (err >= 0) { ep->need_setup = false; err = 0; } unlock: mutex_unlock(&chip->mutex); return err; } static int init_sample_rate(struct snd_usb_audio *chip, struct snd_usb_endpoint *ep) { struct snd_usb_clock_ref *clock = ep->clock_ref; int rate, err; rate = update_clock_ref_rate(chip, ep); if (rate < 0) return rate; if (clock && !clock->need_setup) return 0; if (!ep->fixed_rate) { err = snd_usb_init_sample_rate(chip, ep->cur_audiofmt, rate); if (err < 0) { if (clock) clock->rate = 0; /* reset rate */ return err; } } if (clock) clock->need_setup = false; return 0; } /* * snd_usb_endpoint_prepare: Prepare the endpoint * * This function sets up the EP to be fully usable state. * It's called either from prepare callback. * The function checks need_setup flag, and performs nothing unless needed, * so it's safe to call this multiple times. * * This returns zero if unchanged, 1 if the configuration has changed, * or a negative error code. */ int snd_usb_endpoint_prepare(struct snd_usb_audio *chip, struct snd_usb_endpoint *ep) { bool iface_first; int err = 0; mutex_lock(&chip->mutex); if (WARN_ON(!ep->iface_ref)) goto unlock; if (!ep->need_prepare) goto unlock; /* If the interface has been already set up, just set EP parameters */ if (!ep->iface_ref->need_setup) { /* sample rate setup of UAC1 is per endpoint, and we need * to update at each EP configuration */ if (ep->cur_audiofmt->protocol == UAC_VERSION_1) { err = init_sample_rate(chip, ep); if (err < 0) goto unlock; } goto done; } /* Need to deselect altsetting at first */ endpoint_set_interface(chip, ep, false); /* Some UAC1 devices (e.g. Yamaha THR10) need the host interface * to be set up before parameter setups */ iface_first = ep->cur_audiofmt->protocol == UAC_VERSION_1; /* Workaround for devices that require the interface setup at first like UAC1 */ if (chip->quirk_flags & QUIRK_FLAG_SET_IFACE_FIRST) iface_first = true; if (iface_first) { err = endpoint_set_interface(chip, ep, true); if (err < 0) goto unlock; } err = snd_usb_init_pitch(chip, ep->cur_audiofmt); if (err < 0) goto unlock; err = init_sample_rate(chip, ep); if (err < 0) goto unlock; err = snd_usb_select_mode_quirk(chip, ep->cur_audiofmt); if (err < 0) goto unlock; /* for UAC2/3, enable the interface altset here at last */ if (!iface_first) { err = endpoint_set_interface(chip, ep, true); if (err < 0) goto unlock; } ep->iface_ref->need_setup = false; done: ep->need_prepare = false; err = 1; unlock: mutex_unlock(&chip->mutex); return err; } /* get the current rate set to the given clock by any endpoint */ int snd_usb_endpoint_get_clock_rate(struct snd_usb_audio *chip, int clock) { struct snd_usb_clock_ref *ref; int rate = 0; if (!clock) return 0; mutex_lock(&chip->mutex); list_for_each_entry(ref, &chip->clock_ref_list, list) { if (ref->clock == clock) { rate = ref->rate; break; } } mutex_unlock(&chip->mutex); return rate; } /** * snd_usb_endpoint_start: start an snd_usb_endpoint * * @ep: the endpoint to start * * A call to this function will increment the running count of the endpoint. * In case it is not already running, the URBs for this endpoint will be * submitted. Otherwise, this function does nothing. * * Must be balanced to calls of snd_usb_endpoint_stop(). * * Returns an error if the URB submission failed, 0 in all other cases. */ int snd_usb_endpoint_start(struct snd_usb_endpoint *ep) { bool is_playback = usb_pipeout(ep->pipe); int err; unsigned int i; if (atomic_read(&ep->chip->shutdown)) return -EBADFD; if (ep->sync_source) WRITE_ONCE(ep->sync_source->sync_sink, ep); usb_audio_dbg(ep->chip, "Starting %s EP 0x%x (running %d)\n", ep_type_name(ep->type), ep->ep_num, atomic_read(&ep->running)); /* already running? */ if (atomic_inc_return(&ep->running) != 1) return 0; if (ep->clock_ref) atomic_inc(&ep->clock_ref->locked); ep->active_mask = 0; ep->unlink_mask = 0; ep->phase = 0; ep->sample_accum = 0; snd_usb_endpoint_start_quirk(ep); /* * If this endpoint has a data endpoint as implicit feedback source, * don't start the urbs here. Instead, mark them all as available, * wait for the record urbs to return and queue the playback urbs * from that context. */ if (!ep_state_update(ep, EP_STATE_STOPPED, EP_STATE_RUNNING)) goto __error; if (snd_usb_endpoint_implicit_feedback_sink(ep) && !(ep->chip->quirk_flags & QUIRK_FLAG_PLAYBACK_FIRST)) { usb_audio_dbg(ep->chip, "No URB submission due to implicit fb sync\n"); i = 0; goto fill_rest; } for (i = 0; i < ep->nurbs; i++) { struct urb *urb = ep->urb[i].urb; if (snd_BUG_ON(!urb)) goto __error; if (is_playback) err = prepare_outbound_urb(ep, urb->context, true); else err = prepare_inbound_urb(ep, urb->context); if (err < 0) { /* stop filling at applptr */ if (err == -EAGAIN) break; usb_audio_dbg(ep->chip, "EP 0x%x: failed to prepare urb: %d\n", ep->ep_num, err); goto __error; } if (!atomic_read(&ep->chip->shutdown)) err = usb_submit_urb(urb, GFP_ATOMIC); else err = -ENODEV; if (err < 0) { if (!atomic_read(&ep->chip->shutdown)) usb_audio_err(ep->chip, "cannot submit urb %d, error %d: %s\n", i, err, usb_error_string(err)); goto __error; } set_bit(i, &ep->active_mask); atomic_inc(&ep->submitted_urbs); } if (!i) { usb_audio_dbg(ep->chip, "XRUN at starting EP 0x%x\n", ep->ep_num); goto __error; } usb_audio_dbg(ep->chip, "%d URBs submitted for EP 0x%x\n", i, ep->ep_num); fill_rest: /* put the remaining URBs to ready list */ if (is_playback) { for (; i < ep->nurbs; i++) push_back_to_ready_list(ep, ep->urb + i); } return 0; __error: snd_usb_endpoint_stop(ep, false); return -EPIPE; } /** * snd_usb_endpoint_stop: stop an snd_usb_endpoint * * @ep: the endpoint to stop (may be NULL) * @keep_pending: keep in-flight URBs * * A call to this function will decrement the running count of the endpoint. * In case the last user has requested the endpoint stop, the URBs will * actually be deactivated. * * Must be balanced to calls of snd_usb_endpoint_start(). * * The caller needs to synchronize the pending stop operation via * snd_usb_endpoint_sync_pending_stop(). */ void snd_usb_endpoint_stop(struct snd_usb_endpoint *ep, bool keep_pending) { if (!ep) return; usb_audio_dbg(ep->chip, "Stopping %s EP 0x%x (running %d)\n", ep_type_name(ep->type), ep->ep_num, atomic_read(&ep->running)); if (snd_BUG_ON(!atomic_read(&ep->running))) return; if (!atomic_dec_return(&ep->running)) { if (ep->sync_source) WRITE_ONCE(ep->sync_source->sync_sink, NULL); stop_urbs(ep, false, keep_pending); if (ep->clock_ref) atomic_dec(&ep->clock_ref->locked); if (ep->chip->quirk_flags & QUIRK_FLAG_FORCE_IFACE_RESET && usb_pipeout(ep->pipe)) { ep->need_prepare = true; if (ep->iface_ref) ep->iface_ref->need_setup = true; } } } /** * snd_usb_endpoint_release: Tear down an snd_usb_endpoint * * @ep: the endpoint to release * * This function does not care for the endpoint's running count but will tear * down all the streaming URBs immediately. */ void snd_usb_endpoint_release(struct snd_usb_endpoint *ep) { release_urbs(ep, true); } /** * snd_usb_endpoint_free_all: Free the resources of an snd_usb_endpoint * @chip: The chip * * This free all endpoints and those resources */ void snd_usb_endpoint_free_all(struct snd_usb_audio *chip) { struct snd_usb_endpoint *ep, *en; struct snd_usb_iface_ref *ip, *in; struct snd_usb_clock_ref *cp, *cn; list_for_each_entry_safe(ep, en, &chip->ep_list, list) kfree(ep); list_for_each_entry_safe(ip, in, &chip->iface_ref_list, list) kfree(ip); list_for_each_entry_safe(cp, cn, &chip->clock_ref_list, list) kfree(cp); } /* * snd_usb_handle_sync_urb: parse an USB sync packet * * @ep: the endpoint to handle the packet * @sender: the sending endpoint * @urb: the received packet * * This function is called from the context of an endpoint that received * the packet and is used to let another endpoint object handle the payload. */ static void snd_usb_handle_sync_urb(struct snd_usb_endpoint *ep, struct snd_usb_endpoint *sender, const struct urb *urb) { int shift; unsigned int f; unsigned long flags; snd_BUG_ON(ep == sender); /* * In case the endpoint is operating in implicit feedback mode, prepare * a new outbound URB that has the same layout as the received packet * and add it to the list of pending urbs. queue_pending_output_urbs() * will take care of them later. */ if (snd_usb_endpoint_implicit_feedback_sink(ep) && atomic_read(&ep->running)) { /* implicit feedback case */ int i, bytes = 0; struct snd_urb_ctx *in_ctx; struct snd_usb_packet_info *out_packet; in_ctx = urb->context; /* Count overall packet size */ for (i = 0; i < in_ctx->packets; i++) if (urb->iso_frame_desc[i].status == 0) bytes += urb->iso_frame_desc[i].actual_length; /* * skip empty packets. At least M-Audio's Fast Track Ultra stops * streaming once it received a 0-byte OUT URB */ if (bytes == 0) return; spin_lock_irqsave(&ep->lock, flags); if (ep->next_packet_queued >= ARRAY_SIZE(ep->next_packet)) { spin_unlock_irqrestore(&ep->lock, flags); usb_audio_err(ep->chip, "next package FIFO overflow EP 0x%x\n", ep->ep_num); notify_xrun(ep); return; } out_packet = next_packet_fifo_enqueue(ep); /* * Iterate through the inbound packet and prepare the lengths * for the output packet. The OUT packet we are about to send * will have the same amount of payload bytes per stride as the * IN packet we just received. Since the actual size is scaled * by the stride, use the sender stride to calculate the length * in case the number of channels differ between the implicitly * fed-back endpoint and the synchronizing endpoint. */ out_packet->packets = in_ctx->packets; for (i = 0; i < in_ctx->packets; i++) { if (urb->iso_frame_desc[i].status == 0) out_packet->packet_size[i] = urb->iso_frame_desc[i].actual_length / sender->stride; else out_packet->packet_size[i] = 0; } spin_unlock_irqrestore(&ep->lock, flags); snd_usb_queue_pending_output_urbs(ep, false); return; } /* * process after playback sync complete * * Full speed devices report feedback values in 10.14 format as samples * per frame, high speed devices in 16.16 format as samples per * microframe. * * Because the Audio Class 1 spec was written before USB 2.0, many high * speed devices use a wrong interpretation, some others use an * entirely different format. * * Therefore, we cannot predict what format any particular device uses * and must detect it automatically. */ if (urb->iso_frame_desc[0].status != 0 || urb->iso_frame_desc[0].actual_length < 3) return; f = le32_to_cpup(urb->transfer_buffer); if (urb->iso_frame_desc[0].actual_length == 3) f &= 0x00ffffff; else f &= 0x0fffffff; if (f == 0) return; if (unlikely(sender->tenor_fb_quirk)) { /* * Devices based on Tenor 8802 chipsets (TEAC UD-H01 * and others) sometimes change the feedback value * by +/- 0x1.0000. */ if (f < ep->freqn - 0x8000) f += 0xf000; else if (f > ep->freqn + 0x8000) f -= 0xf000; } else if (unlikely(ep->freqshift == INT_MIN)) { /* * The first time we see a feedback value, determine its format * by shifting it left or right until it matches the nominal * frequency value. This assumes that the feedback does not * differ from the nominal value more than +50% or -25%. */ shift = 0; while (f < ep->freqn - ep->freqn / 4) { f <<= 1; shift++; } while (f > ep->freqn + ep->freqn / 2) { f >>= 1; shift--; } ep->freqshift = shift; } else if (ep->freqshift >= 0) f <<= ep->freqshift; else f >>= -ep->freqshift; if (likely(f >= ep->freqn - ep->freqn / 8 && f <= ep->freqmax)) { /* * If the frequency looks valid, set it. * This value is referred to in prepare_playback_urb(). */ spin_lock_irqsave(&ep->lock, flags); ep->freqm = f; spin_unlock_irqrestore(&ep->lock, flags); } else { /* * Out of range; maybe the shift value is wrong. * Reset it so that we autodetect again the next time. */ ep->freqshift = INT_MIN; } }
3 3 1 1 1 3 3 1 3 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2008, 2009 open80211s Ltd. * Copyright (C) 2018 - 2024 Intel Corporation * Authors: Luis Carlos Cobo <luisca@cozybit.com> * Javier Cardona <javier@cozybit.com> */ #include <linux/slab.h> #include <linux/unaligned.h> #include "ieee80211_i.h" #include "mesh.h" #include "wme.h" #include "driver-ops.h" static int mesh_allocated; static struct kmem_cache *rm_cache; bool mesh_action_is_path_sel(struct ieee80211_mgmt *mgmt) { return (mgmt->u.action.u.mesh_action.action_code == WLAN_MESH_ACTION_HWMP_PATH_SELECTION); } void ieee80211s_init(void) { mesh_allocated = 1; rm_cache = kmem_cache_create("mesh_rmc", sizeof(struct rmc_entry), 0, 0, NULL); } void ieee80211s_stop(void) { if (!mesh_allocated) return; kmem_cache_destroy(rm_cache); } static void ieee80211_mesh_housekeeping_timer(struct timer_list *t) { struct ieee80211_sub_if_data *sdata = from_timer(sdata, t, u.mesh.housekeeping_timer); struct ieee80211_local *local = sdata->local; struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; set_bit(MESH_WORK_HOUSEKEEPING, &ifmsh->wrkq_flags); wiphy_work_queue(local->hw.wiphy, &sdata->work); } /** * mesh_matches_local - check if the config of a mesh point matches ours * * @sdata: local mesh subif * @ie: information elements of a management frame from the mesh peer * * This function checks if the mesh configuration of a mesh point matches the * local mesh configuration, i.e. if both nodes belong to the same mesh network. * * Returns: %true if both nodes belong to the same mesh */ bool mesh_matches_local(struct ieee80211_sub_if_data *sdata, struct ieee802_11_elems *ie) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; u32 basic_rates = 0; struct cfg80211_chan_def sta_chan_def; struct ieee80211_supported_band *sband; u32 vht_cap_info = 0; /* * As support for each feature is added, check for matching * - On mesh config capabilities * - Power Save Support En * - Sync support enabled * - Sync support active * - Sync support required from peer * - MDA enabled * - Power management control on fc */ if (!(ifmsh->mesh_id_len == ie->mesh_id_len && memcmp(ifmsh->mesh_id, ie->mesh_id, ie->mesh_id_len) == 0 && (ifmsh->mesh_pp_id == ie->mesh_config->meshconf_psel) && (ifmsh->mesh_pm_id == ie->mesh_config->meshconf_pmetric) && (ifmsh->mesh_cc_id == ie->mesh_config->meshconf_congest) && (ifmsh->mesh_sp_id == ie->mesh_config->meshconf_synch) && (ifmsh->mesh_auth_id == ie->mesh_config->meshconf_auth))) return false; sband = ieee80211_get_sband(sdata); if (!sband) return false; ieee80211_sta_get_rates(sdata, ie, sband->band, &basic_rates); if (sdata->vif.bss_conf.basic_rates != basic_rates) return false; cfg80211_chandef_create(&sta_chan_def, sdata->vif.bss_conf.chanreq.oper.chan, NL80211_CHAN_NO_HT); ieee80211_chandef_ht_oper(ie->ht_operation, &sta_chan_def); if (ie->vht_cap_elem) vht_cap_info = le32_to_cpu(ie->vht_cap_elem->vht_cap_info); ieee80211_chandef_vht_oper(&sdata->local->hw, vht_cap_info, ie->vht_operation, ie->ht_operation, &sta_chan_def); ieee80211_chandef_he_6ghz_oper(sdata->local, ie->he_operation, ie->eht_operation, &sta_chan_def); if (!cfg80211_chandef_compatible(&sdata->vif.bss_conf.chanreq.oper, &sta_chan_def)) return false; return true; } /** * mesh_peer_accepts_plinks - check if an mp is willing to establish peer links * * @ie: information elements of a management frame from the mesh peer * * Returns: %true if the mesh peer is willing to establish peer links */ bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie) { return (ie->mesh_config->meshconf_cap & IEEE80211_MESHCONF_CAPAB_ACCEPT_PLINKS) != 0; } /** * mesh_accept_plinks_update - update accepting_plink in local mesh beacons * * @sdata: mesh interface in which mesh beacons are going to be updated * * Returns: beacon changed flag if the beacon content changed. */ u64 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata) { bool free_plinks; u64 changed = 0; /* In case mesh_plink_free_count > 0 and mesh_plinktbl_capacity == 0, * the mesh interface might be able to establish plinks with peers that * are already on the table but are not on PLINK_ESTAB state. However, * in general the mesh interface is not accepting peer link requests * from new peers, and that must be reflected in the beacon */ free_plinks = mesh_plink_availables(sdata); if (free_plinks != sdata->u.mesh.accepting_plinks) { sdata->u.mesh.accepting_plinks = free_plinks; changed = BSS_CHANGED_BEACON; } return changed; } /* * mesh_sta_cleanup - clean up any mesh sta state * * @sta: mesh sta to clean up. */ void mesh_sta_cleanup(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; u64 changed = mesh_plink_deactivate(sta); if (changed) ieee80211_mbss_info_change_notify(sdata, changed); } int mesh_rmc_init(struct ieee80211_sub_if_data *sdata) { int i; sdata->u.mesh.rmc = kmalloc(sizeof(struct mesh_rmc), GFP_KERNEL); if (!sdata->u.mesh.rmc) return -ENOMEM; sdata->u.mesh.rmc->idx_mask = RMC_BUCKETS - 1; for (i = 0; i < RMC_BUCKETS; i++) INIT_HLIST_HEAD(&sdata->u.mesh.rmc->bucket[i]); return 0; } void mesh_rmc_free(struct ieee80211_sub_if_data *sdata) { struct mesh_rmc *rmc = sdata->u.mesh.rmc; struct rmc_entry *p; struct hlist_node *n; int i; if (!sdata->u.mesh.rmc) return; for (i = 0; i < RMC_BUCKETS; i++) { hlist_for_each_entry_safe(p, n, &rmc->bucket[i], list) { hlist_del(&p->list); kmem_cache_free(rm_cache, p); } } kfree(rmc); sdata->u.mesh.rmc = NULL; } /** * mesh_rmc_check - Check frame in recent multicast cache and add if absent. * * @sdata: interface * @sa: source address * @mesh_hdr: mesh_header * * Returns: 0 if the frame is not in the cache, nonzero otherwise. * * Checks using the source address and the mesh sequence number if we have * received this frame lately. If the frame is not in the cache, it is added to * it. */ int mesh_rmc_check(struct ieee80211_sub_if_data *sdata, const u8 *sa, struct ieee80211s_hdr *mesh_hdr) { struct mesh_rmc *rmc = sdata->u.mesh.rmc; u32 seqnum = 0; int entries = 0; u8 idx; struct rmc_entry *p; struct hlist_node *n; if (!rmc) return -1; /* Don't care about endianness since only match matters */ memcpy(&seqnum, &mesh_hdr->seqnum, sizeof(mesh_hdr->seqnum)); idx = le32_to_cpu(mesh_hdr->seqnum) & rmc->idx_mask; hlist_for_each_entry_safe(p, n, &rmc->bucket[idx], list) { ++entries; if (time_after(jiffies, p->exp_time) || entries == RMC_QUEUE_MAX_LEN) { hlist_del(&p->list); kmem_cache_free(rm_cache, p); --entries; } else if ((seqnum == p->seqnum) && ether_addr_equal(sa, p->sa)) return -1; } p = kmem_cache_alloc(rm_cache, GFP_ATOMIC); if (!p) return 0; p->seqnum = seqnum; p->exp_time = jiffies + RMC_TIMEOUT; memcpy(p->sa, sa, ETH_ALEN); hlist_add_head(&p->list, &rmc->bucket[idx]); return 0; } int mesh_add_meshconf_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; u8 *pos, neighbors; u8 meshconf_len = sizeof(struct ieee80211_meshconf_ie); bool is_connected_to_gate = ifmsh->num_gates > 0 || ifmsh->mshcfg.dot11MeshGateAnnouncementProtocol || ifmsh->mshcfg.dot11MeshConnectedToMeshGate; bool is_connected_to_as = ifmsh->mshcfg.dot11MeshConnectedToAuthServer; if (skb_tailroom(skb) < 2 + meshconf_len) return -ENOMEM; pos = skb_put(skb, 2 + meshconf_len); *pos++ = WLAN_EID_MESH_CONFIG; *pos++ = meshconf_len; /* save a pointer for quick updates in pre-tbtt */ ifmsh->meshconf_offset = pos - skb->data; /* Active path selection protocol ID */ *pos++ = ifmsh->mesh_pp_id; /* Active path selection metric ID */ *pos++ = ifmsh->mesh_pm_id; /* Congestion control mode identifier */ *pos++ = ifmsh->mesh_cc_id; /* Synchronization protocol identifier */ *pos++ = ifmsh->mesh_sp_id; /* Authentication Protocol identifier */ *pos++ = ifmsh->mesh_auth_id; /* Mesh Formation Info - number of neighbors */ neighbors = atomic_read(&ifmsh->estab_plinks); neighbors = min_t(int, neighbors, IEEE80211_MAX_MESH_PEERINGS); *pos++ = (is_connected_to_as << 7) | (neighbors << 1) | is_connected_to_gate; /* Mesh capability */ *pos = 0x00; *pos |= ifmsh->mshcfg.dot11MeshForwarding ? IEEE80211_MESHCONF_CAPAB_FORWARDING : 0x00; *pos |= ifmsh->accepting_plinks ? IEEE80211_MESHCONF_CAPAB_ACCEPT_PLINKS : 0x00; /* Mesh PS mode. See IEEE802.11-2012 8.4.2.100.8 */ *pos |= ifmsh->ps_peers_deep_sleep ? IEEE80211_MESHCONF_CAPAB_POWER_SAVE_LEVEL : 0x00; return 0; } int mesh_add_meshid_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; u8 *pos; if (skb_tailroom(skb) < 2 + ifmsh->mesh_id_len) return -ENOMEM; pos = skb_put(skb, 2 + ifmsh->mesh_id_len); *pos++ = WLAN_EID_MESH_ID; *pos++ = ifmsh->mesh_id_len; if (ifmsh->mesh_id_len) memcpy(pos, ifmsh->mesh_id, ifmsh->mesh_id_len); return 0; } static int mesh_add_awake_window_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; u8 *pos; /* see IEEE802.11-2012 13.14.6 */ if (ifmsh->ps_peers_light_sleep == 0 && ifmsh->ps_peers_deep_sleep == 0 && ifmsh->nonpeer_pm == NL80211_MESH_POWER_ACTIVE) return 0; if (skb_tailroom(skb) < 4) return -ENOMEM; pos = skb_put(skb, 2 + 2); *pos++ = WLAN_EID_MESH_AWAKE_WINDOW; *pos++ = 2; put_unaligned_le16(ifmsh->mshcfg.dot11MeshAwakeWindowDuration, pos); return 0; } int mesh_add_vendor_ies(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; u8 offset, len; const u8 *data; if (!ifmsh->ie || !ifmsh->ie_len) return 0; /* fast-forward to vendor IEs */ offset = ieee80211_ie_split_vendor(ifmsh->ie, ifmsh->ie_len, 0); if (offset < ifmsh->ie_len) { len = ifmsh->ie_len - offset; data = ifmsh->ie + offset; if (skb_tailroom(skb) < len) return -ENOMEM; skb_put_data(skb, data, len); } return 0; } int mesh_add_rsn_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; u8 len = 0; const u8 *data; if (!ifmsh->ie || !ifmsh->ie_len) return 0; /* find RSN IE */ data = cfg80211_find_ie(WLAN_EID_RSN, ifmsh->ie, ifmsh->ie_len); if (!data) return 0; len = data[1] + 2; if (skb_tailroom(skb) < len) return -ENOMEM; skb_put_data(skb, data, len); return 0; } static int mesh_add_ds_params_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { struct ieee80211_chanctx_conf *chanctx_conf; struct ieee80211_channel *chan; u8 *pos; if (skb_tailroom(skb) < 3) return -ENOMEM; rcu_read_lock(); chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); if (WARN_ON(!chanctx_conf)) { rcu_read_unlock(); return -EINVAL; } chan = chanctx_conf->def.chan; rcu_read_unlock(); pos = skb_put(skb, 2 + 1); *pos++ = WLAN_EID_DS_PARAMS; *pos++ = 1; *pos++ = ieee80211_frequency_to_channel(chan->center_freq); return 0; } int mesh_add_ht_cap_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { struct ieee80211_supported_band *sband; u8 *pos; sband = ieee80211_get_sband(sdata); if (!sband) return -EINVAL; /* HT not allowed in 6 GHz */ if (sband->band == NL80211_BAND_6GHZ) return 0; if (!sband->ht_cap.ht_supported || sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT || sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_5 || sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_10) return 0; if (skb_tailroom(skb) < 2 + sizeof(struct ieee80211_ht_cap)) return -ENOMEM; pos = skb_put(skb, 2 + sizeof(struct ieee80211_ht_cap)); ieee80211_ie_build_ht_cap(pos, &sband->ht_cap, sband->ht_cap.cap); return 0; } int mesh_add_ht_oper_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { struct ieee80211_local *local = sdata->local; struct ieee80211_chanctx_conf *chanctx_conf; struct ieee80211_channel *channel; struct ieee80211_supported_band *sband; struct ieee80211_sta_ht_cap *ht_cap; u8 *pos; rcu_read_lock(); chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); if (WARN_ON(!chanctx_conf)) { rcu_read_unlock(); return -EINVAL; } channel = chanctx_conf->def.chan; rcu_read_unlock(); sband = local->hw.wiphy->bands[channel->band]; ht_cap = &sband->ht_cap; /* HT not allowed in 6 GHz */ if (sband->band == NL80211_BAND_6GHZ) return 0; if (!ht_cap->ht_supported || sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT || sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_5 || sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_10) return 0; if (skb_tailroom(skb) < 2 + sizeof(struct ieee80211_ht_operation)) return -ENOMEM; pos = skb_put(skb, 2 + sizeof(struct ieee80211_ht_operation)); ieee80211_ie_build_ht_oper(pos, ht_cap, &sdata->vif.bss_conf.chanreq.oper, sdata->vif.bss_conf.ht_operation_mode, false); return 0; } int mesh_add_vht_cap_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { struct ieee80211_supported_band *sband; u8 *pos; sband = ieee80211_get_sband(sdata); if (!sband) return -EINVAL; /* VHT not allowed in 6 GHz */ if (sband->band == NL80211_BAND_6GHZ) return 0; if (!sband->vht_cap.vht_supported || sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT || sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_5 || sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_10) return 0; if (skb_tailroom(skb) < 2 + sizeof(struct ieee80211_vht_cap)) return -ENOMEM; pos = skb_put(skb, 2 + sizeof(struct ieee80211_vht_cap)); ieee80211_ie_build_vht_cap(pos, &sband->vht_cap, sband->vht_cap.cap); return 0; } int mesh_add_vht_oper_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { struct ieee80211_local *local = sdata->local; struct ieee80211_chanctx_conf *chanctx_conf; struct ieee80211_channel *channel; struct ieee80211_supported_band *sband; struct ieee80211_sta_vht_cap *vht_cap; u8 *pos; rcu_read_lock(); chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); if (WARN_ON(!chanctx_conf)) { rcu_read_unlock(); return -EINVAL; } channel = chanctx_conf->def.chan; rcu_read_unlock(); sband = local->hw.wiphy->bands[channel->band]; vht_cap = &sband->vht_cap; /* VHT not allowed in 6 GHz */ if (sband->band == NL80211_BAND_6GHZ) return 0; if (!vht_cap->vht_supported || sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT || sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_5 || sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_10) return 0; if (skb_tailroom(skb) < 2 + sizeof(struct ieee80211_vht_operation)) return -ENOMEM; pos = skb_put(skb, 2 + sizeof(struct ieee80211_vht_operation)); ieee80211_ie_build_vht_oper(pos, vht_cap, &sdata->vif.bss_conf.chanreq.oper); return 0; } int mesh_add_he_cap_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, u8 ie_len) { struct ieee80211_supported_band *sband; sband = ieee80211_get_sband(sdata); if (!sband) return -EINVAL; if (sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT || sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_5 || sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_10) return 0; return ieee80211_put_he_cap(skb, sdata, sband, NULL); } int mesh_add_he_oper_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { const struct ieee80211_sta_he_cap *he_cap; struct ieee80211_supported_band *sband; u32 len; u8 *pos; sband = ieee80211_get_sband(sdata); if (!sband) return -EINVAL; he_cap = ieee80211_get_he_iftype_cap(sband, NL80211_IFTYPE_MESH_POINT); if (!he_cap || sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT || sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_5 || sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_10) return 0; len = 2 + 1 + sizeof(struct ieee80211_he_operation); if (sdata->vif.bss_conf.chanreq.oper.chan->band == NL80211_BAND_6GHZ) len += sizeof(struct ieee80211_he_6ghz_oper); if (skb_tailroom(skb) < len) return -ENOMEM; pos = skb_put(skb, len); ieee80211_ie_build_he_oper(pos, &sdata->vif.bss_conf.chanreq.oper); return 0; } int mesh_add_he_6ghz_cap_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { struct ieee80211_supported_band *sband; const struct ieee80211_sband_iftype_data *iftd; sband = ieee80211_get_sband(sdata); if (!sband) return -EINVAL; iftd = ieee80211_get_sband_iftype_data(sband, NL80211_IFTYPE_MESH_POINT); /* The device doesn't support HE in mesh mode or at all */ if (!iftd) return 0; ieee80211_put_he_6ghz_cap(skb, sdata, sdata->deflink.smps_mode); return 0; } int mesh_add_eht_cap_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, u8 ie_len) { struct ieee80211_supported_band *sband; sband = ieee80211_get_sband(sdata); if (!sband) return -EINVAL; if (sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT || sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_5 || sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_10) return 0; return ieee80211_put_eht_cap(skb, sdata, sband, NULL); } int mesh_add_eht_oper_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { const struct ieee80211_sta_eht_cap *eht_cap; struct ieee80211_supported_band *sband; u32 len; u8 *pos; sband = ieee80211_get_sband(sdata); if (!sband) return -EINVAL; eht_cap = ieee80211_get_eht_iftype_cap(sband, NL80211_IFTYPE_MESH_POINT); if (!eht_cap || sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT || sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_5 || sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_10) return 0; len = 2 + 1 + offsetof(struct ieee80211_eht_operation, optional) + offsetof(struct ieee80211_eht_operation_info, optional); if (skb_tailroom(skb) < len) return -ENOMEM; pos = skb_put(skb, len); ieee80211_ie_build_eht_oper(pos, &sdata->vif.bss_conf.chanreq.oper, eht_cap); return 0; } static void ieee80211_mesh_path_timer(struct timer_list *t) { struct ieee80211_sub_if_data *sdata = from_timer(sdata, t, u.mesh.mesh_path_timer); wiphy_work_queue(sdata->local->hw.wiphy, &sdata->work); } static void ieee80211_mesh_path_root_timer(struct timer_list *t) { struct ieee80211_sub_if_data *sdata = from_timer(sdata, t, u.mesh.mesh_path_root_timer); struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; set_bit(MESH_WORK_ROOT, &ifmsh->wrkq_flags); wiphy_work_queue(sdata->local->hw.wiphy, &sdata->work); } void ieee80211_mesh_root_setup(struct ieee80211_if_mesh *ifmsh) { if (ifmsh->mshcfg.dot11MeshHWMPRootMode > IEEE80211_ROOTMODE_ROOT) set_bit(MESH_WORK_ROOT, &ifmsh->wrkq_flags); else { clear_bit(MESH_WORK_ROOT, &ifmsh->wrkq_flags); /* stop running timer */ del_timer_sync(&ifmsh->mesh_path_root_timer); } } static void ieee80211_mesh_update_bss_params(struct ieee80211_sub_if_data *sdata, u8 *ie, u8 ie_len) { struct ieee80211_supported_band *sband; const struct element *cap; const struct ieee80211_he_operation *he_oper = NULL; sband = ieee80211_get_sband(sdata); if (!sband) return; if (!ieee80211_get_he_iftype_cap(sband, NL80211_IFTYPE_MESH_POINT) || sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT || sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_5 || sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_10) return; sdata->vif.bss_conf.he_support = true; cap = cfg80211_find_ext_elem(WLAN_EID_EXT_HE_OPERATION, ie, ie_len); if (cap && cap->datalen >= 1 + sizeof(*he_oper) && cap->datalen >= 1 + ieee80211_he_oper_size(cap->data + 1)) he_oper = (void *)(cap->data + 1); if (he_oper) sdata->vif.bss_conf.he_oper.params = __le32_to_cpu(he_oper->he_oper_params); sdata->vif.bss_conf.eht_support = !!ieee80211_get_eht_iftype_cap(sband, NL80211_IFTYPE_MESH_POINT); } bool ieee80211_mesh_xmit_fast(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, u32 ctrl_flags) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct ieee80211_mesh_fast_tx_key key = { .type = MESH_FAST_TX_TYPE_LOCAL }; struct ieee80211_mesh_fast_tx *entry; struct ieee80211s_hdr *meshhdr; u8 sa[ETH_ALEN] __aligned(2); struct tid_ampdu_tx *tid_tx; struct sta_info *sta; bool copy_sa = false; u16 ethertype; u8 tid; if (ctrl_flags & IEEE80211_TX_CTRL_SKIP_MPATH_LOOKUP) return false; if (ifmsh->mshcfg.dot11MeshNolearn) return false; /* Add support for these cases later */ if (ifmsh->ps_peers_light_sleep || ifmsh->ps_peers_deep_sleep) return false; if (is_multicast_ether_addr(skb->data)) return false; ethertype = (skb->data[12] << 8) | skb->data[13]; if (ethertype < ETH_P_802_3_MIN) return false; if (skb->sk && skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS) return false; if (skb->ip_summed == CHECKSUM_PARTIAL) { skb_set_transport_header(skb, skb_checksum_start_offset(skb)); if (skb_checksum_help(skb)) return false; } ether_addr_copy(key.addr, skb->data); if (!ether_addr_equal(skb->data + ETH_ALEN, sdata->vif.addr)) key.type = MESH_FAST_TX_TYPE_PROXIED; entry = mesh_fast_tx_get(sdata, &key); if (!entry) return false; if (skb_headroom(skb) < entry->hdrlen + entry->fast_tx.hdr_len) return false; sta = rcu_dereference(entry->mpath->next_hop); if (!sta) return false; tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK; tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[tid]); if (tid_tx) { if (!test_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state)) return false; if (tid_tx->timeout) tid_tx->last_tx = jiffies; } skb = skb_share_check(skb, GFP_ATOMIC); if (!skb) return true; skb_set_queue_mapping(skb, ieee80211_select_queue(sdata, sta, skb)); meshhdr = (struct ieee80211s_hdr *)entry->hdr; if ((meshhdr->flags & MESH_FLAGS_AE) == MESH_FLAGS_AE_A5_A6) { /* preserve SA from eth header for 6-addr frames */ ether_addr_copy(sa, skb->data + ETH_ALEN); copy_sa = true; } memcpy(skb_push(skb, entry->hdrlen - 2 * ETH_ALEN), entry->hdr, entry->hdrlen); meshhdr = (struct ieee80211s_hdr *)skb->data; put_unaligned_le32(atomic_inc_return(&sdata->u.mesh.mesh_seqnum), &meshhdr->seqnum); meshhdr->ttl = sdata->u.mesh.mshcfg.dot11MeshTTL; if (copy_sa) ether_addr_copy(meshhdr->eaddr2, sa); skb_push(skb, 2 * ETH_ALEN); __ieee80211_xmit_fast(sdata, sta, &entry->fast_tx, skb, tid_tx, entry->mpath->dst, sdata->vif.addr); return true; } /** * ieee80211_fill_mesh_addresses - fill addresses of a locally originated mesh frame * @hdr: 802.11 frame header * @fc: frame control field * @meshda: destination address in the mesh * @meshsa: source address in the mesh. Same as TA, as frame is * locally originated. * * Returns: the length of the 802.11 frame header (excludes mesh control header) */ int ieee80211_fill_mesh_addresses(struct ieee80211_hdr *hdr, __le16 *fc, const u8 *meshda, const u8 *meshsa) { if (is_multicast_ether_addr(meshda)) { *fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS); /* DA TA SA */ memcpy(hdr->addr1, meshda, ETH_ALEN); memcpy(hdr->addr2, meshsa, ETH_ALEN); memcpy(hdr->addr3, meshsa, ETH_ALEN); return 24; } else { *fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS); /* RA TA DA SA */ eth_zero_addr(hdr->addr1); /* RA is resolved later */ memcpy(hdr->addr2, meshsa, ETH_ALEN); memcpy(hdr->addr3, meshda, ETH_ALEN); memcpy(hdr->addr4, meshsa, ETH_ALEN); return 30; } } /** * ieee80211_new_mesh_header - create a new mesh header * @sdata: mesh interface to be used * @meshhdr: uninitialized mesh header * @addr4or5: 1st address in the ae header, which may correspond to address 4 * (if addr6 is NULL) or address 5 (if addr6 is present). It may * be NULL. * @addr6: 2nd address in the ae header, which corresponds to addr6 of the * mesh frame * * Returns: the header length */ unsigned int ieee80211_new_mesh_header(struct ieee80211_sub_if_data *sdata, struct ieee80211s_hdr *meshhdr, const char *addr4or5, const char *addr6) { if (WARN_ON(!addr4or5 && addr6)) return 0; memset(meshhdr, 0, sizeof(*meshhdr)); meshhdr->ttl = sdata->u.mesh.mshcfg.dot11MeshTTL; put_unaligned_le32(atomic_inc_return(&sdata->u.mesh.mesh_seqnum), &meshhdr->seqnum); if (addr4or5 && !addr6) { meshhdr->flags |= MESH_FLAGS_AE_A4; memcpy(meshhdr->eaddr1, addr4or5, ETH_ALEN); return 2 * ETH_ALEN; } else if (addr4or5 && addr6) { meshhdr->flags |= MESH_FLAGS_AE_A5_A6; memcpy(meshhdr->eaddr1, addr4or5, ETH_ALEN); memcpy(meshhdr->eaddr2, addr6, ETH_ALEN); return 3 * ETH_ALEN; } return ETH_ALEN; } static void ieee80211_mesh_housekeeping(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; u64 changed; if (ifmsh->mshcfg.plink_timeout > 0) ieee80211_sta_expire(sdata, ifmsh->mshcfg.plink_timeout * HZ); mesh_path_expire(sdata); changed = mesh_accept_plinks_update(sdata); ieee80211_mbss_info_change_notify(sdata, changed); mesh_fast_tx_gc(sdata); mod_timer(&ifmsh->housekeeping_timer, round_jiffies(jiffies + IEEE80211_MESH_HOUSEKEEPING_INTERVAL)); } static void ieee80211_mesh_rootpath(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; u32 interval; mesh_path_tx_root_frame(sdata); if (ifmsh->mshcfg.dot11MeshHWMPRootMode == IEEE80211_PROACTIVE_RANN) interval = ifmsh->mshcfg.dot11MeshHWMPRannInterval; else interval = ifmsh->mshcfg.dot11MeshHWMProotInterval; mod_timer(&ifmsh->mesh_path_root_timer, round_jiffies(TU_TO_EXP_TIME(interval))); } static int ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) { struct beacon_data *bcn; int head_len, tail_len; struct sk_buff *skb; struct ieee80211_mgmt *mgmt; struct mesh_csa_settings *csa; const struct ieee80211_supported_band *sband; u8 ie_len_he_cap, ie_len_eht_cap; u8 *pos; struct ieee80211_sub_if_data *sdata; int hdr_len = offsetofend(struct ieee80211_mgmt, u.beacon); u32 rate_flags; sdata = container_of(ifmsh, struct ieee80211_sub_if_data, u.mesh); sband = ieee80211_get_sband(sdata); rate_flags = ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chanreq.oper); ie_len_he_cap = ieee80211_ie_len_he_cap(sdata); ie_len_eht_cap = ieee80211_ie_len_eht_cap(sdata); head_len = hdr_len + 2 + /* NULL SSID */ /* Channel Switch Announcement */ 2 + sizeof(struct ieee80211_channel_sw_ie) + /* Mesh Channel Switch Parameters */ 2 + sizeof(struct ieee80211_mesh_chansw_params_ie) + /* Channel Switch Wrapper + Wide Bandwidth CSA IE */ 2 + 2 + sizeof(struct ieee80211_wide_bw_chansw_ie) + 2 + sizeof(struct ieee80211_sec_chan_offs_ie) + 2 + 8 + /* supported rates */ 2 + 3; /* DS params */ tail_len = 2 + (IEEE80211_MAX_SUPP_RATES - 8) + 2 + sizeof(struct ieee80211_ht_cap) + 2 + sizeof(struct ieee80211_ht_operation) + 2 + ifmsh->mesh_id_len + 2 + sizeof(struct ieee80211_meshconf_ie) + 2 + sizeof(__le16) + /* awake window */ 2 + sizeof(struct ieee80211_vht_cap) + 2 + sizeof(struct ieee80211_vht_operation) + ie_len_he_cap + 2 + 1 + sizeof(struct ieee80211_he_operation) + sizeof(struct ieee80211_he_6ghz_oper) + 2 + 1 + sizeof(struct ieee80211_he_6ghz_capa) + ie_len_eht_cap + 2 + 1 + offsetof(struct ieee80211_eht_operation, optional) + offsetof(struct ieee80211_eht_operation_info, optional) + ifmsh->ie_len; bcn = kzalloc(sizeof(*bcn) + head_len + tail_len, GFP_KERNEL); /* need an skb for IE builders to operate on */ skb = __dev_alloc_skb(max(head_len, tail_len), GFP_KERNEL); if (!bcn || !skb) goto out_free; /* * pointers go into the block we allocated, * memory is | beacon_data | head | tail | */ bcn->head = ((u8 *) bcn) + sizeof(*bcn); /* fill in the head */ mgmt = skb_put_zero(skb, hdr_len); mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_BEACON); eth_broadcast_addr(mgmt->da); memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN); ieee80211_mps_set_frame_flags(sdata, NULL, (void *) mgmt); mgmt->u.beacon.beacon_int = cpu_to_le16(sdata->vif.bss_conf.beacon_int); mgmt->u.beacon.capab_info |= cpu_to_le16( sdata->u.mesh.security ? WLAN_CAPABILITY_PRIVACY : 0); pos = skb_put(skb, 2); *pos++ = WLAN_EID_SSID; *pos++ = 0x0; rcu_read_lock(); csa = rcu_dereference(ifmsh->csa); if (csa) { enum nl80211_channel_type ct; struct cfg80211_chan_def *chandef; int ie_len = 2 + sizeof(struct ieee80211_channel_sw_ie) + 2 + sizeof(struct ieee80211_mesh_chansw_params_ie); pos = skb_put_zero(skb, ie_len); *pos++ = WLAN_EID_CHANNEL_SWITCH; *pos++ = 3; *pos++ = 0x0; *pos++ = ieee80211_frequency_to_channel( csa->settings.chandef.chan->center_freq); bcn->cntdwn_current_counter = csa->settings.count; bcn->cntdwn_counter_offsets[0] = hdr_len + 6; *pos++ = csa->settings.count; *pos++ = WLAN_EID_CHAN_SWITCH_PARAM; *pos++ = 6; if (ifmsh->csa_role == IEEE80211_MESH_CSA_ROLE_INIT) { *pos++ = ifmsh->mshcfg.dot11MeshTTL; *pos |= WLAN_EID_CHAN_SWITCH_PARAM_INITIATOR; } else { *pos++ = ifmsh->chsw_ttl; } *pos++ |= csa->settings.block_tx ? WLAN_EID_CHAN_SWITCH_PARAM_TX_RESTRICT : 0x00; put_unaligned_le16(WLAN_REASON_MESH_CHAN, pos); pos += 2; put_unaligned_le16(ifmsh->pre_value, pos); pos += 2; switch (csa->settings.chandef.width) { case NL80211_CHAN_WIDTH_40: ie_len = 2 + sizeof(struct ieee80211_sec_chan_offs_ie); pos = skb_put_zero(skb, ie_len); *pos++ = WLAN_EID_SECONDARY_CHANNEL_OFFSET; /* EID */ *pos++ = 1; /* len */ ct = cfg80211_get_chandef_type(&csa->settings.chandef); if (ct == NL80211_CHAN_HT40PLUS) *pos++ = IEEE80211_HT_PARAM_CHA_SEC_ABOVE; else *pos++ = IEEE80211_HT_PARAM_CHA_SEC_BELOW; break; case NL80211_CHAN_WIDTH_80: case NL80211_CHAN_WIDTH_80P80: case NL80211_CHAN_WIDTH_160: /* Channel Switch Wrapper + Wide Bandwidth CSA IE */ ie_len = 2 + 2 + sizeof(struct ieee80211_wide_bw_chansw_ie); pos = skb_put_zero(skb, ie_len); *pos++ = WLAN_EID_CHANNEL_SWITCH_WRAPPER; /* EID */ *pos++ = 5; /* len */ /* put sub IE */ chandef = &csa->settings.chandef; ieee80211_ie_build_wide_bw_cs(pos, chandef); break; default: break; } } rcu_read_unlock(); if (ieee80211_put_srates_elem(skb, sband, sdata->vif.bss_conf.basic_rates, rate_flags, 0, WLAN_EID_SUPP_RATES) || mesh_add_ds_params_ie(sdata, skb)) goto out_free; bcn->head_len = skb->len; memcpy(bcn->head, skb->data, bcn->head_len); /* now the tail */ skb_trim(skb, 0); bcn->tail = bcn->head + bcn->head_len; if (ieee80211_put_srates_elem(skb, sband, sdata->vif.bss_conf.basic_rates, rate_flags, 0, WLAN_EID_EXT_SUPP_RATES) || mesh_add_rsn_ie(sdata, skb) || mesh_add_ht_cap_ie(sdata, skb) || mesh_add_ht_oper_ie(sdata, skb) || mesh_add_meshid_ie(sdata, skb) || mesh_add_meshconf_ie(sdata, skb) || mesh_add_awake_window_ie(sdata, skb) || mesh_add_vht_cap_ie(sdata, skb) || mesh_add_vht_oper_ie(sdata, skb) || mesh_add_he_cap_ie(sdata, skb, ie_len_he_cap) || mesh_add_he_oper_ie(sdata, skb) || mesh_add_he_6ghz_cap_ie(sdata, skb) || mesh_add_eht_cap_ie(sdata, skb, ie_len_eht_cap) || mesh_add_eht_oper_ie(sdata, skb) || mesh_add_vendor_ies(sdata, skb)) goto out_free; bcn->tail_len = skb->len; memcpy(bcn->tail, skb->data, bcn->tail_len); ieee80211_mesh_update_bss_params(sdata, bcn->tail, bcn->tail_len); bcn->meshconf = (struct ieee80211_meshconf_ie *) (bcn->tail + ifmsh->meshconf_offset); dev_kfree_skb(skb); rcu_assign_pointer(ifmsh->beacon, bcn); return 0; out_free: kfree(bcn); dev_kfree_skb(skb); return -ENOMEM; } static int ieee80211_mesh_rebuild_beacon(struct ieee80211_sub_if_data *sdata) { struct beacon_data *old_bcn; int ret; old_bcn = sdata_dereference(sdata->u.mesh.beacon, sdata); ret = ieee80211_mesh_build_beacon(&sdata->u.mesh); if (ret) /* just reuse old beacon */ return ret; if (old_bcn) kfree_rcu(old_bcn, rcu_head); return 0; } void ieee80211_mbss_info_change_notify(struct ieee80211_sub_if_data *sdata, u64 changed) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; unsigned long bits = changed; u32 bit; if (!bits) return; /* if we race with running work, worst case this work becomes a noop */ for_each_set_bit(bit, &bits, sizeof(changed) * BITS_PER_BYTE) set_bit(bit, ifmsh->mbss_changed); set_bit(MESH_WORK_MBSS_CHANGED, &ifmsh->wrkq_flags); wiphy_work_queue(sdata->local->hw.wiphy, &sdata->work); } int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct ieee80211_local *local = sdata->local; u64 changed = BSS_CHANGED_BEACON | BSS_CHANGED_BEACON_ENABLED | BSS_CHANGED_HT | BSS_CHANGED_BASIC_RATES | BSS_CHANGED_BEACON_INT | BSS_CHANGED_MCAST_RATE; local->fif_other_bss++; /* mesh ifaces must set allmulti to forward mcast traffic */ atomic_inc(&local->iff_allmultis); ieee80211_configure_filter(local); ifmsh->mesh_cc_id = 0; /* Disabled */ /* register sync ops from extensible synchronization framework */ ifmsh->sync_ops = ieee80211_mesh_sync_ops_get(ifmsh->mesh_sp_id); ifmsh->sync_offset_clockdrift_max = 0; set_bit(MESH_WORK_HOUSEKEEPING, &ifmsh->wrkq_flags); ieee80211_mesh_root_setup(ifmsh); wiphy_work_queue(local->hw.wiphy, &sdata->work); sdata->vif.bss_conf.ht_operation_mode = ifmsh->mshcfg.ht_opmode; sdata->vif.bss_conf.enable_beacon = true; changed |= ieee80211_mps_local_status_update(sdata); if (ieee80211_mesh_build_beacon(ifmsh)) { ieee80211_stop_mesh(sdata); return -ENOMEM; } ieee80211_recalc_dtim(local, sdata); ieee80211_link_info_change_notify(sdata, &sdata->deflink, changed); netif_carrier_on(sdata->dev); return 0; } void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct beacon_data *bcn; netif_carrier_off(sdata->dev); /* flush STAs and mpaths on this iface */ sta_info_flush(sdata, -1); ieee80211_free_keys(sdata, true); mesh_path_flush_by_iface(sdata); /* stop the beacon */ ifmsh->mesh_id_len = 0; sdata->vif.bss_conf.enable_beacon = false; sdata->beacon_rate_set = false; clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state); ieee80211_link_info_change_notify(sdata, &sdata->deflink, BSS_CHANGED_BEACON_ENABLED); /* remove beacon */ bcn = sdata_dereference(ifmsh->beacon, sdata); RCU_INIT_POINTER(ifmsh->beacon, NULL); kfree_rcu(bcn, rcu_head); /* free all potentially still buffered group-addressed frames */ local->total_ps_buffered -= skb_queue_len(&ifmsh->ps.bc_buf); skb_queue_purge(&ifmsh->ps.bc_buf); del_timer_sync(&sdata->u.mesh.housekeeping_timer); del_timer_sync(&sdata->u.mesh.mesh_path_root_timer); del_timer_sync(&sdata->u.mesh.mesh_path_timer); /* clear any mesh work (for next join) we may have accrued */ ifmsh->wrkq_flags = 0; memset(ifmsh->mbss_changed, 0, sizeof(ifmsh->mbss_changed)); local->fif_other_bss--; atomic_dec(&local->iff_allmultis); ieee80211_configure_filter(local); } static void ieee80211_mesh_csa_mark_radar(struct ieee80211_sub_if_data *sdata) { int err; /* if the current channel is a DFS channel, mark the channel as * unavailable. */ err = cfg80211_chandef_dfs_required(sdata->local->hw.wiphy, &sdata->vif.bss_conf.chanreq.oper, NL80211_IFTYPE_MESH_POINT); if (err > 0) cfg80211_radar_event(sdata->local->hw.wiphy, &sdata->vif.bss_conf.chanreq.oper, GFP_ATOMIC); } static bool ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata, struct ieee802_11_elems *elems, bool beacon) { struct cfg80211_csa_settings params; struct ieee80211_csa_ie csa_ie; struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct ieee80211_supported_band *sband; int err; struct ieee80211_conn_settings conn = ieee80211_conn_settings_unlimited; u32 vht_cap_info = 0; lockdep_assert_wiphy(sdata->local->hw.wiphy); sband = ieee80211_get_sband(sdata); if (!sband) return false; switch (sdata->vif.bss_conf.chanreq.oper.width) { case NL80211_CHAN_WIDTH_20_NOHT: conn.mode = IEEE80211_CONN_MODE_LEGACY; conn.bw_limit = IEEE80211_CONN_BW_LIMIT_20; break; case NL80211_CHAN_WIDTH_20: conn.mode = IEEE80211_CONN_MODE_HT; conn.bw_limit = IEEE80211_CONN_BW_LIMIT_20; break; case NL80211_CHAN_WIDTH_40: conn.mode = IEEE80211_CONN_MODE_HT; conn.bw_limit = IEEE80211_CONN_BW_LIMIT_40; break; default: break; } if (elems->vht_cap_elem) vht_cap_info = le32_to_cpu(elems->vht_cap_elem->vht_cap_info); memset(&params, 0, sizeof(params)); err = ieee80211_parse_ch_switch_ie(sdata, elems, sband->band, vht_cap_info, &conn, sdata->vif.addr, false, &csa_ie); if (err < 0) return false; if (err) return false; /* Mark the channel unavailable if the reason for the switch is * regulatory. */ if (csa_ie.reason_code == WLAN_REASON_MESH_CHAN_REGULATORY) ieee80211_mesh_csa_mark_radar(sdata); params.chandef = csa_ie.chanreq.oper; params.count = csa_ie.count; if (!cfg80211_chandef_usable(sdata->local->hw.wiphy, &params.chandef, IEEE80211_CHAN_DISABLED) || !cfg80211_reg_can_beacon(sdata->local->hw.wiphy, &params.chandef, NL80211_IFTYPE_MESH_POINT)) { sdata_info(sdata, "mesh STA %pM switches to unsupported channel (%d MHz, width:%d, CF1/2: %d/%d MHz), aborting\n", sdata->vif.addr, params.chandef.chan->center_freq, params.chandef.width, params.chandef.center_freq1, params.chandef.center_freq2); return false; } err = cfg80211_chandef_dfs_required(sdata->local->hw.wiphy, &params.chandef, NL80211_IFTYPE_MESH_POINT); if (err < 0) return false; if (err > 0 && !ifmsh->userspace_handles_dfs) { sdata_info(sdata, "mesh STA %pM switches to channel requiring DFS (%d MHz, width:%d, CF1/2: %d/%d MHz), aborting\n", sdata->vif.addr, params.chandef.chan->center_freq, params.chandef.width, params.chandef.center_freq1, params.chandef.center_freq2); return false; } params.radar_required = err; if (cfg80211_chandef_identical(&params.chandef, &sdata->vif.bss_conf.chanreq.oper)) { mcsa_dbg(sdata, "received csa with an identical chandef, ignoring\n"); return true; } mcsa_dbg(sdata, "received channel switch announcement to go to channel %d MHz\n", params.chandef.chan->center_freq); params.block_tx = csa_ie.mode & WLAN_EID_CHAN_SWITCH_PARAM_TX_RESTRICT; if (beacon) { ifmsh->chsw_ttl = csa_ie.ttl - 1; if (ifmsh->pre_value >= csa_ie.pre_value) return false; ifmsh->pre_value = csa_ie.pre_value; } if (ifmsh->chsw_ttl >= ifmsh->mshcfg.dot11MeshTTL) return false; ifmsh->csa_role = IEEE80211_MESH_CSA_ROLE_REPEATER; if (ieee80211_channel_switch(sdata->local->hw.wiphy, sdata->dev, &params) < 0) return false; return true; } static void ieee80211_mesh_rx_probe_req(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len) { struct ieee80211_local *local = sdata->local; struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct sk_buff *presp; struct beacon_data *bcn; struct ieee80211_mgmt *hdr; struct ieee802_11_elems *elems; size_t baselen; u8 *pos; pos = mgmt->u.probe_req.variable; baselen = (u8 *) pos - (u8 *) mgmt; if (baselen > len) return; elems = ieee802_11_parse_elems(pos, len - baselen, false, NULL); if (!elems) return; if (!elems->mesh_id) goto free; /* 802.11-2012 10.1.4.3.2 */ if ((!ether_addr_equal(mgmt->da, sdata->vif.addr) && !is_broadcast_ether_addr(mgmt->da)) || elems->ssid_len != 0) goto free; if (elems->mesh_id_len != 0 && (elems->mesh_id_len != ifmsh->mesh_id_len || memcmp(elems->mesh_id, ifmsh->mesh_id, ifmsh->mesh_id_len))) goto free; rcu_read_lock(); bcn = rcu_dereference(ifmsh->beacon); if (!bcn) goto out; presp = dev_alloc_skb(local->tx_headroom + bcn->head_len + bcn->tail_len); if (!presp) goto out; skb_reserve(presp, local->tx_headroom); skb_put_data(presp, bcn->head, bcn->head_len); skb_put_data(presp, bcn->tail, bcn->tail_len); hdr = (struct ieee80211_mgmt *) presp->data; hdr->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_PROBE_RESP); memcpy(hdr->da, mgmt->sa, ETH_ALEN); IEEE80211_SKB_CB(presp)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; ieee80211_tx_skb(sdata, presp); out: rcu_read_unlock(); free: kfree(elems); } static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, u16 stype, struct ieee80211_mgmt *mgmt, size_t len, struct ieee80211_rx_status *rx_status) { struct ieee80211_local *local = sdata->local; struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct ieee802_11_elems *elems; struct ieee80211_channel *channel; size_t baselen; int freq; enum nl80211_band band = rx_status->band; /* ignore ProbeResp to foreign address */ if (stype == IEEE80211_STYPE_PROBE_RESP && !ether_addr_equal(mgmt->da, sdata->vif.addr)) return; baselen = (u8 *) mgmt->u.probe_resp.variable - (u8 *) mgmt; if (baselen > len) return; elems = ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen, false, NULL); if (!elems) return; /* ignore non-mesh or secure / unsecure mismatch */ if ((!elems->mesh_id || !elems->mesh_config) || (elems->rsn && sdata->u.mesh.security == IEEE80211_MESH_SEC_NONE) || (!elems->rsn && sdata->u.mesh.security != IEEE80211_MESH_SEC_NONE)) goto free; if (elems->ds_params) freq = ieee80211_channel_to_frequency(elems->ds_params[0], band); else freq = rx_status->freq; channel = ieee80211_get_channel(local->hw.wiphy, freq); if (!channel || channel->flags & IEEE80211_CHAN_DISABLED) goto free; if (mesh_matches_local(sdata, elems)) { mpl_dbg(sdata, "rssi_threshold=%d,rx_status->signal=%d\n", sdata->u.mesh.mshcfg.rssi_threshold, rx_status->signal); if (!sdata->u.mesh.user_mpm || sdata->u.mesh.mshcfg.rssi_threshold == 0 || sdata->u.mesh.mshcfg.rssi_threshold < rx_status->signal) mesh_neighbour_update(sdata, mgmt->sa, elems, rx_status); if (ifmsh->csa_role != IEEE80211_MESH_CSA_ROLE_INIT && !sdata->vif.bss_conf.csa_active) ieee80211_mesh_process_chnswitch(sdata, elems, true); } if (ifmsh->sync_ops) ifmsh->sync_ops->rx_bcn_presp(sdata, stype, mgmt, len, elems->mesh_config, rx_status); free: kfree(elems); } int ieee80211_mesh_finish_csa(struct ieee80211_sub_if_data *sdata, u64 *changed) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct mesh_csa_settings *tmp_csa_settings; int ret = 0; /* Reset the TTL value and Initiator flag */ ifmsh->csa_role = IEEE80211_MESH_CSA_ROLE_NONE; ifmsh->chsw_ttl = 0; /* Remove the CSA and MCSP elements from the beacon */ tmp_csa_settings = sdata_dereference(ifmsh->csa, sdata); RCU_INIT_POINTER(ifmsh->csa, NULL); if (tmp_csa_settings) kfree_rcu(tmp_csa_settings, rcu_head); ret = ieee80211_mesh_rebuild_beacon(sdata); if (ret) return -EINVAL; *changed |= BSS_CHANGED_BEACON; mcsa_dbg(sdata, "complete switching to center freq %d MHz", sdata->vif.bss_conf.chanreq.oper.chan->center_freq); return 0; } int ieee80211_mesh_csa_beacon(struct ieee80211_sub_if_data *sdata, struct cfg80211_csa_settings *csa_settings, u64 *changed) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct mesh_csa_settings *tmp_csa_settings; int ret = 0; lockdep_assert_wiphy(sdata->local->hw.wiphy); tmp_csa_settings = kmalloc(sizeof(*tmp_csa_settings), GFP_ATOMIC); if (!tmp_csa_settings) return -ENOMEM; memcpy(&tmp_csa_settings->settings, csa_settings, sizeof(struct cfg80211_csa_settings)); rcu_assign_pointer(ifmsh->csa, tmp_csa_settings); ret = ieee80211_mesh_rebuild_beacon(sdata); if (ret) { tmp_csa_settings = rcu_dereference(ifmsh->csa); RCU_INIT_POINTER(ifmsh->csa, NULL); kfree_rcu(tmp_csa_settings, rcu_head); return ret; } *changed |= BSS_CHANGED_BEACON; return 0; } static int mesh_fwd_csa_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len, struct ieee802_11_elems *elems) { struct ieee80211_mgmt *mgmt_fwd; struct sk_buff *skb; struct ieee80211_local *local = sdata->local; skb = dev_alloc_skb(local->tx_headroom + len); if (!skb) return -ENOMEM; skb_reserve(skb, local->tx_headroom); mgmt_fwd = skb_put(skb, len); elems->mesh_chansw_params_ie->mesh_ttl--; elems->mesh_chansw_params_ie->mesh_flags &= ~WLAN_EID_CHAN_SWITCH_PARAM_INITIATOR; memcpy(mgmt_fwd, mgmt, len); eth_broadcast_addr(mgmt_fwd->da); memcpy(mgmt_fwd->sa, sdata->vif.addr, ETH_ALEN); memcpy(mgmt_fwd->bssid, sdata->vif.addr, ETH_ALEN); ieee80211_tx_skb(sdata, skb); return 0; } static void mesh_rx_csa_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct ieee802_11_elems *elems; u16 pre_value; bool fwd_csa = true; size_t baselen; u8 *pos; if (mgmt->u.action.u.measurement.action_code != WLAN_ACTION_SPCT_CHL_SWITCH) return; pos = mgmt->u.action.u.chan_switch.variable; baselen = offsetof(struct ieee80211_mgmt, u.action.u.chan_switch.variable); elems = ieee802_11_parse_elems(pos, len - baselen, true, NULL); if (!elems) return; if (!mesh_matches_local(sdata, elems)) goto free; ifmsh->chsw_ttl = elems->mesh_chansw_params_ie->mesh_ttl; if (!--ifmsh->chsw_ttl) fwd_csa = false; pre_value = le16_to_cpu(elems->mesh_chansw_params_ie->mesh_pre_value); if (ifmsh->pre_value >= pre_value) goto free; ifmsh->pre_value = pre_value; if (!sdata->vif.bss_conf.csa_active && !ieee80211_mesh_process_chnswitch(sdata, elems, false)) { mcsa_dbg(sdata, "Failed to process CSA action frame"); goto free; } /* forward or re-broadcast the CSA frame */ if (fwd_csa) { if (mesh_fwd_csa_frame(sdata, mgmt, len, elems) < 0) mcsa_dbg(sdata, "Failed to forward the CSA frame"); } free: kfree(elems); } static void ieee80211_mesh_rx_mgmt_action(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len, struct ieee80211_rx_status *rx_status) { switch (mgmt->u.action.category) { case WLAN_CATEGORY_SELF_PROTECTED: switch (mgmt->u.action.u.self_prot.action_code) { case WLAN_SP_MESH_PEERING_OPEN: case WLAN_SP_MESH_PEERING_CLOSE: case WLAN_SP_MESH_PEERING_CONFIRM: mesh_rx_plink_frame(sdata, mgmt, len, rx_status); break; } break; case WLAN_CATEGORY_MESH_ACTION: if (mesh_action_is_path_sel(mgmt)) mesh_rx_path_sel_frame(sdata, mgmt, len); break; case WLAN_CATEGORY_SPECTRUM_MGMT: mesh_rx_csa_frame(sdata, mgmt, len); break; } } void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { struct ieee80211_rx_status *rx_status; struct ieee80211_mgmt *mgmt; u16 stype; lockdep_assert_wiphy(sdata->local->hw.wiphy); /* mesh already went down */ if (!sdata->u.mesh.mesh_id_len) return; rx_status = IEEE80211_SKB_RXCB(skb); mgmt = (struct ieee80211_mgmt *) skb->data; stype = le16_to_cpu(mgmt->frame_control) & IEEE80211_FCTL_STYPE; switch (stype) { case IEEE80211_STYPE_PROBE_RESP: case IEEE80211_STYPE_BEACON: ieee80211_mesh_rx_bcn_presp(sdata, stype, mgmt, skb->len, rx_status); break; case IEEE80211_STYPE_PROBE_REQ: ieee80211_mesh_rx_probe_req(sdata, mgmt, skb->len); break; case IEEE80211_STYPE_ACTION: ieee80211_mesh_rx_mgmt_action(sdata, mgmt, skb->len, rx_status); break; } } static void mesh_bss_info_changed(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; u32 bit; u64 changed = 0; for_each_set_bit(bit, ifmsh->mbss_changed, sizeof(changed) * BITS_PER_BYTE) { clear_bit(bit, ifmsh->mbss_changed); changed |= BIT(bit); } if (sdata->vif.bss_conf.enable_beacon && (changed & (BSS_CHANGED_BEACON | BSS_CHANGED_HT | BSS_CHANGED_BASIC_RATES | BSS_CHANGED_BEACON_INT))) if (ieee80211_mesh_rebuild_beacon(sdata)) return; ieee80211_link_info_change_notify(sdata, &sdata->deflink, changed); } void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; lockdep_assert_wiphy(sdata->local->hw.wiphy); /* mesh already went down */ if (!sdata->u.mesh.mesh_id_len) return; if (ifmsh->preq_queue_len && time_after(jiffies, ifmsh->last_preq + msecs_to_jiffies(ifmsh->mshcfg.dot11MeshHWMPpreqMinInterval))) mesh_path_start_discovery(sdata); if (test_and_clear_bit(MESH_WORK_HOUSEKEEPING, &ifmsh->wrkq_flags)) ieee80211_mesh_housekeeping(sdata); if (test_and_clear_bit(MESH_WORK_ROOT, &ifmsh->wrkq_flags)) ieee80211_mesh_rootpath(sdata); if (test_and_clear_bit(MESH_WORK_DRIFT_ADJUST, &ifmsh->wrkq_flags)) mesh_sync_adjust_tsf(sdata); if (test_and_clear_bit(MESH_WORK_MBSS_CHANGED, &ifmsh->wrkq_flags)) mesh_bss_info_changed(sdata); } void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; static u8 zero_addr[ETH_ALEN] = {}; timer_setup(&ifmsh->housekeeping_timer, ieee80211_mesh_housekeeping_timer, 0); ifmsh->accepting_plinks = true; atomic_set(&ifmsh->mpaths, 0); mesh_rmc_init(sdata); ifmsh->last_preq = jiffies; ifmsh->next_perr = jiffies; ifmsh->csa_role = IEEE80211_MESH_CSA_ROLE_NONE; ifmsh->nonpeer_pm = NL80211_MESH_POWER_ACTIVE; /* Allocate all mesh structures when creating the first mesh interface. */ if (!mesh_allocated) ieee80211s_init(); mesh_pathtbl_init(sdata); timer_setup(&ifmsh->mesh_path_timer, ieee80211_mesh_path_timer, 0); timer_setup(&ifmsh->mesh_path_root_timer, ieee80211_mesh_path_root_timer, 0); INIT_LIST_HEAD(&ifmsh->preq_queue.list); skb_queue_head_init(&ifmsh->ps.bc_buf); spin_lock_init(&ifmsh->mesh_preq_queue_lock); spin_lock_init(&ifmsh->sync_offset_lock); RCU_INIT_POINTER(ifmsh->beacon, NULL); sdata->vif.bss_conf.bssid = zero_addr; } void ieee80211_mesh_teardown_sdata(struct ieee80211_sub_if_data *sdata) { mesh_rmc_free(sdata); mesh_pathtbl_unregister(sdata); }
11 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PACKET_INTERNAL_H__ #define __PACKET_INTERNAL_H__ #include <linux/refcount.h> struct packet_mclist { struct packet_mclist *next; int ifindex; int count; unsigned short type; unsigned short alen; unsigned char addr[MAX_ADDR_LEN]; }; /* kbdq - kernel block descriptor queue */ struct tpacket_kbdq_core { struct pgv *pkbdq; unsigned int feature_req_word; unsigned int hdrlen; unsigned char reset_pending_on_curr_blk; unsigned char delete_blk_timer; unsigned short kactive_blk_num; unsigned short blk_sizeof_priv; /* last_kactive_blk_num: * trick to see if user-space has caught up * in order to avoid refreshing timer when every single pkt arrives. */ unsigned short last_kactive_blk_num; char *pkblk_start; char *pkblk_end; int kblk_size; unsigned int max_frame_len; unsigned int knum_blocks; uint64_t knxt_seq_num; char *prev; char *nxt_offset; struct sk_buff *skb; rwlock_t blk_fill_in_prog_lock; /* Default is set to 8ms */ #define DEFAULT_PRB_RETIRE_TOV (8) unsigned short retire_blk_tov; unsigned short version; unsigned long tov_in_jiffies; /* timer to retire an outstanding block */ struct timer_list retire_blk_timer; }; struct pgv { char *buffer; }; struct packet_ring_buffer { struct pgv *pg_vec; unsigned int head; unsigned int frames_per_block; unsigned int frame_size; unsigned int frame_max; unsigned int pg_vec_order; unsigned int pg_vec_pages; unsigned int pg_vec_len; unsigned int __percpu *pending_refcnt; union { unsigned long *rx_owner_map; struct tpacket_kbdq_core prb_bdqc; }; }; extern struct mutex fanout_mutex; #define PACKET_FANOUT_MAX (1 << 16) struct packet_fanout { possible_net_t net; unsigned int num_members; u32 max_num_members; u16 id; u8 type; u8 flags; union { atomic_t rr_cur; struct bpf_prog __rcu *bpf_prog; }; struct list_head list; spinlock_t lock; refcount_t sk_ref; struct packet_type prot_hook ____cacheline_aligned_in_smp; struct sock __rcu *arr[] __counted_by(max_num_members); }; struct packet_rollover { int sock; atomic_long_t num; atomic_long_t num_huge; atomic_long_t num_failed; #define ROLLOVER_HLEN (L1_CACHE_BYTES / sizeof(u32)) u32 history[ROLLOVER_HLEN] ____cacheline_aligned; } ____cacheline_aligned_in_smp; struct packet_sock { /* struct sock has to be the first member of packet_sock */ struct sock sk; struct packet_fanout *fanout; union tpacket_stats_u stats; struct packet_ring_buffer rx_ring; struct packet_ring_buffer tx_ring; int copy_thresh; spinlock_t bind_lock; struct mutex pg_vec_lock; unsigned long flags; int ifindex; /* bound device */ u8 vnet_hdr_sz; __be16 num; struct packet_rollover *rollover; struct packet_mclist *mclist; atomic_long_t mapped; enum tpacket_versions tp_version; unsigned int tp_hdrlen; unsigned int tp_reserve; unsigned int tp_tstamp; struct completion skb_completion; struct net_device __rcu *cached_dev; struct packet_type prot_hook ____cacheline_aligned_in_smp; atomic_t tp_drops ____cacheline_aligned_in_smp; }; #define pkt_sk(ptr) container_of_const(ptr, struct packet_sock, sk) enum packet_sock_flags { PACKET_SOCK_ORIGDEV, PACKET_SOCK_AUXDATA, PACKET_SOCK_TX_HAS_OFF, PACKET_SOCK_TP_LOSS, PACKET_SOCK_RUNNING, PACKET_SOCK_PRESSURE, PACKET_SOCK_QDISC_BYPASS, }; static inline void packet_sock_flag_set(struct packet_sock *po, enum packet_sock_flags flag, bool val) { if (val) set_bit(flag, &po->flags); else clear_bit(flag, &po->flags); } static inline bool packet_sock_flag(const struct packet_sock *po, enum packet_sock_flags flag) { return test_bit(flag, &po->flags); } #endif
15 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 #ifndef __LZ4DEFS_H__ #define __LZ4DEFS_H__ /* * lz4defs.h -- common and architecture specific defines for the kernel usage * LZ4 - Fast LZ compression algorithm * Copyright (C) 2011-2016, Yann Collet. * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * You can contact the author at : * - LZ4 homepage : http://www.lz4.org * - LZ4 source repository : https://github.com/lz4/lz4 * * Changed for kernel usage by: * Sven Schmidt <4sschmid@informatik.uni-hamburg.de> */ #include <linux/unaligned.h> #include <linux/bitops.h> #include <linux/string.h> /* memset, memcpy */ #define FORCE_INLINE __always_inline /*-************************************ * Basic Types **************************************/ #include <linux/types.h> typedef uint8_t BYTE; typedef uint16_t U16; typedef uint32_t U32; typedef int32_t S32; typedef uint64_t U64; typedef uintptr_t uptrval; /*-************************************ * Architecture specifics **************************************/ #if defined(CONFIG_64BIT) #define LZ4_ARCH64 1 #else #define LZ4_ARCH64 0 #endif #if defined(__LITTLE_ENDIAN) #define LZ4_LITTLE_ENDIAN 1 #else #define LZ4_LITTLE_ENDIAN 0 #endif /*-************************************ * Constants **************************************/ #define MINMATCH 4 #define WILDCOPYLENGTH 8 #define LASTLITERALS 5 #define MFLIMIT (WILDCOPYLENGTH + MINMATCH) /* * ensure it's possible to write 2 x wildcopyLength * without overflowing output buffer */ #define MATCH_SAFEGUARD_DISTANCE ((2 * WILDCOPYLENGTH) - MINMATCH) /* Increase this value ==> compression run slower on incompressible data */ #define LZ4_SKIPTRIGGER 6 #define HASH_UNIT sizeof(size_t) #define KB (1 << 10) #define MB (1 << 20) #define GB (1U << 30) #define MAXD_LOG 16 #define MAX_DISTANCE ((1 << MAXD_LOG) - 1) #define STEPSIZE sizeof(size_t) #define ML_BITS 4 #define ML_MASK ((1U << ML_BITS) - 1) #define RUN_BITS (8 - ML_BITS) #define RUN_MASK ((1U << RUN_BITS) - 1) /*-************************************ * Reading and writing into memory **************************************/ static FORCE_INLINE U16 LZ4_read16(const void *ptr) { return get_unaligned((const U16 *)ptr); } static FORCE_INLINE U32 LZ4_read32(const void *ptr) { return get_unaligned((const U32 *)ptr); } static FORCE_INLINE size_t LZ4_read_ARCH(const void *ptr) { return get_unaligned((const size_t *)ptr); } static FORCE_INLINE void LZ4_write16(void *memPtr, U16 value) { put_unaligned(value, (U16 *)memPtr); } static FORCE_INLINE void LZ4_write32(void *memPtr, U32 value) { put_unaligned(value, (U32 *)memPtr); } static FORCE_INLINE U16 LZ4_readLE16(const void *memPtr) { return get_unaligned_le16(memPtr); } static FORCE_INLINE void LZ4_writeLE16(void *memPtr, U16 value) { return put_unaligned_le16(value, memPtr); } /* * LZ4 relies on memcpy with a constant size being inlined. In freestanding * environments, the compiler can't assume the implementation of memcpy() is * standard compliant, so apply its specialized memcpy() inlining logic. When * possible, use __builtin_memcpy() to tell the compiler to analyze memcpy() * as-if it were standard compliant, so it can inline it in freestanding * environments. This is needed when decompressing the Linux Kernel, for example. */ #define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size) #define LZ4_memmove(dst, src, size) __builtin_memmove(dst, src, size) static FORCE_INLINE void LZ4_copy8(void *dst, const void *src) { #if LZ4_ARCH64 U64 a = get_unaligned((const U64 *)src); put_unaligned(a, (U64 *)dst); #else U32 a = get_unaligned((const U32 *)src); U32 b = get_unaligned((const U32 *)src + 1); put_unaligned(a, (U32 *)dst); put_unaligned(b, (U32 *)dst + 1); #endif } /* * customized variant of memcpy, * which can overwrite up to 7 bytes beyond dstEnd */ static FORCE_INLINE void LZ4_wildCopy(void *dstPtr, const void *srcPtr, void *dstEnd) { BYTE *d = (BYTE *)dstPtr; const BYTE *s = (const BYTE *)srcPtr; BYTE *const e = (BYTE *)dstEnd; do { LZ4_copy8(d, s); d += 8; s += 8; } while (d < e); } static FORCE_INLINE unsigned int LZ4_NbCommonBytes(register size_t val) { #if LZ4_LITTLE_ENDIAN return __ffs(val) >> 3; #else return (BITS_PER_LONG - 1 - __fls(val)) >> 3; #endif } static FORCE_INLINE unsigned int LZ4_count( const BYTE *pIn, const BYTE *pMatch, const BYTE *pInLimit) { const BYTE *const pStart = pIn; while (likely(pIn < pInLimit - (STEPSIZE - 1))) { size_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn); if (!diff) { pIn += STEPSIZE; pMatch += STEPSIZE; continue; } pIn += LZ4_NbCommonBytes(diff); return (unsigned int)(pIn - pStart); } #if LZ4_ARCH64 if ((pIn < (pInLimit - 3)) && (LZ4_read32(pMatch) == LZ4_read32(pIn))) { pIn += 4; pMatch += 4; } #endif if ((pIn < (pInLimit - 1)) && (LZ4_read16(pMatch) == LZ4_read16(pIn))) { pIn += 2; pMatch += 2; } if ((pIn < pInLimit) && (*pMatch == *pIn)) pIn++; return (unsigned int)(pIn - pStart); } typedef enum { noLimit = 0, limitedOutput = 1 } limitedOutput_directive; typedef enum { byPtr, byU32, byU16 } tableType_t; typedef enum { noDict = 0, withPrefix64k, usingExtDict } dict_directive; typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive; typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive; typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive; #define LZ4_STATIC_ASSERT(c) BUILD_BUG_ON(!(c)) #endif
5 5 93 91 5 69 69 445 446 42 42 46 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 // SPDX-License-Identifier: GPL-2.0-only #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> #include <linux/netfilter.h> #include <linux/rhashtable.h> #include <linux/netdevice.h> #include <net/ip.h> #include <net/ip6_route.h> #include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_flow_table.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_tuple.h> static DEFINE_MUTEX(flowtable_lock); static LIST_HEAD(flowtables); static void flow_offload_fill_dir(struct flow_offload *flow, enum flow_offload_tuple_dir dir) { struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple; struct nf_conntrack_tuple *ctt = &flow->ct->tuplehash[dir].tuple; ft->dir = dir; switch (ctt->src.l3num) { case NFPROTO_IPV4: ft->src_v4 = ctt->src.u3.in; ft->dst_v4 = ctt->dst.u3.in; break; case NFPROTO_IPV6: ft->src_v6 = ctt->src.u3.in6; ft->dst_v6 = ctt->dst.u3.in6; break; } ft->l3proto = ctt->src.l3num; ft->l4proto = ctt->dst.protonum; switch (ctt->dst.protonum) { case IPPROTO_TCP: case IPPROTO_UDP: ft->src_port = ctt->src.u.tcp.port; ft->dst_port = ctt->dst.u.tcp.port; break; } } struct flow_offload *flow_offload_alloc(struct nf_conn *ct) { struct flow_offload *flow; if (unlikely(nf_ct_is_dying(ct))) return NULL; flow = kzalloc(sizeof(*flow), GFP_ATOMIC); if (!flow) return NULL; refcount_inc(&ct->ct_general.use); flow->ct = ct; flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_ORIGINAL); flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_REPLY); if (ct->status & IPS_SRC_NAT) __set_bit(NF_FLOW_SNAT, &flow->flags); if (ct->status & IPS_DST_NAT) __set_bit(NF_FLOW_DNAT, &flow->flags); return flow; } EXPORT_SYMBOL_GPL(flow_offload_alloc); static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple) { if (flow_tuple->l3proto == NFPROTO_IPV6) return rt6_get_cookie(dst_rt6_info(flow_tuple->dst_cache)); return 0; } static struct dst_entry *nft_route_dst_fetch(struct nf_flow_route *route, enum flow_offload_tuple_dir dir) { struct dst_entry *dst = route->tuple[dir].dst; route->tuple[dir].dst = NULL; return dst; } static int flow_offload_fill_route(struct flow_offload *flow, struct nf_flow_route *route, enum flow_offload_tuple_dir dir) { struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple; struct dst_entry *dst = nft_route_dst_fetch(route, dir); int i, j = 0; switch (flow_tuple->l3proto) { case NFPROTO_IPV4: flow_tuple->mtu = ip_dst_mtu_maybe_forward(dst, true); break; case NFPROTO_IPV6: flow_tuple->mtu = ip6_dst_mtu_maybe_forward(dst, true); break; } flow_tuple->iifidx = route->tuple[dir].in.ifindex; for (i = route->tuple[dir].in.num_encaps - 1; i >= 0; i--) { flow_tuple->encap[j].id = route->tuple[dir].in.encap[i].id; flow_tuple->encap[j].proto = route->tuple[dir].in.encap[i].proto; if (route->tuple[dir].in.ingress_vlans & BIT(i)) flow_tuple->in_vlan_ingress |= BIT(j); j++; } flow_tuple->encap_num = route->tuple[dir].in.num_encaps; switch (route->tuple[dir].xmit_type) { case FLOW_OFFLOAD_XMIT_DIRECT: memcpy(flow_tuple->out.h_dest, route->tuple[dir].out.h_dest, ETH_ALEN); memcpy(flow_tuple->out.h_source, route->tuple[dir].out.h_source, ETH_ALEN); flow_tuple->out.ifidx = route->tuple[dir].out.ifindex; flow_tuple->out.hw_ifidx = route->tuple[dir].out.hw_ifindex; dst_release(dst); break; case FLOW_OFFLOAD_XMIT_XFRM: case FLOW_OFFLOAD_XMIT_NEIGH: flow_tuple->dst_cache = dst; flow_tuple->dst_cookie = flow_offload_dst_cookie(flow_tuple); break; default: WARN_ON_ONCE(1); break; } flow_tuple->xmit_type = route->tuple[dir].xmit_type; return 0; } static void nft_flow_dst_release(struct flow_offload *flow, enum flow_offload_tuple_dir dir) { if (flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH || flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM) dst_release(flow->tuplehash[dir].tuple.dst_cache); } void flow_offload_route_init(struct flow_offload *flow, struct nf_flow_route *route) { flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL); flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY); flow->type = NF_FLOW_OFFLOAD_ROUTE; } EXPORT_SYMBOL_GPL(flow_offload_route_init); static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp) { tcp->seen[0].td_maxwin = 0; tcp->seen[1].td_maxwin = 0; } static void flow_offload_fixup_ct(struct nf_conn *ct) { struct net *net = nf_ct_net(ct); int l4num = nf_ct_protonum(ct); s32 timeout; if (l4num == IPPROTO_TCP) { struct nf_tcp_net *tn = nf_tcp_pernet(net); flow_offload_fixup_tcp(&ct->proto.tcp); timeout = tn->timeouts[ct->proto.tcp.state]; timeout -= tn->offload_timeout; } else if (l4num == IPPROTO_UDP) { struct nf_udp_net *tn = nf_udp_pernet(net); enum udp_conntrack state = test_bit(IPS_SEEN_REPLY_BIT, &ct->status) ? UDP_CT_REPLIED : UDP_CT_UNREPLIED; timeout = tn->timeouts[state]; timeout -= tn->offload_timeout; } else { return; } if (timeout < 0) timeout = 0; if (nf_flow_timeout_delta(READ_ONCE(ct->timeout)) > (__s32)timeout) WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout); } static void flow_offload_route_release(struct flow_offload *flow) { nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL); nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_REPLY); } void flow_offload_free(struct flow_offload *flow) { switch (flow->type) { case NF_FLOW_OFFLOAD_ROUTE: flow_offload_route_release(flow); break; default: break; } nf_ct_put(flow->ct); kfree_rcu(flow, rcu_head); } EXPORT_SYMBOL_GPL(flow_offload_free); static u32 flow_offload_hash(const void *data, u32 len, u32 seed) { const struct flow_offload_tuple *tuple = data; return jhash(tuple, offsetof(struct flow_offload_tuple, __hash), seed); } static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed) { const struct flow_offload_tuple_rhash *tuplehash = data; return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, __hash), seed); } static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg, const void *ptr) { const struct flow_offload_tuple *tuple = arg->key; const struct flow_offload_tuple_rhash *x = ptr; if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, __hash))) return 1; return 0; } static const struct rhashtable_params nf_flow_offload_rhash_params = { .head_offset = offsetof(struct flow_offload_tuple_rhash, node), .hashfn = flow_offload_hash, .obj_hashfn = flow_offload_hash_obj, .obj_cmpfn = flow_offload_hash_cmp, .automatic_shrinking = true, }; unsigned long flow_offload_get_timeout(struct flow_offload *flow) { unsigned long timeout = NF_FLOW_TIMEOUT; struct net *net = nf_ct_net(flow->ct); int l4num = nf_ct_protonum(flow->ct); if (l4num == IPPROTO_TCP) { struct nf_tcp_net *tn = nf_tcp_pernet(net); timeout = tn->offload_timeout; } else if (l4num == IPPROTO_UDP) { struct nf_udp_net *tn = nf_udp_pernet(net); timeout = tn->offload_timeout; } return timeout; } int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow) { int err; flow->timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow); err = rhashtable_insert_fast(&flow_table->rhashtable, &flow->tuplehash[0].node, nf_flow_offload_rhash_params); if (err < 0) return err; err = rhashtable_insert_fast(&flow_table->rhashtable, &flow->tuplehash[1].node, nf_flow_offload_rhash_params); if (err < 0) { rhashtable_remove_fast(&flow_table->rhashtable, &flow->tuplehash[0].node, nf_flow_offload_rhash_params); return err; } nf_ct_offload_timeout(flow->ct); if (nf_flowtable_hw_offload(flow_table)) { __set_bit(NF_FLOW_HW, &flow->flags); nf_flow_offload_add(flow_table, flow); } return 0; } EXPORT_SYMBOL_GPL(flow_offload_add); void flow_offload_refresh(struct nf_flowtable *flow_table, struct flow_offload *flow, bool force) { u32 timeout; timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow); if (force || timeout - READ_ONCE(flow->timeout) > HZ) WRITE_ONCE(flow->timeout, timeout); else return; if (likely(!nf_flowtable_hw_offload(flow_table))) return; nf_flow_offload_add(flow_table, flow); } EXPORT_SYMBOL_GPL(flow_offload_refresh); static inline bool nf_flow_has_expired(const struct flow_offload *flow) { return nf_flow_timeout_delta(flow->timeout) <= 0; } static void flow_offload_del(struct nf_flowtable *flow_table, struct flow_offload *flow) { rhashtable_remove_fast(&flow_table->rhashtable, &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node, nf_flow_offload_rhash_params); rhashtable_remove_fast(&flow_table->rhashtable, &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node, nf_flow_offload_rhash_params); flow_offload_free(flow); } void flow_offload_teardown(struct flow_offload *flow) { clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status); set_bit(NF_FLOW_TEARDOWN, &flow->flags); flow_offload_fixup_ct(flow->ct); } EXPORT_SYMBOL_GPL(flow_offload_teardown); struct flow_offload_tuple_rhash * flow_offload_lookup(struct nf_flowtable *flow_table, struct flow_offload_tuple *tuple) { struct flow_offload_tuple_rhash *tuplehash; struct flow_offload *flow; int dir; tuplehash = rhashtable_lookup(&flow_table->rhashtable, tuple, nf_flow_offload_rhash_params); if (!tuplehash) return NULL; dir = tuplehash->tuple.dir; flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) return NULL; if (unlikely(nf_ct_is_dying(flow->ct))) return NULL; return tuplehash; } EXPORT_SYMBOL_GPL(flow_offload_lookup); static int nf_flow_table_iterate(struct nf_flowtable *flow_table, void (*iter)(struct nf_flowtable *flowtable, struct flow_offload *flow, void *data), void *data) { struct flow_offload_tuple_rhash *tuplehash; struct rhashtable_iter hti; struct flow_offload *flow; int err = 0; rhashtable_walk_enter(&flow_table->rhashtable, &hti); rhashtable_walk_start(&hti); while ((tuplehash = rhashtable_walk_next(&hti))) { if (IS_ERR(tuplehash)) { if (PTR_ERR(tuplehash) != -EAGAIN) { err = PTR_ERR(tuplehash); break; } continue; } if (tuplehash->tuple.dir) continue; flow = container_of(tuplehash, struct flow_offload, tuplehash[0]); iter(flow_table, flow, data); } rhashtable_walk_stop(&hti); rhashtable_walk_exit(&hti); return err; } static bool nf_flow_custom_gc(struct nf_flowtable *flow_table, const struct flow_offload *flow) { return flow_table->type->gc && flow_table->type->gc(flow); } static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table, struct flow_offload *flow, void *data) { if (nf_flow_has_expired(flow) || nf_ct_is_dying(flow->ct) || nf_flow_custom_gc(flow_table, flow)) flow_offload_teardown(flow); if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) { if (test_bit(NF_FLOW_HW, &flow->flags)) { if (!test_bit(NF_FLOW_HW_DYING, &flow->flags)) nf_flow_offload_del(flow_table, flow); else if (test_bit(NF_FLOW_HW_DEAD, &flow->flags)) flow_offload_del(flow_table, flow); } else { flow_offload_del(flow_table, flow); } } else if (test_bit(NF_FLOW_HW, &flow->flags)) { nf_flow_offload_stats(flow_table, flow); } } void nf_flow_table_gc_run(struct nf_flowtable *flow_table) { nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, NULL); } static void nf_flow_offload_work_gc(struct work_struct *work) { struct nf_flowtable *flow_table; flow_table = container_of(work, struct nf_flowtable, gc_work.work); nf_flow_table_gc_run(flow_table); queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ); } static void nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff, __be16 port, __be16 new_port) { struct tcphdr *tcph; tcph = (void *)(skb_network_header(skb) + thoff); inet_proto_csum_replace2(&tcph->check, skb, port, new_port, false); } static void nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff, __be16 port, __be16 new_port) { struct udphdr *udph; udph = (void *)(skb_network_header(skb) + thoff); if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { inet_proto_csum_replace2(&udph->check, skb, port, new_port, false); if (!udph->check) udph->check = CSUM_MANGLED_0; } } static void nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff, u8 protocol, __be16 port, __be16 new_port) { switch (protocol) { case IPPROTO_TCP: nf_flow_nat_port_tcp(skb, thoff, port, new_port); break; case IPPROTO_UDP: nf_flow_nat_port_udp(skb, thoff, port, new_port); break; } } void nf_flow_snat_port(const struct flow_offload *flow, struct sk_buff *skb, unsigned int thoff, u8 protocol, enum flow_offload_tuple_dir dir) { struct flow_ports *hdr; __be16 port, new_port; hdr = (void *)(skb_network_header(skb) + thoff); switch (dir) { case FLOW_OFFLOAD_DIR_ORIGINAL: port = hdr->source; new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port; hdr->source = new_port; break; case FLOW_OFFLOAD_DIR_REPLY: port = hdr->dest; new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port; hdr->dest = new_port; break; } nf_flow_nat_port(skb, thoff, protocol, port, new_port); } EXPORT_SYMBOL_GPL(nf_flow_snat_port); void nf_flow_dnat_port(const struct flow_offload *flow, struct sk_buff *skb, unsigned int thoff, u8 protocol, enum flow_offload_tuple_dir dir) { struct flow_ports *hdr; __be16 port, new_port; hdr = (void *)(skb_network_header(skb) + thoff); switch (dir) { case FLOW_OFFLOAD_DIR_ORIGINAL: port = hdr->dest; new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port; hdr->dest = new_port; break; case FLOW_OFFLOAD_DIR_REPLY: port = hdr->source; new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port; hdr->source = new_port; break; } nf_flow_nat_port(skb, thoff, protocol, port, new_port); } EXPORT_SYMBOL_GPL(nf_flow_dnat_port); int nf_flow_table_init(struct nf_flowtable *flowtable) { int err; INIT_DELAYED_WORK(&flowtable->gc_work, nf_flow_offload_work_gc); flow_block_init(&flowtable->flow_block); init_rwsem(&flowtable->flow_block_lock); err = rhashtable_init(&flowtable->rhashtable, &nf_flow_offload_rhash_params); if (err < 0) return err; queue_delayed_work(system_power_efficient_wq, &flowtable->gc_work, HZ); mutex_lock(&flowtable_lock); list_add(&flowtable->list, &flowtables); mutex_unlock(&flowtable_lock); return 0; } EXPORT_SYMBOL_GPL(nf_flow_table_init); static void nf_flow_table_do_cleanup(struct nf_flowtable *flow_table, struct flow_offload *flow, void *data) { struct net_device *dev = data; if (!dev) { flow_offload_teardown(flow); return; } if (net_eq(nf_ct_net(flow->ct), dev_net(dev)) && (flow->tuplehash[0].tuple.iifidx == dev->ifindex || flow->tuplehash[1].tuple.iifidx == dev->ifindex)) flow_offload_teardown(flow); } void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable, struct net_device *dev) { nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev); flush_delayed_work(&flowtable->gc_work); nf_flow_table_offload_flush(flowtable); } void nf_flow_table_cleanup(struct net_device *dev) { struct nf_flowtable *flowtable; mutex_lock(&flowtable_lock); list_for_each_entry(flowtable, &flowtables, list) nf_flow_table_gc_cleanup(flowtable, dev); mutex_unlock(&flowtable_lock); } EXPORT_SYMBOL_GPL(nf_flow_table_cleanup); void nf_flow_table_free(struct nf_flowtable *flow_table) { mutex_lock(&flowtable_lock); list_del(&flow_table->list); mutex_unlock(&flowtable_lock); cancel_delayed_work_sync(&flow_table->gc_work); nf_flow_table_offload_flush(flow_table); /* ... no more pending work after this stage ... */ nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL); nf_flow_table_gc_run(flow_table); nf_flow_table_offload_flush_cleanup(flow_table); rhashtable_destroy(&flow_table->rhashtable); } EXPORT_SYMBOL_GPL(nf_flow_table_free); static int nf_flow_table_init_net(struct net *net) { net->ft.stat = alloc_percpu(struct nf_flow_table_stat); return net->ft.stat ? 0 : -ENOMEM; } static void nf_flow_table_fini_net(struct net *net) { free_percpu(net->ft.stat); } static int nf_flow_table_pernet_init(struct net *net) { int ret; ret = nf_flow_table_init_net(net); if (ret < 0) return ret; ret = nf_flow_table_init_proc(net); if (ret < 0) goto out_proc; return 0; out_proc: nf_flow_table_fini_net(net); return ret; } static void nf_flow_table_pernet_exit(struct list_head *net_exit_list) { struct net *net; list_for_each_entry(net, net_exit_list, exit_list) { nf_flow_table_fini_proc(net); nf_flow_table_fini_net(net); } } static struct pernet_operations nf_flow_table_net_ops = { .init = nf_flow_table_pernet_init, .exit_batch = nf_flow_table_pernet_exit, }; static int __init nf_flow_table_module_init(void) { int ret; ret = register_pernet_subsys(&nf_flow_table_net_ops); if (ret < 0) return ret; ret = nf_flow_table_offload_init(); if (ret) goto out_offload; ret = nf_flow_register_bpf(); if (ret) goto out_bpf; return 0; out_bpf: nf_flow_table_offload_exit(); out_offload: unregister_pernet_subsys(&nf_flow_table_net_ops); return ret; } static void __exit nf_flow_table_module_exit(void) { nf_flow_table_offload_exit(); unregister_pernet_subsys(&nf_flow_table_net_ops); } module_init(nf_flow_table_module_init); module_exit(nf_flow_table_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); MODULE_DESCRIPTION("Netfilter flow table module");
10408 2131 136 22 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 /* SPDX-License-Identifier: GPL-2.0 */ #include <linux/fs.h> #define DEVCG_ACC_MKNOD 1 #define DEVCG_ACC_READ 2 #define DEVCG_ACC_WRITE 4 #define DEVCG_ACC_MASK (DEVCG_ACC_MKNOD | DEVCG_ACC_READ | DEVCG_ACC_WRITE) #define DEVCG_DEV_BLOCK 1 #define DEVCG_DEV_CHAR 2 #define DEVCG_DEV_ALL 4 /* this represents all devices */ #if defined(CONFIG_CGROUP_DEVICE) || defined(CONFIG_CGROUP_BPF) int devcgroup_check_permission(short type, u32 major, u32 minor, short access); static inline int devcgroup_inode_permission(struct inode *inode, int mask) { short type, access = 0; if (likely(!inode->i_rdev)) return 0; if (S_ISBLK(inode->i_mode)) type = DEVCG_DEV_BLOCK; else if (S_ISCHR(inode->i_mode)) type = DEVCG_DEV_CHAR; else return 0; if (mask & MAY_WRITE) access |= DEVCG_ACC_WRITE; if (mask & MAY_READ) access |= DEVCG_ACC_READ; return devcgroup_check_permission(type, imajor(inode), iminor(inode), access); } static inline int devcgroup_inode_mknod(int mode, dev_t dev) { short type; if (!S_ISBLK(mode) && !S_ISCHR(mode)) return 0; if (S_ISCHR(mode) && dev == WHITEOUT_DEV) return 0; if (S_ISBLK(mode)) type = DEVCG_DEV_BLOCK; else type = DEVCG_DEV_CHAR; return devcgroup_check_permission(type, MAJOR(dev), MINOR(dev), DEVCG_ACC_MKNOD); } #else static inline int devcgroup_check_permission(short type, u32 major, u32 minor, short access) { return 0; } static inline int devcgroup_inode_permission(struct inode *inode, int mask) { return 0; } static inline int devcgroup_inode_mknod(int mode, dev_t dev) { return 0; } #endif
16 6 13 2 1 8 8 7 1 2 1 1 2 13 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 // SPDX-License-Identifier: GPL-2.0-only #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_tables.h> struct nft_last { unsigned long jiffies; unsigned int set; }; struct nft_last_priv { struct nft_last *last; }; static const struct nla_policy nft_last_policy[NFTA_LAST_MAX + 1] = { [NFTA_LAST_SET] = { .type = NLA_U32 }, [NFTA_LAST_MSECS] = { .type = NLA_U64 }, }; static int nft_last_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_last_priv *priv = nft_expr_priv(expr); struct nft_last *last; u64 last_jiffies; int err; last = kzalloc(sizeof(*last), GFP_KERNEL_ACCOUNT); if (!last) return -ENOMEM; if (tb[NFTA_LAST_SET]) last->set = ntohl(nla_get_be32(tb[NFTA_LAST_SET])); if (last->set && tb[NFTA_LAST_MSECS]) { err = nf_msecs_to_jiffies64(tb[NFTA_LAST_MSECS], &last_jiffies); if (err < 0) goto err; last->jiffies = jiffies - (unsigned long)last_jiffies; } priv->last = last; return 0; err: kfree(last); return err; } static void nft_last_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_last_priv *priv = nft_expr_priv(expr); struct nft_last *last = priv->last; if (READ_ONCE(last->jiffies) != jiffies) WRITE_ONCE(last->jiffies, jiffies); if (READ_ONCE(last->set) == 0) WRITE_ONCE(last->set, 1); } static int nft_last_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { struct nft_last_priv *priv = nft_expr_priv(expr); struct nft_last *last = priv->last; unsigned long last_jiffies = READ_ONCE(last->jiffies); u32 last_set = READ_ONCE(last->set); __be64 msecs; if (time_before(jiffies, last_jiffies)) { WRITE_ONCE(last->set, 0); last_set = 0; } if (last_set) msecs = nf_jiffies64_to_msecs(jiffies - last_jiffies); else msecs = 0; if (nla_put_be32(skb, NFTA_LAST_SET, htonl(last_set)) || nla_put_be64(skb, NFTA_LAST_MSECS, msecs, NFTA_LAST_PAD)) goto nla_put_failure; return 0; nla_put_failure: return -1; } static void nft_last_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { struct nft_last_priv *priv = nft_expr_priv(expr); kfree(priv->last); } static int nft_last_clone(struct nft_expr *dst, const struct nft_expr *src, gfp_t gfp) { struct nft_last_priv *priv_dst = nft_expr_priv(dst); struct nft_last_priv *priv_src = nft_expr_priv(src); priv_dst->last = kzalloc(sizeof(*priv_dst->last), gfp); if (!priv_dst->last) return -ENOMEM; priv_dst->last->set = priv_src->last->set; priv_dst->last->jiffies = priv_src->last->jiffies; return 0; } static const struct nft_expr_ops nft_last_ops = { .type = &nft_last_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_last_priv)), .eval = nft_last_eval, .init = nft_last_init, .destroy = nft_last_destroy, .clone = nft_last_clone, .dump = nft_last_dump, .reduce = NFT_REDUCE_READONLY, }; struct nft_expr_type nft_last_type __read_mostly = { .name = "last", .ops = &nft_last_ops, .policy = nft_last_policy, .maxattr = NFTA_LAST_MAX, .flags = NFT_EXPR_STATEFUL, .owner = THIS_MODULE, };
12 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2008 Oracle. All rights reserved. */ #include <linux/kernel.h> #include <linux/bio.h> #include <linux/file.h> #include <linux/fs.h> #include <linux/pagemap.h> #include <linux/pagevec.h> #include <linux/highmem.h> #include <linux/kthread.h> #include <linux/time.h> #include <linux/init.h> #include <linux/string.h> #include <linux/backing-dev.h> #include <linux/writeback.h> #include <linux/psi.h> #include <linux/slab.h> #include <linux/sched/mm.h> #include <linux/log2.h> #include <linux/shrinker.h> #include <crypto/hash.h> #include "misc.h" #include "ctree.h" #include "fs.h" #include "btrfs_inode.h" #include "bio.h" #include "ordered-data.h" #include "compression.h" #include "extent_io.h" #include "extent_map.h" #include "subpage.h" #include "messages.h" #include "super.h" static struct bio_set btrfs_compressed_bioset; static const char* const btrfs_compress_types[] = { "", "zlib", "lzo", "zstd" }; const char* btrfs_compress_type2str(enum btrfs_compression_type type) { switch (type) { case BTRFS_COMPRESS_ZLIB: case BTRFS_COMPRESS_LZO: case BTRFS_COMPRESS_ZSTD: case BTRFS_COMPRESS_NONE: return btrfs_compress_types[type]; default: break; } return NULL; } static inline struct compressed_bio *to_compressed_bio(struct btrfs_bio *bbio) { return container_of(bbio, struct compressed_bio, bbio); } static struct compressed_bio *alloc_compressed_bio(struct btrfs_inode *inode, u64 start, blk_opf_t op, btrfs_bio_end_io_t end_io) { struct btrfs_bio *bbio; bbio = btrfs_bio(bio_alloc_bioset(NULL, BTRFS_MAX_COMPRESSED_PAGES, op, GFP_NOFS, &btrfs_compressed_bioset)); btrfs_bio_init(bbio, inode->root->fs_info, end_io, NULL); bbio->inode = inode; bbio->file_offset = start; return to_compressed_bio(bbio); } bool btrfs_compress_is_valid_type(const char *str, size_t len) { int i; for (i = 1; i < ARRAY_SIZE(btrfs_compress_types); i++) { size_t comp_len = strlen(btrfs_compress_types[i]); if (len < comp_len) continue; if (!strncmp(btrfs_compress_types[i], str, comp_len)) return true; } return false; } static int compression_compress_pages(int type, struct list_head *ws, struct address_space *mapping, u64 start, struct folio **folios, unsigned long *out_folios, unsigned long *total_in, unsigned long *total_out) { switch (type) { case BTRFS_COMPRESS_ZLIB: return zlib_compress_folios(ws, mapping, start, folios, out_folios, total_in, total_out); case BTRFS_COMPRESS_LZO: return lzo_compress_folios(ws, mapping, start, folios, out_folios, total_in, total_out); case BTRFS_COMPRESS_ZSTD: return zstd_compress_folios(ws, mapping, start, folios, out_folios, total_in, total_out); case BTRFS_COMPRESS_NONE: default: /* * This can happen when compression races with remount setting * it to 'no compress', while caller doesn't call * inode_need_compress() to check if we really need to * compress. * * Not a big deal, just need to inform caller that we * haven't allocated any pages yet. */ *out_folios = 0; return -E2BIG; } } static int compression_decompress_bio(struct list_head *ws, struct compressed_bio *cb) { switch (cb->compress_type) { case BTRFS_COMPRESS_ZLIB: return zlib_decompress_bio(ws, cb); case BTRFS_COMPRESS_LZO: return lzo_decompress_bio(ws, cb); case BTRFS_COMPRESS_ZSTD: return zstd_decompress_bio(ws, cb); case BTRFS_COMPRESS_NONE: default: /* * This can't happen, the type is validated several times * before we get here. */ BUG(); } } static int compression_decompress(int type, struct list_head *ws, const u8 *data_in, struct folio *dest_folio, unsigned long dest_pgoff, size_t srclen, size_t destlen) { switch (type) { case BTRFS_COMPRESS_ZLIB: return zlib_decompress(ws, data_in, dest_folio, dest_pgoff, srclen, destlen); case BTRFS_COMPRESS_LZO: return lzo_decompress(ws, data_in, dest_folio, dest_pgoff, srclen, destlen); case BTRFS_COMPRESS_ZSTD: return zstd_decompress(ws, data_in, dest_folio, dest_pgoff, srclen, destlen); case BTRFS_COMPRESS_NONE: default: /* * This can't happen, the type is validated several times * before we get here. */ BUG(); } } static void btrfs_free_compressed_folios(struct compressed_bio *cb) { for (unsigned int i = 0; i < cb->nr_folios; i++) btrfs_free_compr_folio(cb->compressed_folios[i]); kfree(cb->compressed_folios); } static int btrfs_decompress_bio(struct compressed_bio *cb); /* * Global cache of last unused pages for compression/decompression. */ static struct btrfs_compr_pool { struct shrinker *shrinker; spinlock_t lock; struct list_head list; int count; int thresh; } compr_pool; static unsigned long btrfs_compr_pool_count(struct shrinker *sh, struct shrink_control *sc) { int ret; /* * We must not read the values more than once if 'ret' gets expanded in * the return statement so we don't accidentally return a negative * number, even if the first condition finds it positive. */ ret = READ_ONCE(compr_pool.count) - READ_ONCE(compr_pool.thresh); return ret > 0 ? ret : 0; } static unsigned long btrfs_compr_pool_scan(struct shrinker *sh, struct shrink_control *sc) { struct list_head remove; struct list_head *tmp, *next; int freed; if (compr_pool.count == 0) return SHRINK_STOP; INIT_LIST_HEAD(&remove); /* For now, just simply drain the whole list. */ spin_lock(&compr_pool.lock); list_splice_init(&compr_pool.list, &remove); freed = compr_pool.count; compr_pool.count = 0; spin_unlock(&compr_pool.lock); list_for_each_safe(tmp, next, &remove) { struct page *page = list_entry(tmp, struct page, lru); ASSERT(page_ref_count(page) == 1); put_page(page); } return freed; } /* * Common wrappers for page allocation from compression wrappers */ struct folio *btrfs_alloc_compr_folio(void) { struct folio *folio = NULL; spin_lock(&compr_pool.lock); if (compr_pool.count > 0) { folio = list_first_entry(&compr_pool.list, struct folio, lru); list_del_init(&folio->lru); compr_pool.count--; } spin_unlock(&compr_pool.lock); if (folio) return folio; return folio_alloc(GFP_NOFS, 0); } void btrfs_free_compr_folio(struct folio *folio) { bool do_free = false; spin_lock(&compr_pool.lock); if (compr_pool.count > compr_pool.thresh) { do_free = true; } else { list_add(&folio->lru, &compr_pool.list); compr_pool.count++; } spin_unlock(&compr_pool.lock); if (!do_free) return; ASSERT(folio_ref_count(folio) == 1); folio_put(folio); } static void end_bbio_compressed_read(struct btrfs_bio *bbio) { struct compressed_bio *cb = to_compressed_bio(bbio); blk_status_t status = bbio->bio.bi_status; if (!status) status = errno_to_blk_status(btrfs_decompress_bio(cb)); btrfs_free_compressed_folios(cb); btrfs_bio_end_io(cb->orig_bbio, status); bio_put(&bbio->bio); } /* * Clear the writeback bits on all of the file * pages for a compressed write */ static noinline void end_compressed_writeback(const struct compressed_bio *cb) { struct inode *inode = &cb->bbio.inode->vfs_inode; struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); unsigned long index = cb->start >> PAGE_SHIFT; unsigned long end_index = (cb->start + cb->len - 1) >> PAGE_SHIFT; struct folio_batch fbatch; const int error = blk_status_to_errno(cb->bbio.bio.bi_status); int i; int ret; if (error) mapping_set_error(inode->i_mapping, error); folio_batch_init(&fbatch); while (index <= end_index) { ret = filemap_get_folios(inode->i_mapping, &index, end_index, &fbatch); if (ret == 0) return; for (i = 0; i < ret; i++) { struct folio *folio = fbatch.folios[i]; btrfs_folio_clamp_clear_writeback(fs_info, folio, cb->start, cb->len); } folio_batch_release(&fbatch); } /* the inode may be gone now */ } static void btrfs_finish_compressed_write_work(struct work_struct *work) { struct compressed_bio *cb = container_of(work, struct compressed_bio, write_end_work); btrfs_finish_ordered_extent(cb->bbio.ordered, NULL, cb->start, cb->len, cb->bbio.bio.bi_status == BLK_STS_OK); if (cb->writeback) end_compressed_writeback(cb); /* Note, our inode could be gone now */ btrfs_free_compressed_folios(cb); bio_put(&cb->bbio.bio); } /* * Do the cleanup once all the compressed pages hit the disk. This will clear * writeback on the file pages and free the compressed pages. * * This also calls the writeback end hooks for the file pages so that metadata * and checksums can be updated in the file. */ static void end_bbio_compressed_write(struct btrfs_bio *bbio) { struct compressed_bio *cb = to_compressed_bio(bbio); struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info; queue_work(fs_info->compressed_write_workers, &cb->write_end_work); } static void btrfs_add_compressed_bio_folios(struct compressed_bio *cb) { struct bio *bio = &cb->bbio.bio; u32 offset = 0; while (offset < cb->compressed_len) { int ret; u32 len = min_t(u32, cb->compressed_len - offset, PAGE_SIZE); /* Maximum compressed extent is smaller than bio size limit. */ ret = bio_add_folio(bio, cb->compressed_folios[offset >> PAGE_SHIFT], len, 0); ASSERT(ret); offset += len; } } /* * worker function to build and submit bios for previously compressed pages. * The corresponding pages in the inode should be marked for writeback * and the compressed pages should have a reference on them for dropping * when the IO is complete. * * This also checksums the file bytes and gets things ready for * the end io hooks. */ void btrfs_submit_compressed_write(struct btrfs_ordered_extent *ordered, struct folio **compressed_folios, unsigned int nr_folios, blk_opf_t write_flags, bool writeback) { struct btrfs_inode *inode = ordered->inode; struct btrfs_fs_info *fs_info = inode->root->fs_info; struct compressed_bio *cb; ASSERT(IS_ALIGNED(ordered->file_offset, fs_info->sectorsize)); ASSERT(IS_ALIGNED(ordered->num_bytes, fs_info->sectorsize)); cb = alloc_compressed_bio(inode, ordered->file_offset, REQ_OP_WRITE | write_flags, end_bbio_compressed_write); cb->start = ordered->file_offset; cb->len = ordered->num_bytes; cb->compressed_folios = compressed_folios; cb->compressed_len = ordered->disk_num_bytes; cb->writeback = writeback; INIT_WORK(&cb->write_end_work, btrfs_finish_compressed_write_work); cb->nr_folios = nr_folios; cb->bbio.bio.bi_iter.bi_sector = ordered->disk_bytenr >> SECTOR_SHIFT; cb->bbio.ordered = ordered; btrfs_add_compressed_bio_folios(cb); btrfs_submit_bbio(&cb->bbio, 0); } /* * Add extra pages in the same compressed file extent so that we don't need to * re-read the same extent again and again. * * NOTE: this won't work well for subpage, as for subpage read, we lock the * full page then submit bio for each compressed/regular extents. * * This means, if we have several sectors in the same page points to the same * on-disk compressed data, we will re-read the same extent many times and * this function can only help for the next page. */ static noinline int add_ra_bio_pages(struct inode *inode, u64 compressed_end, struct compressed_bio *cb, int *memstall, unsigned long *pflags) { struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); unsigned long end_index; struct bio *orig_bio = &cb->orig_bbio->bio; u64 cur = cb->orig_bbio->file_offset + orig_bio->bi_iter.bi_size; u64 isize = i_size_read(inode); int ret; struct folio *folio; struct extent_map *em; struct address_space *mapping = inode->i_mapping; struct extent_map_tree *em_tree; struct extent_io_tree *tree; int sectors_missed = 0; em_tree = &BTRFS_I(inode)->extent_tree; tree = &BTRFS_I(inode)->io_tree; if (isize == 0) return 0; /* * For current subpage support, we only support 64K page size, * which means maximum compressed extent size (128K) is just 2x page * size. * This makes readahead less effective, so here disable readahead for * subpage for now, until full compressed write is supported. */ if (fs_info->sectorsize < PAGE_SIZE) return 0; end_index = (i_size_read(inode) - 1) >> PAGE_SHIFT; while (cur < compressed_end) { u64 page_end; u64 pg_index = cur >> PAGE_SHIFT; u32 add_size; if (pg_index > end_index) break; folio = __filemap_get_folio(mapping, pg_index, 0, 0); if (!IS_ERR(folio)) { u64 folio_sz = folio_size(folio); u64 offset = offset_in_folio(folio, cur); folio_put(folio); sectors_missed += (folio_sz - offset) >> fs_info->sectorsize_bits; /* Beyond threshold, no need to continue */ if (sectors_missed > 4) break; /* * Jump to next page start as we already have page for * current offset. */ cur += (folio_sz - offset); continue; } folio = filemap_alloc_folio(mapping_gfp_constraint(mapping, ~__GFP_FS), 0); if (!folio) break; if (filemap_add_folio(mapping, folio, pg_index, GFP_NOFS)) { /* There is already a page, skip to page end */ cur += folio_size(folio); folio_put(folio); continue; } if (!*memstall && folio_test_workingset(folio)) { psi_memstall_enter(pflags); *memstall = 1; } ret = set_folio_extent_mapped(folio); if (ret < 0) { folio_unlock(folio); folio_put(folio); break; } page_end = (pg_index << PAGE_SHIFT) + folio_size(folio) - 1; lock_extent(tree, cur, page_end, NULL); read_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, cur, page_end + 1 - cur); read_unlock(&em_tree->lock); /* * At this point, we have a locked page in the page cache for * these bytes in the file. But, we have to make sure they map * to this compressed extent on disk. */ if (!em || cur < em->start || (cur + fs_info->sectorsize > extent_map_end(em)) || (extent_map_block_start(em) >> SECTOR_SHIFT) != orig_bio->bi_iter.bi_sector) { free_extent_map(em); unlock_extent(tree, cur, page_end, NULL); folio_unlock(folio); folio_put(folio); break; } add_size = min(em->start + em->len, page_end + 1) - cur; free_extent_map(em); unlock_extent(tree, cur, page_end, NULL); if (folio->index == end_index) { size_t zero_offset = offset_in_folio(folio, isize); if (zero_offset) { int zeros; zeros = folio_size(folio) - zero_offset; folio_zero_range(folio, zero_offset, zeros); } } if (!bio_add_folio(orig_bio, folio, add_size, offset_in_folio(folio, cur))) { folio_unlock(folio); folio_put(folio); break; } /* * If it's subpage, we also need to increase its * subpage::readers number, as at endio we will decrease * subpage::readers and to unlock the page. */ if (fs_info->sectorsize < PAGE_SIZE) btrfs_subpage_start_reader(fs_info, folio, cur, add_size); folio_put(folio); cur += add_size; } return 0; } /* * for a compressed read, the bio we get passed has all the inode pages * in it. We don't actually do IO on those pages but allocate new ones * to hold the compressed pages on disk. * * bio->bi_iter.bi_sector points to the compressed extent on disk * bio->bi_io_vec points to all of the inode pages * * After the compressed pages are read, we copy the bytes into the * bio we were passed and then call the bio end_io calls */ void btrfs_submit_compressed_read(struct btrfs_bio *bbio) { struct btrfs_inode *inode = bbio->inode; struct btrfs_fs_info *fs_info = inode->root->fs_info; struct extent_map_tree *em_tree = &inode->extent_tree; struct compressed_bio *cb; unsigned int compressed_len; u64 file_offset = bbio->file_offset; u64 em_len; u64 em_start; struct extent_map *em; unsigned long pflags; int memstall = 0; blk_status_t ret; int ret2; /* we need the actual starting offset of this extent in the file */ read_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, file_offset, fs_info->sectorsize); read_unlock(&em_tree->lock); if (!em) { ret = BLK_STS_IOERR; goto out; } ASSERT(extent_map_is_compressed(em)); compressed_len = em->disk_num_bytes; cb = alloc_compressed_bio(inode, file_offset, REQ_OP_READ, end_bbio_compressed_read); cb->start = em->start - em->offset; em_len = em->len; em_start = em->start; cb->len = bbio->bio.bi_iter.bi_size; cb->compressed_len = compressed_len; cb->compress_type = extent_map_compression(em); cb->orig_bbio = bbio; free_extent_map(em); cb->nr_folios = DIV_ROUND_UP(compressed_len, PAGE_SIZE); cb->compressed_folios = kcalloc(cb->nr_folios, sizeof(struct page *), GFP_NOFS); if (!cb->compressed_folios) { ret = BLK_STS_RESOURCE; goto out_free_bio; } ret2 = btrfs_alloc_folio_array(cb->nr_folios, cb->compressed_folios); if (ret2) { ret = BLK_STS_RESOURCE; goto out_free_compressed_pages; } add_ra_bio_pages(&inode->vfs_inode, em_start + em_len, cb, &memstall, &pflags); /* include any pages we added in add_ra-bio_pages */ cb->len = bbio->bio.bi_iter.bi_size; cb->bbio.bio.bi_iter.bi_sector = bbio->bio.bi_iter.bi_sector; btrfs_add_compressed_bio_folios(cb); if (memstall) psi_memstall_leave(&pflags); btrfs_submit_bbio(&cb->bbio, 0); return; out_free_compressed_pages: kfree(cb->compressed_folios); out_free_bio: bio_put(&cb->bbio.bio); out: btrfs_bio_end_io(bbio, ret); } /* * Heuristic uses systematic sampling to collect data from the input data * range, the logic can be tuned by the following constants: * * @SAMPLING_READ_SIZE - how many bytes will be copied from for each sample * @SAMPLING_INTERVAL - range from which the sampled data can be collected */ #define SAMPLING_READ_SIZE (16) #define SAMPLING_INTERVAL (256) /* * For statistical analysis of the input data we consider bytes that form a * Galois Field of 256 objects. Each object has an attribute count, ie. how * many times the object appeared in the sample. */ #define BUCKET_SIZE (256) /* * The size of the sample is based on a statistical sampling rule of thumb. * The common way is to perform sampling tests as long as the number of * elements in each cell is at least 5. * * Instead of 5, we choose 32 to obtain more accurate results. * If the data contain the maximum number of symbols, which is 256, we obtain a * sample size bound by 8192. * * For a sample of at most 8KB of data per data range: 16 consecutive bytes * from up to 512 locations. */ #define MAX_SAMPLE_SIZE (BTRFS_MAX_UNCOMPRESSED * \ SAMPLING_READ_SIZE / SAMPLING_INTERVAL) struct bucket_item { u32 count; }; struct heuristic_ws { /* Partial copy of input data */ u8 *sample; u32 sample_size; /* Buckets store counters for each byte value */ struct bucket_item *bucket; /* Sorting buffer */ struct bucket_item *bucket_b; struct list_head list; }; static struct workspace_manager heuristic_wsm; static void free_heuristic_ws(struct list_head *ws) { struct heuristic_ws *workspace; workspace = list_entry(ws, struct heuristic_ws, list); kvfree(workspace->sample); kfree(workspace->bucket); kfree(workspace->bucket_b); kfree(workspace); } static struct list_head *alloc_heuristic_ws(unsigned int level) { struct heuristic_ws *ws; ws = kzalloc(sizeof(*ws), GFP_KERNEL); if (!ws) return ERR_PTR(-ENOMEM); ws->sample = kvmalloc(MAX_SAMPLE_SIZE, GFP_KERNEL); if (!ws->sample) goto fail; ws->bucket = kcalloc(BUCKET_SIZE, sizeof(*ws->bucket), GFP_KERNEL); if (!ws->bucket) goto fail; ws->bucket_b = kcalloc(BUCKET_SIZE, sizeof(*ws->bucket_b), GFP_KERNEL); if (!ws->bucket_b) goto fail; INIT_LIST_HEAD(&ws->list); return &ws->list; fail: free_heuristic_ws(&ws->list); return ERR_PTR(-ENOMEM); } const struct btrfs_compress_op btrfs_heuristic_compress = { .workspace_manager = &heuristic_wsm, }; static const struct btrfs_compress_op * const btrfs_compress_op[] = { /* The heuristic is represented as compression type 0 */ &btrfs_heuristic_compress, &btrfs_zlib_compress, &btrfs_lzo_compress, &btrfs_zstd_compress, }; static struct list_head *alloc_workspace(int type, unsigned int level) { switch (type) { case BTRFS_COMPRESS_NONE: return alloc_heuristic_ws(level); case BTRFS_COMPRESS_ZLIB: return zlib_alloc_workspace(level); case BTRFS_COMPRESS_LZO: return lzo_alloc_workspace(level); case BTRFS_COMPRESS_ZSTD: return zstd_alloc_workspace(level); default: /* * This can't happen, the type is validated several times * before we get here. */ BUG(); } } static void free_workspace(int type, struct list_head *ws) { switch (type) { case BTRFS_COMPRESS_NONE: return free_heuristic_ws(ws); case BTRFS_COMPRESS_ZLIB: return zlib_free_workspace(ws); case BTRFS_COMPRESS_LZO: return lzo_free_workspace(ws); case BTRFS_COMPRESS_ZSTD: return zstd_free_workspace(ws); default: /* * This can't happen, the type is validated several times * before we get here. */ BUG(); } } static void btrfs_init_workspace_manager(int type) { struct workspace_manager *wsm; struct list_head *workspace; wsm = btrfs_compress_op[type]->workspace_manager; INIT_LIST_HEAD(&wsm->idle_ws); spin_lock_init(&wsm->ws_lock); atomic_set(&wsm->total_ws, 0); init_waitqueue_head(&wsm->ws_wait); /* * Preallocate one workspace for each compression type so we can * guarantee forward progress in the worst case */ workspace = alloc_workspace(type, 0); if (IS_ERR(workspace)) { pr_warn( "BTRFS: cannot preallocate compression workspace, will try later\n"); } else { atomic_set(&wsm->total_ws, 1); wsm->free_ws = 1; list_add(workspace, &wsm->idle_ws); } } static void btrfs_cleanup_workspace_manager(int type) { struct workspace_manager *wsman; struct list_head *ws; wsman = btrfs_compress_op[type]->workspace_manager; while (!list_empty(&wsman->idle_ws)) { ws = wsman->idle_ws.next; list_del(ws); free_workspace(type, ws); atomic_dec(&wsman->total_ws); } } /* * This finds an available workspace or allocates a new one. * If it's not possible to allocate a new one, waits until there's one. * Preallocation makes a forward progress guarantees and we do not return * errors. */ struct list_head *btrfs_get_workspace(int type, unsigned int level) { struct workspace_manager *wsm; struct list_head *workspace; int cpus = num_online_cpus(); unsigned nofs_flag; struct list_head *idle_ws; spinlock_t *ws_lock; atomic_t *total_ws; wait_queue_head_t *ws_wait; int *free_ws; wsm = btrfs_compress_op[type]->workspace_manager; idle_ws = &wsm->idle_ws; ws_lock = &wsm->ws_lock; total_ws = &wsm->total_ws; ws_wait = &wsm->ws_wait; free_ws = &wsm->free_ws; again: spin_lock(ws_lock); if (!list_empty(idle_ws)) { workspace = idle_ws->next; list_del(workspace); (*free_ws)--; spin_unlock(ws_lock); return workspace; } if (atomic_read(total_ws) > cpus) { DEFINE_WAIT(wait); spin_unlock(ws_lock); prepare_to_wait(ws_wait, &wait, TASK_UNINTERRUPTIBLE); if (atomic_read(total_ws) > cpus && !*free_ws) schedule(); finish_wait(ws_wait, &wait); goto again; } atomic_inc(total_ws); spin_unlock(ws_lock); /* * Allocation helpers call vmalloc that can't use GFP_NOFS, so we have * to turn it off here because we might get called from the restricted * context of btrfs_compress_bio/btrfs_compress_pages */ nofs_flag = memalloc_nofs_save(); workspace = alloc_workspace(type, level); memalloc_nofs_restore(nofs_flag); if (IS_ERR(workspace)) { atomic_dec(total_ws); wake_up(ws_wait); /* * Do not return the error but go back to waiting. There's a * workspace preallocated for each type and the compression * time is bounded so we get to a workspace eventually. This * makes our caller's life easier. * * To prevent silent and low-probability deadlocks (when the * initial preallocation fails), check if there are any * workspaces at all. */ if (atomic_read(total_ws) == 0) { static DEFINE_RATELIMIT_STATE(_rs, /* once per minute */ 60 * HZ, /* no burst */ 1); if (__ratelimit(&_rs)) { pr_warn("BTRFS: no compression workspaces, low memory, retrying\n"); } } goto again; } return workspace; } static struct list_head *get_workspace(int type, int level) { switch (type) { case BTRFS_COMPRESS_NONE: return btrfs_get_workspace(type, level); case BTRFS_COMPRESS_ZLIB: return zlib_get_workspace(level); case BTRFS_COMPRESS_LZO: return btrfs_get_workspace(type, level); case BTRFS_COMPRESS_ZSTD: return zstd_get_workspace(level); default: /* * This can't happen, the type is validated several times * before we get here. */ BUG(); } } /* * put a workspace struct back on the list or free it if we have enough * idle ones sitting around */ void btrfs_put_workspace(int type, struct list_head *ws) { struct workspace_manager *wsm; struct list_head *idle_ws; spinlock_t *ws_lock; atomic_t *total_ws; wait_queue_head_t *ws_wait; int *free_ws; wsm = btrfs_compress_op[type]->workspace_manager; idle_ws = &wsm->idle_ws; ws_lock = &wsm->ws_lock; total_ws = &wsm->total_ws; ws_wait = &wsm->ws_wait; free_ws = &wsm->free_ws; spin_lock(ws_lock); if (*free_ws <= num_online_cpus()) { list_add(ws, idle_ws); (*free_ws)++; spin_unlock(ws_lock); goto wake; } spin_unlock(ws_lock); free_workspace(type, ws); atomic_dec(total_ws); wake: cond_wake_up(ws_wait); } static void put_workspace(int type, struct list_head *ws) { switch (type) { case BTRFS_COMPRESS_NONE: return btrfs_put_workspace(type, ws); case BTRFS_COMPRESS_ZLIB: return btrfs_put_workspace(type, ws); case BTRFS_COMPRESS_LZO: return btrfs_put_workspace(type, ws); case BTRFS_COMPRESS_ZSTD: return zstd_put_workspace(ws); default: /* * This can't happen, the type is validated several times * before we get here. */ BUG(); } } /* * Adjust @level according to the limits of the compression algorithm or * fallback to default */ static unsigned int btrfs_compress_set_level(int type, unsigned level) { const struct btrfs_compress_op *ops = btrfs_compress_op[type]; if (level == 0) level = ops->default_level; else level = min(level, ops->max_level); return level; } /* Wrapper around find_get_page(), with extra error message. */ int btrfs_compress_filemap_get_folio(struct address_space *mapping, u64 start, struct folio **in_folio_ret) { struct folio *in_folio; /* * The compressed write path should have the folio locked already, thus * we only need to grab one reference. */ in_folio = filemap_get_folio(mapping, start >> PAGE_SHIFT); if (IS_ERR(in_folio)) { struct btrfs_inode *inode = BTRFS_I(mapping->host); btrfs_crit(inode->root->fs_info, "failed to get page cache, root %lld ino %llu file offset %llu", btrfs_root_id(inode->root), btrfs_ino(inode), start); return -ENOENT; } *in_folio_ret = in_folio; return 0; } /* * Given an address space and start and length, compress the bytes into @pages * that are allocated on demand. * * @type_level is encoded algorithm and level, where level 0 means whatever * default the algorithm chooses and is opaque here; * - compression algo are 0-3 * - the level are bits 4-7 * * @out_pages is an in/out parameter, holds maximum number of pages to allocate * and returns number of actually allocated pages * * @total_in is used to return the number of bytes actually read. It * may be smaller than the input length if we had to exit early because we * ran out of room in the pages array or because we cross the * max_out threshold. * * @total_out is an in/out parameter, must be set to the input length and will * be also used to return the total number of compressed bytes */ int btrfs_compress_folios(unsigned int type_level, struct address_space *mapping, u64 start, struct folio **folios, unsigned long *out_folios, unsigned long *total_in, unsigned long *total_out) { int type = btrfs_compress_type(type_level); int level = btrfs_compress_level(type_level); struct list_head *workspace; int ret; level = btrfs_compress_set_level(type, level); workspace = get_workspace(type, level); ret = compression_compress_pages(type, workspace, mapping, start, folios, out_folios, total_in, total_out); put_workspace(type, workspace); return ret; } static int btrfs_decompress_bio(struct compressed_bio *cb) { struct list_head *workspace; int ret; int type = cb->compress_type; workspace = get_workspace(type, 0); ret = compression_decompress_bio(workspace, cb); put_workspace(type, workspace); if (!ret) zero_fill_bio(&cb->orig_bbio->bio); return ret; } /* * a less complex decompression routine. Our compressed data fits in a * single page, and we want to read a single page out of it. * start_byte tells us the offset into the compressed data we're interested in */ int btrfs_decompress(int type, const u8 *data_in, struct folio *dest_folio, unsigned long dest_pgoff, size_t srclen, size_t destlen) { struct btrfs_fs_info *fs_info = folio_to_fs_info(dest_folio); struct list_head *workspace; const u32 sectorsize = fs_info->sectorsize; int ret; /* * The full destination page range should not exceed the page size. * And the @destlen should not exceed sectorsize, as this is only called for * inline file extents, which should not exceed sectorsize. */ ASSERT(dest_pgoff + destlen <= PAGE_SIZE && destlen <= sectorsize); workspace = get_workspace(type, 0); ret = compression_decompress(type, workspace, data_in, dest_folio, dest_pgoff, srclen, destlen); put_workspace(type, workspace); return ret; } int __init btrfs_init_compress(void) { if (bioset_init(&btrfs_compressed_bioset, BIO_POOL_SIZE, offsetof(struct compressed_bio, bbio.bio), BIOSET_NEED_BVECS)) return -ENOMEM; compr_pool.shrinker = shrinker_alloc(SHRINKER_NONSLAB, "btrfs-compr-pages"); if (!compr_pool.shrinker) return -ENOMEM; btrfs_init_workspace_manager(BTRFS_COMPRESS_NONE); btrfs_init_workspace_manager(BTRFS_COMPRESS_ZLIB); btrfs_init_workspace_manager(BTRFS_COMPRESS_LZO); zstd_init_workspace_manager(); spin_lock_init(&compr_pool.lock); INIT_LIST_HEAD(&compr_pool.list); compr_pool.count = 0; /* 128K / 4K = 32, for 8 threads is 256 pages. */ compr_pool.thresh = BTRFS_MAX_COMPRESSED / PAGE_SIZE * 8; compr_pool.shrinker->count_objects = btrfs_compr_pool_count; compr_pool.shrinker->scan_objects = btrfs_compr_pool_scan; compr_pool.shrinker->batch = 32; compr_pool.shrinker->seeks = DEFAULT_SEEKS; shrinker_register(compr_pool.shrinker); return 0; } void __cold btrfs_exit_compress(void) { /* For now scan drains all pages and does not touch the parameters. */ btrfs_compr_pool_scan(NULL, NULL); shrinker_free(compr_pool.shrinker); btrfs_cleanup_workspace_manager(BTRFS_COMPRESS_NONE); btrfs_cleanup_workspace_manager(BTRFS_COMPRESS_ZLIB); btrfs_cleanup_workspace_manager(BTRFS_COMPRESS_LZO); zstd_cleanup_workspace_manager(); bioset_exit(&btrfs_compressed_bioset); } /* * Copy decompressed data from working buffer to pages. * * @buf: The decompressed data buffer * @buf_len: The decompressed data length * @decompressed: Number of bytes that are already decompressed inside the * compressed extent * @cb: The compressed extent descriptor * @orig_bio: The original bio that the caller wants to read for * * An easier to understand graph is like below: * * |<- orig_bio ->| |<- orig_bio->| * |<------- full decompressed extent ----->| * |<----------- @cb range ---->| * | |<-- @buf_len -->| * |<--- @decompressed --->| * * Note that, @cb can be a subpage of the full decompressed extent, but * @cb->start always has the same as the orig_file_offset value of the full * decompressed extent. * * When reading compressed extent, we have to read the full compressed extent, * while @orig_bio may only want part of the range. * Thus this function will ensure only data covered by @orig_bio will be copied * to. * * Return 0 if we have copied all needed contents for @orig_bio. * Return >0 if we need continue decompress. */ int btrfs_decompress_buf2page(const char *buf, u32 buf_len, struct compressed_bio *cb, u32 decompressed) { struct bio *orig_bio = &cb->orig_bbio->bio; /* Offset inside the full decompressed extent */ u32 cur_offset; cur_offset = decompressed; /* The main loop to do the copy */ while (cur_offset < decompressed + buf_len) { struct bio_vec bvec; size_t copy_len; u32 copy_start; /* Offset inside the full decompressed extent */ u32 bvec_offset; bvec = bio_iter_iovec(orig_bio, orig_bio->bi_iter); /* * cb->start may underflow, but subtracting that value can still * give us correct offset inside the full decompressed extent. */ bvec_offset = page_offset(bvec.bv_page) + bvec.bv_offset - cb->start; /* Haven't reached the bvec range, exit */ if (decompressed + buf_len <= bvec_offset) return 1; copy_start = max(cur_offset, bvec_offset); copy_len = min(bvec_offset + bvec.bv_len, decompressed + buf_len) - copy_start; ASSERT(copy_len); /* * Extra range check to ensure we didn't go beyond * @buf + @buf_len. */ ASSERT(copy_start - decompressed < buf_len); memcpy_to_page(bvec.bv_page, bvec.bv_offset, buf + copy_start - decompressed, copy_len); cur_offset += copy_len; bio_advance(orig_bio, copy_len); /* Finished the bio */ if (!orig_bio->bi_iter.bi_size) return 0; } return 1; } /* * Shannon Entropy calculation * * Pure byte distribution analysis fails to determine compressibility of data. * Try calculating entropy to estimate the average minimum number of bits * needed to encode the sampled data. * * For convenience, return the percentage of needed bits, instead of amount of * bits directly. * * @ENTROPY_LVL_ACEPTABLE - below that threshold, sample has low byte entropy * and can be compressible with high probability * * @ENTROPY_LVL_HIGH - data are not compressible with high probability * * Use of ilog2() decreases precision, we lower the LVL to 5 to compensate. */ #define ENTROPY_LVL_ACEPTABLE (65) #define ENTROPY_LVL_HIGH (80) /* * For increasead precision in shannon_entropy calculation, * let's do pow(n, M) to save more digits after comma: * * - maximum int bit length is 64 * - ilog2(MAX_SAMPLE_SIZE) -> 13 * - 13 * 4 = 52 < 64 -> M = 4 * * So use pow(n, 4). */ static inline u32 ilog2_w(u64 n) { return ilog2(n * n * n * n); } static u32 shannon_entropy(struct heuristic_ws *ws) { const u32 entropy_max = 8 * ilog2_w(2); u32 entropy_sum = 0; u32 p, p_base, sz_base; u32 i; sz_base = ilog2_w(ws->sample_size); for (i = 0; i < BUCKET_SIZE && ws->bucket[i].count > 0; i++) { p = ws->bucket[i].count; p_base = ilog2_w(p); entropy_sum += p * (sz_base - p_base); } entropy_sum /= ws->sample_size; return entropy_sum * 100 / entropy_max; } #define RADIX_BASE 4U #define COUNTERS_SIZE (1U << RADIX_BASE) static u8 get4bits(u64 num, int shift) { u8 low4bits; num >>= shift; /* Reverse order */ low4bits = (COUNTERS_SIZE - 1) - (num % COUNTERS_SIZE); return low4bits; } /* * Use 4 bits as radix base * Use 16 u32 counters for calculating new position in buf array * * @array - array that will be sorted * @array_buf - buffer array to store sorting results * must be equal in size to @array * @num - array size */ static void radix_sort(struct bucket_item *array, struct bucket_item *array_buf, int num) { u64 max_num; u64 buf_num; u32 counters[COUNTERS_SIZE]; u32 new_addr; u32 addr; int bitlen; int shift; int i; /* * Try avoid useless loop iterations for small numbers stored in big * counters. Example: 48 33 4 ... in 64bit array */ max_num = array[0].count; for (i = 1; i < num; i++) { buf_num = array[i].count; if (buf_num > max_num) max_num = buf_num; } buf_num = ilog2(max_num); bitlen = ALIGN(buf_num, RADIX_BASE * 2); shift = 0; while (shift < bitlen) { memset(counters, 0, sizeof(counters)); for (i = 0; i < num; i++) { buf_num = array[i].count; addr = get4bits(buf_num, shift); counters[addr]++; } for (i = 1; i < COUNTERS_SIZE; i++) counters[i] += counters[i - 1]; for (i = num - 1; i >= 0; i--) { buf_num = array[i].count; addr = get4bits(buf_num, shift); counters[addr]--; new_addr = counters[addr]; array_buf[new_addr] = array[i]; } shift += RADIX_BASE; /* * Normal radix expects to move data from a temporary array, to * the main one. But that requires some CPU time. Avoid that * by doing another sort iteration to original array instead of * memcpy() */ memset(counters, 0, sizeof(counters)); for (i = 0; i < num; i ++) { buf_num = array_buf[i].count; addr = get4bits(buf_num, shift); counters[addr]++; } for (i = 1; i < COUNTERS_SIZE; i++) counters[i] += counters[i - 1]; for (i = num - 1; i >= 0; i--) { buf_num = array_buf[i].count; addr = get4bits(buf_num, shift); counters[addr]--; new_addr = counters[addr]; array[new_addr] = array_buf[i]; } shift += RADIX_BASE; } } /* * Size of the core byte set - how many bytes cover 90% of the sample * * There are several types of structured binary data that use nearly all byte * values. The distribution can be uniform and counts in all buckets will be * nearly the same (eg. encrypted data). Unlikely to be compressible. * * Other possibility is normal (Gaussian) distribution, where the data could * be potentially compressible, but we have to take a few more steps to decide * how much. * * @BYTE_CORE_SET_LOW - main part of byte values repeated frequently, * compression algo can easy fix that * @BYTE_CORE_SET_HIGH - data have uniform distribution and with high * probability is not compressible */ #define BYTE_CORE_SET_LOW (64) #define BYTE_CORE_SET_HIGH (200) static int byte_core_set_size(struct heuristic_ws *ws) { u32 i; u32 coreset_sum = 0; const u32 core_set_threshold = ws->sample_size * 90 / 100; struct bucket_item *bucket = ws->bucket; /* Sort in reverse order */ radix_sort(ws->bucket, ws->bucket_b, BUCKET_SIZE); for (i = 0; i < BYTE_CORE_SET_LOW; i++) coreset_sum += bucket[i].count; if (coreset_sum > core_set_threshold) return i; for (; i < BYTE_CORE_SET_HIGH && bucket[i].count > 0; i++) { coreset_sum += bucket[i].count; if (coreset_sum > core_set_threshold) break; } return i; } /* * Count byte values in buckets. * This heuristic can detect textual data (configs, xml, json, html, etc). * Because in most text-like data byte set is restricted to limited number of * possible characters, and that restriction in most cases makes data easy to * compress. * * @BYTE_SET_THRESHOLD - consider all data within this byte set size: * less - compressible * more - need additional analysis */ #define BYTE_SET_THRESHOLD (64) static u32 byte_set_size(const struct heuristic_ws *ws) { u32 i; u32 byte_set_size = 0; for (i = 0; i < BYTE_SET_THRESHOLD; i++) { if (ws->bucket[i].count > 0) byte_set_size++; } /* * Continue collecting count of byte values in buckets. If the byte * set size is bigger then the threshold, it's pointless to continue, * the detection technique would fail for this type of data. */ for (; i < BUCKET_SIZE; i++) { if (ws->bucket[i].count > 0) { byte_set_size++; if (byte_set_size > BYTE_SET_THRESHOLD) return byte_set_size; } } return byte_set_size; } static bool sample_repeated_patterns(struct heuristic_ws *ws) { const u32 half_of_sample = ws->sample_size / 2; const u8 *data = ws->sample; return memcmp(&data[0], &data[half_of_sample], half_of_sample) == 0; } static void heuristic_collect_sample(struct inode *inode, u64 start, u64 end, struct heuristic_ws *ws) { struct page *page; u64 index, index_end; u32 i, curr_sample_pos; u8 *in_data; /* * Compression handles the input data by chunks of 128KiB * (defined by BTRFS_MAX_UNCOMPRESSED) * * We do the same for the heuristic and loop over the whole range. * * MAX_SAMPLE_SIZE - calculated under assumption that heuristic will * process no more than BTRFS_MAX_UNCOMPRESSED at a time. */ if (end - start > BTRFS_MAX_UNCOMPRESSED) end = start + BTRFS_MAX_UNCOMPRESSED; index = start >> PAGE_SHIFT; index_end = end >> PAGE_SHIFT; /* Don't miss unaligned end */ if (!PAGE_ALIGNED(end)) index_end++; curr_sample_pos = 0; while (index < index_end) { page = find_get_page(inode->i_mapping, index); in_data = kmap_local_page(page); /* Handle case where the start is not aligned to PAGE_SIZE */ i = start % PAGE_SIZE; while (i < PAGE_SIZE - SAMPLING_READ_SIZE) { /* Don't sample any garbage from the last page */ if (start > end - SAMPLING_READ_SIZE) break; memcpy(&ws->sample[curr_sample_pos], &in_data[i], SAMPLING_READ_SIZE); i += SAMPLING_INTERVAL; start += SAMPLING_INTERVAL; curr_sample_pos += SAMPLING_READ_SIZE; } kunmap_local(in_data); put_page(page); index++; } ws->sample_size = curr_sample_pos; } /* * Compression heuristic. * * The following types of analysis can be performed: * - detect mostly zero data * - detect data with low "byte set" size (text, etc) * - detect data with low/high "core byte" set * * Return non-zero if the compression should be done, 0 otherwise. */ int btrfs_compress_heuristic(struct btrfs_inode *inode, u64 start, u64 end) { struct list_head *ws_list = get_workspace(0, 0); struct heuristic_ws *ws; u32 i; u8 byte; int ret = 0; ws = list_entry(ws_list, struct heuristic_ws, list); heuristic_collect_sample(&inode->vfs_inode, start, end, ws); if (sample_repeated_patterns(ws)) { ret = 1; goto out; } memset(ws->bucket, 0, sizeof(*ws->bucket)*BUCKET_SIZE); for (i = 0; i < ws->sample_size; i++) { byte = ws->sample[i]; ws->bucket[byte].count++; } i = byte_set_size(ws); if (i < BYTE_SET_THRESHOLD) { ret = 2; goto out; } i = byte_core_set_size(ws); if (i <= BYTE_CORE_SET_LOW) { ret = 3; goto out; } if (i >= BYTE_CORE_SET_HIGH) { ret = 0; goto out; } i = shannon_entropy(ws); if (i <= ENTROPY_LVL_ACEPTABLE) { ret = 4; goto out; } /* * For the levels below ENTROPY_LVL_HIGH, additional analysis would be * needed to give green light to compression. * * For now just assume that compression at that level is not worth the * resources because: * * 1. it is possible to defrag the data later * * 2. the data would turn out to be hardly compressible, eg. 150 byte * values, every bucket has counter at level ~54. The heuristic would * be confused. This can happen when data have some internal repeated * patterns like "abbacbbc...". This can be detected by analyzing * pairs of bytes, which is too costly. */ if (i < ENTROPY_LVL_HIGH) { ret = 5; goto out; } else { ret = 0; goto out; } out: put_workspace(0, ws_list); return ret; } /* * Convert the compression suffix (eg. after "zlib" starting with ":") to * level, unrecognized string will set the default level */ unsigned int btrfs_compress_str2level(unsigned int type, const char *str) { unsigned int level = 0; int ret; if (!type) return 0; if (str[0] == ':') { ret = kstrtouint(str + 1, 10, &level); if (ret) level = 0; } level = btrfs_compress_set_level(type, level); return level; }
15 10 2 3 6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 // SPDX-License-Identifier: GPL-2.0-only #include <linux/netlink.h> #include <linux/rtnetlink.h> #include <linux/types.h> #include <net/net_namespace.h> #include <net/netlink.h> #include <linux/in6.h> #include <net/ip.h> int rtm_getroute_parse_ip_proto(struct nlattr *attr, u8 *ip_proto, u8 family, struct netlink_ext_ack *extack) { *ip_proto = nla_get_u8(attr); switch (*ip_proto) { case IPPROTO_TCP: case IPPROTO_UDP: return 0; case IPPROTO_ICMP: if (family != AF_INET) break; return 0; #if IS_ENABLED(CONFIG_IPV6) case IPPROTO_ICMPV6: if (family != AF_INET6) break; return 0; #endif } NL_SET_ERR_MSG(extack, "Unsupported ip proto"); return -EOPNOTSUPP; } EXPORT_SYMBOL_GPL(rtm_getroute_parse_ip_proto);
4 368 370 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Checksumming functions for IPv6 * * Authors: Jorge Cwik, <jorge@laser.satlink.net> * Arnt Gulbrandsen, <agulbra@nvg.unit.no> * Borrows very liberally from tcp.c and ip.c, see those * files for more names. */ /* * Fixes: * * Ralf Baechle : generic ipv6 checksum * <ralf@waldorf-gmbh.de> */ #ifndef _CHECKSUM_IPV6_H #define _CHECKSUM_IPV6_H #include <asm/types.h> #include <asm/byteorder.h> #include <net/ip.h> #include <asm/checksum.h> #include <linux/in6.h> #include <linux/tcp.h> #include <linux/ipv6.h> #ifndef _HAVE_ARCH_IPV6_CSUM __sum16 csum_ipv6_magic(const struct in6_addr *saddr, const struct in6_addr *daddr, __u32 len, __u8 proto, __wsum csum); #endif static inline __wsum ip6_compute_pseudo(struct sk_buff *skb, int proto) { return ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->len, proto, 0)); } static __inline__ __sum16 tcp_v6_check(int len, const struct in6_addr *saddr, const struct in6_addr *daddr, __wsum base) { return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base); } static inline void __tcp_v6_send_check(struct sk_buff *skb, const struct in6_addr *saddr, const struct in6_addr *daddr) { struct tcphdr *th = tcp_hdr(skb); th->check = ~tcp_v6_check(skb->len, saddr, daddr, 0); skb->csum_start = skb_transport_header(skb) - skb->head; skb->csum_offset = offsetof(struct tcphdr, check); } static inline void tcp_v6_gso_csum_prep(struct sk_buff *skb) { struct ipv6hdr *ipv6h = ipv6_hdr(skb); struct tcphdr *th = tcp_hdr(skb); ipv6h->payload_len = 0; th->check = ~tcp_v6_check(0, &ipv6h->saddr, &ipv6h->daddr, 0); } static inline __sum16 udp_v6_check(int len, const struct in6_addr *saddr, const struct in6_addr *daddr, __wsum base) { return csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP, base); } void udp6_set_csum(bool nocheck, struct sk_buff *skb, const struct in6_addr *saddr, const struct in6_addr *daddr, int len); int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto); #endif
8 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 /* * net/tipc/bcast.h: Include file for TIPC broadcast code * * Copyright (c) 2003-2006, 2014-2015, Ericsson AB * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the names of the copyright holders nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * Alternatively, this software may be distributed under the terms of the * GNU General Public License ("GPL") version 2 as published by the Free * Software Foundation. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef _TIPC_BCAST_H #define _TIPC_BCAST_H #include "core.h" struct tipc_node; struct tipc_msg; struct tipc_nl_msg; struct tipc_nlist; struct tipc_nitem; extern const char tipc_bclink_name[]; extern unsigned long sysctl_tipc_bc_retruni; #define TIPC_METHOD_EXPIRE msecs_to_jiffies(5000) #define BCLINK_MODE_BCAST 0x1 #define BCLINK_MODE_RCAST 0x2 #define BCLINK_MODE_SEL 0x4 struct tipc_nlist { struct list_head list; u32 self; u16 remote; bool local; }; void tipc_nlist_init(struct tipc_nlist *nl, u32 self); void tipc_nlist_purge(struct tipc_nlist *nl); void tipc_nlist_add(struct tipc_nlist *nl, u32 node); void tipc_nlist_del(struct tipc_nlist *nl, u32 node); /* Cookie to be used between socket and broadcast layer * @rcast: replicast (instead of broadcast) was used at previous xmit * @mandatory: broadcast/replicast indication was set by user * @deferredq: defer queue to make message in order * @expires: re-evaluate non-mandatory transmit method if we are past this */ struct tipc_mc_method { bool rcast; bool mandatory; struct sk_buff_head deferredq; unsigned long expires; }; int tipc_bcast_init(struct net *net); void tipc_bcast_stop(struct net *net); void tipc_bcast_add_peer(struct net *net, struct tipc_link *l, struct sk_buff_head *xmitq); void tipc_bcast_remove_peer(struct net *net, struct tipc_link *rcv_bcl); void tipc_bcast_inc_bearer_dst_cnt(struct net *net, int bearer_id); void tipc_bcast_dec_bearer_dst_cnt(struct net *net, int bearer_id); int tipc_bcast_get_mtu(struct net *net); void tipc_bcast_toggle_rcast(struct net *net, bool supp); int tipc_mcast_xmit(struct net *net, struct sk_buff_head *pkts, struct tipc_mc_method *method, struct tipc_nlist *dests, u16 *cong_link_cnt); int tipc_bcast_xmit(struct net *net, struct sk_buff_head *pkts, u16 *cong_link_cnt); int tipc_bcast_rcv(struct net *net, struct tipc_link *l, struct sk_buff *skb); void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, struct tipc_msg *hdr); int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l, struct tipc_msg *hdr, struct sk_buff_head *retrq); int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg, struct tipc_link *bcl); int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]); int tipc_bclink_reset_stats(struct net *net, struct tipc_link *l); u32 tipc_bcast_get_mode(struct net *net); u32 tipc_bcast_get_broadcast_ratio(struct net *net); void tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq, struct sk_buff_head *inputq); static inline void tipc_bcast_lock(struct net *net) { spin_lock_bh(&tipc_net(net)->bclock); } static inline void tipc_bcast_unlock(struct net *net) { spin_unlock_bh(&tipc_net(net)->bclock); } static inline struct tipc_link *tipc_bc_sndlink(struct net *net) { return tipc_net(net)->bcl; } #endif
565 344 969 717 821 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 #undef TRACE_SYSTEM #define TRACE_SYSTEM qdisc #if !defined(_TRACE_QDISC_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_QDISC_H #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/tracepoint.h> #include <linux/ftrace.h> #include <linux/pkt_sched.h> #include <net/sch_generic.h> TRACE_EVENT(qdisc_dequeue, TP_PROTO(struct Qdisc *qdisc, const struct netdev_queue *txq, int packets, struct sk_buff *skb), TP_ARGS(qdisc, txq, packets, skb), TP_STRUCT__entry( __field( struct Qdisc *, qdisc ) __field(const struct netdev_queue *, txq ) __field( int, packets ) __field( void *, skbaddr ) __field( int, ifindex ) __field( u32, handle ) __field( u32, parent ) __field( unsigned long, txq_state) ), /* skb==NULL indicate packets dequeued was 0, even when packets==1 */ TP_fast_assign( __entry->qdisc = qdisc; __entry->txq = txq; __entry->packets = skb ? packets : 0; __entry->skbaddr = skb; __entry->ifindex = txq->dev ? txq->dev->ifindex : 0; __entry->handle = qdisc->handle; __entry->parent = qdisc->parent; __entry->txq_state = txq->state; ), TP_printk("dequeue ifindex=%d qdisc handle=0x%X parent=0x%X txq_state=0x%lX packets=%d skbaddr=%p", __entry->ifindex, __entry->handle, __entry->parent, __entry->txq_state, __entry->packets, __entry->skbaddr ) ); TRACE_EVENT(qdisc_enqueue, TP_PROTO(struct Qdisc *qdisc, const struct netdev_queue *txq, struct sk_buff *skb), TP_ARGS(qdisc, txq, skb), TP_STRUCT__entry( __field(struct Qdisc *, qdisc) __field(const struct netdev_queue *, txq) __field(void *, skbaddr) __field(int, ifindex) __field(u32, handle) __field(u32, parent) ), TP_fast_assign( __entry->qdisc = qdisc; __entry->txq = txq; __entry->skbaddr = skb; __entry->ifindex = txq->dev ? txq->dev->ifindex : 0; __entry->handle = qdisc->handle; __entry->parent = qdisc->parent; ), TP_printk("enqueue ifindex=%d qdisc handle=0x%X parent=0x%X skbaddr=%p", __entry->ifindex, __entry->handle, __entry->parent, __entry->skbaddr) ); TRACE_EVENT(qdisc_reset, TP_PROTO(struct Qdisc *q), TP_ARGS(q), TP_STRUCT__entry( __string( dev, qdisc_dev(q) ? qdisc_dev(q)->name : "(null)" ) __string( kind, q->ops->id ) __field( u32, parent ) __field( u32, handle ) ), TP_fast_assign( __assign_str(dev); __assign_str(kind); __entry->parent = q->parent; __entry->handle = q->handle; ), TP_printk("dev=%s kind=%s parent=%x:%x handle=%x:%x", __get_str(dev), __get_str(kind), TC_H_MAJ(__entry->parent) >> 16, TC_H_MIN(__entry->parent), TC_H_MAJ(__entry->handle) >> 16, TC_H_MIN(__entry->handle)) ); TRACE_EVENT(qdisc_destroy, TP_PROTO(struct Qdisc *q), TP_ARGS(q), TP_STRUCT__entry( __string( dev, qdisc_dev(q)->name ) __string( kind, q->ops->id ) __field( u32, parent ) __field( u32, handle ) ), TP_fast_assign( __assign_str(dev); __assign_str(kind); __entry->parent = q->parent; __entry->handle = q->handle; ), TP_printk("dev=%s kind=%s parent=%x:%x handle=%x:%x", __get_str(dev), __get_str(kind), TC_H_MAJ(__entry->parent) >> 16, TC_H_MIN(__entry->parent), TC_H_MAJ(__entry->handle) >> 16, TC_H_MIN(__entry->handle)) ); TRACE_EVENT(qdisc_create, TP_PROTO(const struct Qdisc_ops *ops, struct net_device *dev, u32 parent), TP_ARGS(ops, dev, parent), TP_STRUCT__entry( __string( dev, dev->name ) __string( kind, ops->id ) __field( u32, parent ) ), TP_fast_assign( __assign_str(dev); __assign_str(kind); __entry->parent = parent; ), TP_printk("dev=%s kind=%s parent=%x:%x", __get_str(dev), __get_str(kind), TC_H_MAJ(__entry->parent) >> 16, TC_H_MIN(__entry->parent)) ); #endif /* _TRACE_QDISC_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 // SPDX-License-Identifier: GPL-2.0+ /* Siemens ID Mouse driver v0.6 Copyright (C) 2004-5 by Florian 'Floe' Echtler <echtler@fs.tum.de> and Andreas 'ad' Deresch <aderesch@fs.tum.de> Derived from the USB Skeleton driver 1.1, Copyright (C) 2003 Greg Kroah-Hartman (greg@kroah.com) Additional information provided by Martin Reising <Martin.Reising@natural-computing.de> */ #include <linux/kernel.h> #include <linux/sched/signal.h> #include <linux/errno.h> #include <linux/delay.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/completion.h> #include <linux/mutex.h> #include <linux/uaccess.h> #include <linux/usb.h> /* image constants */ #define WIDTH 225 #define HEIGHT 289 #define HEADER "P5 225 289 255 " #define IMGSIZE ((WIDTH * HEIGHT) + sizeof(HEADER)-1) #define DRIVER_SHORT "idmouse" #define DRIVER_AUTHOR "Florian 'Floe' Echtler <echtler@fs.tum.de>" #define DRIVER_DESC "Siemens ID Mouse FingerTIP Sensor Driver" /* minor number for misc USB devices */ #define USB_IDMOUSE_MINOR_BASE 132 /* vendor and device IDs */ #define ID_SIEMENS 0x0681 #define ID_IDMOUSE 0x0005 #define ID_CHERRY 0x0010 /* device ID table */ static const struct usb_device_id idmouse_table[] = { {USB_DEVICE(ID_SIEMENS, ID_IDMOUSE)}, /* Siemens ID Mouse (Professional) */ {USB_DEVICE(ID_SIEMENS, ID_CHERRY )}, /* Cherry FingerTIP ID Board */ {} /* terminating null entry */ }; /* sensor commands */ #define FTIP_RESET 0x20 #define FTIP_ACQUIRE 0x21 #define FTIP_RELEASE 0x22 #define FTIP_BLINK 0x23 /* LSB of value = blink pulse width */ #define FTIP_SCROLL 0x24 #define ftip_command(dev, command, value, index) \ usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0), command, \ USB_TYPE_VENDOR | USB_RECIP_ENDPOINT | USB_DIR_OUT, value, index, NULL, 0, 1000) MODULE_DEVICE_TABLE(usb, idmouse_table); /* structure to hold all of our device specific stuff */ struct usb_idmouse { struct usb_device *udev; /* save off the usb device pointer */ struct usb_interface *interface; /* the interface for this device */ unsigned char *bulk_in_buffer; /* the buffer to receive data */ size_t bulk_in_size; /* the maximum bulk packet size */ size_t orig_bi_size; /* same as above, but reported by the device */ __u8 bulk_in_endpointAddr; /* the address of the bulk in endpoint */ int open; /* if the port is open or not */ int present; /* if the device is not disconnected */ struct mutex lock; /* locks this structure */ }; /* local function prototypes */ static ssize_t idmouse_read(struct file *file, char __user *buffer, size_t count, loff_t * ppos); static int idmouse_open(struct inode *inode, struct file *file); static int idmouse_release(struct inode *inode, struct file *file); static int idmouse_probe(struct usb_interface *interface, const struct usb_device_id *id); static void idmouse_disconnect(struct usb_interface *interface); static int idmouse_suspend(struct usb_interface *intf, pm_message_t message); static int idmouse_resume(struct usb_interface *intf); /* file operation pointers */ static const struct file_operations idmouse_fops = { .owner = THIS_MODULE, .read = idmouse_read, .open = idmouse_open, .release = idmouse_release, .llseek = default_llseek, }; /* class driver information */ static struct usb_class_driver idmouse_class = { .name = "idmouse%d", .fops = &idmouse_fops, .minor_base = USB_IDMOUSE_MINOR_BASE, }; /* usb specific object needed to register this driver with the usb subsystem */ static struct usb_driver idmouse_driver = { .name = DRIVER_SHORT, .probe = idmouse_probe, .disconnect = idmouse_disconnect, .suspend = idmouse_suspend, .resume = idmouse_resume, .reset_resume = idmouse_resume, .id_table = idmouse_table, .supports_autosuspend = 1, }; static int idmouse_create_image(struct usb_idmouse *dev) { int bytes_read; int bulk_read; int result; memcpy(dev->bulk_in_buffer, HEADER, sizeof(HEADER)-1); bytes_read = sizeof(HEADER)-1; /* reset the device and set a fast blink rate */ result = ftip_command(dev, FTIP_RELEASE, 0, 0); if (result < 0) goto reset; result = ftip_command(dev, FTIP_BLINK, 1, 0); if (result < 0) goto reset; /* initialize the sensor - sending this command twice */ /* significantly reduces the rate of failed reads */ result = ftip_command(dev, FTIP_ACQUIRE, 0, 0); if (result < 0) goto reset; result = ftip_command(dev, FTIP_ACQUIRE, 0, 0); if (result < 0) goto reset; /* start the readout - sending this command twice */ /* presumably enables the high dynamic range mode */ result = ftip_command(dev, FTIP_RESET, 0, 0); if (result < 0) goto reset; result = ftip_command(dev, FTIP_RESET, 0, 0); if (result < 0) goto reset; /* loop over a blocking bulk read to get data from the device */ while (bytes_read < IMGSIZE) { result = usb_bulk_msg(dev->udev, usb_rcvbulkpipe(dev->udev, dev->bulk_in_endpointAddr), dev->bulk_in_buffer + bytes_read, dev->bulk_in_size, &bulk_read, 5000); if (result < 0) { /* Maybe this error was caused by the increased packet size? */ /* Reset to the original value and tell userspace to retry. */ if (dev->bulk_in_size != dev->orig_bi_size) { dev->bulk_in_size = dev->orig_bi_size; result = -EAGAIN; } break; } if (signal_pending(current)) { result = -EINTR; break; } bytes_read += bulk_read; } /* check for valid image */ /* right border should be black (0x00) */ for (bytes_read = sizeof(HEADER)-1 + WIDTH-1; bytes_read < IMGSIZE; bytes_read += WIDTH) if (dev->bulk_in_buffer[bytes_read] != 0x00) return -EAGAIN; /* lower border should be white (0xFF) */ for (bytes_read = IMGSIZE-WIDTH; bytes_read < IMGSIZE-1; bytes_read++) if (dev->bulk_in_buffer[bytes_read] != 0xFF) return -EAGAIN; /* reset the device */ reset: ftip_command(dev, FTIP_RELEASE, 0, 0); /* should be IMGSIZE == 65040 */ dev_dbg(&dev->interface->dev, "read %d bytes fingerprint data\n", bytes_read); return result; } /* PM operations are nops as this driver does IO only during open() */ static int idmouse_suspend(struct usb_interface *intf, pm_message_t message) { return 0; } static int idmouse_resume(struct usb_interface *intf) { return 0; } static inline void idmouse_delete(struct usb_idmouse *dev) { kfree(dev->bulk_in_buffer); kfree(dev); } static int idmouse_open(struct inode *inode, struct file *file) { struct usb_idmouse *dev; struct usb_interface *interface; int result; /* get the interface from minor number and driver information */ interface = usb_find_interface(&idmouse_driver, iminor(inode)); if (!interface) return -ENODEV; /* get the device information block from the interface */ dev = usb_get_intfdata(interface); if (!dev) return -ENODEV; /* lock this device */ mutex_lock(&dev->lock); /* check if already open */ if (dev->open) { /* already open, so fail */ result = -EBUSY; } else { /* create a new image and check for success */ result = usb_autopm_get_interface(interface); if (result) goto error; result = idmouse_create_image(dev); usb_autopm_put_interface(interface); if (result) goto error; /* increment our usage count for the driver */ ++dev->open; /* save our object in the file's private structure */ file->private_data = dev; } error: /* unlock this device */ mutex_unlock(&dev->lock); return result; } static int idmouse_release(struct inode *inode, struct file *file) { struct usb_idmouse *dev; dev = file->private_data; if (dev == NULL) return -ENODEV; /* lock our device */ mutex_lock(&dev->lock); --dev->open; if (!dev->present) { /* the device was unplugged before the file was released */ mutex_unlock(&dev->lock); idmouse_delete(dev); } else { mutex_unlock(&dev->lock); } return 0; } static ssize_t idmouse_read(struct file *file, char __user *buffer, size_t count, loff_t * ppos) { struct usb_idmouse *dev = file->private_data; int result; /* lock this object */ mutex_lock(&dev->lock); /* verify that the device wasn't unplugged */ if (!dev->present) { mutex_unlock(&dev->lock); return -ENODEV; } result = simple_read_from_buffer(buffer, count, ppos, dev->bulk_in_buffer, IMGSIZE); /* unlock the device */ mutex_unlock(&dev->lock); return result; } static int idmouse_probe(struct usb_interface *interface, const struct usb_device_id *id) { struct usb_device *udev = interface_to_usbdev(interface); struct usb_idmouse *dev; struct usb_host_interface *iface_desc; struct usb_endpoint_descriptor *endpoint; int result; /* check if we have gotten the data or the hid interface */ iface_desc = interface->cur_altsetting; if (iface_desc->desc.bInterfaceClass != 0x0A) return -ENODEV; if (iface_desc->desc.bNumEndpoints < 1) return -ENODEV; /* allocate memory for our device state and initialize it */ dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (dev == NULL) return -ENOMEM; mutex_init(&dev->lock); dev->udev = udev; dev->interface = interface; /* set up the endpoint information - use only the first bulk-in endpoint */ result = usb_find_bulk_in_endpoint(iface_desc, &endpoint); if (result) { dev_err(&interface->dev, "Unable to find bulk-in endpoint.\n"); idmouse_delete(dev); return result; } dev->orig_bi_size = usb_endpoint_maxp(endpoint); dev->bulk_in_size = 0x200; /* works _much_ faster */ dev->bulk_in_endpointAddr = endpoint->bEndpointAddress; dev->bulk_in_buffer = kmalloc(IMGSIZE + dev->bulk_in_size, GFP_KERNEL); if (!dev->bulk_in_buffer) { idmouse_delete(dev); return -ENOMEM; } /* allow device read, write and ioctl */ dev->present = 1; /* we can register the device now, as it is ready */ usb_set_intfdata(interface, dev); result = usb_register_dev(interface, &idmouse_class); if (result) { /* something prevented us from registering this device */ dev_err(&interface->dev, "Unable to allocate minor number.\n"); idmouse_delete(dev); return result; } /* be noisy */ dev_info(&interface->dev,"%s now attached\n",DRIVER_DESC); return 0; } static void idmouse_disconnect(struct usb_interface *interface) { struct usb_idmouse *dev = usb_get_intfdata(interface); /* give back our minor */ usb_deregister_dev(interface, &idmouse_class); /* lock the device */ mutex_lock(&dev->lock); /* prevent device read, write and ioctl */ dev->present = 0; /* if the device is opened, idmouse_release will clean this up */ if (!dev->open) { mutex_unlock(&dev->lock); idmouse_delete(dev); } else { /* unlock */ mutex_unlock(&dev->lock); } dev_info(&interface->dev, "disconnected\n"); } module_usb_driver(idmouse_driver); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL");
46 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 // SPDX-License-Identifier: GPL-2.0-only /* * Common code for control of lockd and nfsv4 grace periods. * * Transplanted from lockd code */ #include <linux/module.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include <linux/fs.h> #include <linux/filelock.h> static unsigned int grace_net_id; static DEFINE_SPINLOCK(grace_lock); /** * locks_start_grace * @net: net namespace that this lock manager belongs to * @lm: who this grace period is for * * A grace period is a period during which locks should not be given * out. Currently grace periods are only enforced by the two lock * managers (lockd and nfsd), using the locks_in_grace() function to * check when they are in a grace period. * * This function is called to start a grace period. */ void locks_start_grace(struct net *net, struct lock_manager *lm) { struct list_head *grace_list = net_generic(net, grace_net_id); spin_lock(&grace_lock); if (list_empty(&lm->list)) list_add(&lm->list, grace_list); else WARN(1, "double list_add attempt detected in net %x %s\n", net->ns.inum, (net == &init_net) ? "(init_net)" : ""); spin_unlock(&grace_lock); } EXPORT_SYMBOL_GPL(locks_start_grace); /** * locks_end_grace * @lm: who this grace period is for * * Call this function to state that the given lock manager is ready to * resume regular locking. The grace period will not end until all lock * managers that called locks_start_grace() also call locks_end_grace(). * Note that callers count on it being safe to call this more than once, * and the second call should be a no-op. */ void locks_end_grace(struct lock_manager *lm) { spin_lock(&grace_lock); list_del_init(&lm->list); spin_unlock(&grace_lock); } EXPORT_SYMBOL_GPL(locks_end_grace); static bool __state_in_grace(struct net *net, bool open) { struct list_head *grace_list = net_generic(net, grace_net_id); struct lock_manager *lm; if (!open) return !list_empty(grace_list); spin_lock(&grace_lock); list_for_each_entry(lm, grace_list, list) { if (lm->block_opens) { spin_unlock(&grace_lock); return true; } } spin_unlock(&grace_lock); return false; } /** * locks_in_grace * @net: network namespace * * Lock managers call this function to determine when it is OK for them * to answer ordinary lock requests, and when they should accept only * lock reclaims. */ bool locks_in_grace(struct net *net) { return __state_in_grace(net, false); } EXPORT_SYMBOL_GPL(locks_in_grace); bool opens_in_grace(struct net *net) { return __state_in_grace(net, true); } EXPORT_SYMBOL_GPL(opens_in_grace); static int __net_init grace_init_net(struct net *net) { struct list_head *grace_list = net_generic(net, grace_net_id); INIT_LIST_HEAD(grace_list); return 0; } static void __net_exit grace_exit_net(struct net *net) { struct list_head *grace_list = net_generic(net, grace_net_id); WARN_ONCE(!list_empty(grace_list), "net %x %s: grace_list is not empty\n", net->ns.inum, __func__); } static struct pernet_operations grace_net_ops = { .init = grace_init_net, .exit = grace_exit_net, .id = &grace_net_id, .size = sizeof(struct list_head), }; static int __init init_grace(void) { return register_pernet_subsys(&grace_net_ops); } static void __exit exit_grace(void) { unregister_pernet_subsys(&grace_net_ops); } MODULE_AUTHOR("Jeff Layton <jlayton@primarydata.com>"); MODULE_DESCRIPTION("NFS client and server infrastructure"); MODULE_LICENSE("GPL"); module_init(init_grace) module_exit(exit_grace)
27 17 25 27 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 /* * linux/fs/nls/nls_cp737.c * * Charset cp737 translation tables. * Generated automatically from the Unicode and charset * tables from the Unicode Organization (www.unicode.org). * The Unicode to charset table has only exact mappings. */ #include <linux/module.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/nls.h> #include <linux/errno.h> static const wchar_t charset2uni[256] = { /* 0x00*/ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, /* 0x10*/ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, /* 0x20*/ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, /* 0x30*/ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, /* 0x40*/ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, /* 0x50*/ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, /* 0x60*/ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, /* 0x70*/ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, /* 0x80*/ 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f, 0x03a0, /* 0x90*/ 0x03a1, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7, 0x03a8, 0x03a9, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7, 0x03b8, /* 0xa0*/ 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf, 0x03c0, 0x03c1, 0x03c3, 0x03c2, 0x03c4, 0x03c5, 0x03c6, 0x03c7, 0x03c8, /* 0xb0*/ 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, 0x2555, 0x2563, 0x2551, 0x2557, 0x255d, 0x255c, 0x255b, 0x2510, /* 0xc0*/ 0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f, 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x2567, /* 0xd0*/ 0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256b, 0x256a, 0x2518, 0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580, /* 0xe0*/ 0x03c9, 0x03ac, 0x03ad, 0x03ae, 0x03ca, 0x03af, 0x03cc, 0x03cd, 0x03cb, 0x03ce, 0x0386, 0x0388, 0x0389, 0x038a, 0x038c, 0x038e, /* 0xf0*/ 0x038f, 0x00b1, 0x2265, 0x2264, 0x03aa, 0x03ab, 0x00f7, 0x2248, 0x00b0, 0x2219, 0x00b7, 0x221a, 0x207f, 0x00b2, 0x25a0, 0x00a0, }; static const unsigned char page00[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 0xf8, 0xf1, 0xfd, 0x00, 0x00, 0x00, 0x00, 0xfa, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf6, /* 0xf0-0xf7 */ }; static const unsigned char page03[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xea, 0x00, /* 0x80-0x87 */ 0xeb, 0xec, 0xed, 0x00, 0xee, 0x00, 0xef, 0xf0, /* 0x88-0x8f */ 0x00, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, /* 0x90-0x97 */ 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, /* 0x98-0x9f */ 0x8f, 0x90, 0x00, 0x91, 0x92, 0x93, 0x94, 0x95, /* 0xa0-0xa7 */ 0x96, 0x97, 0xf4, 0xf5, 0xe1, 0xe2, 0xe3, 0xe5, /* 0xa8-0xaf */ 0x00, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, /* 0xb0-0xb7 */ 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, /* 0xb8-0xbf */ 0xa7, 0xa8, 0xaa, 0xa9, 0xab, 0xac, 0xad, 0xae, /* 0xc0-0xc7 */ 0xaf, 0xe0, 0xe4, 0xe8, 0xe6, 0xe7, 0xe9, 0x00, /* 0xc8-0xcf */ }; static const unsigned char page20[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, /* 0x78-0x7f */ }; static const unsigned char page22[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0xf9, 0xfb, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0xf7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0xf3, 0xf2, 0x00, 0x00, /* 0x60-0x67 */ }; static const unsigned char page25[256] = { 0xc4, 0x00, 0xb3, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0xda, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0xbf, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0xd9, 0x00, 0x00, 0x00, 0xc3, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0xb4, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0xc2, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0xc1, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0xc5, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0xcd, 0xba, 0xd5, 0xd6, 0xc9, 0xb8, 0xb7, 0xbb, /* 0x50-0x57 */ 0xd4, 0xd3, 0xc8, 0xbe, 0xbd, 0xbc, 0xc6, 0xc7, /* 0x58-0x5f */ 0xcc, 0xb5, 0xb6, 0xb9, 0xd1, 0xd2, 0xcb, 0xcf, /* 0x60-0x67 */ 0xd0, 0xca, 0xd8, 0xd7, 0xce, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0xdf, 0x00, 0x00, 0x00, 0xdc, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0xdb, 0x00, 0x00, 0x00, 0xdd, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0xde, 0xb0, 0xb1, 0xb2, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ }; static const unsigned char *const page_uni2charset[256] = { page00, NULL, NULL, page03, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, page20, NULL, page22, NULL, NULL, page25, NULL, NULL, }; static const unsigned char charset2lower[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x40-0x47 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x48-0x4f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x50-0x57 */ 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* 0x80-0x87 */ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* 0x88-0x8f */ 0xa8, 0xa9, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xe0, /* 0x90-0x97 */ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* 0x98-0x9f */ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* 0xa0-0xa7 */ 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0xa8-0xaf */ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */ 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0xd0-0xd7 */ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* 0xd8-0xdf */ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* 0xe0-0xe7 */ 0xe8, 0xe9, 0xe1, 0xe2, 0xe3, 0xe5, 0xe6, 0xe7, /* 0xe8-0xef */ 0xe9, 0xf1, 0xf2, 0xf3, 0xe4, 0xe8, 0xf6, 0xf7, /* 0xf0-0xf7 */ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* 0xf8-0xff */ }; static const unsigned char charset2upper[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x60-0x67 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x68-0x6f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x70-0x77 */ 0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 0x80-0x87 */ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 0x88-0x8f */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 0x90-0x97 */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 0x98-0x9f */ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 0xa0-0xa7 */ 0x90, 0x91, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, /* 0xa8-0xaf */ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */ 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0xd0-0xd7 */ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* 0xd8-0xdf */ 0x97, 0xea, 0xeb, 0xec, 0xf4, 0xed, 0xee, 0xef, /* 0xe0-0xe7 */ 0xf5, 0xf0, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0xe8-0xef */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xf0-0xf7 */ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* 0xf8-0xff */ }; static int uni2char(wchar_t uni, unsigned char *out, int boundlen) { const unsigned char *uni2charset; unsigned char cl = uni & 0x00ff; unsigned char ch = (uni & 0xff00) >> 8; if (boundlen <= 0) return -ENAMETOOLONG; uni2charset = page_uni2charset[ch]; if (uni2charset && uni2charset[cl]) out[0] = uni2charset[cl]; else return -EINVAL; return 1; } static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni) { *uni = charset2uni[*rawstring]; if (*uni == 0x0000) return -EINVAL; return 1; } static struct nls_table table = { .charset = "cp737", .uni2char = uni2char, .char2uni = char2uni, .charset2lower = charset2lower, .charset2upper = charset2upper, }; static int __init init_nls_cp737(void) { return register_nls(&table); } static void __exit exit_nls_cp737(void) { unregister_nls(&table); } module_init(init_nls_cp737) module_exit(exit_nls_cp737) MODULE_DESCRIPTION("NLS Codepage 737 (Greek)"); MODULE_LICENSE("Dual BSD/GPL");
13 4047 2121 1240 92 1974 1131 2302 2285 455 26 7 13 6 2143 2168 2074 7 2074 1541 1545 1543 2061 7 2065 1271 1272 307 131 179 2564 129 2499 12 8 8 8 8 8 55 56 11 16 1 1 56 56 56 1301 51 51 13 13 14 64 64 22 2182 2 2189 2194 2182 2176 2088 2087 2083 112 127 1142 1142 1142 1139 1139 1141 1244 1243 1242 1246 1240 694 1246 61 1265 2217 58 2172 1067 2172 1514 2113 3 5 2119 1 2122 1 1 2 2253 2240 8 11 973 2239 2224 2063 1210 1135 1129 1228 2240 2093 2134 2241 8 2211 2221 1202 2087 2212 4 976 4 1726 1 1 390 2112 1 9 2110 2096 18 23 7 4 12 1139 1137 1144 1325 17 693 1245 1320 1223 1139 2 1320 1135 1136 2203 7 31 20 1 2169 1201 1200 1 1199 1199 21 21 21 21 21 29 26 3 3 2238 2063 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE * Copyright (C) 2016 - 2020 Christoph Hellwig */ #include <linux/init.h> #include <linux/mm.h> #include <linux/slab.h> #include <linux/kmod.h> #include <linux/major.h> #include <linux/device_cgroup.h> #include <linux/blkdev.h> #include <linux/blk-integrity.h> #include <linux/backing-dev.h> #include <linux/module.h> #include <linux/blkpg.h> #include <linux/magic.h> #include <linux/buffer_head.h> #include <linux/swap.h> #include <linux/writeback.h> #include <linux/mount.h> #include <linux/pseudo_fs.h> #include <linux/uio.h> #include <linux/namei.h> #include <linux/security.h> #include <linux/part_stat.h> #include <linux/uaccess.h> #include <linux/stat.h> #include "../fs/internal.h" #include "blk.h" /* Should we allow writing to mounted block devices? */ static bool bdev_allow_write_mounted = IS_ENABLED(CONFIG_BLK_DEV_WRITE_MOUNTED); struct bdev_inode { struct block_device bdev; struct inode vfs_inode; }; static inline struct bdev_inode *BDEV_I(struct inode *inode) { return container_of(inode, struct bdev_inode, vfs_inode); } static inline struct inode *BD_INODE(struct block_device *bdev) { return &container_of(bdev, struct bdev_inode, bdev)->vfs_inode; } struct block_device *I_BDEV(struct inode *inode) { return &BDEV_I(inode)->bdev; } EXPORT_SYMBOL(I_BDEV); struct block_device *file_bdev(struct file *bdev_file) { return I_BDEV(bdev_file->f_mapping->host); } EXPORT_SYMBOL(file_bdev); static void bdev_write_inode(struct block_device *bdev) { struct inode *inode = BD_INODE(bdev); int ret; spin_lock(&inode->i_lock); while (inode->i_state & I_DIRTY) { spin_unlock(&inode->i_lock); ret = write_inode_now(inode, true); if (ret) pr_warn_ratelimited( "VFS: Dirty inode writeback failed for block device %pg (err=%d).\n", bdev, ret); spin_lock(&inode->i_lock); } spin_unlock(&inode->i_lock); } /* Kill _all_ buffers and pagecache , dirty or not.. */ static void kill_bdev(struct block_device *bdev) { struct address_space *mapping = bdev->bd_mapping; if (mapping_empty(mapping)) return; invalidate_bh_lrus(); truncate_inode_pages(mapping, 0); } /* Invalidate clean unused buffers and pagecache. */ void invalidate_bdev(struct block_device *bdev) { struct address_space *mapping = bdev->bd_mapping; if (mapping->nrpages) { invalidate_bh_lrus(); lru_add_drain_all(); /* make sure all lru add caches are flushed */ invalidate_mapping_pages(mapping, 0, -1); } } EXPORT_SYMBOL(invalidate_bdev); /* * Drop all buffers & page cache for given bdev range. This function bails * with error if bdev has other exclusive owner (such as filesystem). */ int truncate_bdev_range(struct block_device *bdev, blk_mode_t mode, loff_t lstart, loff_t lend) { /* * If we don't hold exclusive handle for the device, upgrade to it * while we discard the buffer cache to avoid discarding buffers * under live filesystem. */ if (!(mode & BLK_OPEN_EXCL)) { int err = bd_prepare_to_claim(bdev, truncate_bdev_range, NULL); if (err) goto invalidate; } truncate_inode_pages_range(bdev->bd_mapping, lstart, lend); if (!(mode & BLK_OPEN_EXCL)) bd_abort_claiming(bdev, truncate_bdev_range); return 0; invalidate: /* * Someone else has handle exclusively open. Try invalidating instead. * The 'end' argument is inclusive so the rounding is safe. */ return invalidate_inode_pages2_range(bdev->bd_mapping, lstart >> PAGE_SHIFT, lend >> PAGE_SHIFT); } static void set_init_blocksize(struct block_device *bdev) { unsigned int bsize = bdev_logical_block_size(bdev); loff_t size = i_size_read(BD_INODE(bdev)); while (bsize < PAGE_SIZE) { if (size & bsize) break; bsize <<= 1; } BD_INODE(bdev)->i_blkbits = blksize_bits(bsize); } int set_blocksize(struct file *file, int size) { struct inode *inode = file->f_mapping->host; struct block_device *bdev = I_BDEV(inode); /* Size must be a power of two, and between 512 and PAGE_SIZE */ if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size)) return -EINVAL; /* Size cannot be smaller than the size supported by the device */ if (size < bdev_logical_block_size(bdev)) return -EINVAL; if (!file->private_data) return -EINVAL; /* Don't change the size if it is same as current */ if (inode->i_blkbits != blksize_bits(size)) { sync_blockdev(bdev); inode->i_blkbits = blksize_bits(size); kill_bdev(bdev); } return 0; } EXPORT_SYMBOL(set_blocksize); int sb_set_blocksize(struct super_block *sb, int size) { if (set_blocksize(sb->s_bdev_file, size)) return 0; /* If we get here, we know size is power of two * and it's value is between 512 and PAGE_SIZE */ sb->s_blocksize = size; sb->s_blocksize_bits = blksize_bits(size); return sb->s_blocksize; } EXPORT_SYMBOL(sb_set_blocksize); int sb_min_blocksize(struct super_block *sb, int size) { int minsize = bdev_logical_block_size(sb->s_bdev); if (size < minsize) size = minsize; return sb_set_blocksize(sb, size); } EXPORT_SYMBOL(sb_min_blocksize); int sync_blockdev_nowait(struct block_device *bdev) { if (!bdev) return 0; return filemap_flush(bdev->bd_mapping); } EXPORT_SYMBOL_GPL(sync_blockdev_nowait); /* * Write out and wait upon all the dirty data associated with a block * device via its mapping. Does not take the superblock lock. */ int sync_blockdev(struct block_device *bdev) { if (!bdev) return 0; return filemap_write_and_wait(bdev->bd_mapping); } EXPORT_SYMBOL(sync_blockdev); int sync_blockdev_range(struct block_device *bdev, loff_t lstart, loff_t lend) { return filemap_write_and_wait_range(bdev->bd_mapping, lstart, lend); } EXPORT_SYMBOL(sync_blockdev_range); /** * bdev_freeze - lock a filesystem and force it into a consistent state * @bdev: blockdevice to lock * * If a superblock is found on this device, we take the s_umount semaphore * on it to make sure nobody unmounts until the snapshot creation is done. * The reference counter (bd_fsfreeze_count) guarantees that only the last * unfreeze process can unfreeze the frozen filesystem actually when multiple * freeze requests arrive simultaneously. It counts up in bdev_freeze() and * count down in bdev_thaw(). When it becomes 0, thaw_bdev() will unfreeze * actually. * * Return: On success zero is returned, negative error code on failure. */ int bdev_freeze(struct block_device *bdev) { int error = 0; mutex_lock(&bdev->bd_fsfreeze_mutex); if (atomic_inc_return(&bdev->bd_fsfreeze_count) > 1) { mutex_unlock(&bdev->bd_fsfreeze_mutex); return 0; } mutex_lock(&bdev->bd_holder_lock); if (bdev->bd_holder_ops && bdev->bd_holder_ops->freeze) { error = bdev->bd_holder_ops->freeze(bdev); lockdep_assert_not_held(&bdev->bd_holder_lock); } else { mutex_unlock(&bdev->bd_holder_lock); error = sync_blockdev(bdev); } if (error) atomic_dec(&bdev->bd_fsfreeze_count); mutex_unlock(&bdev->bd_fsfreeze_mutex); return error; } EXPORT_SYMBOL(bdev_freeze); /** * bdev_thaw - unlock filesystem * @bdev: blockdevice to unlock * * Unlocks the filesystem and marks it writeable again after bdev_freeze(). * * Return: On success zero is returned, negative error code on failure. */ int bdev_thaw(struct block_device *bdev) { int error = -EINVAL, nr_freeze; mutex_lock(&bdev->bd_fsfreeze_mutex); /* * If this returns < 0 it means that @bd_fsfreeze_count was * already 0 and no decrement was performed. */ nr_freeze = atomic_dec_if_positive(&bdev->bd_fsfreeze_count); if (nr_freeze < 0) goto out; error = 0; if (nr_freeze > 0) goto out; mutex_lock(&bdev->bd_holder_lock); if (bdev->bd_holder_ops && bdev->bd_holder_ops->thaw) { error = bdev->bd_holder_ops->thaw(bdev); lockdep_assert_not_held(&bdev->bd_holder_lock); } else { mutex_unlock(&bdev->bd_holder_lock); } if (error) atomic_inc(&bdev->bd_fsfreeze_count); out: mutex_unlock(&bdev->bd_fsfreeze_mutex); return error; } EXPORT_SYMBOL(bdev_thaw); /* * pseudo-fs */ static __cacheline_aligned_in_smp DEFINE_MUTEX(bdev_lock); static struct kmem_cache *bdev_cachep __ro_after_init; static struct inode *bdev_alloc_inode(struct super_block *sb) { struct bdev_inode *ei = alloc_inode_sb(sb, bdev_cachep, GFP_KERNEL); if (!ei) return NULL; memset(&ei->bdev, 0, sizeof(ei->bdev)); if (security_bdev_alloc(&ei->bdev)) { kmem_cache_free(bdev_cachep, ei); return NULL; } return &ei->vfs_inode; } static void bdev_free_inode(struct inode *inode) { struct block_device *bdev = I_BDEV(inode); free_percpu(bdev->bd_stats); kfree(bdev->bd_meta_info); security_bdev_free(bdev); if (!bdev_is_partition(bdev)) { if (bdev->bd_disk && bdev->bd_disk->bdi) bdi_put(bdev->bd_disk->bdi); kfree(bdev->bd_disk); } if (MAJOR(bdev->bd_dev) == BLOCK_EXT_MAJOR) blk_free_ext_minor(MINOR(bdev->bd_dev)); kmem_cache_free(bdev_cachep, BDEV_I(inode)); } static void init_once(void *data) { struct bdev_inode *ei = data; inode_init_once(&ei->vfs_inode); } static void bdev_evict_inode(struct inode *inode) { truncate_inode_pages_final(&inode->i_data); invalidate_inode_buffers(inode); /* is it needed here? */ clear_inode(inode); } static const struct super_operations bdev_sops = { .statfs = simple_statfs, .alloc_inode = bdev_alloc_inode, .free_inode = bdev_free_inode, .drop_inode = generic_delete_inode, .evict_inode = bdev_evict_inode, }; static int bd_init_fs_context(struct fs_context *fc) { struct pseudo_fs_context *ctx = init_pseudo(fc, BDEVFS_MAGIC); if (!ctx) return -ENOMEM; fc->s_iflags |= SB_I_CGROUPWB; ctx->ops = &bdev_sops; return 0; } static struct file_system_type bd_type = { .name = "bdev", .init_fs_context = bd_init_fs_context, .kill_sb = kill_anon_super, }; struct super_block *blockdev_superblock __ro_after_init; static struct vfsmount *blockdev_mnt __ro_after_init; EXPORT_SYMBOL_GPL(blockdev_superblock); void __init bdev_cache_init(void) { int err; bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode), 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| SLAB_ACCOUNT|SLAB_PANIC), init_once); err = register_filesystem(&bd_type); if (err) panic("Cannot register bdev pseudo-fs"); blockdev_mnt = kern_mount(&bd_type); if (IS_ERR(blockdev_mnt)) panic("Cannot create bdev pseudo-fs"); blockdev_superblock = blockdev_mnt->mnt_sb; /* For writeback */ } struct block_device *bdev_alloc(struct gendisk *disk, u8 partno) { struct block_device *bdev; struct inode *inode; inode = new_inode(blockdev_superblock); if (!inode) return NULL; inode->i_mode = S_IFBLK; inode->i_rdev = 0; inode->i_data.a_ops = &def_blk_aops; mapping_set_gfp_mask(&inode->i_data, GFP_USER); bdev = I_BDEV(inode); mutex_init(&bdev->bd_fsfreeze_mutex); spin_lock_init(&bdev->bd_size_lock); mutex_init(&bdev->bd_holder_lock); atomic_set(&bdev->__bd_flags, partno); bdev->bd_mapping = &inode->i_data; bdev->bd_queue = disk->queue; if (partno && bdev_test_flag(disk->part0, BD_HAS_SUBMIT_BIO)) bdev_set_flag(bdev, BD_HAS_SUBMIT_BIO); bdev->bd_stats = alloc_percpu(struct disk_stats); if (!bdev->bd_stats) { iput(inode); return NULL; } bdev->bd_disk = disk; return bdev; } void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors) { spin_lock(&bdev->bd_size_lock); i_size_write(BD_INODE(bdev), (loff_t)sectors << SECTOR_SHIFT); bdev->bd_nr_sectors = sectors; spin_unlock(&bdev->bd_size_lock); } void bdev_add(struct block_device *bdev, dev_t dev) { struct inode *inode = BD_INODE(bdev); if (bdev_stable_writes(bdev)) mapping_set_stable_writes(bdev->bd_mapping); bdev->bd_dev = dev; inode->i_rdev = dev; inode->i_ino = dev; insert_inode_hash(inode); } void bdev_unhash(struct block_device *bdev) { remove_inode_hash(BD_INODE(bdev)); } void bdev_drop(struct block_device *bdev) { iput(BD_INODE(bdev)); } long nr_blockdev_pages(void) { struct inode *inode; long ret = 0; spin_lock(&blockdev_superblock->s_inode_list_lock); list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) ret += inode->i_mapping->nrpages; spin_unlock(&blockdev_superblock->s_inode_list_lock); return ret; } /** * bd_may_claim - test whether a block device can be claimed * @bdev: block device of interest * @holder: holder trying to claim @bdev * @hops: holder ops * * Test whether @bdev can be claimed by @holder. * * RETURNS: * %true if @bdev can be claimed, %false otherwise. */ static bool bd_may_claim(struct block_device *bdev, void *holder, const struct blk_holder_ops *hops) { struct block_device *whole = bdev_whole(bdev); lockdep_assert_held(&bdev_lock); if (bdev->bd_holder) { /* * The same holder can always re-claim. */ if (bdev->bd_holder == holder) { if (WARN_ON_ONCE(bdev->bd_holder_ops != hops)) return false; return true; } return false; } /* * If the whole devices holder is set to bd_may_claim, a partition on * the device is claimed, but not the whole device. */ if (whole != bdev && whole->bd_holder && whole->bd_holder != bd_may_claim) return false; return true; } /** * bd_prepare_to_claim - claim a block device * @bdev: block device of interest * @holder: holder trying to claim @bdev * @hops: holder ops. * * Claim @bdev. This function fails if @bdev is already claimed by another * holder and waits if another claiming is in progress. return, the caller * has ownership of bd_claiming and bd_holder[s]. * * RETURNS: * 0 if @bdev can be claimed, -EBUSY otherwise. */ int bd_prepare_to_claim(struct block_device *bdev, void *holder, const struct blk_holder_ops *hops) { struct block_device *whole = bdev_whole(bdev); if (WARN_ON_ONCE(!holder)) return -EINVAL; retry: mutex_lock(&bdev_lock); /* if someone else claimed, fail */ if (!bd_may_claim(bdev, holder, hops)) { mutex_unlock(&bdev_lock); return -EBUSY; } /* if claiming is already in progress, wait for it to finish */ if (whole->bd_claiming) { wait_queue_head_t *wq = __var_waitqueue(&whole->bd_claiming); DEFINE_WAIT(wait); prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE); mutex_unlock(&bdev_lock); schedule(); finish_wait(wq, &wait); goto retry; } /* yay, all mine */ whole->bd_claiming = holder; mutex_unlock(&bdev_lock); return 0; } EXPORT_SYMBOL_GPL(bd_prepare_to_claim); /* only for the loop driver */ static void bd_clear_claiming(struct block_device *whole, void *holder) { lockdep_assert_held(&bdev_lock); /* tell others that we're done */ BUG_ON(whole->bd_claiming != holder); whole->bd_claiming = NULL; wake_up_var(&whole->bd_claiming); } /** * bd_finish_claiming - finish claiming of a block device * @bdev: block device of interest * @holder: holder that has claimed @bdev * @hops: block device holder operations * * Finish exclusive open of a block device. Mark the device as exlusively * open by the holder and wake up all waiters for exclusive open to finish. */ static void bd_finish_claiming(struct block_device *bdev, void *holder, const struct blk_holder_ops *hops) { struct block_device *whole = bdev_whole(bdev); mutex_lock(&bdev_lock); BUG_ON(!bd_may_claim(bdev, holder, hops)); /* * Note that for a whole device bd_holders will be incremented twice, * and bd_holder will be set to bd_may_claim before being set to holder */ whole->bd_holders++; whole->bd_holder = bd_may_claim; bdev->bd_holders++; mutex_lock(&bdev->bd_holder_lock); bdev->bd_holder = holder; bdev->bd_holder_ops = hops; mutex_unlock(&bdev->bd_holder_lock); bd_clear_claiming(whole, holder); mutex_unlock(&bdev_lock); } /** * bd_abort_claiming - abort claiming of a block device * @bdev: block device of interest * @holder: holder that has claimed @bdev * * Abort claiming of a block device when the exclusive open failed. This can be * also used when exclusive open is not actually desired and we just needed * to block other exclusive openers for a while. */ void bd_abort_claiming(struct block_device *bdev, void *holder) { mutex_lock(&bdev_lock); bd_clear_claiming(bdev_whole(bdev), holder); mutex_unlock(&bdev_lock); } EXPORT_SYMBOL(bd_abort_claiming); static void bd_end_claim(struct block_device *bdev, void *holder) { struct block_device *whole = bdev_whole(bdev); bool unblock = false; /* * Release a claim on the device. The holder fields are protected with * bdev_lock. open_mutex is used to synchronize disk_holder unlinking. */ mutex_lock(&bdev_lock); WARN_ON_ONCE(bdev->bd_holder != holder); WARN_ON_ONCE(--bdev->bd_holders < 0); WARN_ON_ONCE(--whole->bd_holders < 0); if (!bdev->bd_holders) { mutex_lock(&bdev->bd_holder_lock); bdev->bd_holder = NULL; bdev->bd_holder_ops = NULL; mutex_unlock(&bdev->bd_holder_lock); if (bdev_test_flag(bdev, BD_WRITE_HOLDER)) unblock = true; } if (!whole->bd_holders) whole->bd_holder = NULL; mutex_unlock(&bdev_lock); /* * If this was the last claim, remove holder link and unblock evpoll if * it was a write holder. */ if (unblock) { disk_unblock_events(bdev->bd_disk); bdev_clear_flag(bdev, BD_WRITE_HOLDER); } } static void blkdev_flush_mapping(struct block_device *bdev) { WARN_ON_ONCE(bdev->bd_holders); sync_blockdev(bdev); kill_bdev(bdev); bdev_write_inode(bdev); } static void blkdev_put_whole(struct block_device *bdev) { if (atomic_dec_and_test(&bdev->bd_openers)) blkdev_flush_mapping(bdev); if (bdev->bd_disk->fops->release) bdev->bd_disk->fops->release(bdev->bd_disk); } static int blkdev_get_whole(struct block_device *bdev, blk_mode_t mode) { struct gendisk *disk = bdev->bd_disk; int ret; if (disk->fops->open) { ret = disk->fops->open(disk, mode); if (ret) { /* avoid ghost partitions on a removed medium */ if (ret == -ENOMEDIUM && test_bit(GD_NEED_PART_SCAN, &disk->state)) bdev_disk_changed(disk, true); return ret; } } if (!atomic_read(&bdev->bd_openers)) set_init_blocksize(bdev); atomic_inc(&bdev->bd_openers); if (test_bit(GD_NEED_PART_SCAN, &disk->state)) { /* * Only return scanning errors if we are called from contexts * that explicitly want them, e.g. the BLKRRPART ioctl. */ ret = bdev_disk_changed(disk, false); if (ret && (mode & BLK_OPEN_STRICT_SCAN)) { blkdev_put_whole(bdev); return ret; } } return 0; } static int blkdev_get_part(struct block_device *part, blk_mode_t mode) { struct gendisk *disk = part->bd_disk; int ret; ret = blkdev_get_whole(bdev_whole(part), mode); if (ret) return ret; ret = -ENXIO; if (!bdev_nr_sectors(part)) goto out_blkdev_put; if (!atomic_read(&part->bd_openers)) { disk->open_partitions++; set_init_blocksize(part); } atomic_inc(&part->bd_openers); return 0; out_blkdev_put: blkdev_put_whole(bdev_whole(part)); return ret; } int bdev_permission(dev_t dev, blk_mode_t mode, void *holder) { int ret; ret = devcgroup_check_permission(DEVCG_DEV_BLOCK, MAJOR(dev), MINOR(dev), ((mode & BLK_OPEN_READ) ? DEVCG_ACC_READ : 0) | ((mode & BLK_OPEN_WRITE) ? DEVCG_ACC_WRITE : 0)); if (ret) return ret; /* Blocking writes requires exclusive opener */ if (mode & BLK_OPEN_RESTRICT_WRITES && !holder) return -EINVAL; /* * We're using error pointers to indicate to ->release() when we * failed to open that block device. Also this doesn't make sense. */ if (WARN_ON_ONCE(IS_ERR(holder))) return -EINVAL; return 0; } static void blkdev_put_part(struct block_device *part) { struct block_device *whole = bdev_whole(part); if (atomic_dec_and_test(&part->bd_openers)) { blkdev_flush_mapping(part); whole->bd_disk->open_partitions--; } blkdev_put_whole(whole); } struct block_device *blkdev_get_no_open(dev_t dev) { struct block_device *bdev; struct inode *inode; inode = ilookup(blockdev_superblock, dev); if (!inode && IS_ENABLED(CONFIG_BLOCK_LEGACY_AUTOLOAD)) { blk_request_module(dev); inode = ilookup(blockdev_superblock, dev); if (inode) pr_warn_ratelimited( "block device autoloading is deprecated and will be removed.\n"); } if (!inode) return NULL; /* switch from the inode reference to a device mode one: */ bdev = &BDEV_I(inode)->bdev; if (!kobject_get_unless_zero(&bdev->bd_device.kobj)) bdev = NULL; iput(inode); return bdev; } void blkdev_put_no_open(struct block_device *bdev) { put_device(&bdev->bd_device); } static bool bdev_writes_blocked(struct block_device *bdev) { return bdev->bd_writers < 0; } static void bdev_block_writes(struct block_device *bdev) { bdev->bd_writers--; } static void bdev_unblock_writes(struct block_device *bdev) { bdev->bd_writers++; } static bool bdev_may_open(struct block_device *bdev, blk_mode_t mode) { if (bdev_allow_write_mounted) return true; /* Writes blocked? */ if (mode & BLK_OPEN_WRITE && bdev_writes_blocked(bdev)) return false; if (mode & BLK_OPEN_RESTRICT_WRITES && bdev->bd_writers > 0) return false; return true; } static void bdev_claim_write_access(struct block_device *bdev, blk_mode_t mode) { if (bdev_allow_write_mounted) return; /* Claim exclusive or shared write access. */ if (mode & BLK_OPEN_RESTRICT_WRITES) bdev_block_writes(bdev); else if (mode & BLK_OPEN_WRITE) bdev->bd_writers++; } static inline bool bdev_unclaimed(const struct file *bdev_file) { return bdev_file->private_data == BDEV_I(bdev_file->f_mapping->host); } static void bdev_yield_write_access(struct file *bdev_file) { struct block_device *bdev; if (bdev_allow_write_mounted) return; if (bdev_unclaimed(bdev_file)) return; bdev = file_bdev(bdev_file); if (bdev_file->f_mode & FMODE_WRITE_RESTRICTED) bdev_unblock_writes(bdev); else if (bdev_file->f_mode & FMODE_WRITE) bdev->bd_writers--; } /** * bdev_open - open a block device * @bdev: block device to open * @mode: open mode (BLK_OPEN_*) * @holder: exclusive holder identifier * @hops: holder operations * @bdev_file: file for the block device * * Open the block device. If @holder is not %NULL, the block device is opened * with exclusive access. Exclusive opens may nest for the same @holder. * * CONTEXT: * Might sleep. * * RETURNS: * zero on success, -errno on failure. */ int bdev_open(struct block_device *bdev, blk_mode_t mode, void *holder, const struct blk_holder_ops *hops, struct file *bdev_file) { bool unblock_events = true; struct gendisk *disk = bdev->bd_disk; int ret; if (holder) { mode |= BLK_OPEN_EXCL; ret = bd_prepare_to_claim(bdev, holder, hops); if (ret) return ret; } else { if (WARN_ON_ONCE(mode & BLK_OPEN_EXCL)) return -EIO; } disk_block_events(disk); mutex_lock(&disk->open_mutex); ret = -ENXIO; if (!disk_live(disk)) goto abort_claiming; if (!try_module_get(disk->fops->owner)) goto abort_claiming; ret = -EBUSY; if (!bdev_may_open(bdev, mode)) goto put_module; if (bdev_is_partition(bdev)) ret = blkdev_get_part(bdev, mode); else ret = blkdev_get_whole(bdev, mode); if (ret) goto put_module; bdev_claim_write_access(bdev, mode); if (holder) { bd_finish_claiming(bdev, holder, hops); /* * Block event polling for write claims if requested. Any write * holder makes the write_holder state stick until all are * released. This is good enough and tracking individual * writeable reference is too fragile given the way @mode is * used in blkdev_get/put(). */ if ((mode & BLK_OPEN_WRITE) && !bdev_test_flag(bdev, BD_WRITE_HOLDER) && (disk->event_flags & DISK_EVENT_FLAG_BLOCK_ON_EXCL_WRITE)) { bdev_set_flag(bdev, BD_WRITE_HOLDER); unblock_events = false; } } mutex_unlock(&disk->open_mutex); if (unblock_events) disk_unblock_events(disk); bdev_file->f_flags |= O_LARGEFILE; bdev_file->f_mode |= FMODE_CAN_ODIRECT; if (bdev_nowait(bdev)) bdev_file->f_mode |= FMODE_NOWAIT; if (mode & BLK_OPEN_RESTRICT_WRITES) bdev_file->f_mode |= FMODE_WRITE_RESTRICTED; bdev_file->f_mapping = bdev->bd_mapping; bdev_file->f_wb_err = filemap_sample_wb_err(bdev_file->f_mapping); bdev_file->private_data = holder; return 0; put_module: module_put(disk->fops->owner); abort_claiming: if (holder) bd_abort_claiming(bdev, holder); mutex_unlock(&disk->open_mutex); disk_unblock_events(disk); return ret; } /* * If BLK_OPEN_WRITE_IOCTL is set then this is a historical quirk * associated with the floppy driver where it has allowed ioctls if the * file was opened for writing, but does not allow reads or writes. * Make sure that this quirk is reflected in @f_flags. * * It can also happen if a block device is opened as O_RDWR | O_WRONLY. */ static unsigned blk_to_file_flags(blk_mode_t mode) { unsigned int flags = 0; if ((mode & (BLK_OPEN_READ | BLK_OPEN_WRITE)) == (BLK_OPEN_READ | BLK_OPEN_WRITE)) flags |= O_RDWR; else if (mode & BLK_OPEN_WRITE_IOCTL) flags |= O_RDWR | O_WRONLY; else if (mode & BLK_OPEN_WRITE) flags |= O_WRONLY; else if (mode & BLK_OPEN_READ) flags |= O_RDONLY; /* homeopathic, because O_RDONLY is 0 */ else WARN_ON_ONCE(true); if (mode & BLK_OPEN_NDELAY) flags |= O_NDELAY; return flags; } struct file *bdev_file_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, const struct blk_holder_ops *hops) { struct file *bdev_file; struct block_device *bdev; unsigned int flags; int ret; ret = bdev_permission(dev, mode, holder); if (ret) return ERR_PTR(ret); bdev = blkdev_get_no_open(dev); if (!bdev) return ERR_PTR(-ENXIO); flags = blk_to_file_flags(mode); bdev_file = alloc_file_pseudo_noaccount(BD_INODE(bdev), blockdev_mnt, "", flags | O_LARGEFILE, &def_blk_fops); if (IS_ERR(bdev_file)) { blkdev_put_no_open(bdev); return bdev_file; } ihold(BD_INODE(bdev)); ret = bdev_open(bdev, mode, holder, hops, bdev_file); if (ret) { /* We failed to open the block device. Let ->release() know. */ bdev_file->private_data = ERR_PTR(ret); fput(bdev_file); return ERR_PTR(ret); } return bdev_file; } EXPORT_SYMBOL(bdev_file_open_by_dev); struct file *bdev_file_open_by_path(const char *path, blk_mode_t mode, void *holder, const struct blk_holder_ops *hops) { struct file *file; dev_t dev; int error; error = lookup_bdev(path, &dev); if (error) return ERR_PTR(error); file = bdev_file_open_by_dev(dev, mode, holder, hops); if (!IS_ERR(file) && (mode & BLK_OPEN_WRITE)) { if (bdev_read_only(file_bdev(file))) { fput(file); file = ERR_PTR(-EACCES); } } return file; } EXPORT_SYMBOL(bdev_file_open_by_path); static inline void bd_yield_claim(struct file *bdev_file) { struct block_device *bdev = file_bdev(bdev_file); void *holder = bdev_file->private_data; lockdep_assert_held(&bdev->bd_disk->open_mutex); if (WARN_ON_ONCE(IS_ERR_OR_NULL(holder))) return; if (!bdev_unclaimed(bdev_file)) bd_end_claim(bdev, holder); } void bdev_release(struct file *bdev_file) { struct block_device *bdev = file_bdev(bdev_file); void *holder = bdev_file->private_data; struct gendisk *disk = bdev->bd_disk; /* We failed to open that block device. */ if (IS_ERR(holder)) goto put_no_open; /* * Sync early if it looks like we're the last one. If someone else * opens the block device between now and the decrement of bd_openers * then we did a sync that we didn't need to, but that's not the end * of the world and we want to avoid long (could be several minute) * syncs while holding the mutex. */ if (atomic_read(&bdev->bd_openers) == 1) sync_blockdev(bdev); mutex_lock(&disk->open_mutex); bdev_yield_write_access(bdev_file); if (holder) bd_yield_claim(bdev_file); /* * Trigger event checking and tell drivers to flush MEDIA_CHANGE * event. This is to ensure detection of media removal commanded * from userland - e.g. eject(1). */ disk_flush_events(disk, DISK_EVENT_MEDIA_CHANGE); if (bdev_is_partition(bdev)) blkdev_put_part(bdev); else blkdev_put_whole(bdev); mutex_unlock(&disk->open_mutex); module_put(disk->fops->owner); put_no_open: blkdev_put_no_open(bdev); } /** * bdev_fput - yield claim to the block device and put the file * @bdev_file: open block device * * Yield claim on the block device and put the file. Ensure that the * block device can be reclaimed before the file is closed which is a * deferred operation. */ void bdev_fput(struct file *bdev_file) { if (WARN_ON_ONCE(bdev_file->f_op != &def_blk_fops)) return; if (bdev_file->private_data) { struct block_device *bdev = file_bdev(bdev_file); struct gendisk *disk = bdev->bd_disk; mutex_lock(&disk->open_mutex); bdev_yield_write_access(bdev_file); bd_yield_claim(bdev_file); /* * Tell release we already gave up our hold on the * device and if write restrictions are available that * we already gave up write access to the device. */ bdev_file->private_data = BDEV_I(bdev_file->f_mapping->host); mutex_unlock(&disk->open_mutex); } fput(bdev_file); } EXPORT_SYMBOL(bdev_fput); /** * lookup_bdev() - Look up a struct block_device by name. * @pathname: Name of the block device in the filesystem. * @dev: Pointer to the block device's dev_t, if found. * * Lookup the block device's dev_t at @pathname in the current * namespace if possible and return it in @dev. * * Context: May sleep. * Return: 0 if succeeded, negative errno otherwise. */ int lookup_bdev(const char *pathname, dev_t *dev) { struct inode *inode; struct path path; int error; if (!pathname || !*pathname) return -EINVAL; error = kern_path(pathname, LOOKUP_FOLLOW, &path); if (error) return error; inode = d_backing_inode(path.dentry); error = -ENOTBLK; if (!S_ISBLK(inode->i_mode)) goto out_path_put; error = -EACCES; if (!may_open_dev(&path)) goto out_path_put; *dev = inode->i_rdev; error = 0; out_path_put: path_put(&path); return error; } EXPORT_SYMBOL(lookup_bdev); /** * bdev_mark_dead - mark a block device as dead * @bdev: block device to operate on * @surprise: indicate a surprise removal * * Tell the file system that this devices or media is dead. If @surprise is set * to %true the device or media is already gone, if not we are preparing for an * orderly removal. * * This calls into the file system, which then typicall syncs out all dirty data * and writes back inodes and then invalidates any cached data in the inodes on * the file system. In addition we also invalidate the block device mapping. */ void bdev_mark_dead(struct block_device *bdev, bool surprise) { mutex_lock(&bdev->bd_holder_lock); if (bdev->bd_holder_ops && bdev->bd_holder_ops->mark_dead) bdev->bd_holder_ops->mark_dead(bdev, surprise); else { mutex_unlock(&bdev->bd_holder_lock); sync_blockdev(bdev); } invalidate_bdev(bdev); } /* * New drivers should not use this directly. There are some drivers however * that needs this for historical reasons. For example, the DASD driver has * historically had a shutdown to offline mode that doesn't actually remove the * gendisk that otherwise looks a lot like a safe device removal. */ EXPORT_SYMBOL_GPL(bdev_mark_dead); void sync_bdevs(bool wait) { struct inode *inode, *old_inode = NULL; spin_lock(&blockdev_superblock->s_inode_list_lock); list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) { struct address_space *mapping = inode->i_mapping; struct block_device *bdev; spin_lock(&inode->i_lock); if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW) || mapping->nrpages == 0) { spin_unlock(&inode->i_lock); continue; } __iget(inode); spin_unlock(&inode->i_lock); spin_unlock(&blockdev_superblock->s_inode_list_lock); /* * We hold a reference to 'inode' so it couldn't have been * removed from s_inodes list while we dropped the * s_inode_list_lock We cannot iput the inode now as we can * be holding the last reference and we cannot iput it under * s_inode_list_lock. So we keep the reference and iput it * later. */ iput(old_inode); old_inode = inode; bdev = I_BDEV(inode); mutex_lock(&bdev->bd_disk->open_mutex); if (!atomic_read(&bdev->bd_openers)) { ; /* skip */ } else if (wait) { /* * We keep the error status of individual mapping so * that applications can catch the writeback error using * fsync(2). See filemap_fdatawait_keep_errors() for * details. */ filemap_fdatawait_keep_errors(inode->i_mapping); } else { filemap_fdatawrite(inode->i_mapping); } mutex_unlock(&bdev->bd_disk->open_mutex); spin_lock(&blockdev_superblock->s_inode_list_lock); } spin_unlock(&blockdev_superblock->s_inode_list_lock); iput(old_inode); } /* * Handle STATX_{DIOALIGN, WRITE_ATOMIC} for block devices. */ void bdev_statx(struct path *path, struct kstat *stat, u32 request_mask) { struct inode *backing_inode; struct block_device *bdev; if (!(request_mask & (STATX_DIOALIGN | STATX_WRITE_ATOMIC))) return; backing_inode = d_backing_inode(path->dentry); /* * Note that backing_inode is the inode of a block device node file, * not the block device's internal inode. Therefore it is *not* valid * to use I_BDEV() here; the block device has to be looked up by i_rdev * instead. */ bdev = blkdev_get_no_open(backing_inode->i_rdev); if (!bdev) return; if (request_mask & STATX_DIOALIGN) { stat->dio_mem_align = bdev_dma_alignment(bdev) + 1; stat->dio_offset_align = bdev_logical_block_size(bdev); stat->result_mask |= STATX_DIOALIGN; } if (request_mask & STATX_WRITE_ATOMIC && bdev_can_atomic_write(bdev)) { struct request_queue *bd_queue = bdev->bd_queue; generic_fill_statx_atomic_writes(stat, queue_atomic_write_unit_min_bytes(bd_queue), queue_atomic_write_unit_max_bytes(bd_queue)); } blkdev_put_no_open(bdev); } bool disk_live(struct gendisk *disk) { return !inode_unhashed(BD_INODE(disk->part0)); } EXPORT_SYMBOL_GPL(disk_live); unsigned int block_size(struct block_device *bdev) { return 1 << BD_INODE(bdev)->i_blkbits; } EXPORT_SYMBOL_GPL(block_size); static int __init setup_bdev_allow_write_mounted(char *str) { if (kstrtobool(str, &bdev_allow_write_mounted)) pr_warn("Invalid option string for bdev_allow_write_mounted:" " '%s'\n", str); return 1; } __setup("bdev_allow_write_mounted=", setup_bdev_allow_write_mounted);
798 797 798 798 63 61 61 61 55 5 10 64 7 61 63 64 64 31 31 31 31 798 793 798 798 64 797 797 797 798 798 798 798 798 798 93 761 798 798 798 798 798 77 790 32 64 798 795 77 798 795 77 798 64 798 63 1 798 797 798 798 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580 5581 5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643 5644 5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 5672 5673 5674 5675 5676 5677 5678 5679 5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706 5707 5708 5709 5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729 5730 5731 5732 5733 5734 5735 5736 5737 5738 5739 5740 5741 5742 5743 5744 5745 5746 5747 5748 5749 5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807 5808 5809 5810 5811 5812 5813 5814 5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833 5834 5835 5836 5837 5838 5839 5840 5841 5842 5843 5844 5845 5846 5847 5848 5849 5850 5851 5852 5853 5854 5855 5856 5857 5858 5859 5860 5861 5862 5863 5864 5865 5866 5867 5868 5869 5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897 5898 5899 5900 5901 5902 5903 5904 5905 5906 5907 5908 5909 5910 5911 5912 5913 5914 5915 5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042 6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 6104 6105 6106 6107 6108 6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 6122 6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145 6146 6147 6148 6149 6150 6151 6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 6180 6181 6182 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 6199 6200 6201 6202 6203 6204 6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271 6272 6273 6274 6275 6276 6277 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287 6288 6289 6290 6291 6292 6293 6294 6295 6296 6297 6298 6299 6300 6301 6302 6303 6304 6305 6306 6307 6308 6309 6310 6311 6312 6313 6314 6315 6316 6317 6318 6319 6320 6321 6322 6323 6324 6325 6326 6327 6328 6329 6330 6331 6332 6333 6334 6335 6336 6337 6338 6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366 6367 6368 6369 6370 6371 6372 6373 6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400 6401 6402 6403 6404 6405 6406 6407 6408 6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 6419 6420 6421 6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 6438 6439 6440 6441 6442 6443 6444 6445 6446 6447 6448 6449 6450 6451 6452 6453 6454 6455 6456 6457 6458 6459 6460 6461 6462 6463 6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 6536 6537 6538 6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549 6550 6551 6552 6553 6554 6555 6556 6557 6558 6559 6560 6561 6562 6563 6564 6565 6566 6567 6568 6569 6570 6571 6572 6573 6574 6575 6576 6577 6578 6579 6580 6581 6582 6583 6584 6585 6586 6587 6588 6589 6590 6591 6592 6593 6594 6595 6596 6597 6598 6599 6600 6601 6602 6603 6604 6605 6606 6607 6608 6609 6610 6611 6612 6613 6614 6615 6616 6617 6618 6619 6620 6621 6622 6623 6624 6625 6626 6627 6628 6629 6630 6631 6632 6633 6634 6635 6636 6637 6638 6639 6640 6641 6642 6643 6644 6645 6646 6647 6648 6649 6650 6651 6652 6653 6654 6655 6656 6657 6658 6659 6660 6661 6662 6663 6664 6665 6666 6667 6668 6669 6670 6671 6672 6673 6674 6675 6676 6677 6678 6679 6680 6681 6682 6683 6684 6685 6686 6687 6688 6689 6690 6691 6692 6693 6694 6695 6696 6697 6698 6699 6700 6701 6702 6703 6704 6705 6706 6707 6708 6709 6710 6711 6712 6713 6714 6715 6716 6717 6718 6719 6720 6721 6722 6723 6724 6725 6726 6727 6728 6729 6730 6731 6732 6733 6734 6735 6736 6737 6738 6739 6740 6741 6742 6743 6744 6745 6746 6747 6748 6749 6750 6751 6752 6753 6754 6755 6756 6757 6758 6759 6760 6761 6762 6763 6764 6765 6766 6767 6768 6769 6770 6771 6772 6773 6774 6775 6776 6777 6778 6779 6780 6781 6782 6783 6784 6785 6786 6787 6788 6789 6790 6791 6792 6793 6794 6795 6796 6797 6798 6799 6800 6801 6802 6803 6804 6805 6806 6807 6808 6809 6810 6811 6812 6813 6814 6815 6816 6817 6818 6819 6820 6821 6822 6823 6824 6825 6826 6827 6828 6829 6830 6831 6832 6833 6834 6835 6836 6837 6838 6839 6840 6841 6842 6843 6844 6845 6846 6847 6848 6849 6850 6851 6852 6853 6854 6855 6856 6857 6858 6859 6860 6861 6862 6863 6864 6865 6866 6867 6868 6869 6870 6871 6872 6873 6874 6875 6876 6877 6878 6879 6880 6881 6882 6883 6884 6885 6886 6887 6888 6889 6890 6891 6892 6893 6894 6895 6896 6897 6898 6899 6900 6901 6902 6903 6904 6905 6906 6907 6908 6909 6910 6911 6912 6913 6914 6915 6916 6917 6918 6919 6920 6921 6922 6923 6924 6925 6926 6927 6928 6929 6930 6931 6932 6933 6934 6935 6936 6937 6938 6939 6940 6941 6942 6943 6944 6945 6946 6947 6948 6949 6950 6951 6952 6953 6954 6955 6956 6957 6958 6959 6960 6961 6962 6963 6964 6965 6966 6967 6968 6969 6970 6971 6972 6973 6974 6975 6976 6977 6978 6979 6980 6981 6982 6983 6984 6985 6986 6987 6988 6989 6990 6991 6992 6993 6994 6995 6996 6997 6998 6999 7000 7001 7002 7003 7004 7005 7006 7007 7008 7009 7010 7011 7012 7013 7014 7015 7016 7017 7018 7019 7020 7021 7022 7023 7024 7025 7026 7027 7028 7029 7030 7031 7032 7033 7034 7035 7036 7037 7038 7039 7040 7041 7042 7043 7044 7045 7046 7047 7048 7049 7050 7051 7052 7053 7054 7055 7056 7057 7058 7059 7060 7061 7062 7063 7064 7065 7066 7067 7068 7069 7070 7071 7072 7073 7074 7075 7076 7077 7078 7079 7080 7081 7082 7083 7084 7085 7086 7087 7088 7089 7090 7091 7092 7093 7094 7095 7096 7097 7098 7099 7100 7101 7102 7103 7104 7105 7106 7107 7108 7109 7110 7111 7112 7113 7114 7115 7116 7117 7118 7119 7120 7121 7122 7123 7124 7125 7126 7127 7128 7129 7130 7131 7132 7133 7134 7135 7136 7137 7138 7139 7140 7141 7142 7143 7144 7145 7146 7147 7148 7149 7150 7151 7152 7153 7154 7155 7156 7157 7158 7159 7160 7161 7162 7163 7164 7165 7166 7167 7168 7169 7170 7171 7172 7173 7174 7175 7176 7177 7178 7179 7180 7181 7182 7183 7184 7185 7186 7187 7188 7189 7190 7191 7192 7193 7194 7195 7196 7197 7198 7199 7200 7201 7202 7203 7204 7205 7206 7207 7208 7209 7210 7211 7212 7213 7214 7215 7216 7217 7218 7219 7220 7221 7222 7223 7224 7225 7226 7227 7228 7229 7230 7231 7232 7233 7234 7235 7236 7237 7238 7239 7240 7241 7242 7243 7244 7245 7246 7247 7248 7249 7250 7251 7252 7253 7254 7255 7256 7257 7258 7259 7260 7261 7262 7263 7264 7265 7266 7267 7268 7269 7270 7271 7272 7273 7274 7275 7276 7277 7278 7279 7280 7281 7282 7283 7284 7285 7286 7287 7288 7289 7290 7291 7292 7293 7294 7295 7296 7297 7298 7299 7300 7301 7302 7303 7304 7305 7306 7307 7308 7309 7310 7311 7312 7313 7314 7315 7316 7317 7318 7319 7320 7321 7322 7323 7324 7325 7326 7327 7328 7329 7330 7331 7332 7333 7334 7335 7336 7337 7338 7339 7340 7341 7342 7343 7344 7345 7346 7347 7348 7349 7350 7351 7352 7353 7354 7355 7356 7357 7358 7359 7360 7361 7362 7363 7364 7365 7366 7367 7368 7369 7370 7371 7372 7373 7374 7375 7376 7377 7378 7379 7380 7381 7382 7383 7384 7385 7386 7387 7388 7389 7390 7391 7392 7393 7394 7395 7396 7397 7398 7399 7400 7401 7402 7403 7404 7405 7406 7407 7408 7409 7410 7411 7412 7413 7414 7415 7416 7417 7418 7419 7420 7421 7422 7423 7424 7425 7426 7427 7428 7429 7430 7431 7432 7433 7434 7435 7436 7437 7438 7439 7440 7441 7442 7443 7444 7445 7446 7447 7448 7449 7450 7451 7452 7453 7454 7455 7456 7457 7458 7459 7460 7461 7462 7463 7464 7465 7466 7467 7468 7469 7470 7471 7472 7473 7474 7475 7476 7477 7478 7479 7480 7481 7482 7483 7484 7485 7486 7487 7488 7489 7490 7491 7492 7493 7494 7495 7496 7497 7498 7499 7500 7501 7502 7503 7504 7505 7506 7507 7508 7509 7510 7511 7512 7513 7514 7515 7516 7517 7518 7519 7520 7521 7522 7523 7524 7525 7526 7527 7528 7529 7530 7531 7532 7533 7534 7535 7536 7537 7538 7539 7540 7541 7542 7543 7544 7545 7546 7547 7548 7549 7550 7551 7552 7553 7554 7555 7556 7557 7558 7559 7560 7561 7562 7563 7564 7565 7566 7567 7568 7569 7570 7571 7572 7573 7574 7575 7576 7577 7578 7579 7580 7581 7582 7583 7584 7585 7586 7587 7588 7589 7590 7591 7592 7593 7594 7595 7596 7597 7598 7599 7600 7601 7602 7603 7604 // SPDX-License-Identifier: GPL-2.0 /* * Generic ring buffer * * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com> */ #include <linux/trace_recursion.h> #include <linux/trace_events.h> #include <linux/ring_buffer.h> #include <linux/trace_clock.h> #include <linux/sched/clock.h> #include <linux/cacheflush.h> #include <linux/trace_seq.h> #include <linux/spinlock.h> #include <linux/irq_work.h> #include <linux/security.h> #include <linux/uaccess.h> #include <linux/hardirq.h> #include <linux/kthread.h> /* for self test */ #include <linux/module.h> #include <linux/percpu.h> #include <linux/mutex.h> #include <linux/delay.h> #include <linux/slab.h> #include <linux/init.h> #include <linux/hash.h> #include <linux/list.h> #include <linux/cpu.h> #include <linux/oom.h> #include <linux/mm.h> #include <asm/local64.h> #include <asm/local.h> #include "trace.h" /* * The "absolute" timestamp in the buffer is only 59 bits. * If a clock has the 5 MSBs set, it needs to be saved and * reinserted. */ #define TS_MSB (0xf8ULL << 56) #define ABS_TS_MASK (~TS_MSB) static void update_pages_handler(struct work_struct *work); #define RING_BUFFER_META_MAGIC 0xBADFEED struct ring_buffer_meta { int magic; int struct_size; unsigned long text_addr; unsigned long data_addr; unsigned long first_buffer; unsigned long head_buffer; unsigned long commit_buffer; __u32 subbuf_size; __u32 nr_subbufs; int buffers[]; }; /* * The ring buffer header is special. We must manually up keep it. */ int ring_buffer_print_entry_header(struct trace_seq *s) { trace_seq_puts(s, "# compressed entry header\n"); trace_seq_puts(s, "\ttype_len : 5 bits\n"); trace_seq_puts(s, "\ttime_delta : 27 bits\n"); trace_seq_puts(s, "\tarray : 32 bits\n"); trace_seq_putc(s, '\n'); trace_seq_printf(s, "\tpadding : type == %d\n", RINGBUF_TYPE_PADDING); trace_seq_printf(s, "\ttime_extend : type == %d\n", RINGBUF_TYPE_TIME_EXTEND); trace_seq_printf(s, "\ttime_stamp : type == %d\n", RINGBUF_TYPE_TIME_STAMP); trace_seq_printf(s, "\tdata max type_len == %d\n", RINGBUF_TYPE_DATA_TYPE_LEN_MAX); return !trace_seq_has_overflowed(s); } /* * The ring buffer is made up of a list of pages. A separate list of pages is * allocated for each CPU. A writer may only write to a buffer that is * associated with the CPU it is currently executing on. A reader may read * from any per cpu buffer. * * The reader is special. For each per cpu buffer, the reader has its own * reader page. When a reader has read the entire reader page, this reader * page is swapped with another page in the ring buffer. * * Now, as long as the writer is off the reader page, the reader can do what * ever it wants with that page. The writer will never write to that page * again (as long as it is out of the ring buffer). * * Here's some silly ASCII art. * * +------+ * |reader| RING BUFFER * |page | * +------+ +---+ +---+ +---+ * | |-->| |-->| | * +---+ +---+ +---+ * ^ | * | | * +---------------+ * * * +------+ * |reader| RING BUFFER * |page |------------------v * +------+ +---+ +---+ +---+ * | |-->| |-->| | * +---+ +---+ +---+ * ^ | * | | * +---------------+ * * * +------+ * |reader| RING BUFFER * |page |------------------v * +------+ +---+ +---+ +---+ * ^ | |-->| |-->| | * | +---+ +---+ +---+ * | | * | | * +------------------------------+ * * * +------+ * |buffer| RING BUFFER * |page |------------------v * +------+ +---+ +---+ +---+ * ^ | | | |-->| | * | New +---+ +---+ +---+ * | Reader------^ | * | page | * +------------------------------+ * * * After we make this swap, the reader can hand this page off to the splice * code and be done with it. It can even allocate a new page if it needs to * and swap that into the ring buffer. * * We will be using cmpxchg soon to make all this lockless. * */ /* Used for individual buffers (after the counter) */ #define RB_BUFFER_OFF (1 << 20) #define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data) #define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array)) #define RB_ALIGNMENT 4U #define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) #define RB_EVNT_MIN_SIZE 8U /* two 32bit words */ #ifndef CONFIG_HAVE_64BIT_ALIGNED_ACCESS # define RB_FORCE_8BYTE_ALIGNMENT 0 # define RB_ARCH_ALIGNMENT RB_ALIGNMENT #else # define RB_FORCE_8BYTE_ALIGNMENT 1 # define RB_ARCH_ALIGNMENT 8U #endif #define RB_ALIGN_DATA __aligned(RB_ARCH_ALIGNMENT) /* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */ #define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX enum { RB_LEN_TIME_EXTEND = 8, RB_LEN_TIME_STAMP = 8, }; #define skip_time_extend(event) \ ((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND)) #define extended_time(event) \ (event->type_len >= RINGBUF_TYPE_TIME_EXTEND) static inline bool rb_null_event(struct ring_buffer_event *event) { return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta; } static void rb_event_set_padding(struct ring_buffer_event *event) { /* padding has a NULL time_delta */ event->type_len = RINGBUF_TYPE_PADDING; event->time_delta = 0; } static unsigned rb_event_data_length(struct ring_buffer_event *event) { unsigned length; if (event->type_len) length = event->type_len * RB_ALIGNMENT; else length = event->array[0]; return length + RB_EVNT_HDR_SIZE; } /* * Return the length of the given event. Will return * the length of the time extend if the event is a * time extend. */ static inline unsigned rb_event_length(struct ring_buffer_event *event) { switch (event->type_len) { case RINGBUF_TYPE_PADDING: if (rb_null_event(event)) /* undefined */ return -1; return event->array[0] + RB_EVNT_HDR_SIZE; case RINGBUF_TYPE_TIME_EXTEND: return RB_LEN_TIME_EXTEND; case RINGBUF_TYPE_TIME_STAMP: return RB_LEN_TIME_STAMP; case RINGBUF_TYPE_DATA: return rb_event_data_length(event); default: WARN_ON_ONCE(1); } /* not hit */ return 0; } /* * Return total length of time extend and data, * or just the event length for all other events. */ static inline unsigned rb_event_ts_length(struct ring_buffer_event *event) { unsigned len = 0; if (extended_time(event)) { /* time extends include the data event after it */ len = RB_LEN_TIME_EXTEND; event = skip_time_extend(event); } return len + rb_event_length(event); } /** * ring_buffer_event_length - return the length of the event * @event: the event to get the length of * * Returns the size of the data load of a data event. * If the event is something other than a data event, it * returns the size of the event itself. With the exception * of a TIME EXTEND, where it still returns the size of the * data load of the data event after it. */ unsigned ring_buffer_event_length(struct ring_buffer_event *event) { unsigned length; if (extended_time(event)) event = skip_time_extend(event); length = rb_event_length(event); if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX) return length; length -= RB_EVNT_HDR_SIZE; if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0])) length -= sizeof(event->array[0]); return length; } EXPORT_SYMBOL_GPL(ring_buffer_event_length); /* inline for ring buffer fast paths */ static __always_inline void * rb_event_data(struct ring_buffer_event *event) { if (extended_time(event)) event = skip_time_extend(event); WARN_ON_ONCE(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX); /* If length is in len field, then array[0] has the data */ if (event->type_len) return (void *)&event->array[0]; /* Otherwise length is in array[0] and array[1] has the data */ return (void *)&event->array[1]; } /** * ring_buffer_event_data - return the data of the event * @event: the event to get the data from */ void *ring_buffer_event_data(struct ring_buffer_event *event) { return rb_event_data(event); } EXPORT_SYMBOL_GPL(ring_buffer_event_data); #define for_each_buffer_cpu(buffer, cpu) \ for_each_cpu(cpu, buffer->cpumask) #define for_each_online_buffer_cpu(buffer, cpu) \ for_each_cpu_and(cpu, buffer->cpumask, cpu_online_mask) #define TS_SHIFT 27 #define TS_MASK ((1ULL << TS_SHIFT) - 1) #define TS_DELTA_TEST (~TS_MASK) static u64 rb_event_time_stamp(struct ring_buffer_event *event) { u64 ts; ts = event->array[0]; ts <<= TS_SHIFT; ts += event->time_delta; return ts; } /* Flag when events were overwritten */ #define RB_MISSED_EVENTS (1 << 31) /* Missed count stored at end */ #define RB_MISSED_STORED (1 << 30) #define RB_MISSED_MASK (3 << 30) struct buffer_data_page { u64 time_stamp; /* page time stamp */ local_t commit; /* write committed index */ unsigned char data[] RB_ALIGN_DATA; /* data of buffer page */ }; struct buffer_data_read_page { unsigned order; /* order of the page */ struct buffer_data_page *data; /* actual data, stored in this page */ }; /* * Note, the buffer_page list must be first. The buffer pages * are allocated in cache lines, which means that each buffer * page will be at the beginning of a cache line, and thus * the least significant bits will be zero. We use this to * add flags in the list struct pointers, to make the ring buffer * lockless. */ struct buffer_page { struct list_head list; /* list of buffer pages */ local_t write; /* index for next write */ unsigned read; /* index for next read */ local_t entries; /* entries on this page */ unsigned long real_end; /* real end of data */ unsigned order; /* order of the page */ u32 id:30; /* ID for external mapping */ u32 range:1; /* Mapped via a range */ struct buffer_data_page *page; /* Actual data page */ }; /* * The buffer page counters, write and entries, must be reset * atomically when crossing page boundaries. To synchronize this * update, two counters are inserted into the number. One is * the actual counter for the write position or count on the page. * * The other is a counter of updaters. Before an update happens * the update partition of the counter is incremented. This will * allow the updater to update the counter atomically. * * The counter is 20 bits, and the state data is 12. */ #define RB_WRITE_MASK 0xfffff #define RB_WRITE_INTCNT (1 << 20) static void rb_init_page(struct buffer_data_page *bpage) { local_set(&bpage->commit, 0); } static __always_inline unsigned int rb_page_commit(struct buffer_page *bpage) { return local_read(&bpage->page->commit); } static void free_buffer_page(struct buffer_page *bpage) { /* Range pages are not to be freed */ if (!bpage->range) free_pages((unsigned long)bpage->page, bpage->order); kfree(bpage); } /* * We need to fit the time_stamp delta into 27 bits. */ static inline bool test_time_stamp(u64 delta) { return !!(delta & TS_DELTA_TEST); } struct rb_irq_work { struct irq_work work; wait_queue_head_t waiters; wait_queue_head_t full_waiters; atomic_t seq; bool waiters_pending; bool full_waiters_pending; bool wakeup_full; }; /* * Structure to hold event state and handle nested events. */ struct rb_event_info { u64 ts; u64 delta; u64 before; u64 after; unsigned long length; struct buffer_page *tail_page; int add_timestamp; }; /* * Used for the add_timestamp * NONE * EXTEND - wants a time extend * ABSOLUTE - the buffer requests all events to have absolute time stamps * FORCE - force a full time stamp. */ enum { RB_ADD_STAMP_NONE = 0, RB_ADD_STAMP_EXTEND = BIT(1), RB_ADD_STAMP_ABSOLUTE = BIT(2), RB_ADD_STAMP_FORCE = BIT(3) }; /* * Used for which event context the event is in. * TRANSITION = 0 * NMI = 1 * IRQ = 2 * SOFTIRQ = 3 * NORMAL = 4 * * See trace_recursive_lock() comment below for more details. */ enum { RB_CTX_TRANSITION, RB_CTX_NMI, RB_CTX_IRQ, RB_CTX_SOFTIRQ, RB_CTX_NORMAL, RB_CTX_MAX }; struct rb_time_struct { local64_t time; }; typedef struct rb_time_struct rb_time_t; #define MAX_NEST 5 /* * head_page == tail_page && head == tail then buffer is empty. */ struct ring_buffer_per_cpu { int cpu; atomic_t record_disabled; atomic_t resize_disabled; struct trace_buffer *buffer; raw_spinlock_t reader_lock; /* serialize readers */ arch_spinlock_t lock; struct lock_class_key lock_key; struct buffer_data_page *free_page; unsigned long nr_pages; unsigned int current_context; struct list_head *pages; struct buffer_page *head_page; /* read from head */ struct buffer_page *tail_page; /* write to tail */ struct buffer_page *commit_page; /* committed pages */ struct buffer_page *reader_page; unsigned long lost_events; unsigned long last_overrun; unsigned long nest; local_t entries_bytes; local_t entries; local_t overrun; local_t commit_overrun; local_t dropped_events; local_t committing; local_t commits; local_t pages_touched; local_t pages_lost; local_t pages_read; long last_pages_touch; size_t shortest_full; unsigned long read; unsigned long read_bytes; rb_time_t write_stamp; rb_time_t before_stamp; u64 event_stamp[MAX_NEST]; u64 read_stamp; /* pages removed since last reset */ unsigned long pages_removed; unsigned int mapped; unsigned int user_mapped; /* user space mapping */ struct mutex mapping_lock; unsigned long *subbuf_ids; /* ID to subbuf VA */ struct trace_buffer_meta *meta_page; struct ring_buffer_meta *ring_meta; /* ring buffer pages to update, > 0 to add, < 0 to remove */ long nr_pages_to_update; struct list_head new_pages; /* new pages to add */ struct work_struct update_pages_work; struct completion update_done; struct rb_irq_work irq_work; }; struct trace_buffer { unsigned flags; int cpus; atomic_t record_disabled; atomic_t resizing; cpumask_var_t cpumask; struct lock_class_key *reader_lock_key; struct mutex mutex; struct ring_buffer_per_cpu **buffers; struct hlist_node node; u64 (*clock)(void); struct rb_irq_work irq_work; bool time_stamp_abs; unsigned long range_addr_start; unsigned long range_addr_end; long last_text_delta; long last_data_delta; unsigned int subbuf_size; unsigned int subbuf_order; unsigned int max_data_size; }; struct ring_buffer_iter { struct ring_buffer_per_cpu *cpu_buffer; unsigned long head; unsigned long next_event; struct buffer_page *head_page; struct buffer_page *cache_reader_page; unsigned long cache_read; unsigned long cache_pages_removed; u64 read_stamp; u64 page_stamp; struct ring_buffer_event *event; size_t event_size; int missed_events; }; int ring_buffer_print_page_header(struct trace_buffer *buffer, struct trace_seq *s) { struct buffer_data_page field; trace_seq_printf(s, "\tfield: u64 timestamp;\t" "offset:0;\tsize:%u;\tsigned:%u;\n", (unsigned int)sizeof(field.time_stamp), (unsigned int)is_signed_type(u64)); trace_seq_printf(s, "\tfield: local_t commit;\t" "offset:%u;\tsize:%u;\tsigned:%u;\n", (unsigned int)offsetof(typeof(field), commit), (unsigned int)sizeof(field.commit), (unsigned int)is_signed_type(long)); trace_seq_printf(s, "\tfield: int overwrite;\t" "offset:%u;\tsize:%u;\tsigned:%u;\n", (unsigned int)offsetof(typeof(field), commit), 1, (unsigned int)is_signed_type(long)); trace_seq_printf(s, "\tfield: char data;\t" "offset:%u;\tsize:%u;\tsigned:%u;\n", (unsigned int)offsetof(typeof(field), data), (unsigned int)buffer->subbuf_size, (unsigned int)is_signed_type(char)); return !trace_seq_has_overflowed(s); } static inline void rb_time_read(rb_time_t *t, u64 *ret) { *ret = local64_read(&t->time); } static void rb_time_set(rb_time_t *t, u64 val) { local64_set(&t->time, val); } /* * Enable this to make sure that the event passed to * ring_buffer_event_time_stamp() is not committed and also * is on the buffer that it passed in. */ //#define RB_VERIFY_EVENT #ifdef RB_VERIFY_EVENT static struct list_head *rb_list_head(struct list_head *list); static void verify_event(struct ring_buffer_per_cpu *cpu_buffer, void *event) { struct buffer_page *page = cpu_buffer->commit_page; struct buffer_page *tail_page = READ_ONCE(cpu_buffer->tail_page); struct list_head *next; long commit, write; unsigned long addr = (unsigned long)event; bool done = false; int stop = 0; /* Make sure the event exists and is not committed yet */ do { if (page == tail_page || WARN_ON_ONCE(stop++ > 100)) done = true; commit = local_read(&page->page->commit); write = local_read(&page->write); if (addr >= (unsigned long)&page->page->data[commit] && addr < (unsigned long)&page->page->data[write]) return; next = rb_list_head(page->list.next); page = list_entry(next, struct buffer_page, list); } while (!done); WARN_ON_ONCE(1); } #else static inline void verify_event(struct ring_buffer_per_cpu *cpu_buffer, void *event) { } #endif /* * The absolute time stamp drops the 5 MSBs and some clocks may * require them. The rb_fix_abs_ts() will take a previous full * time stamp, and add the 5 MSB of that time stamp on to the * saved absolute time stamp. Then they are compared in case of * the unlikely event that the latest time stamp incremented * the 5 MSB. */ static inline u64 rb_fix_abs_ts(u64 abs, u64 save_ts) { if (save_ts & TS_MSB) { abs |= save_ts & TS_MSB; /* Check for overflow */ if (unlikely(abs < save_ts)) abs += 1ULL << 59; } return abs; } static inline u64 rb_time_stamp(struct trace_buffer *buffer); /** * ring_buffer_event_time_stamp - return the event's current time stamp * @buffer: The buffer that the event is on * @event: the event to get the time stamp of * * Note, this must be called after @event is reserved, and before it is * committed to the ring buffer. And must be called from the same * context where the event was reserved (normal, softirq, irq, etc). * * Returns the time stamp associated with the current event. * If the event has an extended time stamp, then that is used as * the time stamp to return. * In the highly unlikely case that the event was nested more than * the max nesting, then the write_stamp of the buffer is returned, * otherwise current time is returned, but that really neither of * the last two cases should ever happen. */ u64 ring_buffer_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *event) { struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[smp_processor_id()]; unsigned int nest; u64 ts; /* If the event includes an absolute time, then just use that */ if (event->type_len == RINGBUF_TYPE_TIME_STAMP) { ts = rb_event_time_stamp(event); return rb_fix_abs_ts(ts, cpu_buffer->tail_page->page->time_stamp); } nest = local_read(&cpu_buffer->committing); verify_event(cpu_buffer, event); if (WARN_ON_ONCE(!nest)) goto fail; /* Read the current saved nesting level time stamp */ if (likely(--nest < MAX_NEST)) return cpu_buffer->event_stamp[nest]; /* Shouldn't happen, warn if it does */ WARN_ONCE(1, "nest (%d) greater than max", nest); fail: rb_time_read(&cpu_buffer->write_stamp, &ts); return ts; } /** * ring_buffer_nr_dirty_pages - get the number of used pages in the ring buffer * @buffer: The ring_buffer to get the number of pages from * @cpu: The cpu of the ring_buffer to get the number of pages from * * Returns the number of pages that have content in the ring buffer. */ size_t ring_buffer_nr_dirty_pages(struct trace_buffer *buffer, int cpu) { size_t read; size_t lost; size_t cnt; read = local_read(&buffer->buffers[cpu]->pages_read); lost = local_read(&buffer->buffers[cpu]->pages_lost); cnt = local_read(&buffer->buffers[cpu]->pages_touched); if (WARN_ON_ONCE(cnt < lost)) return 0; cnt -= lost; /* The reader can read an empty page, but not more than that */ if (cnt < read) { WARN_ON_ONCE(read > cnt + 1); return 0; } return cnt - read; } static __always_inline bool full_hit(struct trace_buffer *buffer, int cpu, int full) { struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; size_t nr_pages; size_t dirty; nr_pages = cpu_buffer->nr_pages; if (!nr_pages || !full) return true; /* * Add one as dirty will never equal nr_pages, as the sub-buffer * that the writer is on is not counted as dirty. * This is needed if "buffer_percent" is set to 100. */ dirty = ring_buffer_nr_dirty_pages(buffer, cpu) + 1; return (dirty * 100) >= (full * nr_pages); } /* * rb_wake_up_waiters - wake up tasks waiting for ring buffer input * * Schedules a delayed work to wake up any task that is blocked on the * ring buffer waiters queue. */ static void rb_wake_up_waiters(struct irq_work *work) { struct rb_irq_work *rbwork = container_of(work, struct rb_irq_work, work); /* For waiters waiting for the first wake up */ (void)atomic_fetch_inc_release(&rbwork->seq); wake_up_all(&rbwork->waiters); if (rbwork->full_waiters_pending || rbwork->wakeup_full) { /* Only cpu_buffer sets the above flags */ struct ring_buffer_per_cpu *cpu_buffer = container_of(rbwork, struct ring_buffer_per_cpu, irq_work); /* Called from interrupt context */ raw_spin_lock(&cpu_buffer->reader_lock); rbwork->wakeup_full = false; rbwork->full_waiters_pending = false; /* Waking up all waiters, they will reset the shortest full */ cpu_buffer->shortest_full = 0; raw_spin_unlock(&cpu_buffer->reader_lock); wake_up_all(&rbwork->full_waiters); } } /** * ring_buffer_wake_waiters - wake up any waiters on this ring buffer * @buffer: The ring buffer to wake waiters on * @cpu: The CPU buffer to wake waiters on * * In the case of a file that represents a ring buffer is closing, * it is prudent to wake up any waiters that are on this. */ void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; struct rb_irq_work *rbwork; if (!buffer) return; if (cpu == RING_BUFFER_ALL_CPUS) { /* Wake up individual ones too. One level recursion */ for_each_buffer_cpu(buffer, cpu) ring_buffer_wake_waiters(buffer, cpu); rbwork = &buffer->irq_work; } else { if (WARN_ON_ONCE(!buffer->buffers)) return; if (WARN_ON_ONCE(cpu >= nr_cpu_ids)) return; cpu_buffer = buffer->buffers[cpu]; /* The CPU buffer may not have been initialized yet */ if (!cpu_buffer) return; rbwork = &cpu_buffer->irq_work; } /* This can be called in any context */ irq_work_queue(&rbwork->work); } static bool rb_watermark_hit(struct trace_buffer *buffer, int cpu, int full) { struct ring_buffer_per_cpu *cpu_buffer; bool ret = false; /* Reads of all CPUs always waits for any data */ if (cpu == RING_BUFFER_ALL_CPUS) return !ring_buffer_empty(buffer); cpu_buffer = buffer->buffers[cpu]; if (!ring_buffer_empty_cpu(buffer, cpu)) { unsigned long flags; bool pagebusy; if (!full) return true; raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page; ret = !pagebusy && full_hit(buffer, cpu, full); if (!ret && (!cpu_buffer->shortest_full || cpu_buffer->shortest_full > full)) { cpu_buffer->shortest_full = full; } raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); } return ret; } static inline bool rb_wait_cond(struct rb_irq_work *rbwork, struct trace_buffer *buffer, int cpu, int full, ring_buffer_cond_fn cond, void *data) { if (rb_watermark_hit(buffer, cpu, full)) return true; if (cond(data)) return true; /* * The events can happen in critical sections where * checking a work queue can cause deadlocks. * After adding a task to the queue, this flag is set * only to notify events to try to wake up the queue * using irq_work. * * We don't clear it even if the buffer is no longer * empty. The flag only causes the next event to run * irq_work to do the work queue wake up. The worse * that can happen if we race with !trace_empty() is that * an event will cause an irq_work to try to wake up * an empty queue. * * There's no reason to protect this flag either, as * the work queue and irq_work logic will do the necessary * synchronization for the wake ups. The only thing * that is necessary is that the wake up happens after * a task has been queued. It's OK for spurious wake ups. */ if (full) rbwork->full_waiters_pending = true; else rbwork->waiters_pending = true; return false; } struct rb_wait_data { struct rb_irq_work *irq_work; int seq; }; /* * The default wait condition for ring_buffer_wait() is to just to exit the * wait loop the first time it is woken up. */ static bool rb_wait_once(void *data) { struct rb_wait_data *rdata = data; struct rb_irq_work *rbwork = rdata->irq_work; return atomic_read_acquire(&rbwork->seq) != rdata->seq; } /** * ring_buffer_wait - wait for input to the ring buffer * @buffer: buffer to wait on * @cpu: the cpu buffer to wait on * @full: wait until the percentage of pages are available, if @cpu != RING_BUFFER_ALL_CPUS * @cond: condition function to break out of wait (NULL to run once) * @data: the data to pass to @cond. * * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon * as data is added to any of the @buffer's cpu buffers. Otherwise * it will wait for data to be added to a specific cpu buffer. */ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full, ring_buffer_cond_fn cond, void *data) { struct ring_buffer_per_cpu *cpu_buffer; struct wait_queue_head *waitq; struct rb_irq_work *rbwork; struct rb_wait_data rdata; int ret = 0; /* * Depending on what the caller is waiting for, either any * data in any cpu buffer, or a specific buffer, put the * caller on the appropriate wait queue. */ if (cpu == RING_BUFFER_ALL_CPUS) { rbwork = &buffer->irq_work; /* Full only makes sense on per cpu reads */ full = 0; } else { if (!cpumask_test_cpu(cpu, buffer->cpumask)) return -ENODEV; cpu_buffer = buffer->buffers[cpu]; rbwork = &cpu_buffer->irq_work; } if (full) waitq = &rbwork->full_waiters; else waitq = &rbwork->waiters; /* Set up to exit loop as soon as it is woken */ if (!cond) { cond = rb_wait_once; rdata.irq_work = rbwork; rdata.seq = atomic_read_acquire(&rbwork->seq); data = &rdata; } ret = wait_event_interruptible((*waitq), rb_wait_cond(rbwork, buffer, cpu, full, cond, data)); return ret; } /** * ring_buffer_poll_wait - poll on buffer input * @buffer: buffer to wait on * @cpu: the cpu buffer to wait on * @filp: the file descriptor * @poll_table: The poll descriptor * @full: wait until the percentage of pages are available, if @cpu != RING_BUFFER_ALL_CPUS * * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon * as data is added to any of the @buffer's cpu buffers. Otherwise * it will wait for data to be added to a specific cpu buffer. * * Returns EPOLLIN | EPOLLRDNORM if data exists in the buffers, * zero otherwise. */ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, struct file *filp, poll_table *poll_table, int full) { struct ring_buffer_per_cpu *cpu_buffer; struct rb_irq_work *rbwork; if (cpu == RING_BUFFER_ALL_CPUS) { rbwork = &buffer->irq_work; full = 0; } else { if (!cpumask_test_cpu(cpu, buffer->cpumask)) return EPOLLERR; cpu_buffer = buffer->buffers[cpu]; rbwork = &cpu_buffer->irq_work; } if (full) { poll_wait(filp, &rbwork->full_waiters, poll_table); if (rb_watermark_hit(buffer, cpu, full)) return EPOLLIN | EPOLLRDNORM; /* * Only allow full_waiters_pending update to be seen after * the shortest_full is set (in rb_watermark_hit). If the * writer sees the full_waiters_pending flag set, it will * compare the amount in the ring buffer to shortest_full. * If the amount in the ring buffer is greater than the * shortest_full percent, it will call the irq_work handler * to wake up this list. The irq_handler will reset shortest_full * back to zero. That's done under the reader_lock, but * the below smp_mb() makes sure that the update to * full_waiters_pending doesn't leak up into the above. */ smp_mb(); rbwork->full_waiters_pending = true; return 0; } poll_wait(filp, &rbwork->waiters, poll_table); rbwork->waiters_pending = true; /* * There's a tight race between setting the waiters_pending and * checking if the ring buffer is empty. Once the waiters_pending bit * is set, the next event will wake the task up, but we can get stuck * if there's only a single event in. * * FIXME: Ideally, we need a memory barrier on the writer side as well, * but adding a memory barrier to all events will cause too much of a * performance hit in the fast path. We only need a memory barrier when * the buffer goes from empty to having content. But as this race is * extremely small, and it's not a problem if another event comes in, we * will fix it later. */ smp_mb(); if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) || (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu))) return EPOLLIN | EPOLLRDNORM; return 0; } /* buffer may be either ring_buffer or ring_buffer_per_cpu */ #define RB_WARN_ON(b, cond) \ ({ \ int _____ret = unlikely(cond); \ if (_____ret) { \ if (__same_type(*(b), struct ring_buffer_per_cpu)) { \ struct ring_buffer_per_cpu *__b = \ (void *)b; \ atomic_inc(&__b->buffer->record_disabled); \ } else \ atomic_inc(&b->record_disabled); \ WARN_ON(1); \ } \ _____ret; \ }) /* Up this if you want to test the TIME_EXTENTS and normalization */ #define DEBUG_SHIFT 0 static inline u64 rb_time_stamp(struct trace_buffer *buffer) { u64 ts; /* Skip retpolines :-( */ if (IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) && likely(buffer->clock == trace_clock_local)) ts = trace_clock_local(); else ts = buffer->clock(); /* shift to debug/test normalization and TIME_EXTENTS */ return ts << DEBUG_SHIFT; } u64 ring_buffer_time_stamp(struct trace_buffer *buffer) { u64 time; preempt_disable_notrace(); time = rb_time_stamp(buffer); preempt_enable_notrace(); return time; } EXPORT_SYMBOL_GPL(ring_buffer_time_stamp); void ring_buffer_normalize_time_stamp(struct trace_buffer *buffer, int cpu, u64 *ts) { /* Just stupid testing the normalize function and deltas */ *ts >>= DEBUG_SHIFT; } EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp); /* * Making the ring buffer lockless makes things tricky. * Although writes only happen on the CPU that they are on, * and they only need to worry about interrupts. Reads can * happen on any CPU. * * The reader page is always off the ring buffer, but when the * reader finishes with a page, it needs to swap its page with * a new one from the buffer. The reader needs to take from * the head (writes go to the tail). But if a writer is in overwrite * mode and wraps, it must push the head page forward. * * Here lies the problem. * * The reader must be careful to replace only the head page, and * not another one. As described at the top of the file in the * ASCII art, the reader sets its old page to point to the next * page after head. It then sets the page after head to point to * the old reader page. But if the writer moves the head page * during this operation, the reader could end up with the tail. * * We use cmpxchg to help prevent this race. We also do something * special with the page before head. We set the LSB to 1. * * When the writer must push the page forward, it will clear the * bit that points to the head page, move the head, and then set * the bit that points to the new head page. * * We also don't want an interrupt coming in and moving the head * page on another writer. Thus we use the second LSB to catch * that too. Thus: * * head->list->prev->next bit 1 bit 0 * ------- ------- * Normal page 0 0 * Points to head page 0 1 * New head page 1 0 * * Note we can not trust the prev pointer of the head page, because: * * +----+ +-----+ +-----+ * | |------>| T |---X--->| N | * | |<------| | | | * +----+ +-----+ +-----+ * ^ ^ | * | +-----+ | | * +----------| R |----------+ | * | |<-----------+ * +-----+ * * Key: ---X--> HEAD flag set in pointer * T Tail page * R Reader page * N Next page * * (see __rb_reserve_next() to see where this happens) * * What the above shows is that the reader just swapped out * the reader page with a page in the buffer, but before it * could make the new header point back to the new page added * it was preempted by a writer. The writer moved forward onto * the new page added by the reader and is about to move forward * again. * * You can see, it is legitimate for the previous pointer of * the head (or any page) not to point back to itself. But only * temporarily. */ #define RB_PAGE_NORMAL 0UL #define RB_PAGE_HEAD 1UL #define RB_PAGE_UPDATE 2UL #define RB_FLAG_MASK 3UL /* PAGE_MOVED is not part of the mask */ #define RB_PAGE_MOVED 4UL /* * rb_list_head - remove any bit */ static struct list_head *rb_list_head(struct list_head *list) { unsigned long val = (unsigned long)list; return (struct list_head *)(val & ~RB_FLAG_MASK); } /* * rb_is_head_page - test if the given page is the head page * * Because the reader may move the head_page pointer, we can * not trust what the head page is (it may be pointing to * the reader page). But if the next page is a header page, * its flags will be non zero. */ static inline int rb_is_head_page(struct buffer_page *page, struct list_head *list) { unsigned long val; val = (unsigned long)list->next; if ((val & ~RB_FLAG_MASK) != (unsigned long)&page->list) return RB_PAGE_MOVED; return val & RB_FLAG_MASK; } /* * rb_is_reader_page * * The unique thing about the reader page, is that, if the * writer is ever on it, the previous pointer never points * back to the reader page. */ static bool rb_is_reader_page(struct buffer_page *page) { struct list_head *list = page->list.prev; return rb_list_head(list->next) != &page->list; } /* * rb_set_list_to_head - set a list_head to be pointing to head. */ static void rb_set_list_to_head(struct list_head *list) { unsigned long *ptr; ptr = (unsigned long *)&list->next; *ptr |= RB_PAGE_HEAD; *ptr &= ~RB_PAGE_UPDATE; } /* * rb_head_page_activate - sets up head page */ static void rb_head_page_activate(struct ring_buffer_per_cpu *cpu_buffer) { struct buffer_page *head; head = cpu_buffer->head_page; if (!head) return; /* * Set the previous list pointer to have the HEAD flag. */ rb_set_list_to_head(head->list.prev); if (cpu_buffer->ring_meta) { struct ring_buffer_meta *meta = cpu_buffer->ring_meta; meta->head_buffer = (unsigned long)head->page; } } static void rb_list_head_clear(struct list_head *list) { unsigned long *ptr = (unsigned long *)&list->next; *ptr &= ~RB_FLAG_MASK; } /* * rb_head_page_deactivate - clears head page ptr (for free list) */ static void rb_head_page_deactivate(struct ring_buffer_per_cpu *cpu_buffer) { struct list_head *hd; /* Go through the whole list and clear any pointers found. */ rb_list_head_clear(cpu_buffer->pages); list_for_each(hd, cpu_buffer->pages) rb_list_head_clear(hd); } static int rb_head_page_set(struct ring_buffer_per_cpu *cpu_buffer, struct buffer_page *head, struct buffer_page *prev, int old_flag, int new_flag) { struct list_head *list; unsigned long val = (unsigned long)&head->list; unsigned long ret; list = &prev->list; val &= ~RB_FLAG_MASK; ret = cmpxchg((unsigned long *)&list->next, val | old_flag, val | new_flag); /* check if the reader took the page */ if ((ret & ~RB_FLAG_MASK) != val) return RB_PAGE_MOVED; return ret & RB_FLAG_MASK; } static int rb_head_page_set_update(struct ring_buffer_per_cpu *cpu_buffer, struct buffer_page *head, struct buffer_page *prev, int old_flag) { return rb_head_page_set(cpu_buffer, head, prev, old_flag, RB_PAGE_UPDATE); } static int rb_head_page_set_head(struct ring_buffer_per_cpu *cpu_buffer, struct buffer_page *head, struct buffer_page *prev, int old_flag) { return rb_head_page_set(cpu_buffer, head, prev, old_flag, RB_PAGE_HEAD); } static int rb_head_page_set_normal(struct ring_buffer_per_cpu *cpu_buffer, struct buffer_page *head, struct buffer_page *prev, int old_flag) { return rb_head_page_set(cpu_buffer, head, prev, old_flag, RB_PAGE_NORMAL); } static inline void rb_inc_page(struct buffer_page **bpage) { struct list_head *p = rb_list_head((*bpage)->list.next); *bpage = list_entry(p, struct buffer_page, list); } static struct buffer_page * rb_set_head_page(struct ring_buffer_per_cpu *cpu_buffer) { struct buffer_page *head; struct buffer_page *page; struct list_head *list; int i; if (RB_WARN_ON(cpu_buffer, !cpu_buffer->head_page)) return NULL; /* sanity check */ list = cpu_buffer->pages; if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev->next) != list)) return NULL; page = head = cpu_buffer->head_page; /* * It is possible that the writer moves the header behind * where we started, and we miss in one loop. * A second loop should grab the header, but we'll do * three loops just because I'm paranoid. */ for (i = 0; i < 3; i++) { do { if (rb_is_head_page(page, page->list.prev)) { cpu_buffer->head_page = page; return page; } rb_inc_page(&page); } while (page != head); } RB_WARN_ON(cpu_buffer, 1); return NULL; } static bool rb_head_page_replace(struct buffer_page *old, struct buffer_page *new) { unsigned long *ptr = (unsigned long *)&old->list.prev->next; unsigned long val; val = *ptr & ~RB_FLAG_MASK; val |= RB_PAGE_HEAD; return try_cmpxchg(ptr, &val, (unsigned long)&new->list); } /* * rb_tail_page_update - move the tail page forward */ static void rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer, struct buffer_page *tail_page, struct buffer_page *next_page) { unsigned long old_entries; unsigned long old_write; /* * The tail page now needs to be moved forward. * * We need to reset the tail page, but without messing * with possible erasing of data brought in by interrupts * that have moved the tail page and are currently on it. * * We add a counter to the write field to denote this. */ old_write = local_add_return(RB_WRITE_INTCNT, &next_page->write); old_entries = local_add_return(RB_WRITE_INTCNT, &next_page->entries); /* * Just make sure we have seen our old_write and synchronize * with any interrupts that come in. */ barrier(); /* * If the tail page is still the same as what we think * it is, then it is up to us to update the tail * pointer. */ if (tail_page == READ_ONCE(cpu_buffer->tail_page)) { /* Zero the write counter */ unsigned long val = old_write & ~RB_WRITE_MASK; unsigned long eval = old_entries & ~RB_WRITE_MASK; /* * This will only succeed if an interrupt did * not come in and change it. In which case, we * do not want to modify it. * * We add (void) to let the compiler know that we do not care * about the return value of these functions. We use the * cmpxchg to only update if an interrupt did not already * do it for us. If the cmpxchg fails, we don't care. */ (void)local_cmpxchg(&next_page->write, old_write, val); (void)local_cmpxchg(&next_page->entries, old_entries, eval); /* * No need to worry about races with clearing out the commit. * it only can increment when a commit takes place. But that * only happens in the outer most nested commit. */ local_set(&next_page->page->commit, 0); /* Either we update tail_page or an interrupt does */ if (try_cmpxchg(&cpu_buffer->tail_page, &tail_page, next_page)) local_inc(&cpu_buffer->pages_touched); } } static void rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer, struct buffer_page *bpage) { unsigned long val = (unsigned long)bpage; RB_WARN_ON(cpu_buffer, val & RB_FLAG_MASK); } /** * rb_check_pages - integrity check of buffer pages * @cpu_buffer: CPU buffer with pages to test * * As a safety measure we check to make sure the data pages have not * been corrupted. * * Callers of this function need to guarantee that the list of pages doesn't get * modified during the check. In particular, if it's possible that the function * is invoked with concurrent readers which can swap in a new reader page then * the caller should take cpu_buffer->reader_lock. */ static void rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) { struct list_head *head = rb_list_head(cpu_buffer->pages); struct list_head *tmp; if (RB_WARN_ON(cpu_buffer, rb_list_head(rb_list_head(head->next)->prev) != head)) return; if (RB_WARN_ON(cpu_buffer, rb_list_head(rb_list_head(head->prev)->next) != head)) return; for (tmp = rb_list_head(head->next); tmp != head; tmp = rb_list_head(tmp->next)) { if (RB_WARN_ON(cpu_buffer, rb_list_head(rb_list_head(tmp->next)->prev) != tmp)) return; if (RB_WARN_ON(cpu_buffer, rb_list_head(rb_list_head(tmp->prev)->next) != tmp)) return; } } /* * Take an address, add the meta data size as well as the array of * array subbuffer indexes, then align it to a subbuffer size. * * This is used to help find the next per cpu subbuffer within a mapped range. */ static unsigned long rb_range_align_subbuf(unsigned long addr, int subbuf_size, int nr_subbufs) { addr += sizeof(struct ring_buffer_meta) + sizeof(int) * nr_subbufs; return ALIGN(addr, subbuf_size); } /* * Return the ring_buffer_meta for a given @cpu. */ static void *rb_range_meta(struct trace_buffer *buffer, int nr_pages, int cpu) { int subbuf_size = buffer->subbuf_size + BUF_PAGE_HDR_SIZE; unsigned long ptr = buffer->range_addr_start; struct ring_buffer_meta *meta; int nr_subbufs; if (!ptr) return NULL; /* When nr_pages passed in is zero, the first meta has already been initialized */ if (!nr_pages) { meta = (struct ring_buffer_meta *)ptr; nr_subbufs = meta->nr_subbufs; } else { meta = NULL; /* Include the reader page */ nr_subbufs = nr_pages + 1; } /* * The first chunk may not be subbuffer aligned, where as * the rest of the chunks are. */ if (cpu) { ptr = rb_range_align_subbuf(ptr, subbuf_size, nr_subbufs); ptr += subbuf_size * nr_subbufs; /* We can use multiplication to find chunks greater than 1 */ if (cpu > 1) { unsigned long size; unsigned long p; /* Save the beginning of this CPU chunk */ p = ptr; ptr = rb_range_align_subbuf(ptr, subbuf_size, nr_subbufs); ptr += subbuf_size * nr_subbufs; /* Now all chunks after this are the same size */ size = ptr - p; ptr += size * (cpu - 2); } } return (void *)ptr; } /* Return the start of subbufs given the meta pointer */ static void *rb_subbufs_from_meta(struct ring_buffer_meta *meta) { int subbuf_size = meta->subbuf_size; unsigned long ptr; ptr = (unsigned long)meta; ptr = rb_range_align_subbuf(ptr, subbuf_size, meta->nr_subbufs); return (void *)ptr; } /* * Return a specific sub-buffer for a given @cpu defined by @idx. */ static void *rb_range_buffer(struct ring_buffer_per_cpu *cpu_buffer, int idx) { struct ring_buffer_meta *meta; unsigned long ptr; int subbuf_size; meta = rb_range_meta(cpu_buffer->buffer, 0, cpu_buffer->cpu); if (!meta) return NULL; if (WARN_ON_ONCE(idx >= meta->nr_subbufs)) return NULL; subbuf_size = meta->subbuf_size; /* Map this buffer to the order that's in meta->buffers[] */ idx = meta->buffers[idx]; ptr = (unsigned long)rb_subbufs_from_meta(meta); ptr += subbuf_size * idx; if (ptr + subbuf_size > cpu_buffer->buffer->range_addr_end) return NULL; return (void *)ptr; } /* * See if the existing memory contains valid ring buffer data. * As the previous kernel must be the same as this kernel, all * the calculations (size of buffers and number of buffers) * must be the same. */ static bool rb_meta_valid(struct ring_buffer_meta *meta, int cpu, struct trace_buffer *buffer, int nr_pages) { int subbuf_size = PAGE_SIZE; struct buffer_data_page *subbuf; unsigned long buffers_start; unsigned long buffers_end; int i; /* Check the meta magic and meta struct size */ if (meta->magic != RING_BUFFER_META_MAGIC || meta->struct_size != sizeof(*meta)) { pr_info("Ring buffer boot meta[%d] mismatch of magic or struct size\n", cpu); return false; } /* The subbuffer's size and number of subbuffers must match */ if (meta->subbuf_size != subbuf_size || meta->nr_subbufs != nr_pages + 1) { pr_info("Ring buffer boot meta [%d] mismatch of subbuf_size/nr_pages\n", cpu); return false; } buffers_start = meta->first_buffer; buffers_end = meta->first_buffer + (subbuf_size * meta->nr_subbufs); /* Is the head and commit buffers within the range of buffers? */ if (meta->head_buffer < buffers_start || meta->head_buffer >= buffers_end) { pr_info("Ring buffer boot meta [%d] head buffer out of range\n", cpu); return false; } if (meta->commit_buffer < buffers_start || meta->commit_buffer >= buffers_end) { pr_info("Ring buffer boot meta [%d] commit buffer out of range\n", cpu); return false; } subbuf = rb_subbufs_from_meta(meta); /* Is the meta buffers and the subbufs themselves have correct data? */ for (i = 0; i < meta->nr_subbufs; i++) { if (meta->buffers[i] < 0 || meta->buffers[i] >= meta->nr_subbufs) { pr_info("Ring buffer boot meta [%d] array out of range\n", cpu); return false; } if ((unsigned)local_read(&subbuf->commit) > subbuf_size) { pr_info("Ring buffer boot meta [%d] buffer invalid commit\n", cpu); return false; } subbuf = (void *)subbuf + subbuf_size; } return true; } static int rb_meta_subbuf_idx(struct ring_buffer_meta *meta, void *subbuf); static int rb_read_data_buffer(struct buffer_data_page *dpage, int tail, int cpu, unsigned long long *timestamp, u64 *delta_ptr) { struct ring_buffer_event *event; u64 ts, delta; int events = 0; int e; *delta_ptr = 0; *timestamp = 0; ts = dpage->time_stamp; for (e = 0; e < tail; e += rb_event_length(event)) { event = (struct ring_buffer_event *)(dpage->data + e); switch (event->type_len) { case RINGBUF_TYPE_TIME_EXTEND: delta = rb_event_time_stamp(event); ts += delta; break; case RINGBUF_TYPE_TIME_STAMP: delta = rb_event_time_stamp(event); delta = rb_fix_abs_ts(delta, ts); if (delta < ts) { *delta_ptr = delta; *timestamp = ts; return -1; } ts = delta; break; case RINGBUF_TYPE_PADDING: if (event->time_delta == 1) break; fallthrough; case RINGBUF_TYPE_DATA: events++; ts += event->time_delta; break; default: return -1; } } *timestamp = ts; return events; } static int rb_validate_buffer(struct buffer_data_page *dpage, int cpu) { unsigned long long ts; u64 delta; int tail; tail = local_read(&dpage->commit); return rb_read_data_buffer(dpage, tail, cpu, &ts, &delta); } /* If the meta data has been validated, now validate the events */ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer) { struct ring_buffer_meta *meta = cpu_buffer->ring_meta; struct buffer_page *head_page; unsigned long entry_bytes = 0; unsigned long entries = 0; int ret; int i; if (!meta || !meta->head_buffer) return; /* Do the reader page first */ ret = rb_validate_buffer(cpu_buffer->reader_page->page, cpu_buffer->cpu); if (ret < 0) { pr_info("Ring buffer reader page is invalid\n"); goto invalid; } entries += ret; entry_bytes += local_read(&cpu_buffer->reader_page->page->commit); local_set(&cpu_buffer->reader_page->entries, ret); head_page = cpu_buffer->head_page; /* If both the head and commit are on the reader_page then we are done. */ if (head_page == cpu_buffer->reader_page && head_page == cpu_buffer->commit_page) goto done; /* Iterate until finding the commit page */ for (i = 0; i < meta->nr_subbufs + 1; i++, rb_inc_page(&head_page)) { /* Reader page has already been done */ if (head_page == cpu_buffer->reader_page) continue; ret = rb_validate_buffer(head_page->page, cpu_buffer->cpu); if (ret < 0) { pr_info("Ring buffer meta [%d] invalid buffer page\n", cpu_buffer->cpu); goto invalid; } entries += ret; entry_bytes += local_read(&head_page->page->commit); local_set(&cpu_buffer->head_page->entries, ret); if (head_page == cpu_buffer->commit_page) break; } if (head_page != cpu_buffer->commit_page) { pr_info("Ring buffer meta [%d] commit page not found\n", cpu_buffer->cpu); goto invalid; } done: local_set(&cpu_buffer->entries, entries); local_set(&cpu_buffer->entries_bytes, entry_bytes); pr_info("Ring buffer meta [%d] is from previous boot!\n", cpu_buffer->cpu); return; invalid: /* The content of the buffers are invalid, reset the meta data */ meta->head_buffer = 0; meta->commit_buffer = 0; /* Reset the reader page */ local_set(&cpu_buffer->reader_page->entries, 0); local_set(&cpu_buffer->reader_page->page->commit, 0); /* Reset all the subbuffers */ for (i = 0; i < meta->nr_subbufs - 1; i++, rb_inc_page(&head_page)) { local_set(&head_page->entries, 0); local_set(&head_page->page->commit, 0); } } /* Used to calculate data delta */ static char rb_data_ptr[] = ""; #define THIS_TEXT_PTR ((unsigned long)rb_meta_init_text_addr) #define THIS_DATA_PTR ((unsigned long)rb_data_ptr) static void rb_meta_init_text_addr(struct ring_buffer_meta *meta) { meta->text_addr = THIS_TEXT_PTR; meta->data_addr = THIS_DATA_PTR; } static void rb_range_meta_init(struct trace_buffer *buffer, int nr_pages) { struct ring_buffer_meta *meta; unsigned long delta; void *subbuf; int cpu; int i; for (cpu = 0; cpu < nr_cpu_ids; cpu++) { void *next_meta; meta = rb_range_meta(buffer, nr_pages, cpu); if (rb_meta_valid(meta, cpu, buffer, nr_pages)) { /* Make the mappings match the current address */ subbuf = rb_subbufs_from_meta(meta); delta = (unsigned long)subbuf - meta->first_buffer; meta->first_buffer += delta; meta->head_buffer += delta; meta->commit_buffer += delta; buffer->last_text_delta = THIS_TEXT_PTR - meta->text_addr; buffer->last_data_delta = THIS_DATA_PTR - meta->data_addr; continue; } if (cpu < nr_cpu_ids - 1) next_meta = rb_range_meta(buffer, nr_pages, cpu + 1); else next_meta = (void *)buffer->range_addr_end; memset(meta, 0, next_meta - (void *)meta); meta->magic = RING_BUFFER_META_MAGIC; meta->struct_size = sizeof(*meta); meta->nr_subbufs = nr_pages + 1; meta->subbuf_size = PAGE_SIZE; subbuf = rb_subbufs_from_meta(meta); meta->first_buffer = (unsigned long)subbuf; rb_meta_init_text_addr(meta); /* * The buffers[] array holds the order of the sub-buffers * that are after the meta data. The sub-buffers may * be swapped out when read and inserted into a different * location of the ring buffer. Although their addresses * remain the same, the buffers[] array contains the * index into the sub-buffers holding their actual order. */ for (i = 0; i < meta->nr_subbufs; i++) { meta->buffers[i] = i; rb_init_page(subbuf); subbuf += meta->subbuf_size; } } } static void *rbm_start(struct seq_file *m, loff_t *pos) { struct ring_buffer_per_cpu *cpu_buffer = m->private; struct ring_buffer_meta *meta = cpu_buffer->ring_meta; unsigned long val; if (!meta) return NULL; if (*pos > meta->nr_subbufs) return NULL; val = *pos; val++; return (void *)val; } static void *rbm_next(struct seq_file *m, void *v, loff_t *pos) { (*pos)++; return rbm_start(m, pos); } static int rbm_show(struct seq_file *m, void *v) { struct ring_buffer_per_cpu *cpu_buffer = m->private; struct ring_buffer_meta *meta = cpu_buffer->ring_meta; unsigned long val = (unsigned long)v; if (val == 1) { seq_printf(m, "head_buffer: %d\n", rb_meta_subbuf_idx(meta, (void *)meta->head_buffer)); seq_printf(m, "commit_buffer: %d\n", rb_meta_subbuf_idx(meta, (void *)meta->commit_buffer)); seq_printf(m, "subbuf_size: %d\n", meta->subbuf_size); seq_printf(m, "nr_subbufs: %d\n", meta->nr_subbufs); return 0; } val -= 2; seq_printf(m, "buffer[%ld]: %d\n", val, meta->buffers[val]); return 0; } static void rbm_stop(struct seq_file *m, void *p) { } static const struct seq_operations rb_meta_seq_ops = { .start = rbm_start, .next = rbm_next, .show = rbm_show, .stop = rbm_stop, }; int ring_buffer_meta_seq_init(struct file *file, struct trace_buffer *buffer, int cpu) { struct seq_file *m; int ret; ret = seq_open(file, &rb_meta_seq_ops); if (ret) return ret; m = file->private_data; m->private = buffer->buffers[cpu]; return 0; } /* Map the buffer_pages to the previous head and commit pages */ static void rb_meta_buffer_update(struct ring_buffer_per_cpu *cpu_buffer, struct buffer_page *bpage) { struct ring_buffer_meta *meta = cpu_buffer->ring_meta; if (meta->head_buffer == (unsigned long)bpage->page) cpu_buffer->head_page = bpage; if (meta->commit_buffer == (unsigned long)bpage->page) { cpu_buffer->commit_page = bpage; cpu_buffer->tail_page = bpage; } } static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, long nr_pages, struct list_head *pages) { struct trace_buffer *buffer = cpu_buffer->buffer; struct ring_buffer_meta *meta = NULL; struct buffer_page *bpage, *tmp; bool user_thread = current->mm != NULL; gfp_t mflags; long i; /* * Check if the available memory is there first. * Note, si_mem_available() only gives us a rough estimate of available * memory. It may not be accurate. But we don't care, we just want * to prevent doing any allocation when it is obvious that it is * not going to succeed. */ i = si_mem_available(); if (i < nr_pages) return -ENOMEM; /* * __GFP_RETRY_MAYFAIL flag makes sure that the allocation fails * gracefully without invoking oom-killer and the system is not * destabilized. */ mflags = GFP_KERNEL | __GFP_RETRY_MAYFAIL; /* * If a user thread allocates too much, and si_mem_available() * reports there's enough memory, even though there is not. * Make sure the OOM killer kills this thread. This can happen * even with RETRY_MAYFAIL because another task may be doing * an allocation after this task has taken all memory. * This is the task the OOM killer needs to take out during this * loop, even if it was triggered by an allocation somewhere else. */ if (user_thread) set_current_oom_origin(); if (buffer->range_addr_start) meta = rb_range_meta(buffer, nr_pages, cpu_buffer->cpu); for (i = 0; i < nr_pages; i++) { struct page *page; bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), mflags, cpu_to_node(cpu_buffer->cpu)); if (!bpage) goto free_pages; rb_check_bpage(cpu_buffer, bpage); /* * Append the pages as for mapped buffers we want to keep * the order */ list_add_tail(&bpage->list, pages); if (meta) { /* A range was given. Use that for the buffer page */ bpage->page = rb_range_buffer(cpu_buffer, i + 1); if (!bpage->page) goto free_pages; /* If this is valid from a previous boot */ if (meta->head_buffer) rb_meta_buffer_update(cpu_buffer, bpage); bpage->range = 1; bpage->id = i + 1; } else { page = alloc_pages_node(cpu_to_node(cpu_buffer->cpu), mflags | __GFP_COMP | __GFP_ZERO, cpu_buffer->buffer->subbuf_order); if (!page) goto free_pages; bpage->page = page_address(page); rb_init_page(bpage->page); } bpage->order = cpu_buffer->buffer->subbuf_order; if (user_thread && fatal_signal_pending(current)) goto free_pages; } if (user_thread) clear_current_oom_origin(); return 0; free_pages: list_for_each_entry_safe(bpage, tmp, pages, list) { list_del_init(&bpage->list); free_buffer_page(bpage); } if (user_thread) clear_current_oom_origin(); return -ENOMEM; } static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages) { LIST_HEAD(pages); WARN_ON(!nr_pages); if (__rb_allocate_pages(cpu_buffer, nr_pages, &pages)) return -ENOMEM; /* * The ring buffer page list is a circular list that does not * start and end with a list head. All page list items point to * other pages. */ cpu_buffer->pages = pages.next; list_del(&pages); cpu_buffer->nr_pages = nr_pages; rb_check_pages(cpu_buffer); return 0; } static struct ring_buffer_per_cpu * rb_allocate_cpu_buffer(struct trace_buffer *buffer, long nr_pages, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; struct ring_buffer_meta *meta; struct buffer_page *bpage; struct page *page; int ret; cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()), GFP_KERNEL, cpu_to_node(cpu)); if (!cpu_buffer) return NULL; cpu_buffer->cpu = cpu; cpu_buffer->buffer = buffer; raw_spin_lock_init(&cpu_buffer->reader_lock); lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key); cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; INIT_WORK(&cpu_buffer->update_pages_work, update_pages_handler); init_completion(&cpu_buffer->update_done); init_irq_work(&cpu_buffer->irq_work.work, rb_wake_up_waiters); init_waitqueue_head(&cpu_buffer->irq_work.waiters); init_waitqueue_head(&cpu_buffer->irq_work.full_waiters); mutex_init(&cpu_buffer->mapping_lock); bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), GFP_KERNEL, cpu_to_node(cpu)); if (!bpage) goto fail_free_buffer; rb_check_bpage(cpu_buffer, bpage); cpu_buffer->reader_page = bpage; if (buffer->range_addr_start) { /* * Range mapped buffers have the same restrictions as memory * mapped ones do. */ cpu_buffer->mapped = 1; cpu_buffer->ring_meta = rb_range_meta(buffer, nr_pages, cpu); bpage->page = rb_range_buffer(cpu_buffer, 0); if (!bpage->page) goto fail_free_reader; if (cpu_buffer->ring_meta->head_buffer) rb_meta_buffer_update(cpu_buffer, bpage); bpage->range = 1; } else { page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL | __GFP_COMP | __GFP_ZERO, cpu_buffer->buffer->subbuf_order); if (!page) goto fail_free_reader; bpage->page = page_address(page); rb_init_page(bpage->page); } INIT_LIST_HEAD(&cpu_buffer->reader_page->list); INIT_LIST_HEAD(&cpu_buffer->new_pages); ret = rb_allocate_pages(cpu_buffer, nr_pages); if (ret < 0) goto fail_free_reader; rb_meta_validate_events(cpu_buffer); /* If the boot meta was valid then this has already been updated */ meta = cpu_buffer->ring_meta; if (!meta || !meta->head_buffer || !cpu_buffer->head_page || !cpu_buffer->commit_page || !cpu_buffer->tail_page) { if (meta && meta->head_buffer && (cpu_buffer->head_page || cpu_buffer->commit_page || cpu_buffer->tail_page)) { pr_warn("Ring buffer meta buffers not all mapped\n"); if (!cpu_buffer->head_page) pr_warn(" Missing head_page\n"); if (!cpu_buffer->commit_page) pr_warn(" Missing commit_page\n"); if (!cpu_buffer->tail_page) pr_warn(" Missing tail_page\n"); } cpu_buffer->head_page = list_entry(cpu_buffer->pages, struct buffer_page, list); cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page; rb_head_page_activate(cpu_buffer); if (cpu_buffer->ring_meta) meta->commit_buffer = meta->head_buffer; } else { /* The valid meta buffer still needs to activate the head page */ rb_head_page_activate(cpu_buffer); } return cpu_buffer; fail_free_reader: free_buffer_page(cpu_buffer->reader_page); fail_free_buffer: kfree(cpu_buffer); return NULL; } static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) { struct list_head *head = cpu_buffer->pages; struct buffer_page *bpage, *tmp; irq_work_sync(&cpu_buffer->irq_work.work); free_buffer_page(cpu_buffer->reader_page); if (head) { rb_head_page_deactivate(cpu_buffer); list_for_each_entry_safe(bpage, tmp, head, list) { list_del_init(&bpage->list); free_buffer_page(bpage); } bpage = list_entry(head, struct buffer_page, list); free_buffer_page(bpage); } free_page((unsigned long)cpu_buffer->free_page); kfree(cpu_buffer); } static struct trace_buffer *alloc_buffer(unsigned long size, unsigned flags, int order, unsigned long start, unsigned long end, struct lock_class_key *key) { struct trace_buffer *buffer; long nr_pages; int subbuf_size; int bsize; int cpu; int ret; /* keep it in its own cache line */ buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()), GFP_KERNEL); if (!buffer) return NULL; if (!zalloc_cpumask_var(&buffer->cpumask, GFP_KERNEL)) goto fail_free_buffer; buffer->subbuf_order = order; subbuf_size = (PAGE_SIZE << order); buffer->subbuf_size = subbuf_size - BUF_PAGE_HDR_SIZE; /* Max payload is buffer page size - header (8bytes) */ buffer->max_data_size = buffer->subbuf_size - (sizeof(u32) * 2); buffer->flags = flags; buffer->clock = trace_clock_local; buffer->reader_lock_key = key; init_irq_work(&buffer->irq_work.work, rb_wake_up_waiters); init_waitqueue_head(&buffer->irq_work.waiters); buffer->cpus = nr_cpu_ids; bsize = sizeof(void *) * nr_cpu_ids; buffer->buffers = kzalloc(ALIGN(bsize, cache_line_size()), GFP_KERNEL); if (!buffer->buffers) goto fail_free_cpumask; /* If start/end are specified, then that overrides size */ if (start && end) { unsigned long ptr; int n; size = end - start; size = size / nr_cpu_ids; /* * The number of sub-buffers (nr_pages) is determined by the * total size allocated minus the meta data size. * Then that is divided by the number of per CPU buffers * needed, plus account for the integer array index that * will be appended to the meta data. */ nr_pages = (size - sizeof(struct ring_buffer_meta)) / (subbuf_size + sizeof(int)); /* Need at least two pages plus the reader page */ if (nr_pages < 3) goto fail_free_buffers; again: /* Make sure that the size fits aligned */ for (n = 0, ptr = start; n < nr_cpu_ids; n++) { ptr += sizeof(struct ring_buffer_meta) + sizeof(int) * nr_pages; ptr = ALIGN(ptr, subbuf_size); ptr += subbuf_size * nr_pages; } if (ptr > end) { if (nr_pages <= 3) goto fail_free_buffers; nr_pages--; goto again; } /* nr_pages should not count the reader page */ nr_pages--; buffer->range_addr_start = start; buffer->range_addr_end = end; rb_range_meta_init(buffer, nr_pages); } else { /* need at least two pages */ nr_pages = DIV_ROUND_UP(size, buffer->subbuf_size); if (nr_pages < 2) nr_pages = 2; } cpu = raw_smp_processor_id(); cpumask_set_cpu(cpu, buffer->cpumask); buffer->buffers[cpu] = rb_allocate_cpu_buffer(buffer, nr_pages, cpu); if (!buffer->buffers[cpu]) goto fail_free_buffers; /* If already mapped, do not hook to CPU hotplug */ if (!start) { ret = cpuhp_state_add_instance(CPUHP_TRACE_RB_PREPARE, &buffer->node); if (ret < 0) goto fail_free_buffers; } mutex_init(&buffer->mutex); return buffer; fail_free_buffers: for_each_buffer_cpu(buffer, cpu) { if (buffer->buffers[cpu]) rb_free_cpu_buffer(buffer->buffers[cpu]); } kfree(buffer->buffers); fail_free_cpumask: free_cpumask_var(buffer->cpumask); fail_free_buffer: kfree(buffer); return NULL; } /** * __ring_buffer_alloc - allocate a new ring_buffer * @size: the size in bytes per cpu that is needed. * @flags: attributes to set for the ring buffer. * @key: ring buffer reader_lock_key. * * Currently the only flag that is available is the RB_FL_OVERWRITE * flag. This flag means that the buffer will overwrite old data * when the buffer wraps. If this flag is not set, the buffer will * drop data when the tail hits the head. */ struct trace_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *key) { /* Default buffer page size - one system page */ return alloc_buffer(size, flags, 0, 0, 0,key); } EXPORT_SYMBOL_GPL(__ring_buffer_alloc); /** * __ring_buffer_alloc_range - allocate a new ring_buffer from existing memory * @size: the size in bytes per cpu that is needed. * @flags: attributes to set for the ring buffer. * @start: start of allocated range * @range_size: size of allocated range * @order: sub-buffer order * @key: ring buffer reader_lock_key. * * Currently the only flag that is available is the RB_FL_OVERWRITE * flag. This flag means that the buffer will overwrite old data * when the buffer wraps. If this flag is not set, the buffer will * drop data when the tail hits the head. */ struct trace_buffer *__ring_buffer_alloc_range(unsigned long size, unsigned flags, int order, unsigned long start, unsigned long range_size, struct lock_class_key *key) { return alloc_buffer(size, flags, order, start, start + range_size, key); } /** * ring_buffer_last_boot_delta - return the delta offset from last boot * @buffer: The buffer to return the delta from * @text: Return text delta * @data: Return data delta * * Returns: The true if the delta is non zero */ bool ring_buffer_last_boot_delta(struct trace_buffer *buffer, long *text, long *data) { if (!buffer) return false; if (!buffer->last_text_delta) return false; *text = buffer->last_text_delta; *data = buffer->last_data_delta; return true; } /** * ring_buffer_free - free a ring buffer. * @buffer: the buffer to free. */ void ring_buffer_free(struct trace_buffer *buffer) { int cpu; cpuhp_state_remove_instance(CPUHP_TRACE_RB_PREPARE, &buffer->node); irq_work_sync(&buffer->irq_work.work); for_each_buffer_cpu(buffer, cpu) rb_free_cpu_buffer(buffer->buffers[cpu]); kfree(buffer->buffers); free_cpumask_var(buffer->cpumask); kfree(buffer); } EXPORT_SYMBOL_GPL(ring_buffer_free); void ring_buffer_set_clock(struct trace_buffer *buffer, u64 (*clock)(void)) { buffer->clock = clock; } void ring_buffer_set_time_stamp_abs(struct trace_buffer *buffer, bool abs) { buffer->time_stamp_abs = abs; } bool ring_buffer_time_stamp_abs(struct trace_buffer *buffer) { return buffer->time_stamp_abs; } static inline unsigned long rb_page_entries(struct buffer_page *bpage) { return local_read(&bpage->entries) & RB_WRITE_MASK; } static inline unsigned long rb_page_write(struct buffer_page *bpage) { return local_read(&bpage->write) & RB_WRITE_MASK; } static bool rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages) { struct list_head *tail_page, *to_remove, *next_page; struct buffer_page *to_remove_page, *tmp_iter_page; struct buffer_page *last_page, *first_page; unsigned long nr_removed; unsigned long head_bit; int page_entries; head_bit = 0; raw_spin_lock_irq(&cpu_buffer->reader_lock); atomic_inc(&cpu_buffer->record_disabled); /* * We don't race with the readers since we have acquired the reader * lock. We also don't race with writers after disabling recording. * This makes it easy to figure out the first and the last page to be * removed from the list. We unlink all the pages in between including * the first and last pages. This is done in a busy loop so that we * lose the least number of traces. * The pages are freed after we restart recording and unlock readers. */ tail_page = &cpu_buffer->tail_page->list; /* * tail page might be on reader page, we remove the next page * from the ring buffer */ if (cpu_buffer->tail_page == cpu_buffer->reader_page) tail_page = rb_list_head(tail_page->next); to_remove = tail_page; /* start of pages to remove */ first_page = list_entry(rb_list_head(to_remove->next), struct buffer_page, list); for (nr_removed = 0; nr_removed < nr_pages; nr_removed++) { to_remove = rb_list_head(to_remove)->next; head_bit |= (unsigned long)to_remove & RB_PAGE_HEAD; } /* Read iterators need to reset themselves when some pages removed */ cpu_buffer->pages_removed += nr_removed; next_page = rb_list_head(to_remove)->next; /* * Now we remove all pages between tail_page and next_page. * Make sure that we have head_bit value preserved for the * next page */ tail_page->next = (struct list_head *)((unsigned long)next_page | head_bit); next_page = rb_list_head(next_page); next_page->prev = tail_page; /* make sure pages points to a valid page in the ring buffer */ cpu_buffer->pages = next_page; /* update head page */ if (head_bit) cpu_buffer->head_page = list_entry(next_page, struct buffer_page, list); /* pages are removed, resume tracing and then free the pages */ atomic_dec(&cpu_buffer->record_disabled); raw_spin_unlock_irq(&cpu_buffer->reader_lock); RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)); /* last buffer page to remove */ last_page = list_entry(rb_list_head(to_remove), struct buffer_page, list); tmp_iter_page = first_page; do { cond_resched(); to_remove_page = tmp_iter_page; rb_inc_page(&tmp_iter_page); /* update the counters */ page_entries = rb_page_entries(to_remove_page); if (page_entries) { /* * If something was added to this page, it was full * since it is not the tail page. So we deduct the * bytes consumed in ring buffer from here. * Increment overrun to account for the lost events. */ local_add(page_entries, &cpu_buffer->overrun); local_sub(rb_page_commit(to_remove_page), &cpu_buffer->entries_bytes); local_inc(&cpu_buffer->pages_lost); } /* * We have already removed references to this list item, just * free up the buffer_page and its page */ free_buffer_page(to_remove_page); nr_removed--; } while (to_remove_page != last_page); RB_WARN_ON(cpu_buffer, nr_removed); return nr_removed == 0; } static bool rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer) { struct list_head *pages = &cpu_buffer->new_pages; unsigned long flags; bool success; int retries; /* Can be called at early boot up, where interrupts must not been enabled */ raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); /* * We are holding the reader lock, so the reader page won't be swapped * in the ring buffer. Now we are racing with the writer trying to * move head page and the tail page. * We are going to adapt the reader page update process where: * 1. We first splice the start and end of list of new pages between * the head page and its previous page. * 2. We cmpxchg the prev_page->next to point from head page to the * start of new pages list. * 3. Finally, we update the head->prev to the end of new list. * * We will try this process 10 times, to make sure that we don't keep * spinning. */ retries = 10; success = false; while (retries--) { struct list_head *head_page, *prev_page; struct list_head *last_page, *first_page; struct list_head *head_page_with_bit; struct buffer_page *hpage = rb_set_head_page(cpu_buffer); if (!hpage) break; head_page = &hpage->list; prev_page = head_page->prev; first_page = pages->next; last_page = pages->prev; head_page_with_bit = (struct list_head *) ((unsigned long)head_page | RB_PAGE_HEAD); last_page->next = head_page_with_bit; first_page->prev = prev_page; /* caution: head_page_with_bit gets updated on cmpxchg failure */ if (try_cmpxchg(&prev_page->next, &head_page_with_bit, first_page)) { /* * yay, we replaced the page pointer to our new list, * now, we just have to update to head page's prev * pointer to point to end of list */ head_page->prev = last_page; success = true; break; } } if (success) INIT_LIST_HEAD(pages); /* * If we weren't successful in adding in new pages, warn and stop * tracing */ RB_WARN_ON(cpu_buffer, !success); raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); /* free pages if they weren't inserted */ if (!success) { struct buffer_page *bpage, *tmp; list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages, list) { list_del_init(&bpage->list); free_buffer_page(bpage); } } return success; } static void rb_update_pages(struct ring_buffer_per_cpu *cpu_buffer) { bool success; if (cpu_buffer->nr_pages_to_update > 0) success = rb_insert_pages(cpu_buffer); else success = rb_remove_pages(cpu_buffer, -cpu_buffer->nr_pages_to_update); if (success) cpu_buffer->nr_pages += cpu_buffer->nr_pages_to_update; } static void update_pages_handler(struct work_struct *work) { struct ring_buffer_per_cpu *cpu_buffer = container_of(work, struct ring_buffer_per_cpu, update_pages_work); rb_update_pages(cpu_buffer); complete(&cpu_buffer->update_done); } /** * ring_buffer_resize - resize the ring buffer * @buffer: the buffer to resize. * @size: the new size. * @cpu_id: the cpu buffer to resize * * Minimum size is 2 * buffer->subbuf_size. * * Returns 0 on success and < 0 on failure. */ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size, int cpu_id) { struct ring_buffer_per_cpu *cpu_buffer; unsigned long nr_pages; int cpu, err; /* * Always succeed at resizing a non-existent buffer: */ if (!buffer) return 0; /* Make sure the requested buffer exists */ if (cpu_id != RING_BUFFER_ALL_CPUS && !cpumask_test_cpu(cpu_id, buffer->cpumask)) return 0; nr_pages = DIV_ROUND_UP(size, buffer->subbuf_size); /* we need a minimum of two pages */ if (nr_pages < 2) nr_pages = 2; /* prevent another thread from changing buffer sizes */ mutex_lock(&buffer->mutex); atomic_inc(&buffer->resizing); if (cpu_id == RING_BUFFER_ALL_CPUS) { /* * Don't succeed if resizing is disabled, as a reader might be * manipulating the ring buffer and is expecting a sane state while * this is true. */ for_each_buffer_cpu(buffer, cpu) { cpu_buffer = buffer->buffers[cpu]; if (atomic_read(&cpu_buffer->resize_disabled)) { err = -EBUSY; goto out_err_unlock; } } /* calculate the pages to update */ for_each_buffer_cpu(buffer, cpu) { cpu_buffer = buffer->buffers[cpu]; cpu_buffer->nr_pages_to_update = nr_pages - cpu_buffer->nr_pages; /* * nothing more to do for removing pages or no update */ if (cpu_buffer->nr_pages_to_update <= 0) continue; /* * to add pages, make sure all new pages can be * allocated without receiving ENOMEM */ INIT_LIST_HEAD(&cpu_buffer->new_pages); if (__rb_allocate_pages(cpu_buffer, cpu_buffer->nr_pages_to_update, &cpu_buffer->new_pages)) { /* not enough memory for new pages */ err = -ENOMEM; goto out_err; } cond_resched(); } cpus_read_lock(); /* * Fire off all the required work handlers * We can't schedule on offline CPUs, but it's not necessary * since we can change their buffer sizes without any race. */ for_each_buffer_cpu(buffer, cpu) { cpu_buffer = buffer->buffers[cpu]; if (!cpu_buffer->nr_pages_to_update) continue; /* Can't run something on an offline CPU. */ if (!cpu_online(cpu)) { rb_update_pages(cpu_buffer); cpu_buffer->nr_pages_to_update = 0; } else { /* Run directly if possible. */ migrate_disable(); if (cpu != smp_processor_id()) { migrate_enable(); schedule_work_on(cpu, &cpu_buffer->update_pages_work); } else { update_pages_handler(&cpu_buffer->update_pages_work); migrate_enable(); } } } /* wait for all the updates to complete */ for_each_buffer_cpu(buffer, cpu) { cpu_buffer = buffer->buffers[cpu]; if (!cpu_buffer->nr_pages_to_update) continue; if (cpu_online(cpu)) wait_for_completion(&cpu_buffer->update_done); cpu_buffer->nr_pages_to_update = 0; } cpus_read_unlock(); } else { cpu_buffer = buffer->buffers[cpu_id]; if (nr_pages == cpu_buffer->nr_pages) goto out; /* * Don't succeed if resizing is disabled, as a reader might be * manipulating the ring buffer and is expecting a sane state while * this is true. */ if (atomic_read(&cpu_buffer->resize_disabled)) { err = -EBUSY; goto out_err_unlock; } cpu_buffer->nr_pages_to_update = nr_pages - cpu_buffer->nr_pages; INIT_LIST_HEAD(&cpu_buffer->new_pages); if (cpu_buffer->nr_pages_to_update > 0 && __rb_allocate_pages(cpu_buffer, cpu_buffer->nr_pages_to_update, &cpu_buffer->new_pages)) { err = -ENOMEM; goto out_err; } cpus_read_lock(); /* Can't run something on an offline CPU. */ if (!cpu_online(cpu_id)) rb_update_pages(cpu_buffer); else { /* Run directly if possible. */ migrate_disable(); if (cpu_id == smp_processor_id()) { rb_update_pages(cpu_buffer); migrate_enable(); } else { migrate_enable(); schedule_work_on(cpu_id, &cpu_buffer->update_pages_work); wait_for_completion(&cpu_buffer->update_done); } } cpu_buffer->nr_pages_to_update = 0; cpus_read_unlock(); } out: /* * The ring buffer resize can happen with the ring buffer * enabled, so that the update disturbs the tracing as little * as possible. But if the buffer is disabled, we do not need * to worry about that, and we can take the time to verify * that the buffer is not corrupt. */ if (atomic_read(&buffer->record_disabled)) { atomic_inc(&buffer->record_disabled); /* * Even though the buffer was disabled, we must make sure * that it is truly disabled before calling rb_check_pages. * There could have been a race between checking * record_disable and incrementing it. */ synchronize_rcu(); for_each_buffer_cpu(buffer, cpu) { unsigned long flags; cpu_buffer = buffer->buffers[cpu]; raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); rb_check_pages(cpu_buffer); raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); } atomic_dec(&buffer->record_disabled); } atomic_dec(&buffer->resizing); mutex_unlock(&buffer->mutex); return 0; out_err: for_each_buffer_cpu(buffer, cpu) { struct buffer_page *bpage, *tmp; cpu_buffer = buffer->buffers[cpu]; cpu_buffer->nr_pages_to_update = 0; if (list_empty(&cpu_buffer->new_pages)) continue; list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages, list) { list_del_init(&bpage->list); free_buffer_page(bpage); } } out_err_unlock: atomic_dec(&buffer->resizing); mutex_unlock(&buffer->mutex); return err; } EXPORT_SYMBOL_GPL(ring_buffer_resize); void ring_buffer_change_overwrite(struct trace_buffer *buffer, int val) { mutex_lock(&buffer->mutex); if (val) buffer->flags |= RB_FL_OVERWRITE; else buffer->flags &= ~RB_FL_OVERWRITE; mutex_unlock(&buffer->mutex); } EXPORT_SYMBOL_GPL(ring_buffer_change_overwrite); static __always_inline void *__rb_page_index(struct buffer_page *bpage, unsigned index) { return bpage->page->data + index; } static __always_inline struct ring_buffer_event * rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer) { return __rb_page_index(cpu_buffer->reader_page, cpu_buffer->reader_page->read); } static struct ring_buffer_event * rb_iter_head_event(struct ring_buffer_iter *iter) { struct ring_buffer_event *event; struct buffer_page *iter_head_page = iter->head_page; unsigned long commit; unsigned length; if (iter->head != iter->next_event) return iter->event; /* * When the writer goes across pages, it issues a cmpxchg which * is a mb(), which will synchronize with the rmb here. * (see rb_tail_page_update() and __rb_reserve_next()) */ commit = rb_page_commit(iter_head_page); smp_rmb(); /* An event needs to be at least 8 bytes in size */ if (iter->head > commit - 8) goto reset; event = __rb_page_index(iter_head_page, iter->head); length = rb_event_length(event); /* * READ_ONCE() doesn't work on functions and we don't want the * compiler doing any crazy optimizations with length. */ barrier(); if ((iter->head + length) > commit || length > iter->event_size) /* Writer corrupted the read? */ goto reset; memcpy(iter->event, event, length); /* * If the page stamp is still the same after this rmb() then the * event was safely copied without the writer entering the page. */ smp_rmb(); /* Make sure the page didn't change since we read this */ if (iter->page_stamp != iter_head_page->page->time_stamp || commit > rb_page_commit(iter_head_page)) goto reset; iter->next_event = iter->head + length; return iter->event; reset: /* Reset to the beginning */ iter->page_stamp = iter->read_stamp = iter->head_page->page->time_stamp; iter->head = 0; iter->next_event = 0; iter->missed_events = 1; return NULL; } /* Size is determined by what has been committed */ static __always_inline unsigned rb_page_size(struct buffer_page *bpage) { return rb_page_commit(bpage) & ~RB_MISSED_MASK; } static __always_inline unsigned rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer) { return rb_page_commit(cpu_buffer->commit_page); } static __always_inline unsigned rb_event_index(struct ring_buffer_per_cpu *cpu_buffer, struct ring_buffer_event *event) { unsigned long addr = (unsigned long)event; addr &= (PAGE_SIZE << cpu_buffer->buffer->subbuf_order) - 1; return addr - BUF_PAGE_HDR_SIZE; } static void rb_inc_iter(struct ring_buffer_iter *iter) { struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; /* * The iterator could be on the reader page (it starts there). * But the head could have moved, since the reader was * found. Check for this case and assign the iterator * to the head page instead of next. */ if (iter->head_page == cpu_buffer->reader_page) iter->head_page = rb_set_head_page(cpu_buffer); else rb_inc_page(&iter->head_page); iter->page_stamp = iter->read_stamp = iter->head_page->page->time_stamp; iter->head = 0; iter->next_event = 0; } /* Return the index into the sub-buffers for a given sub-buffer */ static int rb_meta_subbuf_idx(struct ring_buffer_meta *meta, void *subbuf) { void *subbuf_array; subbuf_array = (void *)meta + sizeof(int) * meta->nr_subbufs; subbuf_array = (void *)ALIGN((unsigned long)subbuf_array, meta->subbuf_size); return (subbuf - subbuf_array) / meta->subbuf_size; } static void rb_update_meta_head(struct ring_buffer_per_cpu *cpu_buffer, struct buffer_page *next_page) { struct ring_buffer_meta *meta = cpu_buffer->ring_meta; unsigned long old_head = (unsigned long)next_page->page; unsigned long new_head; rb_inc_page(&next_page); new_head = (unsigned long)next_page->page; /* * Only move it forward once, if something else came in and * moved it forward, then we don't want to touch it. */ (void)cmpxchg(&meta->head_buffer, old_head, new_head); } static void rb_update_meta_reader(struct ring_buffer_per_cpu *cpu_buffer, struct buffer_page *reader) { struct ring_buffer_meta *meta = cpu_buffer->ring_meta; void *old_reader = cpu_buffer->reader_page->page; void *new_reader = reader->page; int id; id = reader->id; cpu_buffer->reader_page->id = id; reader->id = 0; meta->buffers[0] = rb_meta_subbuf_idx(meta, new_reader); meta->buffers[id] = rb_meta_subbuf_idx(meta, old_reader); /* The head pointer is the one after the reader */ rb_update_meta_head(cpu_buffer, reader); } /* * rb_handle_head_page - writer hit the head page * * Returns: +1 to retry page * 0 to continue * -1 on error */ static int rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer, struct buffer_page *tail_page, struct buffer_page *next_page) { struct buffer_page *new_head; int entries; int type; int ret; entries = rb_page_entries(next_page); /* * The hard part is here. We need to move the head * forward, and protect against both readers on * other CPUs and writers coming in via interrupts. */ type = rb_head_page_set_update(cpu_buffer, next_page, tail_page, RB_PAGE_HEAD); /* * type can be one of four: * NORMAL - an interrupt already moved it for us * HEAD - we are the first to get here. * UPDATE - we are the interrupt interrupting * a current move. * MOVED - a reader on another CPU moved the next * pointer to its reader page. Give up * and try again. */ switch (type) { case RB_PAGE_HEAD: /* * We changed the head to UPDATE, thus * it is our responsibility to update * the counters. */ local_add(entries, &cpu_buffer->overrun); local_sub(rb_page_commit(next_page), &cpu_buffer->entries_bytes); local_inc(&cpu_buffer->pages_lost); if (cpu_buffer->ring_meta) rb_update_meta_head(cpu_buffer, next_page); /* * The entries will be zeroed out when we move the * tail page. */ /* still more to do */ break; case RB_PAGE_UPDATE: /* * This is an interrupt that interrupt the * previous update. Still more to do. */ break; case RB_PAGE_NORMAL: /* * An interrupt came in before the update * and processed this for us. * Nothing left to do. */ return 1; case RB_PAGE_MOVED: /* * The reader is on another CPU and just did * a swap with our next_page. * Try again. */ return 1; default: RB_WARN_ON(cpu_buffer, 1); /* WTF??? */ return -1; } /* * Now that we are here, the old head pointer is * set to UPDATE. This will keep the reader from * swapping the head page with the reader page. * The reader (on another CPU) will spin till * we are finished. * * We just need to protect against interrupts * doing the job. We will set the next pointer * to HEAD. After that, we set the old pointer * to NORMAL, but only if it was HEAD before. * otherwise we are an interrupt, and only * want the outer most commit to reset it. */ new_head = next_page; rb_inc_page(&new_head); ret = rb_head_page_set_head(cpu_buffer, new_head, next_page, RB_PAGE_NORMAL); /* * Valid returns are: * HEAD - an interrupt came in and already set it. * NORMAL - One of two things: * 1) We really set it. * 2) A bunch of interrupts came in and moved * the page forward again. */ switch (ret) { case RB_PAGE_HEAD: case RB_PAGE_NORMAL: /* OK */ break; default: RB_WARN_ON(cpu_buffer, 1); return -1; } /* * It is possible that an interrupt came in, * set the head up, then more interrupts came in * and moved it again. When we get back here, * the page would have been set to NORMAL but we * just set it back to HEAD. * * How do you detect this? Well, if that happened * the tail page would have moved. */ if (ret == RB_PAGE_NORMAL) { struct buffer_page *buffer_tail_page; buffer_tail_page = READ_ONCE(cpu_buffer->tail_page); /* * If the tail had moved passed next, then we need * to reset the pointer. */ if (buffer_tail_page != tail_page && buffer_tail_page != next_page) rb_head_page_set_normal(cpu_buffer, new_head, next_page, RB_PAGE_HEAD); } /* * If this was the outer most commit (the one that * changed the original pointer from HEAD to UPDATE), * then it is up to us to reset it to NORMAL. */ if (type == RB_PAGE_HEAD) { ret = rb_head_page_set_normal(cpu_buffer, next_page, tail_page, RB_PAGE_UPDATE); if (RB_WARN_ON(cpu_buffer, ret != RB_PAGE_UPDATE)) return -1; } return 0; } static inline void rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, unsigned long tail, struct rb_event_info *info) { unsigned long bsize = READ_ONCE(cpu_buffer->buffer->subbuf_size); struct buffer_page *tail_page = info->tail_page; struct ring_buffer_event *event; unsigned long length = info->length; /* * Only the event that crossed the page boundary * must fill the old tail_page with padding. */ if (tail >= bsize) { /* * If the page was filled, then we still need * to update the real_end. Reset it to zero * and the reader will ignore it. */ if (tail == bsize) tail_page->real_end = 0; local_sub(length, &tail_page->write); return; } event = __rb_page_index(tail_page, tail); /* * Save the original length to the meta data. * This will be used by the reader to add lost event * counter. */ tail_page->real_end = tail; /* * If this event is bigger than the minimum size, then * we need to be careful that we don't subtract the * write counter enough to allow another writer to slip * in on this page. * We put in a discarded commit instead, to make sure * that this space is not used again, and this space will * not be accounted into 'entries_bytes'. * * If we are less than the minimum size, we don't need to * worry about it. */ if (tail > (bsize - RB_EVNT_MIN_SIZE)) { /* No room for any events */ /* Mark the rest of the page with padding */ rb_event_set_padding(event); /* Make sure the padding is visible before the write update */ smp_wmb(); /* Set the write back to the previous setting */ local_sub(length, &tail_page->write); return; } /* Put in a discarded event */ event->array[0] = (bsize - tail) - RB_EVNT_HDR_SIZE; event->type_len = RINGBUF_TYPE_PADDING; /* time delta must be non zero */ event->time_delta = 1; /* account for padding bytes */ local_add(bsize - tail, &cpu_buffer->entries_bytes); /* Make sure the padding is visible before the tail_page->write update */ smp_wmb(); /* Set write to end of buffer */ length = (tail + length) - bsize; local_sub(length, &tail_page->write); } static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer); /* * This is the slow path, force gcc not to inline it. */ static noinline struct ring_buffer_event * rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, unsigned long tail, struct rb_event_info *info) { struct buffer_page *tail_page = info->tail_page; struct buffer_page *commit_page = cpu_buffer->commit_page; struct trace_buffer *buffer = cpu_buffer->buffer; struct buffer_page *next_page; int ret; next_page = tail_page; rb_inc_page(&next_page); /* * If for some reason, we had an interrupt storm that made * it all the way around the buffer, bail, and warn * about it. */ if (unlikely(next_page == commit_page)) { local_inc(&cpu_buffer->commit_overrun); goto out_reset; } /* * This is where the fun begins! * * We are fighting against races between a reader that * could be on another CPU trying to swap its reader * page with the buffer head. * * We are also fighting against interrupts coming in and * moving the head or tail on us as well. * * If the next page is the head page then we have filled * the buffer, unless the commit page is still on the * reader page. */ if (rb_is_head_page(next_page, &tail_page->list)) { /* * If the commit is not on the reader page, then * move the header page. */ if (!rb_is_reader_page(cpu_buffer->commit_page)) { /* * If we are not in overwrite mode, * this is easy, just stop here. */ if (!(buffer->flags & RB_FL_OVERWRITE)) { local_inc(&cpu_buffer->dropped_events); goto out_reset; } ret = rb_handle_head_page(cpu_buffer, tail_page, next_page); if (ret < 0) goto out_reset; if (ret) goto out_again; } else { /* * We need to be careful here too. The * commit page could still be on the reader * page. We could have a small buffer, and * have filled up the buffer with events * from interrupts and such, and wrapped. * * Note, if the tail page is also on the * reader_page, we let it move out. */ if (unlikely((cpu_buffer->commit_page != cpu_buffer->tail_page) && (cpu_buffer->commit_page == cpu_buffer->reader_page))) { local_inc(&cpu_buffer->commit_overrun); goto out_reset; } } } rb_tail_page_update(cpu_buffer, tail_page, next_page); out_again: rb_reset_tail(cpu_buffer, tail, info); /* Commit what we have for now. */ rb_end_commit(cpu_buffer); /* rb_end_commit() decs committing */ local_inc(&cpu_buffer->committing); /* fail and let the caller try again */ return ERR_PTR(-EAGAIN); out_reset: /* reset write */ rb_reset_tail(cpu_buffer, tail, info); return NULL; } /* Slow path */ static struct ring_buffer_event * rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer, struct ring_buffer_event *event, u64 delta, bool abs) { if (abs) event->type_len = RINGBUF_TYPE_TIME_STAMP; else event->type_len = RINGBUF_TYPE_TIME_EXTEND; /* Not the first event on the page, or not delta? */ if (abs || rb_event_index(cpu_buffer, event)) { event->time_delta = delta & TS_MASK; event->array[0] = delta >> TS_SHIFT; } else { /* nope, just zero it */ event->time_delta = 0; event->array[0] = 0; } return skip_time_extend(event); } #ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK static inline bool sched_clock_stable(void) { return true; } #endif static void rb_check_timestamp(struct ring_buffer_per_cpu *cpu_buffer, struct rb_event_info *info) { u64 write_stamp; WARN_ONCE(1, "Delta way too big! %llu ts=%llu before=%llu after=%llu write stamp=%llu\n%s", (unsigned long long)info->delta, (unsigned long long)info->ts, (unsigned long long)info->before, (unsigned long long)info->after, (unsigned long long)({rb_time_read(&cpu_buffer->write_stamp, &write_stamp); write_stamp;}), sched_clock_stable() ? "" : "If you just came from a suspend/resume,\n" "please switch to the trace global clock:\n" " echo global > /sys/kernel/tracing/trace_clock\n" "or add trace_clock=global to the kernel command line\n"); } static void rb_add_timestamp(struct ring_buffer_per_cpu *cpu_buffer, struct ring_buffer_event **event, struct rb_event_info *info, u64 *delta, unsigned int *length) { bool abs = info->add_timestamp & (RB_ADD_STAMP_FORCE | RB_ADD_STAMP_ABSOLUTE); if (unlikely(info->delta > (1ULL << 59))) { /* * Some timers can use more than 59 bits, and when a timestamp * is added to the buffer, it will lose those bits. */ if (abs && (info->ts & TS_MSB)) { info->delta &= ABS_TS_MASK; /* did the clock go backwards */ } else if (info->before == info->after && info->before > info->ts) { /* not interrupted */ static int once; /* * This is possible with a recalibrating of the TSC. * Do not produce a call stack, but just report it. */ if (!once) { once++; pr_warn("Ring buffer clock went backwards: %llu -> %llu\n", info->before, info->ts); } } else rb_check_timestamp(cpu_buffer, info); if (!abs) info->delta = 0; } *event = rb_add_time_stamp(cpu_buffer, *event, info->delta, abs); *length -= RB_LEN_TIME_EXTEND; *delta = 0; } /** * rb_update_event - update event type and data * @cpu_buffer: The per cpu buffer of the @event * @event: the event to update * @info: The info to update the @event with (contains length and delta) * * Update the type and data fields of the @event. The length * is the actual size that is written to the ring buffer, * and with this, we can determine what to place into the * data field. */ static void rb_update_event(struct ring_buffer_per_cpu *cpu_buffer, struct ring_buffer_event *event, struct rb_event_info *info) { unsigned length = info->length; u64 delta = info->delta; unsigned int nest = local_read(&cpu_buffer->committing) - 1; if (!WARN_ON_ONCE(nest >= MAX_NEST)) cpu_buffer->event_stamp[nest] = info->ts; /* * If we need to add a timestamp, then we * add it to the start of the reserved space. */ if (unlikely(info->add_timestamp)) rb_add_timestamp(cpu_buffer, &event, info, &delta, &length); event->time_delta = delta; length -= RB_EVNT_HDR_SIZE; if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) { event->type_len = 0; event->array[0] = length; } else event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT); } static unsigned rb_calculate_event_length(unsigned length) { struct ring_buffer_event event; /* Used only for sizeof array */ /* zero length can cause confusions */ if (!length) length++; if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) length += sizeof(event.array[0]); length += RB_EVNT_HDR_SIZE; length = ALIGN(length, RB_ARCH_ALIGNMENT); /* * In case the time delta is larger than the 27 bits for it * in the header, we need to add a timestamp. If another * event comes in when trying to discard this one to increase * the length, then the timestamp will be added in the allocated * space of this event. If length is bigger than the size needed * for the TIME_EXTEND, then padding has to be used. The events * length must be either RB_LEN_TIME_EXTEND, or greater than or equal * to RB_LEN_TIME_EXTEND + 8, as 8 is the minimum size for padding. * As length is a multiple of 4, we only need to worry if it * is 12 (RB_LEN_TIME_EXTEND + 4). */ if (length == RB_LEN_TIME_EXTEND + RB_ALIGNMENT) length += RB_ALIGNMENT; return length; } static inline bool rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, struct ring_buffer_event *event) { unsigned long new_index, old_index; struct buffer_page *bpage; unsigned long addr; new_index = rb_event_index(cpu_buffer, event); old_index = new_index + rb_event_ts_length(event); addr = (unsigned long)event; addr &= ~((PAGE_SIZE << cpu_buffer->buffer->subbuf_order) - 1); bpage = READ_ONCE(cpu_buffer->tail_page); /* * Make sure the tail_page is still the same and * the next write location is the end of this event */ if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) { unsigned long write_mask = local_read(&bpage->write) & ~RB_WRITE_MASK; unsigned long event_length = rb_event_length(event); /* * For the before_stamp to be different than the write_stamp * to make sure that the next event adds an absolute * value and does not rely on the saved write stamp, which * is now going to be bogus. * * By setting the before_stamp to zero, the next event * is not going to use the write_stamp and will instead * create an absolute timestamp. This means there's no * reason to update the wirte_stamp! */ rb_time_set(&cpu_buffer->before_stamp, 0); /* * If an event were to come in now, it would see that the * write_stamp and the before_stamp are different, and assume * that this event just added itself before updating * the write stamp. The interrupting event will fix the * write stamp for us, and use an absolute timestamp. */ /* * This is on the tail page. It is possible that * a write could come in and move the tail page * and write to the next page. That is fine * because we just shorten what is on this page. */ old_index += write_mask; new_index += write_mask; /* caution: old_index gets updated on cmpxchg failure */ if (local_try_cmpxchg(&bpage->write, &old_index, new_index)) { /* update counters */ local_sub(event_length, &cpu_buffer->entries_bytes); return true; } } /* could not discard */ return false; } static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer) { local_inc(&cpu_buffer->committing); local_inc(&cpu_buffer->commits); } static __always_inline void rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) { unsigned long max_count; /* * We only race with interrupts and NMIs on this CPU. * If we own the commit event, then we can commit * all others that interrupted us, since the interruptions * are in stack format (they finish before they come * back to us). This allows us to do a simple loop to * assign the commit to the tail. */ again: max_count = cpu_buffer->nr_pages * 100; while (cpu_buffer->commit_page != READ_ONCE(cpu_buffer->tail_page)) { if (RB_WARN_ON(cpu_buffer, !(--max_count))) return; if (RB_WARN_ON(cpu_buffer, rb_is_reader_page(cpu_buffer->tail_page))) return; /* * No need for a memory barrier here, as the update * of the tail_page did it for this page. */ local_set(&cpu_buffer->commit_page->page->commit, rb_page_write(cpu_buffer->commit_page)); rb_inc_page(&cpu_buffer->commit_page); if (cpu_buffer->ring_meta) { struct ring_buffer_meta *meta = cpu_buffer->ring_meta; meta->commit_buffer = (unsigned long)cpu_buffer->commit_page->page; } /* add barrier to keep gcc from optimizing too much */ barrier(); } while (rb_commit_index(cpu_buffer) != rb_page_write(cpu_buffer->commit_page)) { /* Make sure the readers see the content of what is committed. */ smp_wmb(); local_set(&cpu_buffer->commit_page->page->commit, rb_page_write(cpu_buffer->commit_page)); RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->commit_page->page->commit) & ~RB_WRITE_MASK); barrier(); } /* again, keep gcc from optimizing */ barrier(); /* * If an interrupt came in just after the first while loop * and pushed the tail page forward, we will be left with * a dangling commit that will never go forward. */ if (unlikely(cpu_buffer->commit_page != READ_ONCE(cpu_buffer->tail_page))) goto again; } static __always_inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer) { unsigned long commits; if (RB_WARN_ON(cpu_buffer, !local_read(&cpu_buffer->committing))) return; again: commits = local_read(&cpu_buffer->commits); /* synchronize with interrupts */ barrier(); if (local_read(&cpu_buffer->committing) == 1) rb_set_commit_to_write(cpu_buffer); local_dec(&cpu_buffer->committing); /* synchronize with interrupts */ barrier(); /* * Need to account for interrupts coming in between the * updating of the commit page and the clearing of the * committing counter. */ if (unlikely(local_read(&cpu_buffer->commits) != commits) && !local_read(&cpu_buffer->committing)) { local_inc(&cpu_buffer->committing); goto again; } } static inline void rb_event_discard(struct ring_buffer_event *event) { if (extended_time(event)) event = skip_time_extend(event); /* array[0] holds the actual length for the discarded event */ event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE; event->type_len = RINGBUF_TYPE_PADDING; /* time delta must be non zero */ if (!event->time_delta) event->time_delta = 1; } static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer) { local_inc(&cpu_buffer->entries); rb_end_commit(cpu_buffer); } static __always_inline void rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) { if (buffer->irq_work.waiters_pending) { buffer->irq_work.waiters_pending = false; /* irq_work_queue() supplies it's own memory barriers */ irq_work_queue(&buffer->irq_work.work); } if (cpu_buffer->irq_work.waiters_pending) { cpu_buffer->irq_work.waiters_pending = false; /* irq_work_queue() supplies it's own memory barriers */ irq_work_queue(&cpu_buffer->irq_work.work); } if (cpu_buffer->last_pages_touch == local_read(&cpu_buffer->pages_touched)) return; if (cpu_buffer->reader_page == cpu_buffer->commit_page) return; if (!cpu_buffer->irq_work.full_waiters_pending) return; cpu_buffer->last_pages_touch = local_read(&cpu_buffer->pages_touched); if (!full_hit(buffer, cpu_buffer->cpu, cpu_buffer->shortest_full)) return; cpu_buffer->irq_work.wakeup_full = true; cpu_buffer->irq_work.full_waiters_pending = false; /* irq_work_queue() supplies it's own memory barriers */ irq_work_queue(&cpu_buffer->irq_work.work); } #ifdef CONFIG_RING_BUFFER_RECORD_RECURSION # define do_ring_buffer_record_recursion() \ do_ftrace_record_recursion(_THIS_IP_, _RET_IP_) #else # define do_ring_buffer_record_recursion() do { } while (0) #endif /* * The lock and unlock are done within a preempt disable section. * The current_context per_cpu variable can only be modified * by the current task between lock and unlock. But it can * be modified more than once via an interrupt. To pass this * information from the lock to the unlock without having to * access the 'in_interrupt()' functions again (which do show * a bit of overhead in something as critical as function tracing, * we use a bitmask trick. * * bit 1 = NMI context * bit 2 = IRQ context * bit 3 = SoftIRQ context * bit 4 = normal context. * * This works because this is the order of contexts that can * preempt other contexts. A SoftIRQ never preempts an IRQ * context. * * When the context is determined, the corresponding bit is * checked and set (if it was set, then a recursion of that context * happened). * * On unlock, we need to clear this bit. To do so, just subtract * 1 from the current_context and AND it to itself. * * (binary) * 101 - 1 = 100 * 101 & 100 = 100 (clearing bit zero) * * 1010 - 1 = 1001 * 1010 & 1001 = 1000 (clearing bit 1) * * The least significant bit can be cleared this way, and it * just so happens that it is the same bit corresponding to * the current context. * * Now the TRANSITION bit breaks the above slightly. The TRANSITION bit * is set when a recursion is detected at the current context, and if * the TRANSITION bit is already set, it will fail the recursion. * This is needed because there's a lag between the changing of * interrupt context and updating the preempt count. In this case, * a false positive will be found. To handle this, one extra recursion * is allowed, and this is done by the TRANSITION bit. If the TRANSITION * bit is already set, then it is considered a recursion and the function * ends. Otherwise, the TRANSITION bit is set, and that bit is returned. * * On the trace_recursive_unlock(), the TRANSITION bit will be the first * to be cleared. Even if it wasn't the context that set it. That is, * if an interrupt comes in while NORMAL bit is set and the ring buffer * is called before preempt_count() is updated, since the check will * be on the NORMAL bit, the TRANSITION bit will then be set. If an * NMI then comes in, it will set the NMI bit, but when the NMI code * does the trace_recursive_unlock() it will clear the TRANSITION bit * and leave the NMI bit set. But this is fine, because the interrupt * code that set the TRANSITION bit will then clear the NMI bit when it * calls trace_recursive_unlock(). If another NMI comes in, it will * set the TRANSITION bit and continue. * * Note: The TRANSITION bit only handles a single transition between context. */ static __always_inline bool trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer) { unsigned int val = cpu_buffer->current_context; int bit = interrupt_context_level(); bit = RB_CTX_NORMAL - bit; if (unlikely(val & (1 << (bit + cpu_buffer->nest)))) { /* * It is possible that this was called by transitioning * between interrupt context, and preempt_count() has not * been updated yet. In this case, use the TRANSITION bit. */ bit = RB_CTX_TRANSITION; if (val & (1 << (bit + cpu_buffer->nest))) { do_ring_buffer_record_recursion(); return true; } } val |= (1 << (bit + cpu_buffer->nest)); cpu_buffer->current_context = val; return false; } static __always_inline void trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer) { cpu_buffer->current_context &= cpu_buffer->current_context - (1 << cpu_buffer->nest); } /* The recursive locking above uses 5 bits */ #define NESTED_BITS 5 /** * ring_buffer_nest_start - Allow to trace while nested * @buffer: The ring buffer to modify * * The ring buffer has a safety mechanism to prevent recursion. * But there may be a case where a trace needs to be done while * tracing something else. In this case, calling this function * will allow this function to nest within a currently active * ring_buffer_lock_reserve(). * * Call this function before calling another ring_buffer_lock_reserve() and * call ring_buffer_nest_end() after the nested ring_buffer_unlock_commit(). */ void ring_buffer_nest_start(struct trace_buffer *buffer) { struct ring_buffer_per_cpu *cpu_buffer; int cpu; /* Enabled by ring_buffer_nest_end() */ preempt_disable_notrace(); cpu = raw_smp_processor_id(); cpu_buffer = buffer->buffers[cpu]; /* This is the shift value for the above recursive locking */ cpu_buffer->nest += NESTED_BITS; } /** * ring_buffer_nest_end - Allow to trace while nested * @buffer: The ring buffer to modify * * Must be called after ring_buffer_nest_start() and after the * ring_buffer_unlock_commit(). */ void ring_buffer_nest_end(struct trace_buffer *buffer) { struct ring_buffer_per_cpu *cpu_buffer; int cpu; /* disabled by ring_buffer_nest_start() */ cpu = raw_smp_processor_id(); cpu_buffer = buffer->buffers[cpu]; /* This is the shift value for the above recursive locking */ cpu_buffer->nest -= NESTED_BITS; preempt_enable_notrace(); } /** * ring_buffer_unlock_commit - commit a reserved * @buffer: The buffer to commit to * * This commits the data to the ring buffer, and releases any locks held. * * Must be paired with ring_buffer_lock_reserve. */ int ring_buffer_unlock_commit(struct trace_buffer *buffer) { struct ring_buffer_per_cpu *cpu_buffer; int cpu = raw_smp_processor_id(); cpu_buffer = buffer->buffers[cpu]; rb_commit(cpu_buffer); rb_wakeups(buffer, cpu_buffer); trace_recursive_unlock(cpu_buffer); preempt_enable_notrace(); return 0; } EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit); /* Special value to validate all deltas on a page. */ #define CHECK_FULL_PAGE 1L #ifdef CONFIG_RING_BUFFER_VALIDATE_TIME_DELTAS static const char *show_irq_str(int bits) { const char *type[] = { ".", // 0 "s", // 1 "h", // 2 "Hs", // 3 "n", // 4 "Ns", // 5 "Nh", // 6 "NHs", // 7 }; return type[bits]; } /* Assume this is an trace event */ static const char *show_flags(struct ring_buffer_event *event) { struct trace_entry *entry; int bits = 0; if (rb_event_data_length(event) - RB_EVNT_HDR_SIZE < sizeof(*entry)) return "X"; entry = ring_buffer_event_data(event); if (entry->flags & TRACE_FLAG_SOFTIRQ) bits |= 1; if (entry->flags & TRACE_FLAG_HARDIRQ) bits |= 2; if (entry->flags & TRACE_FLAG_NMI) bits |= 4; return show_irq_str(bits); } static const char *show_irq(struct ring_buffer_event *event) { struct trace_entry *entry; if (rb_event_data_length(event) - RB_EVNT_HDR_SIZE < sizeof(*entry)) return ""; entry = ring_buffer_event_data(event); if (entry->flags & TRACE_FLAG_IRQS_OFF) return "d"; return ""; } static const char *show_interrupt_level(void) { unsigned long pc = preempt_count(); unsigned char level = 0; if (pc & SOFTIRQ_OFFSET) level |= 1; if (pc & HARDIRQ_MASK) level |= 2; if (pc & NMI_MASK) level |= 4; return show_irq_str(level); } static void dump_buffer_page(struct buffer_data_page *bpage, struct rb_event_info *info, unsigned long tail) { struct ring_buffer_event *event; u64 ts, delta; int e; ts = bpage->time_stamp; pr_warn(" [%lld] PAGE TIME STAMP\n", ts); for (e = 0; e < tail; e += rb_event_length(event)) { event = (struct ring_buffer_event *)(bpage->data + e); switch (event->type_len) { case RINGBUF_TYPE_TIME_EXTEND: delta = rb_event_time_stamp(event); ts += delta; pr_warn(" 0x%x: [%lld] delta:%lld TIME EXTEND\n", e, ts, delta); break; case RINGBUF_TYPE_TIME_STAMP: delta = rb_event_time_stamp(event); ts = rb_fix_abs_ts(delta, ts); pr_warn(" 0x%x: [%lld] absolute:%lld TIME STAMP\n", e, ts, delta); break; case RINGBUF_TYPE_PADDING: ts += event->time_delta; pr_warn(" 0x%x: [%lld] delta:%d PADDING\n", e, ts, event->time_delta); break; case RINGBUF_TYPE_DATA: ts += event->time_delta; pr_warn(" 0x%x: [%lld] delta:%d %s%s\n", e, ts, event->time_delta, show_flags(event), show_irq(event)); break; default: break; } } pr_warn("expected end:0x%lx last event actually ended at:0x%x\n", tail, e); } static DEFINE_PER_CPU(atomic_t, checking); static atomic_t ts_dump; #define buffer_warn_return(fmt, ...) \ do { \ /* If another report is happening, ignore this one */ \ if (atomic_inc_return(&ts_dump) != 1) { \ atomic_dec(&ts_dump); \ goto out; \ } \ atomic_inc(&cpu_buffer->record_disabled); \ pr_warn(fmt, ##__VA_ARGS__); \ dump_buffer_page(bpage, info, tail); \ atomic_dec(&ts_dump); \ /* There's some cases in boot up that this can happen */ \ if (WARN_ON_ONCE(system_state != SYSTEM_BOOTING)) \ /* Do not re-enable checking */ \ return; \ } while (0) /* * Check if the current event time stamp matches the deltas on * the buffer page. */ static void check_buffer(struct ring_buffer_per_cpu *cpu_buffer, struct rb_event_info *info, unsigned long tail) { struct buffer_data_page *bpage; u64 ts, delta; bool full = false; int ret; bpage = info->tail_page->page; if (tail == CHECK_FULL_PAGE) { full = true; tail = local_read(&bpage->commit); } else if (info->add_timestamp & (RB_ADD_STAMP_FORCE | RB_ADD_STAMP_ABSOLUTE)) { /* Ignore events with absolute time stamps */ return; } /* * Do not check the first event (skip possible extends too). * Also do not check if previous events have not been committed. */ if (tail <= 8 || tail > local_read(&bpage->commit)) return; /* * If this interrupted another event, */ if (atomic_inc_return(this_cpu_ptr(&checking)) != 1) goto out; ret = rb_read_data_buffer(bpage, tail, cpu_buffer->cpu, &ts, &delta); if (ret < 0) { if (delta < ts) { buffer_warn_return("[CPU: %d]ABSOLUTE TIME WENT BACKWARDS: last ts: %lld absolute ts: %lld\n", cpu_buffer->cpu, ts, delta); goto out; } } if ((full && ts > info->ts) || (!full && ts + info->delta != info->ts)) { buffer_warn_return("[CPU: %d]TIME DOES NOT MATCH expected:%lld actual:%lld delta:%lld before:%lld after:%lld%s context:%s\n", cpu_buffer->cpu, ts + info->delta, info->ts, info->delta, info->before, info->after, full ? " (full)" : "", show_interrupt_level()); } out: atomic_dec(this_cpu_ptr(&checking)); } #else static inline void check_buffer(struct ring_buffer_per_cpu *cpu_buffer, struct rb_event_info *info, unsigned long tail) { } #endif /* CONFIG_RING_BUFFER_VALIDATE_TIME_DELTAS */ static struct ring_buffer_event * __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, struct rb_event_info *info) { struct ring_buffer_event *event; struct buffer_page *tail_page; unsigned long tail, write, w; /* Don't let the compiler play games with cpu_buffer->tail_page */ tail_page = info->tail_page = READ_ONCE(cpu_buffer->tail_page); /*A*/ w = local_read(&tail_page->write) & RB_WRITE_MASK; barrier(); rb_time_read(&cpu_buffer->before_stamp, &info->before); rb_time_read(&cpu_buffer->write_stamp, &info->after); barrier(); info->ts = rb_time_stamp(cpu_buffer->buffer); if ((info->add_timestamp & RB_ADD_STAMP_ABSOLUTE)) { info->delta = info->ts; } else { /* * If interrupting an event time update, we may need an * absolute timestamp. * Don't bother if this is the start of a new page (w == 0). */ if (!w) { /* Use the sub-buffer timestamp */ info->delta = 0; } else if (unlikely(info->before != info->after)) { info->add_timestamp |= RB_ADD_STAMP_FORCE | RB_ADD_STAMP_EXTEND; info->length += RB_LEN_TIME_EXTEND; } else { info->delta = info->ts - info->after; if (unlikely(test_time_stamp(info->delta))) { info->add_timestamp |= RB_ADD_STAMP_EXTEND; info->length += RB_LEN_TIME_EXTEND; } } } /*B*/ rb_time_set(&cpu_buffer->before_stamp, info->ts); /*C*/ write = local_add_return(info->length, &tail_page->write); /* set write to only the index of the write */ write &= RB_WRITE_MASK; tail = write - info->length; /* See if we shot pass the end of this buffer page */ if (unlikely(write > cpu_buffer->buffer->subbuf_size)) { check_buffer(cpu_buffer, info, CHECK_FULL_PAGE); return rb_move_tail(cpu_buffer, tail, info); } if (likely(tail == w)) { /* Nothing interrupted us between A and C */ /*D*/ rb_time_set(&cpu_buffer->write_stamp, info->ts); /* * If something came in between C and D, the write stamp * may now not be in sync. But that's fine as the before_stamp * will be different and then next event will just be forced * to use an absolute timestamp. */ if (likely(!(info->add_timestamp & (RB_ADD_STAMP_FORCE | RB_ADD_STAMP_ABSOLUTE)))) /* This did not interrupt any time update */ info->delta = info->ts - info->after; else /* Just use full timestamp for interrupting event */ info->delta = info->ts; check_buffer(cpu_buffer, info, tail); } else { u64 ts; /* SLOW PATH - Interrupted between A and C */ /* Save the old before_stamp */ rb_time_read(&cpu_buffer->before_stamp, &info->before); /* * Read a new timestamp and update the before_stamp to make * the next event after this one force using an absolute * timestamp. This is in case an interrupt were to come in * between E and F. */ ts = rb_time_stamp(cpu_buffer->buffer); rb_time_set(&cpu_buffer->before_stamp, ts); barrier(); /*E*/ rb_time_read(&cpu_buffer->write_stamp, &info->after); barrier(); /*F*/ if (write == (local_read(&tail_page->write) & RB_WRITE_MASK) && info->after == info->before && info->after < ts) { /* * Nothing came after this event between C and F, it is * safe to use info->after for the delta as it * matched info->before and is still valid. */ info->delta = ts - info->after; } else { /* * Interrupted between C and F: * Lost the previous events time stamp. Just set the * delta to zero, and this will be the same time as * the event this event interrupted. And the events that * came after this will still be correct (as they would * have built their delta on the previous event. */ info->delta = 0; } info->ts = ts; info->add_timestamp &= ~RB_ADD_STAMP_FORCE; } /* * If this is the first commit on the page, then it has the same * timestamp as the page itself. */ if (unlikely(!tail && !(info->add_timestamp & (RB_ADD_STAMP_FORCE | RB_ADD_STAMP_ABSOLUTE)))) info->delta = 0; /* We reserved something on the buffer */ event = __rb_page_index(tail_page, tail); rb_update_event(cpu_buffer, event, info); local_inc(&tail_page->entries); /* * If this is the first commit on the page, then update * its timestamp. */ if (unlikely(!tail)) tail_page->page->time_stamp = info->ts; /* account for these added bytes */ local_add(info->length, &cpu_buffer->entries_bytes); return event; } static __always_inline struct ring_buffer_event * rb_reserve_next_event(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer, unsigned long length) { struct ring_buffer_event *event; struct rb_event_info info; int nr_loops = 0; int add_ts_default; /* ring buffer does cmpxchg, make sure it is safe in NMI context */ if (!IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG) && (unlikely(in_nmi()))) { return NULL; } rb_start_commit(cpu_buffer); /* The commit page can not change after this */ #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP /* * Due to the ability to swap a cpu buffer from a buffer * it is possible it was swapped before we committed. * (committing stops a swap). We check for it here and * if it happened, we have to fail the write. */ barrier(); if (unlikely(READ_ONCE(cpu_buffer->buffer) != buffer)) { local_dec(&cpu_buffer->committing); local_dec(&cpu_buffer->commits); return NULL; } #endif info.length = rb_calculate_event_length(length); if (ring_buffer_time_stamp_abs(cpu_buffer->buffer)) { add_ts_default = RB_ADD_STAMP_ABSOLUTE; info.length += RB_LEN_TIME_EXTEND; if (info.length > cpu_buffer->buffer->max_data_size) goto out_fail; } else { add_ts_default = RB_ADD_STAMP_NONE; } again: info.add_timestamp = add_ts_default; info.delta = 0; /* * We allow for interrupts to reenter here and do a trace. * If one does, it will cause this original code to loop * back here. Even with heavy interrupts happening, this * should only happen a few times in a row. If this happens * 1000 times in a row, there must be either an interrupt * storm or we have something buggy. * Bail! */ if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000)) goto out_fail; event = __rb_reserve_next(cpu_buffer, &info); if (unlikely(PTR_ERR(event) == -EAGAIN)) { if (info.add_timestamp & (RB_ADD_STAMP_FORCE | RB_ADD_STAMP_EXTEND)) info.length -= RB_LEN_TIME_EXTEND; goto again; } if (likely(event)) return event; out_fail: rb_end_commit(cpu_buffer); return NULL; } /** * ring_buffer_lock_reserve - reserve a part of the buffer * @buffer: the ring buffer to reserve from * @length: the length of the data to reserve (excluding event header) * * Returns a reserved event on the ring buffer to copy directly to. * The user of this interface will need to get the body to write into * and can use the ring_buffer_event_data() interface. * * The length is the length of the data needed, not the event length * which also includes the event header. * * Must be paired with ring_buffer_unlock_commit, unless NULL is returned. * If NULL is returned, then nothing has been allocated or locked. */ struct ring_buffer_event * ring_buffer_lock_reserve(struct trace_buffer *buffer, unsigned long length) { struct ring_buffer_per_cpu *cpu_buffer; struct ring_buffer_event *event; int cpu; /* If we are tracing schedule, we don't want to recurse */ preempt_disable_notrace(); if (unlikely(atomic_read(&buffer->record_disabled))) goto out; cpu = raw_smp_processor_id(); if (unlikely(!cpumask_test_cpu(cpu, buffer->cpumask))) goto out; cpu_buffer = buffer->buffers[cpu]; if (unlikely(atomic_read(&cpu_buffer->record_disabled))) goto out; if (unlikely(length > buffer->max_data_size)) goto out; if (unlikely(trace_recursive_lock(cpu_buffer))) goto out; event = rb_reserve_next_event(buffer, cpu_buffer, length); if (!event) goto out_unlock; return event; out_unlock: trace_recursive_unlock(cpu_buffer); out: preempt_enable_notrace(); return NULL; } EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve); /* * Decrement the entries to the page that an event is on. * The event does not even need to exist, only the pointer * to the page it is on. This may only be called before the commit * takes place. */ static inline void rb_decrement_entry(struct ring_buffer_per_cpu *cpu_buffer, struct ring_buffer_event *event) { unsigned long addr = (unsigned long)event; struct buffer_page *bpage = cpu_buffer->commit_page; struct buffer_page *start; addr &= ~((PAGE_SIZE << cpu_buffer->buffer->subbuf_order) - 1); /* Do the likely case first */ if (likely(bpage->page == (void *)addr)) { local_dec(&bpage->entries); return; } /* * Because the commit page may be on the reader page we * start with the next page and check the end loop there. */ rb_inc_page(&bpage); start = bpage; do { if (bpage->page == (void *)addr) { local_dec(&bpage->entries); return; } rb_inc_page(&bpage); } while (bpage != start); /* commit not part of this buffer?? */ RB_WARN_ON(cpu_buffer, 1); } /** * ring_buffer_discard_commit - discard an event that has not been committed * @buffer: the ring buffer * @event: non committed event to discard * * Sometimes an event that is in the ring buffer needs to be ignored. * This function lets the user discard an event in the ring buffer * and then that event will not be read later. * * This function only works if it is called before the item has been * committed. It will try to free the event from the ring buffer * if another event has not been added behind it. * * If another event has been added behind it, it will set the event * up as discarded, and perform the commit. * * If this function is called, do not call ring_buffer_unlock_commit on * the event. */ void ring_buffer_discard_commit(struct trace_buffer *buffer, struct ring_buffer_event *event) { struct ring_buffer_per_cpu *cpu_buffer; int cpu; /* The event is discarded regardless */ rb_event_discard(event); cpu = smp_processor_id(); cpu_buffer = buffer->buffers[cpu]; /* * This must only be called if the event has not been * committed yet. Thus we can assume that preemption * is still disabled. */ RB_WARN_ON(buffer, !local_read(&cpu_buffer->committing)); rb_decrement_entry(cpu_buffer, event); if (rb_try_to_discard(cpu_buffer, event)) goto out; out: rb_end_commit(cpu_buffer); trace_recursive_unlock(cpu_buffer); preempt_enable_notrace(); } EXPORT_SYMBOL_GPL(ring_buffer_discard_commit); /** * ring_buffer_write - write data to the buffer without reserving * @buffer: The ring buffer to write to. * @length: The length of the data being written (excluding the event header) * @data: The data to write to the buffer. * * This is like ring_buffer_lock_reserve and ring_buffer_unlock_commit as * one function. If you already have the data to write to the buffer, it * may be easier to simply call this function. * * Note, like ring_buffer_lock_reserve, the length is the length of the data * and not the length of the event which would hold the header. */ int ring_buffer_write(struct trace_buffer *buffer, unsigned long length, void *data) { struct ring_buffer_per_cpu *cpu_buffer; struct ring_buffer_event *event; void *body; int ret = -EBUSY; int cpu; preempt_disable_notrace(); if (atomic_read(&buffer->record_disabled)) goto out; cpu = raw_smp_processor_id(); if (!cpumask_test_cpu(cpu, buffer->cpumask)) goto out; cpu_buffer = buffer->buffers[cpu]; if (atomic_read(&cpu_buffer->record_disabled)) goto out; if (length > buffer->max_data_size) goto out; if (unlikely(trace_recursive_lock(cpu_buffer))) goto out; event = rb_reserve_next_event(buffer, cpu_buffer, length); if (!event) goto out_unlock; body = rb_event_data(event); memcpy(body, data, length); rb_commit(cpu_buffer); rb_wakeups(buffer, cpu_buffer); ret = 0; out_unlock: trace_recursive_unlock(cpu_buffer); out: preempt_enable_notrace(); return ret; } EXPORT_SYMBOL_GPL(ring_buffer_write); static bool rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) { struct buffer_page *reader = cpu_buffer->reader_page; struct buffer_page *head = rb_set_head_page(cpu_buffer); struct buffer_page *commit = cpu_buffer->commit_page; /* In case of error, head will be NULL */ if (unlikely(!head)) return true; /* Reader should exhaust content in reader page */ if (reader->read != rb_page_size(reader)) return false; /* * If writers are committing on the reader page, knowing all * committed content has been read, the ring buffer is empty. */ if (commit == reader) return true; /* * If writers are committing on a page other than reader page * and head page, there should always be content to read. */ if (commit != head) return false; /* * Writers are committing on the head page, we just need * to care about there're committed data, and the reader will * swap reader page with head page when it is to read data. */ return rb_page_commit(commit) == 0; } /** * ring_buffer_record_disable - stop all writes into the buffer * @buffer: The ring buffer to stop writes to. * * This prevents all writes to the buffer. Any attempt to write * to the buffer after this will fail and return NULL. * * The caller should call synchronize_rcu() after this. */ void ring_buffer_record_disable(struct trace_buffer *buffer) { atomic_inc(&buffer->record_disabled); } EXPORT_SYMBOL_GPL(ring_buffer_record_disable); /** * ring_buffer_record_enable - enable writes to the buffer * @buffer: The ring buffer to enable writes * * Note, multiple disables will need the same number of enables * to truly enable the writing (much like preempt_disable). */ void ring_buffer_record_enable(struct trace_buffer *buffer) { atomic_dec(&buffer->record_disabled); } EXPORT_SYMBOL_GPL(ring_buffer_record_enable); /** * ring_buffer_record_off - stop all writes into the buffer * @buffer: The ring buffer to stop writes to. * * This prevents all writes to the buffer. Any attempt to write * to the buffer after this will fail and return NULL. * * This is different than ring_buffer_record_disable() as * it works like an on/off switch, where as the disable() version * must be paired with a enable(). */ void ring_buffer_record_off(struct trace_buffer *buffer) { unsigned int rd; unsigned int new_rd; rd = atomic_read(&buffer->record_disabled); do { new_rd = rd | RB_BUFFER_OFF; } while (!atomic_try_cmpxchg(&buffer->record_disabled, &rd, new_rd)); } EXPORT_SYMBOL_GPL(ring_buffer_record_off); /** * ring_buffer_record_on - restart writes into the buffer * @buffer: The ring buffer to start writes to. * * This enables all writes to the buffer that was disabled by * ring_buffer_record_off(). * * This is different than ring_buffer_record_enable() as * it works like an on/off switch, where as the enable() version * must be paired with a disable(). */ void ring_buffer_record_on(struct trace_buffer *buffer) { unsigned int rd; unsigned int new_rd; rd = atomic_read(&buffer->record_disabled); do { new_rd = rd & ~RB_BUFFER_OFF; } while (!atomic_try_cmpxchg(&buffer->record_disabled, &rd, new_rd)); } EXPORT_SYMBOL_GPL(ring_buffer_record_on); /** * ring_buffer_record_is_on - return true if the ring buffer can write * @buffer: The ring buffer to see if write is enabled * * Returns true if the ring buffer is in a state that it accepts writes. */ bool ring_buffer_record_is_on(struct trace_buffer *buffer) { return !atomic_read(&buffer->record_disabled); } /** * ring_buffer_record_is_set_on - return true if the ring buffer is set writable * @buffer: The ring buffer to see if write is set enabled * * Returns true if the ring buffer is set writable by ring_buffer_record_on(). * Note that this does NOT mean it is in a writable state. * * It may return true when the ring buffer has been disabled by * ring_buffer_record_disable(), as that is a temporary disabling of * the ring buffer. */ bool ring_buffer_record_is_set_on(struct trace_buffer *buffer) { return !(atomic_read(&buffer->record_disabled) & RB_BUFFER_OFF); } /** * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer * @buffer: The ring buffer to stop writes to. * @cpu: The CPU buffer to stop * * This prevents all writes to the buffer. Any attempt to write * to the buffer after this will fail and return NULL. * * The caller should call synchronize_rcu() after this. */ void ring_buffer_record_disable_cpu(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; if (!cpumask_test_cpu(cpu, buffer->cpumask)) return; cpu_buffer = buffer->buffers[cpu]; atomic_inc(&cpu_buffer->record_disabled); } EXPORT_SYMBOL_GPL(ring_buffer_record_disable_cpu); /** * ring_buffer_record_enable_cpu - enable writes to the buffer * @buffer: The ring buffer to enable writes * @cpu: The CPU to enable. * * Note, multiple disables will need the same number of enables * to truly enable the writing (much like preempt_disable). */ void ring_buffer_record_enable_cpu(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; if (!cpumask_test_cpu(cpu, buffer->cpumask)) return; cpu_buffer = buffer->buffers[cpu]; atomic_dec(&cpu_buffer->record_disabled); } EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu); /* * The total entries in the ring buffer is the running counter * of entries entered into the ring buffer, minus the sum of * the entries read from the ring buffer and the number of * entries that were overwritten. */ static inline unsigned long rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer) { return local_read(&cpu_buffer->entries) - (local_read(&cpu_buffer->overrun) + cpu_buffer->read); } /** * ring_buffer_oldest_event_ts - get the oldest event timestamp from the buffer * @buffer: The ring buffer * @cpu: The per CPU buffer to read from. */ u64 ring_buffer_oldest_event_ts(struct trace_buffer *buffer, int cpu) { unsigned long flags; struct ring_buffer_per_cpu *cpu_buffer; struct buffer_page *bpage; u64 ret = 0; if (!cpumask_test_cpu(cpu, buffer->cpumask)) return 0; cpu_buffer = buffer->buffers[cpu]; raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); /* * if the tail is on reader_page, oldest time stamp is on the reader * page */ if (cpu_buffer->tail_page == cpu_buffer->reader_page) bpage = cpu_buffer->reader_page; else bpage = rb_set_head_page(cpu_buffer); if (bpage) ret = bpage->page->time_stamp; raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); return ret; } EXPORT_SYMBOL_GPL(ring_buffer_oldest_event_ts); /** * ring_buffer_bytes_cpu - get the number of bytes unconsumed in a cpu buffer * @buffer: The ring buffer * @cpu: The per CPU buffer to read from. */ unsigned long ring_buffer_bytes_cpu(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; unsigned long ret; if (!cpumask_test_cpu(cpu, buffer->cpumask)) return 0; cpu_buffer = buffer->buffers[cpu]; ret = local_read(&cpu_buffer->entries_bytes) - cpu_buffer->read_bytes; return ret; } EXPORT_SYMBOL_GPL(ring_buffer_bytes_cpu); /** * ring_buffer_entries_cpu - get the number of entries in a cpu buffer * @buffer: The ring buffer * @cpu: The per CPU buffer to get the entries from. */ unsigned long ring_buffer_entries_cpu(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; if (!cpumask_test_cpu(cpu, buffer->cpumask)) return 0; cpu_buffer = buffer->buffers[cpu]; return rb_num_of_entries(cpu_buffer); } EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu); /** * ring_buffer_overrun_cpu - get the number of overruns caused by the ring * buffer wrapping around (only if RB_FL_OVERWRITE is on). * @buffer: The ring buffer * @cpu: The per CPU buffer to get the number of overruns from */ unsigned long ring_buffer_overrun_cpu(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; unsigned long ret; if (!cpumask_test_cpu(cpu, buffer->cpumask)) return 0; cpu_buffer = buffer->buffers[cpu]; ret = local_read(&cpu_buffer->overrun); return ret; } EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu); /** * ring_buffer_commit_overrun_cpu - get the number of overruns caused by * commits failing due to the buffer wrapping around while there are uncommitted * events, such as during an interrupt storm. * @buffer: The ring buffer * @cpu: The per CPU buffer to get the number of overruns from */ unsigned long ring_buffer_commit_overrun_cpu(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; unsigned long ret; if (!cpumask_test_cpu(cpu, buffer->cpumask)) return 0; cpu_buffer = buffer->buffers[cpu]; ret = local_read(&cpu_buffer->commit_overrun); return ret; } EXPORT_SYMBOL_GPL(ring_buffer_commit_overrun_cpu); /** * ring_buffer_dropped_events_cpu - get the number of dropped events caused by * the ring buffer filling up (only if RB_FL_OVERWRITE is off). * @buffer: The ring buffer * @cpu: The per CPU buffer to get the number of overruns from */ unsigned long ring_buffer_dropped_events_cpu(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; unsigned long ret; if (!cpumask_test_cpu(cpu, buffer->cpumask)) return 0; cpu_buffer = buffer->buffers[cpu]; ret = local_read(&cpu_buffer->dropped_events); return ret; } EXPORT_SYMBOL_GPL(ring_buffer_dropped_events_cpu); /** * ring_buffer_read_events_cpu - get the number of events successfully read * @buffer: The ring buffer * @cpu: The per CPU buffer to get the number of events read */ unsigned long ring_buffer_read_events_cpu(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; if (!cpumask_test_cpu(cpu, buffer->cpumask)) return 0; cpu_buffer = buffer->buffers[cpu]; return cpu_buffer->read; } EXPORT_SYMBOL_GPL(ring_buffer_read_events_cpu); /** * ring_buffer_entries - get the number of entries in a buffer * @buffer: The ring buffer * * Returns the total number of entries in the ring buffer * (all CPU entries) */ unsigned long ring_buffer_entries(struct trace_buffer *buffer) { struct ring_buffer_per_cpu *cpu_buffer; unsigned long entries = 0; int cpu; /* if you care about this being correct, lock the buffer */ for_each_buffer_cpu(buffer, cpu) { cpu_buffer = buffer->buffers[cpu]; entries += rb_num_of_entries(cpu_buffer); } return entries; } EXPORT_SYMBOL_GPL(ring_buffer_entries); /** * ring_buffer_overruns - get the number of overruns in buffer * @buffer: The ring buffer * * Returns the total number of overruns in the ring buffer * (all CPU entries) */ unsigned long ring_buffer_overruns(struct trace_buffer *buffer) { struct ring_buffer_per_cpu *cpu_buffer; unsigned long overruns = 0; int cpu; /* if you care about this being correct, lock the buffer */ for_each_buffer_cpu(buffer, cpu) { cpu_buffer = buffer->buffers[cpu]; overruns += local_read(&cpu_buffer->overrun); } return overruns; } EXPORT_SYMBOL_GPL(ring_buffer_overruns); static void rb_iter_reset(struct ring_buffer_iter *iter) { struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; /* Iterator usage is expected to have record disabled */ iter->head_page = cpu_buffer->reader_page; iter->head = cpu_buffer->reader_page->read; iter->next_event = iter->head; iter->cache_reader_page = iter->head_page; iter->cache_read = cpu_buffer->read; iter->cache_pages_removed = cpu_buffer->pages_removed; if (iter->head) { iter->read_stamp = cpu_buffer->read_stamp; iter->page_stamp = cpu_buffer->reader_page->page->time_stamp; } else { iter->read_stamp = iter->head_page->page->time_stamp; iter->page_stamp = iter->read_stamp; } } /** * ring_buffer_iter_reset - reset an iterator * @iter: The iterator to reset * * Resets the iterator, so that it will start from the beginning * again. */ void ring_buffer_iter_reset(struct ring_buffer_iter *iter) { struct ring_buffer_per_cpu *cpu_buffer; unsigned long flags; if (!iter) return; cpu_buffer = iter->cpu_buffer; raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); rb_iter_reset(iter); raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); } EXPORT_SYMBOL_GPL(ring_buffer_iter_reset); /** * ring_buffer_iter_empty - check if an iterator has no more to read * @iter: The iterator to check */ int ring_buffer_iter_empty(struct ring_buffer_iter *iter) { struct ring_buffer_per_cpu *cpu_buffer; struct buffer_page *reader; struct buffer_page *head_page; struct buffer_page *commit_page; struct buffer_page *curr_commit_page; unsigned commit; u64 curr_commit_ts; u64 commit_ts; cpu_buffer = iter->cpu_buffer; reader = cpu_buffer->reader_page; head_page = cpu_buffer->head_page; commit_page = READ_ONCE(cpu_buffer->commit_page); commit_ts = commit_page->page->time_stamp; /* * When the writer goes across pages, it issues a cmpxchg which * is a mb(), which will synchronize with the rmb here. * (see rb_tail_page_update()) */ smp_rmb(); commit = rb_page_commit(commit_page); /* We want to make sure that the commit page doesn't change */ smp_rmb(); /* Make sure commit page didn't change */ curr_commit_page = READ_ONCE(cpu_buffer->commit_page); curr_commit_ts = READ_ONCE(curr_commit_page->page->time_stamp); /* If the commit page changed, then there's more data */ if (curr_commit_page != commit_page || curr_commit_ts != commit_ts) return 0; /* Still racy, as it may return a false positive, but that's OK */ return ((iter->head_page == commit_page && iter->head >= commit) || (iter->head_page == reader && commit_page == head_page && head_page->read == commit && iter->head == rb_page_size(cpu_buffer->reader_page))); } EXPORT_SYMBOL_GPL(ring_buffer_iter_empty); static void rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer, struct ring_buffer_event *event) { u64 delta; switch (event->type_len) { case RINGBUF_TYPE_PADDING: return; case RINGBUF_TYPE_TIME_EXTEND: delta = rb_event_time_stamp(event); cpu_buffer->read_stamp += delta; return; case RINGBUF_TYPE_TIME_STAMP: delta = rb_event_time_stamp(event); delta = rb_fix_abs_ts(delta, cpu_buffer->read_stamp); cpu_buffer->read_stamp = delta; return; case RINGBUF_TYPE_DATA: cpu_buffer->read_stamp += event->time_delta; return; default: RB_WARN_ON(cpu_buffer, 1); } } static void rb_update_iter_read_stamp(struct ring_buffer_iter *iter, struct ring_buffer_event *event) { u64 delta; switch (event->type_len) { case RINGBUF_TYPE_PADDING: return; case RINGBUF_TYPE_TIME_EXTEND: delta = rb_event_time_stamp(event); iter->read_stamp += delta; return; case RINGBUF_TYPE_TIME_STAMP: delta = rb_event_time_stamp(event); delta = rb_fix_abs_ts(delta, iter->read_stamp); iter->read_stamp = delta; return; case RINGBUF_TYPE_DATA: iter->read_stamp += event->time_delta; return; default: RB_WARN_ON(iter->cpu_buffer, 1); } } static struct buffer_page * rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) { struct buffer_page *reader = NULL; unsigned long bsize = READ_ONCE(cpu_buffer->buffer->subbuf_size); unsigned long overwrite; unsigned long flags; int nr_loops = 0; bool ret; local_irq_save(flags); arch_spin_lock(&cpu_buffer->lock); again: /* * This should normally only loop twice. But because the * start of the reader inserts an empty page, it causes * a case where we will loop three times. There should be no * reason to loop four times (that I know of). */ if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3)) { reader = NULL; goto out; } reader = cpu_buffer->reader_page; /* If there's more to read, return this page */ if (cpu_buffer->reader_page->read < rb_page_size(reader)) goto out; /* Never should we have an index greater than the size */ if (RB_WARN_ON(cpu_buffer, cpu_buffer->reader_page->read > rb_page_size(reader))) goto out; /* check if we caught up to the tail */ reader = NULL; if (cpu_buffer->commit_page == cpu_buffer->reader_page) goto out; /* Don't bother swapping if the ring buffer is empty */ if (rb_num_of_entries(cpu_buffer) == 0) goto out; /* * Reset the reader page to size zero. */ local_set(&cpu_buffer->reader_page->write, 0); local_set(&cpu_buffer->reader_page->entries, 0); local_set(&cpu_buffer->reader_page->page->commit, 0); cpu_buffer->reader_page->real_end = 0; spin: /* * Splice the empty reader page into the list around the head. */ reader = rb_set_head_page(cpu_buffer); if (!reader) goto out; cpu_buffer->reader_page->list.next = rb_list_head(reader->list.next); cpu_buffer->reader_page->list.prev = reader->list.prev; /* * cpu_buffer->pages just needs to point to the buffer, it * has no specific buffer page to point to. Lets move it out * of our way so we don't accidentally swap it. */ cpu_buffer->pages = reader->list.prev; /* The reader page will be pointing to the new head */ rb_set_list_to_head(&cpu_buffer->reader_page->list); /* * We want to make sure we read the overruns after we set up our * pointers to the next object. The writer side does a * cmpxchg to cross pages which acts as the mb on the writer * side. Note, the reader will constantly fail the swap * while the writer is updating the pointers, so this * guarantees that the overwrite recorded here is the one we * want to compare with the last_overrun. */ smp_mb(); overwrite = local_read(&(cpu_buffer->overrun)); /* * Here's the tricky part. * * We need to move the pointer past the header page. * But we can only do that if a writer is not currently * moving it. The page before the header page has the * flag bit '1' set if it is pointing to the page we want. * but if the writer is in the process of moving it * than it will be '2' or already moved '0'. */ ret = rb_head_page_replace(reader, cpu_buffer->reader_page); /* * If we did not convert it, then we must try again. */ if (!ret) goto spin; if (cpu_buffer->ring_meta) rb_update_meta_reader(cpu_buffer, reader); /* * Yay! We succeeded in replacing the page. * * Now make the new head point back to the reader page. */ rb_list_head(reader->list.next)->prev = &cpu_buffer->reader_page->list; rb_inc_page(&cpu_buffer->head_page); local_inc(&cpu_buffer->pages_read); /* Finally update the reader page to the new head */ cpu_buffer->reader_page = reader; cpu_buffer->reader_page->read = 0; if (overwrite != cpu_buffer->last_overrun) { cpu_buffer->lost_events = overwrite - cpu_buffer->last_overrun; cpu_buffer->last_overrun = overwrite; } goto again; out: /* Update the read_stamp on the first event */ if (reader && reader->read == 0) cpu_buffer->read_stamp = reader->page->time_stamp; arch_spin_unlock(&cpu_buffer->lock); local_irq_restore(flags); /* * The writer has preempt disable, wait for it. But not forever * Although, 1 second is pretty much "forever" */ #define USECS_WAIT 1000000 for (nr_loops = 0; nr_loops < USECS_WAIT; nr_loops++) { /* If the write is past the end of page, a writer is still updating it */ if (likely(!reader || rb_page_write(reader) <= bsize)) break; udelay(1); /* Get the latest version of the reader write value */ smp_rmb(); } /* The writer is not moving forward? Something is wrong */ if (RB_WARN_ON(cpu_buffer, nr_loops == USECS_WAIT)) reader = NULL; /* * Make sure we see any padding after the write update * (see rb_reset_tail()). * * In addition, a writer may be writing on the reader page * if the page has not been fully filled, so the read barrier * is also needed to make sure we see the content of what is * committed by the writer (see rb_set_commit_to_write()). */ smp_rmb(); return reader; } static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer) { struct ring_buffer_event *event; struct buffer_page *reader; unsigned length; reader = rb_get_reader_page(cpu_buffer); /* This function should not be called when buffer is empty */ if (RB_WARN_ON(cpu_buffer, !reader)) return; event = rb_reader_event(cpu_buffer); if (event->type_len <= RINGBUF_TYPE_DATA_TYPE_LEN_MAX) cpu_buffer->read++; rb_update_read_stamp(cpu_buffer, event); length = rb_event_length(event); cpu_buffer->reader_page->read += length; cpu_buffer->read_bytes += length; } static void rb_advance_iter(struct ring_buffer_iter *iter) { struct ring_buffer_per_cpu *cpu_buffer; cpu_buffer = iter->cpu_buffer; /* If head == next_event then we need to jump to the next event */ if (iter->head == iter->next_event) { /* If the event gets overwritten again, there's nothing to do */ if (rb_iter_head_event(iter) == NULL) return; } iter->head = iter->next_event; /* * Check if we are at the end of the buffer. */ if (iter->next_event >= rb_page_size(iter->head_page)) { /* discarded commits can make the page empty */ if (iter->head_page == cpu_buffer->commit_page) return; rb_inc_iter(iter); return; } rb_update_iter_read_stamp(iter, iter->event); } static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer) { return cpu_buffer->lost_events; } static struct ring_buffer_event * rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts, unsigned long *lost_events) { struct ring_buffer_event *event; struct buffer_page *reader; int nr_loops = 0; if (ts) *ts = 0; again: /* * We repeat when a time extend is encountered. * Since the time extend is always attached to a data event, * we should never loop more than once. * (We never hit the following condition more than twice). */ if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2)) return NULL; reader = rb_get_reader_page(cpu_buffer); if (!reader) return NULL; event = rb_reader_event(cpu_buffer); switch (event->type_len) { case RINGBUF_TYPE_PADDING: if (rb_null_event(event)) RB_WARN_ON(cpu_buffer, 1); /* * Because the writer could be discarding every * event it creates (which would probably be bad) * if we were to go back to "again" then we may never * catch up, and will trigger the warn on, or lock * the box. Return the padding, and we will release * the current locks, and try again. */ return event; case RINGBUF_TYPE_TIME_EXTEND: /* Internal data, OK to advance */ rb_advance_reader(cpu_buffer); goto again; case RINGBUF_TYPE_TIME_STAMP: if (ts) { *ts = rb_event_time_stamp(event); *ts = rb_fix_abs_ts(*ts, reader->page->time_stamp); ring_buffer_normalize_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu, ts); } /* Internal data, OK to advance */ rb_advance_reader(cpu_buffer); goto again; case RINGBUF_TYPE_DATA: if (ts && !(*ts)) { *ts = cpu_buffer->read_stamp + event->time_delta; ring_buffer_normalize_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu, ts); } if (lost_events) *lost_events = rb_lost_events(cpu_buffer); return event; default: RB_WARN_ON(cpu_buffer, 1); } return NULL; } EXPORT_SYMBOL_GPL(ring_buffer_peek); static struct ring_buffer_event * rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) { struct trace_buffer *buffer; struct ring_buffer_per_cpu *cpu_buffer; struct ring_buffer_event *event; int nr_loops = 0; if (ts) *ts = 0; cpu_buffer = iter->cpu_buffer; buffer = cpu_buffer->buffer; /* * Check if someone performed a consuming read to the buffer * or removed some pages from the buffer. In these cases, * iterator was invalidated and we need to reset it. */ if (unlikely(iter->cache_read != cpu_buffer->read || iter->cache_reader_page != cpu_buffer->reader_page || iter->cache_pages_removed != cpu_buffer->pages_removed)) rb_iter_reset(iter); again: if (ring_buffer_iter_empty(iter)) return NULL; /* * As the writer can mess with what the iterator is trying * to read, just give up if we fail to get an event after * three tries. The iterator is not as reliable when reading * the ring buffer with an active write as the consumer is. * Do not warn if the three failures is reached. */ if (++nr_loops > 3) return NULL; if (rb_per_cpu_empty(cpu_buffer)) return NULL; if (iter->head >= rb_page_size(iter->head_page)) { rb_inc_iter(iter); goto again; } event = rb_iter_head_event(iter); if (!event) goto again; switch (event->type_len) { case RINGBUF_TYPE_PADDING: if (rb_null_event(event)) { rb_inc_iter(iter); goto again; } rb_advance_iter(iter); return event; case RINGBUF_TYPE_TIME_EXTEND: /* Internal data, OK to advance */ rb_advance_iter(iter); goto again; case RINGBUF_TYPE_TIME_STAMP: if (ts) { *ts = rb_event_time_stamp(event); *ts = rb_fix_abs_ts(*ts, iter->head_page->page->time_stamp); ring_buffer_normalize_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu, ts); } /* Internal data, OK to advance */ rb_advance_iter(iter); goto again; case RINGBUF_TYPE_DATA: if (ts && !(*ts)) { *ts = iter->read_stamp + event->time_delta; ring_buffer_normalize_time_stamp(buffer, cpu_buffer->cpu, ts); } return event; default: RB_WARN_ON(cpu_buffer, 1); } return NULL; } EXPORT_SYMBOL_GPL(ring_buffer_iter_peek); static inline bool rb_reader_lock(struct ring_buffer_per_cpu *cpu_buffer) { if (likely(!in_nmi())) { raw_spin_lock(&cpu_buffer->reader_lock); return true; } /* * If an NMI die dumps out the content of the ring buffer * trylock must be used to prevent a deadlock if the NMI * preempted a task that holds the ring buffer locks. If * we get the lock then all is fine, if not, then continue * to do the read, but this can corrupt the ring buffer, * so it must be permanently disabled from future writes. * Reading from NMI is a oneshot deal. */ if (raw_spin_trylock(&cpu_buffer->reader_lock)) return true; /* Continue without locking, but disable the ring buffer */ atomic_inc(&cpu_buffer->record_disabled); return false; } static inline void rb_reader_unlock(struct ring_buffer_per_cpu *cpu_buffer, bool locked) { if (likely(locked)) raw_spin_unlock(&cpu_buffer->reader_lock); } /** * ring_buffer_peek - peek at the next event to be read * @buffer: The ring buffer to read * @cpu: The cpu to peak at * @ts: The timestamp counter of this event. * @lost_events: a variable to store if events were lost (may be NULL) * * This will return the event that will be read next, but does * not consume the data. */ struct ring_buffer_event * ring_buffer_peek(struct trace_buffer *buffer, int cpu, u64 *ts, unsigned long *lost_events) { struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; struct ring_buffer_event *event; unsigned long flags; bool dolock; if (!cpumask_test_cpu(cpu, buffer->cpumask)) return NULL; again: local_irq_save(flags); dolock = rb_reader_lock(cpu_buffer); event = rb_buffer_peek(cpu_buffer, ts, lost_events); if (event && event->type_len == RINGBUF_TYPE_PADDING) rb_advance_reader(cpu_buffer); rb_reader_unlock(cpu_buffer, dolock); local_irq_restore(flags); if (event && event->type_len == RINGBUF_TYPE_PADDING) goto again; return event; } /** ring_buffer_iter_dropped - report if there are dropped events * @iter: The ring buffer iterator * * Returns true if there was dropped events since the last peek. */ bool ring_buffer_iter_dropped(struct ring_buffer_iter *iter) { bool ret = iter->missed_events != 0; iter->missed_events = 0; return ret; } EXPORT_SYMBOL_GPL(ring_buffer_iter_dropped); /** * ring_buffer_iter_peek - peek at the next event to be read * @iter: The ring buffer iterator * @ts: The timestamp counter of this event. * * This will return the event that will be read next, but does * not increment the iterator. */ struct ring_buffer_event * ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) { struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; struct ring_buffer_event *event; unsigned long flags; again: raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); event = rb_iter_peek(iter, ts); raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); if (event && event->type_len == RINGBUF_TYPE_PADDING) goto again; return event; } /** * ring_buffer_consume - return an event and consume it * @buffer: The ring buffer to get the next event from * @cpu: the cpu to read the buffer from * @ts: a variable to store the timestamp (may be NULL) * @lost_events: a variable to store if events were lost (may be NULL) * * Returns the next event in the ring buffer, and that event is consumed. * Meaning, that sequential reads will keep returning a different event, * and eventually empty the ring buffer if the producer is slower. */ struct ring_buffer_event * ring_buffer_consume(struct trace_buffer *buffer, int cpu, u64 *ts, unsigned long *lost_events) { struct ring_buffer_per_cpu *cpu_buffer; struct ring_buffer_event *event = NULL; unsigned long flags; bool dolock; again: /* might be called in atomic */ preempt_disable(); if (!cpumask_test_cpu(cpu, buffer->cpumask)) goto out; cpu_buffer = buffer->buffers[cpu]; local_irq_save(flags); dolock = rb_reader_lock(cpu_buffer); event = rb_buffer_peek(cpu_buffer, ts, lost_events); if (event) { cpu_buffer->lost_events = 0; rb_advance_reader(cpu_buffer); } rb_reader_unlock(cpu_buffer, dolock); local_irq_restore(flags); out: preempt_enable(); if (event && event->type_len == RINGBUF_TYPE_PADDING) goto again; return event; } EXPORT_SYMBOL_GPL(ring_buffer_consume); /** * ring_buffer_read_prepare - Prepare for a non consuming read of the buffer * @buffer: The ring buffer to read from * @cpu: The cpu buffer to iterate over * @flags: gfp flags to use for memory allocation * * This performs the initial preparations necessary to iterate * through the buffer. Memory is allocated, buffer resizing * is disabled, and the iterator pointer is returned to the caller. * * After a sequence of ring_buffer_read_prepare calls, the user is * expected to make at least one call to ring_buffer_read_prepare_sync. * Afterwards, ring_buffer_read_start is invoked to get things going * for real. * * This overall must be paired with ring_buffer_read_finish. */ struct ring_buffer_iter * ring_buffer_read_prepare(struct trace_buffer *buffer, int cpu, gfp_t flags) { struct ring_buffer_per_cpu *cpu_buffer; struct ring_buffer_iter *iter; if (!cpumask_test_cpu(cpu, buffer->cpumask)) return NULL; iter = kzalloc(sizeof(*iter), flags); if (!iter) return NULL; /* Holds the entire event: data and meta data */ iter->event_size = buffer->subbuf_size; iter->event = kmalloc(iter->event_size, flags); if (!iter->event) { kfree(iter); return NULL; } cpu_buffer = buffer->buffers[cpu]; iter->cpu_buffer = cpu_buffer; atomic_inc(&cpu_buffer->resize_disabled); return iter; } EXPORT_SYMBOL_GPL(ring_buffer_read_prepare); /** * ring_buffer_read_prepare_sync - Synchronize a set of prepare calls * * All previously invoked ring_buffer_read_prepare calls to prepare * iterators will be synchronized. Afterwards, read_buffer_read_start * calls on those iterators are allowed. */ void ring_buffer_read_prepare_sync(void) { synchronize_rcu(); } EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync); /** * ring_buffer_read_start - start a non consuming read of the buffer * @iter: The iterator returned by ring_buffer_read_prepare * * This finalizes the startup of an iteration through the buffer. * The iterator comes from a call to ring_buffer_read_prepare and * an intervening ring_buffer_read_prepare_sync must have been * performed. * * Must be paired with ring_buffer_read_finish. */ void ring_buffer_read_start(struct ring_buffer_iter *iter) { struct ring_buffer_per_cpu *cpu_buffer; unsigned long flags; if (!iter) return; cpu_buffer = iter->cpu_buffer; raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); arch_spin_lock(&cpu_buffer->lock); rb_iter_reset(iter); arch_spin_unlock(&cpu_buffer->lock); raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); } EXPORT_SYMBOL_GPL(ring_buffer_read_start); /** * ring_buffer_read_finish - finish reading the iterator of the buffer * @iter: The iterator retrieved by ring_buffer_start * * This re-enables resizing of the buffer, and frees the iterator. */ void ring_buffer_read_finish(struct ring_buffer_iter *iter) { struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; unsigned long flags; /* Use this opportunity to check the integrity of the ring buffer. */ raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); rb_check_pages(cpu_buffer); raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); atomic_dec(&cpu_buffer->resize_disabled); kfree(iter->event); kfree(iter); } EXPORT_SYMBOL_GPL(ring_buffer_read_finish); /** * ring_buffer_iter_advance - advance the iterator to the next location * @iter: The ring buffer iterator * * Move the location of the iterator such that the next read will * be the next location of the iterator. */ void ring_buffer_iter_advance(struct ring_buffer_iter *iter) { struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; unsigned long flags; raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); rb_advance_iter(iter); raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); } EXPORT_SYMBOL_GPL(ring_buffer_iter_advance); /** * ring_buffer_size - return the size of the ring buffer (in bytes) * @buffer: The ring buffer. * @cpu: The CPU to get ring buffer size from. */ unsigned long ring_buffer_size(struct trace_buffer *buffer, int cpu) { if (!cpumask_test_cpu(cpu, buffer->cpumask)) return 0; return buffer->subbuf_size * buffer->buffers[cpu]->nr_pages; } EXPORT_SYMBOL_GPL(ring_buffer_size); /** * ring_buffer_max_event_size - return the max data size of an event * @buffer: The ring buffer. * * Returns the maximum size an event can be. */ unsigned long ring_buffer_max_event_size(struct trace_buffer *buffer) { /* If abs timestamp is requested, events have a timestamp too */ if (ring_buffer_time_stamp_abs(buffer)) return buffer->max_data_size - RB_LEN_TIME_EXTEND; return buffer->max_data_size; } EXPORT_SYMBOL_GPL(ring_buffer_max_event_size); static void rb_clear_buffer_page(struct buffer_page *page) { local_set(&page->write, 0); local_set(&page->entries, 0); rb_init_page(page->page); page->read = 0; } static void rb_update_meta_page(struct ring_buffer_per_cpu *cpu_buffer) { struct trace_buffer_meta *meta = cpu_buffer->meta_page; if (!meta) return; meta->reader.read = cpu_buffer->reader_page->read; meta->reader.id = cpu_buffer->reader_page->id; meta->reader.lost_events = cpu_buffer->lost_events; meta->entries = local_read(&cpu_buffer->entries); meta->overrun = local_read(&cpu_buffer->overrun); meta->read = cpu_buffer->read; /* Some archs do not have data cache coherency between kernel and user-space */ flush_dcache_folio(virt_to_folio(cpu_buffer->meta_page)); } static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) { struct buffer_page *page; rb_head_page_deactivate(cpu_buffer); cpu_buffer->head_page = list_entry(cpu_buffer->pages, struct buffer_page, list); rb_clear_buffer_page(cpu_buffer->head_page); list_for_each_entry(page, cpu_buffer->pages, list) { rb_clear_buffer_page(page); } cpu_buffer->tail_page = cpu_buffer->head_page; cpu_buffer->commit_page = cpu_buffer->head_page; INIT_LIST_HEAD(&cpu_buffer->reader_page->list); INIT_LIST_HEAD(&cpu_buffer->new_pages); rb_clear_buffer_page(cpu_buffer->reader_page); local_set(&cpu_buffer->entries_bytes, 0); local_set(&cpu_buffer->overrun, 0); local_set(&cpu_buffer->commit_overrun, 0); local_set(&cpu_buffer->dropped_events, 0); local_set(&cpu_buffer->entries, 0); local_set(&cpu_buffer->committing, 0); local_set(&cpu_buffer->commits, 0); local_set(&cpu_buffer->pages_touched, 0); local_set(&cpu_buffer->pages_lost, 0); local_set(&cpu_buffer->pages_read, 0); cpu_buffer->last_pages_touch = 0; cpu_buffer->shortest_full = 0; cpu_buffer->read = 0; cpu_buffer->read_bytes = 0; rb_time_set(&cpu_buffer->write_stamp, 0); rb_time_set(&cpu_buffer->before_stamp, 0); memset(cpu_buffer->event_stamp, 0, sizeof(cpu_buffer->event_stamp)); cpu_buffer->lost_events = 0; cpu_buffer->last_overrun = 0; rb_head_page_activate(cpu_buffer); cpu_buffer->pages_removed = 0; if (cpu_buffer->mapped) { rb_update_meta_page(cpu_buffer); if (cpu_buffer->ring_meta) { struct ring_buffer_meta *meta = cpu_buffer->ring_meta; meta->commit_buffer = meta->head_buffer; } } } /* Must have disabled the cpu buffer then done a synchronize_rcu */ static void reset_disabled_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) { unsigned long flags; raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing))) goto out; arch_spin_lock(&cpu_buffer->lock); rb_reset_cpu(cpu_buffer); arch_spin_unlock(&cpu_buffer->lock); out: raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); } /** * ring_buffer_reset_cpu - reset a ring buffer per CPU buffer * @buffer: The ring buffer to reset a per cpu buffer of * @cpu: The CPU buffer to be reset */ void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; struct ring_buffer_meta *meta; if (!cpumask_test_cpu(cpu, buffer->cpumask)) return; /* prevent another thread from changing buffer sizes */ mutex_lock(&buffer->mutex); atomic_inc(&cpu_buffer->resize_disabled); atomic_inc(&cpu_buffer->record_disabled); /* Make sure all commits have finished */ synchronize_rcu(); reset_disabled_cpu_buffer(cpu_buffer); atomic_dec(&cpu_buffer->record_disabled); atomic_dec(&cpu_buffer->resize_disabled); /* Make sure persistent meta now uses this buffer's addresses */ meta = rb_range_meta(buffer, 0, cpu_buffer->cpu); if (meta) rb_meta_init_text_addr(meta); mutex_unlock(&buffer->mutex); } EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu); /* Flag to ensure proper resetting of atomic variables */ #define RESET_BIT (1 << 30) /** * ring_buffer_reset_online_cpus - reset a ring buffer per CPU buffer * @buffer: The ring buffer to reset a per cpu buffer of */ void ring_buffer_reset_online_cpus(struct trace_buffer *buffer) { struct ring_buffer_per_cpu *cpu_buffer; struct ring_buffer_meta *meta; int cpu; /* prevent another thread from changing buffer sizes */ mutex_lock(&buffer->mutex); for_each_online_buffer_cpu(buffer, cpu) { cpu_buffer = buffer->buffers[cpu]; atomic_add(RESET_BIT, &cpu_buffer->resize_disabled); atomic_inc(&cpu_buffer->record_disabled); } /* Make sure all commits have finished */ synchronize_rcu(); for_each_buffer_cpu(buffer, cpu) { cpu_buffer = buffer->buffers[cpu]; /* * If a CPU came online during the synchronize_rcu(), then * ignore it. */ if (!(atomic_read(&cpu_buffer->resize_disabled) & RESET_BIT)) continue; reset_disabled_cpu_buffer(cpu_buffer); /* Make sure persistent meta now uses this buffer's addresses */ meta = rb_range_meta(buffer, 0, cpu_buffer->cpu); if (meta) rb_meta_init_text_addr(meta); atomic_dec(&cpu_buffer->record_disabled); atomic_sub(RESET_BIT, &cpu_buffer->resize_disabled); } mutex_unlock(&buffer->mutex); } /** * ring_buffer_reset - reset a ring buffer * @buffer: The ring buffer to reset all cpu buffers */ void ring_buffer_reset(struct trace_buffer *buffer) { struct ring_buffer_per_cpu *cpu_buffer; int cpu; /* prevent another thread from changing buffer sizes */ mutex_lock(&buffer->mutex); for_each_buffer_cpu(buffer, cpu) { cpu_buffer = buffer->buffers[cpu]; atomic_inc(&cpu_buffer->resize_disabled); atomic_inc(&cpu_buffer->record_disabled); } /* Make sure all commits have finished */ synchronize_rcu(); for_each_buffer_cpu(buffer, cpu) { cpu_buffer = buffer->buffers[cpu]; reset_disabled_cpu_buffer(cpu_buffer); atomic_dec(&cpu_buffer->record_disabled); atomic_dec(&cpu_buffer->resize_disabled); } mutex_unlock(&buffer->mutex); } EXPORT_SYMBOL_GPL(ring_buffer_reset); /** * ring_buffer_empty - is the ring buffer empty? * @buffer: The ring buffer to test */ bool ring_buffer_empty(struct trace_buffer *buffer) { struct ring_buffer_per_cpu *cpu_buffer; unsigned long flags; bool dolock; bool ret; int cpu; /* yes this is racy, but if you don't like the race, lock the buffer */ for_each_buffer_cpu(buffer, cpu) { cpu_buffer = buffer->buffers[cpu]; local_irq_save(flags); dolock = rb_reader_lock(cpu_buffer); ret = rb_per_cpu_empty(cpu_buffer); rb_reader_unlock(cpu_buffer, dolock); local_irq_restore(flags); if (!ret) return false; } return true; } EXPORT_SYMBOL_GPL(ring_buffer_empty); /** * ring_buffer_empty_cpu - is a cpu buffer of a ring buffer empty? * @buffer: The ring buffer * @cpu: The CPU buffer to test */ bool ring_buffer_empty_cpu(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; unsigned long flags; bool dolock; bool ret; if (!cpumask_test_cpu(cpu, buffer->cpumask)) return true; cpu_buffer = buffer->buffers[cpu]; local_irq_save(flags); dolock = rb_reader_lock(cpu_buffer); ret = rb_per_cpu_empty(cpu_buffer); rb_reader_unlock(cpu_buffer, dolock); local_irq_restore(flags); return ret; } EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu); #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP /** * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers * @buffer_a: One buffer to swap with * @buffer_b: The other buffer to swap with * @cpu: the CPU of the buffers to swap * * This function is useful for tracers that want to take a "snapshot" * of a CPU buffer and has another back up buffer lying around. * it is expected that the tracer handles the cpu buffer not being * used at the moment. */ int ring_buffer_swap_cpu(struct trace_buffer *buffer_a, struct trace_buffer *buffer_b, int cpu) { struct ring_buffer_per_cpu *cpu_buffer_a; struct ring_buffer_per_cpu *cpu_buffer_b; int ret = -EINVAL; if (!cpumask_test_cpu(cpu, buffer_a->cpumask) || !cpumask_test_cpu(cpu, buffer_b->cpumask)) goto out; cpu_buffer_a = buffer_a->buffers[cpu]; cpu_buffer_b = buffer_b->buffers[cpu]; /* It's up to the callers to not try to swap mapped buffers */ if (WARN_ON_ONCE(cpu_buffer_a->mapped || cpu_buffer_b->mapped)) { ret = -EBUSY; goto out; } /* At least make sure the two buffers are somewhat the same */ if (cpu_buffer_a->nr_pages != cpu_buffer_b->nr_pages) goto out; if (buffer_a->subbuf_order != buffer_b->subbuf_order) goto out; ret = -EAGAIN; if (atomic_read(&buffer_a->record_disabled)) goto out; if (atomic_read(&buffer_b->record_disabled)) goto out; if (atomic_read(&cpu_buffer_a->record_disabled)) goto out; if (atomic_read(&cpu_buffer_b->record_disabled)) goto out; /* * We can't do a synchronize_rcu here because this * function can be called in atomic context. * Normally this will be called from the same CPU as cpu. * If not it's up to the caller to protect this. */ atomic_inc(&cpu_buffer_a->record_disabled); atomic_inc(&cpu_buffer_b->record_disabled); ret = -EBUSY; if (local_read(&cpu_buffer_a->committing)) goto out_dec; if (local_read(&cpu_buffer_b->committing)) goto out_dec; /* * When resize is in progress, we cannot swap it because * it will mess the state of the cpu buffer. */ if (atomic_read(&buffer_a->resizing)) goto out_dec; if (atomic_read(&buffer_b->resizing)) goto out_dec; buffer_a->buffers[cpu] = cpu_buffer_b; buffer_b->buffers[cpu] = cpu_buffer_a; cpu_buffer_b->buffer = buffer_a; cpu_buffer_a->buffer = buffer_b; ret = 0; out_dec: atomic_dec(&cpu_buffer_a->record_disabled); atomic_dec(&cpu_buffer_b->record_disabled); out: return ret; } EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); #endif /* CONFIG_RING_BUFFER_ALLOW_SWAP */ /** * ring_buffer_alloc_read_page - allocate a page to read from buffer * @buffer: the buffer to allocate for. * @cpu: the cpu buffer to allocate. * * This function is used in conjunction with ring_buffer_read_page. * When reading a full page from the ring buffer, these functions * can be used to speed up the process. The calling function should * allocate a few pages first with this function. Then when it * needs to get pages from the ring buffer, it passes the result * of this function into ring_buffer_read_page, which will swap * the page that was allocated, with the read page of the buffer. * * Returns: * The page allocated, or ERR_PTR */ struct buffer_data_read_page * ring_buffer_alloc_read_page(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; struct buffer_data_read_page *bpage = NULL; unsigned long flags; struct page *page; if (!cpumask_test_cpu(cpu, buffer->cpumask)) return ERR_PTR(-ENODEV); bpage = kzalloc(sizeof(*bpage), GFP_KERNEL); if (!bpage) return ERR_PTR(-ENOMEM); bpage->order = buffer->subbuf_order; cpu_buffer = buffer->buffers[cpu]; local_irq_save(flags); arch_spin_lock(&cpu_buffer->lock); if (cpu_buffer->free_page) { bpage->data = cpu_buffer->free_page; cpu_buffer->free_page = NULL; } arch_spin_unlock(&cpu_buffer->lock); local_irq_restore(flags); if (bpage->data) goto out; page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL | __GFP_NORETRY | __GFP_COMP | __GFP_ZERO, cpu_buffer->buffer->subbuf_order); if (!page) { kfree(bpage); return ERR_PTR(-ENOMEM); } bpage->data = page_address(page); out: rb_init_page(bpage->data); return bpage; } EXPORT_SYMBOL_GPL(ring_buffer_alloc_read_page); /** * ring_buffer_free_read_page - free an allocated read page * @buffer: the buffer the page was allocate for * @cpu: the cpu buffer the page came from * @data_page: the page to free * * Free a page allocated from ring_buffer_alloc_read_page. */ void ring_buffer_free_read_page(struct trace_buffer *buffer, int cpu, struct buffer_data_read_page *data_page) { struct ring_buffer_per_cpu *cpu_buffer; struct buffer_data_page *bpage = data_page->data; struct page *page = virt_to_page(bpage); unsigned long flags; if (!buffer || !buffer->buffers || !buffer->buffers[cpu]) return; cpu_buffer = buffer->buffers[cpu]; /* * If the page is still in use someplace else, or order of the page * is different from the subbuffer order of the buffer - * we can't reuse it */ if (page_ref_count(page) > 1 || data_page->order != buffer->subbuf_order) goto out; local_irq_save(flags); arch_spin_lock(&cpu_buffer->lock); if (!cpu_buffer->free_page) { cpu_buffer->free_page = bpage; bpage = NULL; } arch_spin_unlock(&cpu_buffer->lock); local_irq_restore(flags); out: free_pages((unsigned long)bpage, data_page->order); kfree(data_page); } EXPORT_SYMBOL_GPL(ring_buffer_free_read_page); /** * ring_buffer_read_page - extract a page from the ring buffer * @buffer: buffer to extract from * @data_page: the page to use allocated from ring_buffer_alloc_read_page * @len: amount to extract * @cpu: the cpu of the buffer to extract * @full: should the extraction only happen when the page is full. * * This function will pull out a page from the ring buffer and consume it. * @data_page must be the address of the variable that was returned * from ring_buffer_alloc_read_page. This is because the page might be used * to swap with a page in the ring buffer. * * for example: * rpage = ring_buffer_alloc_read_page(buffer, cpu); * if (IS_ERR(rpage)) * return PTR_ERR(rpage); * ret = ring_buffer_read_page(buffer, rpage, len, cpu, 0); * if (ret >= 0) * process_page(ring_buffer_read_page_data(rpage), ret); * ring_buffer_free_read_page(buffer, cpu, rpage); * * When @full is set, the function will not return true unless * the writer is off the reader page. * * Note: it is up to the calling functions to handle sleeps and wakeups. * The ring buffer can be used anywhere in the kernel and can not * blindly call wake_up. The layer that uses the ring buffer must be * responsible for that. * * Returns: * >=0 if data has been transferred, returns the offset of consumed data. * <0 if no data has been transferred. */ int ring_buffer_read_page(struct trace_buffer *buffer, struct buffer_data_read_page *data_page, size_t len, int cpu, int full) { struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; struct ring_buffer_event *event; struct buffer_data_page *bpage; struct buffer_page *reader; unsigned long missed_events; unsigned long flags; unsigned int commit; unsigned int read; u64 save_timestamp; int ret = -1; if (!cpumask_test_cpu(cpu, buffer->cpumask)) goto out; /* * If len is not big enough to hold the page header, then * we can not copy anything. */ if (len <= BUF_PAGE_HDR_SIZE) goto out; len -= BUF_PAGE_HDR_SIZE; if (!data_page || !data_page->data) goto out; if (data_page->order != buffer->subbuf_order) goto out; bpage = data_page->data; if (!bpage) goto out; raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); reader = rb_get_reader_page(cpu_buffer); if (!reader) goto out_unlock; event = rb_reader_event(cpu_buffer); read = reader->read; commit = rb_page_size(reader); /* Check if any events were dropped */ missed_events = cpu_buffer->lost_events; /* * If this page has been partially read or * if len is not big enough to read the rest of the page or * a writer is still on the page, then * we must copy the data from the page to the buffer. * Otherwise, we can simply swap the page with the one passed in. */ if (read || (len < (commit - read)) || cpu_buffer->reader_page == cpu_buffer->commit_page || cpu_buffer->mapped) { struct buffer_data_page *rpage = cpu_buffer->reader_page->page; unsigned int rpos = read; unsigned int pos = 0; unsigned int size; /* * If a full page is expected, this can still be returned * if there's been a previous partial read and the * rest of the page can be read and the commit page is off * the reader page. */ if (full && (!read || (len < (commit - read)) || cpu_buffer->reader_page == cpu_buffer->commit_page)) goto out_unlock; if (len > (commit - read)) len = (commit - read); /* Always keep the time extend and data together */ size = rb_event_ts_length(event); if (len < size) goto out_unlock; /* save the current timestamp, since the user will need it */ save_timestamp = cpu_buffer->read_stamp; /* Need to copy one event at a time */ do { /* We need the size of one event, because * rb_advance_reader only advances by one event, * whereas rb_event_ts_length may include the size of * one or two events. * We have already ensured there's enough space if this * is a time extend. */ size = rb_event_length(event); memcpy(bpage->data + pos, rpage->data + rpos, size); len -= size; rb_advance_reader(cpu_buffer); rpos = reader->read; pos += size; if (rpos >= commit) break; event = rb_reader_event(cpu_buffer); /* Always keep the time extend and data together */ size = rb_event_ts_length(event); } while (len >= size); /* update bpage */ local_set(&bpage->commit, pos); bpage->time_stamp = save_timestamp; /* we copied everything to the beginning */ read = 0; } else { /* update the entry counter */ cpu_buffer->read += rb_page_entries(reader); cpu_buffer->read_bytes += rb_page_size(reader); /* swap the pages */ rb_init_page(bpage); bpage = reader->page; reader->page = data_page->data; local_set(&reader->write, 0); local_set(&reader->entries, 0); reader->read = 0; data_page->data = bpage; /* * Use the real_end for the data size, * This gives us a chance to store the lost events * on the page. */ if (reader->real_end) local_set(&bpage->commit, reader->real_end); } ret = read; cpu_buffer->lost_events = 0; commit = local_read(&bpage->commit); /* * Set a flag in the commit field if we lost events */ if (missed_events) { /* If there is room at the end of the page to save the * missed events, then record it there. */ if (buffer->subbuf_size - commit >= sizeof(missed_events)) { memcpy(&bpage->data[commit], &missed_events, sizeof(missed_events)); local_add(RB_MISSED_STORED, &bpage->commit); commit += sizeof(missed_events); } local_add(RB_MISSED_EVENTS, &bpage->commit); } /* * This page may be off to user land. Zero it out here. */ if (commit < buffer->subbuf_size) memset(&bpage->data[commit], 0, buffer->subbuf_size - commit); out_unlock: raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); out: return ret; } EXPORT_SYMBOL_GPL(ring_buffer_read_page); /** * ring_buffer_read_page_data - get pointer to the data in the page. * @page: the page to get the data from * * Returns pointer to the actual data in this page. */ void *ring_buffer_read_page_data(struct buffer_data_read_page *page) { return page->data; } EXPORT_SYMBOL_GPL(ring_buffer_read_page_data); /** * ring_buffer_subbuf_size_get - get size of the sub buffer. * @buffer: the buffer to get the sub buffer size from * * Returns size of the sub buffer, in bytes. */ int ring_buffer_subbuf_size_get(struct trace_buffer *buffer) { return buffer->subbuf_size + BUF_PAGE_HDR_SIZE; } EXPORT_SYMBOL_GPL(ring_buffer_subbuf_size_get); /** * ring_buffer_subbuf_order_get - get order of system sub pages in one buffer page. * @buffer: The ring_buffer to get the system sub page order from * * By default, one ring buffer sub page equals to one system page. This parameter * is configurable, per ring buffer. The size of the ring buffer sub page can be * extended, but must be an order of system page size. * * Returns the order of buffer sub page size, in system pages: * 0 means the sub buffer size is 1 system page and so forth. * In case of an error < 0 is returned. */ int ring_buffer_subbuf_order_get(struct trace_buffer *buffer) { if (!buffer) return -EINVAL; return buffer->subbuf_order; } EXPORT_SYMBOL_GPL(ring_buffer_subbuf_order_get); /** * ring_buffer_subbuf_order_set - set the size of ring buffer sub page. * @buffer: The ring_buffer to set the new page size. * @order: Order of the system pages in one sub buffer page * * By default, one ring buffer pages equals to one system page. This API can be * used to set new size of the ring buffer page. The size must be order of * system page size, that's why the input parameter @order is the order of * system pages that are allocated for one ring buffer page: * 0 - 1 system page * 1 - 2 system pages * 3 - 4 system pages * ... * * Returns 0 on success or < 0 in case of an error. */ int ring_buffer_subbuf_order_set(struct trace_buffer *buffer, int order) { struct ring_buffer_per_cpu *cpu_buffer; struct buffer_page *bpage, *tmp; int old_order, old_size; int nr_pages; int psize; int err; int cpu; if (!buffer || order < 0) return -EINVAL; if (buffer->subbuf_order == order) return 0; psize = (1 << order) * PAGE_SIZE; if (psize <= BUF_PAGE_HDR_SIZE) return -EINVAL; /* Size of a subbuf cannot be greater than the write counter */ if (psize > RB_WRITE_MASK + 1) return -EINVAL; old_order = buffer->subbuf_order; old_size = buffer->subbuf_size; /* prevent another thread from changing buffer sizes */ mutex_lock(&buffer->mutex); atomic_inc(&buffer->record_disabled); /* Make sure all commits have finished */ synchronize_rcu(); buffer->subbuf_order = order; buffer->subbuf_size = psize - BUF_PAGE_HDR_SIZE; /* Make sure all new buffers are allocated, before deleting the old ones */ for_each_buffer_cpu(buffer, cpu) { if (!cpumask_test_cpu(cpu, buffer->cpumask)) continue; cpu_buffer = buffer->buffers[cpu]; if (cpu_buffer->mapped) { err = -EBUSY; goto error; } /* Update the number of pages to match the new size */ nr_pages = old_size * buffer->buffers[cpu]->nr_pages; nr_pages = DIV_ROUND_UP(nr_pages, buffer->subbuf_size); /* we need a minimum of two pages */ if (nr_pages < 2) nr_pages = 2; cpu_buffer->nr_pages_to_update = nr_pages; /* Include the reader page */ nr_pages++; /* Allocate the new size buffer */ INIT_LIST_HEAD(&cpu_buffer->new_pages); if (__rb_allocate_pages(cpu_buffer, nr_pages, &cpu_buffer->new_pages)) { /* not enough memory for new pages */ err = -ENOMEM; goto error; } } for_each_buffer_cpu(buffer, cpu) { struct buffer_data_page *old_free_data_page; struct list_head old_pages; unsigned long flags; if (!cpumask_test_cpu(cpu, buffer->cpumask)) continue; cpu_buffer = buffer->buffers[cpu]; raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); /* Clear the head bit to make the link list normal to read */ rb_head_page_deactivate(cpu_buffer); /* * Collect buffers from the cpu_buffer pages list and the * reader_page on old_pages, so they can be freed later when not * under a spinlock. The pages list is a linked list with no * head, adding old_pages turns it into a regular list with * old_pages being the head. */ list_add(&old_pages, cpu_buffer->pages); list_add(&cpu_buffer->reader_page->list, &old_pages); /* One page was allocated for the reader page */ cpu_buffer->reader_page = list_entry(cpu_buffer->new_pages.next, struct buffer_page, list); list_del_init(&cpu_buffer->reader_page->list); /* Install the new pages, remove the head from the list */ cpu_buffer->pages = cpu_buffer->new_pages.next; list_del_init(&cpu_buffer->new_pages); cpu_buffer->head_page = list_entry(cpu_buffer->pages, struct buffer_page, list); cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page; cpu_buffer->nr_pages = cpu_buffer->nr_pages_to_update; cpu_buffer->nr_pages_to_update = 0; old_free_data_page = cpu_buffer->free_page; cpu_buffer->free_page = NULL; rb_head_page_activate(cpu_buffer); raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); /* Free old sub buffers */ list_for_each_entry_safe(bpage, tmp, &old_pages, list) { list_del_init(&bpage->list); free_buffer_page(bpage); } free_pages((unsigned long)old_free_data_page, old_order); rb_check_pages(cpu_buffer); } atomic_dec(&buffer->record_disabled); mutex_unlock(&buffer->mutex); return 0; error: buffer->subbuf_order = old_order; buffer->subbuf_size = old_size; atomic_dec(&buffer->record_disabled); mutex_unlock(&buffer->mutex); for_each_buffer_cpu(buffer, cpu) { cpu_buffer = buffer->buffers[cpu]; if (!cpu_buffer->nr_pages_to_update) continue; list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages, list) { list_del_init(&bpage->list); free_buffer_page(bpage); } } return err; } EXPORT_SYMBOL_GPL(ring_buffer_subbuf_order_set); static int rb_alloc_meta_page(struct ring_buffer_per_cpu *cpu_buffer) { struct page *page; if (cpu_buffer->meta_page) return 0; page = alloc_page(GFP_USER | __GFP_ZERO); if (!page) return -ENOMEM; cpu_buffer->meta_page = page_to_virt(page); return 0; } static void rb_free_meta_page(struct ring_buffer_per_cpu *cpu_buffer) { unsigned long addr = (unsigned long)cpu_buffer->meta_page; free_page(addr); cpu_buffer->meta_page = NULL; } static void rb_setup_ids_meta_page(struct ring_buffer_per_cpu *cpu_buffer, unsigned long *subbuf_ids) { struct trace_buffer_meta *meta = cpu_buffer->meta_page; unsigned int nr_subbufs = cpu_buffer->nr_pages + 1; struct buffer_page *first_subbuf, *subbuf; int id = 0; subbuf_ids[id] = (unsigned long)cpu_buffer->reader_page->page; cpu_buffer->reader_page->id = id++; first_subbuf = subbuf = rb_set_head_page(cpu_buffer); do { if (WARN_ON(id >= nr_subbufs)) break; subbuf_ids[id] = (unsigned long)subbuf->page; subbuf->id = id; rb_inc_page(&subbuf); id++; } while (subbuf != first_subbuf); /* install subbuf ID to kern VA translation */ cpu_buffer->subbuf_ids = subbuf_ids; meta->meta_struct_len = sizeof(*meta); meta->nr_subbufs = nr_subbufs; meta->subbuf_size = cpu_buffer->buffer->subbuf_size + BUF_PAGE_HDR_SIZE; meta->meta_page_size = meta->subbuf_size; rb_update_meta_page(cpu_buffer); } static struct ring_buffer_per_cpu * rb_get_mapped_buffer(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; if (!cpumask_test_cpu(cpu, buffer->cpumask)) return ERR_PTR(-EINVAL); cpu_buffer = buffer->buffers[cpu]; mutex_lock(&cpu_buffer->mapping_lock); if (!cpu_buffer->user_mapped) { mutex_unlock(&cpu_buffer->mapping_lock); return ERR_PTR(-ENODEV); } return cpu_buffer; } static void rb_put_mapped_buffer(struct ring_buffer_per_cpu *cpu_buffer) { mutex_unlock(&cpu_buffer->mapping_lock); } /* * Fast-path for rb_buffer_(un)map(). Called whenever the meta-page doesn't need * to be set-up or torn-down. */ static int __rb_inc_dec_mapped(struct ring_buffer_per_cpu *cpu_buffer, bool inc) { unsigned long flags; lockdep_assert_held(&cpu_buffer->mapping_lock); /* mapped is always greater or equal to user_mapped */ if (WARN_ON(cpu_buffer->mapped < cpu_buffer->user_mapped)) return -EINVAL; if (inc && cpu_buffer->mapped == UINT_MAX) return -EBUSY; if (WARN_ON(!inc && cpu_buffer->user_mapped == 0)) return -EINVAL; mutex_lock(&cpu_buffer->buffer->mutex); raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); if (inc) { cpu_buffer->user_mapped++; cpu_buffer->mapped++; } else { cpu_buffer->user_mapped--; cpu_buffer->mapped--; } raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); mutex_unlock(&cpu_buffer->buffer->mutex); return 0; } /* * +--------------+ pgoff == 0 * | meta page | * +--------------+ pgoff == 1 * | subbuffer 0 | * | | * +--------------+ pgoff == (1 + (1 << subbuf_order)) * | subbuffer 1 | * | | * ... */ #ifdef CONFIG_MMU static int __rb_map_vma(struct ring_buffer_per_cpu *cpu_buffer, struct vm_area_struct *vma) { unsigned long nr_subbufs, nr_pages, nr_vma_pages, pgoff = vma->vm_pgoff; unsigned int subbuf_pages, subbuf_order; struct page **pages; int p = 0, s = 0; int err; /* Refuse MP_PRIVATE or writable mappings */ if (vma->vm_flags & VM_WRITE || vma->vm_flags & VM_EXEC || !(vma->vm_flags & VM_MAYSHARE)) return -EPERM; subbuf_order = cpu_buffer->buffer->subbuf_order; subbuf_pages = 1 << subbuf_order; if (subbuf_order && pgoff % subbuf_pages) return -EINVAL; /* * Make sure the mapping cannot become writable later. Also tell the VM * to not touch these pages (VM_DONTCOPY | VM_DONTEXPAND). */ vm_flags_mod(vma, VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP, VM_MAYWRITE); lockdep_assert_held(&cpu_buffer->mapping_lock); nr_subbufs = cpu_buffer->nr_pages + 1; /* + reader-subbuf */ nr_pages = ((nr_subbufs + 1) << subbuf_order) - pgoff; /* + meta-page */ nr_vma_pages = vma_pages(vma); if (!nr_vma_pages || nr_vma_pages > nr_pages) return -EINVAL; nr_pages = nr_vma_pages; pages = kcalloc(nr_pages, sizeof(*pages), GFP_KERNEL); if (!pages) return -ENOMEM; if (!pgoff) { unsigned long meta_page_padding; pages[p++] = virt_to_page(cpu_buffer->meta_page); /* * Pad with the zero-page to align the meta-page with the * sub-buffers. */ meta_page_padding = subbuf_pages - 1; while (meta_page_padding-- && p < nr_pages) { unsigned long __maybe_unused zero_addr = vma->vm_start + (PAGE_SIZE * p); pages[p++] = ZERO_PAGE(zero_addr); } } else { /* Skip the meta-page */ pgoff -= subbuf_pages; s += pgoff / subbuf_pages; } while (p < nr_pages) { struct page *page = virt_to_page((void *)cpu_buffer->subbuf_ids[s]); int off = 0; if (WARN_ON_ONCE(s >= nr_subbufs)) { err = -EINVAL; goto out; } for (; off < (1 << (subbuf_order)); off++, page++) { if (p >= nr_pages) break; pages[p++] = page; } s++; } err = vm_insert_pages(vma, vma->vm_start, pages, &nr_pages); out: kfree(pages); return err; } #else static int __rb_map_vma(struct ring_buffer_per_cpu *cpu_buffer, struct vm_area_struct *vma) { return -EOPNOTSUPP; } #endif int ring_buffer_map(struct trace_buffer *buffer, int cpu, struct vm_area_struct *vma) { struct ring_buffer_per_cpu *cpu_buffer; unsigned long flags, *subbuf_ids; int err = 0; if (!cpumask_test_cpu(cpu, buffer->cpumask)) return -EINVAL; cpu_buffer = buffer->buffers[cpu]; mutex_lock(&cpu_buffer->mapping_lock); if (cpu_buffer->user_mapped) { err = __rb_map_vma(cpu_buffer, vma); if (!err) err = __rb_inc_dec_mapped(cpu_buffer, true); mutex_unlock(&cpu_buffer->mapping_lock); return err; } /* prevent another thread from changing buffer/sub-buffer sizes */ mutex_lock(&buffer->mutex); err = rb_alloc_meta_page(cpu_buffer); if (err) goto unlock; /* subbuf_ids include the reader while nr_pages does not */ subbuf_ids = kcalloc(cpu_buffer->nr_pages + 1, sizeof(*subbuf_ids), GFP_KERNEL); if (!subbuf_ids) { rb_free_meta_page(cpu_buffer); err = -ENOMEM; goto unlock; } atomic_inc(&cpu_buffer->resize_disabled); /* * Lock all readers to block any subbuf swap until the subbuf IDs are * assigned. */ raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); rb_setup_ids_meta_page(cpu_buffer, subbuf_ids); raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); err = __rb_map_vma(cpu_buffer, vma); if (!err) { raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); /* This is the first time it is mapped by user */ cpu_buffer->mapped++; cpu_buffer->user_mapped = 1; raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); } else { kfree(cpu_buffer->subbuf_ids); cpu_buffer->subbuf_ids = NULL; rb_free_meta_page(cpu_buffer); } unlock: mutex_unlock(&buffer->mutex); mutex_unlock(&cpu_buffer->mapping_lock); return err; } int ring_buffer_unmap(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; unsigned long flags; int err = 0; if (!cpumask_test_cpu(cpu, buffer->cpumask)) return -EINVAL; cpu_buffer = buffer->buffers[cpu]; mutex_lock(&cpu_buffer->mapping_lock); if (!cpu_buffer->user_mapped) { err = -ENODEV; goto out; } else if (cpu_buffer->user_mapped > 1) { __rb_inc_dec_mapped(cpu_buffer, false); goto out; } mutex_lock(&buffer->mutex); raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); /* This is the last user space mapping */ if (!WARN_ON_ONCE(cpu_buffer->mapped < cpu_buffer->user_mapped)) cpu_buffer->mapped--; cpu_buffer->user_mapped = 0; raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); kfree(cpu_buffer->subbuf_ids); cpu_buffer->subbuf_ids = NULL; rb_free_meta_page(cpu_buffer); atomic_dec(&cpu_buffer->resize_disabled); mutex_unlock(&buffer->mutex); out: mutex_unlock(&cpu_buffer->mapping_lock); return err; } int ring_buffer_map_get_reader(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; struct buffer_page *reader; unsigned long missed_events; unsigned long reader_size; unsigned long flags; cpu_buffer = rb_get_mapped_buffer(buffer, cpu); if (IS_ERR(cpu_buffer)) return (int)PTR_ERR(cpu_buffer); raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); consume: if (rb_per_cpu_empty(cpu_buffer)) goto out; reader_size = rb_page_size(cpu_buffer->reader_page); /* * There are data to be read on the current reader page, we can * return to the caller. But before that, we assume the latter will read * everything. Let's update the kernel reader accordingly. */ if (cpu_buffer->reader_page->read < reader_size) { while (cpu_buffer->reader_page->read < reader_size) rb_advance_reader(cpu_buffer); goto out; } reader = rb_get_reader_page(cpu_buffer); if (WARN_ON(!reader)) goto out; /* Check if any events were dropped */ missed_events = cpu_buffer->lost_events; if (cpu_buffer->reader_page != cpu_buffer->commit_page) { if (missed_events) { struct buffer_data_page *bpage = reader->page; unsigned int commit; /* * Use the real_end for the data size, * This gives us a chance to store the lost events * on the page. */ if (reader->real_end) local_set(&bpage->commit, reader->real_end); /* * If there is room at the end of the page to save the * missed events, then record it there. */ commit = rb_page_size(reader); if (buffer->subbuf_size - commit >= sizeof(missed_events)) { memcpy(&bpage->data[commit], &missed_events, sizeof(missed_events)); local_add(RB_MISSED_STORED, &bpage->commit); } local_add(RB_MISSED_EVENTS, &bpage->commit); } } else { /* * There really shouldn't be any missed events if the commit * is on the reader page. */ WARN_ON_ONCE(missed_events); } cpu_buffer->lost_events = 0; goto consume; out: /* Some archs do not have data cache coherency between kernel and user-space */ flush_dcache_folio(virt_to_folio(cpu_buffer->reader_page->page)); rb_update_meta_page(cpu_buffer); raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); rb_put_mapped_buffer(cpu_buffer); return 0; } /* * We only allocate new buffers, never free them if the CPU goes down. * If we were to free the buffer, then the user would lose any trace that was in * the buffer. */ int trace_rb_cpu_prepare(unsigned int cpu, struct hlist_node *node) { struct trace_buffer *buffer; long nr_pages_same; int cpu_i; unsigned long nr_pages; buffer = container_of(node, struct trace_buffer, node); if (cpumask_test_cpu(cpu, buffer->cpumask)) return 0; nr_pages = 0; nr_pages_same = 1; /* check if all cpu sizes are same */ for_each_buffer_cpu(buffer, cpu_i) { /* fill in the size from first enabled cpu */ if (nr_pages == 0) nr_pages = buffer->buffers[cpu_i]->nr_pages; if (nr_pages != buffer->buffers[cpu_i]->nr_pages) { nr_pages_same = 0; break; } } /* allocate minimum pages, user can later expand it */ if (!nr_pages_same) nr_pages = 2; buffer->buffers[cpu] = rb_allocate_cpu_buffer(buffer, nr_pages, cpu); if (!buffer->buffers[cpu]) { WARN(1, "failed to allocate ring buffer on CPU %u\n", cpu); return -ENOMEM; } smp_wmb(); cpumask_set_cpu(cpu, buffer->cpumask); return 0; } #ifdef CONFIG_RING_BUFFER_STARTUP_TEST /* * This is a basic integrity check of the ring buffer. * Late in the boot cycle this test will run when configured in. * It will kick off a thread per CPU that will go into a loop * writing to the per cpu ring buffer various sizes of data. * Some of the data will be large items, some small. * * Another thread is created that goes into a spin, sending out * IPIs to the other CPUs to also write into the ring buffer. * this is to test the nesting ability of the buffer. * * Basic stats are recorded and reported. If something in the * ring buffer should happen that's not expected, a big warning * is displayed and all ring buffers are disabled. */ static struct task_struct *rb_threads[NR_CPUS] __initdata; struct rb_test_data { struct trace_buffer *buffer; unsigned long events; unsigned long bytes_written; unsigned long bytes_alloc; unsigned long bytes_dropped; unsigned long events_nested; unsigned long bytes_written_nested; unsigned long bytes_alloc_nested; unsigned long bytes_dropped_nested; int min_size_nested; int max_size_nested; int max_size; int min_size; int cpu; int cnt; }; static struct rb_test_data rb_data[NR_CPUS] __initdata; /* 1 meg per cpu */ #define RB_TEST_BUFFER_SIZE 1048576 static char rb_string[] __initdata = "abcdefghijklmnopqrstuvwxyz1234567890!@#$%^&*()?+\\" "?+|:';\",.<>/?abcdefghijklmnopqrstuvwxyz1234567890" "!@#$%^&*()?+\\?+|:';\",.<>/?abcdefghijklmnopqrstuv"; static bool rb_test_started __initdata; struct rb_item { int size; char str[]; }; static __init int rb_write_something(struct rb_test_data *data, bool nested) { struct ring_buffer_event *event; struct rb_item *item; bool started; int event_len; int size; int len; int cnt; /* Have nested writes different that what is written */ cnt = data->cnt + (nested ? 27 : 0); /* Multiply cnt by ~e, to make some unique increment */ size = (cnt * 68 / 25) % (sizeof(rb_string) - 1); len = size + sizeof(struct rb_item); started = rb_test_started; /* read rb_test_started before checking buffer enabled */ smp_rmb(); event = ring_buffer_lock_reserve(data->buffer, len); if (!event) { /* Ignore dropped events before test starts. */ if (started) { if (nested) data->bytes_dropped += len; else data->bytes_dropped_nested += len; } return len; } event_len = ring_buffer_event_length(event); if (RB_WARN_ON(data->buffer, event_len < len)) goto out; item = ring_buffer_event_data(event); item->size = size; memcpy(item->str, rb_string, size); if (nested) { data->bytes_alloc_nested += event_len; data->bytes_written_nested += len; data->events_nested++; if (!data->min_size_nested || len < data->min_size_nested) data->min_size_nested = len; if (len > data->max_size_nested) data->max_size_nested = len; } else { data->bytes_alloc += event_len; data->bytes_written += len; data->events++; if (!data->min_size || len < data->min_size) data->max_size = len; if (len > data->max_size) data->max_size = len; } out: ring_buffer_unlock_commit(data->buffer); return 0; } static __init int rb_test(void *arg) { struct rb_test_data *data = arg; while (!kthread_should_stop()) { rb_write_something(data, false); data->cnt++; set_current_state(TASK_INTERRUPTIBLE); /* Now sleep between a min of 100-300us and a max of 1ms */ usleep_range(((data->cnt % 3) + 1) * 100, 1000); } return 0; } static __init void rb_ipi(void *ignore) { struct rb_test_data *data; int cpu = smp_processor_id(); data = &rb_data[cpu]; rb_write_something(data, true); } static __init int rb_hammer_test(void *arg) { while (!kthread_should_stop()) { /* Send an IPI to all cpus to write data! */ smp_call_function(rb_ipi, NULL, 1); /* No sleep, but for non preempt, let others run */ schedule(); } return 0; } static __init int test_ringbuffer(void) { struct task_struct *rb_hammer; struct trace_buffer *buffer; int cpu; int ret = 0; if (security_locked_down(LOCKDOWN_TRACEFS)) { pr_warn("Lockdown is enabled, skipping ring buffer tests\n"); return 0; } pr_info("Running ring buffer tests...\n"); buffer = ring_buffer_alloc(RB_TEST_BUFFER_SIZE, RB_FL_OVERWRITE); if (WARN_ON(!buffer)) return 0; /* Disable buffer so that threads can't write to it yet */ ring_buffer_record_off(buffer); for_each_online_cpu(cpu) { rb_data[cpu].buffer = buffer; rb_data[cpu].cpu = cpu; rb_data[cpu].cnt = cpu; rb_threads[cpu] = kthread_run_on_cpu(rb_test, &rb_data[cpu], cpu, "rbtester/%u"); if (WARN_ON(IS_ERR(rb_threads[cpu]))) { pr_cont("FAILED\n"); ret = PTR_ERR(rb_threads[cpu]); goto out_free; } } /* Now create the rb hammer! */ rb_hammer = kthread_run(rb_hammer_test, NULL, "rbhammer"); if (WARN_ON(IS_ERR(rb_hammer))) { pr_cont("FAILED\n"); ret = PTR_ERR(rb_hammer); goto out_free; } ring_buffer_record_on(buffer); /* * Show buffer is enabled before setting rb_test_started. * Yes there's a small race window where events could be * dropped and the thread wont catch it. But when a ring * buffer gets enabled, there will always be some kind of * delay before other CPUs see it. Thus, we don't care about * those dropped events. We care about events dropped after * the threads see that the buffer is active. */ smp_wmb(); rb_test_started = true; set_current_state(TASK_INTERRUPTIBLE); /* Just run for 10 seconds */; schedule_timeout(10 * HZ); kthread_stop(rb_hammer); out_free: for_each_online_cpu(cpu) { if (!rb_threads[cpu]) break; kthread_stop(rb_threads[cpu]); } if (ret) { ring_buffer_free(buffer); return ret; } /* Report! */ pr_info("finished\n"); for_each_online_cpu(cpu) { struct ring_buffer_event *event; struct rb_test_data *data = &rb_data[cpu]; struct rb_item *item; unsigned long total_events; unsigned long total_dropped; unsigned long total_written; unsigned long total_alloc; unsigned long total_read = 0; unsigned long total_size = 0; unsigned long total_len = 0; unsigned long total_lost = 0; unsigned long lost; int big_event_size; int small_event_size; ret = -1; total_events = data->events + data->events_nested; total_written = data->bytes_written + data->bytes_written_nested; total_alloc = data->bytes_alloc + data->bytes_alloc_nested; total_dropped = data->bytes_dropped + data->bytes_dropped_nested; big_event_size = data->max_size + data->max_size_nested; small_event_size = data->min_size + data->min_size_nested; pr_info("CPU %d:\n", cpu); pr_info(" events: %ld\n", total_events); pr_info(" dropped bytes: %ld\n", total_dropped); pr_info(" alloced bytes: %ld\n", total_alloc); pr_info(" written bytes: %ld\n", total_written); pr_info(" biggest event: %d\n", big_event_size); pr_info(" smallest event: %d\n", small_event_size); if (RB_WARN_ON(buffer, total_dropped)) break; ret = 0; while ((event = ring_buffer_consume(buffer, cpu, NULL, &lost))) { total_lost += lost; item = ring_buffer_event_data(event); total_len += ring_buffer_event_length(event); total_size += item->size + sizeof(struct rb_item); if (memcmp(&item->str[0], rb_string, item->size) != 0) { pr_info("FAILED!\n"); pr_info("buffer had: %.*s\n", item->size, item->str); pr_info("expected: %.*s\n", item->size, rb_string); RB_WARN_ON(buffer, 1); ret = -1; break; } total_read++; } if (ret) break; ret = -1; pr_info(" read events: %ld\n", total_read); pr_info(" lost events: %ld\n", total_lost); pr_info(" total events: %ld\n", total_lost + total_read); pr_info(" recorded len bytes: %ld\n", total_len); pr_info(" recorded size bytes: %ld\n", total_size); if (total_lost) { pr_info(" With dropped events, record len and size may not match\n" " alloced and written from above\n"); } else { if (RB_WARN_ON(buffer, total_len != total_alloc || total_size != total_written)) break; } if (RB_WARN_ON(buffer, total_lost + total_read != total_events)) break; ret = 0; } if (!ret) pr_info("Ring buffer PASSED!\n"); ring_buffer_free(buffer); return 0; } late_initcall(test_ringbuffer); #endif /* CONFIG_RING_BUFFER_STARTUP_TEST */
2 11 11 9 11 8 6 7 11 6 8 4 1 2 2 1 2 2 2 2 3 2 2 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 // SPDX-License-Identifier: GPL-2.0-only /* * TCP Vegas congestion control * * This is based on the congestion detection/avoidance scheme described in * Lawrence S. Brakmo and Larry L. Peterson. * "TCP Vegas: End to end congestion avoidance on a global internet." * IEEE Journal on Selected Areas in Communication, 13(8):1465--1480, * October 1995. Available from: * ftp://ftp.cs.arizona.edu/xkernel/Papers/jsac.ps * * See http://www.cs.arizona.edu/xkernel/ for their implementation. * The main aspects that distinguish this implementation from the * Arizona Vegas implementation are: * o We do not change the loss detection or recovery mechanisms of * Linux in any way. Linux already recovers from losses quite well, * using fine-grained timers, NewReno, and FACK. * o To avoid the performance penalty imposed by increasing cwnd * only every-other RTT during slow start, we increase during * every RTT during slow start, just like Reno. * o Largely to allow continuous cwnd growth during slow start, * we use the rate at which ACKs come back as the "actual" * rate, rather than the rate at which data is sent. * o To speed convergence to the right rate, we set the cwnd * to achieve the right ("actual") rate when we exit slow start. * o To filter out the noise caused by delayed ACKs, we use the * minimum RTT sample observed during the last RTT to calculate * the actual rate. * o When the sender re-starts from idle, it waits until it has * received ACKs for an entire flight of new data before making * a cwnd adjustment decision. The original Vegas implementation * assumed senders never went idle. */ #include <linux/mm.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/inet_diag.h> #include <net/tcp.h> #include "tcp_vegas.h" static int alpha = 2; static int beta = 4; static int gamma = 1; module_param(alpha, int, 0644); MODULE_PARM_DESC(alpha, "lower bound of packets in network"); module_param(beta, int, 0644); MODULE_PARM_DESC(beta, "upper bound of packets in network"); module_param(gamma, int, 0644); MODULE_PARM_DESC(gamma, "limit on increase (scale by 2)"); /* There are several situations when we must "re-start" Vegas: * * o when a connection is established * o after an RTO * o after fast recovery * o when we send a packet and there is no outstanding * unacknowledged data (restarting an idle connection) * * In these circumstances we cannot do a Vegas calculation at the * end of the first RTT, because any calculation we do is using * stale info -- both the saved cwnd and congestion feedback are * stale. * * Instead we must wait until the completion of an RTT during * which we actually receive ACKs. */ static void vegas_enable(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); struct vegas *vegas = inet_csk_ca(sk); /* Begin taking Vegas samples next time we send something. */ vegas->doing_vegas_now = 1; /* Set the beginning of the next send window. */ vegas->beg_snd_nxt = tp->snd_nxt; vegas->cntRTT = 0; vegas->minRTT = 0x7fffffff; } /* Stop taking Vegas samples for now. */ static inline void vegas_disable(struct sock *sk) { struct vegas *vegas = inet_csk_ca(sk); vegas->doing_vegas_now = 0; } void tcp_vegas_init(struct sock *sk) { struct vegas *vegas = inet_csk_ca(sk); vegas->baseRTT = 0x7fffffff; vegas_enable(sk); } EXPORT_SYMBOL_GPL(tcp_vegas_init); /* Do RTT sampling needed for Vegas. * Basically we: * o min-filter RTT samples from within an RTT to get the current * propagation delay + queuing delay (we are min-filtering to try to * avoid the effects of delayed ACKs) * o min-filter RTT samples from a much longer window (forever for now) * to find the propagation delay (baseRTT) */ void tcp_vegas_pkts_acked(struct sock *sk, const struct ack_sample *sample) { struct vegas *vegas = inet_csk_ca(sk); u32 vrtt; if (sample->rtt_us < 0) return; /* Never allow zero rtt or baseRTT */ vrtt = sample->rtt_us + 1; /* Filter to find propagation delay: */ if (vrtt < vegas->baseRTT) vegas->baseRTT = vrtt; /* Find the min RTT during the last RTT to find * the current prop. delay + queuing delay: */ vegas->minRTT = min(vegas->minRTT, vrtt); vegas->cntRTT++; } EXPORT_SYMBOL_GPL(tcp_vegas_pkts_acked); void tcp_vegas_state(struct sock *sk, u8 ca_state) { if (ca_state == TCP_CA_Open) vegas_enable(sk); else vegas_disable(sk); } EXPORT_SYMBOL_GPL(tcp_vegas_state); /* * If the connection is idle and we are restarting, * then we don't want to do any Vegas calculations * until we get fresh RTT samples. So when we * restart, we reset our Vegas state to a clean * slate. After we get acks for this flight of * packets, _then_ we can make Vegas calculations * again. */ void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event) { if (event == CA_EVENT_CWND_RESTART || event == CA_EVENT_TX_START) tcp_vegas_init(sk); } EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event); static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp) { return min(tp->snd_ssthresh, tcp_snd_cwnd(tp)); } static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); struct vegas *vegas = inet_csk_ca(sk); if (!vegas->doing_vegas_now) { tcp_reno_cong_avoid(sk, ack, acked); return; } if (after(ack, vegas->beg_snd_nxt)) { /* Do the Vegas once-per-RTT cwnd adjustment. */ /* Save the extent of the current window so we can use this * at the end of the next RTT. */ vegas->beg_snd_nxt = tp->snd_nxt; /* We do the Vegas calculations only if we got enough RTT * samples that we can be reasonably sure that we got * at least one RTT sample that wasn't from a delayed ACK. * If we only had 2 samples total, * then that means we're getting only 1 ACK per RTT, which * means they're almost certainly delayed ACKs. * If we have 3 samples, we should be OK. */ if (vegas->cntRTT <= 2) { /* We don't have enough RTT samples to do the Vegas * calculation, so we'll behave like Reno. */ tcp_reno_cong_avoid(sk, ack, acked); } else { u32 rtt, diff; u64 target_cwnd; /* We have enough RTT samples, so, using the Vegas * algorithm, we determine if we should increase or * decrease cwnd, and by how much. */ /* Pluck out the RTT we are using for the Vegas * calculations. This is the min RTT seen during the * last RTT. Taking the min filters out the effects * of delayed ACKs, at the cost of noticing congestion * a bit later. */ rtt = vegas->minRTT; /* Calculate the cwnd we should have, if we weren't * going too fast. * * This is: * (actual rate in segments) * baseRTT */ target_cwnd = (u64)tcp_snd_cwnd(tp) * vegas->baseRTT; do_div(target_cwnd, rtt); /* Calculate the difference between the window we had, * and the window we would like to have. This quantity * is the "Diff" from the Arizona Vegas papers. */ diff = tcp_snd_cwnd(tp) * (rtt-vegas->baseRTT) / vegas->baseRTT; if (diff > gamma && tcp_in_slow_start(tp)) { /* Going too fast. Time to slow down * and switch to congestion avoidance. */ /* Set cwnd to match the actual rate * exactly: * cwnd = (actual rate) * baseRTT * Then we add 1 because the integer * truncation robs us of full link * utilization. */ tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), (u32)target_cwnd + 1)); tp->snd_ssthresh = tcp_vegas_ssthresh(tp); } else if (tcp_in_slow_start(tp)) { /* Slow start. */ tcp_slow_start(tp, acked); } else { /* Congestion avoidance. */ /* Figure out where we would like cwnd * to be. */ if (diff > beta) { /* The old window was too fast, so * we slow down. */ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) - 1); tp->snd_ssthresh = tcp_vegas_ssthresh(tp); } else if (diff < alpha) { /* We don't have enough extra packets * in the network, so speed up. */ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1); } else { /* Sending just as fast as we * should be. */ } } if (tcp_snd_cwnd(tp) < 2) tcp_snd_cwnd_set(tp, 2); else if (tcp_snd_cwnd(tp) > tp->snd_cwnd_clamp) tcp_snd_cwnd_set(tp, tp->snd_cwnd_clamp); tp->snd_ssthresh = tcp_current_ssthresh(sk); } /* Wipe the slate clean for the next RTT. */ vegas->cntRTT = 0; vegas->minRTT = 0x7fffffff; } /* Use normal slow start */ else if (tcp_in_slow_start(tp)) tcp_slow_start(tp, acked); } /* Extract info for Tcp socket info provided via netlink. */ size_t tcp_vegas_get_info(struct sock *sk, u32 ext, int *attr, union tcp_cc_info *info) { const struct vegas *ca = inet_csk_ca(sk); if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) { info->vegas.tcpv_enabled = ca->doing_vegas_now; info->vegas.tcpv_rttcnt = ca->cntRTT; info->vegas.tcpv_rtt = ca->baseRTT; info->vegas.tcpv_minrtt = ca->minRTT; *attr = INET_DIAG_VEGASINFO; return sizeof(struct tcpvegas_info); } return 0; } EXPORT_SYMBOL_GPL(tcp_vegas_get_info); static struct tcp_congestion_ops tcp_vegas __read_mostly = { .init = tcp_vegas_init, .ssthresh = tcp_reno_ssthresh, .undo_cwnd = tcp_reno_undo_cwnd, .cong_avoid = tcp_vegas_cong_avoid, .pkts_acked = tcp_vegas_pkts_acked, .set_state = tcp_vegas_state, .cwnd_event = tcp_vegas_cwnd_event, .get_info = tcp_vegas_get_info, .owner = THIS_MODULE, .name = "vegas", }; static int __init tcp_vegas_register(void) { BUILD_BUG_ON(sizeof(struct vegas) > ICSK_CA_PRIV_SIZE); tcp_register_congestion_control(&tcp_vegas); return 0; } static void __exit tcp_vegas_unregister(void) { tcp_unregister_congestion_control(&tcp_vegas); } module_init(tcp_vegas_register); module_exit(tcp_vegas_unregister); MODULE_AUTHOR("Stephen Hemminger"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("TCP Vegas");
16 16 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 // SPDX-License-Identifier: GPL-2.0-only /* This is a module which is used to mark packets for tracing. */ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/netfilter/x_tables.h> #include <net/netfilter/nf_log.h> MODULE_DESCRIPTION("Xtables: packet flow tracing"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_TRACE"); MODULE_ALIAS("ip6t_TRACE"); static int trace_tg_check(const struct xt_tgchk_param *par) { return nf_logger_find_get(par->family, NF_LOG_TYPE_LOG); } static void trace_tg_destroy(const struct xt_tgdtor_param *par) { nf_logger_put(par->family, NF_LOG_TYPE_LOG); } static unsigned int trace_tg(struct sk_buff *skb, const struct xt_action_param *par) { skb->nf_trace = 1; return XT_CONTINUE; } static struct xt_target trace_tg_reg[] __read_mostly = { { .name = "TRACE", .revision = 0, .family = NFPROTO_IPV4, .table = "raw", .target = trace_tg, .checkentry = trace_tg_check, .destroy = trace_tg_destroy, .me = THIS_MODULE, }, #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) { .name = "TRACE", .revision = 0, .family = NFPROTO_IPV6, .table = "raw", .target = trace_tg, .checkentry = trace_tg_check, .destroy = trace_tg_destroy, }, #endif }; static int __init trace_tg_init(void) { return xt_register_targets(trace_tg_reg, ARRAY_SIZE(trace_tg_reg)); } static void __exit trace_tg_exit(void) { xt_unregister_targets(trace_tg_reg, ARRAY_SIZE(trace_tg_reg)); } module_init(trace_tg_init); module_exit(trace_tg_exit); MODULE_SOFTDEP("pre: nf_log_syslog");
16 11 11 16 1 11 1 4 26 36 25 35 45 15 16 15 15 34 10 10 26 34 18 23 34 32 32 1 1 1 1 1 2 2 2 25 11 10 1 4 16 3 2 13 4 2 2 1 1 2 5 2 1 1 2 11 4 34 34 33 1 45 34 1 60 1 1 1 1 1 3 13 13 19 36 31 28 28 25 53 55 1 2 1 33 1 4 16 16 16 16 1 1 16 13 13 51 30 31 2 6 51 29 5 5 3 29 33 8 17 1 3 33 26 2 7 7 4 3 1 1 1 1 16 3 16 2 26 2 1 1 1 1 47 47 25 25 1 25 35 1 58 74 72 24 74 1 13 74 73 59 9 7 9 60 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM ocfs2 #if !defined(_TRACE_OCFS2_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_OCFS2_H #include <linux/tracepoint.h> DECLARE_EVENT_CLASS(ocfs2__int, TP_PROTO(int num), TP_ARGS(num), TP_STRUCT__entry( __field(int, num) ), TP_fast_assign( __entry->num = num; ), TP_printk("%d", __entry->num) ); #define DEFINE_OCFS2_INT_EVENT(name) \ DEFINE_EVENT(ocfs2__int, name, \ TP_PROTO(int num), \ TP_ARGS(num)) DECLARE_EVENT_CLASS(ocfs2__uint, TP_PROTO(unsigned int num), TP_ARGS(num), TP_STRUCT__entry( __field( unsigned int, num ) ), TP_fast_assign( __entry->num = num; ), TP_printk("%u", __entry->num) ); #define DEFINE_OCFS2_UINT_EVENT(name) \ DEFINE_EVENT(ocfs2__uint, name, \ TP_PROTO(unsigned int num), \ TP_ARGS(num)) DECLARE_EVENT_CLASS(ocfs2__ull, TP_PROTO(unsigned long long blkno), TP_ARGS(blkno), TP_STRUCT__entry( __field(unsigned long long, blkno) ), TP_fast_assign( __entry->blkno = blkno; ), TP_printk("%llu", __entry->blkno) ); #define DEFINE_OCFS2_ULL_EVENT(name) \ DEFINE_EVENT(ocfs2__ull, name, \ TP_PROTO(unsigned long long num), \ TP_ARGS(num)) DECLARE_EVENT_CLASS(ocfs2__pointer, TP_PROTO(void *pointer), TP_ARGS(pointer), TP_STRUCT__entry( __field(void *, pointer) ), TP_fast_assign( __entry->pointer = pointer; ), TP_printk("%p", __entry->pointer) ); #define DEFINE_OCFS2_POINTER_EVENT(name) \ DEFINE_EVENT(ocfs2__pointer, name, \ TP_PROTO(void *pointer), \ TP_ARGS(pointer)) DECLARE_EVENT_CLASS(ocfs2__string, TP_PROTO(const char *name), TP_ARGS(name), TP_STRUCT__entry( __string(name,name) ), TP_fast_assign( __assign_str(name); ), TP_printk("%s", __get_str(name)) ); #define DEFINE_OCFS2_STRING_EVENT(name) \ DEFINE_EVENT(ocfs2__string, name, \ TP_PROTO(const char *name), \ TP_ARGS(name)) DECLARE_EVENT_CLASS(ocfs2__int_int, TP_PROTO(int value1, int value2), TP_ARGS(value1, value2), TP_STRUCT__entry( __field(int, value1) __field(int, value2) ), TP_fast_assign( __entry->value1 = value1; __entry->value2 = value2; ), TP_printk("%d %d", __entry->value1, __entry->value2) ); #define DEFINE_OCFS2_INT_INT_EVENT(name) \ DEFINE_EVENT(ocfs2__int_int, name, \ TP_PROTO(int val1, int val2), \ TP_ARGS(val1, val2)) DECLARE_EVENT_CLASS(ocfs2__uint_int, TP_PROTO(unsigned int value1, int value2), TP_ARGS(value1, value2), TP_STRUCT__entry( __field(unsigned int, value1) __field(int, value2) ), TP_fast_assign( __entry->value1 = value1; __entry->value2 = value2; ), TP_printk("%u %d", __entry->value1, __entry->value2) ); #define DEFINE_OCFS2_UINT_INT_EVENT(name) \ DEFINE_EVENT(ocfs2__uint_int, name, \ TP_PROTO(unsigned int val1, int val2), \ TP_ARGS(val1, val2)) DECLARE_EVENT_CLASS(ocfs2__uint_uint, TP_PROTO(unsigned int value1, unsigned int value2), TP_ARGS(value1, value2), TP_STRUCT__entry( __field(unsigned int, value1) __field(unsigned int, value2) ), TP_fast_assign( __entry->value1 = value1; __entry->value2 = value2; ), TP_printk("%u %u", __entry->value1, __entry->value2) ); #define DEFINE_OCFS2_UINT_UINT_EVENT(name) \ DEFINE_EVENT(ocfs2__uint_uint, name, \ TP_PROTO(unsigned int val1, unsigned int val2), \ TP_ARGS(val1, val2)) DECLARE_EVENT_CLASS(ocfs2__ull_uint, TP_PROTO(unsigned long long value1, unsigned int value2), TP_ARGS(value1, value2), TP_STRUCT__entry( __field(unsigned long long, value1) __field(unsigned int, value2) ), TP_fast_assign( __entry->value1 = value1; __entry->value2 = value2; ), TP_printk("%llu %u", __entry->value1, __entry->value2) ); #define DEFINE_OCFS2_ULL_UINT_EVENT(name) \ DEFINE_EVENT(ocfs2__ull_uint, name, \ TP_PROTO(unsigned long long val1, unsigned int val2), \ TP_ARGS(val1, val2)) DECLARE_EVENT_CLASS(ocfs2__ull_int, TP_PROTO(unsigned long long value1, int value2), TP_ARGS(value1, value2), TP_STRUCT__entry( __field(unsigned long long, value1) __field(int, value2) ), TP_fast_assign( __entry->value1 = value1; __entry->value2 = value2; ), TP_printk("%llu %d", __entry->value1, __entry->value2) ); #define DEFINE_OCFS2_ULL_INT_EVENT(name) \ DEFINE_EVENT(ocfs2__ull_int, name, \ TP_PROTO(unsigned long long val1, int val2), \ TP_ARGS(val1, val2)) DECLARE_EVENT_CLASS(ocfs2__ull_ull, TP_PROTO(unsigned long long value1, unsigned long long value2), TP_ARGS(value1, value2), TP_STRUCT__entry( __field(unsigned long long, value1) __field(unsigned long long, value2) ), TP_fast_assign( __entry->value1 = value1; __entry->value2 = value2; ), TP_printk("%llu %llu", __entry->value1, __entry->value2) ); #define DEFINE_OCFS2_ULL_ULL_EVENT(name) \ DEFINE_EVENT(ocfs2__ull_ull, name, \ TP_PROTO(unsigned long long val1, unsigned long long val2), \ TP_ARGS(val1, val2)) DECLARE_EVENT_CLASS(ocfs2__ull_ull_uint, TP_PROTO(unsigned long long value1, unsigned long long value2, unsigned int value3), TP_ARGS(value1, value2, value3), TP_STRUCT__entry( __field(unsigned long long, value1) __field(unsigned long long, value2) __field(unsigned int, value3) ), TP_fast_assign( __entry->value1 = value1; __entry->value2 = value2; __entry->value3 = value3; ), TP_printk("%llu %llu %u", __entry->value1, __entry->value2, __entry->value3) ); #define DEFINE_OCFS2_ULL_ULL_UINT_EVENT(name) \ DEFINE_EVENT(ocfs2__ull_ull_uint, name, \ TP_PROTO(unsigned long long val1, \ unsigned long long val2, unsigned int val3), \ TP_ARGS(val1, val2, val3)) DECLARE_EVENT_CLASS(ocfs2__ull_uint_uint, TP_PROTO(unsigned long long value1, unsigned int value2, unsigned int value3), TP_ARGS(value1, value2, value3), TP_STRUCT__entry( __field(unsigned long long, value1) __field(unsigned int, value2) __field(unsigned int, value3) ), TP_fast_assign( __entry->value1 = value1; __entry->value2 = value2; __entry->value3 = value3; ), TP_printk("%llu %u %u", __entry->value1, __entry->value2, __entry->value3) ); #define DEFINE_OCFS2_ULL_UINT_UINT_EVENT(name) \ DEFINE_EVENT(ocfs2__ull_uint_uint, name, \ TP_PROTO(unsigned long long val1, \ unsigned int val2, unsigned int val3), \ TP_ARGS(val1, val2, val3)) DECLARE_EVENT_CLASS(ocfs2__uint_uint_uint, TP_PROTO(unsigned int value1, unsigned int value2, unsigned int value3), TP_ARGS(value1, value2, value3), TP_STRUCT__entry( __field( unsigned int, value1 ) __field( unsigned int, value2 ) __field( unsigned int, value3 ) ), TP_fast_assign( __entry->value1 = value1; __entry->value2 = value2; __entry->value3 = value3; ), TP_printk("%u %u %u", __entry->value1, __entry->value2, __entry->value3) ); #define DEFINE_OCFS2_UINT_UINT_UINT_EVENT(name) \ DEFINE_EVENT(ocfs2__uint_uint_uint, name, \ TP_PROTO(unsigned int value1, unsigned int value2, \ unsigned int value3), \ TP_ARGS(value1, value2, value3)) DECLARE_EVENT_CLASS(ocfs2__ull_ull_ull, TP_PROTO(unsigned long long value1, unsigned long long value2, unsigned long long value3), TP_ARGS(value1, value2, value3), TP_STRUCT__entry( __field(unsigned long long, value1) __field(unsigned long long, value2) __field(unsigned long long, value3) ), TP_fast_assign( __entry->value1 = value1; __entry->value2 = value2; __entry->value3 = value3; ), TP_printk("%llu %llu %llu", __entry->value1, __entry->value2, __entry->value3) ); #define DEFINE_OCFS2_ULL_ULL_ULL_EVENT(name) \ DEFINE_EVENT(ocfs2__ull_ull_ull, name, \ TP_PROTO(unsigned long long value1, unsigned long long value2, \ unsigned long long value3), \ TP_ARGS(value1, value2, value3)) DECLARE_EVENT_CLASS(ocfs2__ull_int_int_int, TP_PROTO(unsigned long long ull, int value1, int value2, int value3), TP_ARGS(ull, value1, value2, value3), TP_STRUCT__entry( __field( unsigned long long, ull ) __field( int, value1 ) __field( int, value2 ) __field( int, value3 ) ), TP_fast_assign( __entry->ull = ull; __entry->value1 = value1; __entry->value2 = value2; __entry->value3 = value3; ), TP_printk("%llu %d %d %d", __entry->ull, __entry->value1, __entry->value2, __entry->value3) ); #define DEFINE_OCFS2_ULL_INT_INT_INT_EVENT(name) \ DEFINE_EVENT(ocfs2__ull_int_int_int, name, \ TP_PROTO(unsigned long long ull, int value1, \ int value2, int value3), \ TP_ARGS(ull, value1, value2, value3)) DECLARE_EVENT_CLASS(ocfs2__ull_uint_uint_uint, TP_PROTO(unsigned long long ull, unsigned int value1, unsigned int value2, unsigned int value3), TP_ARGS(ull, value1, value2, value3), TP_STRUCT__entry( __field(unsigned long long, ull) __field(unsigned int, value1) __field(unsigned int, value2) __field(unsigned int, value3) ), TP_fast_assign( __entry->ull = ull; __entry->value1 = value1; __entry->value2 = value2; __entry->value3 = value3; ), TP_printk("%llu %u %u %u", __entry->ull, __entry->value1, __entry->value2, __entry->value3) ); #define DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(name) \ DEFINE_EVENT(ocfs2__ull_uint_uint_uint, name, \ TP_PROTO(unsigned long long ull, unsigned int value1, \ unsigned int value2, unsigned int value3), \ TP_ARGS(ull, value1, value2, value3)) DECLARE_EVENT_CLASS(ocfs2__ull_ull_uint_uint, TP_PROTO(unsigned long long value1, unsigned long long value2, unsigned int value3, unsigned int value4), TP_ARGS(value1, value2, value3, value4), TP_STRUCT__entry( __field(unsigned long long, value1) __field(unsigned long long, value2) __field(unsigned int, value3) __field(unsigned int, value4) ), TP_fast_assign( __entry->value1 = value1; __entry->value2 = value2; __entry->value3 = value3; __entry->value4 = value4; ), TP_printk("%llu %llu %u %u", __entry->value1, __entry->value2, __entry->value3, __entry->value4) ); #define DEFINE_OCFS2_ULL_ULL_UINT_UINT_EVENT(name) \ DEFINE_EVENT(ocfs2__ull_ull_uint_uint, name, \ TP_PROTO(unsigned long long ull, unsigned long long ull1, \ unsigned int value2, unsigned int value3), \ TP_ARGS(ull, ull1, value2, value3)) /* Trace events for fs/ocfs2/alloc.c. */ DECLARE_EVENT_CLASS(ocfs2__btree_ops, TP_PROTO(unsigned long long owner,\ unsigned int value1, unsigned int value2), TP_ARGS(owner, value1, value2), TP_STRUCT__entry( __field(unsigned long long, owner) __field(unsigned int, value1) __field(unsigned int, value2) ), TP_fast_assign( __entry->owner = owner; __entry->value1 = value1; __entry->value2 = value2; ), TP_printk("%llu %u %u", __entry->owner, __entry->value1, __entry->value2) ); #define DEFINE_OCFS2_BTREE_EVENT(name) \ DEFINE_EVENT(ocfs2__btree_ops, name, \ TP_PROTO(unsigned long long owner, \ unsigned int value1, unsigned int value2), \ TP_ARGS(owner, value1, value2)) DEFINE_OCFS2_BTREE_EVENT(ocfs2_adjust_rightmost_branch); DEFINE_OCFS2_BTREE_EVENT(ocfs2_rotate_tree_right); DEFINE_OCFS2_BTREE_EVENT(ocfs2_append_rec_to_path); DEFINE_OCFS2_BTREE_EVENT(ocfs2_insert_extent_start); DEFINE_OCFS2_BTREE_EVENT(ocfs2_add_clusters_in_btree); DEFINE_OCFS2_INT_EVENT(ocfs2_num_free_extents); DEFINE_OCFS2_INT_EVENT(ocfs2_complete_edge_insert); TRACE_EVENT(ocfs2_grow_tree, TP_PROTO(unsigned long long owner, int depth), TP_ARGS(owner, depth), TP_STRUCT__entry( __field(unsigned long long, owner) __field(int, depth) ), TP_fast_assign( __entry->owner = owner; __entry->depth = depth; ), TP_printk("%llu %d", __entry->owner, __entry->depth) ); TRACE_EVENT(ocfs2_rotate_subtree, TP_PROTO(int subtree_root, unsigned long long blkno, int depth), TP_ARGS(subtree_root, blkno, depth), TP_STRUCT__entry( __field(int, subtree_root) __field(unsigned long long, blkno) __field(int, depth) ), TP_fast_assign( __entry->subtree_root = subtree_root; __entry->blkno = blkno; __entry->depth = depth; ), TP_printk("%d %llu %d", __entry->subtree_root, __entry->blkno, __entry->depth) ); TRACE_EVENT(ocfs2_insert_extent, TP_PROTO(unsigned int ins_appending, unsigned int ins_contig, int ins_contig_index, int free_records, int ins_tree_depth), TP_ARGS(ins_appending, ins_contig, ins_contig_index, free_records, ins_tree_depth), TP_STRUCT__entry( __field(unsigned int, ins_appending) __field(unsigned int, ins_contig) __field(int, ins_contig_index) __field(int, free_records) __field(int, ins_tree_depth) ), TP_fast_assign( __entry->ins_appending = ins_appending; __entry->ins_contig = ins_contig; __entry->ins_contig_index = ins_contig_index; __entry->free_records = free_records; __entry->ins_tree_depth = ins_tree_depth; ), TP_printk("%u %u %d %d %d", __entry->ins_appending, __entry->ins_contig, __entry->ins_contig_index, __entry->free_records, __entry->ins_tree_depth) ); TRACE_EVENT(ocfs2_split_extent, TP_PROTO(int split_index, unsigned int c_contig_type, unsigned int c_has_empty_extent, unsigned int c_split_covers_rec), TP_ARGS(split_index, c_contig_type, c_has_empty_extent, c_split_covers_rec), TP_STRUCT__entry( __field(int, split_index) __field(unsigned int, c_contig_type) __field(unsigned int, c_has_empty_extent) __field(unsigned int, c_split_covers_rec) ), TP_fast_assign( __entry->split_index = split_index; __entry->c_contig_type = c_contig_type; __entry->c_has_empty_extent = c_has_empty_extent; __entry->c_split_covers_rec = c_split_covers_rec; ), TP_printk("%d %u %u %u", __entry->split_index, __entry->c_contig_type, __entry->c_has_empty_extent, __entry->c_split_covers_rec) ); TRACE_EVENT(ocfs2_remove_extent, TP_PROTO(unsigned long long owner, unsigned int cpos, unsigned int len, int index, unsigned int e_cpos, unsigned int clusters), TP_ARGS(owner, cpos, len, index, e_cpos, clusters), TP_STRUCT__entry( __field(unsigned long long, owner) __field(unsigned int, cpos) __field(unsigned int, len) __field(int, index) __field(unsigned int, e_cpos) __field(unsigned int, clusters) ), TP_fast_assign( __entry->owner = owner; __entry->cpos = cpos; __entry->len = len; __entry->index = index; __entry->e_cpos = e_cpos; __entry->clusters = clusters; ), TP_printk("%llu %u %u %d %u %u", __entry->owner, __entry->cpos, __entry->len, __entry->index, __entry->e_cpos, __entry->clusters) ); TRACE_EVENT(ocfs2_commit_truncate, TP_PROTO(unsigned long long ino, unsigned int new_cpos, unsigned int clusters, unsigned int depth), TP_ARGS(ino, new_cpos, clusters, depth), TP_STRUCT__entry( __field(unsigned long long, ino) __field(unsigned int, new_cpos) __field(unsigned int, clusters) __field(unsigned int, depth) ), TP_fast_assign( __entry->ino = ino; __entry->new_cpos = new_cpos; __entry->clusters = clusters; __entry->depth = depth; ), TP_printk("%llu %u %u %u", __entry->ino, __entry->new_cpos, __entry->clusters, __entry->depth) ); TRACE_EVENT(ocfs2_validate_extent_block, TP_PROTO(unsigned long long blkno), TP_ARGS(blkno), TP_STRUCT__entry( __field(unsigned long long, blkno) ), TP_fast_assign( __entry->blkno = blkno; ), TP_printk("%llu ", __entry->blkno) ); TRACE_EVENT(ocfs2_rotate_leaf, TP_PROTO(unsigned int insert_cpos, int insert_index, int has_empty, int next_free, unsigned int l_count), TP_ARGS(insert_cpos, insert_index, has_empty, next_free, l_count), TP_STRUCT__entry( __field(unsigned int, insert_cpos) __field(int, insert_index) __field(int, has_empty) __field(int, next_free) __field(unsigned int, l_count) ), TP_fast_assign( __entry->insert_cpos = insert_cpos; __entry->insert_index = insert_index; __entry->has_empty = has_empty; __entry->next_free = next_free; __entry->l_count = l_count; ), TP_printk("%u %d %d %d %u", __entry->insert_cpos, __entry->insert_index, __entry->has_empty, __entry->next_free, __entry->l_count) ); TRACE_EVENT(ocfs2_add_clusters_in_btree_ret, TP_PROTO(int status, int reason, int err), TP_ARGS(status, reason, err), TP_STRUCT__entry( __field(int, status) __field(int, reason) __field(int, err) ), TP_fast_assign( __entry->status = status; __entry->reason = reason; __entry->err = err; ), TP_printk("%d %d %d", __entry->status, __entry->reason, __entry->err) ); TRACE_EVENT(ocfs2_mark_extent_written, TP_PROTO(unsigned long long owner, unsigned int cpos, unsigned int len, unsigned int phys), TP_ARGS(owner, cpos, len, phys), TP_STRUCT__entry( __field(unsigned long long, owner) __field(unsigned int, cpos) __field(unsigned int, len) __field(unsigned int, phys) ), TP_fast_assign( __entry->owner = owner; __entry->cpos = cpos; __entry->len = len; __entry->phys = phys; ), TP_printk("%llu %u %u %u", __entry->owner, __entry->cpos, __entry->len, __entry->phys) ); DECLARE_EVENT_CLASS(ocfs2__truncate_log_ops, TP_PROTO(unsigned long long blkno, int index, unsigned int start, unsigned int num), TP_ARGS(blkno, index, start, num), TP_STRUCT__entry( __field(unsigned long long, blkno) __field(int, index) __field(unsigned int, start) __field(unsigned int, num) ), TP_fast_assign( __entry->blkno = blkno; __entry->index = index; __entry->start = start; __entry->num = num; ), TP_printk("%llu %d %u %u", __entry->blkno, __entry->index, __entry->start, __entry->num) ); #define DEFINE_OCFS2_TRUNCATE_LOG_OPS_EVENT(name) \ DEFINE_EVENT(ocfs2__truncate_log_ops, name, \ TP_PROTO(unsigned long long blkno, int index, \ unsigned int start, unsigned int num), \ TP_ARGS(blkno, index, start, num)) DEFINE_OCFS2_TRUNCATE_LOG_OPS_EVENT(ocfs2_truncate_log_append); DEFINE_OCFS2_TRUNCATE_LOG_OPS_EVENT(ocfs2_replay_truncate_records); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_flush_truncate_log); DEFINE_OCFS2_INT_EVENT(ocfs2_begin_truncate_log_recovery); DEFINE_OCFS2_INT_EVENT(ocfs2_truncate_log_recovery_num); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_complete_truncate_log_recovery); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_free_cached_blocks); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_cache_cluster_dealloc); DEFINE_OCFS2_INT_INT_EVENT(ocfs2_run_deallocs); TRACE_EVENT(ocfs2_cache_block_dealloc, TP_PROTO(int type, int slot, unsigned long long suballoc, unsigned long long blkno, unsigned int bit), TP_ARGS(type, slot, suballoc, blkno, bit), TP_STRUCT__entry( __field(int, type) __field(int, slot) __field(unsigned long long, suballoc) __field(unsigned long long, blkno) __field(unsigned int, bit) ), TP_fast_assign( __entry->type = type; __entry->slot = slot; __entry->suballoc = suballoc; __entry->blkno = blkno; __entry->bit = bit; ), TP_printk("%d %d %llu %llu %u", __entry->type, __entry->slot, __entry->suballoc, __entry->blkno, __entry->bit) ); TRACE_EVENT(ocfs2_trim_extent, TP_PROTO(struct super_block *sb, unsigned long long blk, unsigned long long count), TP_ARGS(sb, blk, count), TP_STRUCT__entry( __field(int, dev_major) __field(int, dev_minor) __field(unsigned long long, blk) __field(__u64, count) ), TP_fast_assign( __entry->dev_major = MAJOR(sb->s_dev); __entry->dev_minor = MINOR(sb->s_dev); __entry->blk = blk; __entry->count = count; ), TP_printk("%d %d %llu %llu", __entry->dev_major, __entry->dev_minor, __entry->blk, __entry->count) ); DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(ocfs2_trim_group); DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_trim_mainbm); DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_trim_fs); /* End of trace events for fs/ocfs2/alloc.c. */ /* Trace events for fs/ocfs2/localalloc.c. */ DEFINE_OCFS2_UINT_UINT_UINT_EVENT(ocfs2_la_set_sizes); DEFINE_OCFS2_ULL_INT_INT_INT_EVENT(ocfs2_alloc_should_use_local); DEFINE_OCFS2_INT_EVENT(ocfs2_load_local_alloc); DEFINE_OCFS2_INT_EVENT(ocfs2_begin_local_alloc_recovery); DEFINE_OCFS2_ULL_INT_INT_INT_EVENT(ocfs2_reserve_local_alloc_bits); DEFINE_OCFS2_UINT_EVENT(ocfs2_local_alloc_count_bits); DEFINE_OCFS2_INT_INT_EVENT(ocfs2_local_alloc_find_clear_bits_search_bitmap); DEFINE_OCFS2_ULL_INT_INT_INT_EVENT(ocfs2_local_alloc_find_clear_bits); DEFINE_OCFS2_INT_INT_EVENT(ocfs2_sync_local_to_main); TRACE_EVENT(ocfs2_sync_local_to_main_free, TP_PROTO(int count, int bit, unsigned long long start_blk, unsigned long long blkno), TP_ARGS(count, bit, start_blk, blkno), TP_STRUCT__entry( __field(int, count) __field(int, bit) __field(unsigned long long, start_blk) __field(unsigned long long, blkno) ), TP_fast_assign( __entry->count = count; __entry->bit = bit; __entry->start_blk = start_blk; __entry->blkno = blkno; ), TP_printk("%d %d %llu %llu", __entry->count, __entry->bit, __entry->start_blk, __entry->blkno) ); DEFINE_OCFS2_INT_INT_EVENT(ocfs2_local_alloc_new_window); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_local_alloc_new_window_result); /* End of trace events for fs/ocfs2/localalloc.c. */ /* Trace events for fs/ocfs2/resize.c. */ DEFINE_OCFS2_UINT_UINT_EVENT(ocfs2_update_last_group_and_inode); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_group_extend); DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(ocfs2_group_add); /* End of trace events for fs/ocfs2/resize.c. */ /* Trace events for fs/ocfs2/suballoc.c. */ DEFINE_OCFS2_ULL_EVENT(ocfs2_validate_group_descriptor); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_block_group_alloc_contig); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_block_group_alloc_discontig); DEFINE_OCFS2_ULL_EVENT(ocfs2_block_group_alloc); DEFINE_OCFS2_UINT_UINT_EVENT(ocfs2_reserve_suballoc_bits_nospc); DEFINE_OCFS2_UINT_UINT_UINT_EVENT(ocfs2_reserve_suballoc_bits_no_new_group); DEFINE_OCFS2_ULL_EVENT(ocfs2_reserve_new_inode_new_group); DEFINE_OCFS2_UINT_UINT_EVENT(ocfs2_block_group_set_bits); TRACE_EVENT(ocfs2_relink_block_group, TP_PROTO(unsigned long long i_blkno, unsigned int chain, unsigned long long bg_blkno, unsigned long long prev_blkno), TP_ARGS(i_blkno, chain, bg_blkno, prev_blkno), TP_STRUCT__entry( __field(unsigned long long, i_blkno) __field(unsigned int, chain) __field(unsigned long long, bg_blkno) __field(unsigned long long, prev_blkno) ), TP_fast_assign( __entry->i_blkno = i_blkno; __entry->chain = chain; __entry->bg_blkno = bg_blkno; __entry->prev_blkno = prev_blkno; ), TP_printk("%llu %u %llu %llu", __entry->i_blkno, __entry->chain, __entry->bg_blkno, __entry->prev_blkno) ); DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(ocfs2_cluster_group_search_wrong_max_bits); DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_cluster_group_search_max_block); DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_block_group_search_max_block); DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_search_chain_begin); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_search_chain_succ); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_search_chain_end); DEFINE_OCFS2_UINT_EVENT(ocfs2_claim_suballoc_bits); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_claim_new_inode_at_loc); DEFINE_OCFS2_UINT_UINT_EVENT(ocfs2_block_group_clear_bits); TRACE_EVENT(ocfs2_free_suballoc_bits, TP_PROTO(unsigned long long inode, unsigned long long group, unsigned int start_bit, unsigned int count), TP_ARGS(inode, group, start_bit, count), TP_STRUCT__entry( __field(unsigned long long, inode) __field(unsigned long long, group) __field(unsigned int, start_bit) __field(unsigned int, count) ), TP_fast_assign( __entry->inode = inode; __entry->group = group; __entry->start_bit = start_bit; __entry->count = count; ), TP_printk("%llu %llu %u %u", __entry->inode, __entry->group, __entry->start_bit, __entry->count) ); TRACE_EVENT(ocfs2_free_clusters, TP_PROTO(unsigned long long bg_blkno, unsigned long long start_blk, unsigned int start_bit, unsigned int count), TP_ARGS(bg_blkno, start_blk, start_bit, count), TP_STRUCT__entry( __field(unsigned long long, bg_blkno) __field(unsigned long long, start_blk) __field(unsigned int, start_bit) __field(unsigned int, count) ), TP_fast_assign( __entry->bg_blkno = bg_blkno; __entry->start_blk = start_blk; __entry->start_bit = start_bit; __entry->count = count; ), TP_printk("%llu %llu %u %u", __entry->bg_blkno, __entry->start_blk, __entry->start_bit, __entry->count) ); DEFINE_OCFS2_ULL_EVENT(ocfs2_get_suballoc_slot_bit); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_test_suballoc_bit); DEFINE_OCFS2_ULL_EVENT(ocfs2_test_inode_bit); /* End of trace events for fs/ocfs2/suballoc.c. */ /* Trace events for fs/ocfs2/refcounttree.c. */ DEFINE_OCFS2_ULL_EVENT(ocfs2_validate_refcount_block); DEFINE_OCFS2_ULL_EVENT(ocfs2_purge_refcount_trees); DEFINE_OCFS2_ULL_EVENT(ocfs2_create_refcount_tree); DEFINE_OCFS2_ULL_EVENT(ocfs2_create_refcount_tree_blkno); DEFINE_OCFS2_ULL_INT_INT_INT_EVENT(ocfs2_change_refcount_rec); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_expand_inline_ref_root); DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_divide_leaf_refcount_block); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_new_leaf_refcount_block); DECLARE_EVENT_CLASS(ocfs2__refcount_tree_ops, TP_PROTO(unsigned long long blkno, int index, unsigned long long cpos, unsigned int clusters, unsigned int refcount), TP_ARGS(blkno, index, cpos, clusters, refcount), TP_STRUCT__entry( __field(unsigned long long, blkno) __field(int, index) __field(unsigned long long, cpos) __field(unsigned int, clusters) __field(unsigned int, refcount) ), TP_fast_assign( __entry->blkno = blkno; __entry->index = index; __entry->cpos = cpos; __entry->clusters = clusters; __entry->refcount = refcount; ), TP_printk("%llu %d %llu %u %u", __entry->blkno, __entry->index, __entry->cpos, __entry->clusters, __entry->refcount) ); #define DEFINE_OCFS2_REFCOUNT_TREE_OPS_EVENT(name) \ DEFINE_EVENT(ocfs2__refcount_tree_ops, name, \ TP_PROTO(unsigned long long blkno, int index, \ unsigned long long cpos, \ unsigned int count, unsigned int refcount), \ TP_ARGS(blkno, index, cpos, count, refcount)) DEFINE_OCFS2_REFCOUNT_TREE_OPS_EVENT(ocfs2_insert_refcount_rec); TRACE_EVENT(ocfs2_split_refcount_rec, TP_PROTO(unsigned long long cpos, unsigned int clusters, unsigned int refcount, unsigned long long split_cpos, unsigned int split_clusters, unsigned int split_refcount), TP_ARGS(cpos, clusters, refcount, split_cpos, split_clusters, split_refcount), TP_STRUCT__entry( __field(unsigned long long, cpos) __field(unsigned int, clusters) __field(unsigned int, refcount) __field(unsigned long long, split_cpos) __field(unsigned int, split_clusters) __field(unsigned int, split_refcount) ), TP_fast_assign( __entry->cpos = cpos; __entry->clusters = clusters; __entry->refcount = refcount; __entry->split_cpos = split_cpos; __entry->split_clusters = split_clusters; __entry->split_refcount = split_refcount; ), TP_printk("%llu %u %u %llu %u %u", __entry->cpos, __entry->clusters, __entry->refcount, __entry->split_cpos, __entry->split_clusters, __entry->split_refcount) ); DEFINE_OCFS2_REFCOUNT_TREE_OPS_EVENT(ocfs2_split_refcount_rec_insert); DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_increase_refcount_begin); DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_increase_refcount_change); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_increase_refcount_insert); DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_increase_refcount_split); DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_remove_refcount_extent); DEFINE_OCFS2_ULL_EVENT(ocfs2_restore_refcount_block); DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_decrease_refcount_rec); TRACE_EVENT(ocfs2_decrease_refcount, TP_PROTO(unsigned long long owner, unsigned long long cpos, unsigned int len, int delete), TP_ARGS(owner, cpos, len, delete), TP_STRUCT__entry( __field(unsigned long long, owner) __field(unsigned long long, cpos) __field(unsigned int, len) __field(int, delete) ), TP_fast_assign( __entry->owner = owner; __entry->cpos = cpos; __entry->len = len; __entry->delete = delete; ), TP_printk("%llu %llu %u %d", __entry->owner, __entry->cpos, __entry->len, __entry->delete) ); DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(ocfs2_mark_extent_refcounted); DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(ocfs2_calc_refcount_meta_credits); TRACE_EVENT(ocfs2_calc_refcount_meta_credits_iterate, TP_PROTO(int recs_add, unsigned long long cpos, unsigned int clusters, unsigned long long r_cpos, unsigned int r_clusters, unsigned int refcount, int index), TP_ARGS(recs_add, cpos, clusters, r_cpos, r_clusters, refcount, index), TP_STRUCT__entry( __field(int, recs_add) __field(unsigned long long, cpos) __field(unsigned int, clusters) __field(unsigned long long, r_cpos) __field(unsigned int, r_clusters) __field(unsigned int, refcount) __field(int, index) ), TP_fast_assign( __entry->recs_add = recs_add; __entry->cpos = cpos; __entry->clusters = clusters; __entry->r_cpos = r_cpos; __entry->r_clusters = r_clusters; __entry->refcount = refcount; __entry->index = index; ), TP_printk("%d %llu %u %llu %u %u %d", __entry->recs_add, __entry->cpos, __entry->clusters, __entry->r_cpos, __entry->r_clusters, __entry->refcount, __entry->index) ); DEFINE_OCFS2_INT_INT_EVENT(ocfs2_add_refcount_flag); DEFINE_OCFS2_INT_INT_EVENT(ocfs2_prepare_refcount_change_for_del); DEFINE_OCFS2_INT_INT_EVENT(ocfs2_lock_refcount_allocators); DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(ocfs2_duplicate_clusters_by_page); DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(ocfs2_duplicate_clusters_by_jbd); TRACE_EVENT(ocfs2_clear_ext_refcount, TP_PROTO(unsigned long long ino, unsigned int cpos, unsigned int len, unsigned int p_cluster, unsigned int ext_flags), TP_ARGS(ino, cpos, len, p_cluster, ext_flags), TP_STRUCT__entry( __field(unsigned long long, ino) __field(unsigned int, cpos) __field(unsigned int, len) __field(unsigned int, p_cluster) __field(unsigned int, ext_flags) ), TP_fast_assign( __entry->ino = ino; __entry->cpos = cpos; __entry->len = len; __entry->p_cluster = p_cluster; __entry->ext_flags = ext_flags; ), TP_printk("%llu %u %u %u %u", __entry->ino, __entry->cpos, __entry->len, __entry->p_cluster, __entry->ext_flags) ); TRACE_EVENT(ocfs2_replace_clusters, TP_PROTO(unsigned long long ino, unsigned int cpos, unsigned int old, unsigned int new, unsigned int len, unsigned int ext_flags), TP_ARGS(ino, cpos, old, new, len, ext_flags), TP_STRUCT__entry( __field(unsigned long long, ino) __field(unsigned int, cpos) __field(unsigned int, old) __field(unsigned int, new) __field(unsigned int, len) __field(unsigned int, ext_flags) ), TP_fast_assign( __entry->ino = ino; __entry->cpos = cpos; __entry->old = old; __entry->new = new; __entry->len = len; __entry->ext_flags = ext_flags; ), TP_printk("%llu %u %u %u %u %u", __entry->ino, __entry->cpos, __entry->old, __entry->new, __entry->len, __entry->ext_flags) ); DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(ocfs2_make_clusters_writable); TRACE_EVENT(ocfs2_refcount_cow_hunk, TP_PROTO(unsigned long long ino, unsigned int cpos, unsigned int write_len, unsigned int max_cpos, unsigned int cow_start, unsigned int cow_len), TP_ARGS(ino, cpos, write_len, max_cpos, cow_start, cow_len), TP_STRUCT__entry( __field(unsigned long long, ino) __field(unsigned int, cpos) __field(unsigned int, write_len) __field(unsigned int, max_cpos) __field(unsigned int, cow_start) __field(unsigned int, cow_len) ), TP_fast_assign( __entry->ino = ino; __entry->cpos = cpos; __entry->write_len = write_len; __entry->max_cpos = max_cpos; __entry->cow_start = cow_start; __entry->cow_len = cow_len; ), TP_printk("%llu %u %u %u %u %u", __entry->ino, __entry->cpos, __entry->write_len, __entry->max_cpos, __entry->cow_start, __entry->cow_len) ); /* End of trace events for fs/ocfs2/refcounttree.c. */ /* Trace events for fs/ocfs2/aops.c. */ DECLARE_EVENT_CLASS(ocfs2__get_block, TP_PROTO(unsigned long long ino, unsigned long long iblock, void *bh_result, int create), TP_ARGS(ino, iblock, bh_result, create), TP_STRUCT__entry( __field(unsigned long long, ino) __field(unsigned long long, iblock) __field(void *, bh_result) __field(int, create) ), TP_fast_assign( __entry->ino = ino; __entry->iblock = iblock; __entry->bh_result = bh_result; __entry->create = create; ), TP_printk("%llu %llu %p %d", __entry->ino, __entry->iblock, __entry->bh_result, __entry->create) ); #define DEFINE_OCFS2_GET_BLOCK_EVENT(name) \ DEFINE_EVENT(ocfs2__get_block, name, \ TP_PROTO(unsigned long long ino, unsigned long long iblock, \ void *bh_result, int create), \ TP_ARGS(ino, iblock, bh_result, create)) DEFINE_OCFS2_GET_BLOCK_EVENT(ocfs2_symlink_get_block); DEFINE_OCFS2_GET_BLOCK_EVENT(ocfs2_get_block); DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_get_block_end); DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_readpage); DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_bmap); TRACE_EVENT(ocfs2_try_to_write_inline_data, TP_PROTO(unsigned long long ino, unsigned int len, unsigned long long pos, unsigned int flags), TP_ARGS(ino, len, pos, flags), TP_STRUCT__entry( __field(unsigned long long, ino) __field(unsigned int, len) __field(unsigned long long, pos) __field(unsigned int, flags) ), TP_fast_assign( __entry->ino = ino; __entry->len = len; __entry->pos = pos; __entry->flags = flags; ), TP_printk("%llu %u %llu 0x%x", __entry->ino, __entry->len, __entry->pos, __entry->flags) ); TRACE_EVENT(ocfs2_write_begin_nolock, TP_PROTO(unsigned long long ino, long long i_size, unsigned int i_clusters, unsigned long long pos, unsigned int len, unsigned int flags, void *page, unsigned int clusters, unsigned int extents_to_split), TP_ARGS(ino, i_size, i_clusters, pos, len, flags, page, clusters, extents_to_split), TP_STRUCT__entry( __field(unsigned long long, ino) __field(long long, i_size) __field(unsigned int, i_clusters) __field(unsigned long long, pos) __field(unsigned int, len) __field(unsigned int, flags) __field(void *, page) __field(unsigned int, clusters) __field(unsigned int, extents_to_split) ), TP_fast_assign( __entry->ino = ino; __entry->i_size = i_size; __entry->i_clusters = i_clusters; __entry->pos = pos; __entry->len = len; __entry->flags = flags; __entry->page = page; __entry->clusters = clusters; __entry->extents_to_split = extents_to_split; ), TP_printk("%llu %lld %u %llu %u %u %p %u %u", __entry->ino, __entry->i_size, __entry->i_clusters, __entry->pos, __entry->len, __entry->flags, __entry->page, __entry->clusters, __entry->extents_to_split) ); TRACE_EVENT(ocfs2_write_end_inline, TP_PROTO(unsigned long long ino, unsigned long long pos, unsigned int copied, unsigned int id_count, unsigned int features), TP_ARGS(ino, pos, copied, id_count, features), TP_STRUCT__entry( __field(unsigned long long, ino) __field(unsigned long long, pos) __field(unsigned int, copied) __field(unsigned int, id_count) __field(unsigned int, features) ), TP_fast_assign( __entry->ino = ino; __entry->pos = pos; __entry->copied = copied; __entry->id_count = id_count; __entry->features = features; ), TP_printk("%llu %llu %u %u %u", __entry->ino, __entry->pos, __entry->copied, __entry->id_count, __entry->features) ); /* End of trace events for fs/ocfs2/aops.c. */ /* Trace events for fs/ocfs2/mmap.c. */ TRACE_EVENT(ocfs2_fault, TP_PROTO(unsigned long long ino, void *area, void *page, unsigned long pgoff), TP_ARGS(ino, area, page, pgoff), TP_STRUCT__entry( __field(unsigned long long, ino) __field(void *, area) __field(void *, page) __field(unsigned long, pgoff) ), TP_fast_assign( __entry->ino = ino; __entry->area = area; __entry->page = page; __entry->pgoff = pgoff; ), TP_printk("%llu %p %p %lu", __entry->ino, __entry->area, __entry->page, __entry->pgoff) ); /* End of trace events for fs/ocfs2/mmap.c. */ /* Trace events for fs/ocfs2/file.c. */ DECLARE_EVENT_CLASS(ocfs2__file_ops, TP_PROTO(void *inode, void *file, void *dentry, unsigned long long ino, unsigned int d_len, const unsigned char *d_name, unsigned long long para), TP_ARGS(inode, file, dentry, ino, d_len, d_name, para), TP_STRUCT__entry( __field(void *, inode) __field(void *, file) __field(void *, dentry) __field(unsigned long long, ino) __field(unsigned int, d_len) __string(d_name, d_name) __field(unsigned long long, para) ), TP_fast_assign( __entry->inode = inode; __entry->file = file; __entry->dentry = dentry; __entry->ino = ino; __entry->d_len = d_len; __assign_str(d_name); __entry->para = para; ), TP_printk("%p %p %p %llu %llu %.*s", __entry->inode, __entry->file, __entry->dentry, __entry->ino, __entry->para, __entry->d_len, __get_str(d_name)) ); #define DEFINE_OCFS2_FILE_OPS(name) \ DEFINE_EVENT(ocfs2__file_ops, name, \ TP_PROTO(void *inode, void *file, void *dentry, \ unsigned long long ino, \ unsigned int d_len, const unsigned char *d_name, \ unsigned long long mode), \ TP_ARGS(inode, file, dentry, ino, d_len, d_name, mode)) DEFINE_OCFS2_FILE_OPS(ocfs2_file_open); DEFINE_OCFS2_FILE_OPS(ocfs2_file_release); DEFINE_OCFS2_FILE_OPS(ocfs2_sync_file); DEFINE_OCFS2_FILE_OPS(ocfs2_file_write_iter); DEFINE_OCFS2_FILE_OPS(ocfs2_file_read_iter); DEFINE_OCFS2_FILE_OPS(ocfs2_file_splice_read); DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_truncate_file); DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_truncate_file_error); TRACE_EVENT(ocfs2_extend_allocation, TP_PROTO(unsigned long long ip_blkno, unsigned long long size, unsigned int clusters, unsigned int clusters_to_add, int why, int restart_func), TP_ARGS(ip_blkno, size, clusters, clusters_to_add, why, restart_func), TP_STRUCT__entry( __field(unsigned long long, ip_blkno) __field(unsigned long long, size) __field(unsigned int, clusters) __field(unsigned int, clusters_to_add) __field(int, why) __field(int, restart_func) ), TP_fast_assign( __entry->ip_blkno = ip_blkno; __entry->size = size; __entry->clusters = clusters; __entry->clusters_to_add = clusters_to_add; __entry->why = why; __entry->restart_func = restart_func; ), TP_printk("%llu %llu %u %u %d %d", __entry->ip_blkno, __entry->size, __entry->clusters, __entry->clusters_to_add, __entry->why, __entry->restart_func) ); TRACE_EVENT(ocfs2_extend_allocation_end, TP_PROTO(unsigned long long ino, unsigned int di_clusters, unsigned long long di_size, unsigned int ip_clusters, unsigned long long i_size), TP_ARGS(ino, di_clusters, di_size, ip_clusters, i_size), TP_STRUCT__entry( __field(unsigned long long, ino) __field(unsigned int, di_clusters) __field(unsigned long long, di_size) __field(unsigned int, ip_clusters) __field(unsigned long long, i_size) ), TP_fast_assign( __entry->ino = ino; __entry->di_clusters = di_clusters; __entry->di_size = di_size; __entry->ip_clusters = ip_clusters; __entry->i_size = i_size; ), TP_printk("%llu %u %llu %u %llu", __entry->ino, __entry->di_clusters, __entry->di_size, __entry->ip_clusters, __entry->i_size) ); TRACE_EVENT(ocfs2_write_zero_page, TP_PROTO(unsigned long long ino, unsigned long long abs_from, unsigned long long abs_to, unsigned long index, unsigned int zero_from, unsigned int zero_to), TP_ARGS(ino, abs_from, abs_to, index, zero_from, zero_to), TP_STRUCT__entry( __field(unsigned long long, ino) __field(unsigned long long, abs_from) __field(unsigned long long, abs_to) __field(unsigned long, index) __field(unsigned int, zero_from) __field(unsigned int, zero_to) ), TP_fast_assign( __entry->ino = ino; __entry->abs_from = abs_from; __entry->abs_to = abs_to; __entry->index = index; __entry->zero_from = zero_from; __entry->zero_to = zero_to; ), TP_printk("%llu %llu %llu %lu %u %u", __entry->ino, __entry->abs_from, __entry->abs_to, __entry->index, __entry->zero_from, __entry->zero_to) ); DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_zero_extend_range); DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_zero_extend); TRACE_EVENT(ocfs2_setattr, TP_PROTO(void *inode, void *dentry, unsigned long long ino, unsigned int d_len, const unsigned char *d_name, unsigned int ia_valid, unsigned int ia_mode, unsigned int ia_uid, unsigned int ia_gid), TP_ARGS(inode, dentry, ino, d_len, d_name, ia_valid, ia_mode, ia_uid, ia_gid), TP_STRUCT__entry( __field(void *, inode) __field(void *, dentry) __field(unsigned long long, ino) __field(unsigned int, d_len) __string(d_name, d_name) __field(unsigned int, ia_valid) __field(unsigned int, ia_mode) __field(unsigned int, ia_uid) __field(unsigned int, ia_gid) ), TP_fast_assign( __entry->inode = inode; __entry->dentry = dentry; __entry->ino = ino; __entry->d_len = d_len; __assign_str(d_name); __entry->ia_valid = ia_valid; __entry->ia_mode = ia_mode; __entry->ia_uid = ia_uid; __entry->ia_gid = ia_gid; ), TP_printk("%p %p %llu %.*s %u %u %u %u", __entry->inode, __entry->dentry, __entry->ino, __entry->d_len, __get_str(d_name), __entry->ia_valid, __entry->ia_mode, __entry->ia_uid, __entry->ia_gid) ); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_write_remove_suid); DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_zero_partial_clusters); DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_zero_partial_clusters_range1); DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_zero_partial_clusters_range2); DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_remove_inode_range); TRACE_EVENT(ocfs2_prepare_inode_for_write, TP_PROTO(unsigned long long ino, unsigned long long saved_pos, unsigned long count, int wait), TP_ARGS(ino, saved_pos, count, wait), TP_STRUCT__entry( __field(unsigned long long, ino) __field(unsigned long long, saved_pos) __field(unsigned long, count) __field(int, wait) ), TP_fast_assign( __entry->ino = ino; __entry->saved_pos = saved_pos; __entry->count = count; __entry->wait = wait; ), TP_printk("%llu %llu %lu %d", __entry->ino, __entry->saved_pos, __entry->count, __entry->wait) ); DEFINE_OCFS2_INT_EVENT(generic_file_read_iter_ret); DEFINE_OCFS2_INT_EVENT(filemap_splice_read_ret); /* End of trace events for fs/ocfs2/file.c. */ /* Trace events for fs/ocfs2/inode.c. */ TRACE_EVENT(ocfs2_iget_begin, TP_PROTO(unsigned long long ino, unsigned int flags, int sysfile_type), TP_ARGS(ino, flags, sysfile_type), TP_STRUCT__entry( __field(unsigned long long, ino) __field(unsigned int, flags) __field(int, sysfile_type) ), TP_fast_assign( __entry->ino = ino; __entry->flags = flags; __entry->sysfile_type = sysfile_type; ), TP_printk("%llu %u %d", __entry->ino, __entry->flags, __entry->sysfile_type) ); DEFINE_OCFS2_ULL_EVENT(ocfs2_iget5_locked); TRACE_EVENT(ocfs2_iget_end, TP_PROTO(void *inode, unsigned long long ino), TP_ARGS(inode, ino), TP_STRUCT__entry( __field(void *, inode) __field(unsigned long long, ino) ), TP_fast_assign( __entry->inode = inode; __entry->ino = ino; ), TP_printk("%p %llu", __entry->inode, __entry->ino) ); TRACE_EVENT(ocfs2_find_actor, TP_PROTO(void *inode, unsigned long long ino, void *args, unsigned long long fi_blkno), TP_ARGS(inode, ino, args, fi_blkno), TP_STRUCT__entry( __field(void *, inode) __field(unsigned long long, ino) __field(void *, args) __field(unsigned long long, fi_blkno) ), TP_fast_assign( __entry->inode = inode; __entry->ino = ino; __entry->args = args; __entry->fi_blkno = fi_blkno; ), TP_printk("%p %llu %p %llu", __entry->inode, __entry->ino, __entry->args, __entry->fi_blkno) ); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_populate_inode); DEFINE_OCFS2_ULL_INT_EVENT(ocfs2_read_locked_inode); DEFINE_OCFS2_INT_INT_EVENT(ocfs2_check_orphan_recovery_state); DEFINE_OCFS2_ULL_EVENT(ocfs2_validate_inode_block); DEFINE_OCFS2_ULL_EVENT(ocfs2_filecheck_validate_inode_block); DEFINE_OCFS2_ULL_EVENT(ocfs2_filecheck_repair_inode_block); TRACE_EVENT(ocfs2_inode_is_valid_to_delete, TP_PROTO(void *task, void *dc_task, unsigned long long ino, unsigned int flags), TP_ARGS(task, dc_task, ino, flags), TP_STRUCT__entry( __field(void *, task) __field(void *, dc_task) __field(unsigned long long, ino) __field(unsigned int, flags) ), TP_fast_assign( __entry->task = task; __entry->dc_task = dc_task; __entry->ino = ino; __entry->flags = flags; ), TP_printk("%p %p %llu %u", __entry->task, __entry->dc_task, __entry->ino, __entry->flags) ); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_query_inode_wipe_begin); DEFINE_OCFS2_UINT_EVENT(ocfs2_query_inode_wipe_succ); DEFINE_OCFS2_INT_INT_EVENT(ocfs2_query_inode_wipe_end); DEFINE_OCFS2_ULL_INT_EVENT(ocfs2_cleanup_delete_inode); DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_delete_inode); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_clear_inode); DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_drop_inode); TRACE_EVENT(ocfs2_inode_revalidate, TP_PROTO(void *inode, unsigned long long ino, unsigned int flags), TP_ARGS(inode, ino, flags), TP_STRUCT__entry( __field(void *, inode) __field(unsigned long long, ino) __field(unsigned int, flags) ), TP_fast_assign( __entry->inode = inode; __entry->ino = ino; __entry->flags = flags; ), TP_printk("%p %llu %u", __entry->inode, __entry->ino, __entry->flags) ); DEFINE_OCFS2_ULL_EVENT(ocfs2_mark_inode_dirty); /* End of trace events for fs/ocfs2/inode.c. */ /* Trace events for fs/ocfs2/extent_map.c. */ TRACE_EVENT(ocfs2_read_virt_blocks, TP_PROTO(void *inode, unsigned long long vblock, int nr, void *bhs, unsigned int flags, void *validate), TP_ARGS(inode, vblock, nr, bhs, flags, validate), TP_STRUCT__entry( __field(void *, inode) __field(unsigned long long, vblock) __field(int, nr) __field(void *, bhs) __field(unsigned int, flags) __field(void *, validate) ), TP_fast_assign( __entry->inode = inode; __entry->vblock = vblock; __entry->nr = nr; __entry->bhs = bhs; __entry->flags = flags; __entry->validate = validate; ), TP_printk("%p %llu %d %p %x %p", __entry->inode, __entry->vblock, __entry->nr, __entry->bhs, __entry->flags, __entry->validate) ); /* End of trace events for fs/ocfs2/extent_map.c. */ /* Trace events for fs/ocfs2/slot_map.c. */ DEFINE_OCFS2_UINT_EVENT(ocfs2_refresh_slot_info); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_map_slot_buffers); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_map_slot_buffers_block); DEFINE_OCFS2_INT_EVENT(ocfs2_find_slot); /* End of trace events for fs/ocfs2/slot_map.c. */ /* Trace events for fs/ocfs2/heartbeat.c. */ DEFINE_OCFS2_INT_EVENT(ocfs2_do_node_down); /* End of trace events for fs/ocfs2/heartbeat.c. */ /* Trace events for fs/ocfs2/super.c. */ TRACE_EVENT(ocfs2_remount, TP_PROTO(unsigned long s_flags, unsigned long osb_flags, int flags), TP_ARGS(s_flags, osb_flags, flags), TP_STRUCT__entry( __field(unsigned long, s_flags) __field(unsigned long, osb_flags) __field(int, flags) ), TP_fast_assign( __entry->s_flags = s_flags; __entry->osb_flags = osb_flags; __entry->flags = flags; ), TP_printk("%lu %lu %d", __entry->s_flags, __entry->osb_flags, __entry->flags) ); TRACE_EVENT(ocfs2_fill_super, TP_PROTO(void *sb, void *data, int silent), TP_ARGS(sb, data, silent), TP_STRUCT__entry( __field(void *, sb) __field(void *, data) __field(int, silent) ), TP_fast_assign( __entry->sb = sb; __entry->data = data; __entry->silent = silent; ), TP_printk("%p %p %d", __entry->sb, __entry->data, __entry->silent) ); TRACE_EVENT(ocfs2_parse_options, TP_PROTO(int is_remount, char *options), TP_ARGS(is_remount, options), TP_STRUCT__entry( __field(int, is_remount) __string(options, options) ), TP_fast_assign( __entry->is_remount = is_remount; __assign_str(options); ), TP_printk("%d %s", __entry->is_remount, __get_str(options)) ); DEFINE_OCFS2_POINTER_EVENT(ocfs2_put_super); TRACE_EVENT(ocfs2_statfs, TP_PROTO(void *sb, void *buf), TP_ARGS(sb, buf), TP_STRUCT__entry( __field(void *, sb) __field(void *, buf) ), TP_fast_assign( __entry->sb = sb; __entry->buf = buf; ), TP_printk("%p %p", __entry->sb, __entry->buf) ); DEFINE_OCFS2_POINTER_EVENT(ocfs2_dismount_volume); TRACE_EVENT(ocfs2_initialize_super, TP_PROTO(char *label, char *uuid_str, unsigned long long root_dir, unsigned long long system_dir, int cluster_bits), TP_ARGS(label, uuid_str, root_dir, system_dir, cluster_bits), TP_STRUCT__entry( __string(label, label) __string(uuid_str, uuid_str) __field(unsigned long long, root_dir) __field(unsigned long long, system_dir) __field(int, cluster_bits) ), TP_fast_assign( __assign_str(label); __assign_str(uuid_str); __entry->root_dir = root_dir; __entry->system_dir = system_dir; __entry->cluster_bits = cluster_bits; ), TP_printk("%s %s %llu %llu %d", __get_str(label), __get_str(uuid_str), __entry->root_dir, __entry->system_dir, __entry->cluster_bits) ); /* End of trace events for fs/ocfs2/super.c. */ /* Trace events for fs/ocfs2/xattr.c. */ DEFINE_OCFS2_ULL_EVENT(ocfs2_validate_xattr_block); DEFINE_OCFS2_UINT_EVENT(ocfs2_xattr_extend_allocation); TRACE_EVENT(ocfs2_init_xattr_set_ctxt, TP_PROTO(const char *name, int meta, int clusters, int credits), TP_ARGS(name, meta, clusters, credits), TP_STRUCT__entry( __string(name, name) __field(int, meta) __field(int, clusters) __field(int, credits) ), TP_fast_assign( __assign_str(name); __entry->meta = meta; __entry->clusters = clusters; __entry->credits = credits; ), TP_printk("%s %d %d %d", __get_str(name), __entry->meta, __entry->clusters, __entry->credits) ); DECLARE_EVENT_CLASS(ocfs2__xattr_find, TP_PROTO(unsigned long long ino, const char *name, int name_index, unsigned int hash, unsigned long long location, int xe_index), TP_ARGS(ino, name, name_index, hash, location, xe_index), TP_STRUCT__entry( __field(unsigned long long, ino) __string(name, name) __field(int, name_index) __field(unsigned int, hash) __field(unsigned long long, location) __field(int, xe_index) ), TP_fast_assign( __entry->ino = ino; __assign_str(name); __entry->name_index = name_index; __entry->hash = hash; __entry->location = location; __entry->xe_index = xe_index; ), TP_printk("%llu %s %d %u %llu %d", __entry->ino, __get_str(name), __entry->name_index, __entry->hash, __entry->location, __entry->xe_index) ); #define DEFINE_OCFS2_XATTR_FIND_EVENT(name) \ DEFINE_EVENT(ocfs2__xattr_find, name, \ TP_PROTO(unsigned long long ino, const char *name, int name_index, \ unsigned int hash, unsigned long long bucket, \ int xe_index), \ TP_ARGS(ino, name, name_index, hash, bucket, xe_index)) DEFINE_OCFS2_XATTR_FIND_EVENT(ocfs2_xattr_bucket_find); DEFINE_OCFS2_XATTR_FIND_EVENT(ocfs2_xattr_index_block_find); DEFINE_OCFS2_XATTR_FIND_EVENT(ocfs2_xattr_index_block_find_rec); DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_iterate_xattr_buckets); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_iterate_xattr_bucket); DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_cp_xattr_block_to_bucket_begin); DEFINE_OCFS2_UINT_UINT_UINT_EVENT(ocfs2_cp_xattr_block_to_bucket_end); DEFINE_OCFS2_ULL_EVENT(ocfs2_xattr_create_index_block_begin); DEFINE_OCFS2_ULL_EVENT(ocfs2_xattr_create_index_block); DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(ocfs2_defrag_xattr_bucket); DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_mv_xattr_bucket_cross_cluster); DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_divide_xattr_bucket_begin); DEFINE_OCFS2_UINT_UINT_UINT_EVENT(ocfs2_divide_xattr_bucket_move); DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_cp_xattr_bucket); DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_mv_xattr_buckets); DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_adjust_xattr_cross_cluster); DEFINE_OCFS2_ULL_ULL_UINT_UINT_EVENT(ocfs2_add_new_xattr_cluster_begin); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_add_new_xattr_cluster); DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_add_new_xattr_cluster_insert); DEFINE_OCFS2_ULL_ULL_UINT_UINT_EVENT(ocfs2_extend_xattr_bucket); DEFINE_OCFS2_ULL_EVENT(ocfs2_add_new_xattr_bucket); DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_xattr_bucket_value_truncate); DEFINE_OCFS2_ULL_ULL_UINT_UINT_EVENT(ocfs2_rm_xattr_cluster); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_reflink_xattr_header); DEFINE_OCFS2_ULL_INT_EVENT(ocfs2_create_empty_xattr_block); DEFINE_OCFS2_STRING_EVENT(ocfs2_xattr_set_entry_bucket); DEFINE_OCFS2_STRING_EVENT(ocfs2_xattr_set_entry_index_block); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_xattr_bucket_value_refcount); DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_reflink_xattr_buckets); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_reflink_xattr_rec); /* End of trace events for fs/ocfs2/xattr.c. */ /* Trace events for fs/ocfs2/reservations.c. */ DEFINE_OCFS2_UINT_UINT_EVENT(ocfs2_resv_insert); DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(ocfs2_resmap_find_free_bits_begin); DEFINE_OCFS2_UINT_UINT_EVENT(ocfs2_resmap_find_free_bits_end); TRACE_EVENT(ocfs2_resv_find_window_begin, TP_PROTO(unsigned int r_start, unsigned int r_end, unsigned int goal, unsigned int wanted, int empty_root), TP_ARGS(r_start, r_end, goal, wanted, empty_root), TP_STRUCT__entry( __field(unsigned int, r_start) __field(unsigned int, r_end) __field(unsigned int, goal) __field(unsigned int, wanted) __field(int, empty_root) ), TP_fast_assign( __entry->r_start = r_start; __entry->r_end = r_end; __entry->goal = goal; __entry->wanted = wanted; __entry->empty_root = empty_root; ), TP_printk("%u %u %u %u %d", __entry->r_start, __entry->r_end, __entry->goal, __entry->wanted, __entry->empty_root) ); DEFINE_OCFS2_UINT_UINT_EVENT(ocfs2_resv_find_window_prev); DEFINE_OCFS2_INT_INT_EVENT(ocfs2_resv_find_window_next); DEFINE_OCFS2_UINT_UINT_UINT_EVENT(ocfs2_cannibalize_resv_begin); TRACE_EVENT(ocfs2_cannibalize_resv_end, TP_PROTO(unsigned int start, unsigned int end, unsigned int len, unsigned int last_start, unsigned int last_len), TP_ARGS(start, end, len, last_start, last_len), TP_STRUCT__entry( __field(unsigned int, start) __field(unsigned int, end) __field(unsigned int, len) __field(unsigned int, last_start) __field(unsigned int, last_len) ), TP_fast_assign( __entry->start = start; __entry->end = end; __entry->len = len; __entry->last_start = last_start; __entry->last_len = last_len; ), TP_printk("%u %u %u %u %u", __entry->start, __entry->end, __entry->len, __entry->last_start, __entry->last_len) ); DEFINE_OCFS2_UINT_UINT_EVENT(ocfs2_resmap_resv_bits); TRACE_EVENT(ocfs2_resmap_claimed_bits_begin, TP_PROTO(unsigned int cstart, unsigned int cend, unsigned int clen, unsigned int r_start, unsigned int r_end, unsigned int r_len, unsigned int last_start, unsigned int last_len), TP_ARGS(cstart, cend, clen, r_start, r_end, r_len, last_start, last_len), TP_STRUCT__entry( __field(unsigned int, cstart) __field(unsigned int, cend) __field(unsigned int, clen) __field(unsigned int, r_start) __field(unsigned int, r_end) __field(unsigned int, r_len) __field(unsigned int, last_start) __field(unsigned int, last_len) ), TP_fast_assign( __entry->cstart = cstart; __entry->cend = cend; __entry->clen = clen; __entry->r_start = r_start; __entry->r_end = r_end; __entry->r_len = r_len; __entry->last_start = last_start; __entry->last_len = last_len; ), TP_printk("%u %u %u %u %u %u %u %u", __entry->cstart, __entry->cend, __entry->clen, __entry->r_start, __entry->r_end, __entry->r_len, __entry->last_start, __entry->last_len) ); TRACE_EVENT(ocfs2_resmap_claimed_bits_end, TP_PROTO(unsigned int start, unsigned int end, unsigned int len, unsigned int last_start, unsigned int last_len), TP_ARGS(start, end, len, last_start, last_len), TP_STRUCT__entry( __field(unsigned int, start) __field(unsigned int, end) __field(unsigned int, len) __field(unsigned int, last_start) __field(unsigned int, last_len) ), TP_fast_assign( __entry->start = start; __entry->end = end; __entry->len = len; __entry->last_start = last_start; __entry->last_len = last_len; ), TP_printk("%u %u %u %u %u", __entry->start, __entry->end, __entry->len, __entry->last_start, __entry->last_len) ); /* End of trace events for fs/ocfs2/reservations.c. */ /* Trace events for fs/ocfs2/quota_local.c. */ DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_recover_local_quota_file); DEFINE_OCFS2_INT_EVENT(ocfs2_finish_quota_recovery); DEFINE_OCFS2_ULL_ULL_UINT_EVENT(olq_set_dquot); /* End of trace events for fs/ocfs2/quota_local.c. */ /* Trace events for fs/ocfs2/quota_global.c. */ DEFINE_OCFS2_ULL_EVENT(ocfs2_validate_quota_block); TRACE_EVENT(ocfs2_sync_dquot, TP_PROTO(unsigned int dq_id, long long dqb_curspace, long long spacechange, long long curinodes, long long inodechange), TP_ARGS(dq_id, dqb_curspace, spacechange, curinodes, inodechange), TP_STRUCT__entry( __field(unsigned int, dq_id) __field(long long, dqb_curspace) __field(long long, spacechange) __field(long long, curinodes) __field(long long, inodechange) ), TP_fast_assign( __entry->dq_id = dq_id; __entry->dqb_curspace = dqb_curspace; __entry->spacechange = spacechange; __entry->curinodes = curinodes; __entry->inodechange = inodechange; ), TP_printk("%u %lld %lld %lld %lld", __entry->dq_id, __entry->dqb_curspace, __entry->spacechange, __entry->curinodes, __entry->inodechange) ); TRACE_EVENT(ocfs2_sync_dquot_helper, TP_PROTO(unsigned int dq_id, unsigned int dq_type, unsigned long type, const char *s_id), TP_ARGS(dq_id, dq_type, type, s_id), TP_STRUCT__entry( __field(unsigned int, dq_id) __field(unsigned int, dq_type) __field(unsigned long, type) __string(s_id, s_id) ), TP_fast_assign( __entry->dq_id = dq_id; __entry->dq_type = dq_type; __entry->type = type; __assign_str(s_id); ), TP_printk("%u %u %lu %s", __entry->dq_id, __entry->dq_type, __entry->type, __get_str(s_id)) ); DEFINE_OCFS2_UINT_INT_EVENT(ocfs2_write_dquot); DEFINE_OCFS2_UINT_INT_EVENT(ocfs2_release_dquot); DEFINE_OCFS2_UINT_INT_EVENT(ocfs2_acquire_dquot); DEFINE_OCFS2_UINT_INT_EVENT(ocfs2_get_next_id); DEFINE_OCFS2_UINT_INT_EVENT(ocfs2_mark_dquot_dirty); /* End of trace events for fs/ocfs2/quota_global.c. */ /* Trace events for fs/ocfs2/dir.c. */ DEFINE_OCFS2_INT_EVENT(ocfs2_search_dirblock); DEFINE_OCFS2_ULL_EVENT(ocfs2_validate_dir_block); DEFINE_OCFS2_POINTER_EVENT(ocfs2_find_entry_el); TRACE_EVENT(ocfs2_dx_dir_search, TP_PROTO(unsigned long long ino, int namelen, const char *name, unsigned int major_hash, unsigned int minor_hash, unsigned long long blkno), TP_ARGS(ino, namelen, name, major_hash, minor_hash, blkno), TP_STRUCT__entry( __field(unsigned long long, ino) __field(int, namelen) __string(name, name) __field(unsigned int, major_hash) __field(unsigned int,minor_hash) __field(unsigned long long, blkno) ), TP_fast_assign( __entry->ino = ino; __entry->namelen = namelen; __assign_str(name); __entry->major_hash = major_hash; __entry->minor_hash = minor_hash; __entry->blkno = blkno; ), TP_printk("%llu %.*s %u %u %llu", __entry->ino, __entry->namelen, __get_str(name), __entry->major_hash, __entry->minor_hash, __entry->blkno) ); DEFINE_OCFS2_UINT_UINT_EVENT(ocfs2_dx_dir_search_leaf_info); DEFINE_OCFS2_ULL_INT_EVENT(ocfs2_delete_entry_dx); DEFINE_OCFS2_ULL_EVENT(ocfs2_readdir); TRACE_EVENT(ocfs2_find_files_on_disk, TP_PROTO(int namelen, const char *name, void *blkno, unsigned long long dir), TP_ARGS(namelen, name, blkno, dir), TP_STRUCT__entry( __field(int, namelen) __string(name, name) __field(void *, blkno) __field(unsigned long long, dir) ), TP_fast_assign( __entry->namelen = namelen; __assign_str(name); __entry->blkno = blkno; __entry->dir = dir; ), TP_printk("%.*s %p %llu", __entry->namelen, __get_str(name), __entry->blkno, __entry->dir) ); TRACE_EVENT(ocfs2_check_dir_for_entry, TP_PROTO(unsigned long long dir, int namelen, const char *name), TP_ARGS(dir, namelen, name), TP_STRUCT__entry( __field(unsigned long long, dir) __field(int, namelen) __string(name, name) ), TP_fast_assign( __entry->dir = dir; __entry->namelen = namelen; __assign_str(name); ), TP_printk("%llu %.*s", __entry->dir, __entry->namelen, __get_str(name)) ); DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_dx_dir_attach_index); DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_dx_dir_format_cluster); TRACE_EVENT(ocfs2_dx_dir_index_root_block, TP_PROTO(unsigned long long dir, unsigned int major_hash, unsigned int minor_hash, int namelen, const char *name, unsigned int num_used), TP_ARGS(dir, major_hash, minor_hash, namelen, name, num_used), TP_STRUCT__entry( __field(unsigned long long, dir) __field(unsigned int, major_hash) __field(unsigned int, minor_hash) __field(int, namelen) __string(name, name) __field(unsigned int, num_used) ), TP_fast_assign( __entry->dir = dir; __entry->major_hash = major_hash; __entry->minor_hash = minor_hash; __entry->namelen = namelen; __assign_str(name); __entry->num_used = num_used; ), TP_printk("%llu %x %x %.*s %u", __entry->dir, __entry->major_hash, __entry->minor_hash, __entry->namelen, __get_str(name), __entry->num_used) ); DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_extend_dir); DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_dx_dir_rebalance); DEFINE_OCFS2_UINT_UINT_UINT_EVENT(ocfs2_dx_dir_rebalance_split); DEFINE_OCFS2_ULL_INT_EVENT(ocfs2_prepare_dir_for_insert); /* End of trace events for fs/ocfs2/dir.c. */ /* Trace events for fs/ocfs2/namei.c. */ DECLARE_EVENT_CLASS(ocfs2__dentry_ops, TP_PROTO(void *dir, void *dentry, int name_len, const char *name, unsigned long long dir_blkno, unsigned long long extra), TP_ARGS(dir, dentry, name_len, name, dir_blkno, extra), TP_STRUCT__entry( __field(void *, dir) __field(void *, dentry) __field(int, name_len) __string(name, name) __field(unsigned long long, dir_blkno) __field(unsigned long long, extra) ), TP_fast_assign( __entry->dir = dir; __entry->dentry = dentry; __entry->name_len = name_len; __assign_str(name); __entry->dir_blkno = dir_blkno; __entry->extra = extra; ), TP_printk("%p %p %.*s %llu %llu", __entry->dir, __entry->dentry, __entry->name_len, __get_str(name), __entry->dir_blkno, __entry->extra) ); #define DEFINE_OCFS2_DENTRY_OPS(name) \ DEFINE_EVENT(ocfs2__dentry_ops, name, \ TP_PROTO(void *dir, void *dentry, int name_len, const char *name, \ unsigned long long dir_blkno, unsigned long long extra), \ TP_ARGS(dir, dentry, name_len, name, dir_blkno, extra)) DEFINE_OCFS2_DENTRY_OPS(ocfs2_lookup); DEFINE_OCFS2_DENTRY_OPS(ocfs2_mkdir); DEFINE_OCFS2_DENTRY_OPS(ocfs2_create); DEFINE_OCFS2_DENTRY_OPS(ocfs2_unlink); DEFINE_OCFS2_DENTRY_OPS(ocfs2_symlink_create); DEFINE_OCFS2_DENTRY_OPS(ocfs2_mv_orphaned_inode_to_new); DEFINE_OCFS2_POINTER_EVENT(ocfs2_lookup_ret); TRACE_EVENT(ocfs2_mknod, TP_PROTO(void *dir, void *dentry, int name_len, const char *name, unsigned long long dir_blkno, unsigned long dev, int mode), TP_ARGS(dir, dentry, name_len, name, dir_blkno, dev, mode), TP_STRUCT__entry( __field(void *, dir) __field(void *, dentry) __field(int, name_len) __string(name, name) __field(unsigned long long, dir_blkno) __field(unsigned long, dev) __field(int, mode) ), TP_fast_assign( __entry->dir = dir; __entry->dentry = dentry; __entry->name_len = name_len; __assign_str(name); __entry->dir_blkno = dir_blkno; __entry->dev = dev; __entry->mode = mode; ), TP_printk("%p %p %.*s %llu %lu %d", __entry->dir, __entry->dentry, __entry->name_len, __get_str(name), __entry->dir_blkno, __entry->dev, __entry->mode) ); TRACE_EVENT(ocfs2_link, TP_PROTO(unsigned long long ino, int old_len, const char *old_name, int name_len, const char *name), TP_ARGS(ino, old_len, old_name, name_len, name), TP_STRUCT__entry( __field(unsigned long long, ino) __field(int, old_len) __string(old_name, old_name) __field(int, name_len) __string(name, name) ), TP_fast_assign( __entry->ino = ino; __entry->old_len = old_len; __assign_str(old_name); __entry->name_len = name_len; __assign_str(name); ), TP_printk("%llu %.*s %.*s", __entry->ino, __entry->old_len, __get_str(old_name), __entry->name_len, __get_str(name)) ); DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_unlink_noent); DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_double_lock); DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_double_lock_end); TRACE_EVENT(ocfs2_rename, TP_PROTO(void *old_dir, void *old_dentry, void *new_dir, void *new_dentry, int old_len, const char *old_name, int new_len, const char *new_name), TP_ARGS(old_dir, old_dentry, new_dir, new_dentry, old_len, old_name, new_len, new_name), TP_STRUCT__entry( __field(void *, old_dir) __field(void *, old_dentry) __field(void *, new_dir) __field(void *, new_dentry) __field(int, old_len) __string(old_name, old_name) __field(int, new_len) __string(new_name, new_name) ), TP_fast_assign( __entry->old_dir = old_dir; __entry->old_dentry = old_dentry; __entry->new_dir = new_dir; __entry->new_dentry = new_dentry; __entry->old_len = old_len; __assign_str(old_name); __entry->new_len = new_len; __assign_str(new_name); ), TP_printk("%p %p %p %p %.*s %.*s", __entry->old_dir, __entry->old_dentry, __entry->new_dir, __entry->new_dentry, __entry->old_len, __get_str(old_name), __entry->new_len, __get_str(new_name)) ); DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_rename_not_permitted); TRACE_EVENT(ocfs2_rename_target_exists, TP_PROTO(int new_len, const char *new_name), TP_ARGS(new_len, new_name), TP_STRUCT__entry( __field(int, new_len) __string(new_name, new_name) ), TP_fast_assign( __entry->new_len = new_len; __assign_str(new_name); ), TP_printk("%.*s", __entry->new_len, __get_str(new_name)) ); DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_rename_disagree); TRACE_EVENT(ocfs2_rename_over_existing, TP_PROTO(unsigned long long new_blkno, void *new_bh, unsigned long long newdi_blkno), TP_ARGS(new_blkno, new_bh, newdi_blkno), TP_STRUCT__entry( __field(unsigned long long, new_blkno) __field(void *, new_bh) __field(unsigned long long, newdi_blkno) ), TP_fast_assign( __entry->new_blkno = new_blkno; __entry->new_bh = new_bh; __entry->newdi_blkno = newdi_blkno; ), TP_printk("%llu %p %llu", __entry->new_blkno, __entry->new_bh, __entry->newdi_blkno) ); DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_create_symlink_data); TRACE_EVENT(ocfs2_symlink_begin, TP_PROTO(void *dir, void *dentry, const char *symname, int len, const char *name), TP_ARGS(dir, dentry, symname, len, name), TP_STRUCT__entry( __field(void *, dir) __field(void *, dentry) __field(const char *, symname) __field(int, len) __string(name, name) ), TP_fast_assign( __entry->dir = dir; __entry->dentry = dentry; __entry->symname = symname; __entry->len = len; __assign_str(name); ), TP_printk("%p %p %s %.*s", __entry->dir, __entry->dentry, __entry->symname, __entry->len, __get_str(name)) ); TRACE_EVENT(ocfs2_blkno_stringify, TP_PROTO(unsigned long long blkno, const char *name, int namelen), TP_ARGS(blkno, name, namelen), TP_STRUCT__entry( __field(unsigned long long, blkno) __string(name, name) __field(int, namelen) ), TP_fast_assign( __entry->blkno = blkno; __assign_str(name); __entry->namelen = namelen; ), TP_printk("%llu %s %d", __entry->blkno, __get_str(name), __entry->namelen) ); DEFINE_OCFS2_ULL_EVENT(ocfs2_orphan_add_begin); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_orphan_add_end); TRACE_EVENT(ocfs2_orphan_del, TP_PROTO(unsigned long long dir, const char *name, int namelen), TP_ARGS(dir, name, namelen), TP_STRUCT__entry( __field(unsigned long long, dir) __string(name, name) __field(int, namelen) ), TP_fast_assign( __entry->dir = dir; __assign_str(name); __entry->namelen = namelen; ), TP_printk("%llu %s %d", __entry->dir, __get_str(name), __entry->namelen) ); /* End of trace events for fs/ocfs2/namei.c. */ /* Trace events for fs/ocfs2/dcache.c. */ TRACE_EVENT(ocfs2_dentry_revalidate, TP_PROTO(void *dentry, int len, const char *name), TP_ARGS(dentry, len, name), TP_STRUCT__entry( __field(void *, dentry) __field(int, len) __string(name, name) ), TP_fast_assign( __entry->dentry = dentry; __entry->len = len; __assign_str(name); ), TP_printk("%p %.*s", __entry->dentry, __entry->len, __get_str(name)) ); TRACE_EVENT(ocfs2_dentry_revalidate_negative, TP_PROTO(int len, const char *name, unsigned long pgen, unsigned long gen), TP_ARGS(len, name, pgen, gen), TP_STRUCT__entry( __field(int, len) __string(name, name) __field(unsigned long, pgen) __field(unsigned long, gen) ), TP_fast_assign( __entry->len = len; __assign_str(name); __entry->pgen = pgen; __entry->gen = gen; ), TP_printk("%.*s %lu %lu", __entry->len, __get_str(name), __entry->pgen, __entry->gen) ); DEFINE_OCFS2_ULL_EVENT(ocfs2_dentry_revalidate_delete); DEFINE_OCFS2_ULL_INT_EVENT(ocfs2_dentry_revalidate_orphaned); DEFINE_OCFS2_ULL_EVENT(ocfs2_dentry_revalidate_nofsdata); DEFINE_OCFS2_INT_EVENT(ocfs2_dentry_revalidate_ret); TRACE_EVENT(ocfs2_find_local_alias, TP_PROTO(int len, const char *name), TP_ARGS(len, name), TP_STRUCT__entry( __field(int, len) __string(name, name) ), TP_fast_assign( __entry->len = len; __assign_str(name); ), TP_printk("%.*s", __entry->len, __get_str(name)) ); TRACE_EVENT(ocfs2_dentry_attach_lock, TP_PROTO(int len, const char *name, unsigned long long parent, void *fsdata), TP_ARGS(len, name, parent, fsdata), TP_STRUCT__entry( __field(int, len) __string(name, name) __field(unsigned long long, parent) __field(void *, fsdata) ), TP_fast_assign( __entry->len = len; __assign_str(name); __entry->parent = parent; __entry->fsdata = fsdata; ), TP_printk("%.*s %llu %p", __entry->len, __get_str(name), __entry->parent, __entry->fsdata) ); TRACE_EVENT(ocfs2_dentry_attach_lock_found, TP_PROTO(const char *name, unsigned long long parent, unsigned long long ino), TP_ARGS(name, parent, ino), TP_STRUCT__entry( __string(name, name) __field(unsigned long long, parent) __field(unsigned long long, ino) ), TP_fast_assign( __assign_str(name); __entry->parent = parent; __entry->ino = ino; ), TP_printk("%s %llu %llu", __get_str(name), __entry->parent, __entry->ino) ); /* End of trace events for fs/ocfs2/dcache.c. */ /* Trace events for fs/ocfs2/export.c. */ TRACE_EVENT(ocfs2_get_dentry_begin, TP_PROTO(void *sb, void *handle, unsigned long long blkno), TP_ARGS(sb, handle, blkno), TP_STRUCT__entry( __field(void *, sb) __field(void *, handle) __field(unsigned long long, blkno) ), TP_fast_assign( __entry->sb = sb; __entry->handle = handle; __entry->blkno = blkno; ), TP_printk("%p %p %llu", __entry->sb, __entry->handle, __entry->blkno) ); DEFINE_OCFS2_INT_INT_EVENT(ocfs2_get_dentry_test_bit); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_get_dentry_stale); DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_get_dentry_generation); DEFINE_OCFS2_POINTER_EVENT(ocfs2_get_dentry_end); TRACE_EVENT(ocfs2_get_parent, TP_PROTO(void *child, int len, const char *name, unsigned long long ino), TP_ARGS(child, len, name, ino), TP_STRUCT__entry( __field(void *, child) __field(int, len) __string(name, name) __field(unsigned long long, ino) ), TP_fast_assign( __entry->child = child; __entry->len = len; __assign_str(name); __entry->ino = ino; ), TP_printk("%p %.*s %llu", __entry->child, __entry->len, __get_str(name), __entry->ino) ); DEFINE_OCFS2_POINTER_EVENT(ocfs2_get_parent_end); TRACE_EVENT(ocfs2_encode_fh_begin, TP_PROTO(void *dentry, int name_len, const char *name, void *fh, int len, int connectable), TP_ARGS(dentry, name_len, name, fh, len, connectable), TP_STRUCT__entry( __field(void *, dentry) __field(int, name_len) __string(name, name) __field(void *, fh) __field(int, len) __field(int, connectable) ), TP_fast_assign( __entry->dentry = dentry; __entry->name_len = name_len; __assign_str(name); __entry->fh = fh; __entry->len = len; __entry->connectable = connectable; ), TP_printk("%p %.*s %p %d %d", __entry->dentry, __entry->name_len, __get_str(name), __entry->fh, __entry->len, __entry->connectable) ); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_encode_fh_self); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_encode_fh_parent); DEFINE_OCFS2_INT_EVENT(ocfs2_encode_fh_type); /* End of trace events for fs/ocfs2/export.c. */ /* Trace events for fs/ocfs2/journal.c. */ DEFINE_OCFS2_UINT_EVENT(ocfs2_commit_cache_begin); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_commit_cache_end); DEFINE_OCFS2_INT_INT_EVENT(ocfs2_extend_trans); DEFINE_OCFS2_INT_EVENT(ocfs2_assure_trans_credits); DEFINE_OCFS2_INT_EVENT(ocfs2_extend_trans_restart); DEFINE_OCFS2_INT_INT_EVENT(ocfs2_allocate_extend_trans); DEFINE_OCFS2_ULL_ULL_UINT_UINT_EVENT(ocfs2_journal_access); DEFINE_OCFS2_ULL_EVENT(ocfs2_journal_dirty); DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_journal_init); DEFINE_OCFS2_UINT_EVENT(ocfs2_journal_init_maxlen); DEFINE_OCFS2_INT_EVENT(ocfs2_journal_shutdown); DEFINE_OCFS2_POINTER_EVENT(ocfs2_journal_shutdown_wait); DEFINE_OCFS2_ULL_EVENT(ocfs2_complete_recovery); DEFINE_OCFS2_INT_EVENT(ocfs2_complete_recovery_end); TRACE_EVENT(ocfs2_complete_recovery_slot, TP_PROTO(int slot, unsigned long long la_ino, unsigned long long tl_ino, void *qrec), TP_ARGS(slot, la_ino, tl_ino, qrec), TP_STRUCT__entry( __field(int, slot) __field(unsigned long long, la_ino) __field(unsigned long long, tl_ino) __field(void *, qrec) ), TP_fast_assign( __entry->slot = slot; __entry->la_ino = la_ino; __entry->tl_ino = tl_ino; __entry->qrec = qrec; ), TP_printk("%d %llu %llu %p", __entry->slot, __entry->la_ino, __entry->tl_ino, __entry->qrec) ); DEFINE_OCFS2_INT_INT_EVENT(ocfs2_recovery_thread_node); DEFINE_OCFS2_INT_EVENT(ocfs2_recovery_thread_end); TRACE_EVENT(ocfs2_recovery_thread, TP_PROTO(int node_num, int osb_node_num, int disable, void *recovery_thread, int map_set), TP_ARGS(node_num, osb_node_num, disable, recovery_thread, map_set), TP_STRUCT__entry( __field(int, node_num) __field(int, osb_node_num) __field(int,disable) __field(void *, recovery_thread) __field(int,map_set) ), TP_fast_assign( __entry->node_num = node_num; __entry->osb_node_num = osb_node_num; __entry->disable = disable; __entry->recovery_thread = recovery_thread; __entry->map_set = map_set; ), TP_printk("%d %d %d %p %d", __entry->node_num, __entry->osb_node_num, __entry->disable, __entry->recovery_thread, __entry->map_set) ); DEFINE_OCFS2_UINT_UINT_UINT_EVENT(ocfs2_replay_journal_recovered); DEFINE_OCFS2_INT_EVENT(ocfs2_replay_journal_lock_err); DEFINE_OCFS2_INT_EVENT(ocfs2_replay_journal_skip); DEFINE_OCFS2_UINT_UINT_UINT_EVENT(ocfs2_recover_node); DEFINE_OCFS2_UINT_UINT_EVENT(ocfs2_recover_node_skip); DEFINE_OCFS2_UINT_UINT_EVENT(ocfs2_mark_dead_nodes); DEFINE_OCFS2_UINT_UINT_UINT_EVENT(ocfs2_queue_orphan_scan_begin); DEFINE_OCFS2_UINT_UINT_UINT_EVENT(ocfs2_queue_orphan_scan_end); DEFINE_OCFS2_ULL_EVENT(ocfs2_orphan_filldir); DEFINE_OCFS2_INT_EVENT(ocfs2_recover_orphans); DEFINE_OCFS2_ULL_EVENT(ocfs2_recover_orphans_iput); DEFINE_OCFS2_INT_EVENT(ocfs2_wait_on_mount); /* End of trace events for fs/ocfs2/journal.c. */ /* Trace events for fs/ocfs2/buffer_head_io.c. */ DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_read_blocks_sync); DEFINE_OCFS2_ULL_EVENT(ocfs2_read_blocks_sync_jbd); DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_read_blocks_from_disk); DEFINE_OCFS2_ULL_INT_INT_INT_EVENT(ocfs2_read_blocks_bh); DEFINE_OCFS2_ULL_INT_INT_INT_EVENT(ocfs2_read_blocks_end); TRACE_EVENT(ocfs2_write_block, TP_PROTO(unsigned long long block, void *ci), TP_ARGS(block, ci), TP_STRUCT__entry( __field(unsigned long long, block) __field(void *, ci) ), TP_fast_assign( __entry->block = block; __entry->ci = ci; ), TP_printk("%llu %p", __entry->block, __entry->ci) ); TRACE_EVENT(ocfs2_read_blocks_begin, TP_PROTO(void *ci, unsigned long long block, unsigned int nr, int flags), TP_ARGS(ci, block, nr, flags), TP_STRUCT__entry( __field(void *, ci) __field(unsigned long long, block) __field(unsigned int, nr) __field(int, flags) ), TP_fast_assign( __entry->ci = ci; __entry->block = block; __entry->nr = nr; __entry->flags = flags; ), TP_printk("%p %llu %u %d", __entry->ci, __entry->block, __entry->nr, __entry->flags) ); /* End of trace events for fs/ocfs2/buffer_head_io.c. */ /* Trace events for fs/ocfs2/uptodate.c. */ DEFINE_OCFS2_ULL_EVENT(ocfs2_purge_copied_metadata_tree); DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_metadata_cache_purge); DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_buffer_cached_begin); TRACE_EVENT(ocfs2_buffer_cached_end, TP_PROTO(int index, void *item), TP_ARGS(index, item), TP_STRUCT__entry( __field(int, index) __field(void *, item) ), TP_fast_assign( __entry->index = index; __entry->item = item; ), TP_printk("%d %p", __entry->index, __entry->item) ); DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_append_cache_array); DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_insert_cache_tree); DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_expand_cache); DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_set_buffer_uptodate); DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_set_buffer_uptodate_begin); DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_remove_metadata_array); DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_remove_metadata_tree); DEFINE_OCFS2_ULL_ULL_UINT_UINT_EVENT(ocfs2_remove_block_from_cache); /* End of trace events for fs/ocfs2/uptodate.c. */ #endif /* _TRACE_OCFS2_H */ /* This part must be outside protection */ #undef TRACE_INCLUDE_PATH #define TRACE_INCLUDE_PATH . #define TRACE_INCLUDE_FILE ocfs2_trace #include <trace/define_trace.h>
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580 5581 5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643 5644 5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 5672 5673 5674 5675 5676 5677 5678 5679 5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706 5707 5708 5709 5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729 5730 5731 5732 5733 5734 5735 5736 5737 5738 5739 5740 5741 5742 5743 5744 5745 5746 5747 5748 5749 5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807 5808 5809 5810 5811 5812 5813 5814 5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833 5834 5835 5836 5837 5838 5839 5840 5841 5842 5843 5844 5845 5846 5847 5848 5849 5850 5851 5852 5853 5854 5855 5856 5857 5858 5859 5860 5861 5862 5863 5864 5865 5866 5867 5868 5869 5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897 5898 5899 5900 5901 5902 5903 5904 5905 5906 5907 5908 5909 5910 5911 5912 5913 5914 5915 5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042 6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 6104 6105 6106 6107 6108 6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 6122 6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145 6146 6147 6148 6149 6150 6151 6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 6180 6181 6182 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 6199 6200 6201 6202 6203 6204 6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271 6272 6273 6274 6275 6276 6277 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287 6288 6289 6290 6291 6292 6293 6294 6295 6296 6297 6298 6299 6300 6301 6302 6303 6304 6305 6306 6307 6308 6309 6310 6311 6312 6313 6314 6315 6316 6317 6318 6319 6320 6321 6322 6323 6324 6325 6326 6327 6328 6329 6330 6331 6332 6333 6334 6335 6336 6337 6338 6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366 6367 6368 6369 6370 6371 6372 6373 6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400 6401 6402 6403 6404 6405 6406 6407 6408 6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 6419 6420 6421 6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 6438 6439 6440 6441 6442 6443 6444 6445 6446 6447 6448 6449 6450 6451 6452 6453 6454 6455 6456 6457 6458 6459 6460 6461 6462 6463 6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 6536 6537 6538 6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549 6550 6551 6552 6553 6554 6555 6556 6557 6558 6559 6560 6561 6562 6563 6564 6565 6566 6567 6568 6569 6570 6571 6572 6573 6574 6575 6576 6577 6578 6579 6580 6581 6582 6583 6584 6585 6586 6587 6588 6589 6590 6591 6592 6593 6594 6595 6596 6597 6598 6599 6600 6601 6602 6603 6604 6605 6606 6607 6608 6609 6610 6611 6612 6613 6614 6615 6616 6617 6618 6619 6620 6621 6622 6623 6624 6625 6626 6627 6628 6629 6630 6631 6632 6633 6634 6635 6636 6637 6638 6639 6640 6641 6642 6643 6644 6645 6646 6647 6648 6649 6650 6651 6652 6653 6654 6655 6656 6657 6658 6659 6660 6661 6662 6663 6664 6665 6666 6667 6668 6669 6670 6671 6672 6673 6674 6675 6676 6677 6678 6679 6680 6681 6682 6683 6684 6685 6686 6687 6688 6689 6690 6691 6692 6693 6694 6695 6696 6697 6698 6699 6700 6701 6702 6703 6704 6705 6706 6707 6708 6709 6710 6711 6712 6713 6714 6715 6716 6717 6718 6719 6720 6721 6722 6723 6724 6725 6726 6727 6728 6729 6730 6731 6732 6733 6734 6735 6736 6737 6738 6739 6740 6741 6742 6743 6744 6745 6746 6747 6748 6749 6750 6751 6752 6753 6754 6755 6756 6757 6758 6759 6760 6761 6762 6763 6764 6765 6766 6767 6768 6769 6770 6771 6772 6773 6774 6775 6776 6777 6778 6779 6780 6781 6782 6783 6784 6785 6786 6787 6788 6789 6790 6791 6792 6793 6794 6795 6796 6797 // SPDX-License-Identifier: GPL-2.0-or-later /* A network driver using virtio. * * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation */ //#define DEBUG #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/ethtool.h> #include <linux/module.h> #include <linux/virtio.h> #include <linux/virtio_net.h> #include <linux/bpf.h> #include <linux/bpf_trace.h> #include <linux/scatterlist.h> #include <linux/if_vlan.h> #include <linux/slab.h> #include <linux/cpu.h> #include <linux/average.h> #include <linux/filter.h> #include <linux/kernel.h> #include <linux/dim.h> #include <net/route.h> #include <net/xdp.h> #include <net/net_failover.h> #include <net/netdev_rx_queue.h> #include <net/netdev_queues.h> #include <net/xdp_sock_drv.h> static int napi_weight = NAPI_POLL_WEIGHT; module_param(napi_weight, int, 0444); static bool csum = true, gso = true, napi_tx = true; module_param(csum, bool, 0444); module_param(gso, bool, 0444); module_param(napi_tx, bool, 0644); /* FIXME: MTU in config. */ #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN) #define GOOD_COPY_LEN 128 #define VIRTNET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD) /* Separating two types of XDP xmit */ #define VIRTIO_XDP_TX BIT(0) #define VIRTIO_XDP_REDIR BIT(1) #define VIRTIO_XDP_FLAG BIT(0) #define VIRTIO_ORPHAN_FLAG BIT(1) /* RX packet size EWMA. The average packet size is used to determine the packet * buffer size when refilling RX rings. As the entire RX ring may be refilled * at once, the weight is chosen so that the EWMA will be insensitive to short- * term, transient changes in packet size. */ DECLARE_EWMA(pkt_len, 0, 64) #define VIRTNET_DRIVER_VERSION "1.0.0" static const unsigned long guest_offloads[] = { VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_GUEST_USO4, VIRTIO_NET_F_GUEST_USO6, VIRTIO_NET_F_GUEST_HDRLEN }; #define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \ (1ULL << VIRTIO_NET_F_GUEST_ECN) | \ (1ULL << VIRTIO_NET_F_GUEST_UFO) | \ (1ULL << VIRTIO_NET_F_GUEST_USO4) | \ (1ULL << VIRTIO_NET_F_GUEST_USO6)) struct virtnet_stat_desc { char desc[ETH_GSTRING_LEN]; size_t offset; size_t qstat_offset; }; struct virtnet_sq_free_stats { u64 packets; u64 bytes; u64 napi_packets; u64 napi_bytes; }; struct virtnet_sq_stats { struct u64_stats_sync syncp; u64_stats_t packets; u64_stats_t bytes; u64_stats_t xdp_tx; u64_stats_t xdp_tx_drops; u64_stats_t kicks; u64_stats_t tx_timeouts; u64_stats_t stop; u64_stats_t wake; }; struct virtnet_rq_stats { struct u64_stats_sync syncp; u64_stats_t packets; u64_stats_t bytes; u64_stats_t drops; u64_stats_t xdp_packets; u64_stats_t xdp_tx; u64_stats_t xdp_redirects; u64_stats_t xdp_drops; u64_stats_t kicks; }; #define VIRTNET_SQ_STAT(name, m) {name, offsetof(struct virtnet_sq_stats, m), -1} #define VIRTNET_RQ_STAT(name, m) {name, offsetof(struct virtnet_rq_stats, m), -1} #define VIRTNET_SQ_STAT_QSTAT(name, m) \ { \ name, \ offsetof(struct virtnet_sq_stats, m), \ offsetof(struct netdev_queue_stats_tx, m), \ } #define VIRTNET_RQ_STAT_QSTAT(name, m) \ { \ name, \ offsetof(struct virtnet_rq_stats, m), \ offsetof(struct netdev_queue_stats_rx, m), \ } static const struct virtnet_stat_desc virtnet_sq_stats_desc[] = { VIRTNET_SQ_STAT("xdp_tx", xdp_tx), VIRTNET_SQ_STAT("xdp_tx_drops", xdp_tx_drops), VIRTNET_SQ_STAT("kicks", kicks), VIRTNET_SQ_STAT("tx_timeouts", tx_timeouts), }; static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = { VIRTNET_RQ_STAT("drops", drops), VIRTNET_RQ_STAT("xdp_packets", xdp_packets), VIRTNET_RQ_STAT("xdp_tx", xdp_tx), VIRTNET_RQ_STAT("xdp_redirects", xdp_redirects), VIRTNET_RQ_STAT("xdp_drops", xdp_drops), VIRTNET_RQ_STAT("kicks", kicks), }; static const struct virtnet_stat_desc virtnet_sq_stats_desc_qstat[] = { VIRTNET_SQ_STAT_QSTAT("packets", packets), VIRTNET_SQ_STAT_QSTAT("bytes", bytes), VIRTNET_SQ_STAT_QSTAT("stop", stop), VIRTNET_SQ_STAT_QSTAT("wake", wake), }; static const struct virtnet_stat_desc virtnet_rq_stats_desc_qstat[] = { VIRTNET_RQ_STAT_QSTAT("packets", packets), VIRTNET_RQ_STAT_QSTAT("bytes", bytes), }; #define VIRTNET_STATS_DESC_CQ(name) \ {#name, offsetof(struct virtio_net_stats_cvq, name), -1} #define VIRTNET_STATS_DESC_RX(class, name) \ {#name, offsetof(struct virtio_net_stats_rx_ ## class, rx_ ## name), -1} #define VIRTNET_STATS_DESC_TX(class, name) \ {#name, offsetof(struct virtio_net_stats_tx_ ## class, tx_ ## name), -1} static const struct virtnet_stat_desc virtnet_stats_cvq_desc[] = { VIRTNET_STATS_DESC_CQ(command_num), VIRTNET_STATS_DESC_CQ(ok_num), }; static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc[] = { VIRTNET_STATS_DESC_RX(basic, packets), VIRTNET_STATS_DESC_RX(basic, bytes), VIRTNET_STATS_DESC_RX(basic, notifications), VIRTNET_STATS_DESC_RX(basic, interrupts), }; static const struct virtnet_stat_desc virtnet_stats_tx_basic_desc[] = { VIRTNET_STATS_DESC_TX(basic, packets), VIRTNET_STATS_DESC_TX(basic, bytes), VIRTNET_STATS_DESC_TX(basic, notifications), VIRTNET_STATS_DESC_TX(basic, interrupts), }; static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc[] = { VIRTNET_STATS_DESC_RX(csum, needs_csum), }; static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc[] = { VIRTNET_STATS_DESC_TX(gso, gso_packets_noseg), VIRTNET_STATS_DESC_TX(gso, gso_bytes_noseg), }; static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc[] = { VIRTNET_STATS_DESC_RX(speed, ratelimit_bytes), }; static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc[] = { VIRTNET_STATS_DESC_TX(speed, ratelimit_bytes), }; #define VIRTNET_STATS_DESC_RX_QSTAT(class, name, qstat_field) \ { \ #name, \ offsetof(struct virtio_net_stats_rx_ ## class, rx_ ## name), \ offsetof(struct netdev_queue_stats_rx, qstat_field), \ } #define VIRTNET_STATS_DESC_TX_QSTAT(class, name, qstat_field) \ { \ #name, \ offsetof(struct virtio_net_stats_tx_ ## class, tx_ ## name), \ offsetof(struct netdev_queue_stats_tx, qstat_field), \ } static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc_qstat[] = { VIRTNET_STATS_DESC_RX_QSTAT(basic, drops, hw_drops), VIRTNET_STATS_DESC_RX_QSTAT(basic, drop_overruns, hw_drop_overruns), }; static const struct virtnet_stat_desc virtnet_stats_tx_basic_desc_qstat[] = { VIRTNET_STATS_DESC_TX_QSTAT(basic, drops, hw_drops), VIRTNET_STATS_DESC_TX_QSTAT(basic, drop_malformed, hw_drop_errors), }; static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc_qstat[] = { VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_valid, csum_unnecessary), VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_none, csum_none), VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_bad, csum_bad), }; static const struct virtnet_stat_desc virtnet_stats_tx_csum_desc_qstat[] = { VIRTNET_STATS_DESC_TX_QSTAT(csum, csum_none, csum_none), VIRTNET_STATS_DESC_TX_QSTAT(csum, needs_csum, needs_csum), }; static const struct virtnet_stat_desc virtnet_stats_rx_gso_desc_qstat[] = { VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_packets, hw_gro_packets), VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_bytes, hw_gro_bytes), VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_packets_coalesced, hw_gro_wire_packets), VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_bytes_coalesced, hw_gro_wire_bytes), }; static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc_qstat[] = { VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_packets, hw_gso_packets), VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_bytes, hw_gso_bytes), VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_segments, hw_gso_wire_packets), VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_segments_bytes, hw_gso_wire_bytes), }; static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc_qstat[] = { VIRTNET_STATS_DESC_RX_QSTAT(speed, ratelimit_packets, hw_drop_ratelimits), }; static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc_qstat[] = { VIRTNET_STATS_DESC_TX_QSTAT(speed, ratelimit_packets, hw_drop_ratelimits), }; #define VIRTNET_Q_TYPE_RX 0 #define VIRTNET_Q_TYPE_TX 1 #define VIRTNET_Q_TYPE_CQ 2 struct virtnet_interrupt_coalesce { u32 max_packets; u32 max_usecs; }; /* The dma information of pages allocated at a time. */ struct virtnet_rq_dma { dma_addr_t addr; u32 ref; u16 len; u16 need_sync; }; /* Internal representation of a send virtqueue */ struct send_queue { /* Virtqueue associated with this send _queue */ struct virtqueue *vq; /* TX: fragments + linear part + virtio header */ struct scatterlist sg[MAX_SKB_FRAGS + 2]; /* Name of the send queue: output.$index */ char name[16]; struct virtnet_sq_stats stats; struct virtnet_interrupt_coalesce intr_coal; struct napi_struct napi; /* Record whether sq is in reset state. */ bool reset; }; /* Internal representation of a receive virtqueue */ struct receive_queue { /* Virtqueue associated with this receive_queue */ struct virtqueue *vq; struct napi_struct napi; struct bpf_prog __rcu *xdp_prog; struct virtnet_rq_stats stats; /* The number of rx notifications */ u16 calls; /* Is dynamic interrupt moderation enabled? */ bool dim_enabled; /* Used to protect dim_enabled and inter_coal */ struct mutex dim_lock; /* Dynamic Interrupt Moderation */ struct dim dim; u32 packets_in_napi; struct virtnet_interrupt_coalesce intr_coal; /* Chain pages by the private ptr. */ struct page *pages; /* Average packet length for mergeable receive buffers. */ struct ewma_pkt_len mrg_avg_pkt_len; /* Page frag for packet buffer allocation. */ struct page_frag alloc_frag; /* RX: fragments + linear part + virtio header */ struct scatterlist sg[MAX_SKB_FRAGS + 2]; /* Min single buffer size for mergeable buffers case. */ unsigned int min_buf_len; /* Name of this receive queue: input.$index */ char name[16]; struct xdp_rxq_info xdp_rxq; /* Record the last dma info to free after new pages is allocated. */ struct virtnet_rq_dma *last_dma; struct xsk_buff_pool *xsk_pool; /* xdp rxq used by xsk */ struct xdp_rxq_info xsk_rxq_info; struct xdp_buff **xsk_buffs; /* Do dma by self */ bool do_dma; }; /* This structure can contain rss message with maximum settings for indirection table and keysize * Note, that default structure that describes RSS configuration virtio_net_rss_config * contains same info but can't handle table values. * In any case, structure would be passed to virtio hw through sg_buf split by parts * because table sizes may be differ according to the device configuration. */ #define VIRTIO_NET_RSS_MAX_KEY_SIZE 40 #define VIRTIO_NET_RSS_MAX_TABLE_LEN 128 struct virtio_net_ctrl_rss { u32 hash_types; u16 indirection_table_mask; u16 unclassified_queue; u16 indirection_table[VIRTIO_NET_RSS_MAX_TABLE_LEN]; u16 max_tx_vq; u8 hash_key_length; u8 key[VIRTIO_NET_RSS_MAX_KEY_SIZE]; }; /* Control VQ buffers: protected by the rtnl lock */ struct control_buf { struct virtio_net_ctrl_hdr hdr; virtio_net_ctrl_ack status; }; struct virtnet_info { struct virtio_device *vdev; struct virtqueue *cvq; struct net_device *dev; struct send_queue *sq; struct receive_queue *rq; unsigned int status; /* Max # of queue pairs supported by the device */ u16 max_queue_pairs; /* # of queue pairs currently used by the driver */ u16 curr_queue_pairs; /* # of XDP queue pairs currently used by the driver */ u16 xdp_queue_pairs; /* xdp_queue_pairs may be 0, when xdp is already loaded. So add this. */ bool xdp_enabled; /* I like... big packets and I cannot lie! */ bool big_packets; /* number of sg entries allocated for big packets */ unsigned int big_packets_num_skbfrags; /* Host will merge rx buffers for big packets (shake it! shake it!) */ bool mergeable_rx_bufs; /* Host supports rss and/or hash report */ bool has_rss; bool has_rss_hash_report; u8 rss_key_size; u16 rss_indir_table_size; u32 rss_hash_types_supported; u32 rss_hash_types_saved; struct virtio_net_ctrl_rss rss; /* Has control virtqueue */ bool has_cvq; /* Lock to protect the control VQ */ struct mutex cvq_lock; /* Host can handle any s/g split between our header and packet data */ bool any_header_sg; /* Packet virtio header size */ u8 hdr_len; /* Work struct for delayed refilling if we run low on memory. */ struct delayed_work refill; /* Is delayed refill enabled? */ bool refill_enabled; /* The lock to synchronize the access to refill_enabled */ spinlock_t refill_lock; /* Work struct for config space updates */ struct work_struct config_work; /* Work struct for setting rx mode */ struct work_struct rx_mode_work; /* OK to queue work setting RX mode? */ bool rx_mode_work_enabled; /* Does the affinity hint is set for virtqueues? */ bool affinity_hint_set; /* CPU hotplug instances for online & dead */ struct hlist_node node; struct hlist_node node_dead; struct control_buf *ctrl; /* Ethtool settings */ u8 duplex; u32 speed; /* Is rx dynamic interrupt moderation enabled? */ bool rx_dim_enabled; /* Interrupt coalescing settings */ struct virtnet_interrupt_coalesce intr_coal_tx; struct virtnet_interrupt_coalesce intr_coal_rx; unsigned long guest_offloads; unsigned long guest_offloads_capable; /* failover when STANDBY feature enabled */ struct failover *failover; u64 device_stats_cap; }; struct padded_vnet_hdr { struct virtio_net_hdr_v1_hash hdr; /* * hdr is in a separate sg buffer, and data sg buffer shares same page * with this header sg. This padding makes next sg 16 byte aligned * after the header. */ char padding[12]; }; struct virtio_net_common_hdr { union { struct virtio_net_hdr hdr; struct virtio_net_hdr_mrg_rxbuf mrg_hdr; struct virtio_net_hdr_v1_hash hash_v1_hdr; }; }; static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf); static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp, struct net_device *dev, unsigned int *xdp_xmit, struct virtnet_rq_stats *stats); static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq, struct sk_buff *skb, u8 flags); static struct sk_buff *virtnet_skb_append_frag(struct sk_buff *head_skb, struct sk_buff *curr_skb, struct page *page, void *buf, int len, int truesize); static bool is_xdp_frame(void *ptr) { return (unsigned long)ptr & VIRTIO_XDP_FLAG; } static void *xdp_to_ptr(struct xdp_frame *ptr) { return (void *)((unsigned long)ptr | VIRTIO_XDP_FLAG); } static struct xdp_frame *ptr_to_xdp(void *ptr) { return (struct xdp_frame *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG); } static bool is_orphan_skb(void *ptr) { return (unsigned long)ptr & VIRTIO_ORPHAN_FLAG; } static void *skb_to_ptr(struct sk_buff *skb, bool orphan) { return (void *)((unsigned long)skb | (orphan ? VIRTIO_ORPHAN_FLAG : 0)); } static struct sk_buff *ptr_to_skb(void *ptr) { return (struct sk_buff *)((unsigned long)ptr & ~VIRTIO_ORPHAN_FLAG); } static void __free_old_xmit(struct send_queue *sq, struct netdev_queue *txq, bool in_napi, struct virtnet_sq_free_stats *stats) { unsigned int len; void *ptr; while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) { if (!is_xdp_frame(ptr)) { struct sk_buff *skb = ptr_to_skb(ptr); pr_debug("Sent skb %p\n", skb); if (is_orphan_skb(ptr)) { stats->packets++; stats->bytes += skb->len; } else { stats->napi_packets++; stats->napi_bytes += skb->len; } napi_consume_skb(skb, in_napi); } else { struct xdp_frame *frame = ptr_to_xdp(ptr); stats->packets++; stats->bytes += xdp_get_frame_len(frame); xdp_return_frame(frame); } } netdev_tx_completed_queue(txq, stats->napi_packets, stats->napi_bytes); } /* Converting between virtqueue no. and kernel tx/rx queue no. * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq */ static int vq2txq(struct virtqueue *vq) { return (vq->index - 1) / 2; } static int txq2vq(int txq) { return txq * 2 + 1; } static int vq2rxq(struct virtqueue *vq) { return vq->index / 2; } static int rxq2vq(int rxq) { return rxq * 2; } static int vq_type(struct virtnet_info *vi, int qid) { if (qid == vi->max_queue_pairs * 2) return VIRTNET_Q_TYPE_CQ; if (qid % 2) return VIRTNET_Q_TYPE_TX; return VIRTNET_Q_TYPE_RX; } static inline struct virtio_net_common_hdr * skb_vnet_common_hdr(struct sk_buff *skb) { return (struct virtio_net_common_hdr *)skb->cb; } /* * private is used to chain pages for big packets, put the whole * most recent used list in the beginning for reuse */ static void give_pages(struct receive_queue *rq, struct page *page) { struct page *end; /* Find end of list, sew whole thing into vi->rq.pages. */ for (end = page; end->private; end = (struct page *)end->private); end->private = (unsigned long)rq->pages; rq->pages = page; } static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask) { struct page *p = rq->pages; if (p) { rq->pages = (struct page *)p->private; /* clear private here, it is used to chain pages */ p->private = 0; } else p = alloc_page(gfp_mask); return p; } static void virtnet_rq_free_buf(struct virtnet_info *vi, struct receive_queue *rq, void *buf) { if (vi->mergeable_rx_bufs) put_page(virt_to_head_page(buf)); else if (vi->big_packets) give_pages(rq, buf); else put_page(virt_to_head_page(buf)); } static void enable_delayed_refill(struct virtnet_info *vi) { spin_lock_bh(&vi->refill_lock); vi->refill_enabled = true; spin_unlock_bh(&vi->refill_lock); } static void disable_delayed_refill(struct virtnet_info *vi) { spin_lock_bh(&vi->refill_lock); vi->refill_enabled = false; spin_unlock_bh(&vi->refill_lock); } static void enable_rx_mode_work(struct virtnet_info *vi) { rtnl_lock(); vi->rx_mode_work_enabled = true; rtnl_unlock(); } static void disable_rx_mode_work(struct virtnet_info *vi) { rtnl_lock(); vi->rx_mode_work_enabled = false; rtnl_unlock(); } static void virtqueue_napi_schedule(struct napi_struct *napi, struct virtqueue *vq) { if (napi_schedule_prep(napi)) { virtqueue_disable_cb(vq); __napi_schedule(napi); } } static bool virtqueue_napi_complete(struct napi_struct *napi, struct virtqueue *vq, int processed) { int opaque; opaque = virtqueue_enable_cb_prepare(vq); if (napi_complete_done(napi, processed)) { if (unlikely(virtqueue_poll(vq, opaque))) virtqueue_napi_schedule(napi, vq); else return true; } else { virtqueue_disable_cb(vq); } return false; } static void skb_xmit_done(struct virtqueue *vq) { struct virtnet_info *vi = vq->vdev->priv; struct napi_struct *napi = &vi->sq[vq2txq(vq)].napi; /* Suppress further interrupts. */ virtqueue_disable_cb(vq); if (napi->weight) virtqueue_napi_schedule(napi, vq); else /* We were probably waiting for more output buffers. */ netif_wake_subqueue(vi->dev, vq2txq(vq)); } #define MRG_CTX_HEADER_SHIFT 22 static void *mergeable_len_to_ctx(unsigned int truesize, unsigned int headroom) { return (void *)(unsigned long)((headroom << MRG_CTX_HEADER_SHIFT) | truesize); } static unsigned int mergeable_ctx_to_headroom(void *mrg_ctx) { return (unsigned long)mrg_ctx >> MRG_CTX_HEADER_SHIFT; } static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx) { return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1); } static struct sk_buff *virtnet_build_skb(void *buf, unsigned int buflen, unsigned int headroom, unsigned int len) { struct sk_buff *skb; skb = build_skb(buf, buflen); if (unlikely(!skb)) return NULL; skb_reserve(skb, headroom); skb_put(skb, len); return skb; } /* Called from bottom half context */ static struct sk_buff *page_to_skb(struct virtnet_info *vi, struct receive_queue *rq, struct page *page, unsigned int offset, unsigned int len, unsigned int truesize, unsigned int headroom) { struct sk_buff *skb; struct virtio_net_common_hdr *hdr; unsigned int copy, hdr_len, hdr_padded_len; struct page *page_to_free = NULL; int tailroom, shinfo_size; char *p, *hdr_p, *buf; p = page_address(page) + offset; hdr_p = p; hdr_len = vi->hdr_len; if (vi->mergeable_rx_bufs) hdr_padded_len = hdr_len; else hdr_padded_len = sizeof(struct padded_vnet_hdr); buf = p - headroom; len -= hdr_len; offset += hdr_padded_len; p += hdr_padded_len; tailroom = truesize - headroom - hdr_padded_len - len; shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); if (!NET_IP_ALIGN && len > GOOD_COPY_LEN && tailroom >= shinfo_size) { skb = virtnet_build_skb(buf, truesize, p - buf, len); if (unlikely(!skb)) return NULL; page = (struct page *)page->private; if (page) give_pages(rq, page); goto ok; } /* copy small packet so we can reuse these pages for small data */ skb = napi_alloc_skb(&rq->napi, GOOD_COPY_LEN); if (unlikely(!skb)) return NULL; /* Copy all frame if it fits skb->head, otherwise * we let virtio_net_hdr_to_skb() and GRO pull headers as needed. */ if (len <= skb_tailroom(skb)) copy = len; else copy = ETH_HLEN; skb_put_data(skb, p, copy); len -= copy; offset += copy; if (vi->mergeable_rx_bufs) { if (len) skb_add_rx_frag(skb, 0, page, offset, len, truesize); else page_to_free = page; goto ok; } /* * Verify that we can indeed put this data into a skb. * This is here to handle cases when the device erroneously * tries to receive more than is possible. This is usually * the case of a broken device. */ if (unlikely(len > MAX_SKB_FRAGS * PAGE_SIZE)) { net_dbg_ratelimited("%s: too much data\n", skb->dev->name); dev_kfree_skb(skb); return NULL; } BUG_ON(offset >= PAGE_SIZE); while (len) { unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len); skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset, frag_size, truesize); len -= frag_size; page = (struct page *)page->private; offset = 0; } if (page) give_pages(rq, page); ok: hdr = skb_vnet_common_hdr(skb); memcpy(hdr, hdr_p, hdr_len); if (page_to_free) put_page(page_to_free); return skb; } static void virtnet_rq_unmap(struct receive_queue *rq, void *buf, u32 len) { struct page *page = virt_to_head_page(buf); struct virtnet_rq_dma *dma; void *head; int offset; head = page_address(page); dma = head; --dma->ref; if (dma->need_sync && len) { offset = buf - (head + sizeof(*dma)); virtqueue_dma_sync_single_range_for_cpu(rq->vq, dma->addr, offset, len, DMA_FROM_DEVICE); } if (dma->ref) return; virtqueue_dma_unmap_single_attrs(rq->vq, dma->addr, dma->len, DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); put_page(page); } static void *virtnet_rq_get_buf(struct receive_queue *rq, u32 *len, void **ctx) { void *buf; buf = virtqueue_get_buf_ctx(rq->vq, len, ctx); if (buf && rq->do_dma) virtnet_rq_unmap(rq, buf, *len); return buf; } static void virtnet_rq_init_one_sg(struct receive_queue *rq, void *buf, u32 len) { struct virtnet_rq_dma *dma; dma_addr_t addr; u32 offset; void *head; if (!rq->do_dma) { sg_init_one(rq->sg, buf, len); return; } head = page_address(rq->alloc_frag.page); offset = buf - head; dma = head; addr = dma->addr - sizeof(*dma) + offset; sg_init_table(rq->sg, 1); rq->sg[0].dma_address = addr; rq->sg[0].length = len; } static void *virtnet_rq_alloc(struct receive_queue *rq, u32 size, gfp_t gfp) { struct page_frag *alloc_frag = &rq->alloc_frag; struct virtnet_rq_dma *dma; void *buf, *head; dma_addr_t addr; if (unlikely(!skb_page_frag_refill(size, alloc_frag, gfp))) return NULL; head = page_address(alloc_frag->page); if (rq->do_dma) { dma = head; /* new pages */ if (!alloc_frag->offset) { if (rq->last_dma) { /* Now, the new page is allocated, the last dma * will not be used. So the dma can be unmapped * if the ref is 0. */ virtnet_rq_unmap(rq, rq->last_dma, 0); rq->last_dma = NULL; } dma->len = alloc_frag->size - sizeof(*dma); addr = virtqueue_dma_map_single_attrs(rq->vq, dma + 1, dma->len, DMA_FROM_DEVICE, 0); if (virtqueue_dma_mapping_error(rq->vq, addr)) return NULL; dma->addr = addr; dma->need_sync = virtqueue_dma_need_sync(rq->vq, addr); /* Add a reference to dma to prevent the entire dma from * being released during error handling. This reference * will be freed after the pages are no longer used. */ get_page(alloc_frag->page); dma->ref = 1; alloc_frag->offset = sizeof(*dma); rq->last_dma = dma; } ++dma->ref; } buf = head + alloc_frag->offset; get_page(alloc_frag->page); alloc_frag->offset += size; return buf; } static void virtnet_rq_unmap_free_buf(struct virtqueue *vq, void *buf) { struct virtnet_info *vi = vq->vdev->priv; struct receive_queue *rq; int i = vq2rxq(vq); rq = &vi->rq[i]; if (rq->xsk_pool) { xsk_buff_free((struct xdp_buff *)buf); return; } if (rq->do_dma) virtnet_rq_unmap(rq, buf, 0); virtnet_rq_free_buf(vi, rq, buf); } static void free_old_xmit(struct send_queue *sq, struct netdev_queue *txq, bool in_napi) { struct virtnet_sq_free_stats stats = {0}; __free_old_xmit(sq, txq, in_napi, &stats); /* Avoid overhead when no packets have been processed * happens when called speculatively from start_xmit. */ if (!stats.packets && !stats.napi_packets) return; u64_stats_update_begin(&sq->stats.syncp); u64_stats_add(&sq->stats.bytes, stats.bytes + stats.napi_bytes); u64_stats_add(&sq->stats.packets, stats.packets + stats.napi_packets); u64_stats_update_end(&sq->stats.syncp); } static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q) { if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs)) return false; else if (q < vi->curr_queue_pairs) return true; else return false; } static void check_sq_full_and_disable(struct virtnet_info *vi, struct net_device *dev, struct send_queue *sq) { bool use_napi = sq->napi.weight; int qnum; qnum = sq - vi->sq; /* If running out of space, stop queue to avoid getting packets that we * are then unable to transmit. * An alternative would be to force queuing layer to requeue the skb by * returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not be * returned in a normal path of operation: it means that driver is not * maintaining the TX queue stop/start state properly, and causes * the stack to do a non-trivial amount of useless work. * Since most packets only take 1 or 2 ring slots, stopping the queue * early means 16 slots are typically wasted. */ if (sq->vq->num_free < 2+MAX_SKB_FRAGS) { struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum); netif_tx_stop_queue(txq); u64_stats_update_begin(&sq->stats.syncp); u64_stats_inc(&sq->stats.stop); u64_stats_update_end(&sq->stats.syncp); if (use_napi) { if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) virtqueue_napi_schedule(&sq->napi, sq->vq); } else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) { /* More just got used, free them then recheck. */ free_old_xmit(sq, txq, false); if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) { netif_start_subqueue(dev, qnum); u64_stats_update_begin(&sq->stats.syncp); u64_stats_inc(&sq->stats.wake); u64_stats_update_end(&sq->stats.syncp); virtqueue_disable_cb(sq->vq); } } } } static void sg_fill_dma(struct scatterlist *sg, dma_addr_t addr, u32 len) { sg->dma_address = addr; sg->length = len; } static struct xdp_buff *buf_to_xdp(struct virtnet_info *vi, struct receive_queue *rq, void *buf, u32 len) { struct xdp_buff *xdp; u32 bufsize; xdp = (struct xdp_buff *)buf; bufsize = xsk_pool_get_rx_frame_size(rq->xsk_pool) + vi->hdr_len; if (unlikely(len > bufsize)) { pr_debug("%s: rx error: len %u exceeds truesize %u\n", vi->dev->name, len, bufsize); DEV_STATS_INC(vi->dev, rx_length_errors); xsk_buff_free(xdp); return NULL; } xsk_buff_set_size(xdp, len); xsk_buff_dma_sync_for_cpu(xdp); return xdp; } static struct sk_buff *xsk_construct_skb(struct receive_queue *rq, struct xdp_buff *xdp) { unsigned int metasize = xdp->data - xdp->data_meta; struct sk_buff *skb; unsigned int size; size = xdp->data_end - xdp->data_hard_start; skb = napi_alloc_skb(&rq->napi, size); if (unlikely(!skb)) { xsk_buff_free(xdp); return NULL; } skb_reserve(skb, xdp->data_meta - xdp->data_hard_start); size = xdp->data_end - xdp->data_meta; memcpy(__skb_put(skb, size), xdp->data_meta, size); if (metasize) { __skb_pull(skb, metasize); skb_metadata_set(skb, metasize); } xsk_buff_free(xdp); return skb; } static struct sk_buff *virtnet_receive_xsk_small(struct net_device *dev, struct virtnet_info *vi, struct receive_queue *rq, struct xdp_buff *xdp, unsigned int *xdp_xmit, struct virtnet_rq_stats *stats) { struct bpf_prog *prog; u32 ret; ret = XDP_PASS; rcu_read_lock(); prog = rcu_dereference(rq->xdp_prog); if (prog) ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats); rcu_read_unlock(); switch (ret) { case XDP_PASS: return xsk_construct_skb(rq, xdp); case XDP_TX: case XDP_REDIRECT: return NULL; default: /* drop packet */ xsk_buff_free(xdp); u64_stats_inc(&stats->drops); return NULL; } } static void xsk_drop_follow_bufs(struct net_device *dev, struct receive_queue *rq, u32 num_buf, struct virtnet_rq_stats *stats) { struct xdp_buff *xdp; u32 len; while (num_buf-- > 1) { xdp = virtqueue_get_buf(rq->vq, &len); if (unlikely(!xdp)) { pr_debug("%s: rx error: %d buffers missing\n", dev->name, num_buf); DEV_STATS_INC(dev, rx_length_errors); break; } u64_stats_add(&stats->bytes, len); xsk_buff_free(xdp); } } static int xsk_append_merge_buffer(struct virtnet_info *vi, struct receive_queue *rq, struct sk_buff *head_skb, u32 num_buf, struct virtio_net_hdr_mrg_rxbuf *hdr, struct virtnet_rq_stats *stats) { struct sk_buff *curr_skb; struct xdp_buff *xdp; u32 len, truesize; struct page *page; void *buf; curr_skb = head_skb; while (--num_buf) { buf = virtqueue_get_buf(rq->vq, &len); if (unlikely(!buf)) { pr_debug("%s: rx error: %d buffers out of %d missing\n", vi->dev->name, num_buf, virtio16_to_cpu(vi->vdev, hdr->num_buffers)); DEV_STATS_INC(vi->dev, rx_length_errors); return -EINVAL; } u64_stats_add(&stats->bytes, len); xdp = buf_to_xdp(vi, rq, buf, len); if (!xdp) goto err; buf = napi_alloc_frag(len); if (!buf) { xsk_buff_free(xdp); goto err; } memcpy(buf, xdp->data - vi->hdr_len, len); xsk_buff_free(xdp); page = virt_to_page(buf); truesize = len; curr_skb = virtnet_skb_append_frag(head_skb, curr_skb, page, buf, len, truesize); if (!curr_skb) { put_page(page); goto err; } } return 0; err: xsk_drop_follow_bufs(vi->dev, rq, num_buf, stats); return -EINVAL; } static struct sk_buff *virtnet_receive_xsk_merge(struct net_device *dev, struct virtnet_info *vi, struct receive_queue *rq, struct xdp_buff *xdp, unsigned int *xdp_xmit, struct virtnet_rq_stats *stats) { struct virtio_net_hdr_mrg_rxbuf *hdr; struct bpf_prog *prog; struct sk_buff *skb; u32 ret, num_buf; hdr = xdp->data - vi->hdr_len; num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); ret = XDP_PASS; rcu_read_lock(); prog = rcu_dereference(rq->xdp_prog); /* TODO: support multi buffer. */ if (prog && num_buf == 1) ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats); rcu_read_unlock(); switch (ret) { case XDP_PASS: skb = xsk_construct_skb(rq, xdp); if (!skb) goto drop_bufs; if (xsk_append_merge_buffer(vi, rq, skb, num_buf, hdr, stats)) { dev_kfree_skb(skb); goto drop; } return skb; case XDP_TX: case XDP_REDIRECT: return NULL; default: /* drop packet */ xsk_buff_free(xdp); } drop_bufs: xsk_drop_follow_bufs(dev, rq, num_buf, stats); drop: u64_stats_inc(&stats->drops); return NULL; } static void virtnet_receive_xsk_buf(struct virtnet_info *vi, struct receive_queue *rq, void *buf, u32 len, unsigned int *xdp_xmit, struct virtnet_rq_stats *stats) { struct net_device *dev = vi->dev; struct sk_buff *skb = NULL; struct xdp_buff *xdp; u8 flags; len -= vi->hdr_len; u64_stats_add(&stats->bytes, len); xdp = buf_to_xdp(vi, rq, buf, len); if (!xdp) return; if (unlikely(len < ETH_HLEN)) { pr_debug("%s: short packet %i\n", dev->name, len); DEV_STATS_INC(dev, rx_length_errors); xsk_buff_free(xdp); return; } flags = ((struct virtio_net_common_hdr *)(xdp->data - vi->hdr_len))->hdr.flags; if (!vi->mergeable_rx_bufs) skb = virtnet_receive_xsk_small(dev, vi, rq, xdp, xdp_xmit, stats); else skb = virtnet_receive_xsk_merge(dev, vi, rq, xdp, xdp_xmit, stats); if (skb) virtnet_receive_done(vi, rq, skb, flags); } static int virtnet_add_recvbuf_xsk(struct virtnet_info *vi, struct receive_queue *rq, struct xsk_buff_pool *pool, gfp_t gfp) { struct xdp_buff **xsk_buffs; dma_addr_t addr; int err = 0; u32 len, i; int num; xsk_buffs = rq->xsk_buffs; num = xsk_buff_alloc_batch(pool, xsk_buffs, rq->vq->num_free); if (!num) return -ENOMEM; len = xsk_pool_get_rx_frame_size(pool) + vi->hdr_len; for (i = 0; i < num; ++i) { /* Use the part of XDP_PACKET_HEADROOM as the virtnet hdr space. * We assume XDP_PACKET_HEADROOM is larger than hdr->len. * (see function virtnet_xsk_pool_enable) */ addr = xsk_buff_xdp_get_dma(xsk_buffs[i]) - vi->hdr_len; sg_init_table(rq->sg, 1); sg_fill_dma(rq->sg, addr, len); err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, xsk_buffs[i], gfp); if (err) goto err; } return num; err: for (; i < num; ++i) xsk_buff_free(xsk_buffs[i]); return err; } static int virtnet_xsk_wakeup(struct net_device *dev, u32 qid, u32 flag) { struct virtnet_info *vi = netdev_priv(dev); struct send_queue *sq; if (!netif_running(dev)) return -ENETDOWN; if (qid >= vi->curr_queue_pairs) return -EINVAL; sq = &vi->sq[qid]; if (napi_if_scheduled_mark_missed(&sq->napi)) return 0; local_bh_disable(); virtqueue_napi_schedule(&sq->napi, sq->vq); local_bh_enable(); return 0; } static int __virtnet_xdp_xmit_one(struct virtnet_info *vi, struct send_queue *sq, struct xdp_frame *xdpf) { struct virtio_net_hdr_mrg_rxbuf *hdr; struct skb_shared_info *shinfo; u8 nr_frags = 0; int err, i; if (unlikely(xdpf->headroom < vi->hdr_len)) return -EOVERFLOW; if (unlikely(xdp_frame_has_frags(xdpf))) { shinfo = xdp_get_shared_info_from_frame(xdpf); nr_frags = shinfo->nr_frags; } /* In wrapping function virtnet_xdp_xmit(), we need to free * up the pending old buffers, where we need to calculate the * position of skb_shared_info in xdp_get_frame_len() and * xdp_return_frame(), which will involve to xdpf->data and * xdpf->headroom. Therefore, we need to update the value of * headroom synchronously here. */ xdpf->headroom -= vi->hdr_len; xdpf->data -= vi->hdr_len; /* Zero header and leave csum up to XDP layers */ hdr = xdpf->data; memset(hdr, 0, vi->hdr_len); xdpf->len += vi->hdr_len; sg_init_table(sq->sg, nr_frags + 1); sg_set_buf(sq->sg, xdpf->data, xdpf->len); for (i = 0; i < nr_frags; i++) { skb_frag_t *frag = &shinfo->frags[i]; sg_set_page(&sq->sg[i + 1], skb_frag_page(frag), skb_frag_size(frag), skb_frag_off(frag)); } err = virtqueue_add_outbuf(sq->vq, sq->sg, nr_frags + 1, xdp_to_ptr(xdpf), GFP_ATOMIC); if (unlikely(err)) return -ENOSPC; /* Caller handle free/refcnt */ return 0; } /* when vi->curr_queue_pairs > nr_cpu_ids, the txq/sq is only used for xdp tx on * the current cpu, so it does not need to be locked. * * Here we use marco instead of inline functions because we have to deal with * three issues at the same time: 1. the choice of sq. 2. judge and execute the * lock/unlock of txq 3. make sparse happy. It is difficult for two inline * functions to perfectly solve these three problems at the same time. */ #define virtnet_xdp_get_sq(vi) ({ \ int cpu = smp_processor_id(); \ struct netdev_queue *txq; \ typeof(vi) v = (vi); \ unsigned int qp; \ \ if (v->curr_queue_pairs > nr_cpu_ids) { \ qp = v->curr_queue_pairs - v->xdp_queue_pairs; \ qp += cpu; \ txq = netdev_get_tx_queue(v->dev, qp); \ __netif_tx_acquire(txq); \ } else { \ qp = cpu % v->curr_queue_pairs; \ txq = netdev_get_tx_queue(v->dev, qp); \ __netif_tx_lock(txq, cpu); \ } \ v->sq + qp; \ }) #define virtnet_xdp_put_sq(vi, q) { \ struct netdev_queue *txq; \ typeof(vi) v = (vi); \ \ txq = netdev_get_tx_queue(v->dev, (q) - v->sq); \ if (v->curr_queue_pairs > nr_cpu_ids) \ __netif_tx_release(txq); \ else \ __netif_tx_unlock(txq); \ } static int virtnet_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, u32 flags) { struct virtnet_info *vi = netdev_priv(dev); struct virtnet_sq_free_stats stats = {0}; struct receive_queue *rq = vi->rq; struct bpf_prog *xdp_prog; struct send_queue *sq; int nxmit = 0; int kicks = 0; int ret; int i; /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this * indicate XDP resources have been successfully allocated. */ xdp_prog = rcu_access_pointer(rq->xdp_prog); if (!xdp_prog) return -ENXIO; sq = virtnet_xdp_get_sq(vi); if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) { ret = -EINVAL; goto out; } /* Free up any pending old buffers before queueing new ones. */ __free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq), false, &stats); for (i = 0; i < n; i++) { struct xdp_frame *xdpf = frames[i]; if (__virtnet_xdp_xmit_one(vi, sq, xdpf)) break; nxmit++; } ret = nxmit; if (!is_xdp_raw_buffer_queue(vi, sq - vi->sq)) check_sq_full_and_disable(vi, dev, sq); if (flags & XDP_XMIT_FLUSH) { if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) kicks = 1; } out: u64_stats_update_begin(&sq->stats.syncp); u64_stats_add(&sq->stats.bytes, stats.bytes); u64_stats_add(&sq->stats.packets, stats.packets); u64_stats_add(&sq->stats.xdp_tx, n); u64_stats_add(&sq->stats.xdp_tx_drops, n - nxmit); u64_stats_add(&sq->stats.kicks, kicks); u64_stats_update_end(&sq->stats.syncp); virtnet_xdp_put_sq(vi, sq); return ret; } static void put_xdp_frags(struct xdp_buff *xdp) { struct skb_shared_info *shinfo; struct page *xdp_page; int i; if (xdp_buff_has_frags(xdp)) { shinfo = xdp_get_shared_info_from_buff(xdp); for (i = 0; i < shinfo->nr_frags; i++) { xdp_page = skb_frag_page(&shinfo->frags[i]); put_page(xdp_page); } } } static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp, struct net_device *dev, unsigned int *xdp_xmit, struct virtnet_rq_stats *stats) { struct xdp_frame *xdpf; int err; u32 act; act = bpf_prog_run_xdp(xdp_prog, xdp); u64_stats_inc(&stats->xdp_packets); switch (act) { case XDP_PASS: return act; case XDP_TX: u64_stats_inc(&stats->xdp_tx); xdpf = xdp_convert_buff_to_frame(xdp); if (unlikely(!xdpf)) { netdev_dbg(dev, "convert buff to frame failed for xdp\n"); return XDP_DROP; } err = virtnet_xdp_xmit(dev, 1, &xdpf, 0); if (unlikely(!err)) { xdp_return_frame_rx_napi(xdpf); } else if (unlikely(err < 0)) { trace_xdp_exception(dev, xdp_prog, act); return XDP_DROP; } *xdp_xmit |= VIRTIO_XDP_TX; return act; case XDP_REDIRECT: u64_stats_inc(&stats->xdp_redirects); err = xdp_do_redirect(dev, xdp, xdp_prog); if (err) return XDP_DROP; *xdp_xmit |= VIRTIO_XDP_REDIR; return act; default: bpf_warn_invalid_xdp_action(dev, xdp_prog, act); fallthrough; case XDP_ABORTED: trace_xdp_exception(dev, xdp_prog, act); fallthrough; case XDP_DROP: return XDP_DROP; } } static unsigned int virtnet_get_headroom(struct virtnet_info *vi) { return vi->xdp_enabled ? XDP_PACKET_HEADROOM : 0; } /* We copy the packet for XDP in the following cases: * * 1) Packet is scattered across multiple rx buffers. * 2) Headroom space is insufficient. * * This is inefficient but it's a temporary condition that * we hit right after XDP is enabled and until queue is refilled * with large buffers with sufficient headroom - so it should affect * at most queue size packets. * Afterwards, the conditions to enable * XDP should preclude the underlying device from sending packets * across multiple buffers (num_buf > 1), and we make sure buffers * have enough headroom. */ static struct page *xdp_linearize_page(struct receive_queue *rq, int *num_buf, struct page *p, int offset, int page_off, unsigned int *len) { int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); struct page *page; if (page_off + *len + tailroom > PAGE_SIZE) return NULL; page = alloc_page(GFP_ATOMIC); if (!page) return NULL; memcpy(page_address(page) + page_off, page_address(p) + offset, *len); page_off += *len; while (--*num_buf) { unsigned int buflen; void *buf; int off; buf = virtnet_rq_get_buf(rq, &buflen, NULL); if (unlikely(!buf)) goto err_buf; p = virt_to_head_page(buf); off = buf - page_address(p); /* guard against a misconfigured or uncooperative backend that * is sending packet larger than the MTU. */ if ((page_off + buflen + tailroom) > PAGE_SIZE) { put_page(p); goto err_buf; } memcpy(page_address(page) + page_off, page_address(p) + off, buflen); page_off += buflen; put_page(p); } /* Headroom does not contribute to packet length */ *len = page_off - XDP_PACKET_HEADROOM; return page; err_buf: __free_pages(page, 0); return NULL; } static struct sk_buff *receive_small_build_skb(struct virtnet_info *vi, unsigned int xdp_headroom, void *buf, unsigned int len) { unsigned int header_offset; unsigned int headroom; unsigned int buflen; struct sk_buff *skb; header_offset = VIRTNET_RX_PAD + xdp_headroom; headroom = vi->hdr_len + header_offset; buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); skb = virtnet_build_skb(buf, buflen, headroom, len); if (unlikely(!skb)) return NULL; buf += header_offset; memcpy(skb_vnet_common_hdr(skb), buf, vi->hdr_len); return skb; } static struct sk_buff *receive_small_xdp(struct net_device *dev, struct virtnet_info *vi, struct receive_queue *rq, struct bpf_prog *xdp_prog, void *buf, unsigned int xdp_headroom, unsigned int len, unsigned int *xdp_xmit, struct virtnet_rq_stats *stats) { unsigned int header_offset = VIRTNET_RX_PAD + xdp_headroom; unsigned int headroom = vi->hdr_len + header_offset; struct virtio_net_hdr_mrg_rxbuf *hdr = buf + header_offset; struct page *page = virt_to_head_page(buf); struct page *xdp_page; unsigned int buflen; struct xdp_buff xdp; struct sk_buff *skb; unsigned int metasize = 0; u32 act; if (unlikely(hdr->hdr.gso_type)) goto err_xdp; /* Partially checksummed packets must be dropped. */ if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) goto err_xdp; buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) { int offset = buf - page_address(page) + header_offset; unsigned int tlen = len + vi->hdr_len; int num_buf = 1; xdp_headroom = virtnet_get_headroom(vi); header_offset = VIRTNET_RX_PAD + xdp_headroom; headroom = vi->hdr_len + header_offset; buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); xdp_page = xdp_linearize_page(rq, &num_buf, page, offset, header_offset, &tlen); if (!xdp_page) goto err_xdp; buf = page_address(xdp_page); put_page(page); page = xdp_page; } xdp_init_buff(&xdp, buflen, &rq->xdp_rxq); xdp_prepare_buff(&xdp, buf + VIRTNET_RX_PAD + vi->hdr_len, xdp_headroom, len, true); act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats); switch (act) { case XDP_PASS: /* Recalculate length in case bpf program changed it */ len = xdp.data_end - xdp.data; metasize = xdp.data - xdp.data_meta; break; case XDP_TX: case XDP_REDIRECT: goto xdp_xmit; default: goto err_xdp; } skb = virtnet_build_skb(buf, buflen, xdp.data - buf, len); if (unlikely(!skb)) goto err; if (metasize) skb_metadata_set(skb, metasize); return skb; err_xdp: u64_stats_inc(&stats->xdp_drops); err: u64_stats_inc(&stats->drops); put_page(page); xdp_xmit: return NULL; } static struct sk_buff *receive_small(struct net_device *dev, struct virtnet_info *vi, struct receive_queue *rq, void *buf, void *ctx, unsigned int len, unsigned int *xdp_xmit, struct virtnet_rq_stats *stats) { unsigned int xdp_headroom = (unsigned long)ctx; struct page *page = virt_to_head_page(buf); struct sk_buff *skb; /* We passed the address of virtnet header to virtio-core, * so truncate the padding. */ buf -= VIRTNET_RX_PAD + xdp_headroom; len -= vi->hdr_len; u64_stats_add(&stats->bytes, len); if (unlikely(len > GOOD_PACKET_LEN)) { pr_debug("%s: rx error: len %u exceeds max size %d\n", dev->name, len, GOOD_PACKET_LEN); DEV_STATS_INC(dev, rx_length_errors); goto err; } if (unlikely(vi->xdp_enabled)) { struct bpf_prog *xdp_prog; rcu_read_lock(); xdp_prog = rcu_dereference(rq->xdp_prog); if (xdp_prog) { skb = receive_small_xdp(dev, vi, rq, xdp_prog, buf, xdp_headroom, len, xdp_xmit, stats); rcu_read_unlock(); return skb; } rcu_read_unlock(); } skb = receive_small_build_skb(vi, xdp_headroom, buf, len); if (likely(skb)) return skb; err: u64_stats_inc(&stats->drops); put_page(page); return NULL; } static struct sk_buff *receive_big(struct net_device *dev, struct virtnet_info *vi, struct receive_queue *rq, void *buf, unsigned int len, struct virtnet_rq_stats *stats) { struct page *page = buf; struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, 0); u64_stats_add(&stats->bytes, len - vi->hdr_len); if (unlikely(!skb)) goto err; return skb; err: u64_stats_inc(&stats->drops); give_pages(rq, page); return NULL; } static void mergeable_buf_free(struct receive_queue *rq, int num_buf, struct net_device *dev, struct virtnet_rq_stats *stats) { struct page *page; void *buf; int len; while (num_buf-- > 1) { buf = virtnet_rq_get_buf(rq, &len, NULL); if (unlikely(!buf)) { pr_debug("%s: rx error: %d buffers missing\n", dev->name, num_buf); DEV_STATS_INC(dev, rx_length_errors); break; } u64_stats_add(&stats->bytes, len); page = virt_to_head_page(buf); put_page(page); } } /* Why not use xdp_build_skb_from_frame() ? * XDP core assumes that xdp frags are PAGE_SIZE in length, while in * virtio-net there are 2 points that do not match its requirements: * 1. The size of the prefilled buffer is not fixed before xdp is set. * 2. xdp_build_skb_from_frame() does more checks that we don't need, * like eth_type_trans() (which virtio-net does in receive_buf()). */ static struct sk_buff *build_skb_from_xdp_buff(struct net_device *dev, struct virtnet_info *vi, struct xdp_buff *xdp, unsigned int xdp_frags_truesz) { struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); unsigned int headroom, data_len; struct sk_buff *skb; int metasize; u8 nr_frags; if (unlikely(xdp->data_end > xdp_data_hard_end(xdp))) { pr_debug("Error building skb as missing reserved tailroom for xdp"); return NULL; } if (unlikely(xdp_buff_has_frags(xdp))) nr_frags = sinfo->nr_frags; skb = build_skb(xdp->data_hard_start, xdp->frame_sz); if (unlikely(!skb)) return NULL; headroom = xdp->data - xdp->data_hard_start; data_len = xdp->data_end - xdp->data; skb_reserve(skb, headroom); __skb_put(skb, data_len); metasize = xdp->data - xdp->data_meta; metasize = metasize > 0 ? metasize : 0; if (metasize) skb_metadata_set(skb, metasize); if (unlikely(xdp_buff_has_frags(xdp))) xdp_update_skb_shared_info(skb, nr_frags, sinfo->xdp_frags_size, xdp_frags_truesz, xdp_buff_is_frag_pfmemalloc(xdp)); return skb; } /* TODO: build xdp in big mode */ static int virtnet_build_xdp_buff_mrg(struct net_device *dev, struct virtnet_info *vi, struct receive_queue *rq, struct xdp_buff *xdp, void *buf, unsigned int len, unsigned int frame_sz, int *num_buf, unsigned int *xdp_frags_truesize, struct virtnet_rq_stats *stats) { struct virtio_net_hdr_mrg_rxbuf *hdr = buf; unsigned int headroom, tailroom, room; unsigned int truesize, cur_frag_size; struct skb_shared_info *shinfo; unsigned int xdp_frags_truesz = 0; struct page *page; skb_frag_t *frag; int offset; void *ctx; xdp_init_buff(xdp, frame_sz, &rq->xdp_rxq); xdp_prepare_buff(xdp, buf - XDP_PACKET_HEADROOM, XDP_PACKET_HEADROOM + vi->hdr_len, len - vi->hdr_len, true); if (!*num_buf) return 0; if (*num_buf > 1) { /* If we want to build multi-buffer xdp, we need * to specify that the flags of xdp_buff have the * XDP_FLAGS_HAS_FRAG bit. */ if (!xdp_buff_has_frags(xdp)) xdp_buff_set_frags_flag(xdp); shinfo = xdp_get_shared_info_from_buff(xdp); shinfo->nr_frags = 0; shinfo->xdp_frags_size = 0; } if (*num_buf > MAX_SKB_FRAGS + 1) return -EINVAL; while (--*num_buf > 0) { buf = virtnet_rq_get_buf(rq, &len, &ctx); if (unlikely(!buf)) { pr_debug("%s: rx error: %d buffers out of %d missing\n", dev->name, *num_buf, virtio16_to_cpu(vi->vdev, hdr->num_buffers)); DEV_STATS_INC(dev, rx_length_errors); goto err; } u64_stats_add(&stats->bytes, len); page = virt_to_head_page(buf); offset = buf - page_address(page); truesize = mergeable_ctx_to_truesize(ctx); headroom = mergeable_ctx_to_headroom(ctx); tailroom = headroom ? sizeof(struct skb_shared_info) : 0; room = SKB_DATA_ALIGN(headroom + tailroom); cur_frag_size = truesize; xdp_frags_truesz += cur_frag_size; if (unlikely(len > truesize - room || cur_frag_size > PAGE_SIZE)) { put_page(page); pr_debug("%s: rx error: len %u exceeds truesize %lu\n", dev->name, len, (unsigned long)(truesize - room)); DEV_STATS_INC(dev, rx_length_errors); goto err; } frag = &shinfo->frags[shinfo->nr_frags++]; skb_frag_fill_page_desc(frag, page, offset, len); if (page_is_pfmemalloc(page)) xdp_buff_set_frag_pfmemalloc(xdp); shinfo->xdp_frags_size += len; } *xdp_frags_truesize = xdp_frags_truesz; return 0; err: put_xdp_frags(xdp); return -EINVAL; } static void *mergeable_xdp_get_buf(struct virtnet_info *vi, struct receive_queue *rq, struct bpf_prog *xdp_prog, void *ctx, unsigned int *frame_sz, int *num_buf, struct page **page, int offset, unsigned int *len, struct virtio_net_hdr_mrg_rxbuf *hdr) { unsigned int truesize = mergeable_ctx_to_truesize(ctx); unsigned int headroom = mergeable_ctx_to_headroom(ctx); struct page *xdp_page; unsigned int xdp_room; /* Transient failure which in theory could occur if * in-flight packets from before XDP was enabled reach * the receive path after XDP is loaded. */ if (unlikely(hdr->hdr.gso_type)) return NULL; /* Partially checksummed packets must be dropped. */ if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) return NULL; /* Now XDP core assumes frag size is PAGE_SIZE, but buffers * with headroom may add hole in truesize, which * make their length exceed PAGE_SIZE. So we disabled the * hole mechanism for xdp. See add_recvbuf_mergeable(). */ *frame_sz = truesize; if (likely(headroom >= virtnet_get_headroom(vi) && (*num_buf == 1 || xdp_prog->aux->xdp_has_frags))) { return page_address(*page) + offset; } /* This happens when headroom is not enough because * of the buffer was prefilled before XDP is set. * This should only happen for the first several packets. * In fact, vq reset can be used here to help us clean up * the prefilled buffers, but many existing devices do not * support it, and we don't want to bother users who are * using xdp normally. */ if (!xdp_prog->aux->xdp_has_frags) { /* linearize data for XDP */ xdp_page = xdp_linearize_page(rq, num_buf, *page, offset, XDP_PACKET_HEADROOM, len); if (!xdp_page) return NULL; } else { xdp_room = SKB_DATA_ALIGN(XDP_PACKET_HEADROOM + sizeof(struct skb_shared_info)); if (*len + xdp_room > PAGE_SIZE) return NULL; xdp_page = alloc_page(GFP_ATOMIC); if (!xdp_page) return NULL; memcpy(page_address(xdp_page) + XDP_PACKET_HEADROOM, page_address(*page) + offset, *len); } *frame_sz = PAGE_SIZE; put_page(*page); *page = xdp_page; return page_address(*page) + XDP_PACKET_HEADROOM; } static struct sk_buff *receive_mergeable_xdp(struct net_device *dev, struct virtnet_info *vi, struct receive_queue *rq, struct bpf_prog *xdp_prog, void *buf, void *ctx, unsigned int len, unsigned int *xdp_xmit, struct virtnet_rq_stats *stats) { struct virtio_net_hdr_mrg_rxbuf *hdr = buf; int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); struct page *page = virt_to_head_page(buf); int offset = buf - page_address(page); unsigned int xdp_frags_truesz = 0; struct sk_buff *head_skb; unsigned int frame_sz; struct xdp_buff xdp; void *data; u32 act; int err; data = mergeable_xdp_get_buf(vi, rq, xdp_prog, ctx, &frame_sz, &num_buf, &page, offset, &len, hdr); if (unlikely(!data)) goto err_xdp; err = virtnet_build_xdp_buff_mrg(dev, vi, rq, &xdp, data, len, frame_sz, &num_buf, &xdp_frags_truesz, stats); if (unlikely(err)) goto err_xdp; act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats); switch (act) { case XDP_PASS: head_skb = build_skb_from_xdp_buff(dev, vi, &xdp, xdp_frags_truesz); if (unlikely(!head_skb)) break; return head_skb; case XDP_TX: case XDP_REDIRECT: return NULL; default: break; } put_xdp_frags(&xdp); err_xdp: put_page(page); mergeable_buf_free(rq, num_buf, dev, stats); u64_stats_inc(&stats->xdp_drops); u64_stats_inc(&stats->drops); return NULL; } static struct sk_buff *virtnet_skb_append_frag(struct sk_buff *head_skb, struct sk_buff *curr_skb, struct page *page, void *buf, int len, int truesize) { int num_skb_frags; int offset; num_skb_frags = skb_shinfo(curr_skb)->nr_frags; if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) { struct sk_buff *nskb = alloc_skb(0, GFP_ATOMIC); if (unlikely(!nskb)) return NULL; if (curr_skb == head_skb) skb_shinfo(curr_skb)->frag_list = nskb; else curr_skb->next = nskb; curr_skb = nskb; head_skb->truesize += nskb->truesize; num_skb_frags = 0; } if (curr_skb != head_skb) { head_skb->data_len += len; head_skb->len += len; head_skb->truesize += truesize; } offset = buf - page_address(page); if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) { put_page(page); skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1, len, truesize); } else { skb_add_rx_frag(curr_skb, num_skb_frags, page, offset, len, truesize); } return curr_skb; } static struct sk_buff *receive_mergeable(struct net_device *dev, struct virtnet_info *vi, struct receive_queue *rq, void *buf, void *ctx, unsigned int len, unsigned int *xdp_xmit, struct virtnet_rq_stats *stats) { struct virtio_net_hdr_mrg_rxbuf *hdr = buf; int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); struct page *page = virt_to_head_page(buf); int offset = buf - page_address(page); struct sk_buff *head_skb, *curr_skb; unsigned int truesize = mergeable_ctx_to_truesize(ctx); unsigned int headroom = mergeable_ctx_to_headroom(ctx); unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; unsigned int room = SKB_DATA_ALIGN(headroom + tailroom); head_skb = NULL; u64_stats_add(&stats->bytes, len - vi->hdr_len); if (unlikely(len > truesize - room)) { pr_debug("%s: rx error: len %u exceeds truesize %lu\n", dev->name, len, (unsigned long)(truesize - room)); DEV_STATS_INC(dev, rx_length_errors); goto err_skb; } if (unlikely(vi->xdp_enabled)) { struct bpf_prog *xdp_prog; rcu_read_lock(); xdp_prog = rcu_dereference(rq->xdp_prog); if (xdp_prog) { head_skb = receive_mergeable_xdp(dev, vi, rq, xdp_prog, buf, ctx, len, xdp_xmit, stats); rcu_read_unlock(); return head_skb; } rcu_read_unlock(); } head_skb = page_to_skb(vi, rq, page, offset, len, truesize, headroom); curr_skb = head_skb; if (unlikely(!curr_skb)) goto err_skb; while (--num_buf) { buf = virtnet_rq_get_buf(rq, &len, &ctx); if (unlikely(!buf)) { pr_debug("%s: rx error: %d buffers out of %d missing\n", dev->name, num_buf, virtio16_to_cpu(vi->vdev, hdr->num_buffers)); DEV_STATS_INC(dev, rx_length_errors); goto err_buf; } u64_stats_add(&stats->bytes, len); page = virt_to_head_page(buf); truesize = mergeable_ctx_to_truesize(ctx); headroom = mergeable_ctx_to_headroom(ctx); tailroom = headroom ? sizeof(struct skb_shared_info) : 0; room = SKB_DATA_ALIGN(headroom + tailroom); if (unlikely(len > truesize - room)) { pr_debug("%s: rx error: len %u exceeds truesize %lu\n", dev->name, len, (unsigned long)(truesize - room)); DEV_STATS_INC(dev, rx_length_errors); goto err_skb; } curr_skb = virtnet_skb_append_frag(head_skb, curr_skb, page, buf, len, truesize); if (!curr_skb) goto err_skb; } ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len); return head_skb; err_skb: put_page(page); mergeable_buf_free(rq, num_buf, dev, stats); err_buf: u64_stats_inc(&stats->drops); dev_kfree_skb(head_skb); return NULL; } static void virtio_skb_set_hash(const struct virtio_net_hdr_v1_hash *hdr_hash, struct sk_buff *skb) { enum pkt_hash_types rss_hash_type; if (!hdr_hash || !skb) return; switch (__le16_to_cpu(hdr_hash->hash_report)) { case VIRTIO_NET_HASH_REPORT_TCPv4: case VIRTIO_NET_HASH_REPORT_UDPv4: case VIRTIO_NET_HASH_REPORT_TCPv6: case VIRTIO_NET_HASH_REPORT_UDPv6: case VIRTIO_NET_HASH_REPORT_TCPv6_EX: case VIRTIO_NET_HASH_REPORT_UDPv6_EX: rss_hash_type = PKT_HASH_TYPE_L4; break; case VIRTIO_NET_HASH_REPORT_IPv4: case VIRTIO_NET_HASH_REPORT_IPv6: case VIRTIO_NET_HASH_REPORT_IPv6_EX: rss_hash_type = PKT_HASH_TYPE_L3; break; case VIRTIO_NET_HASH_REPORT_NONE: default: rss_hash_type = PKT_HASH_TYPE_NONE; } skb_set_hash(skb, __le32_to_cpu(hdr_hash->hash_value), rss_hash_type); } static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq, struct sk_buff *skb, u8 flags) { struct virtio_net_common_hdr *hdr; struct net_device *dev = vi->dev; hdr = skb_vnet_common_hdr(skb); if (dev->features & NETIF_F_RXHASH && vi->has_rss_hash_report) virtio_skb_set_hash(&hdr->hash_v1_hdr, skb); if (flags & VIRTIO_NET_HDR_F_DATA_VALID) skb->ip_summed = CHECKSUM_UNNECESSARY; if (virtio_net_hdr_to_skb(skb, &hdr->hdr, virtio_is_little_endian(vi->vdev))) { net_warn_ratelimited("%s: bad gso: type: %u, size: %u\n", dev->name, hdr->hdr.gso_type, hdr->hdr.gso_size); goto frame_err; } skb_record_rx_queue(skb, vq2rxq(rq->vq)); skb->protocol = eth_type_trans(skb, dev); pr_debug("Receiving skb proto 0x%04x len %i type %i\n", ntohs(skb->protocol), skb->len, skb->pkt_type); napi_gro_receive(&rq->napi, skb); return; frame_err: DEV_STATS_INC(dev, rx_frame_errors); dev_kfree_skb(skb); } static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, void *buf, unsigned int len, void **ctx, unsigned int *xdp_xmit, struct virtnet_rq_stats *stats) { struct net_device *dev = vi->dev; struct sk_buff *skb; u8 flags; if (unlikely(len < vi->hdr_len + ETH_HLEN)) { pr_debug("%s: short packet %i\n", dev->name, len); DEV_STATS_INC(dev, rx_length_errors); virtnet_rq_free_buf(vi, rq, buf); return; } /* 1. Save the flags early, as the XDP program might overwrite them. * These flags ensure packets marked as VIRTIO_NET_HDR_F_DATA_VALID * stay valid after XDP processing. * 2. XDP doesn't work with partially checksummed packets (refer to * virtnet_xdp_set()), so packets marked as * VIRTIO_NET_HDR_F_NEEDS_CSUM get dropped during XDP processing. */ flags = ((struct virtio_net_common_hdr *)buf)->hdr.flags; if (vi->mergeable_rx_bufs) skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit, stats); else if (vi->big_packets) skb = receive_big(dev, vi, rq, buf, len, stats); else skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit, stats); if (unlikely(!skb)) return; virtnet_receive_done(vi, rq, skb, flags); } /* Unlike mergeable buffers, all buffers are allocated to the * same size, except for the headroom. For this reason we do * not need to use mergeable_len_to_ctx here - it is enough * to store the headroom as the context ignoring the truesize. */ static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq, gfp_t gfp) { char *buf; unsigned int xdp_headroom = virtnet_get_headroom(vi); void *ctx = (void *)(unsigned long)xdp_headroom; int len = vi->hdr_len + VIRTNET_RX_PAD + GOOD_PACKET_LEN + xdp_headroom; int err; len = SKB_DATA_ALIGN(len) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); buf = virtnet_rq_alloc(rq, len, gfp); if (unlikely(!buf)) return -ENOMEM; buf += VIRTNET_RX_PAD + xdp_headroom; virtnet_rq_init_one_sg(rq, buf, vi->hdr_len + GOOD_PACKET_LEN); err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp); if (err < 0) { if (rq->do_dma) virtnet_rq_unmap(rq, buf, 0); put_page(virt_to_head_page(buf)); } return err; } static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq, gfp_t gfp) { struct page *first, *list = NULL; char *p; int i, err, offset; sg_init_table(rq->sg, vi->big_packets_num_skbfrags + 2); /* page in rq->sg[vi->big_packets_num_skbfrags + 1] is list tail */ for (i = vi->big_packets_num_skbfrags + 1; i > 1; --i) { first = get_a_page(rq, gfp); if (!first) { if (list) give_pages(rq, list); return -ENOMEM; } sg_set_buf(&rq->sg[i], page_address(first), PAGE_SIZE); /* chain new page in list head to match sg */ first->private = (unsigned long)list; list = first; } first = get_a_page(rq, gfp); if (!first) { give_pages(rq, list); return -ENOMEM; } p = page_address(first); /* rq->sg[0], rq->sg[1] share the same page */ /* a separated rq->sg[0] for header - required in case !any_header_sg */ sg_set_buf(&rq->sg[0], p, vi->hdr_len); /* rq->sg[1] for data packet, from offset */ offset = sizeof(struct padded_vnet_hdr); sg_set_buf(&rq->sg[1], p + offset, PAGE_SIZE - offset); /* chain first in list head */ first->private = (unsigned long)list; err = virtqueue_add_inbuf(rq->vq, rq->sg, vi->big_packets_num_skbfrags + 2, first, gfp); if (err < 0) give_pages(rq, first); return err; } static unsigned int get_mergeable_buf_len(struct receive_queue *rq, struct ewma_pkt_len *avg_pkt_len, unsigned int room) { struct virtnet_info *vi = rq->vq->vdev->priv; const size_t hdr_len = vi->hdr_len; unsigned int len; if (room) return PAGE_SIZE - room; len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len), rq->min_buf_len, PAGE_SIZE - hdr_len); return ALIGN(len, L1_CACHE_BYTES); } static int add_recvbuf_mergeable(struct virtnet_info *vi, struct receive_queue *rq, gfp_t gfp) { struct page_frag *alloc_frag = &rq->alloc_frag; unsigned int headroom = virtnet_get_headroom(vi); unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; unsigned int room = SKB_DATA_ALIGN(headroom + tailroom); unsigned int len, hole; void *ctx; char *buf; int err; /* Extra tailroom is needed to satisfy XDP's assumption. This * means rx frags coalescing won't work, but consider we've * disabled GSO for XDP, it won't be a big issue. */ len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room); buf = virtnet_rq_alloc(rq, len + room, gfp); if (unlikely(!buf)) return -ENOMEM; buf += headroom; /* advance address leaving hole at front of pkt */ hole = alloc_frag->size - alloc_frag->offset; if (hole < len + room) { /* To avoid internal fragmentation, if there is very likely not * enough space for another buffer, add the remaining space to * the current buffer. * XDP core assumes that frame_size of xdp_buff and the length * of the frag are PAGE_SIZE, so we disable the hole mechanism. */ if (!headroom) len += hole; alloc_frag->offset += hole; } virtnet_rq_init_one_sg(rq, buf, len); ctx = mergeable_len_to_ctx(len + room, headroom); err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp); if (err < 0) { if (rq->do_dma) virtnet_rq_unmap(rq, buf, 0); put_page(virt_to_head_page(buf)); } return err; } /* * Returns false if we couldn't fill entirely (OOM). * * Normally run in the receive path, but can also be run from ndo_open * before we're receiving packets, or from refill_work which is * careful to disable receiving (using napi_disable). */ static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq, gfp_t gfp) { int err; if (rq->xsk_pool) { err = virtnet_add_recvbuf_xsk(vi, rq, rq->xsk_pool, gfp); goto kick; } do { if (vi->mergeable_rx_bufs) err = add_recvbuf_mergeable(vi, rq, gfp); else if (vi->big_packets) err = add_recvbuf_big(vi, rq, gfp); else err = add_recvbuf_small(vi, rq, gfp); if (err) break; } while (rq->vq->num_free); kick: if (virtqueue_kick_prepare(rq->vq) && virtqueue_notify(rq->vq)) { unsigned long flags; flags = u64_stats_update_begin_irqsave(&rq->stats.syncp); u64_stats_inc(&rq->stats.kicks); u64_stats_update_end_irqrestore(&rq->stats.syncp, flags); } return err != -ENOMEM; } static void skb_recv_done(struct virtqueue *rvq) { struct virtnet_info *vi = rvq->vdev->priv; struct receive_queue *rq = &vi->rq[vq2rxq(rvq)]; rq->calls++; virtqueue_napi_schedule(&rq->napi, rvq); } static void virtnet_napi_enable(struct virtqueue *vq, struct napi_struct *napi) { napi_enable(napi); /* If all buffers were filled by other side before we napi_enabled, we * won't get another interrupt, so process any outstanding packets now. * Call local_bh_enable after to trigger softIRQ processing. */ local_bh_disable(); virtqueue_napi_schedule(napi, vq); local_bh_enable(); } static void virtnet_napi_tx_enable(struct virtnet_info *vi, struct virtqueue *vq, struct napi_struct *napi) { if (!napi->weight) return; /* Tx napi touches cachelines on the cpu handling tx interrupts. Only * enable the feature if this is likely affine with the transmit path. */ if (!vi->affinity_hint_set) { napi->weight = 0; return; } return virtnet_napi_enable(vq, napi); } static void virtnet_napi_tx_disable(struct napi_struct *napi) { if (napi->weight) napi_disable(napi); } static void refill_work(struct work_struct *work) { struct virtnet_info *vi = container_of(work, struct virtnet_info, refill.work); bool still_empty; int i; for (i = 0; i < vi->curr_queue_pairs; i++) { struct receive_queue *rq = &vi->rq[i]; napi_disable(&rq->napi); still_empty = !try_fill_recv(vi, rq, GFP_KERNEL); virtnet_napi_enable(rq->vq, &rq->napi); /* In theory, this can happen: if we don't get any buffers in * we will *never* try to fill again. */ if (still_empty) schedule_delayed_work(&vi->refill, HZ/2); } } static int virtnet_receive_xsk_bufs(struct virtnet_info *vi, struct receive_queue *rq, int budget, unsigned int *xdp_xmit, struct virtnet_rq_stats *stats) { unsigned int len; int packets = 0; void *buf; while (packets < budget) { buf = virtqueue_get_buf(rq->vq, &len); if (!buf) break; virtnet_receive_xsk_buf(vi, rq, buf, len, xdp_xmit, stats); packets++; } return packets; } static int virtnet_receive_packets(struct virtnet_info *vi, struct receive_queue *rq, int budget, unsigned int *xdp_xmit, struct virtnet_rq_stats *stats) { unsigned int len; int packets = 0; void *buf; if (!vi->big_packets || vi->mergeable_rx_bufs) { void *ctx; while (packets < budget && (buf = virtnet_rq_get_buf(rq, &len, &ctx))) { receive_buf(vi, rq, buf, len, ctx, xdp_xmit, stats); packets++; } } else { while (packets < budget && (buf = virtnet_rq_get_buf(rq, &len, NULL)) != NULL) { receive_buf(vi, rq, buf, len, NULL, xdp_xmit, stats); packets++; } } return packets; } static int virtnet_receive(struct receive_queue *rq, int budget, unsigned int *xdp_xmit) { struct virtnet_info *vi = rq->vq->vdev->priv; struct virtnet_rq_stats stats = {}; int i, packets; if (rq->xsk_pool) packets = virtnet_receive_xsk_bufs(vi, rq, budget, xdp_xmit, &stats); else packets = virtnet_receive_packets(vi, rq, budget, xdp_xmit, &stats); if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) { if (!try_fill_recv(vi, rq, GFP_ATOMIC)) { spin_lock(&vi->refill_lock); if (vi->refill_enabled) schedule_delayed_work(&vi->refill, 0); spin_unlock(&vi->refill_lock); } } u64_stats_set(&stats.packets, packets); u64_stats_update_begin(&rq->stats.syncp); for (i = 0; i < ARRAY_SIZE(virtnet_rq_stats_desc); i++) { size_t offset = virtnet_rq_stats_desc[i].offset; u64_stats_t *item, *src; item = (u64_stats_t *)((u8 *)&rq->stats + offset); src = (u64_stats_t *)((u8 *)&stats + offset); u64_stats_add(item, u64_stats_read(src)); } u64_stats_add(&rq->stats.packets, u64_stats_read(&stats.packets)); u64_stats_add(&rq->stats.bytes, u64_stats_read(&stats.bytes)); u64_stats_update_end(&rq->stats.syncp); return packets; } static void virtnet_poll_cleantx(struct receive_queue *rq, int budget) { struct virtnet_info *vi = rq->vq->vdev->priv; unsigned int index = vq2rxq(rq->vq); struct send_queue *sq = &vi->sq[index]; struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index); if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index)) return; if (__netif_tx_trylock(txq)) { if (sq->reset) { __netif_tx_unlock(txq); return; } do { virtqueue_disable_cb(sq->vq); free_old_xmit(sq, txq, !!budget); } while (unlikely(!virtqueue_enable_cb_delayed(sq->vq))); if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) { if (netif_tx_queue_stopped(txq)) { u64_stats_update_begin(&sq->stats.syncp); u64_stats_inc(&sq->stats.wake); u64_stats_update_end(&sq->stats.syncp); } netif_tx_wake_queue(txq); } __netif_tx_unlock(txq); } } static void virtnet_rx_dim_update(struct virtnet_info *vi, struct receive_queue *rq) { struct dim_sample cur_sample = {}; if (!rq->packets_in_napi) return; /* Don't need protection when fetching stats, since fetcher and * updater of the stats are in same context */ dim_update_sample(rq->calls, u64_stats_read(&rq->stats.packets), u64_stats_read(&rq->stats.bytes), &cur_sample); net_dim(&rq->dim, cur_sample); rq->packets_in_napi = 0; } static int virtnet_poll(struct napi_struct *napi, int budget) { struct receive_queue *rq = container_of(napi, struct receive_queue, napi); struct virtnet_info *vi = rq->vq->vdev->priv; struct send_queue *sq; unsigned int received; unsigned int xdp_xmit = 0; bool napi_complete; virtnet_poll_cleantx(rq, budget); received = virtnet_receive(rq, budget, &xdp_xmit); rq->packets_in_napi += received; if (xdp_xmit & VIRTIO_XDP_REDIR) xdp_do_flush(); /* Out of packets? */ if (received < budget) { napi_complete = virtqueue_napi_complete(napi, rq->vq, received); /* Intentionally not taking dim_lock here. This may result in a * spurious net_dim call. But if that happens virtnet_rx_dim_work * will not act on the scheduled work. */ if (napi_complete && rq->dim_enabled) virtnet_rx_dim_update(vi, rq); } if (xdp_xmit & VIRTIO_XDP_TX) { sq = virtnet_xdp_get_sq(vi); if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) { u64_stats_update_begin(&sq->stats.syncp); u64_stats_inc(&sq->stats.kicks); u64_stats_update_end(&sq->stats.syncp); } virtnet_xdp_put_sq(vi, sq); } return received; } static void virtnet_disable_queue_pair(struct virtnet_info *vi, int qp_index) { virtnet_napi_tx_disable(&vi->sq[qp_index].napi); napi_disable(&vi->rq[qp_index].napi); xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq); } static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index) { struct net_device *dev = vi->dev; int err; err = xdp_rxq_info_reg(&vi->rq[qp_index].xdp_rxq, dev, qp_index, vi->rq[qp_index].napi.napi_id); if (err < 0) return err; err = xdp_rxq_info_reg_mem_model(&vi->rq[qp_index].xdp_rxq, MEM_TYPE_PAGE_SHARED, NULL); if (err < 0) goto err_xdp_reg_mem_model; netdev_tx_reset_queue(netdev_get_tx_queue(vi->dev, qp_index)); virtnet_napi_enable(vi->rq[qp_index].vq, &vi->rq[qp_index].napi); virtnet_napi_tx_enable(vi, vi->sq[qp_index].vq, &vi->sq[qp_index].napi); return 0; err_xdp_reg_mem_model: xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq); return err; } static void virtnet_cancel_dim(struct virtnet_info *vi, struct dim *dim) { if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) return; net_dim_work_cancel(dim); } static void virtnet_update_settings(struct virtnet_info *vi) { u32 speed; u8 duplex; if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_SPEED_DUPLEX)) return; virtio_cread_le(vi->vdev, struct virtio_net_config, speed, &speed); if (ethtool_validate_speed(speed)) vi->speed = speed; virtio_cread_le(vi->vdev, struct virtio_net_config, duplex, &duplex); if (ethtool_validate_duplex(duplex)) vi->duplex = duplex; } static int virtnet_open(struct net_device *dev) { struct virtnet_info *vi = netdev_priv(dev); int i, err; enable_delayed_refill(vi); for (i = 0; i < vi->max_queue_pairs; i++) { if (i < vi->curr_queue_pairs) /* Make sure we have some buffers: if oom use wq. */ if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL)) schedule_delayed_work(&vi->refill, 0); err = virtnet_enable_queue_pair(vi, i); if (err < 0) goto err_enable_qp; } if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) { if (vi->status & VIRTIO_NET_S_LINK_UP) netif_carrier_on(vi->dev); virtio_config_driver_enable(vi->vdev); } else { vi->status = VIRTIO_NET_S_LINK_UP; netif_carrier_on(dev); } return 0; err_enable_qp: disable_delayed_refill(vi); cancel_delayed_work_sync(&vi->refill); for (i--; i >= 0; i--) { virtnet_disable_queue_pair(vi, i); virtnet_cancel_dim(vi, &vi->rq[i].dim); } return err; } static int virtnet_poll_tx(struct napi_struct *napi, int budget) { struct send_queue *sq = container_of(napi, struct send_queue, napi); struct virtnet_info *vi = sq->vq->vdev->priv; unsigned int index = vq2txq(sq->vq); struct netdev_queue *txq; int opaque; bool done; if (unlikely(is_xdp_raw_buffer_queue(vi, index))) { /* We don't need to enable cb for XDP */ napi_complete_done(napi, 0); return 0; } txq = netdev_get_tx_queue(vi->dev, index); __netif_tx_lock(txq, raw_smp_processor_id()); virtqueue_disable_cb(sq->vq); free_old_xmit(sq, txq, !!budget); if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) { if (netif_tx_queue_stopped(txq)) { u64_stats_update_begin(&sq->stats.syncp); u64_stats_inc(&sq->stats.wake); u64_stats_update_end(&sq->stats.syncp); } netif_tx_wake_queue(txq); } opaque = virtqueue_enable_cb_prepare(sq->vq); done = napi_complete_done(napi, 0); if (!done) virtqueue_disable_cb(sq->vq); __netif_tx_unlock(txq); if (done) { if (unlikely(virtqueue_poll(sq->vq, opaque))) { if (napi_schedule_prep(napi)) { __netif_tx_lock(txq, raw_smp_processor_id()); virtqueue_disable_cb(sq->vq); __netif_tx_unlock(txq); __napi_schedule(napi); } } } return 0; } static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan) { struct virtio_net_hdr_mrg_rxbuf *hdr; const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest; struct virtnet_info *vi = sq->vq->vdev->priv; int num_sg; unsigned hdr_len = vi->hdr_len; bool can_push; pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest); can_push = vi->any_header_sg && !((unsigned long)skb->data & (__alignof__(*hdr) - 1)) && !skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len; /* Even if we can, don't push here yet as this would skew * csum_start offset below. */ if (can_push) hdr = (struct virtio_net_hdr_mrg_rxbuf *)(skb->data - hdr_len); else hdr = &skb_vnet_common_hdr(skb)->mrg_hdr; if (virtio_net_hdr_from_skb(skb, &hdr->hdr, virtio_is_little_endian(vi->vdev), false, 0)) return -EPROTO; if (vi->mergeable_rx_bufs) hdr->num_buffers = 0; sg_init_table(sq->sg, skb_shinfo(skb)->nr_frags + (can_push ? 1 : 2)); if (can_push) { __skb_push(skb, hdr_len); num_sg = skb_to_sgvec(skb, sq->sg, 0, skb->len); if (unlikely(num_sg < 0)) return num_sg; /* Pull header back to avoid skew in tx bytes calculations. */ __skb_pull(skb, hdr_len); } else { sg_set_buf(sq->sg, hdr, hdr_len); num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len); if (unlikely(num_sg < 0)) return num_sg; num_sg++; } return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, skb_to_ptr(skb, orphan), GFP_ATOMIC); } static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) { struct virtnet_info *vi = netdev_priv(dev); int qnum = skb_get_queue_mapping(skb); struct send_queue *sq = &vi->sq[qnum]; int err; struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum); bool xmit_more = netdev_xmit_more(); bool use_napi = sq->napi.weight; bool kick; /* Free up any pending old buffers before queueing new ones. */ do { if (use_napi) virtqueue_disable_cb(sq->vq); free_old_xmit(sq, txq, false); } while (use_napi && !xmit_more && unlikely(!virtqueue_enable_cb_delayed(sq->vq))); /* timestamp packet in software */ skb_tx_timestamp(skb); /* Try to transmit */ err = xmit_skb(sq, skb, !use_napi); /* This should not happen! */ if (unlikely(err)) { DEV_STATS_INC(dev, tx_fifo_errors); if (net_ratelimit()) dev_warn(&dev->dev, "Unexpected TXQ (%d) queue failure: %d\n", qnum, err); DEV_STATS_INC(dev, tx_dropped); dev_kfree_skb_any(skb); return NETDEV_TX_OK; } /* Don't wait up for transmitted skbs to be freed. */ if (!use_napi) { skb_orphan(skb); nf_reset_ct(skb); } check_sq_full_and_disable(vi, dev, sq); kick = use_napi ? __netdev_tx_sent_queue(txq, skb->len, xmit_more) : !xmit_more || netif_xmit_stopped(txq); if (kick) { if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) { u64_stats_update_begin(&sq->stats.syncp); u64_stats_inc(&sq->stats.kicks); u64_stats_update_end(&sq->stats.syncp); } } return NETDEV_TX_OK; } static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq) { bool running = netif_running(vi->dev); if (running) { napi_disable(&rq->napi); virtnet_cancel_dim(vi, &rq->dim); } } static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq) { bool running = netif_running(vi->dev); if (!try_fill_recv(vi, rq, GFP_KERNEL)) schedule_delayed_work(&vi->refill, 0); if (running) virtnet_napi_enable(rq->vq, &rq->napi); } static int virtnet_rx_resize(struct virtnet_info *vi, struct receive_queue *rq, u32 ring_num) { int err, qindex; qindex = rq - vi->rq; virtnet_rx_pause(vi, rq); err = virtqueue_resize(rq->vq, ring_num, virtnet_rq_unmap_free_buf); if (err) netdev_err(vi->dev, "resize rx fail: rx queue index: %d err: %d\n", qindex, err); virtnet_rx_resume(vi, rq); return err; } static void virtnet_tx_pause(struct virtnet_info *vi, struct send_queue *sq) { bool running = netif_running(vi->dev); struct netdev_queue *txq; int qindex; qindex = sq - vi->sq; if (running) virtnet_napi_tx_disable(&sq->napi); txq = netdev_get_tx_queue(vi->dev, qindex); /* 1. wait all ximt complete * 2. fix the race of netif_stop_subqueue() vs netif_start_subqueue() */ __netif_tx_lock_bh(txq); /* Prevent rx poll from accessing sq. */ sq->reset = true; /* Prevent the upper layer from trying to send packets. */ netif_stop_subqueue(vi->dev, qindex); __netif_tx_unlock_bh(txq); } static void virtnet_tx_resume(struct virtnet_info *vi, struct send_queue *sq) { bool running = netif_running(vi->dev); struct netdev_queue *txq; int qindex; qindex = sq - vi->sq; txq = netdev_get_tx_queue(vi->dev, qindex); __netif_tx_lock_bh(txq); sq->reset = false; netif_tx_wake_queue(txq); __netif_tx_unlock_bh(txq); if (running) virtnet_napi_tx_enable(vi, sq->vq, &sq->napi); } static int virtnet_tx_resize(struct virtnet_info *vi, struct send_queue *sq, u32 ring_num) { int qindex, err; qindex = sq - vi->sq; virtnet_tx_pause(vi, sq); err = virtqueue_resize(sq->vq, ring_num, virtnet_sq_free_unused_buf); if (err) netdev_err(vi->dev, "resize tx fail: tx queue index: %d err: %d\n", qindex, err); virtnet_tx_resume(vi, sq); return err; } /* * Send command via the control virtqueue and check status. Commands * supported by the hypervisor, as indicated by feature bits, should * never fail unless improperly formatted. */ static bool virtnet_send_command_reply(struct virtnet_info *vi, u8 class, u8 cmd, struct scatterlist *out, struct scatterlist *in) { struct scatterlist *sgs[5], hdr, stat; u32 out_num = 0, tmp, in_num = 0; bool ok; int ret; /* Caller should know better */ BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)); mutex_lock(&vi->cvq_lock); vi->ctrl->status = ~0; vi->ctrl->hdr.class = class; vi->ctrl->hdr.cmd = cmd; /* Add header */ sg_init_one(&hdr, &vi->ctrl->hdr, sizeof(vi->ctrl->hdr)); sgs[out_num++] = &hdr; if (out) sgs[out_num++] = out; /* Add return status. */ sg_init_one(&stat, &vi->ctrl->status, sizeof(vi->ctrl->status)); sgs[out_num + in_num++] = &stat; if (in) sgs[out_num + in_num++] = in; BUG_ON(out_num + in_num > ARRAY_SIZE(sgs)); ret = virtqueue_add_sgs(vi->cvq, sgs, out_num, in_num, vi, GFP_ATOMIC); if (ret < 0) { dev_warn(&vi->vdev->dev, "Failed to add sgs for command vq: %d\n.", ret); mutex_unlock(&vi->cvq_lock); return false; } if (unlikely(!virtqueue_kick(vi->cvq))) goto unlock; /* Spin for a response, the kick causes an ioport write, trapping * into the hypervisor, so the request should be handled immediately. */ while (!virtqueue_get_buf(vi->cvq, &tmp) && !virtqueue_is_broken(vi->cvq)) { cond_resched(); cpu_relax(); } unlock: ok = vi->ctrl->status == VIRTIO_NET_OK; mutex_unlock(&vi->cvq_lock); return ok; } static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd, struct scatterlist *out) { return virtnet_send_command_reply(vi, class, cmd, out, NULL); } static int virtnet_set_mac_address(struct net_device *dev, void *p) { struct virtnet_info *vi = netdev_priv(dev); struct virtio_device *vdev = vi->vdev; int ret; struct sockaddr *addr; struct scatterlist sg; if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY)) return -EOPNOTSUPP; addr = kmemdup(p, sizeof(*addr), GFP_KERNEL); if (!addr) return -ENOMEM; ret = eth_prepare_mac_addr_change(dev, addr); if (ret) goto out; if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) { sg_init_one(&sg, addr->sa_data, dev->addr_len); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) { dev_warn(&vdev->dev, "Failed to set mac address by vq command.\n"); ret = -EINVAL; goto out; } } else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC) && !virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) { unsigned int i; /* Naturally, this has an atomicity problem. */ for (i = 0; i < dev->addr_len; i++) virtio_cwrite8(vdev, offsetof(struct virtio_net_config, mac) + i, addr->sa_data[i]); } eth_commit_mac_addr_change(dev, p); ret = 0; out: kfree(addr); return ret; } static void virtnet_stats(struct net_device *dev, struct rtnl_link_stats64 *tot) { struct virtnet_info *vi = netdev_priv(dev); unsigned int start; int i; for (i = 0; i < vi->max_queue_pairs; i++) { u64 tpackets, tbytes, terrors, rpackets, rbytes, rdrops; struct receive_queue *rq = &vi->rq[i]; struct send_queue *sq = &vi->sq[i]; do { start = u64_stats_fetch_begin(&sq->stats.syncp); tpackets = u64_stats_read(&sq->stats.packets); tbytes = u64_stats_read(&sq->stats.bytes); terrors = u64_stats_read(&sq->stats.tx_timeouts); } while (u64_stats_fetch_retry(&sq->stats.syncp, start)); do { start = u64_stats_fetch_begin(&rq->stats.syncp); rpackets = u64_stats_read(&rq->stats.packets); rbytes = u64_stats_read(&rq->stats.bytes); rdrops = u64_stats_read(&rq->stats.drops); } while (u64_stats_fetch_retry(&rq->stats.syncp, start)); tot->rx_packets += rpackets; tot->tx_packets += tpackets; tot->rx_bytes += rbytes; tot->tx_bytes += tbytes; tot->rx_dropped += rdrops; tot->tx_errors += terrors; } tot->tx_dropped = DEV_STATS_READ(dev, tx_dropped); tot->tx_fifo_errors = DEV_STATS_READ(dev, tx_fifo_errors); tot->rx_length_errors = DEV_STATS_READ(dev, rx_length_errors); tot->rx_frame_errors = DEV_STATS_READ(dev, rx_frame_errors); } static void virtnet_ack_link_announce(struct virtnet_info *vi) { if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_ANNOUNCE, VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL)) dev_warn(&vi->dev->dev, "Failed to ack link announce.\n"); } static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) { struct virtio_net_ctrl_mq *mq __free(kfree) = NULL; struct scatterlist sg; struct net_device *dev = vi->dev; if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ)) return 0; mq = kzalloc(sizeof(*mq), GFP_KERNEL); if (!mq) return -ENOMEM; mq->virtqueue_pairs = cpu_to_virtio16(vi->vdev, queue_pairs); sg_init_one(&sg, mq, sizeof(*mq)); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg)) { dev_warn(&dev->dev, "Fail to set num of queue pairs to %d\n", queue_pairs); return -EINVAL; } else { vi->curr_queue_pairs = queue_pairs; /* virtnet_open() will refill when device is going to up. */ if (dev->flags & IFF_UP) schedule_delayed_work(&vi->refill, 0); } return 0; } static int virtnet_close(struct net_device *dev) { struct virtnet_info *vi = netdev_priv(dev); int i; /* Make sure NAPI doesn't schedule refill work */ disable_delayed_refill(vi); /* Make sure refill_work doesn't re-enable napi! */ cancel_delayed_work_sync(&vi->refill); /* Prevent the config change callback from changing carrier * after close */ virtio_config_driver_disable(vi->vdev); /* Stop getting status/speed updates: we don't care until next * open */ cancel_work_sync(&vi->config_work); for (i = 0; i < vi->max_queue_pairs; i++) { virtnet_disable_queue_pair(vi, i); virtnet_cancel_dim(vi, &vi->rq[i].dim); } netif_carrier_off(dev); return 0; } static void virtnet_rx_mode_work(struct work_struct *work) { struct virtnet_info *vi = container_of(work, struct virtnet_info, rx_mode_work); u8 *promisc_allmulti __free(kfree) = NULL; struct net_device *dev = vi->dev; struct scatterlist sg[2]; struct virtio_net_ctrl_mac *mac_data; struct netdev_hw_addr *ha; int uc_count; int mc_count; void *buf; int i; /* We can't dynamically set ndo_set_rx_mode, so return gracefully */ if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX)) return; promisc_allmulti = kzalloc(sizeof(*promisc_allmulti), GFP_KERNEL); if (!promisc_allmulti) { dev_warn(&dev->dev, "Failed to set RX mode, no memory.\n"); return; } rtnl_lock(); *promisc_allmulti = !!(dev->flags & IFF_PROMISC); sg_init_one(sg, promisc_allmulti, sizeof(*promisc_allmulti)); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX, VIRTIO_NET_CTRL_RX_PROMISC, sg)) dev_warn(&dev->dev, "Failed to %sable promisc mode.\n", *promisc_allmulti ? "en" : "dis"); *promisc_allmulti = !!(dev->flags & IFF_ALLMULTI); sg_init_one(sg, promisc_allmulti, sizeof(*promisc_allmulti)); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX, VIRTIO_NET_CTRL_RX_ALLMULTI, sg)) dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n", *promisc_allmulti ? "en" : "dis"); netif_addr_lock_bh(dev); uc_count = netdev_uc_count(dev); mc_count = netdev_mc_count(dev); /* MAC filter - use one buffer for both lists */ buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) + (2 * sizeof(mac_data->entries)), GFP_ATOMIC); mac_data = buf; if (!buf) { netif_addr_unlock_bh(dev); rtnl_unlock(); return; } sg_init_table(sg, 2); /* Store the unicast list and count in the front of the buffer */ mac_data->entries = cpu_to_virtio32(vi->vdev, uc_count); i = 0; netdev_for_each_uc_addr(ha, dev) memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); sg_set_buf(&sg[0], mac_data, sizeof(mac_data->entries) + (uc_count * ETH_ALEN)); /* multicast list and count fill the end */ mac_data = (void *)&mac_data->macs[uc_count][0]; mac_data->entries = cpu_to_virtio32(vi->vdev, mc_count); i = 0; netdev_for_each_mc_addr(ha, dev) memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); netif_addr_unlock_bh(dev); sg_set_buf(&sg[1], mac_data, sizeof(mac_data->entries) + (mc_count * ETH_ALEN)); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_TABLE_SET, sg)) dev_warn(&dev->dev, "Failed to set MAC filter table.\n"); rtnl_unlock(); kfree(buf); } static void virtnet_set_rx_mode(struct net_device *dev) { struct virtnet_info *vi = netdev_priv(dev); if (vi->rx_mode_work_enabled) schedule_work(&vi->rx_mode_work); } static int virtnet_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) { struct virtnet_info *vi = netdev_priv(dev); __virtio16 *_vid __free(kfree) = NULL; struct scatterlist sg; _vid = kzalloc(sizeof(*_vid), GFP_KERNEL); if (!_vid) return -ENOMEM; *_vid = cpu_to_virtio16(vi->vdev, vid); sg_init_one(&sg, _vid, sizeof(*_vid)); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, VIRTIO_NET_CTRL_VLAN_ADD, &sg)) dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid); return 0; } static int virtnet_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) { struct virtnet_info *vi = netdev_priv(dev); __virtio16 *_vid __free(kfree) = NULL; struct scatterlist sg; _vid = kzalloc(sizeof(*_vid), GFP_KERNEL); if (!_vid) return -ENOMEM; *_vid = cpu_to_virtio16(vi->vdev, vid); sg_init_one(&sg, _vid, sizeof(*_vid)); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, VIRTIO_NET_CTRL_VLAN_DEL, &sg)) dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid); return 0; } static void virtnet_clean_affinity(struct virtnet_info *vi) { int i; if (vi->affinity_hint_set) { for (i = 0; i < vi->max_queue_pairs; i++) { virtqueue_set_affinity(vi->rq[i].vq, NULL); virtqueue_set_affinity(vi->sq[i].vq, NULL); } vi->affinity_hint_set = false; } } static void virtnet_set_affinity(struct virtnet_info *vi) { cpumask_var_t mask; int stragglers; int group_size; int i, j, cpu; int num_cpu; int stride; if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) { virtnet_clean_affinity(vi); return; } num_cpu = num_online_cpus(); stride = max_t(int, num_cpu / vi->curr_queue_pairs, 1); stragglers = num_cpu >= vi->curr_queue_pairs ? num_cpu % vi->curr_queue_pairs : 0; cpu = cpumask_first(cpu_online_mask); for (i = 0; i < vi->curr_queue_pairs; i++) { group_size = stride + (i < stragglers ? 1 : 0); for (j = 0; j < group_size; j++) { cpumask_set_cpu(cpu, mask); cpu = cpumask_next_wrap(cpu, cpu_online_mask, nr_cpu_ids, false); } virtqueue_set_affinity(vi->rq[i].vq, mask); virtqueue_set_affinity(vi->sq[i].vq, mask); __netif_set_xps_queue(vi->dev, cpumask_bits(mask), i, XPS_CPUS); cpumask_clear(mask); } vi->affinity_hint_set = true; free_cpumask_var(mask); } static int virtnet_cpu_online(unsigned int cpu, struct hlist_node *node) { struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, node); virtnet_set_affinity(vi); return 0; } static int virtnet_cpu_dead(unsigned int cpu, struct hlist_node *node) { struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, node_dead); virtnet_set_affinity(vi); return 0; } static int virtnet_cpu_down_prep(unsigned int cpu, struct hlist_node *node) { struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, node); virtnet_clean_affinity(vi); return 0; } static enum cpuhp_state virtionet_online; static int virtnet_cpu_notif_add(struct virtnet_info *vi) { int ret; ret = cpuhp_state_add_instance_nocalls(virtionet_online, &vi->node); if (ret) return ret; ret = cpuhp_state_add_instance_nocalls(CPUHP_VIRT_NET_DEAD, &vi->node_dead); if (!ret) return ret; cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node); return ret; } static void virtnet_cpu_notif_remove(struct virtnet_info *vi) { cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node); cpuhp_state_remove_instance_nocalls(CPUHP_VIRT_NET_DEAD, &vi->node_dead); } static int virtnet_send_ctrl_coal_vq_cmd(struct virtnet_info *vi, u16 vqn, u32 max_usecs, u32 max_packets) { struct virtio_net_ctrl_coal_vq *coal_vq __free(kfree) = NULL; struct scatterlist sgs; coal_vq = kzalloc(sizeof(*coal_vq), GFP_KERNEL); if (!coal_vq) return -ENOMEM; coal_vq->vqn = cpu_to_le16(vqn); coal_vq->coal.max_usecs = cpu_to_le32(max_usecs); coal_vq->coal.max_packets = cpu_to_le32(max_packets); sg_init_one(&sgs, coal_vq, sizeof(*coal_vq)); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, VIRTIO_NET_CTRL_NOTF_COAL_VQ_SET, &sgs)) return -EINVAL; return 0; } static int virtnet_send_rx_ctrl_coal_vq_cmd(struct virtnet_info *vi, u16 queue, u32 max_usecs, u32 max_packets) { int err; if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) return -EOPNOTSUPP; err = virtnet_send_ctrl_coal_vq_cmd(vi, rxq2vq(queue), max_usecs, max_packets); if (err) return err; vi->rq[queue].intr_coal.max_usecs = max_usecs; vi->rq[queue].intr_coal.max_packets = max_packets; return 0; } static int virtnet_send_tx_ctrl_coal_vq_cmd(struct virtnet_info *vi, u16 queue, u32 max_usecs, u32 max_packets) { int err; if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) return -EOPNOTSUPP; err = virtnet_send_ctrl_coal_vq_cmd(vi, txq2vq(queue), max_usecs, max_packets); if (err) return err; vi->sq[queue].intr_coal.max_usecs = max_usecs; vi->sq[queue].intr_coal.max_packets = max_packets; return 0; } static void virtnet_get_ringparam(struct net_device *dev, struct ethtool_ringparam *ring, struct kernel_ethtool_ringparam *kernel_ring, struct netlink_ext_ack *extack) { struct virtnet_info *vi = netdev_priv(dev); ring->rx_max_pending = vi->rq[0].vq->num_max; ring->tx_max_pending = vi->sq[0].vq->num_max; ring->rx_pending = virtqueue_get_vring_size(vi->rq[0].vq); ring->tx_pending = virtqueue_get_vring_size(vi->sq[0].vq); } static int virtnet_set_ringparam(struct net_device *dev, struct ethtool_ringparam *ring, struct kernel_ethtool_ringparam *kernel_ring, struct netlink_ext_ack *extack) { struct virtnet_info *vi = netdev_priv(dev); u32 rx_pending, tx_pending; struct receive_queue *rq; struct send_queue *sq; int i, err; if (ring->rx_mini_pending || ring->rx_jumbo_pending) return -EINVAL; rx_pending = virtqueue_get_vring_size(vi->rq[0].vq); tx_pending = virtqueue_get_vring_size(vi->sq[0].vq); if (ring->rx_pending == rx_pending && ring->tx_pending == tx_pending) return 0; if (ring->rx_pending > vi->rq[0].vq->num_max) return -EINVAL; if (ring->tx_pending > vi->sq[0].vq->num_max) return -EINVAL; for (i = 0; i < vi->max_queue_pairs; i++) { rq = vi->rq + i; sq = vi->sq + i; if (ring->tx_pending != tx_pending) { err = virtnet_tx_resize(vi, sq, ring->tx_pending); if (err) return err; /* Upon disabling and re-enabling a transmit virtqueue, the device must * set the coalescing parameters of the virtqueue to those configured * through the VIRTIO_NET_CTRL_NOTF_COAL_TX_SET command, or, if the driver * did not set any TX coalescing parameters, to 0. */ err = virtnet_send_tx_ctrl_coal_vq_cmd(vi, i, vi->intr_coal_tx.max_usecs, vi->intr_coal_tx.max_packets); /* Don't break the tx resize action if the vq coalescing is not * supported. The same is true for rx resize below. */ if (err && err != -EOPNOTSUPP) return err; } if (ring->rx_pending != rx_pending) { err = virtnet_rx_resize(vi, rq, ring->rx_pending); if (err) return err; /* The reason is same as the transmit virtqueue reset */ mutex_lock(&vi->rq[i].dim_lock); err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, i, vi->intr_coal_rx.max_usecs, vi->intr_coal_rx.max_packets); mutex_unlock(&vi->rq[i].dim_lock); if (err && err != -EOPNOTSUPP) return err; } } return 0; } static bool virtnet_commit_rss_command(struct virtnet_info *vi) { struct net_device *dev = vi->dev; struct scatterlist sgs[4]; unsigned int sg_buf_size; /* prepare sgs */ sg_init_table(sgs, 4); sg_buf_size = offsetof(struct virtio_net_ctrl_rss, indirection_table); sg_set_buf(&sgs[0], &vi->rss, sg_buf_size); sg_buf_size = sizeof(uint16_t) * (vi->rss.indirection_table_mask + 1); sg_set_buf(&sgs[1], vi->rss.indirection_table, sg_buf_size); sg_buf_size = offsetof(struct virtio_net_ctrl_rss, key) - offsetof(struct virtio_net_ctrl_rss, max_tx_vq); sg_set_buf(&sgs[2], &vi->rss.max_tx_vq, sg_buf_size); sg_buf_size = vi->rss_key_size; sg_set_buf(&sgs[3], vi->rss.key, sg_buf_size); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ, vi->has_rss ? VIRTIO_NET_CTRL_MQ_RSS_CONFIG : VIRTIO_NET_CTRL_MQ_HASH_CONFIG, sgs)) goto err; return true; err: dev_warn(&dev->dev, "VIRTIONET issue with committing RSS sgs\n"); return false; } static void virtnet_init_default_rss(struct virtnet_info *vi) { u32 indir_val = 0; int i = 0; vi->rss.hash_types = vi->rss_hash_types_supported; vi->rss_hash_types_saved = vi->rss_hash_types_supported; vi->rss.indirection_table_mask = vi->rss_indir_table_size ? vi->rss_indir_table_size - 1 : 0; vi->rss.unclassified_queue = 0; for (; i < vi->rss_indir_table_size; ++i) { indir_val = ethtool_rxfh_indir_default(i, vi->curr_queue_pairs); vi->rss.indirection_table[i] = indir_val; } vi->rss.max_tx_vq = vi->has_rss ? vi->curr_queue_pairs : 0; vi->rss.hash_key_length = vi->rss_key_size; netdev_rss_key_fill(vi->rss.key, vi->rss_key_size); } static void virtnet_get_hashflow(const struct virtnet_info *vi, struct ethtool_rxnfc *info) { info->data = 0; switch (info->flow_type) { case TCP_V4_FLOW: if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) { info->data = RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3; } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { info->data = RXH_IP_SRC | RXH_IP_DST; } break; case TCP_V6_FLOW: if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) { info->data = RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3; } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) { info->data = RXH_IP_SRC | RXH_IP_DST; } break; case UDP_V4_FLOW: if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) { info->data = RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3; } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { info->data = RXH_IP_SRC | RXH_IP_DST; } break; case UDP_V6_FLOW: if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) { info->data = RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3; } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) { info->data = RXH_IP_SRC | RXH_IP_DST; } break; case IPV4_FLOW: if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) info->data = RXH_IP_SRC | RXH_IP_DST; break; case IPV6_FLOW: if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) info->data = RXH_IP_SRC | RXH_IP_DST; break; default: info->data = 0; break; } } static bool virtnet_set_hashflow(struct virtnet_info *vi, struct ethtool_rxnfc *info) { u32 new_hashtypes = vi->rss_hash_types_saved; bool is_disable = info->data & RXH_DISCARD; bool is_l4 = info->data == (RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3); /* supports only 'sd', 'sdfn' and 'r' */ if (!((info->data == (RXH_IP_SRC | RXH_IP_DST)) | is_l4 | is_disable)) return false; switch (info->flow_type) { case TCP_V4_FLOW: new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_TCPv4); if (!is_disable) new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv4 : 0); break; case UDP_V4_FLOW: new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_UDPv4); if (!is_disable) new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv4 : 0); break; case IPV4_FLOW: new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv4; if (!is_disable) new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv4; break; case TCP_V6_FLOW: new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_TCPv6); if (!is_disable) new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv6 : 0); break; case UDP_V6_FLOW: new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_UDPv6); if (!is_disable) new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv6 : 0); break; case IPV6_FLOW: new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv6; if (!is_disable) new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv6; break; default: /* unsupported flow */ return false; } /* if unsupported hashtype was set */ if (new_hashtypes != (new_hashtypes & vi->rss_hash_types_supported)) return false; if (new_hashtypes != vi->rss_hash_types_saved) { vi->rss_hash_types_saved = new_hashtypes; vi->rss.hash_types = vi->rss_hash_types_saved; if (vi->dev->features & NETIF_F_RXHASH) return virtnet_commit_rss_command(vi); } return true; } static void virtnet_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { struct virtnet_info *vi = netdev_priv(dev); struct virtio_device *vdev = vi->vdev; strscpy(info->driver, KBUILD_MODNAME, sizeof(info->driver)); strscpy(info->version, VIRTNET_DRIVER_VERSION, sizeof(info->version)); strscpy(info->bus_info, virtio_bus_name(vdev), sizeof(info->bus_info)); } /* TODO: Eliminate OOO packets during switching */ static int virtnet_set_channels(struct net_device *dev, struct ethtool_channels *channels) { struct virtnet_info *vi = netdev_priv(dev); u16 queue_pairs = channels->combined_count; int err; /* We don't support separate rx/tx channels. * We don't allow setting 'other' channels. */ if (channels->rx_count || channels->tx_count || channels->other_count) return -EINVAL; if (queue_pairs > vi->max_queue_pairs || queue_pairs == 0) return -EINVAL; /* For now we don't support modifying channels while XDP is loaded * also when XDP is loaded all RX queues have XDP programs so we only * need to check a single RX queue. */ if (vi->rq[0].xdp_prog) return -EINVAL; cpus_read_lock(); err = virtnet_set_queues(vi, queue_pairs); if (err) { cpus_read_unlock(); goto err; } virtnet_set_affinity(vi); cpus_read_unlock(); netif_set_real_num_tx_queues(dev, queue_pairs); netif_set_real_num_rx_queues(dev, queue_pairs); err: return err; } static void virtnet_stats_sprintf(u8 **p, const char *fmt, const char *noq_fmt, int num, int qid, const struct virtnet_stat_desc *desc) { int i; if (qid < 0) { for (i = 0; i < num; ++i) ethtool_sprintf(p, noq_fmt, desc[i].desc); } else { for (i = 0; i < num; ++i) ethtool_sprintf(p, fmt, qid, desc[i].desc); } } /* qid == -1: for rx/tx queue total field */ static void virtnet_get_stats_string(struct virtnet_info *vi, int type, int qid, u8 **data) { const struct virtnet_stat_desc *desc; const char *fmt, *noq_fmt; u8 *p = *data; u32 num; if (type == VIRTNET_Q_TYPE_CQ && qid >= 0) { noq_fmt = "cq_hw_%s"; if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_CVQ) { desc = &virtnet_stats_cvq_desc[0]; num = ARRAY_SIZE(virtnet_stats_cvq_desc); virtnet_stats_sprintf(&p, NULL, noq_fmt, num, -1, desc); } } if (type == VIRTNET_Q_TYPE_RX) { fmt = "rx%u_%s"; noq_fmt = "rx_%s"; desc = &virtnet_rq_stats_desc[0]; num = ARRAY_SIZE(virtnet_rq_stats_desc); virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); fmt = "rx%u_hw_%s"; noq_fmt = "rx_hw_%s"; if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { desc = &virtnet_stats_rx_basic_desc[0]; num = ARRAY_SIZE(virtnet_stats_rx_basic_desc); virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); } if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { desc = &virtnet_stats_rx_csum_desc[0]; num = ARRAY_SIZE(virtnet_stats_rx_csum_desc); virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); } if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { desc = &virtnet_stats_rx_speed_desc[0]; num = ARRAY_SIZE(virtnet_stats_rx_speed_desc); virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); } } if (type == VIRTNET_Q_TYPE_TX) { fmt = "tx%u_%s"; noq_fmt = "tx_%s"; desc = &virtnet_sq_stats_desc[0]; num = ARRAY_SIZE(virtnet_sq_stats_desc); virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); fmt = "tx%u_hw_%s"; noq_fmt = "tx_hw_%s"; if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { desc = &virtnet_stats_tx_basic_desc[0]; num = ARRAY_SIZE(virtnet_stats_tx_basic_desc); virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); } if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { desc = &virtnet_stats_tx_gso_desc[0]; num = ARRAY_SIZE(virtnet_stats_tx_gso_desc); virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); } if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { desc = &virtnet_stats_tx_speed_desc[0]; num = ARRAY_SIZE(virtnet_stats_tx_speed_desc); virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); } } *data = p; } struct virtnet_stats_ctx { /* The stats are write to qstats or ethtool -S */ bool to_qstat; /* Used to calculate the offset inside the output buffer. */ u32 desc_num[3]; /* The actual supported stat types. */ u32 bitmap[3]; /* Used to calculate the reply buffer size. */ u32 size[3]; /* Record the output buffer. */ u64 *data; }; static void virtnet_stats_ctx_init(struct virtnet_info *vi, struct virtnet_stats_ctx *ctx, u64 *data, bool to_qstat) { u32 queue_type; ctx->data = data; ctx->to_qstat = to_qstat; if (to_qstat) { ctx->desc_num[VIRTNET_Q_TYPE_RX] = ARRAY_SIZE(virtnet_rq_stats_desc_qstat); ctx->desc_num[VIRTNET_Q_TYPE_TX] = ARRAY_SIZE(virtnet_sq_stats_desc_qstat); queue_type = VIRTNET_Q_TYPE_RX; if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_BASIC; ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_basic_desc_qstat); ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_basic); } if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_CSUM; ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_csum_desc_qstat); ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_csum); } if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_GSO) { ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_GSO; ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_gso_desc_qstat); ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_gso); } if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_SPEED; ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_speed_desc_qstat); ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_speed); } queue_type = VIRTNET_Q_TYPE_TX; if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_BASIC; ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_basic_desc_qstat); ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_basic); } if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_CSUM) { ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_CSUM; ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_csum_desc_qstat); ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_csum); } if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_GSO; ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_gso_desc_qstat); ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_gso); } if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_SPEED; ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_speed_desc_qstat); ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_speed); } return; } ctx->desc_num[VIRTNET_Q_TYPE_RX] = ARRAY_SIZE(virtnet_rq_stats_desc); ctx->desc_num[VIRTNET_Q_TYPE_TX] = ARRAY_SIZE(virtnet_sq_stats_desc); if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_CVQ) { queue_type = VIRTNET_Q_TYPE_CQ; ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_CVQ; ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_cvq_desc); ctx->size[queue_type] += sizeof(struct virtio_net_stats_cvq); } queue_type = VIRTNET_Q_TYPE_RX; if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_BASIC; ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_basic_desc); ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_basic); } if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_CSUM; ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_csum_desc); ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_csum); } if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_SPEED; ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_speed_desc); ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_speed); } queue_type = VIRTNET_Q_TYPE_TX; if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_BASIC; ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_basic_desc); ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_basic); } if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_GSO; ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_gso_desc); ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_gso); } if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_SPEED; ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_speed_desc); ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_speed); } } /* stats_sum_queue - Calculate the sum of the same fields in sq or rq. * @sum: the position to store the sum values * @num: field num * @q_value: the first queue fields * @q_num: number of the queues */ static void stats_sum_queue(u64 *sum, u32 num, u64 *q_value, u32 q_num) { u32 step = num; int i, j; u64 *p; for (i = 0; i < num; ++i) { p = sum + i; *p = 0; for (j = 0; j < q_num; ++j) *p += *(q_value + i + j * step); } } static void virtnet_fill_total_fields(struct virtnet_info *vi, struct virtnet_stats_ctx *ctx) { u64 *data, *first_rx_q, *first_tx_q; u32 num_cq, num_rx, num_tx; num_cq = ctx->desc_num[VIRTNET_Q_TYPE_CQ]; num_rx = ctx->desc_num[VIRTNET_Q_TYPE_RX]; num_tx = ctx->desc_num[VIRTNET_Q_TYPE_TX]; first_rx_q = ctx->data + num_rx + num_tx + num_cq; first_tx_q = first_rx_q + vi->curr_queue_pairs * num_rx; data = ctx->data; stats_sum_queue(data, num_rx, first_rx_q, vi->curr_queue_pairs); data = ctx->data + num_rx; stats_sum_queue(data, num_tx, first_tx_q, vi->curr_queue_pairs); } static void virtnet_fill_stats_qstat(struct virtnet_info *vi, u32 qid, struct virtnet_stats_ctx *ctx, const u8 *base, bool drv_stats, u8 reply_type) { const struct virtnet_stat_desc *desc; const u64_stats_t *v_stat; u64 offset, bitmap; const __le64 *v; u32 queue_type; int i, num; queue_type = vq_type(vi, qid); bitmap = ctx->bitmap[queue_type]; if (drv_stats) { if (queue_type == VIRTNET_Q_TYPE_RX) { desc = &virtnet_rq_stats_desc_qstat[0]; num = ARRAY_SIZE(virtnet_rq_stats_desc_qstat); } else { desc = &virtnet_sq_stats_desc_qstat[0]; num = ARRAY_SIZE(virtnet_sq_stats_desc_qstat); } for (i = 0; i < num; ++i) { offset = desc[i].qstat_offset / sizeof(*ctx->data); v_stat = (const u64_stats_t *)(base + desc[i].offset); ctx->data[offset] = u64_stats_read(v_stat); } return; } if (bitmap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { desc = &virtnet_stats_rx_basic_desc_qstat[0]; num = ARRAY_SIZE(virtnet_stats_rx_basic_desc_qstat); if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_BASIC) goto found; } if (bitmap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { desc = &virtnet_stats_rx_csum_desc_qstat[0]; num = ARRAY_SIZE(virtnet_stats_rx_csum_desc_qstat); if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_CSUM) goto found; } if (bitmap & VIRTIO_NET_STATS_TYPE_RX_GSO) { desc = &virtnet_stats_rx_gso_desc_qstat[0]; num = ARRAY_SIZE(virtnet_stats_rx_gso_desc_qstat); if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_GSO) goto found; } if (bitmap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { desc = &virtnet_stats_rx_speed_desc_qstat[0]; num = ARRAY_SIZE(virtnet_stats_rx_speed_desc_qstat); if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_SPEED) goto found; } if (bitmap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { desc = &virtnet_stats_tx_basic_desc_qstat[0]; num = ARRAY_SIZE(virtnet_stats_tx_basic_desc_qstat); if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_BASIC) goto found; } if (bitmap & VIRTIO_NET_STATS_TYPE_TX_CSUM) { desc = &virtnet_stats_tx_csum_desc_qstat[0]; num = ARRAY_SIZE(virtnet_stats_tx_csum_desc_qstat); if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_CSUM) goto found; } if (bitmap & VIRTIO_NET_STATS_TYPE_TX_GSO) { desc = &virtnet_stats_tx_gso_desc_qstat[0]; num = ARRAY_SIZE(virtnet_stats_tx_gso_desc_qstat); if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_GSO) goto found; } if (bitmap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { desc = &virtnet_stats_tx_speed_desc_qstat[0]; num = ARRAY_SIZE(virtnet_stats_tx_speed_desc_qstat); if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_SPEED) goto found; } return; found: for (i = 0; i < num; ++i) { offset = desc[i].qstat_offset / sizeof(*ctx->data); v = (const __le64 *)(base + desc[i].offset); ctx->data[offset] = le64_to_cpu(*v); } } /* virtnet_fill_stats - copy the stats to qstats or ethtool -S * The stats source is the device or the driver. * * @vi: virtio net info * @qid: the vq id * @ctx: stats ctx (initiated by virtnet_stats_ctx_init()) * @base: pointer to the device reply or the driver stats structure. * @drv_stats: designate the base type (device reply, driver stats) * @type: the type of the device reply (if drv_stats is true, this must be zero) */ static void virtnet_fill_stats(struct virtnet_info *vi, u32 qid, struct virtnet_stats_ctx *ctx, const u8 *base, bool drv_stats, u8 reply_type) { u32 queue_type, num_rx, num_tx, num_cq; const struct virtnet_stat_desc *desc; const u64_stats_t *v_stat; u64 offset, bitmap; const __le64 *v; int i, num; if (ctx->to_qstat) return virtnet_fill_stats_qstat(vi, qid, ctx, base, drv_stats, reply_type); num_cq = ctx->desc_num[VIRTNET_Q_TYPE_CQ]; num_rx = ctx->desc_num[VIRTNET_Q_TYPE_RX]; num_tx = ctx->desc_num[VIRTNET_Q_TYPE_TX]; queue_type = vq_type(vi, qid); bitmap = ctx->bitmap[queue_type]; /* skip the total fields of pairs */ offset = num_rx + num_tx; if (queue_type == VIRTNET_Q_TYPE_TX) { offset += num_cq + num_rx * vi->curr_queue_pairs + num_tx * (qid / 2); num = ARRAY_SIZE(virtnet_sq_stats_desc); if (drv_stats) { desc = &virtnet_sq_stats_desc[0]; goto drv_stats; } offset += num; } else if (queue_type == VIRTNET_Q_TYPE_RX) { offset += num_cq + num_rx * (qid / 2); num = ARRAY_SIZE(virtnet_rq_stats_desc); if (drv_stats) { desc = &virtnet_rq_stats_desc[0]; goto drv_stats; } offset += num; } if (bitmap & VIRTIO_NET_STATS_TYPE_CVQ) { desc = &virtnet_stats_cvq_desc[0]; num = ARRAY_SIZE(virtnet_stats_cvq_desc); if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_CVQ) goto found; offset += num; } if (bitmap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { desc = &virtnet_stats_rx_basic_desc[0]; num = ARRAY_SIZE(virtnet_stats_rx_basic_desc); if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_BASIC) goto found; offset += num; } if (bitmap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { desc = &virtnet_stats_rx_csum_desc[0]; num = ARRAY_SIZE(virtnet_stats_rx_csum_desc); if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_CSUM) goto found; offset += num; } if (bitmap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { desc = &virtnet_stats_rx_speed_desc[0]; num = ARRAY_SIZE(virtnet_stats_rx_speed_desc); if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_SPEED) goto found; offset += num; } if (bitmap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { desc = &virtnet_stats_tx_basic_desc[0]; num = ARRAY_SIZE(virtnet_stats_tx_basic_desc); if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_BASIC) goto found; offset += num; } if (bitmap & VIRTIO_NET_STATS_TYPE_TX_GSO) { desc = &virtnet_stats_tx_gso_desc[0]; num = ARRAY_SIZE(virtnet_stats_tx_gso_desc); if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_GSO) goto found; offset += num; } if (bitmap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { desc = &virtnet_stats_tx_speed_desc[0]; num = ARRAY_SIZE(virtnet_stats_tx_speed_desc); if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_SPEED) goto found; offset += num; } return; found: for (i = 0; i < num; ++i) { v = (const __le64 *)(base + desc[i].offset); ctx->data[offset + i] = le64_to_cpu(*v); } return; drv_stats: for (i = 0; i < num; ++i) { v_stat = (const u64_stats_t *)(base + desc[i].offset); ctx->data[offset + i] = u64_stats_read(v_stat); } } static int __virtnet_get_hw_stats(struct virtnet_info *vi, struct virtnet_stats_ctx *ctx, struct virtio_net_ctrl_queue_stats *req, int req_size, void *reply, int res_size) { struct virtio_net_stats_reply_hdr *hdr; struct scatterlist sgs_in, sgs_out; void *p; u32 qid; int ok; sg_init_one(&sgs_out, req, req_size); sg_init_one(&sgs_in, reply, res_size); ok = virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_STATS, VIRTIO_NET_CTRL_STATS_GET, &sgs_out, &sgs_in); if (!ok) return ok; for (p = reply; p - reply < res_size; p += le16_to_cpu(hdr->size)) { hdr = p; qid = le16_to_cpu(hdr->vq_index); virtnet_fill_stats(vi, qid, ctx, p, false, hdr->type); } return 0; } static void virtnet_make_stat_req(struct virtnet_info *vi, struct virtnet_stats_ctx *ctx, struct virtio_net_ctrl_queue_stats *req, int qid, int *idx) { int qtype = vq_type(vi, qid); u64 bitmap = ctx->bitmap[qtype]; if (!bitmap) return; req->stats[*idx].vq_index = cpu_to_le16(qid); req->stats[*idx].types_bitmap[0] = cpu_to_le64(bitmap); *idx += 1; } /* qid: -1: get stats of all vq. * > 0: get the stats for the special vq. This must not be cvq. */ static int virtnet_get_hw_stats(struct virtnet_info *vi, struct virtnet_stats_ctx *ctx, int qid) { int qnum, i, j, res_size, qtype, last_vq, first_vq; struct virtio_net_ctrl_queue_stats *req; bool enable_cvq; void *reply; int ok; if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS)) return 0; if (qid == -1) { last_vq = vi->curr_queue_pairs * 2 - 1; first_vq = 0; enable_cvq = true; } else { last_vq = qid; first_vq = qid; enable_cvq = false; } qnum = 0; res_size = 0; for (i = first_vq; i <= last_vq ; ++i) { qtype = vq_type(vi, i); if (ctx->bitmap[qtype]) { ++qnum; res_size += ctx->size[qtype]; } } if (enable_cvq && ctx->bitmap[VIRTNET_Q_TYPE_CQ]) { res_size += ctx->size[VIRTNET_Q_TYPE_CQ]; qnum += 1; } req = kcalloc(qnum, sizeof(*req), GFP_KERNEL); if (!req) return -ENOMEM; reply = kmalloc(res_size, GFP_KERNEL); if (!reply) { kfree(req); return -ENOMEM; } j = 0; for (i = first_vq; i <= last_vq ; ++i) virtnet_make_stat_req(vi, ctx, req, i, &j); if (enable_cvq) virtnet_make_stat_req(vi, ctx, req, vi->max_queue_pairs * 2, &j); ok = __virtnet_get_hw_stats(vi, ctx, req, sizeof(*req) * j, reply, res_size); kfree(req); kfree(reply); return ok; } static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data) { struct virtnet_info *vi = netdev_priv(dev); unsigned int i; u8 *p = data; switch (stringset) { case ETH_SS_STATS: /* Generate the total field names. */ virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_RX, -1, &p); virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_TX, -1, &p); virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_CQ, 0, &p); for (i = 0; i < vi->curr_queue_pairs; ++i) virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_RX, i, &p); for (i = 0; i < vi->curr_queue_pairs; ++i) virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_TX, i, &p); break; } } static int virtnet_get_sset_count(struct net_device *dev, int sset) { struct virtnet_info *vi = netdev_priv(dev); struct virtnet_stats_ctx ctx = {0}; u32 pair_count; switch (sset) { case ETH_SS_STATS: virtnet_stats_ctx_init(vi, &ctx, NULL, false); pair_count = ctx.desc_num[VIRTNET_Q_TYPE_RX] + ctx.desc_num[VIRTNET_Q_TYPE_TX]; return pair_count + ctx.desc_num[VIRTNET_Q_TYPE_CQ] + vi->curr_queue_pairs * pair_count; default: return -EOPNOTSUPP; } } static void virtnet_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats, u64 *data) { struct virtnet_info *vi = netdev_priv(dev); struct virtnet_stats_ctx ctx = {0}; unsigned int start, i; const u8 *stats_base; virtnet_stats_ctx_init(vi, &ctx, data, false); if (virtnet_get_hw_stats(vi, &ctx, -1)) dev_warn(&vi->dev->dev, "Failed to get hw stats.\n"); for (i = 0; i < vi->curr_queue_pairs; i++) { struct receive_queue *rq = &vi->rq[i]; struct send_queue *sq = &vi->sq[i]; stats_base = (const u8 *)&rq->stats; do { start = u64_stats_fetch_begin(&rq->stats.syncp); virtnet_fill_stats(vi, i * 2, &ctx, stats_base, true, 0); } while (u64_stats_fetch_retry(&rq->stats.syncp, start)); stats_base = (const u8 *)&sq->stats; do { start = u64_stats_fetch_begin(&sq->stats.syncp); virtnet_fill_stats(vi, i * 2 + 1, &ctx, stats_base, true, 0); } while (u64_stats_fetch_retry(&sq->stats.syncp, start)); } virtnet_fill_total_fields(vi, &ctx); } static void virtnet_get_channels(struct net_device *dev, struct ethtool_channels *channels) { struct virtnet_info *vi = netdev_priv(dev); channels->combined_count = vi->curr_queue_pairs; channels->max_combined = vi->max_queue_pairs; channels->max_other = 0; channels->rx_count = 0; channels->tx_count = 0; channels->other_count = 0; } static int virtnet_set_link_ksettings(struct net_device *dev, const struct ethtool_link_ksettings *cmd) { struct virtnet_info *vi = netdev_priv(dev); return ethtool_virtdev_set_link_ksettings(dev, cmd, &vi->speed, &vi->duplex); } static int virtnet_get_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *cmd) { struct virtnet_info *vi = netdev_priv(dev); cmd->base.speed = vi->speed; cmd->base.duplex = vi->duplex; cmd->base.port = PORT_OTHER; return 0; } static int virtnet_send_tx_notf_coal_cmds(struct virtnet_info *vi, struct ethtool_coalesce *ec) { struct virtio_net_ctrl_coal_tx *coal_tx __free(kfree) = NULL; struct scatterlist sgs_tx; int i; coal_tx = kzalloc(sizeof(*coal_tx), GFP_KERNEL); if (!coal_tx) return -ENOMEM; coal_tx->tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs); coal_tx->tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames); sg_init_one(&sgs_tx, coal_tx, sizeof(*coal_tx)); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, VIRTIO_NET_CTRL_NOTF_COAL_TX_SET, &sgs_tx)) return -EINVAL; vi->intr_coal_tx.max_usecs = ec->tx_coalesce_usecs; vi->intr_coal_tx.max_packets = ec->tx_max_coalesced_frames; for (i = 0; i < vi->max_queue_pairs; i++) { vi->sq[i].intr_coal.max_usecs = ec->tx_coalesce_usecs; vi->sq[i].intr_coal.max_packets = ec->tx_max_coalesced_frames; } return 0; } static int virtnet_send_rx_notf_coal_cmds(struct virtnet_info *vi, struct ethtool_coalesce *ec) { struct virtio_net_ctrl_coal_rx *coal_rx __free(kfree) = NULL; bool rx_ctrl_dim_on = !!ec->use_adaptive_rx_coalesce; struct scatterlist sgs_rx; int i; if (rx_ctrl_dim_on && !virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) return -EOPNOTSUPP; if (rx_ctrl_dim_on && (ec->rx_coalesce_usecs != vi->intr_coal_rx.max_usecs || ec->rx_max_coalesced_frames != vi->intr_coal_rx.max_packets)) return -EINVAL; if (rx_ctrl_dim_on && !vi->rx_dim_enabled) { vi->rx_dim_enabled = true; for (i = 0; i < vi->max_queue_pairs; i++) { mutex_lock(&vi->rq[i].dim_lock); vi->rq[i].dim_enabled = true; mutex_unlock(&vi->rq[i].dim_lock); } return 0; } coal_rx = kzalloc(sizeof(*coal_rx), GFP_KERNEL); if (!coal_rx) return -ENOMEM; if (!rx_ctrl_dim_on && vi->rx_dim_enabled) { vi->rx_dim_enabled = false; for (i = 0; i < vi->max_queue_pairs; i++) { mutex_lock(&vi->rq[i].dim_lock); vi->rq[i].dim_enabled = false; mutex_unlock(&vi->rq[i].dim_lock); } } /* Since the per-queue coalescing params can be set, * we need apply the global new params even if they * are not updated. */ coal_rx->rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs); coal_rx->rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames); sg_init_one(&sgs_rx, coal_rx, sizeof(*coal_rx)); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, VIRTIO_NET_CTRL_NOTF_COAL_RX_SET, &sgs_rx)) return -EINVAL; vi->intr_coal_rx.max_usecs = ec->rx_coalesce_usecs; vi->intr_coal_rx.max_packets = ec->rx_max_coalesced_frames; for (i = 0; i < vi->max_queue_pairs; i++) { mutex_lock(&vi->rq[i].dim_lock); vi->rq[i].intr_coal.max_usecs = ec->rx_coalesce_usecs; vi->rq[i].intr_coal.max_packets = ec->rx_max_coalesced_frames; mutex_unlock(&vi->rq[i].dim_lock); } return 0; } static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi, struct ethtool_coalesce *ec) { int err; err = virtnet_send_tx_notf_coal_cmds(vi, ec); if (err) return err; err = virtnet_send_rx_notf_coal_cmds(vi, ec); if (err) return err; return 0; } static int virtnet_send_rx_notf_coal_vq_cmds(struct virtnet_info *vi, struct ethtool_coalesce *ec, u16 queue) { bool rx_ctrl_dim_on = !!ec->use_adaptive_rx_coalesce; u32 max_usecs, max_packets; bool cur_rx_dim; int err; mutex_lock(&vi->rq[queue].dim_lock); cur_rx_dim = vi->rq[queue].dim_enabled; max_usecs = vi->rq[queue].intr_coal.max_usecs; max_packets = vi->rq[queue].intr_coal.max_packets; if (rx_ctrl_dim_on && (ec->rx_coalesce_usecs != max_usecs || ec->rx_max_coalesced_frames != max_packets)) { mutex_unlock(&vi->rq[queue].dim_lock); return -EINVAL; } if (rx_ctrl_dim_on && !cur_rx_dim) { vi->rq[queue].dim_enabled = true; mutex_unlock(&vi->rq[queue].dim_lock); return 0; } if (!rx_ctrl_dim_on && cur_rx_dim) vi->rq[queue].dim_enabled = false; /* If no params are updated, userspace ethtool will * reject the modification. */ err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, queue, ec->rx_coalesce_usecs, ec->rx_max_coalesced_frames); mutex_unlock(&vi->rq[queue].dim_lock); return err; } static int virtnet_send_notf_coal_vq_cmds(struct virtnet_info *vi, struct ethtool_coalesce *ec, u16 queue) { int err; err = virtnet_send_rx_notf_coal_vq_cmds(vi, ec, queue); if (err) return err; err = virtnet_send_tx_ctrl_coal_vq_cmd(vi, queue, ec->tx_coalesce_usecs, ec->tx_max_coalesced_frames); if (err) return err; return 0; } static void virtnet_rx_dim_work(struct work_struct *work) { struct dim *dim = container_of(work, struct dim, work); struct receive_queue *rq = container_of(dim, struct receive_queue, dim); struct virtnet_info *vi = rq->vq->vdev->priv; struct net_device *dev = vi->dev; struct dim_cq_moder update_moder; int qnum, err; qnum = rq - vi->rq; mutex_lock(&rq->dim_lock); if (!rq->dim_enabled) goto out; update_moder = net_dim_get_rx_irq_moder(dev, dim); if (update_moder.usec != rq->intr_coal.max_usecs || update_moder.pkts != rq->intr_coal.max_packets) { err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, qnum, update_moder.usec, update_moder.pkts); if (err) pr_debug("%s: Failed to send dim parameters on rxq%d\n", dev->name, qnum); } out: dim->state = DIM_START_MEASURE; mutex_unlock(&rq->dim_lock); } static int virtnet_coal_params_supported(struct ethtool_coalesce *ec) { /* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL * or VIRTIO_NET_F_VQ_NOTF_COAL feature is negotiated. */ if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs) return -EOPNOTSUPP; if (ec->tx_max_coalesced_frames > 1 || ec->rx_max_coalesced_frames != 1) return -EINVAL; return 0; } static int virtnet_should_update_vq_weight(int dev_flags, int weight, int vq_weight, bool *should_update) { if (weight ^ vq_weight) { if (dev_flags & IFF_UP) return -EBUSY; *should_update = true; } return 0; } static int virtnet_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec, struct kernel_ethtool_coalesce *kernel_coal, struct netlink_ext_ack *extack) { struct virtnet_info *vi = netdev_priv(dev); int ret, queue_number, napi_weight; bool update_napi = false; /* Can't change NAPI weight if the link is up */ napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0; for (queue_number = 0; queue_number < vi->max_queue_pairs; queue_number++) { ret = virtnet_should_update_vq_weight(dev->flags, napi_weight, vi->sq[queue_number].napi.weight, &update_napi); if (ret) return ret; if (update_napi) { /* All queues that belong to [queue_number, vi->max_queue_pairs] will be * updated for the sake of simplicity, which might not be necessary */ break; } } if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) ret = virtnet_send_notf_coal_cmds(vi, ec); else ret = virtnet_coal_params_supported(ec); if (ret) return ret; if (update_napi) { for (; queue_number < vi->max_queue_pairs; queue_number++) vi->sq[queue_number].napi.weight = napi_weight; } return ret; } static int virtnet_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec, struct kernel_ethtool_coalesce *kernel_coal, struct netlink_ext_ack *extack) { struct virtnet_info *vi = netdev_priv(dev); if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { ec->rx_coalesce_usecs = vi->intr_coal_rx.max_usecs; ec->tx_coalesce_usecs = vi->intr_coal_tx.max_usecs; ec->tx_max_coalesced_frames = vi->intr_coal_tx.max_packets; ec->rx_max_coalesced_frames = vi->intr_coal_rx.max_packets; ec->use_adaptive_rx_coalesce = vi->rx_dim_enabled; } else { ec->rx_max_coalesced_frames = 1; if (vi->sq[0].napi.weight) ec->tx_max_coalesced_frames = 1; } return 0; } static int virtnet_set_per_queue_coalesce(struct net_device *dev, u32 queue, struct ethtool_coalesce *ec) { struct virtnet_info *vi = netdev_priv(dev); int ret, napi_weight; bool update_napi = false; if (queue >= vi->max_queue_pairs) return -EINVAL; /* Can't change NAPI weight if the link is up */ napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0; ret = virtnet_should_update_vq_weight(dev->flags, napi_weight, vi->sq[queue].napi.weight, &update_napi); if (ret) return ret; if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) ret = virtnet_send_notf_coal_vq_cmds(vi, ec, queue); else ret = virtnet_coal_params_supported(ec); if (ret) return ret; if (update_napi) vi->sq[queue].napi.weight = napi_weight; return 0; } static int virtnet_get_per_queue_coalesce(struct net_device *dev, u32 queue, struct ethtool_coalesce *ec) { struct virtnet_info *vi = netdev_priv(dev); if (queue >= vi->max_queue_pairs) return -EINVAL; if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) { mutex_lock(&vi->rq[queue].dim_lock); ec->rx_coalesce_usecs = vi->rq[queue].intr_coal.max_usecs; ec->tx_coalesce_usecs = vi->sq[queue].intr_coal.max_usecs; ec->tx_max_coalesced_frames = vi->sq[queue].intr_coal.max_packets; ec->rx_max_coalesced_frames = vi->rq[queue].intr_coal.max_packets; ec->use_adaptive_rx_coalesce = vi->rq[queue].dim_enabled; mutex_unlock(&vi->rq[queue].dim_lock); } else { ec->rx_max_coalesced_frames = 1; if (vi->sq[queue].napi.weight) ec->tx_max_coalesced_frames = 1; } return 0; } static void virtnet_init_settings(struct net_device *dev) { struct virtnet_info *vi = netdev_priv(dev); vi->speed = SPEED_UNKNOWN; vi->duplex = DUPLEX_UNKNOWN; } static u32 virtnet_get_rxfh_key_size(struct net_device *dev) { return ((struct virtnet_info *)netdev_priv(dev))->rss_key_size; } static u32 virtnet_get_rxfh_indir_size(struct net_device *dev) { return ((struct virtnet_info *)netdev_priv(dev))->rss_indir_table_size; } static int virtnet_get_rxfh(struct net_device *dev, struct ethtool_rxfh_param *rxfh) { struct virtnet_info *vi = netdev_priv(dev); int i; if (rxfh->indir) { for (i = 0; i < vi->rss_indir_table_size; ++i) rxfh->indir[i] = vi->rss.indirection_table[i]; } if (rxfh->key) memcpy(rxfh->key, vi->rss.key, vi->rss_key_size); rxfh->hfunc = ETH_RSS_HASH_TOP; return 0; } static int virtnet_set_rxfh(struct net_device *dev, struct ethtool_rxfh_param *rxfh, struct netlink_ext_ack *extack) { struct virtnet_info *vi = netdev_priv(dev); bool update = false; int i; if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE && rxfh->hfunc != ETH_RSS_HASH_TOP) return -EOPNOTSUPP; if (rxfh->indir) { if (!vi->has_rss) return -EOPNOTSUPP; for (i = 0; i < vi->rss_indir_table_size; ++i) vi->rss.indirection_table[i] = rxfh->indir[i]; update = true; } if (rxfh->key) { /* If either _F_HASH_REPORT or _F_RSS are negotiated, the * device provides hash calculation capabilities, that is, * hash_key is configured. */ if (!vi->has_rss && !vi->has_rss_hash_report) return -EOPNOTSUPP; memcpy(vi->rss.key, rxfh->key, vi->rss_key_size); update = true; } if (update) virtnet_commit_rss_command(vi); return 0; } static int virtnet_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rule_locs) { struct virtnet_info *vi = netdev_priv(dev); int rc = 0; switch (info->cmd) { case ETHTOOL_GRXRINGS: info->data = vi->curr_queue_pairs; break; case ETHTOOL_GRXFH: virtnet_get_hashflow(vi, info); break; default: rc = -EOPNOTSUPP; } return rc; } static int virtnet_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info) { struct virtnet_info *vi = netdev_priv(dev); int rc = 0; switch (info->cmd) { case ETHTOOL_SRXFH: if (!virtnet_set_hashflow(vi, info)) rc = -EINVAL; break; default: rc = -EOPNOTSUPP; } return rc; } static const struct ethtool_ops virtnet_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES | ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_USE_ADAPTIVE_RX, .get_drvinfo = virtnet_get_drvinfo, .get_link = ethtool_op_get_link, .get_ringparam = virtnet_get_ringparam, .set_ringparam = virtnet_set_ringparam, .get_strings = virtnet_get_strings, .get_sset_count = virtnet_get_sset_count, .get_ethtool_stats = virtnet_get_ethtool_stats, .set_channels = virtnet_set_channels, .get_channels = virtnet_get_channels, .get_ts_info = ethtool_op_get_ts_info, .get_link_ksettings = virtnet_get_link_ksettings, .set_link_ksettings = virtnet_set_link_ksettings, .set_coalesce = virtnet_set_coalesce, .get_coalesce = virtnet_get_coalesce, .set_per_queue_coalesce = virtnet_set_per_queue_coalesce, .get_per_queue_coalesce = virtnet_get_per_queue_coalesce, .get_rxfh_key_size = virtnet_get_rxfh_key_size, .get_rxfh_indir_size = virtnet_get_rxfh_indir_size, .get_rxfh = virtnet_get_rxfh, .set_rxfh = virtnet_set_rxfh, .get_rxnfc = virtnet_get_rxnfc, .set_rxnfc = virtnet_set_rxnfc, }; static void virtnet_get_queue_stats_rx(struct net_device *dev, int i, struct netdev_queue_stats_rx *stats) { struct virtnet_info *vi = netdev_priv(dev); struct receive_queue *rq = &vi->rq[i]; struct virtnet_stats_ctx ctx = {0}; virtnet_stats_ctx_init(vi, &ctx, (void *)stats, true); virtnet_get_hw_stats(vi, &ctx, i * 2); virtnet_fill_stats(vi, i * 2, &ctx, (void *)&rq->stats, true, 0); } static void virtnet_get_queue_stats_tx(struct net_device *dev, int i, struct netdev_queue_stats_tx *stats) { struct virtnet_info *vi = netdev_priv(dev); struct send_queue *sq = &vi->sq[i]; struct virtnet_stats_ctx ctx = {0}; virtnet_stats_ctx_init(vi, &ctx, (void *)stats, true); virtnet_get_hw_stats(vi, &ctx, i * 2 + 1); virtnet_fill_stats(vi, i * 2 + 1, &ctx, (void *)&sq->stats, true, 0); } static void virtnet_get_base_stats(struct net_device *dev, struct netdev_queue_stats_rx *rx, struct netdev_queue_stats_tx *tx) { struct virtnet_info *vi = netdev_priv(dev); /* The queue stats of the virtio-net will not be reset. So here we * return 0. */ rx->bytes = 0; rx->packets = 0; if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { rx->hw_drops = 0; rx->hw_drop_overruns = 0; } if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { rx->csum_unnecessary = 0; rx->csum_none = 0; rx->csum_bad = 0; } if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_GSO) { rx->hw_gro_packets = 0; rx->hw_gro_bytes = 0; rx->hw_gro_wire_packets = 0; rx->hw_gro_wire_bytes = 0; } if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) rx->hw_drop_ratelimits = 0; tx->bytes = 0; tx->packets = 0; tx->stop = 0; tx->wake = 0; if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { tx->hw_drops = 0; tx->hw_drop_errors = 0; } if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_CSUM) { tx->csum_none = 0; tx->needs_csum = 0; } if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { tx->hw_gso_packets = 0; tx->hw_gso_bytes = 0; tx->hw_gso_wire_packets = 0; tx->hw_gso_wire_bytes = 0; } if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) tx->hw_drop_ratelimits = 0; } static const struct netdev_stat_ops virtnet_stat_ops = { .get_queue_stats_rx = virtnet_get_queue_stats_rx, .get_queue_stats_tx = virtnet_get_queue_stats_tx, .get_base_stats = virtnet_get_base_stats, }; static void virtnet_freeze_down(struct virtio_device *vdev) { struct virtnet_info *vi = vdev->priv; /* Make sure no work handler is accessing the device */ flush_work(&vi->config_work); disable_rx_mode_work(vi); flush_work(&vi->rx_mode_work); netif_tx_lock_bh(vi->dev); netif_device_detach(vi->dev); netif_tx_unlock_bh(vi->dev); if (netif_running(vi->dev)) virtnet_close(vi->dev); } static int init_vqs(struct virtnet_info *vi); static int virtnet_restore_up(struct virtio_device *vdev) { struct virtnet_info *vi = vdev->priv; int err; err = init_vqs(vi); if (err) return err; virtio_device_ready(vdev); enable_delayed_refill(vi); enable_rx_mode_work(vi); if (netif_running(vi->dev)) { err = virtnet_open(vi->dev); if (err) return err; } netif_tx_lock_bh(vi->dev); netif_device_attach(vi->dev); netif_tx_unlock_bh(vi->dev); return err; } static int virtnet_set_guest_offloads(struct virtnet_info *vi, u64 offloads) { __virtio64 *_offloads __free(kfree) = NULL; struct scatterlist sg; _offloads = kzalloc(sizeof(*_offloads), GFP_KERNEL); if (!_offloads) return -ENOMEM; *_offloads = cpu_to_virtio64(vi->vdev, offloads); sg_init_one(&sg, _offloads, sizeof(*_offloads)); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_GUEST_OFFLOADS, VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, &sg)) { dev_warn(&vi->dev->dev, "Fail to set guest offload.\n"); return -EINVAL; } return 0; } static int virtnet_clear_guest_offloads(struct virtnet_info *vi) { u64 offloads = 0; if (!vi->guest_offloads) return 0; return virtnet_set_guest_offloads(vi, offloads); } static int virtnet_restore_guest_offloads(struct virtnet_info *vi) { u64 offloads = vi->guest_offloads; if (!vi->guest_offloads) return 0; return virtnet_set_guest_offloads(vi, offloads); } static int virtnet_rq_bind_xsk_pool(struct virtnet_info *vi, struct receive_queue *rq, struct xsk_buff_pool *pool) { int err, qindex; qindex = rq - vi->rq; if (pool) { err = xdp_rxq_info_reg(&rq->xsk_rxq_info, vi->dev, qindex, rq->napi.napi_id); if (err < 0) return err; err = xdp_rxq_info_reg_mem_model(&rq->xsk_rxq_info, MEM_TYPE_XSK_BUFF_POOL, NULL); if (err < 0) goto unreg; xsk_pool_set_rxq_info(pool, &rq->xsk_rxq_info); } virtnet_rx_pause(vi, rq); err = virtqueue_reset(rq->vq, virtnet_rq_unmap_free_buf); if (err) { netdev_err(vi->dev, "reset rx fail: rx queue index: %d err: %d\n", qindex, err); pool = NULL; } rq->xsk_pool = pool; virtnet_rx_resume(vi, rq); if (pool) return 0; unreg: xdp_rxq_info_unreg(&rq->xsk_rxq_info); return err; } static int virtnet_xsk_pool_enable(struct net_device *dev, struct xsk_buff_pool *pool, u16 qid) { struct virtnet_info *vi = netdev_priv(dev); struct receive_queue *rq; struct device *dma_dev; struct send_queue *sq; int err, size; if (vi->hdr_len > xsk_pool_get_headroom(pool)) return -EINVAL; /* In big_packets mode, xdp cannot work, so there is no need to * initialize xsk of rq. */ if (vi->big_packets && !vi->mergeable_rx_bufs) return -ENOENT; if (qid >= vi->curr_queue_pairs) return -EINVAL; sq = &vi->sq[qid]; rq = &vi->rq[qid]; /* xsk assumes that tx and rx must have the same dma device. The af-xdp * may use one buffer to receive from the rx and reuse this buffer to * send by the tx. So the dma dev of sq and rq must be the same one. * * But vq->dma_dev allows every vq has the respective dma dev. So I * check the dma dev of vq and sq is the same dev. */ if (virtqueue_dma_dev(rq->vq) != virtqueue_dma_dev(sq->vq)) return -EINVAL; dma_dev = virtqueue_dma_dev(rq->vq); if (!dma_dev) return -EINVAL; size = virtqueue_get_vring_size(rq->vq); rq->xsk_buffs = kvcalloc(size, sizeof(*rq->xsk_buffs), GFP_KERNEL); if (!rq->xsk_buffs) return -ENOMEM; err = xsk_pool_dma_map(pool, dma_dev, 0); if (err) goto err_xsk_map; err = virtnet_rq_bind_xsk_pool(vi, rq, pool); if (err) goto err_rq; return 0; err_rq: xsk_pool_dma_unmap(pool, 0); err_xsk_map: return err; } static int virtnet_xsk_pool_disable(struct net_device *dev, u16 qid) { struct virtnet_info *vi = netdev_priv(dev); struct xsk_buff_pool *pool; struct receive_queue *rq; int err; if (qid >= vi->curr_queue_pairs) return -EINVAL; rq = &vi->rq[qid]; pool = rq->xsk_pool; err = virtnet_rq_bind_xsk_pool(vi, rq, NULL); xsk_pool_dma_unmap(pool, 0); kvfree(rq->xsk_buffs); return err; } static int virtnet_xsk_pool_setup(struct net_device *dev, struct netdev_bpf *xdp) { if (xdp->xsk.pool) return virtnet_xsk_pool_enable(dev, xdp->xsk.pool, xdp->xsk.queue_id); else return virtnet_xsk_pool_disable(dev, xdp->xsk.queue_id); } static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, struct netlink_ext_ack *extack) { unsigned int room = SKB_DATA_ALIGN(XDP_PACKET_HEADROOM + sizeof(struct skb_shared_info)); unsigned int max_sz = PAGE_SIZE - room - ETH_HLEN; struct virtnet_info *vi = netdev_priv(dev); struct bpf_prog *old_prog; u16 xdp_qp = 0, curr_qp; int i, err; if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) && (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6))) { NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing GRO_HW/CSUM, disable GRO_HW/CSUM first"); return -EOPNOTSUPP; } if (vi->mergeable_rx_bufs && !vi->any_header_sg) { NL_SET_ERR_MSG_MOD(extack, "XDP expects header/data in single page, any_header_sg required"); return -EINVAL; } if (prog && !prog->aux->xdp_has_frags && dev->mtu > max_sz) { NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP without frags"); netdev_warn(dev, "single-buffer XDP requires MTU less than %u\n", max_sz); return -EINVAL; } curr_qp = vi->curr_queue_pairs - vi->xdp_queue_pairs; if (prog) xdp_qp = nr_cpu_ids; /* XDP requires extra queues for XDP_TX */ if (curr_qp + xdp_qp > vi->max_queue_pairs) { netdev_warn_once(dev, "XDP request %i queues but max is %i. XDP_TX and XDP_REDIRECT will operate in a slower locked tx mode.\n", curr_qp + xdp_qp, vi->max_queue_pairs); xdp_qp = 0; } old_prog = rtnl_dereference(vi->rq[0].xdp_prog); if (!prog && !old_prog) return 0; if (prog) bpf_prog_add(prog, vi->max_queue_pairs - 1); /* Make sure NAPI is not using any XDP TX queues for RX. */ if (netif_running(dev)) { for (i = 0; i < vi->max_queue_pairs; i++) { napi_disable(&vi->rq[i].napi); virtnet_napi_tx_disable(&vi->sq[i].napi); } } if (!prog) { for (i = 0; i < vi->max_queue_pairs; i++) { rcu_assign_pointer(vi->rq[i].xdp_prog, prog); if (i == 0) virtnet_restore_guest_offloads(vi); } synchronize_net(); } err = virtnet_set_queues(vi, curr_qp + xdp_qp); if (err) goto err; netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp); vi->xdp_queue_pairs = xdp_qp; if (prog) { vi->xdp_enabled = true; for (i = 0; i < vi->max_queue_pairs; i++) { rcu_assign_pointer(vi->rq[i].xdp_prog, prog); if (i == 0 && !old_prog) virtnet_clear_guest_offloads(vi); } if (!old_prog) xdp_features_set_redirect_target(dev, true); } else { xdp_features_clear_redirect_target(dev); vi->xdp_enabled = false; } for (i = 0; i < vi->max_queue_pairs; i++) { if (old_prog) bpf_prog_put(old_prog); if (netif_running(dev)) { virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); virtnet_napi_tx_enable(vi, vi->sq[i].vq, &vi->sq[i].napi); } } return 0; err: if (!prog) { virtnet_clear_guest_offloads(vi); for (i = 0; i < vi->max_queue_pairs; i++) rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog); } if (netif_running(dev)) { for (i = 0; i < vi->max_queue_pairs; i++) { virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); virtnet_napi_tx_enable(vi, vi->sq[i].vq, &vi->sq[i].napi); } } if (prog) bpf_prog_sub(prog, vi->max_queue_pairs - 1); return err; } static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp) { switch (xdp->command) { case XDP_SETUP_PROG: return virtnet_xdp_set(dev, xdp->prog, xdp->extack); case XDP_SETUP_XSK_POOL: return virtnet_xsk_pool_setup(dev, xdp); default: return -EINVAL; } } static int virtnet_get_phys_port_name(struct net_device *dev, char *buf, size_t len) { struct virtnet_info *vi = netdev_priv(dev); int ret; if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY)) return -EOPNOTSUPP; ret = snprintf(buf, len, "sby"); if (ret >= len) return -EOPNOTSUPP; return 0; } static int virtnet_set_features(struct net_device *dev, netdev_features_t features) { struct virtnet_info *vi = netdev_priv(dev); u64 offloads; int err; if ((dev->features ^ features) & NETIF_F_GRO_HW) { if (vi->xdp_enabled) return -EBUSY; if (features & NETIF_F_GRO_HW) offloads = vi->guest_offloads_capable; else offloads = vi->guest_offloads_capable & ~GUEST_OFFLOAD_GRO_HW_MASK; err = virtnet_set_guest_offloads(vi, offloads); if (err) return err; vi->guest_offloads = offloads; } if ((dev->features ^ features) & NETIF_F_RXHASH) { if (features & NETIF_F_RXHASH) vi->rss.hash_types = vi->rss_hash_types_saved; else vi->rss.hash_types = VIRTIO_NET_HASH_REPORT_NONE; if (!virtnet_commit_rss_command(vi)) return -EINVAL; } return 0; } static void virtnet_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct virtnet_info *priv = netdev_priv(dev); struct send_queue *sq = &priv->sq[txqueue]; struct netdev_queue *txq = netdev_get_tx_queue(dev, txqueue); u64_stats_update_begin(&sq->stats.syncp); u64_stats_inc(&sq->stats.tx_timeouts); u64_stats_update_end(&sq->stats.syncp); netdev_err(dev, "TX timeout on queue: %u, sq: %s, vq: 0x%x, name: %s, %u usecs ago\n", txqueue, sq->name, sq->vq->index, sq->vq->name, jiffies_to_usecs(jiffies - READ_ONCE(txq->trans_start))); } static int virtnet_init_irq_moder(struct virtnet_info *vi) { u8 profile_flags = 0, coal_flags = 0; int ret, i; profile_flags |= DIM_PROFILE_RX; coal_flags |= DIM_COALESCE_USEC | DIM_COALESCE_PKTS; ret = net_dim_init_irq_moder(vi->dev, profile_flags, coal_flags, DIM_CQ_PERIOD_MODE_START_FROM_EQE, 0, virtnet_rx_dim_work, NULL); if (ret) return ret; for (i = 0; i < vi->max_queue_pairs; i++) net_dim_setting(vi->dev, &vi->rq[i].dim, false); return 0; } static void virtnet_free_irq_moder(struct virtnet_info *vi) { if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) return; rtnl_lock(); net_dim_free_irq_moder(vi->dev); rtnl_unlock(); } static const struct net_device_ops virtnet_netdev = { .ndo_open = virtnet_open, .ndo_stop = virtnet_close, .ndo_start_xmit = start_xmit, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = virtnet_set_mac_address, .ndo_set_rx_mode = virtnet_set_rx_mode, .ndo_get_stats64 = virtnet_stats, .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid, .ndo_bpf = virtnet_xdp, .ndo_xdp_xmit = virtnet_xdp_xmit, .ndo_xsk_wakeup = virtnet_xsk_wakeup, .ndo_features_check = passthru_features_check, .ndo_get_phys_port_name = virtnet_get_phys_port_name, .ndo_set_features = virtnet_set_features, .ndo_tx_timeout = virtnet_tx_timeout, }; static void virtnet_config_changed_work(struct work_struct *work) { struct virtnet_info *vi = container_of(work, struct virtnet_info, config_work); u16 v; if (virtio_cread_feature(vi->vdev, VIRTIO_NET_F_STATUS, struct virtio_net_config, status, &v) < 0) return; if (v & VIRTIO_NET_S_ANNOUNCE) { netdev_notify_peers(vi->dev); virtnet_ack_link_announce(vi); } /* Ignore unknown (future) status bits */ v &= VIRTIO_NET_S_LINK_UP; if (vi->status == v) return; vi->status = v; if (vi->status & VIRTIO_NET_S_LINK_UP) { virtnet_update_settings(vi); netif_carrier_on(vi->dev); netif_tx_wake_all_queues(vi->dev); } else { netif_carrier_off(vi->dev); netif_tx_stop_all_queues(vi->dev); } } static void virtnet_config_changed(struct virtio_device *vdev) { struct virtnet_info *vi = vdev->priv; schedule_work(&vi->config_work); } static void virtnet_free_queues(struct virtnet_info *vi) { int i; for (i = 0; i < vi->max_queue_pairs; i++) { __netif_napi_del(&vi->rq[i].napi); __netif_napi_del(&vi->sq[i].napi); } /* We called __netif_napi_del(), * we need to respect an RCU grace period before freeing vi->rq */ synchronize_net(); kfree(vi->rq); kfree(vi->sq); kfree(vi->ctrl); } static void _free_receive_bufs(struct virtnet_info *vi) { struct bpf_prog *old_prog; int i; for (i = 0; i < vi->max_queue_pairs; i++) { while (vi->rq[i].pages) __free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0); old_prog = rtnl_dereference(vi->rq[i].xdp_prog); RCU_INIT_POINTER(vi->rq[i].xdp_prog, NULL); if (old_prog) bpf_prog_put(old_prog); } } static void free_receive_bufs(struct virtnet_info *vi) { rtnl_lock(); _free_receive_bufs(vi); rtnl_unlock(); } static void free_receive_page_frags(struct virtnet_info *vi) { int i; for (i = 0; i < vi->max_queue_pairs; i++) if (vi->rq[i].alloc_frag.page) { if (vi->rq[i].do_dma && vi->rq[i].last_dma) virtnet_rq_unmap(&vi->rq[i], vi->rq[i].last_dma, 0); put_page(vi->rq[i].alloc_frag.page); } } static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf) { if (!is_xdp_frame(buf)) dev_kfree_skb(buf); else xdp_return_frame(ptr_to_xdp(buf)); } static void free_unused_bufs(struct virtnet_info *vi) { void *buf; int i; for (i = 0; i < vi->max_queue_pairs; i++) { struct virtqueue *vq = vi->sq[i].vq; while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) virtnet_sq_free_unused_buf(vq, buf); cond_resched(); } for (i = 0; i < vi->max_queue_pairs; i++) { struct virtqueue *vq = vi->rq[i].vq; while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) virtnet_rq_unmap_free_buf(vq, buf); cond_resched(); } } static void virtnet_del_vqs(struct virtnet_info *vi) { struct virtio_device *vdev = vi->vdev; virtnet_clean_affinity(vi); vdev->config->del_vqs(vdev); virtnet_free_queues(vi); } /* How large should a single buffer be so a queue full of these can fit at * least one full packet? * Logic below assumes the mergeable buffer header is used. */ static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqueue *vq) { const unsigned int hdr_len = vi->hdr_len; unsigned int rq_size = virtqueue_get_vring_size(vq); unsigned int packet_len = vi->big_packets ? IP_MAX_MTU : vi->dev->max_mtu; unsigned int buf_len = hdr_len + ETH_HLEN + VLAN_HLEN + packet_len; unsigned int min_buf_len = DIV_ROUND_UP(buf_len, rq_size); return max(max(min_buf_len, hdr_len) - hdr_len, (unsigned int)GOOD_PACKET_LEN); } static int virtnet_find_vqs(struct virtnet_info *vi) { struct virtqueue_info *vqs_info; struct virtqueue **vqs; int ret = -ENOMEM; int total_vqs; bool *ctx; u16 i; /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by * possible N-1 RX/TX queue pairs used in multiqueue mode, followed by * possible control vq. */ total_vqs = vi->max_queue_pairs * 2 + virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ); /* Allocate space for find_vqs parameters */ vqs = kcalloc(total_vqs, sizeof(*vqs), GFP_KERNEL); if (!vqs) goto err_vq; vqs_info = kcalloc(total_vqs, sizeof(*vqs_info), GFP_KERNEL); if (!vqs_info) goto err_vqs_info; if (!vi->big_packets || vi->mergeable_rx_bufs) { ctx = kcalloc(total_vqs, sizeof(*ctx), GFP_KERNEL); if (!ctx) goto err_ctx; } else { ctx = NULL; } /* Parameters for control virtqueue, if any */ if (vi->has_cvq) { vqs_info[total_vqs - 1].name = "control"; } /* Allocate/initialize parameters for send/receive virtqueues */ for (i = 0; i < vi->max_queue_pairs; i++) { vqs_info[rxq2vq(i)].callback = skb_recv_done; vqs_info[txq2vq(i)].callback = skb_xmit_done; sprintf(vi->rq[i].name, "input.%u", i); sprintf(vi->sq[i].name, "output.%u", i); vqs_info[rxq2vq(i)].name = vi->rq[i].name; vqs_info[txq2vq(i)].name = vi->sq[i].name; if (ctx) vqs_info[rxq2vq(i)].ctx = true; } ret = virtio_find_vqs(vi->vdev, total_vqs, vqs, vqs_info, NULL); if (ret) goto err_find; if (vi->has_cvq) { vi->cvq = vqs[total_vqs - 1]; if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN)) vi->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; } for (i = 0; i < vi->max_queue_pairs; i++) { vi->rq[i].vq = vqs[rxq2vq(i)]; vi->rq[i].min_buf_len = mergeable_min_buf_len(vi, vi->rq[i].vq); vi->sq[i].vq = vqs[txq2vq(i)]; } /* run here: ret == 0. */ err_find: kfree(ctx); err_ctx: kfree(vqs_info); err_vqs_info: kfree(vqs); err_vq: return ret; } static int virtnet_alloc_queues(struct virtnet_info *vi) { int i; if (vi->has_cvq) { vi->ctrl = kzalloc(sizeof(*vi->ctrl), GFP_KERNEL); if (!vi->ctrl) goto err_ctrl; } else { vi->ctrl = NULL; } vi->sq = kcalloc(vi->max_queue_pairs, sizeof(*vi->sq), GFP_KERNEL); if (!vi->sq) goto err_sq; vi->rq = kcalloc(vi->max_queue_pairs, sizeof(*vi->rq), GFP_KERNEL); if (!vi->rq) goto err_rq; INIT_DELAYED_WORK(&vi->refill, refill_work); for (i = 0; i < vi->max_queue_pairs; i++) { vi->rq[i].pages = NULL; netif_napi_add_weight(vi->dev, &vi->rq[i].napi, virtnet_poll, napi_weight); netif_napi_add_tx_weight(vi->dev, &vi->sq[i].napi, virtnet_poll_tx, napi_tx ? napi_weight : 0); sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg)); ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len); sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg)); u64_stats_init(&vi->rq[i].stats.syncp); u64_stats_init(&vi->sq[i].stats.syncp); mutex_init(&vi->rq[i].dim_lock); } return 0; err_rq: kfree(vi->sq); err_sq: kfree(vi->ctrl); err_ctrl: return -ENOMEM; } static int init_vqs(struct virtnet_info *vi) { int ret; /* Allocate send & receive queues */ ret = virtnet_alloc_queues(vi); if (ret) goto err; ret = virtnet_find_vqs(vi); if (ret) goto err_free; cpus_read_lock(); virtnet_set_affinity(vi); cpus_read_unlock(); return 0; err_free: virtnet_free_queues(vi); err: return ret; } #ifdef CONFIG_SYSFS static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue, char *buf) { struct virtnet_info *vi = netdev_priv(queue->dev); unsigned int queue_index = get_netdev_rx_queue_index(queue); unsigned int headroom = virtnet_get_headroom(vi); unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; struct ewma_pkt_len *avg; BUG_ON(queue_index >= vi->max_queue_pairs); avg = &vi->rq[queue_index].mrg_avg_pkt_len; return sprintf(buf, "%u\n", get_mergeable_buf_len(&vi->rq[queue_index], avg, SKB_DATA_ALIGN(headroom + tailroom))); } static struct rx_queue_attribute mergeable_rx_buffer_size_attribute = __ATTR_RO(mergeable_rx_buffer_size); static struct attribute *virtio_net_mrg_rx_attrs[] = { &mergeable_rx_buffer_size_attribute.attr, NULL }; static const struct attribute_group virtio_net_mrg_rx_group = { .name = "virtio_net", .attrs = virtio_net_mrg_rx_attrs }; #endif static bool virtnet_fail_on_feature(struct virtio_device *vdev, unsigned int fbit, const char *fname, const char *dname) { if (!virtio_has_feature(vdev, fbit)) return false; dev_err(&vdev->dev, "device advertises feature %s but not %s", fname, dname); return true; } #define VIRTNET_FAIL_ON(vdev, fbit, dbit) \ virtnet_fail_on_feature(vdev, fbit, #fbit, dbit) static bool virtnet_validate_features(struct virtio_device *vdev) { if (!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) && (VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_RX, "VIRTIO_NET_F_CTRL_VQ") || VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_VLAN, "VIRTIO_NET_F_CTRL_VQ") || VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE, "VIRTIO_NET_F_CTRL_VQ") || VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_MQ, "VIRTIO_NET_F_CTRL_VQ") || VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR, "VIRTIO_NET_F_CTRL_VQ") || VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS, "VIRTIO_NET_F_CTRL_VQ") || VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT, "VIRTIO_NET_F_CTRL_VQ") || VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_NOTF_COAL, "VIRTIO_NET_F_CTRL_VQ") || VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_VQ_NOTF_COAL, "VIRTIO_NET_F_CTRL_VQ"))) { return false; } return true; } #define MIN_MTU ETH_MIN_MTU #define MAX_MTU ETH_MAX_MTU static int virtnet_validate(struct virtio_device *vdev) { if (!vdev->config->get) { dev_err(&vdev->dev, "%s failure: config access disabled\n", __func__); return -EINVAL; } if (!virtnet_validate_features(vdev)) return -EINVAL; if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) { int mtu = virtio_cread16(vdev, offsetof(struct virtio_net_config, mtu)); if (mtu < MIN_MTU) __virtio_clear_bit(vdev, VIRTIO_NET_F_MTU); } if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY) && !virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) { dev_warn(&vdev->dev, "device advertises feature VIRTIO_NET_F_STANDBY but not VIRTIO_NET_F_MAC, disabling standby"); __virtio_clear_bit(vdev, VIRTIO_NET_F_STANDBY); } return 0; } static bool virtnet_check_guest_gso(const struct virtnet_info *vi) { return virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) && virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6)); } static void virtnet_set_big_packets(struct virtnet_info *vi, const int mtu) { bool guest_gso = virtnet_check_guest_gso(vi); /* If device can receive ANY guest GSO packets, regardless of mtu, * allocate packets of maximum size, otherwise limit it to only * mtu size worth only. */ if (mtu > ETH_DATA_LEN || guest_gso) { vi->big_packets = true; vi->big_packets_num_skbfrags = guest_gso ? MAX_SKB_FRAGS : DIV_ROUND_UP(mtu, PAGE_SIZE); } } #define VIRTIO_NET_HASH_REPORT_MAX_TABLE 10 static enum xdp_rss_hash_type virtnet_xdp_rss_type[VIRTIO_NET_HASH_REPORT_MAX_TABLE] = { [VIRTIO_NET_HASH_REPORT_NONE] = XDP_RSS_TYPE_NONE, [VIRTIO_NET_HASH_REPORT_IPv4] = XDP_RSS_TYPE_L3_IPV4, [VIRTIO_NET_HASH_REPORT_TCPv4] = XDP_RSS_TYPE_L4_IPV4_TCP, [VIRTIO_NET_HASH_REPORT_UDPv4] = XDP_RSS_TYPE_L4_IPV4_UDP, [VIRTIO_NET_HASH_REPORT_IPv6] = XDP_RSS_TYPE_L3_IPV6, [VIRTIO_NET_HASH_REPORT_TCPv6] = XDP_RSS_TYPE_L4_IPV6_TCP, [VIRTIO_NET_HASH_REPORT_UDPv6] = XDP_RSS_TYPE_L4_IPV6_UDP, [VIRTIO_NET_HASH_REPORT_IPv6_EX] = XDP_RSS_TYPE_L3_IPV6_EX, [VIRTIO_NET_HASH_REPORT_TCPv6_EX] = XDP_RSS_TYPE_L4_IPV6_TCP_EX, [VIRTIO_NET_HASH_REPORT_UDPv6_EX] = XDP_RSS_TYPE_L4_IPV6_UDP_EX }; static int virtnet_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash, enum xdp_rss_hash_type *rss_type) { const struct xdp_buff *xdp = (void *)_ctx; struct virtio_net_hdr_v1_hash *hdr_hash; struct virtnet_info *vi; u16 hash_report; if (!(xdp->rxq->dev->features & NETIF_F_RXHASH)) return -ENODATA; vi = netdev_priv(xdp->rxq->dev); hdr_hash = (struct virtio_net_hdr_v1_hash *)(xdp->data - vi->hdr_len); hash_report = __le16_to_cpu(hdr_hash->hash_report); if (hash_report >= VIRTIO_NET_HASH_REPORT_MAX_TABLE) hash_report = VIRTIO_NET_HASH_REPORT_NONE; *rss_type = virtnet_xdp_rss_type[hash_report]; *hash = __le32_to_cpu(hdr_hash->hash_value); return 0; } static const struct xdp_metadata_ops virtnet_xdp_metadata_ops = { .xmo_rx_hash = virtnet_xdp_rx_hash, }; static int virtnet_probe(struct virtio_device *vdev) { int i, err = -ENOMEM; struct net_device *dev; struct virtnet_info *vi; u16 max_queue_pairs; int mtu = 0; /* Find if host supports multiqueue/rss virtio_net device */ max_queue_pairs = 1; if (virtio_has_feature(vdev, VIRTIO_NET_F_MQ) || virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) max_queue_pairs = virtio_cread16(vdev, offsetof(struct virtio_net_config, max_virtqueue_pairs)); /* We need at least 2 queue's */ if (max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX || !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) max_queue_pairs = 1; /* Allocate ourselves a network device with room for our info */ dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs); if (!dev) return -ENOMEM; /* Set up network device as normal. */ dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE | IFF_TX_SKB_NO_LINEAR; dev->netdev_ops = &virtnet_netdev; dev->stat_ops = &virtnet_stat_ops; dev->features = NETIF_F_HIGHDMA; dev->ethtool_ops = &virtnet_ethtool_ops; SET_NETDEV_DEV(dev, &vdev->dev); /* Do we support "hardware" checksums? */ if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) { /* This opens up the world of extra features. */ dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG; if (csum) dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG; if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) { dev->hw_features |= NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6; } /* Individual feature bits: what can host handle? */ if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4)) dev->hw_features |= NETIF_F_TSO; if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6)) dev->hw_features |= NETIF_F_TSO6; if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN)) dev->hw_features |= NETIF_F_TSO_ECN; if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_USO)) dev->hw_features |= NETIF_F_GSO_UDP_L4; dev->features |= NETIF_F_GSO_ROBUST; if (gso) dev->features |= dev->hw_features & NETIF_F_ALL_TSO; /* (!csum && gso) case will be fixed by register_netdev() */ } /* 1. With VIRTIO_NET_F_GUEST_CSUM negotiation, the driver doesn't * need to calculate checksums for partially checksummed packets, * as they're considered valid by the upper layer. * 2. Without VIRTIO_NET_F_GUEST_CSUM negotiation, the driver only * receives fully checksummed packets. The device may assist in * validating these packets' checksums, so the driver won't have to. */ dev->features |= NETIF_F_RXCSUM; if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6)) dev->features |= NETIF_F_GRO_HW; if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) dev->hw_features |= NETIF_F_GRO_HW; dev->vlan_features = dev->features; dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT; /* MTU range: 68 - 65535 */ dev->min_mtu = MIN_MTU; dev->max_mtu = MAX_MTU; /* Configuration may specify what MAC to use. Otherwise random. */ if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) { u8 addr[ETH_ALEN]; virtio_cread_bytes(vdev, offsetof(struct virtio_net_config, mac), addr, ETH_ALEN); eth_hw_addr_set(dev, addr); } else { eth_hw_addr_random(dev); dev_info(&vdev->dev, "Assigned random MAC address %pM\n", dev->dev_addr); } /* Set up our device-specific information */ vi = netdev_priv(dev); vi->dev = dev; vi->vdev = vdev; vdev->priv = vi; INIT_WORK(&vi->config_work, virtnet_config_changed_work); INIT_WORK(&vi->rx_mode_work, virtnet_rx_mode_work); spin_lock_init(&vi->refill_lock); if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) { vi->mergeable_rx_bufs = true; dev->xdp_features |= NETDEV_XDP_ACT_RX_SG; } if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) vi->has_rss_hash_report = true; if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) { vi->has_rss = true; vi->rss_indir_table_size = virtio_cread16(vdev, offsetof(struct virtio_net_config, rss_max_indirection_table_length)); } if (vi->has_rss || vi->has_rss_hash_report) { vi->rss_key_size = virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size)); vi->rss_hash_types_supported = virtio_cread32(vdev, offsetof(struct virtio_net_config, supported_hash_types)); vi->rss_hash_types_supported &= ~(VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDP_EX); dev->hw_features |= NETIF_F_RXHASH; dev->xdp_metadata_ops = &virtnet_xdp_metadata_ops; } if (vi->has_rss_hash_report) vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash); else if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) || virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) vi->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); else vi->hdr_len = sizeof(struct virtio_net_hdr); if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) || virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) vi->any_header_sg = true; if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) vi->has_cvq = true; mutex_init(&vi->cvq_lock); if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) { mtu = virtio_cread16(vdev, offsetof(struct virtio_net_config, mtu)); if (mtu < dev->min_mtu) { /* Should never trigger: MTU was previously validated * in virtnet_validate. */ dev_err(&vdev->dev, "device MTU appears to have changed it is now %d < %d", mtu, dev->min_mtu); err = -EINVAL; goto free; } dev->mtu = mtu; dev->max_mtu = mtu; } virtnet_set_big_packets(vi, mtu); if (vi->any_header_sg) dev->needed_headroom = vi->hdr_len; /* Enable multiqueue by default */ if (num_online_cpus() >= max_queue_pairs) vi->curr_queue_pairs = max_queue_pairs; else vi->curr_queue_pairs = num_online_cpus(); vi->max_queue_pairs = max_queue_pairs; /* Allocate/initialize the rx/tx queues, and invoke find_vqs */ err = init_vqs(vi); if (err) goto free; if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { vi->intr_coal_rx.max_usecs = 0; vi->intr_coal_tx.max_usecs = 0; vi->intr_coal_rx.max_packets = 0; /* Keep the default values of the coalescing parameters * aligned with the default napi_tx state. */ if (vi->sq[0].napi.weight) vi->intr_coal_tx.max_packets = 1; else vi->intr_coal_tx.max_packets = 0; } if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) { /* The reason is the same as VIRTIO_NET_F_NOTF_COAL. */ for (i = 0; i < vi->max_queue_pairs; i++) if (vi->sq[i].napi.weight) vi->sq[i].intr_coal.max_packets = 1; err = virtnet_init_irq_moder(vi); if (err) goto free; } #ifdef CONFIG_SYSFS if (vi->mergeable_rx_bufs) dev->sysfs_rx_queue_group = &virtio_net_mrg_rx_group; #endif netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs); netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs); virtnet_init_settings(dev); if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY)) { vi->failover = net_failover_create(vi->dev); if (IS_ERR(vi->failover)) { err = PTR_ERR(vi->failover); goto free_vqs; } } if (vi->has_rss || vi->has_rss_hash_report) virtnet_init_default_rss(vi); enable_rx_mode_work(vi); /* serialize netdev register + virtio_device_ready() with ndo_open() */ rtnl_lock(); err = register_netdevice(dev); if (err) { pr_debug("virtio_net: registering device failed\n"); rtnl_unlock(); goto free_failover; } /* Disable config change notification until ndo_open. */ virtio_config_driver_disable(vi->vdev); virtio_device_ready(vdev); virtnet_set_queues(vi, vi->curr_queue_pairs); /* a random MAC address has been assigned, notify the device. * We don't fail probe if VIRTIO_NET_F_CTRL_MAC_ADDR is not there * because many devices work fine without getting MAC explicitly */ if (!virtio_has_feature(vdev, VIRTIO_NET_F_MAC) && virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) { struct scatterlist sg; sg_init_one(&sg, dev->dev_addr, dev->addr_len); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) { pr_debug("virtio_net: setting MAC address failed\n"); rtnl_unlock(); err = -EINVAL; goto free_unregister_netdev; } } if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS)) { struct virtio_net_stats_capabilities *stats_cap __free(kfree) = NULL; struct scatterlist sg; __le64 v; stats_cap = kzalloc(sizeof(*stats_cap), GFP_KERNEL); if (!stats_cap) { rtnl_unlock(); err = -ENOMEM; goto free_unregister_netdev; } sg_init_one(&sg, stats_cap, sizeof(*stats_cap)); if (!virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_STATS, VIRTIO_NET_CTRL_STATS_QUERY, NULL, &sg)) { pr_debug("virtio_net: fail to get stats capability\n"); rtnl_unlock(); err = -EINVAL; goto free_unregister_netdev; } v = stats_cap->supported_stats_types[0]; vi->device_stats_cap = le64_to_cpu(v); } /* Assume link up if device can't report link status, otherwise get link status from config. */ netif_carrier_off(dev); if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) { virtnet_config_changed_work(&vi->config_work); } else { vi->status = VIRTIO_NET_S_LINK_UP; virtnet_update_settings(vi); netif_carrier_on(dev); } for (i = 0; i < ARRAY_SIZE(guest_offloads); i++) if (virtio_has_feature(vi->vdev, guest_offloads[i])) set_bit(guest_offloads[i], &vi->guest_offloads); vi->guest_offloads_capable = vi->guest_offloads; rtnl_unlock(); err = virtnet_cpu_notif_add(vi); if (err) { pr_debug("virtio_net: registering cpu notifier failed\n"); goto free_unregister_netdev; } pr_debug("virtnet: registered device %s with %d RX and TX vq's\n", dev->name, max_queue_pairs); return 0; free_unregister_netdev: unregister_netdev(dev); free_failover: net_failover_destroy(vi->failover); free_vqs: virtio_reset_device(vdev); cancel_delayed_work_sync(&vi->refill); free_receive_page_frags(vi); virtnet_del_vqs(vi); free: free_netdev(dev); return err; } static void remove_vq_common(struct virtnet_info *vi) { virtio_reset_device(vi->vdev); /* Free unused buffers in both send and recv, if any. */ free_unused_bufs(vi); free_receive_bufs(vi); free_receive_page_frags(vi); virtnet_del_vqs(vi); } static void virtnet_remove(struct virtio_device *vdev) { struct virtnet_info *vi = vdev->priv; virtnet_cpu_notif_remove(vi); /* Make sure no work handler is accessing the device. */ flush_work(&vi->config_work); disable_rx_mode_work(vi); flush_work(&vi->rx_mode_work); virtnet_free_irq_moder(vi); unregister_netdev(vi->dev); net_failover_destroy(vi->failover); remove_vq_common(vi); free_netdev(vi->dev); } static __maybe_unused int virtnet_freeze(struct virtio_device *vdev) { struct virtnet_info *vi = vdev->priv; virtnet_cpu_notif_remove(vi); virtnet_freeze_down(vdev); remove_vq_common(vi); return 0; } static __maybe_unused int virtnet_restore(struct virtio_device *vdev) { struct virtnet_info *vi = vdev->priv; int err; err = virtnet_restore_up(vdev); if (err) return err; virtnet_set_queues(vi, vi->curr_queue_pairs); err = virtnet_cpu_notif_add(vi); if (err) { virtnet_freeze_down(vdev); remove_vq_common(vi); return err; } return 0; } static struct virtio_device_id id_table[] = { { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID }, { 0 }, }; #define VIRTNET_FEATURES \ VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \ VIRTIO_NET_F_MAC, \ VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \ VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \ VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, \ VIRTIO_NET_F_HOST_USO, VIRTIO_NET_F_GUEST_USO4, VIRTIO_NET_F_GUEST_USO6, \ VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, \ VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \ VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \ VIRTIO_NET_F_CTRL_MAC_ADDR, \ VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \ VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \ VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL, \ VIRTIO_NET_F_VQ_NOTF_COAL, \ VIRTIO_NET_F_GUEST_HDRLEN, VIRTIO_NET_F_DEVICE_STATS static unsigned int features[] = { VIRTNET_FEATURES, }; static unsigned int features_legacy[] = { VIRTNET_FEATURES, VIRTIO_NET_F_GSO, VIRTIO_F_ANY_LAYOUT, }; static struct virtio_driver virtio_net_driver = { .feature_table = features, .feature_table_size = ARRAY_SIZE(features), .feature_table_legacy = features_legacy, .feature_table_size_legacy = ARRAY_SIZE(features_legacy), .driver.name = KBUILD_MODNAME, .id_table = id_table, .validate = virtnet_validate, .probe = virtnet_probe, .remove = virtnet_remove, .config_changed = virtnet_config_changed, #ifdef CONFIG_PM_SLEEP .freeze = virtnet_freeze, .restore = virtnet_restore, #endif }; static __init int virtio_net_driver_init(void) { int ret; ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "virtio/net:online", virtnet_cpu_online, virtnet_cpu_down_prep); if (ret < 0) goto out; virtionet_online = ret; ret = cpuhp_setup_state_multi(CPUHP_VIRT_NET_DEAD, "virtio/net:dead", NULL, virtnet_cpu_dead); if (ret) goto err_dead; ret = register_virtio_driver(&virtio_net_driver); if (ret) goto err_virtio; return 0; err_virtio: cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD); err_dead: cpuhp_remove_multi_state(virtionet_online); out: return ret; } module_init(virtio_net_driver_init); static __exit void virtio_net_driver_exit(void) { unregister_virtio_driver(&virtio_net_driver); cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD); cpuhp_remove_multi_state(virtionet_online); } module_exit(virtio_net_driver_exit); MODULE_DEVICE_TABLE(virtio, id_table); MODULE_DESCRIPTION("Virtio network driver"); MODULE_LICENSE("GPL");
5 5 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 // SPDX-License-Identifier: GPL-2.0-only /* * PS/2 mouse driver * * Copyright (c) 1999-2002 Vojtech Pavlik * Copyright (c) 2003-2004 Dmitry Torokhov */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #define psmouse_fmt(fmt) fmt #include <linux/bitops.h> #include <linux/delay.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/interrupt.h> #include <linux/input.h> #include <linux/serio.h> #include <linux/init.h> #include <linux/libps2.h> #include <linux/mutex.h> #include <linux/types.h> #include "psmouse.h" #include "synaptics.h" #include "logips2pp.h" #include "alps.h" #include "hgpk.h" #include "lifebook.h" #include "trackpoint.h" #include "touchkit_ps2.h" #include "elantech.h" #include "sentelic.h" #include "cypress_ps2.h" #include "focaltech.h" #include "vmmouse.h" #include "byd.h" #define DRIVER_DESC "PS/2 mouse driver" MODULE_AUTHOR("Vojtech Pavlik <vojtech@suse.cz>"); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL"); static unsigned int psmouse_max_proto = PSMOUSE_AUTO; static int psmouse_set_maxproto(const char *val, const struct kernel_param *); static int psmouse_get_maxproto(char *buffer, const struct kernel_param *kp); static const struct kernel_param_ops param_ops_proto_abbrev = { .set = psmouse_set_maxproto, .get = psmouse_get_maxproto, }; #define param_check_proto_abbrev(name, p) __param_check(name, p, unsigned int) module_param_named(proto, psmouse_max_proto, proto_abbrev, 0644); MODULE_PARM_DESC(proto, "Highest protocol extension to probe (bare, imps, exps, any). Useful for KVM switches."); static unsigned int psmouse_resolution = 200; module_param_named(resolution, psmouse_resolution, uint, 0644); MODULE_PARM_DESC(resolution, "Resolution, in dpi."); static unsigned int psmouse_rate = 100; module_param_named(rate, psmouse_rate, uint, 0644); MODULE_PARM_DESC(rate, "Report rate, in reports per second."); static bool psmouse_smartscroll = true; module_param_named(smartscroll, psmouse_smartscroll, bool, 0644); MODULE_PARM_DESC(smartscroll, "Logitech Smartscroll autorepeat, 1 = enabled (default), 0 = disabled."); static bool psmouse_a4tech_2wheels; module_param_named(a4tech_workaround, psmouse_a4tech_2wheels, bool, 0644); MODULE_PARM_DESC(a4tech_workaround, "A4Tech second scroll wheel workaround, 1 = enabled, 0 = disabled (default)."); static unsigned int psmouse_resetafter = 5; module_param_named(resetafter, psmouse_resetafter, uint, 0644); MODULE_PARM_DESC(resetafter, "Reset device after so many bad packets (0 = never)."); static unsigned int psmouse_resync_time; module_param_named(resync_time, psmouse_resync_time, uint, 0644); MODULE_PARM_DESC(resync_time, "How long can mouse stay idle before forcing resync (in seconds, 0 = never)."); PSMOUSE_DEFINE_ATTR(protocol, S_IWUSR | S_IRUGO, NULL, psmouse_attr_show_protocol, psmouse_attr_set_protocol); PSMOUSE_DEFINE_ATTR(rate, S_IWUSR | S_IRUGO, (void *) offsetof(struct psmouse, rate), psmouse_show_int_attr, psmouse_attr_set_rate); PSMOUSE_DEFINE_ATTR(resolution, S_IWUSR | S_IRUGO, (void *) offsetof(struct psmouse, resolution), psmouse_show_int_attr, psmouse_attr_set_resolution); PSMOUSE_DEFINE_ATTR(resetafter, S_IWUSR | S_IRUGO, (void *) offsetof(struct psmouse, resetafter), psmouse_show_int_attr, psmouse_set_int_attr); PSMOUSE_DEFINE_ATTR(resync_time, S_IWUSR | S_IRUGO, (void *) offsetof(struct psmouse, resync_time), psmouse_show_int_attr, psmouse_set_int_attr); static struct attribute *psmouse_dev_attrs[] = { &psmouse_attr_protocol.dattr.attr, &psmouse_attr_rate.dattr.attr, &psmouse_attr_resolution.dattr.attr, &psmouse_attr_resetafter.dattr.attr, &psmouse_attr_resync_time.dattr.attr, NULL }; ATTRIBUTE_GROUPS(psmouse_dev); /* * psmouse_mutex protects all operations changing state of mouse * (connecting, disconnecting, changing rate or resolution via * sysfs). We could use a per-device semaphore but since there * rarely more than one PS/2 mouse connected and since semaphore * is taken in "slow" paths it is not worth it. */ static DEFINE_MUTEX(psmouse_mutex); static struct workqueue_struct *kpsmoused_wq; struct psmouse *psmouse_from_serio(struct serio *serio) { struct ps2dev *ps2dev = serio_get_drvdata(serio); return container_of(ps2dev, struct psmouse, ps2dev); } void psmouse_report_standard_buttons(struct input_dev *dev, u8 buttons) { input_report_key(dev, BTN_LEFT, buttons & BIT(0)); input_report_key(dev, BTN_MIDDLE, buttons & BIT(2)); input_report_key(dev, BTN_RIGHT, buttons & BIT(1)); } void psmouse_report_standard_motion(struct input_dev *dev, u8 *packet) { int x, y; x = packet[1] ? packet[1] - ((packet[0] << 4) & 0x100) : 0; y = packet[2] ? packet[2] - ((packet[0] << 3) & 0x100) : 0; input_report_rel(dev, REL_X, x); input_report_rel(dev, REL_Y, -y); } void psmouse_report_standard_packet(struct input_dev *dev, u8 *packet) { psmouse_report_standard_buttons(dev, packet[0]); psmouse_report_standard_motion(dev, packet); } /* * psmouse_process_byte() analyzes the PS/2 data stream and reports * relevant events to the input module once full packet has arrived. */ psmouse_ret_t psmouse_process_byte(struct psmouse *psmouse) { struct input_dev *dev = psmouse->dev; u8 *packet = psmouse->packet; int wheel; if (psmouse->pktcnt < psmouse->pktsize) return PSMOUSE_GOOD_DATA; /* Full packet accumulated, process it */ switch (psmouse->protocol->type) { case PSMOUSE_IMPS: /* IntelliMouse has scroll wheel */ input_report_rel(dev, REL_WHEEL, -(s8) packet[3]); break; case PSMOUSE_IMEX: /* Scroll wheel and buttons on IntelliMouse Explorer */ switch (packet[3] & 0xC0) { case 0x80: /* vertical scroll on IntelliMouse Explorer 4.0 */ input_report_rel(dev, REL_WHEEL, -sign_extend32(packet[3], 5)); break; case 0x40: /* horizontal scroll on IntelliMouse Explorer 4.0 */ input_report_rel(dev, REL_HWHEEL, -sign_extend32(packet[3], 5)); break; case 0x00: case 0xC0: wheel = sign_extend32(packet[3], 3); /* * Some A4Tech mice have two scroll wheels, with first * one reporting +/-1 in the lower nibble, and second * one reporting +/-2. */ if (psmouse_a4tech_2wheels && abs(wheel) > 1) input_report_rel(dev, REL_HWHEEL, wheel / 2); else input_report_rel(dev, REL_WHEEL, -wheel); input_report_key(dev, BTN_SIDE, packet[3] & BIT(4)); input_report_key(dev, BTN_EXTRA, packet[3] & BIT(5)); break; } break; case PSMOUSE_GENPS: /* Report scroll buttons on NetMice */ input_report_rel(dev, REL_WHEEL, -(s8) packet[3]); /* Extra buttons on Genius NewNet 3D */ input_report_key(dev, BTN_SIDE, packet[0] & BIT(6)); input_report_key(dev, BTN_EXTRA, packet[0] & BIT(7)); break; case PSMOUSE_THINKPS: /* Extra button on ThinkingMouse */ input_report_key(dev, BTN_EXTRA, packet[0] & BIT(3)); /* * Without this bit of weirdness moving up gives wildly * high Y changes. */ packet[1] |= (packet[0] & 0x40) << 1; break; case PSMOUSE_CORTRON: /* * Cortron PS2 Trackball reports SIDE button in the * 4th bit of the first byte. */ input_report_key(dev, BTN_SIDE, packet[0] & BIT(3)); packet[0] |= BIT(3); break; default: break; } /* Generic PS/2 Mouse */ packet[0] |= psmouse->extra_buttons; psmouse_report_standard_packet(dev, packet); input_sync(dev); return PSMOUSE_FULL_PACKET; } void psmouse_queue_work(struct psmouse *psmouse, struct delayed_work *work, unsigned long delay) { queue_delayed_work(kpsmoused_wq, work, delay); } /* * __psmouse_set_state() sets new psmouse state and resets all flags. */ static inline void __psmouse_set_state(struct psmouse *psmouse, enum psmouse_state new_state) { psmouse->state = new_state; psmouse->pktcnt = psmouse->out_of_sync_cnt = 0; psmouse->ps2dev.flags = 0; psmouse->last = jiffies; } /* * psmouse_set_state() sets new psmouse state and resets all flags and * counters while holding serio lock so fighting with interrupt handler * is not a concern. */ void psmouse_set_state(struct psmouse *psmouse, enum psmouse_state new_state) { serio_pause_rx(psmouse->ps2dev.serio); __psmouse_set_state(psmouse, new_state); serio_continue_rx(psmouse->ps2dev.serio); } /* * psmouse_handle_byte() processes one byte of the input data stream * by calling corresponding protocol handler. */ static int psmouse_handle_byte(struct psmouse *psmouse) { psmouse_ret_t rc = psmouse->protocol_handler(psmouse); switch (rc) { case PSMOUSE_BAD_DATA: if (psmouse->state == PSMOUSE_ACTIVATED) { psmouse_warn(psmouse, "%s at %s lost sync at byte %d\n", psmouse->name, psmouse->phys, psmouse->pktcnt); if (++psmouse->out_of_sync_cnt == psmouse->resetafter) { __psmouse_set_state(psmouse, PSMOUSE_IGNORE); psmouse_notice(psmouse, "issuing reconnect request\n"); serio_reconnect(psmouse->ps2dev.serio); return -EIO; } } psmouse->pktcnt = 0; break; case PSMOUSE_FULL_PACKET: psmouse->pktcnt = 0; if (psmouse->out_of_sync_cnt) { psmouse->out_of_sync_cnt = 0; psmouse_notice(psmouse, "%s at %s - driver resynced.\n", psmouse->name, psmouse->phys); } break; case PSMOUSE_GOOD_DATA: break; } return 0; } static void psmouse_handle_oob_data(struct psmouse *psmouse, u8 data) { switch (psmouse->oob_data_type) { case PSMOUSE_OOB_NONE: psmouse->oob_data_type = data; break; case PSMOUSE_OOB_EXTRA_BTNS: psmouse_report_standard_buttons(psmouse->dev, data); input_sync(psmouse->dev); psmouse->extra_buttons = data; psmouse->oob_data_type = PSMOUSE_OOB_NONE; break; default: psmouse_warn(psmouse, "unknown OOB_DATA type: 0x%02x\n", psmouse->oob_data_type); psmouse->oob_data_type = PSMOUSE_OOB_NONE; break; } } static enum ps2_disposition psmouse_pre_receive_byte(struct ps2dev *ps2dev, u8 data, unsigned int flags) { struct psmouse *psmouse = container_of(ps2dev, struct psmouse, ps2dev); if (psmouse->state == PSMOUSE_IGNORE) return PS2_IGNORE; if (unlikely((flags & SERIO_TIMEOUT) || ((flags & SERIO_PARITY) && !psmouse->protocol->ignore_parity))) { if (psmouse->state == PSMOUSE_ACTIVATED) psmouse_warn(psmouse, "bad data from KBC -%s%s\n", flags & SERIO_TIMEOUT ? " timeout" : "", flags & SERIO_PARITY ? " bad parity" : ""); return PS2_ERROR; } if (flags & SERIO_OOB_DATA) { psmouse_handle_oob_data(psmouse, data); return PS2_IGNORE; } return PS2_PROCESS; } static void psmouse_receive_byte(struct ps2dev *ps2dev, u8 data) { struct psmouse *psmouse = container_of(ps2dev, struct psmouse, ps2dev); pm_wakeup_event(&ps2dev->serio->dev, 0); if (psmouse->state <= PSMOUSE_RESYNCING) return; if (psmouse->state == PSMOUSE_ACTIVATED && psmouse->pktcnt && time_after(jiffies, psmouse->last + HZ/2)) { psmouse_info(psmouse, "%s at %s lost synchronization, throwing %d bytes away.\n", psmouse->name, psmouse->phys, psmouse->pktcnt); psmouse->badbyte = psmouse->packet[0]; __psmouse_set_state(psmouse, PSMOUSE_RESYNCING); psmouse_queue_work(psmouse, &psmouse->resync_work, 0); return; } psmouse->packet[psmouse->pktcnt++] = data; /* Check if this is a new device announcement (0xAA 0x00) */ if (unlikely(psmouse->packet[0] == PSMOUSE_RET_BAT && psmouse->pktcnt <= 2)) { if (psmouse->pktcnt == 1) { psmouse->last = jiffies; return; } if (psmouse->packet[1] == PSMOUSE_RET_ID || (psmouse->protocol->type == PSMOUSE_HGPK && psmouse->packet[1] == PSMOUSE_RET_BAT)) { __psmouse_set_state(psmouse, PSMOUSE_IGNORE); serio_reconnect(ps2dev->serio); return; } /* Not a new device, try processing first byte normally */ psmouse->pktcnt = 1; if (psmouse_handle_byte(psmouse)) return; psmouse->packet[psmouse->pktcnt++] = data; } /* * See if we need to force resync because mouse was idle for * too long. */ if (psmouse->state == PSMOUSE_ACTIVATED && psmouse->pktcnt == 1 && psmouse->resync_time && time_after(jiffies, psmouse->last + psmouse->resync_time * HZ)) { psmouse->badbyte = psmouse->packet[0]; __psmouse_set_state(psmouse, PSMOUSE_RESYNCING); psmouse_queue_work(psmouse, &psmouse->resync_work, 0); return; } psmouse->last = jiffies; psmouse_handle_byte(psmouse); } /* * psmouse_reset() resets the mouse into power-on state. */ int psmouse_reset(struct psmouse *psmouse) { u8 param[2]; int error; error = ps2_command(&psmouse->ps2dev, param, PSMOUSE_CMD_RESET_BAT); if (error) return error; if (param[0] != PSMOUSE_RET_BAT && param[1] != PSMOUSE_RET_ID) return -EIO; return 0; } /* * Here we set the mouse resolution. */ void psmouse_set_resolution(struct psmouse *psmouse, unsigned int resolution) { static const u8 params[] = { 0, 1, 2, 2, 3 }; u8 p; if (resolution == 0 || resolution > 200) resolution = 200; p = params[resolution / 50]; ps2_command(&psmouse->ps2dev, &p, PSMOUSE_CMD_SETRES); psmouse->resolution = 25 << p; } /* * Here we set the mouse report rate. */ static void psmouse_set_rate(struct psmouse *psmouse, unsigned int rate) { static const u8 rates[] = { 200, 100, 80, 60, 40, 20, 10, 0 }; u8 r; int i = 0; while (rates[i] > rate) i++; r = rates[i]; ps2_command(&psmouse->ps2dev, &r, PSMOUSE_CMD_SETRATE); psmouse->rate = r; } /* * Here we set the mouse scaling. */ static void psmouse_set_scale(struct psmouse *psmouse, enum psmouse_scale scale) { ps2_command(&psmouse->ps2dev, NULL, scale == PSMOUSE_SCALE21 ? PSMOUSE_CMD_SETSCALE21 : PSMOUSE_CMD_SETSCALE11); } /* * psmouse_poll() - default poll handler. Everyone except for ALPS uses it. */ static int psmouse_poll(struct psmouse *psmouse) { return ps2_command(&psmouse->ps2dev, psmouse->packet, PSMOUSE_CMD_POLL | (psmouse->pktsize << 8)); } static bool psmouse_check_pnp_id(const char *id, const char * const ids[]) { int i; for (i = 0; ids[i]; i++) if (!strcasecmp(id, ids[i])) return true; return false; } /* * psmouse_matches_pnp_id - check if psmouse matches one of the passed in ids. */ bool psmouse_matches_pnp_id(struct psmouse *psmouse, const char * const ids[]) { struct serio *serio = psmouse->ps2dev.serio; char *p, *fw_id_copy, *save_ptr; bool found = false; if (strncmp(serio->firmware_id, "PNP: ", 5)) return false; fw_id_copy = kstrndup(&serio->firmware_id[5], sizeof(serio->firmware_id) - 5, GFP_KERNEL); if (!fw_id_copy) return false; save_ptr = fw_id_copy; while ((p = strsep(&fw_id_copy, " ")) != NULL) { if (psmouse_check_pnp_id(p, ids)) { found = true; break; } } kfree(save_ptr); return found; } /* * Genius NetMouse magic init. */ static int genius_detect(struct psmouse *psmouse, bool set_properties) { struct ps2dev *ps2dev = &psmouse->ps2dev; u8 param[4]; param[0] = 3; ps2_command(ps2dev, param, PSMOUSE_CMD_SETRES); ps2_command(ps2dev, NULL, PSMOUSE_CMD_SETSCALE11); ps2_command(ps2dev, NULL, PSMOUSE_CMD_SETSCALE11); ps2_command(ps2dev, NULL, PSMOUSE_CMD_SETSCALE11); ps2_command(ps2dev, param, PSMOUSE_CMD_GETINFO); if (param[0] != 0x00 || param[1] != 0x33 || param[2] != 0x55) return -ENODEV; if (set_properties) { __set_bit(BTN_MIDDLE, psmouse->dev->keybit); __set_bit(BTN_EXTRA, psmouse->dev->keybit); __set_bit(BTN_SIDE, psmouse->dev->keybit); __set_bit(REL_WHEEL, psmouse->dev->relbit); psmouse->vendor = "Genius"; psmouse->name = "Mouse"; psmouse->pktsize = 4; } return 0; } /* * IntelliMouse magic init. */ static int intellimouse_detect(struct psmouse *psmouse, bool set_properties) { struct ps2dev *ps2dev = &psmouse->ps2dev; u8 param[2]; param[0] = 200; ps2_command(ps2dev, param, PSMOUSE_CMD_SETRATE); param[0] = 100; ps2_command(ps2dev, param, PSMOUSE_CMD_SETRATE); param[0] = 80; ps2_command(ps2dev, param, PSMOUSE_CMD_SETRATE); ps2_command(ps2dev, param, PSMOUSE_CMD_GETID); if (param[0] != 3) return -ENODEV; if (set_properties) { __set_bit(BTN_MIDDLE, psmouse->dev->keybit); __set_bit(REL_WHEEL, psmouse->dev->relbit); if (!psmouse->vendor) psmouse->vendor = "Generic"; if (!psmouse->name) psmouse->name = "Wheel Mouse"; psmouse->pktsize = 4; } return 0; } /* * Try IntelliMouse/Explorer magic init. */ static int im_explorer_detect(struct psmouse *psmouse, bool set_properties) { struct ps2dev *ps2dev = &psmouse->ps2dev; u8 param[2]; intellimouse_detect(psmouse, 0); param[0] = 200; ps2_command(ps2dev, param, PSMOUSE_CMD_SETRATE); param[0] = 200; ps2_command(ps2dev, param, PSMOUSE_CMD_SETRATE); param[0] = 80; ps2_command(ps2dev, param, PSMOUSE_CMD_SETRATE); ps2_command(ps2dev, param, PSMOUSE_CMD_GETID); if (param[0] != 4) return -ENODEV; /* Magic to enable horizontal scrolling on IntelliMouse 4.0 */ param[0] = 200; ps2_command(ps2dev, param, PSMOUSE_CMD_SETRATE); param[0] = 80; ps2_command(ps2dev, param, PSMOUSE_CMD_SETRATE); param[0] = 40; ps2_command(ps2dev, param, PSMOUSE_CMD_SETRATE); if (set_properties) { __set_bit(BTN_MIDDLE, psmouse->dev->keybit); __set_bit(REL_WHEEL, psmouse->dev->relbit); __set_bit(REL_HWHEEL, psmouse->dev->relbit); __set_bit(BTN_SIDE, psmouse->dev->keybit); __set_bit(BTN_EXTRA, psmouse->dev->keybit); if (!psmouse->vendor) psmouse->vendor = "Generic"; if (!psmouse->name) psmouse->name = "Explorer Mouse"; psmouse->pktsize = 4; } return 0; } /* * Kensington ThinkingMouse / ExpertMouse magic init. */ static int thinking_detect(struct psmouse *psmouse, bool set_properties) { struct ps2dev *ps2dev = &psmouse->ps2dev; u8 param[2]; static const u8 seq[] = { 20, 60, 40, 20, 20, 60, 40, 20, 20 }; int i; param[0] = 10; ps2_command(ps2dev, param, PSMOUSE_CMD_SETRATE); param[0] = 0; ps2_command(ps2dev, param, PSMOUSE_CMD_SETRES); for (i = 0; i < ARRAY_SIZE(seq); i++) { param[0] = seq[i]; ps2_command(ps2dev, param, PSMOUSE_CMD_SETRATE); } ps2_command(ps2dev, param, PSMOUSE_CMD_GETID); if (param[0] != 2) return -ENODEV; if (set_properties) { __set_bit(BTN_MIDDLE, psmouse->dev->keybit); __set_bit(BTN_EXTRA, psmouse->dev->keybit); psmouse->vendor = "Kensington"; psmouse->name = "ThinkingMouse"; } return 0; } /* * Bare PS/2 protocol "detection". Always succeeds. */ static int ps2bare_detect(struct psmouse *psmouse, bool set_properties) { if (set_properties) { if (!psmouse->vendor) psmouse->vendor = "Generic"; if (!psmouse->name) psmouse->name = "Mouse"; /* * We have no way of figuring true number of buttons so let's * assume that the device has 3. */ input_set_capability(psmouse->dev, EV_KEY, BTN_MIDDLE); } return 0; } /* * Cortron PS/2 protocol detection. There's no special way to detect it, so it * must be forced by sysfs protocol writing. */ static int cortron_detect(struct psmouse *psmouse, bool set_properties) { if (set_properties) { psmouse->vendor = "Cortron"; psmouse->name = "PS/2 Trackball"; __set_bit(BTN_MIDDLE, psmouse->dev->keybit); __set_bit(BTN_SIDE, psmouse->dev->keybit); } return 0; } static const struct psmouse_protocol psmouse_protocols[] = { { .type = PSMOUSE_PS2, .name = "PS/2", .alias = "bare", .maxproto = true, .ignore_parity = true, .detect = ps2bare_detect, .try_passthru = true, }, #ifdef CONFIG_MOUSE_PS2_LOGIPS2PP { .type = PSMOUSE_PS2PP, .name = "PS2++", .alias = "logitech", .detect = ps2pp_detect, }, #endif { .type = PSMOUSE_THINKPS, .name = "ThinkPS/2", .alias = "thinkps", .detect = thinking_detect, }, #ifdef CONFIG_MOUSE_PS2_CYPRESS { .type = PSMOUSE_CYPRESS, .name = "CyPS/2", .alias = "cypress", .detect = cypress_detect, .init = cypress_init, }, #endif { .type = PSMOUSE_GENPS, .name = "GenPS/2", .alias = "genius", .detect = genius_detect, }, { .type = PSMOUSE_IMPS, .name = "ImPS/2", .alias = "imps", .maxproto = true, .ignore_parity = true, .detect = intellimouse_detect, .try_passthru = true, }, { .type = PSMOUSE_IMEX, .name = "ImExPS/2", .alias = "exps", .maxproto = true, .ignore_parity = true, .detect = im_explorer_detect, .try_passthru = true, }, #ifdef CONFIG_MOUSE_PS2_SYNAPTICS { .type = PSMOUSE_SYNAPTICS, .name = "SynPS/2", .alias = "synaptics", .detect = synaptics_detect, .init = synaptics_init_absolute, }, { .type = PSMOUSE_SYNAPTICS_RELATIVE, .name = "SynRelPS/2", .alias = "synaptics-relative", .detect = synaptics_detect, .init = synaptics_init_relative, }, #endif #ifdef CONFIG_MOUSE_PS2_SYNAPTICS_SMBUS { .type = PSMOUSE_SYNAPTICS_SMBUS, .name = "SynSMBus", .alias = "synaptics-smbus", .detect = synaptics_detect, .init = synaptics_init_smbus, .smbus_companion = true, }, #endif #ifdef CONFIG_MOUSE_PS2_ALPS { .type = PSMOUSE_ALPS, .name = "AlpsPS/2", .alias = "alps", .detect = alps_detect, .init = alps_init, }, #endif #ifdef CONFIG_MOUSE_PS2_LIFEBOOK { .type = PSMOUSE_LIFEBOOK, .name = "LBPS/2", .alias = "lifebook", .detect = lifebook_detect, .init = lifebook_init, }, #endif #ifdef CONFIG_MOUSE_PS2_TRACKPOINT { .type = PSMOUSE_TRACKPOINT, .name = "TPPS/2", .alias = "trackpoint", .detect = trackpoint_detect, .try_passthru = true, }, #endif #ifdef CONFIG_MOUSE_PS2_TOUCHKIT { .type = PSMOUSE_TOUCHKIT_PS2, .name = "touchkitPS/2", .alias = "touchkit", .detect = touchkit_ps2_detect, }, #endif #ifdef CONFIG_MOUSE_PS2_OLPC { .type = PSMOUSE_HGPK, .name = "OLPC HGPK", .alias = "hgpk", .detect = hgpk_detect, }, #endif #ifdef CONFIG_MOUSE_PS2_ELANTECH { .type = PSMOUSE_ELANTECH, .name = "ETPS/2", .alias = "elantech", .detect = elantech_detect, .init = elantech_init_ps2, }, #endif #ifdef CONFIG_MOUSE_PS2_ELANTECH_SMBUS { .type = PSMOUSE_ELANTECH_SMBUS, .name = "ETSMBus", .alias = "elantech-smbus", .detect = elantech_detect, .init = elantech_init_smbus, .smbus_companion = true, }, #endif #ifdef CONFIG_MOUSE_PS2_SENTELIC { .type = PSMOUSE_FSP, .name = "FSPPS/2", .alias = "fsp", .detect = fsp_detect, .init = fsp_init, }, #endif { .type = PSMOUSE_CORTRON, .name = "CortronPS/2", .alias = "cortps", .detect = cortron_detect, }, #ifdef CONFIG_MOUSE_PS2_FOCALTECH { .type = PSMOUSE_FOCALTECH, .name = "FocalTechPS/2", .alias = "focaltech", .detect = focaltech_detect, .init = focaltech_init, }, #endif #ifdef CONFIG_MOUSE_PS2_VMMOUSE { .type = PSMOUSE_VMMOUSE, .name = VMMOUSE_PSNAME, .alias = "vmmouse", .detect = vmmouse_detect, .init = vmmouse_init, }, #endif #ifdef CONFIG_MOUSE_PS2_BYD { .type = PSMOUSE_BYD, .name = "BYDPS/2", .alias = "byd", .detect = byd_detect, .init = byd_init, }, #endif { .type = PSMOUSE_AUTO, .name = "auto", .alias = "any", .maxproto = true, }, }; static const struct psmouse_protocol *__psmouse_protocol_by_type(enum psmouse_type type) { int i; for (i = 0; i < ARRAY_SIZE(psmouse_protocols); i++) if (psmouse_protocols[i].type == type) return &psmouse_protocols[i]; return NULL; } static const struct psmouse_protocol *psmouse_protocol_by_type(enum psmouse_type type) { const struct psmouse_protocol *proto; proto = __psmouse_protocol_by_type(type); if (proto) return proto; WARN_ON(1); return &psmouse_protocols[0]; } static const struct psmouse_protocol *psmouse_protocol_by_name(const char *name, size_t len) { const struct psmouse_protocol *p; int i; for (i = 0; i < ARRAY_SIZE(psmouse_protocols); i++) { p = &psmouse_protocols[i]; if ((strlen(p->name) == len && !strncmp(p->name, name, len)) || (strlen(p->alias) == len && !strncmp(p->alias, name, len))) return &psmouse_protocols[i]; } return NULL; } /* * Apply default settings to the psmouse structure. Most of them will * be overridden by individual protocol initialization routines. */ static void psmouse_apply_defaults(struct psmouse *psmouse) { struct input_dev *input_dev = psmouse->dev; bitmap_zero(input_dev->evbit, EV_CNT); bitmap_zero(input_dev->keybit, KEY_CNT); bitmap_zero(input_dev->relbit, REL_CNT); bitmap_zero(input_dev->absbit, ABS_CNT); bitmap_zero(input_dev->mscbit, MSC_CNT); input_set_capability(input_dev, EV_KEY, BTN_LEFT); input_set_capability(input_dev, EV_KEY, BTN_RIGHT); input_set_capability(input_dev, EV_REL, REL_X); input_set_capability(input_dev, EV_REL, REL_Y); __set_bit(INPUT_PROP_POINTER, input_dev->propbit); psmouse->protocol = &psmouse_protocols[0]; psmouse->set_rate = psmouse_set_rate; psmouse->set_resolution = psmouse_set_resolution; psmouse->set_scale = psmouse_set_scale; psmouse->poll = psmouse_poll; psmouse->protocol_handler = psmouse_process_byte; psmouse->pktsize = 3; psmouse->reconnect = NULL; psmouse->fast_reconnect = NULL; psmouse->disconnect = NULL; psmouse->cleanup = NULL; psmouse->pt_activate = NULL; psmouse->pt_deactivate = NULL; } static bool psmouse_do_detect(int (*detect)(struct psmouse *, bool), struct psmouse *psmouse, bool allow_passthrough, bool set_properties) { if (psmouse->ps2dev.serio->id.type == SERIO_PS_PSTHRU && !allow_passthrough) { return false; } if (set_properties) psmouse_apply_defaults(psmouse); return detect(psmouse, set_properties) == 0; } static bool psmouse_try_protocol(struct psmouse *psmouse, enum psmouse_type type, unsigned int *max_proto, bool set_properties, bool init_allowed) { const struct psmouse_protocol *proto; proto = __psmouse_protocol_by_type(type); if (!proto) return false; if (!psmouse_do_detect(proto->detect, psmouse, proto->try_passthru, set_properties)) return false; if (set_properties && proto->init && init_allowed) { if (proto->init(psmouse) != 0) { /* * We detected device, but init failed. Adjust * max_proto so we only try standard protocols. */ if (*max_proto > PSMOUSE_IMEX) *max_proto = PSMOUSE_IMEX; return false; } } return true; } /* * psmouse_extensions() probes for any extensions to the basic PS/2 protocol * the mouse may have. */ static int psmouse_extensions(struct psmouse *psmouse, unsigned int max_proto, bool set_properties) { bool synaptics_hardware = false; int ret; /* * Always check for focaltech, this is safe as it uses pnp-id * matching. */ if (psmouse_do_detect(focaltech_detect, psmouse, false, set_properties)) { if (max_proto > PSMOUSE_IMEX && IS_ENABLED(CONFIG_MOUSE_PS2_FOCALTECH) && (!set_properties || focaltech_init(psmouse) == 0)) { return PSMOUSE_FOCALTECH; } /* * Restrict psmouse_max_proto so that psmouse_initialize() * does not try to reset rate and resolution, because even * that upsets the device. * This also causes us to basically fall through to basic * protocol detection, where we fully reset the mouse, * and set it up as bare PS/2 protocol device. */ psmouse_max_proto = max_proto = PSMOUSE_PS2; } /* * We always check for LifeBook because it does not disturb mouse * (it only checks DMI information). */ if (psmouse_try_protocol(psmouse, PSMOUSE_LIFEBOOK, &max_proto, set_properties, max_proto > PSMOUSE_IMEX)) return PSMOUSE_LIFEBOOK; if (psmouse_try_protocol(psmouse, PSMOUSE_VMMOUSE, &max_proto, set_properties, max_proto > PSMOUSE_IMEX)) return PSMOUSE_VMMOUSE; /* * Try Kensington ThinkingMouse (we try first, because Synaptics * probe upsets the ThinkingMouse). */ if (max_proto > PSMOUSE_IMEX && psmouse_try_protocol(psmouse, PSMOUSE_THINKPS, &max_proto, set_properties, true)) { return PSMOUSE_THINKPS; } /* * Try Synaptics TouchPad. Note that probing is done even if * Synaptics protocol support is disabled in config - we need to * know if it is Synaptics so we can reset it properly after * probing for IntelliMouse. */ if (max_proto > PSMOUSE_PS2 && psmouse_do_detect(synaptics_detect, psmouse, false, set_properties)) { synaptics_hardware = true; if (max_proto > PSMOUSE_IMEX) { /* * Try activating protocol, but check if support is * enabled first, since we try detecting Synaptics * even when protocol is disabled. */ if (IS_ENABLED(CONFIG_MOUSE_PS2_SYNAPTICS) || IS_ENABLED(CONFIG_MOUSE_PS2_SYNAPTICS_SMBUS)) { if (!set_properties) return PSMOUSE_SYNAPTICS; ret = synaptics_init(psmouse); if (ret >= 0) return ret; } /* * Some Synaptics touchpads can emulate extended * protocols (like IMPS/2). Unfortunately * Logitech/Genius probes confuse some firmware * versions so we'll have to skip them. */ max_proto = PSMOUSE_IMEX; } /* * Make sure that touchpad is in relative mode, gestures * (taps) are enabled. */ synaptics_reset(psmouse); } /* * Try Cypress Trackpad. We must try it before Finger Sensing Pad * because Finger Sensing Pad probe upsets some modules of Cypress * Trackpads. */ if (max_proto > PSMOUSE_IMEX && psmouse_try_protocol(psmouse, PSMOUSE_CYPRESS, &max_proto, set_properties, true)) { return PSMOUSE_CYPRESS; } /* Try ALPS TouchPad */ if (max_proto > PSMOUSE_IMEX) { ps2_command(&psmouse->ps2dev, NULL, PSMOUSE_CMD_RESET_DIS); if (psmouse_try_protocol(psmouse, PSMOUSE_ALPS, &max_proto, set_properties, true)) return PSMOUSE_ALPS; } /* Try OLPC HGPK touchpad */ if (max_proto > PSMOUSE_IMEX && psmouse_try_protocol(psmouse, PSMOUSE_HGPK, &max_proto, set_properties, true)) { return PSMOUSE_HGPK; } /* Try Elantech touchpad */ if (max_proto > PSMOUSE_IMEX && psmouse_try_protocol(psmouse, PSMOUSE_ELANTECH, &max_proto, set_properties, false)) { if (!set_properties) return PSMOUSE_ELANTECH; ret = elantech_init(psmouse); if (ret >= 0) return ret; } if (max_proto > PSMOUSE_IMEX) { if (psmouse_try_protocol(psmouse, PSMOUSE_GENPS, &max_proto, set_properties, true)) return PSMOUSE_GENPS; if (psmouse_try_protocol(psmouse, PSMOUSE_PS2PP, &max_proto, set_properties, true)) return PSMOUSE_PS2PP; if (psmouse_try_protocol(psmouse, PSMOUSE_TRACKPOINT, &max_proto, set_properties, true)) return PSMOUSE_TRACKPOINT; if (psmouse_try_protocol(psmouse, PSMOUSE_TOUCHKIT_PS2, &max_proto, set_properties, true)) return PSMOUSE_TOUCHKIT_PS2; } /* * Try Finger Sensing Pad. We do it here because its probe upsets * Trackpoint devices (causing TP_READ_ID command to time out). */ if (max_proto > PSMOUSE_IMEX && psmouse_try_protocol(psmouse, PSMOUSE_FSP, &max_proto, set_properties, true)) { return PSMOUSE_FSP; } /* * Reset to defaults in case the device got confused by extended * protocol probes. Note that we follow up with full reset because * some mice put themselves to sleep when they see PSMOUSE_RESET_DIS. */ ps2_command(&psmouse->ps2dev, NULL, PSMOUSE_CMD_RESET_DIS); psmouse_reset(psmouse); if (max_proto >= PSMOUSE_IMEX && psmouse_try_protocol(psmouse, PSMOUSE_IMEX, &max_proto, set_properties, true)) { return PSMOUSE_IMEX; } if (max_proto >= PSMOUSE_IMPS && psmouse_try_protocol(psmouse, PSMOUSE_IMPS, &max_proto, set_properties, true)) { return PSMOUSE_IMPS; } /* * Okay, all failed, we have a standard mouse here. The number of * the buttons is still a question, though. We assume 3. */ psmouse_try_protocol(psmouse, PSMOUSE_PS2, &max_proto, set_properties, true); if (synaptics_hardware) { /* * We detected Synaptics hardware but it did not respond to * IMPS/2 probes. We need to reset the touchpad because if * there is a track point on the pass through port it could * get disabled while probing for protocol extensions. */ psmouse_reset(psmouse); } return PSMOUSE_PS2; } /* * psmouse_probe() probes for a PS/2 mouse. */ static int psmouse_probe(struct psmouse *psmouse) { struct ps2dev *ps2dev = &psmouse->ps2dev; u8 param[2]; int error; /* * First, we check if it's a mouse. It should send 0x00 or 0x03 in * case of an IntelliMouse in 4-byte mode or 0x04 for IM Explorer. * Sunrex K8561 IR Keyboard/Mouse reports 0xff on second and * subsequent ID queries, probably due to a firmware bug. */ param[0] = 0xa5; error = ps2_command(ps2dev, param, PSMOUSE_CMD_GETID); if (error) return error; if (param[0] != 0x00 && param[0] != 0x03 && param[0] != 0x04 && param[0] != 0xff) return -ENODEV; /* * Then we reset and disable the mouse so that it doesn't generate * events. */ error = ps2_command(ps2dev, NULL, PSMOUSE_CMD_RESET_DIS); if (error) psmouse_warn(psmouse, "Failed to reset mouse on %s: %d\n", ps2dev->serio->phys, error); return 0; } /* * psmouse_initialize() initializes the mouse to a sane state. */ static void psmouse_initialize(struct psmouse *psmouse) { /* * We set the mouse report rate, resolution and scaling. */ if (psmouse_max_proto != PSMOUSE_PS2) { psmouse->set_rate(psmouse, psmouse->rate); psmouse->set_resolution(psmouse, psmouse->resolution); psmouse->set_scale(psmouse, PSMOUSE_SCALE11); } } /* * psmouse_activate() enables the mouse so that we get motion reports from it. */ int psmouse_activate(struct psmouse *psmouse) { if (ps2_command(&psmouse->ps2dev, NULL, PSMOUSE_CMD_ENABLE)) { psmouse_warn(psmouse, "Failed to enable mouse on %s\n", psmouse->ps2dev.serio->phys); return -1; } psmouse_set_state(psmouse, PSMOUSE_ACTIVATED); return 0; } /* * psmouse_deactivate() puts the mouse into poll mode so that we don't get * motion reports from it unless we explicitly request it. */ int psmouse_deactivate(struct psmouse *psmouse) { int error; error = ps2_command(&psmouse->ps2dev, NULL, PSMOUSE_CMD_DISABLE); if (error) { psmouse_warn(psmouse, "Failed to deactivate mouse on %s: %d\n", psmouse->ps2dev.serio->phys, error); return error; } psmouse_set_state(psmouse, PSMOUSE_CMD_MODE); return 0; } /* * psmouse_resync() attempts to re-validate current protocol. */ static void psmouse_resync(struct work_struct *work) { struct psmouse *parent = NULL, *psmouse = container_of(work, struct psmouse, resync_work.work); struct serio *serio = psmouse->ps2dev.serio; psmouse_ret_t rc = PSMOUSE_GOOD_DATA; bool failed = false, enabled = false; int i; mutex_lock(&psmouse_mutex); if (psmouse->state != PSMOUSE_RESYNCING) goto out; if (serio->parent && serio->id.type == SERIO_PS_PSTHRU) { parent = psmouse_from_serio(serio->parent); psmouse_deactivate(parent); } /* * Some mice don't ACK commands sent while they are in the middle of * transmitting motion packet. To avoid delay we use ps2_sendbyte() * instead of ps2_command() which would wait for 200ms for an ACK * that may never come. * As an additional quirk ALPS touchpads may not only forget to ACK * disable command but will stop reporting taps, so if we see that * mouse at least once ACKs disable we will do full reconnect if ACK * is missing. */ psmouse->num_resyncs++; if (ps2_sendbyte(&psmouse->ps2dev, PSMOUSE_CMD_DISABLE, 20)) { if (psmouse->num_resyncs < 3 || psmouse->acks_disable_command) failed = true; } else psmouse->acks_disable_command = true; /* * Poll the mouse. If it was reset the packet will be shorter than * psmouse->pktsize and ps2_command will fail. We do not expect and * do not handle scenario when mouse "upgrades" its protocol while * disconnected since it would require additional delay. If we ever * see a mouse that does it we'll adjust the code. */ if (!failed) { if (psmouse->poll(psmouse)) failed = true; else { psmouse_set_state(psmouse, PSMOUSE_CMD_MODE); for (i = 0; i < psmouse->pktsize; i++) { psmouse->pktcnt++; rc = psmouse->protocol_handler(psmouse); if (rc != PSMOUSE_GOOD_DATA) break; } if (rc != PSMOUSE_FULL_PACKET) failed = true; psmouse_set_state(psmouse, PSMOUSE_RESYNCING); } } /* * Now try to enable mouse. We try to do that even if poll failed * and also repeat our attempts 5 times, otherwise we may be left * out with disabled mouse. */ for (i = 0; i < 5; i++) { if (!ps2_command(&psmouse->ps2dev, NULL, PSMOUSE_CMD_ENABLE)) { enabled = true; break; } msleep(200); } if (!enabled) { psmouse_warn(psmouse, "failed to re-enable mouse on %s\n", psmouse->ps2dev.serio->phys); failed = true; } if (failed) { psmouse_set_state(psmouse, PSMOUSE_IGNORE); psmouse_info(psmouse, "resync failed, issuing reconnect request\n"); serio_reconnect(serio); } else psmouse_set_state(psmouse, PSMOUSE_ACTIVATED); if (parent) psmouse_activate(parent); out: mutex_unlock(&psmouse_mutex); } /* * psmouse_cleanup() resets the mouse into power-on state. */ static void psmouse_cleanup(struct serio *serio) { struct psmouse *psmouse = psmouse_from_serio(serio); struct psmouse *parent = NULL; mutex_lock(&psmouse_mutex); if (serio->parent && serio->id.type == SERIO_PS_PSTHRU) { parent = psmouse_from_serio(serio->parent); psmouse_deactivate(parent); } psmouse_set_state(psmouse, PSMOUSE_INITIALIZING); /* * Disable stream mode so cleanup routine can proceed undisturbed. */ if (ps2_command(&psmouse->ps2dev, NULL, PSMOUSE_CMD_DISABLE)) psmouse_warn(psmouse, "Failed to disable mouse on %s\n", psmouse->ps2dev.serio->phys); if (psmouse->cleanup) psmouse->cleanup(psmouse); /* * Reset the mouse to defaults (bare PS/2 protocol). */ ps2_command(&psmouse->ps2dev, NULL, PSMOUSE_CMD_RESET_DIS); /* * Some boxes, such as HP nx7400, get terribly confused if mouse * is not fully enabled before suspending/shutting down. */ ps2_command(&psmouse->ps2dev, NULL, PSMOUSE_CMD_ENABLE); if (parent) { if (parent->pt_deactivate) parent->pt_deactivate(parent); psmouse_activate(parent); } mutex_unlock(&psmouse_mutex); } /* * psmouse_disconnect() closes and frees. */ static void psmouse_disconnect(struct serio *serio) { struct psmouse *psmouse = psmouse_from_serio(serio); struct psmouse *parent = NULL; mutex_lock(&psmouse_mutex); psmouse_set_state(psmouse, PSMOUSE_CMD_MODE); /* make sure we don't have a resync in progress */ mutex_unlock(&psmouse_mutex); flush_workqueue(kpsmoused_wq); mutex_lock(&psmouse_mutex); if (serio->parent && serio->id.type == SERIO_PS_PSTHRU) { parent = psmouse_from_serio(serio->parent); psmouse_deactivate(parent); } if (psmouse->disconnect) psmouse->disconnect(psmouse); if (parent && parent->pt_deactivate) parent->pt_deactivate(parent); psmouse_set_state(psmouse, PSMOUSE_IGNORE); serio_close(serio); serio_set_drvdata(serio, NULL); if (psmouse->dev) input_unregister_device(psmouse->dev); kfree(psmouse); if (parent) psmouse_activate(parent); mutex_unlock(&psmouse_mutex); } static int psmouse_switch_protocol(struct psmouse *psmouse, const struct psmouse_protocol *proto) { const struct psmouse_protocol *selected_proto; struct input_dev *input_dev = psmouse->dev; enum psmouse_type type; input_dev->dev.parent = &psmouse->ps2dev.serio->dev; if (proto && (proto->detect || proto->init)) { psmouse_apply_defaults(psmouse); if (proto->detect && proto->detect(psmouse, true) < 0) return -1; if (proto->init && proto->init(psmouse) < 0) return -1; selected_proto = proto; } else { type = psmouse_extensions(psmouse, psmouse_max_proto, true); selected_proto = psmouse_protocol_by_type(type); } psmouse->protocol = selected_proto; /* * If mouse's packet size is 3 there is no point in polling the * device in hopes to detect protocol reset - we won't get less * than 3 bytes response anyhow. */ if (psmouse->pktsize == 3) psmouse->resync_time = 0; /* * Some smart KVMs fake response to POLL command returning just * 3 bytes and messing up our resync logic, so if initial poll * fails we won't try polling the device anymore. Hopefully * such KVM will maintain initially selected protocol. */ if (psmouse->resync_time && psmouse->poll(psmouse)) psmouse->resync_time = 0; snprintf(psmouse->devname, sizeof(psmouse->devname), "%s %s %s", selected_proto->name, psmouse->vendor, psmouse->name); input_dev->name = psmouse->devname; input_dev->phys = psmouse->phys; input_dev->id.bustype = BUS_I8042; input_dev->id.vendor = 0x0002; input_dev->id.product = psmouse->protocol->type; input_dev->id.version = psmouse->model; return 0; } /* * psmouse_connect() is a callback from the serio module when * an unhandled serio port is found. */ static int psmouse_connect(struct serio *serio, struct serio_driver *drv) { struct psmouse *psmouse, *parent = NULL; struct input_dev *input_dev; int retval = 0, error = -ENOMEM; mutex_lock(&psmouse_mutex); /* * If this is a pass-through port deactivate parent so the device * connected to this port can be successfully identified */ if (serio->parent && serio->id.type == SERIO_PS_PSTHRU) { parent = psmouse_from_serio(serio->parent); psmouse_deactivate(parent); } psmouse = kzalloc(sizeof(*psmouse), GFP_KERNEL); input_dev = input_allocate_device(); if (!psmouse || !input_dev) goto err_free; ps2_init(&psmouse->ps2dev, serio, psmouse_pre_receive_byte, psmouse_receive_byte); INIT_DELAYED_WORK(&psmouse->resync_work, psmouse_resync); psmouse->dev = input_dev; snprintf(psmouse->phys, sizeof(psmouse->phys), "%s/input0", serio->phys); psmouse_set_state(psmouse, PSMOUSE_INITIALIZING); error = serio_open(serio, drv); if (error) goto err_clear_drvdata; /* give PT device some time to settle down before probing */ if (serio->id.type == SERIO_PS_PSTHRU) usleep_range(10000, 15000); if (psmouse_probe(psmouse) < 0) { error = -ENODEV; goto err_close_serio; } psmouse->rate = psmouse_rate; psmouse->resolution = psmouse_resolution; psmouse->resetafter = psmouse_resetafter; psmouse->resync_time = parent ? 0 : psmouse_resync_time; psmouse->smartscroll = psmouse_smartscroll; psmouse_switch_protocol(psmouse, NULL); if (!psmouse->protocol->smbus_companion) { psmouse_set_state(psmouse, PSMOUSE_CMD_MODE); psmouse_initialize(psmouse); error = input_register_device(input_dev); if (error) goto err_protocol_disconnect; } else { /* Smbus companion will be reporting events, not us. */ input_free_device(input_dev); psmouse->dev = input_dev = NULL; } if (parent && parent->pt_activate) parent->pt_activate(parent); /* * PS/2 devices having SMBus companions should stay disabled * on PS/2 side, in order to have SMBus part operable. */ if (!psmouse->protocol->smbus_companion) psmouse_activate(psmouse); out: /* If this is a pass-through port the parent needs to be re-activated */ if (parent) psmouse_activate(parent); mutex_unlock(&psmouse_mutex); return retval; err_protocol_disconnect: if (psmouse->disconnect) psmouse->disconnect(psmouse); psmouse_set_state(psmouse, PSMOUSE_IGNORE); err_close_serio: serio_close(serio); err_clear_drvdata: serio_set_drvdata(serio, NULL); err_free: input_free_device(input_dev); kfree(psmouse); retval = error; goto out; } static int __psmouse_reconnect(struct serio *serio, bool fast_reconnect) { struct psmouse *psmouse = psmouse_from_serio(serio); struct psmouse *parent = NULL; int (*reconnect_handler)(struct psmouse *); enum psmouse_type type; int rc = -1; mutex_lock(&psmouse_mutex); if (fast_reconnect) { reconnect_handler = psmouse->fast_reconnect; if (!reconnect_handler) { rc = -ENOENT; goto out_unlock; } } else { reconnect_handler = psmouse->reconnect; } if (serio->parent && serio->id.type == SERIO_PS_PSTHRU) { parent = psmouse_from_serio(serio->parent); psmouse_deactivate(parent); } psmouse_set_state(psmouse, PSMOUSE_INITIALIZING); if (reconnect_handler) { if (reconnect_handler(psmouse)) goto out; } else { psmouse_reset(psmouse); if (psmouse_probe(psmouse) < 0) goto out; type = psmouse_extensions(psmouse, psmouse_max_proto, false); if (psmouse->protocol->type != type) goto out; } /* * OK, the device type (and capabilities) match the old one, * we can continue using it, complete initialization */ if (!psmouse->protocol->smbus_companion) { psmouse_set_state(psmouse, PSMOUSE_CMD_MODE); psmouse_initialize(psmouse); } if (parent && parent->pt_activate) parent->pt_activate(parent); /* * PS/2 devices having SMBus companions should stay disabled * on PS/2 side, in order to have SMBus part operable. */ if (!psmouse->protocol->smbus_companion) psmouse_activate(psmouse); rc = 0; out: /* If this is a pass-through port the parent waits to be activated */ if (parent) psmouse_activate(parent); out_unlock: mutex_unlock(&psmouse_mutex); return rc; } static int psmouse_reconnect(struct serio *serio) { return __psmouse_reconnect(serio, false); } static int psmouse_fast_reconnect(struct serio *serio) { return __psmouse_reconnect(serio, true); } static struct serio_device_id psmouse_serio_ids[] = { { .type = SERIO_8042, .proto = SERIO_ANY, .id = SERIO_ANY, .extra = SERIO_ANY, }, { .type = SERIO_PS_PSTHRU, .proto = SERIO_ANY, .id = SERIO_ANY, .extra = SERIO_ANY, }, { 0 } }; MODULE_DEVICE_TABLE(serio, psmouse_serio_ids); static struct serio_driver psmouse_drv = { .driver = { .name = "psmouse", .dev_groups = psmouse_dev_groups, }, .description = DRIVER_DESC, .id_table = psmouse_serio_ids, .interrupt = ps2_interrupt, .connect = psmouse_connect, .reconnect = psmouse_reconnect, .fast_reconnect = psmouse_fast_reconnect, .disconnect = psmouse_disconnect, .cleanup = psmouse_cleanup, }; ssize_t psmouse_attr_show_helper(struct device *dev, struct device_attribute *devattr, char *buf) { struct serio *serio = to_serio_port(dev); struct psmouse_attribute *attr = to_psmouse_attr(devattr); struct psmouse *psmouse = psmouse_from_serio(serio); if (psmouse->protocol->smbus_companion && devattr != &psmouse_attr_protocol.dattr) return -ENOENT; return attr->show(psmouse, attr->data, buf); } ssize_t psmouse_attr_set_helper(struct device *dev, struct device_attribute *devattr, const char *buf, size_t count) { struct serio *serio = to_serio_port(dev); struct psmouse_attribute *attr = to_psmouse_attr(devattr); struct psmouse *psmouse, *parent = NULL; int retval; retval = mutex_lock_interruptible(&psmouse_mutex); if (retval) goto out; psmouse = psmouse_from_serio(serio); if (psmouse->protocol->smbus_companion && devattr != &psmouse_attr_protocol.dattr) { retval = -ENOENT; goto out_unlock; } if (attr->protect) { if (psmouse->state == PSMOUSE_IGNORE) { retval = -ENODEV; goto out_unlock; } if (serio->parent && serio->id.type == SERIO_PS_PSTHRU) { parent = psmouse_from_serio(serio->parent); psmouse_deactivate(parent); } if (!psmouse->protocol->smbus_companion) psmouse_deactivate(psmouse); } retval = attr->set(psmouse, attr->data, buf, count); if (attr->protect) { if (retval != -ENODEV && !psmouse->protocol->smbus_companion) psmouse_activate(psmouse); if (parent) psmouse_activate(parent); } out_unlock: mutex_unlock(&psmouse_mutex); out: return retval; } static ssize_t psmouse_show_int_attr(struct psmouse *psmouse, void *offset, char *buf) { unsigned int *field = (unsigned int *)((char *)psmouse + (size_t)offset); return sprintf(buf, "%u\n", *field); } static ssize_t psmouse_set_int_attr(struct psmouse *psmouse, void *offset, const char *buf, size_t count) { unsigned int *field = (unsigned int *)((char *)psmouse + (size_t)offset); unsigned int value; int err; err = kstrtouint(buf, 10, &value); if (err) return err; *field = value; return count; } static ssize_t psmouse_attr_show_protocol(struct psmouse *psmouse, void *data, char *buf) { return sprintf(buf, "%s\n", psmouse->protocol->name); } static ssize_t psmouse_attr_set_protocol(struct psmouse *psmouse, void *data, const char *buf, size_t count) { struct serio *serio = psmouse->ps2dev.serio; struct psmouse *parent = NULL; struct input_dev *old_dev, *new_dev; const struct psmouse_protocol *proto, *old_proto; int error; int retry = 0; proto = psmouse_protocol_by_name(buf, count); if (!proto) return -EINVAL; if (psmouse->protocol == proto) return count; new_dev = input_allocate_device(); if (!new_dev) return -ENOMEM; while (!list_empty(&serio->children)) { if (++retry > 3) { psmouse_warn(psmouse, "failed to destroy children ports, protocol change aborted.\n"); input_free_device(new_dev); return -EIO; } mutex_unlock(&psmouse_mutex); serio_unregister_child_port(serio); mutex_lock(&psmouse_mutex); if (serio->drv != &psmouse_drv) { input_free_device(new_dev); return -ENODEV; } if (psmouse->protocol == proto) { input_free_device(new_dev); return count; /* switched by other thread */ } } if (serio->parent && serio->id.type == SERIO_PS_PSTHRU) { parent = psmouse_from_serio(serio->parent); if (parent->pt_deactivate) parent->pt_deactivate(parent); } old_dev = psmouse->dev; old_proto = psmouse->protocol; if (psmouse->disconnect) psmouse->disconnect(psmouse); psmouse_set_state(psmouse, PSMOUSE_IGNORE); psmouse->dev = new_dev; psmouse_set_state(psmouse, PSMOUSE_INITIALIZING); if (psmouse_switch_protocol(psmouse, proto) < 0) { psmouse_reset(psmouse); /* default to PSMOUSE_PS2 */ psmouse_switch_protocol(psmouse, &psmouse_protocols[0]); } psmouse_initialize(psmouse); psmouse_set_state(psmouse, PSMOUSE_CMD_MODE); if (psmouse->protocol->smbus_companion) { input_free_device(psmouse->dev); psmouse->dev = NULL; } else { error = input_register_device(psmouse->dev); if (error) { if (psmouse->disconnect) psmouse->disconnect(psmouse); psmouse_set_state(psmouse, PSMOUSE_IGNORE); input_free_device(new_dev); psmouse->dev = old_dev; psmouse_set_state(psmouse, PSMOUSE_INITIALIZING); psmouse_switch_protocol(psmouse, old_proto); psmouse_initialize(psmouse); psmouse_set_state(psmouse, PSMOUSE_CMD_MODE); return error; } } if (old_dev) input_unregister_device(old_dev); if (parent && parent->pt_activate) parent->pt_activate(parent); return count; } static ssize_t psmouse_attr_set_rate(struct psmouse *psmouse, void *data, const char *buf, size_t count) { unsigned int value; int err; err = kstrtouint(buf, 10, &value); if (err) return err; psmouse->set_rate(psmouse, value); return count; } static ssize_t psmouse_attr_set_resolution(struct psmouse *psmouse, void *data, const char *buf, size_t count) { unsigned int value; int err; err = kstrtouint(buf, 10, &value); if (err) return err; psmouse->set_resolution(psmouse, value); return count; } static int psmouse_set_maxproto(const char *val, const struct kernel_param *kp) { const struct psmouse_protocol *proto; if (!val) return -EINVAL; proto = psmouse_protocol_by_name(val, strlen(val)); if (!proto || !proto->maxproto) return -EINVAL; *((unsigned int *)kp->arg) = proto->type; return 0; } static int psmouse_get_maxproto(char *buffer, const struct kernel_param *kp) { int type = *((unsigned int *)kp->arg); return sprintf(buffer, "%s\n", psmouse_protocol_by_type(type)->name); } static int __init psmouse_init(void) { int err; lifebook_module_init(); synaptics_module_init(); hgpk_module_init(); err = psmouse_smbus_module_init(); if (err) return err; kpsmoused_wq = alloc_ordered_workqueue("kpsmoused", 0); if (!kpsmoused_wq) { pr_err("failed to create kpsmoused workqueue\n"); err = -ENOMEM; goto err_smbus_exit; } err = serio_register_driver(&psmouse_drv); if (err) goto err_destroy_wq; return 0; err_destroy_wq: destroy_workqueue(kpsmoused_wq); err_smbus_exit: psmouse_smbus_module_exit(); return err; } static void __exit psmouse_exit(void) { serio_unregister_driver(&psmouse_drv); destroy_workqueue(kpsmoused_wq); psmouse_smbus_module_exit(); } module_init(psmouse_init); module_exit(psmouse_exit);
22 13 20 26 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 /* * linux/fs/nls/nls_cp864.c * * Charset cp864 translation tables. * Generated automatically from the Unicode and charset * tables from the Unicode Organization (www.unicode.org). * The Unicode to charset table has only exact mappings. */ #include <linux/module.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/nls.h> #include <linux/errno.h> static const wchar_t charset2uni[256] = { /* 0x00*/ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, /* 0x10*/ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, /* 0x20*/ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x066a, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, /* 0x30*/ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, /* 0x40*/ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, /* 0x50*/ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, /* 0x60*/ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, /* 0x70*/ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, /* 0x80*/ 0x00b0, 0x00b7, 0x2219, 0x221a, 0x2592, 0x2500, 0x2502, 0x253c, 0x2524, 0x252c, 0x251c, 0x2534, 0x2510, 0x250c, 0x2514, 0x2518, /* 0x90*/ 0x03b2, 0x221e, 0x03c6, 0x00b1, 0x00bd, 0x00bc, 0x2248, 0x00ab, 0x00bb, 0xfef7, 0xfef8, 0x0000, 0x0000, 0xfefb, 0xfefc, 0x0000, /* 0xa0*/ 0x00a0, 0x00ad, 0xfe82, 0x00a3, 0x00a4, 0xfe84, 0x0000, 0x0000, 0xfe8e, 0xfe8f, 0xfe95, 0xfe99, 0x060c, 0xfe9d, 0xfea1, 0xfea5, /* 0xb0*/ 0x0660, 0x0661, 0x0662, 0x0663, 0x0664, 0x0665, 0x0666, 0x0667, 0x0668, 0x0669, 0xfed1, 0x061b, 0xfeb1, 0xfeb5, 0xfeb9, 0x061f, /* 0xc0*/ 0x00a2, 0xfe80, 0xfe81, 0xfe83, 0xfe85, 0xfeca, 0xfe8b, 0xfe8d, 0xfe91, 0xfe93, 0xfe97, 0xfe9b, 0xfe9f, 0xfea3, 0xfea7, 0xfea9, /* 0xd0*/ 0xfeab, 0xfead, 0xfeaf, 0xfeb3, 0xfeb7, 0xfebb, 0xfebf, 0xfec1, 0xfec5, 0xfecb, 0xfecf, 0x00a6, 0x00ac, 0x00f7, 0x00d7, 0xfec9, /* 0xe0*/ 0x0640, 0xfed3, 0xfed7, 0xfedb, 0xfedf, 0xfee3, 0xfee7, 0xfeeb, 0xfeed, 0xfeef, 0xfef3, 0xfebd, 0xfecc, 0xfece, 0xfecd, 0xfee1, /* 0xf0*/ 0xfe7d, 0x0651, 0xfee5, 0xfee9, 0xfeec, 0xfef0, 0xfef2, 0xfed0, 0xfed5, 0xfef5, 0xfef6, 0xfedd, 0xfed9, 0xfef1, 0x25a0, 0x0000, }; static const unsigned char page00[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x00, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0xa0, 0x00, 0xc0, 0xa3, 0xa4, 0x00, 0xdb, 0x00, /* 0xa0-0xa7 */ 0x00, 0x00, 0x00, 0x97, 0xdc, 0xa1, 0x00, 0x00, /* 0xa8-0xaf */ 0x80, 0x93, 0x00, 0x00, 0x00, 0x00, 0x00, 0x81, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0x98, 0x95, 0x94, 0x00, 0x00, /* 0xb8-0xbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xde, /* 0xd0-0xd7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xdd, /* 0xf0-0xf7 */ }; static const unsigned char page03[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 0x00, 0x00, 0x90, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x92, 0x00, /* 0xc0-0xc7 */ }; static const unsigned char page06[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0xac, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0xbb, 0x00, 0x00, 0x00, 0xbf, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0xe0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0xf1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0x60-0x67 */ 0xb8, 0xb9, 0x25, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ }; static const unsigned char page22[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x82, 0x83, 0x00, 0x00, 0x00, 0x91, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x96, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ }; static const unsigned char page25[256] = { 0x85, 0x00, 0x86, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x8d, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x8c, 0x00, 0x00, 0x00, 0x8e, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x8f, 0x00, 0x00, 0x00, 0x8a, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x88, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x89, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x8b, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x87, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ }; static const unsigned char pagefe[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x00, 0x00, /* 0x78-0x7f */ 0xc1, 0xc2, 0xa2, 0xc3, 0xa5, 0xc4, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0xc6, 0x00, 0xc7, 0xa8, 0xa9, /* 0x88-0x8f */ 0x00, 0xc8, 0x00, 0xc9, 0x00, 0xaa, 0x00, 0xca, /* 0x90-0x97 */ 0x00, 0xab, 0x00, 0xcb, 0x00, 0xad, 0x00, 0xcc, /* 0x98-0x9f */ 0x00, 0xae, 0x00, 0xcd, 0x00, 0xaf, 0x00, 0xce, /* 0xa0-0xa7 */ 0x00, 0xcf, 0x00, 0xd0, 0x00, 0xd1, 0x00, 0xd2, /* 0xa8-0xaf */ 0x00, 0xbc, 0x00, 0xd3, 0x00, 0xbd, 0x00, 0xd4, /* 0xb0-0xb7 */ 0x00, 0xbe, 0x00, 0xd5, 0x00, 0xeb, 0x00, 0xd6, /* 0xb8-0xbf */ 0x00, 0xd7, 0x00, 0x00, 0x00, 0xd8, 0x00, 0x00, /* 0xc0-0xc7 */ 0x00, 0xdf, 0xc5, 0xd9, 0xec, 0xee, 0xed, 0xda, /* 0xc8-0xcf */ 0xf7, 0xba, 0x00, 0xe1, 0x00, 0xf8, 0x00, 0xe2, /* 0xd0-0xd7 */ 0x00, 0xfc, 0x00, 0xe3, 0x00, 0xfb, 0x00, 0xe4, /* 0xd8-0xdf */ 0x00, 0xef, 0x00, 0xe5, 0x00, 0xf2, 0x00, 0xe6, /* 0xe0-0xe7 */ 0x00, 0xf3, 0x00, 0xe7, 0xf4, 0xe8, 0x00, 0xe9, /* 0xe8-0xef */ 0xf5, 0xfd, 0xf6, 0xea, 0x00, 0xf9, 0xfa, 0x99, /* 0xf0-0xf7 */ 0x9a, 0x00, 0x00, 0x9d, 0x9e, 0x00, 0x00, 0x00, /* 0xf8-0xff */ }; static const unsigned char *const page_uni2charset[256] = { page00, NULL, NULL, page03, NULL, NULL, page06, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, page22, NULL, NULL, page25, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, pagefe, NULL, }; static const unsigned char charset2lower[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x40-0x47 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x48-0x4f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x50-0x57 */ 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 0x80-0x87 */ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 0x88-0x8f */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 0x90-0x97 */ 0x98, 0x99, 0x9a, 0x00, 0x00, 0x9d, 0x9e, 0x00, /* 0x98-0x9f */ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0x00, 0x00, /* 0xa0-0xa7 */ 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0xa8-0xaf */ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */ 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0xd0-0xd7 */ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* 0xd8-0xdf */ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* 0xe0-0xe7 */ 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0xe8-0xef */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xf0-0xf7 */ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0x00, /* 0xf8-0xff */ }; static const unsigned char charset2upper[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x60-0x67 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x68-0x6f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x70-0x77 */ 0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 0x80-0x87 */ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 0x88-0x8f */ 0x00, 0x91, 0x00, 0x93, 0x94, 0x95, 0x96, 0x97, /* 0x90-0x97 */ 0x98, 0x99, 0x9a, 0x00, 0x00, 0x9d, 0x9e, 0x00, /* 0x98-0x9f */ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0x00, 0x00, /* 0xa0-0xa7 */ 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0xa8-0xaf */ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */ 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0xd0-0xd7 */ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* 0xd8-0xdf */ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* 0xe0-0xe7 */ 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0xe8-0xef */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xf0-0xf7 */ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0x00, /* 0xf8-0xff */ }; static int uni2char(wchar_t uni, unsigned char *out, int boundlen) { const unsigned char *uni2charset; unsigned char cl = uni & 0x00ff; unsigned char ch = (uni & 0xff00) >> 8; if (boundlen <= 0) return -ENAMETOOLONG; uni2charset = page_uni2charset[ch]; if (uni2charset && uni2charset[cl]) out[0] = uni2charset[cl]; else return -EINVAL; return 1; } static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni) { *uni = charset2uni[*rawstring]; if (*uni == 0x0000) return -EINVAL; return 1; } static struct nls_table table = { .charset = "cp864", .uni2char = uni2char, .char2uni = char2uni, .charset2lower = charset2lower, .charset2upper = charset2upper, }; static int __init init_nls_cp864(void) { return register_nls(&table); } static void __exit exit_nls_cp864(void) { unregister_nls(&table); } module_init(init_nls_cp864) module_exit(exit_nls_cp864) MODULE_DESCRIPTION("NLS Codepage 864 (Arabic)"); MODULE_LICENSE("Dual BSD/GPL");
5220 1 888 307 663 885 891 309 880 213 213 12 202 213 34 34 34 34 152 318 393 395 143 394 389 3 3 31 30 31 572 573 22 571 573 22 570 573 569 573 573 16 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 // SPDX-License-Identifier: GPL-2.0-only /* * jump label support * * Copyright (C) 2009 Jason Baron <jbaron@redhat.com> * Copyright (C) 2011 Peter Zijlstra * */ #include <linux/memory.h> #include <linux/uaccess.h> #include <linux/module.h> #include <linux/list.h> #include <linux/slab.h> #include <linux/sort.h> #include <linux/err.h> #include <linux/static_key.h> #include <linux/jump_label_ratelimit.h> #include <linux/bug.h> #include <linux/cpu.h> #include <asm/sections.h> /* mutex to protect coming/going of the jump_label table */ static DEFINE_MUTEX(jump_label_mutex); void jump_label_lock(void) { mutex_lock(&jump_label_mutex); } void jump_label_unlock(void) { mutex_unlock(&jump_label_mutex); } static int jump_label_cmp(const void *a, const void *b) { const struct jump_entry *jea = a; const struct jump_entry *jeb = b; /* * Entrires are sorted by key. */ if (jump_entry_key(jea) < jump_entry_key(jeb)) return -1; if (jump_entry_key(jea) > jump_entry_key(jeb)) return 1; /* * In the batching mode, entries should also be sorted by the code * inside the already sorted list of entries, enabling a bsearch in * the vector. */ if (jump_entry_code(jea) < jump_entry_code(jeb)) return -1; if (jump_entry_code(jea) > jump_entry_code(jeb)) return 1; return 0; } static void jump_label_swap(void *a, void *b, int size) { long delta = (unsigned long)a - (unsigned long)b; struct jump_entry *jea = a; struct jump_entry *jeb = b; struct jump_entry tmp = *jea; jea->code = jeb->code - delta; jea->target = jeb->target - delta; jea->key = jeb->key - delta; jeb->code = tmp.code + delta; jeb->target = tmp.target + delta; jeb->key = tmp.key + delta; } static void jump_label_sort_entries(struct jump_entry *start, struct jump_entry *stop) { unsigned long size; void *swapfn = NULL; if (IS_ENABLED(CONFIG_HAVE_ARCH_JUMP_LABEL_RELATIVE)) swapfn = jump_label_swap; size = (((unsigned long)stop - (unsigned long)start) / sizeof(struct jump_entry)); sort(start, size, sizeof(struct jump_entry), jump_label_cmp, swapfn); } static void jump_label_update(struct static_key *key); /* * There are similar definitions for the !CONFIG_JUMP_LABEL case in jump_label.h. * The use of 'atomic_read()' requires atomic.h and its problematic for some * kernel headers such as kernel.h and others. Since static_key_count() is not * used in the branch statements as it is for the !CONFIG_JUMP_LABEL case its ok * to have it be a function here. Similarly, for 'static_key_enable()' and * 'static_key_disable()', which require bug.h. This should allow jump_label.h * to be included from most/all places for CONFIG_JUMP_LABEL. */ int static_key_count(struct static_key *key) { /* * -1 means the first static_key_slow_inc() is in progress. * static_key_enabled() must return true, so return 1 here. */ int n = atomic_read(&key->enabled); return n >= 0 ? n : 1; } EXPORT_SYMBOL_GPL(static_key_count); /* * static_key_fast_inc_not_disabled - adds a user for a static key * @key: static key that must be already enabled * * The caller must make sure that the static key can't get disabled while * in this function. It doesn't patch jump labels, only adds a user to * an already enabled static key. * * Returns true if the increment was done. Unlike refcount_t the ref counter * is not saturated, but will fail to increment on overflow. */ bool static_key_fast_inc_not_disabled(struct static_key *key) { int v; STATIC_KEY_CHECK_USE(key); /* * Negative key->enabled has a special meaning: it sends * static_key_slow_inc/dec() down the slow path, and it is non-zero * so it counts as "enabled" in jump_label_update(). * * The INT_MAX overflow condition is either used by the networking * code to reset or detected in the slow path of * static_key_slow_inc_cpuslocked(). */ v = atomic_read(&key->enabled); do { if (v <= 0 || v == INT_MAX) return false; } while (!likely(atomic_try_cmpxchg(&key->enabled, &v, v + 1))); return true; } EXPORT_SYMBOL_GPL(static_key_fast_inc_not_disabled); bool static_key_slow_inc_cpuslocked(struct static_key *key) { lockdep_assert_cpus_held(); /* * Careful if we get concurrent static_key_slow_inc/dec() calls; * later calls must wait for the first one to _finish_ the * jump_label_update() process. At the same time, however, * the jump_label_update() call below wants to see * static_key_enabled(&key) for jumps to be updated properly. */ if (static_key_fast_inc_not_disabled(key)) return true; guard(mutex)(&jump_label_mutex); /* Try to mark it as 'enabling in progress. */ if (!atomic_cmpxchg(&key->enabled, 0, -1)) { jump_label_update(key); /* * Ensure that when static_key_fast_inc_not_disabled() or * static_key_dec_not_one() observe the positive value, * they must also observe all the text changes. */ atomic_set_release(&key->enabled, 1); } else { /* * While holding the mutex this should never observe * anything else than a value >= 1 and succeed */ if (WARN_ON_ONCE(!static_key_fast_inc_not_disabled(key))) return false; } return true; } bool static_key_slow_inc(struct static_key *key) { bool ret; cpus_read_lock(); ret = static_key_slow_inc_cpuslocked(key); cpus_read_unlock(); return ret; } EXPORT_SYMBOL_GPL(static_key_slow_inc); void static_key_enable_cpuslocked(struct static_key *key) { STATIC_KEY_CHECK_USE(key); lockdep_assert_cpus_held(); if (atomic_read(&key->enabled) > 0) { WARN_ON_ONCE(atomic_read(&key->enabled) != 1); return; } jump_label_lock(); if (atomic_read(&key->enabled) == 0) { atomic_set(&key->enabled, -1); jump_label_update(key); /* * See static_key_slow_inc(). */ atomic_set_release(&key->enabled, 1); } jump_label_unlock(); } EXPORT_SYMBOL_GPL(static_key_enable_cpuslocked); void static_key_enable(struct static_key *key) { cpus_read_lock(); static_key_enable_cpuslocked(key); cpus_read_unlock(); } EXPORT_SYMBOL_GPL(static_key_enable); void static_key_disable_cpuslocked(struct static_key *key) { STATIC_KEY_CHECK_USE(key); lockdep_assert_cpus_held(); if (atomic_read(&key->enabled) != 1) { WARN_ON_ONCE(atomic_read(&key->enabled) != 0); return; } jump_label_lock(); if (atomic_cmpxchg(&key->enabled, 1, 0) == 1) jump_label_update(key); jump_label_unlock(); } EXPORT_SYMBOL_GPL(static_key_disable_cpuslocked); void static_key_disable(struct static_key *key) { cpus_read_lock(); static_key_disable_cpuslocked(key); cpus_read_unlock(); } EXPORT_SYMBOL_GPL(static_key_disable); static bool static_key_dec_not_one(struct static_key *key) { int v; /* * Go into the slow path if key::enabled is less than or equal than * one. One is valid to shut down the key, anything less than one * is an imbalance, which is handled at the call site. * * That includes the special case of '-1' which is set in * static_key_slow_inc_cpuslocked(), but that's harmless as it is * fully serialized in the slow path below. By the time this task * acquires the jump label lock the value is back to one and the * retry under the lock must succeed. */ v = atomic_read(&key->enabled); do { /* * Warn about the '-1' case though; since that means a * decrement is concurrent with a first (0->1) increment. IOW * people are trying to disable something that wasn't yet fully * enabled. This suggests an ordering problem on the user side. */ WARN_ON_ONCE(v < 0); /* * Warn about underflow, and lie about success in an attempt to * not make things worse. */ if (WARN_ON_ONCE(v == 0)) return true; if (v <= 1) return false; } while (!likely(atomic_try_cmpxchg(&key->enabled, &v, v - 1))); return true; } static void __static_key_slow_dec_cpuslocked(struct static_key *key) { lockdep_assert_cpus_held(); int val; if (static_key_dec_not_one(key)) return; guard(mutex)(&jump_label_mutex); val = atomic_read(&key->enabled); /* * It should be impossible to observe -1 with jump_label_mutex held, * see static_key_slow_inc_cpuslocked(). */ if (WARN_ON_ONCE(val == -1)) return; /* * Cannot already be 0, something went sideways. */ if (WARN_ON_ONCE(val == 0)) return; if (atomic_dec_and_test(&key->enabled)) jump_label_update(key); } static void __static_key_slow_dec(struct static_key *key) { cpus_read_lock(); __static_key_slow_dec_cpuslocked(key); cpus_read_unlock(); } void jump_label_update_timeout(struct work_struct *work) { struct static_key_deferred *key = container_of(work, struct static_key_deferred, work.work); __static_key_slow_dec(&key->key); } EXPORT_SYMBOL_GPL(jump_label_update_timeout); void static_key_slow_dec(struct static_key *key) { STATIC_KEY_CHECK_USE(key); __static_key_slow_dec(key); } EXPORT_SYMBOL_GPL(static_key_slow_dec); void static_key_slow_dec_cpuslocked(struct static_key *key) { STATIC_KEY_CHECK_USE(key); __static_key_slow_dec_cpuslocked(key); } void __static_key_slow_dec_deferred(struct static_key *key, struct delayed_work *work, unsigned long timeout) { STATIC_KEY_CHECK_USE(key); if (static_key_dec_not_one(key)) return; schedule_delayed_work(work, timeout); } EXPORT_SYMBOL_GPL(__static_key_slow_dec_deferred); void __static_key_deferred_flush(void *key, struct delayed_work *work) { STATIC_KEY_CHECK_USE(key); flush_delayed_work(work); } EXPORT_SYMBOL_GPL(__static_key_deferred_flush); void jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl) { STATIC_KEY_CHECK_USE(key); key->timeout = rl; INIT_DELAYED_WORK(&key->work, jump_label_update_timeout); } EXPORT_SYMBOL_GPL(jump_label_rate_limit); static int addr_conflict(struct jump_entry *entry, void *start, void *end) { if (jump_entry_code(entry) <= (unsigned long)end && jump_entry_code(entry) + jump_entry_size(entry) > (unsigned long)start) return 1; return 0; } static int __jump_label_text_reserved(struct jump_entry *iter_start, struct jump_entry *iter_stop, void *start, void *end, bool init) { struct jump_entry *iter; iter = iter_start; while (iter < iter_stop) { if (init || !jump_entry_is_init(iter)) { if (addr_conflict(iter, start, end)) return 1; } iter++; } return 0; } #ifndef arch_jump_label_transform_static static void arch_jump_label_transform_static(struct jump_entry *entry, enum jump_label_type type) { /* nothing to do on most architectures */ } #endif static inline struct jump_entry *static_key_entries(struct static_key *key) { WARN_ON_ONCE(key->type & JUMP_TYPE_LINKED); return (struct jump_entry *)(key->type & ~JUMP_TYPE_MASK); } static inline bool static_key_type(struct static_key *key) { return key->type & JUMP_TYPE_TRUE; } static inline bool static_key_linked(struct static_key *key) { return key->type & JUMP_TYPE_LINKED; } static inline void static_key_clear_linked(struct static_key *key) { key->type &= ~JUMP_TYPE_LINKED; } static inline void static_key_set_linked(struct static_key *key) { key->type |= JUMP_TYPE_LINKED; } /*** * A 'struct static_key' uses a union such that it either points directly * to a table of 'struct jump_entry' or to a linked list of modules which in * turn point to 'struct jump_entry' tables. * * The two lower bits of the pointer are used to keep track of which pointer * type is in use and to store the initial branch direction, we use an access * function which preserves these bits. */ static void static_key_set_entries(struct static_key *key, struct jump_entry *entries) { unsigned long type; WARN_ON_ONCE((unsigned long)entries & JUMP_TYPE_MASK); type = key->type & JUMP_TYPE_MASK; key->entries = entries; key->type |= type; } static enum jump_label_type jump_label_type(struct jump_entry *entry) { struct static_key *key = jump_entry_key(entry); bool enabled = static_key_enabled(key); bool branch = jump_entry_is_branch(entry); /* See the comment in linux/jump_label.h */ return enabled ^ branch; } static bool jump_label_can_update(struct jump_entry *entry, bool init) { /* * Cannot update code that was in an init text area. */ if (!init && jump_entry_is_init(entry)) return false; if (!kernel_text_address(jump_entry_code(entry))) { /* * This skips patching built-in __exit, which * is part of init_section_contains() but is * not part of kernel_text_address(). * * Skipping built-in __exit is fine since it * will never be executed. */ WARN_ONCE(!jump_entry_is_init(entry), "can't patch jump_label at %pS", (void *)jump_entry_code(entry)); return false; } return true; } #ifndef HAVE_JUMP_LABEL_BATCH static void __jump_label_update(struct static_key *key, struct jump_entry *entry, struct jump_entry *stop, bool init) { for (; (entry < stop) && (jump_entry_key(entry) == key); entry++) { if (jump_label_can_update(entry, init)) arch_jump_label_transform(entry, jump_label_type(entry)); } } #else static void __jump_label_update(struct static_key *key, struct jump_entry *entry, struct jump_entry *stop, bool init) { for (; (entry < stop) && (jump_entry_key(entry) == key); entry++) { if (!jump_label_can_update(entry, init)) continue; if (!arch_jump_label_transform_queue(entry, jump_label_type(entry))) { /* * Queue is full: Apply the current queue and try again. */ arch_jump_label_transform_apply(); BUG_ON(!arch_jump_label_transform_queue(entry, jump_label_type(entry))); } } arch_jump_label_transform_apply(); } #endif void __init jump_label_init(void) { struct jump_entry *iter_start = __start___jump_table; struct jump_entry *iter_stop = __stop___jump_table; struct static_key *key = NULL; struct jump_entry *iter; /* * Since we are initializing the static_key.enabled field with * with the 'raw' int values (to avoid pulling in atomic.h) in * jump_label.h, let's make sure that is safe. There are only two * cases to check since we initialize to 0 or 1. */ BUILD_BUG_ON((int)ATOMIC_INIT(0) != 0); BUILD_BUG_ON((int)ATOMIC_INIT(1) != 1); if (static_key_initialized) return; cpus_read_lock(); jump_label_lock(); jump_label_sort_entries(iter_start, iter_stop); for (iter = iter_start; iter < iter_stop; iter++) { struct static_key *iterk; bool in_init; /* rewrite NOPs */ if (jump_label_type(iter) == JUMP_LABEL_NOP) arch_jump_label_transform_static(iter, JUMP_LABEL_NOP); in_init = init_section_contains((void *)jump_entry_code(iter), 1); jump_entry_set_init(iter, in_init); iterk = jump_entry_key(iter); if (iterk == key) continue; key = iterk; static_key_set_entries(key, iter); } static_key_initialized = true; jump_label_unlock(); cpus_read_unlock(); } static inline bool static_key_sealed(struct static_key *key) { return (key->type & JUMP_TYPE_LINKED) && !(key->type & ~JUMP_TYPE_MASK); } static inline void static_key_seal(struct static_key *key) { unsigned long type = key->type & JUMP_TYPE_TRUE; key->type = JUMP_TYPE_LINKED | type; } void jump_label_init_ro(void) { struct jump_entry *iter_start = __start___jump_table; struct jump_entry *iter_stop = __stop___jump_table; struct jump_entry *iter; if (WARN_ON_ONCE(!static_key_initialized)) return; cpus_read_lock(); jump_label_lock(); for (iter = iter_start; iter < iter_stop; iter++) { struct static_key *iterk = jump_entry_key(iter); if (!is_kernel_ro_after_init((unsigned long)iterk)) continue; if (static_key_sealed(iterk)) continue; static_key_seal(iterk); } jump_label_unlock(); cpus_read_unlock(); } #ifdef CONFIG_MODULES enum jump_label_type jump_label_init_type(struct jump_entry *entry) { struct static_key *key = jump_entry_key(entry); bool type = static_key_type(key); bool branch = jump_entry_is_branch(entry); /* See the comment in linux/jump_label.h */ return type ^ branch; } struct static_key_mod { struct static_key_mod *next; struct jump_entry *entries; struct module *mod; }; static inline struct static_key_mod *static_key_mod(struct static_key *key) { WARN_ON_ONCE(!static_key_linked(key)); return (struct static_key_mod *)(key->type & ~JUMP_TYPE_MASK); } /*** * key->type and key->next are the same via union. * This sets key->next and preserves the type bits. * * See additional comments above static_key_set_entries(). */ static void static_key_set_mod(struct static_key *key, struct static_key_mod *mod) { unsigned long type; WARN_ON_ONCE((unsigned long)mod & JUMP_TYPE_MASK); type = key->type & JUMP_TYPE_MASK; key->next = mod; key->type |= type; } static int __jump_label_mod_text_reserved(void *start, void *end) { struct module *mod; int ret; preempt_disable(); mod = __module_text_address((unsigned long)start); WARN_ON_ONCE(__module_text_address((unsigned long)end) != mod); if (!try_module_get(mod)) mod = NULL; preempt_enable(); if (!mod) return 0; ret = __jump_label_text_reserved(mod->jump_entries, mod->jump_entries + mod->num_jump_entries, start, end, mod->state == MODULE_STATE_COMING); module_put(mod); return ret; } static void __jump_label_mod_update(struct static_key *key) { struct static_key_mod *mod; for (mod = static_key_mod(key); mod; mod = mod->next) { struct jump_entry *stop; struct module *m; /* * NULL if the static_key is defined in a module * that does not use it */ if (!mod->entries) continue; m = mod->mod; if (!m) stop = __stop___jump_table; else stop = m->jump_entries + m->num_jump_entries; __jump_label_update(key, mod->entries, stop, m && m->state == MODULE_STATE_COMING); } } static int jump_label_add_module(struct module *mod) { struct jump_entry *iter_start = mod->jump_entries; struct jump_entry *iter_stop = iter_start + mod->num_jump_entries; struct jump_entry *iter; struct static_key *key = NULL; struct static_key_mod *jlm, *jlm2; /* if the module doesn't have jump label entries, just return */ if (iter_start == iter_stop) return 0; jump_label_sort_entries(iter_start, iter_stop); for (iter = iter_start; iter < iter_stop; iter++) { struct static_key *iterk; bool in_init; in_init = within_module_init(jump_entry_code(iter), mod); jump_entry_set_init(iter, in_init); iterk = jump_entry_key(iter); if (iterk == key) continue; key = iterk; if (within_module((unsigned long)key, mod)) { static_key_set_entries(key, iter); continue; } /* * If the key was sealed at init, then there's no need to keep a * reference to its module entries - just patch them now and be * done with it. */ if (static_key_sealed(key)) goto do_poke; jlm = kzalloc(sizeof(struct static_key_mod), GFP_KERNEL); if (!jlm) return -ENOMEM; if (!static_key_linked(key)) { jlm2 = kzalloc(sizeof(struct static_key_mod), GFP_KERNEL); if (!jlm2) { kfree(jlm); return -ENOMEM; } preempt_disable(); jlm2->mod = __module_address((unsigned long)key); preempt_enable(); jlm2->entries = static_key_entries(key); jlm2->next = NULL; static_key_set_mod(key, jlm2); static_key_set_linked(key); } jlm->mod = mod; jlm->entries = iter; jlm->next = static_key_mod(key); static_key_set_mod(key, jlm); static_key_set_linked(key); /* Only update if we've changed from our initial state */ do_poke: if (jump_label_type(iter) != jump_label_init_type(iter)) __jump_label_update(key, iter, iter_stop, true); } return 0; } static void jump_label_del_module(struct module *mod) { struct jump_entry *iter_start = mod->jump_entries; struct jump_entry *iter_stop = iter_start + mod->num_jump_entries; struct jump_entry *iter; struct static_key *key = NULL; struct static_key_mod *jlm, **prev; for (iter = iter_start; iter < iter_stop; iter++) { if (jump_entry_key(iter) == key) continue; key = jump_entry_key(iter); if (within_module((unsigned long)key, mod)) continue; /* No @jlm allocated because key was sealed at init. */ if (static_key_sealed(key)) continue; /* No memory during module load */ if (WARN_ON(!static_key_linked(key))) continue; prev = &key->next; jlm = static_key_mod(key); while (jlm && jlm->mod != mod) { prev = &jlm->next; jlm = jlm->next; } /* No memory during module load */ if (WARN_ON(!jlm)) continue; if (prev == &key->next) static_key_set_mod(key, jlm->next); else *prev = jlm->next; kfree(jlm); jlm = static_key_mod(key); /* if only one etry is left, fold it back into the static_key */ if (jlm->next == NULL) { static_key_set_entries(key, jlm->entries); static_key_clear_linked(key); kfree(jlm); } } } static int jump_label_module_notify(struct notifier_block *self, unsigned long val, void *data) { struct module *mod = data; int ret = 0; cpus_read_lock(); jump_label_lock(); switch (val) { case MODULE_STATE_COMING: ret = jump_label_add_module(mod); if (ret) { WARN(1, "Failed to allocate memory: jump_label may not work properly.\n"); jump_label_del_module(mod); } break; case MODULE_STATE_GOING: jump_label_del_module(mod); break; } jump_label_unlock(); cpus_read_unlock(); return notifier_from_errno(ret); } static struct notifier_block jump_label_module_nb = { .notifier_call = jump_label_module_notify, .priority = 1, /* higher than tracepoints */ }; static __init int jump_label_init_module(void) { return register_module_notifier(&jump_label_module_nb); } early_initcall(jump_label_init_module); #endif /* CONFIG_MODULES */ /*** * jump_label_text_reserved - check if addr range is reserved * @start: start text addr * @end: end text addr * * checks if the text addr located between @start and @end * overlaps with any of the jump label patch addresses. Code * that wants to modify kernel text should first verify that * it does not overlap with any of the jump label addresses. * Caller must hold jump_label_mutex. * * returns 1 if there is an overlap, 0 otherwise */ int jump_label_text_reserved(void *start, void *end) { bool init = system_state < SYSTEM_RUNNING; int ret = __jump_label_text_reserved(__start___jump_table, __stop___jump_table, start, end, init); if (ret) return ret; #ifdef CONFIG_MODULES ret = __jump_label_mod_text_reserved(start, end); #endif return ret; } static void jump_label_update(struct static_key *key) { struct jump_entry *stop = __stop___jump_table; bool init = system_state < SYSTEM_RUNNING; struct jump_entry *entry; #ifdef CONFIG_MODULES struct module *mod; if (static_key_linked(key)) { __jump_label_mod_update(key); return; } preempt_disable(); mod = __module_address((unsigned long)key); if (mod) { stop = mod->jump_entries + mod->num_jump_entries; init = mod->state == MODULE_STATE_COMING; } preempt_enable(); #endif entry = static_key_entries(key); /* if there are no users, entry can be NULL */ if (entry) __jump_label_update(key, entry, stop, init); } #ifdef CONFIG_STATIC_KEYS_SELFTEST static DEFINE_STATIC_KEY_TRUE(sk_true); static DEFINE_STATIC_KEY_FALSE(sk_false); static __init int jump_label_test(void) { int i; for (i = 0; i < 2; i++) { WARN_ON(static_key_enabled(&sk_true.key) != true); WARN_ON(static_key_enabled(&sk_false.key) != false); WARN_ON(!static_branch_likely(&sk_true)); WARN_ON(!static_branch_unlikely(&sk_true)); WARN_ON(static_branch_likely(&sk_false)); WARN_ON(static_branch_unlikely(&sk_false)); static_branch_disable(&sk_true); static_branch_enable(&sk_false); WARN_ON(static_key_enabled(&sk_true.key) == true); WARN_ON(static_key_enabled(&sk_false.key) == false); WARN_ON(static_branch_likely(&sk_true)); WARN_ON(static_branch_unlikely(&sk_true)); WARN_ON(!static_branch_likely(&sk_false)); WARN_ON(!static_branch_unlikely(&sk_false)); static_branch_enable(&sk_true); static_branch_disable(&sk_false); } return 0; } early_initcall(jump_label_test); #endif /* STATIC_KEYS_SELFTEST */
11 7 7 8 8 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/ext4/xattr_user.c * Handler for extended user attributes. * * Copyright (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org> */ #include <linux/string.h> #include <linux/fs.h> #include "ext4_jbd2.h" #include "ext4.h" #include "xattr.h" static bool ext4_xattr_user_list(struct dentry *dentry) { return test_opt(dentry->d_sb, XATTR_USER); } static int ext4_xattr_user_get(const struct xattr_handler *handler, struct dentry *unused, struct inode *inode, const char *name, void *buffer, size_t size) { if (!test_opt(inode->i_sb, XATTR_USER)) return -EOPNOTSUPP; return ext4_xattr_get(inode, EXT4_XATTR_INDEX_USER, name, buffer, size); } static int ext4_xattr_user_set(const struct xattr_handler *handler, struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *name, const void *value, size_t size, int flags) { if (!test_opt(inode->i_sb, XATTR_USER)) return -EOPNOTSUPP; return ext4_xattr_set(inode, EXT4_XATTR_INDEX_USER, name, value, size, flags); } const struct xattr_handler ext4_xattr_user_handler = { .prefix = XATTR_USER_PREFIX, .list = ext4_xattr_user_list, .get = ext4_xattr_user_get, .set = ext4_xattr_user_set, };
4 4 4 3 4 5 1 4 6 1 1 3 3 3 3 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 // SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> #include <linux/errno.h> #include <linux/mm.h> #include <linux/slab.h> #include <linux/eventfd.h> #include <linux/eventpoll.h> #include <linux/io_uring.h> #include <linux/io_uring_types.h> #include "io-wq.h" #include "eventfd.h" struct io_ev_fd { struct eventfd_ctx *cq_ev_fd; unsigned int eventfd_async: 1; struct rcu_head rcu; refcount_t refs; atomic_t ops; }; enum { IO_EVENTFD_OP_SIGNAL_BIT, }; static void io_eventfd_free(struct rcu_head *rcu) { struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu); eventfd_ctx_put(ev_fd->cq_ev_fd); kfree(ev_fd); } static void io_eventfd_do_signal(struct rcu_head *rcu) { struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu); eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE); if (refcount_dec_and_test(&ev_fd->refs)) io_eventfd_free(rcu); } void io_eventfd_signal(struct io_ring_ctx *ctx) { struct io_ev_fd *ev_fd = NULL; if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED) return; guard(rcu)(); /* * rcu_dereference ctx->io_ev_fd once and use it for both for checking * and eventfd_signal */ ev_fd = rcu_dereference(ctx->io_ev_fd); /* * Check again if ev_fd exists incase an io_eventfd_unregister call * completed between the NULL check of ctx->io_ev_fd at the start of * the function and rcu_read_lock. */ if (unlikely(!ev_fd)) return; if (!refcount_inc_not_zero(&ev_fd->refs)) return; if (ev_fd->eventfd_async && !io_wq_current_is_worker()) goto out; if (likely(eventfd_signal_allowed())) { eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE); } else { if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops)) { call_rcu_hurry(&ev_fd->rcu, io_eventfd_do_signal); return; } } out: if (refcount_dec_and_test(&ev_fd->refs)) call_rcu(&ev_fd->rcu, io_eventfd_free); } void io_eventfd_flush_signal(struct io_ring_ctx *ctx) { bool skip; spin_lock(&ctx->completion_lock); /* * Eventfd should only get triggered when at least one event has been * posted. Some applications rely on the eventfd notification count * only changing IFF a new CQE has been added to the CQ ring. There's * no depedency on 1:1 relationship between how many times this * function is called (and hence the eventfd count) and number of CQEs * posted to the CQ ring. */ skip = ctx->cached_cq_tail == ctx->evfd_last_cq_tail; ctx->evfd_last_cq_tail = ctx->cached_cq_tail; spin_unlock(&ctx->completion_lock); if (skip) return; io_eventfd_signal(ctx); } int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg, unsigned int eventfd_async) { struct io_ev_fd *ev_fd; __s32 __user *fds = arg; int fd; ev_fd = rcu_dereference_protected(ctx->io_ev_fd, lockdep_is_held(&ctx->uring_lock)); if (ev_fd) return -EBUSY; if (copy_from_user(&fd, fds, sizeof(*fds))) return -EFAULT; ev_fd = kmalloc(sizeof(*ev_fd), GFP_KERNEL); if (!ev_fd) return -ENOMEM; ev_fd->cq_ev_fd = eventfd_ctx_fdget(fd); if (IS_ERR(ev_fd->cq_ev_fd)) { int ret = PTR_ERR(ev_fd->cq_ev_fd); kfree(ev_fd); return ret; } spin_lock(&ctx->completion_lock); ctx->evfd_last_cq_tail = ctx->cached_cq_tail; spin_unlock(&ctx->completion_lock); ev_fd->eventfd_async = eventfd_async; ctx->has_evfd = true; refcount_set(&ev_fd->refs, 1); atomic_set(&ev_fd->ops, 0); rcu_assign_pointer(ctx->io_ev_fd, ev_fd); return 0; } int io_eventfd_unregister(struct io_ring_ctx *ctx) { struct io_ev_fd *ev_fd; ev_fd = rcu_dereference_protected(ctx->io_ev_fd, lockdep_is_held(&ctx->uring_lock)); if (ev_fd) { ctx->has_evfd = false; rcu_assign_pointer(ctx->io_ev_fd, NULL); if (refcount_dec_and_test(&ev_fd->refs)) call_rcu(&ev_fd->rcu, io_eventfd_free); return 0; } return -ENXIO; }
10 10 10 10 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 // SPDX-License-Identifier: GPL-2.0-or-later /* * Sunplus spca561 subdriver * * Copyright (C) 2004 Michel Xhaard mxhaard@magic.fr * * V4L2 by Jean-Francois Moine <http://moinejf.free.fr> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #define MODULE_NAME "spca561" #include <linux/input.h> #include "gspca.h" MODULE_AUTHOR("Michel Xhaard <mxhaard@users.sourceforge.net>"); MODULE_DESCRIPTION("GSPCA/SPCA561 USB Camera Driver"); MODULE_LICENSE("GPL"); #define EXPOSURE_MAX (2047 + 325) /* specific webcam descriptor */ struct sd { struct gspca_dev gspca_dev; /* !! must be the first item */ struct { /* hue/contrast control cluster */ struct v4l2_ctrl *contrast; struct v4l2_ctrl *hue; }; struct v4l2_ctrl *autogain; #define EXPO12A_DEF 3 __u8 expo12a; /* expo/gain? for rev 12a */ __u8 chip_revision; #define Rev012A 0 #define Rev072A 1 signed char ag_cnt; #define AG_CNT_START 13 }; static const struct v4l2_pix_format sif_012a_mode[] = { {160, 120, V4L2_PIX_FMT_SGBRG8, V4L2_FIELD_NONE, .bytesperline = 160, .sizeimage = 160 * 120, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 3}, {176, 144, V4L2_PIX_FMT_SGBRG8, V4L2_FIELD_NONE, .bytesperline = 176, .sizeimage = 176 * 144, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 2}, {320, 240, V4L2_PIX_FMT_SPCA561, V4L2_FIELD_NONE, .bytesperline = 320, .sizeimage = 320 * 240 * 4 / 8, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 1}, {352, 288, V4L2_PIX_FMT_SPCA561, V4L2_FIELD_NONE, .bytesperline = 352, .sizeimage = 352 * 288 * 4 / 8, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 0}, }; static const struct v4l2_pix_format sif_072a_mode[] = { {160, 120, V4L2_PIX_FMT_SGBRG8, V4L2_FIELD_NONE, .bytesperline = 160, .sizeimage = 160 * 120, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 3}, {176, 144, V4L2_PIX_FMT_SGBRG8, V4L2_FIELD_NONE, .bytesperline = 176, .sizeimage = 176 * 144, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 2}, {320, 240, V4L2_PIX_FMT_SGBRG8, V4L2_FIELD_NONE, .bytesperline = 320, .sizeimage = 320 * 240, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 1}, {352, 288, V4L2_PIX_FMT_SGBRG8, V4L2_FIELD_NONE, .bytesperline = 352, .sizeimage = 352 * 288, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 0}, }; /* * Initialization data * I'm not very sure how to split initialization from open data * chunks. For now, we'll consider everything as initialization */ /* Frame packet header offsets for the spca561 */ #define SPCA561_OFFSET_SNAP 1 #define SPCA561_OFFSET_TYPE 2 #define SPCA561_OFFSET_COMPRESS 3 #define SPCA561_OFFSET_FRAMSEQ 4 #define SPCA561_OFFSET_GPIO 5 #define SPCA561_OFFSET_USBBUFF 6 #define SPCA561_OFFSET_WIN2GRAVE 7 #define SPCA561_OFFSET_WIN2RAVE 8 #define SPCA561_OFFSET_WIN2BAVE 9 #define SPCA561_OFFSET_WIN2GBAVE 10 #define SPCA561_OFFSET_WIN1GRAVE 11 #define SPCA561_OFFSET_WIN1RAVE 12 #define SPCA561_OFFSET_WIN1BAVE 13 #define SPCA561_OFFSET_WIN1GBAVE 14 #define SPCA561_OFFSET_FREQ 15 #define SPCA561_OFFSET_VSYNC 16 #define SPCA561_INDEX_I2C_BASE 0x8800 #define SPCA561_SNAPBIT 0x20 #define SPCA561_SNAPCTRL 0x40 static const u16 rev72a_reset[][2] = { {0x0000, 0x8114}, /* Software GPIO output data */ {0x0001, 0x8114}, /* Software GPIO output data */ {0x0000, 0x8112}, /* Some kind of reset */ {} }; static const __u16 rev72a_init_data1[][2] = { {0x0003, 0x8701}, /* PCLK clock delay adjustment */ {0x0001, 0x8703}, /* HSYNC from cmos inverted */ {0x0011, 0x8118}, /* Enable and conf sensor */ {0x0001, 0x8118}, /* Conf sensor */ {0x0092, 0x8804}, /* I know nothing about these */ {0x0010, 0x8802}, /* 0x88xx registers, so I won't */ {} }; static const u16 rev72a_init_sensor1[][2] = { {0x0001, 0x000d}, {0x0002, 0x0018}, {0x0004, 0x0165}, {0x0005, 0x0021}, {0x0007, 0x00aa}, {0x0020, 0x1504}, {0x0039, 0x0002}, {0x0035, 0x0010}, {0x0009, 0x1049}, {0x0028, 0x000b}, {0x003b, 0x000f}, {0x003c, 0x0000}, {} }; static const __u16 rev72a_init_data2[][2] = { {0x0018, 0x8601}, /* Pixel/line selection for color separation */ {0x0000, 0x8602}, /* Optical black level for user setting */ {0x0060, 0x8604}, /* Optical black horizontal offset */ {0x0002, 0x8605}, /* Optical black vertical offset */ {0x0000, 0x8603}, /* Non-automatic optical black level */ {0x0002, 0x865b}, /* Horizontal offset for valid pixels */ {0x0000, 0x865f}, /* Vertical valid pixels window (x2) */ {0x00b0, 0x865d}, /* Horizontal valid pixels window (x2) */ {0x0090, 0x865e}, /* Vertical valid lines window (x2) */ {0x00e0, 0x8406}, /* Memory buffer threshold */ {0x0000, 0x8660}, /* Compensation memory stuff */ {0x0002, 0x8201}, /* Output address for r/w serial EEPROM */ {0x0008, 0x8200}, /* Clear valid bit for serial EEPROM */ {0x0001, 0x8200}, /* OprMode to be executed by hardware */ /* from ms-win */ {0x0000, 0x8611}, /* R offset for white balance */ {0x00fd, 0x8612}, /* Gr offset for white balance */ {0x0003, 0x8613}, /* B offset for white balance */ {0x0000, 0x8614}, /* Gb offset for white balance */ /* from ms-win */ {0x0035, 0x8651}, /* R gain for white balance */ {0x0040, 0x8652}, /* Gr gain for white balance */ {0x005f, 0x8653}, /* B gain for white balance */ {0x0040, 0x8654}, /* Gb gain for white balance */ {0x0002, 0x8502}, /* Maximum average bit rate stuff */ {0x0011, 0x8802}, {0x0087, 0x8700}, /* Set master clock (96Mhz????) */ {0x0081, 0x8702}, /* Master clock output enable */ {0x0000, 0x8500}, /* Set image type (352x288 no compression) */ /* Originally was 0x0010 (352x288 compression) */ {0x0002, 0x865b}, /* Horizontal offset for valid pixels */ {0x0003, 0x865c}, /* Vertical offset for valid lines */ {} }; static const u16 rev72a_init_sensor2[][2] = { {0x0003, 0x0121}, {0x0004, 0x0165}, {0x0005, 0x002f}, /* blanking control column */ {0x0006, 0x0000}, /* blanking mode row*/ {0x000a, 0x0002}, {0x0009, 0x1061}, /* setexposure times && pixel clock * 0001 0 | 000 0110 0001 */ {0x0035, 0x0014}, {} }; /******************** QC Express etch2 stuff ********************/ static const __u16 Pb100_1map8300[][2] = { /* reg, value */ {0x8320, 0x3304}, {0x8303, 0x0125}, /* image area */ {0x8304, 0x0169}, {0x8328, 0x000b}, {0x833c, 0x0001}, /*fixme: win:07*/ {0x832f, 0x1904}, /*fixme: was 0419*/ {0x8307, 0x00aa}, {0x8301, 0x0003}, {0x8302, 0x000e}, {} }; static const __u16 Pb100_2map8300[][2] = { /* reg, value */ {0x8339, 0x0000}, {0x8307, 0x00aa}, {} }; static const __u16 spca561_161rev12A_data1[][2] = { {0x29, 0x8118}, /* Control register (various enable bits) */ {0x08, 0x8114}, /* GPIO: Led off */ {0x0e, 0x8112}, /* 0x0e stream off 0x3e stream on */ {0x00, 0x8102}, /* white balance - new */ {0x92, 0x8804}, {0x04, 0x8802}, /* windows uses 08 */ {} }; static const __u16 spca561_161rev12A_data2[][2] = { {0x21, 0x8118}, {0x10, 0x8500}, {0x07, 0x8601}, {0x07, 0x8602}, {0x04, 0x8501}, {0x07, 0x8201}, /* windows uses 02 */ {0x08, 0x8200}, {0x01, 0x8200}, {0x90, 0x8604}, {0x00, 0x8605}, {0xb0, 0x8603}, /* sensor gains */ {0x07, 0x8601}, /* white balance - new */ {0x07, 0x8602}, /* white balance - new */ {0x00, 0x8610}, /* *red */ {0x00, 0x8611}, /* 3f *green */ {0x00, 0x8612}, /* green *blue */ {0x00, 0x8613}, /* blue *green */ {0x43, 0x8614}, /* green *red - white balance - was 0x35 */ {0x40, 0x8615}, /* 40 *green - white balance - was 0x35 */ {0x71, 0x8616}, /* 7a *blue - white balance - was 0x35 */ {0x40, 0x8617}, /* 40 *green - white balance - was 0x35 */ {0x0c, 0x8620}, /* 0c */ {0xc8, 0x8631}, /* c8 */ {0xc8, 0x8634}, /* c8 */ {0x23, 0x8635}, /* 23 */ {0x1f, 0x8636}, /* 1f */ {0xdd, 0x8637}, /* dd */ {0xe1, 0x8638}, /* e1 */ {0x1d, 0x8639}, /* 1d */ {0x21, 0x863a}, /* 21 */ {0xe3, 0x863b}, /* e3 */ {0xdf, 0x863c}, /* df */ {0xf0, 0x8505}, {0x32, 0x850a}, /* {0x99, 0x8700}, * - white balance - new (removed) */ /* HDG we used to do this in stop0, making the init state and the state after a start / stop different, so do this here instead. */ {0x29, 0x8118}, {} }; static void reg_w_val(struct gspca_dev *gspca_dev, __u16 index, __u8 value) { int ret; struct usb_device *dev = gspca_dev->dev; ret = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), 0, /* request */ USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, index, NULL, 0, 500); gspca_dbg(gspca_dev, D_USBO, "reg write: 0x%02x:0x%02x\n", index, value); if (ret < 0) pr_err("reg write: error %d\n", ret); } static void write_vector(struct gspca_dev *gspca_dev, const __u16 data[][2]) { int i; i = 0; while (data[i][1] != 0) { reg_w_val(gspca_dev, data[i][1], data[i][0]); i++; } } /* read 'len' bytes to gspca_dev->usb_buf */ static void reg_r(struct gspca_dev *gspca_dev, __u16 index, __u16 length) { usb_control_msg(gspca_dev->dev, usb_rcvctrlpipe(gspca_dev->dev, 0), 0, /* request */ USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, /* value */ index, gspca_dev->usb_buf, length, 500); } /* write 'len' bytes from gspca_dev->usb_buf */ static void reg_w_buf(struct gspca_dev *gspca_dev, __u16 index, __u16 len) { usb_control_msg(gspca_dev->dev, usb_sndctrlpipe(gspca_dev->dev, 0), 0, /* request */ USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, /* value */ index, gspca_dev->usb_buf, len, 500); } static void i2c_write(struct gspca_dev *gspca_dev, __u16 value, __u16 reg) { int retry = 60; reg_w_val(gspca_dev, 0x8801, reg); reg_w_val(gspca_dev, 0x8805, value); reg_w_val(gspca_dev, 0x8800, value >> 8); do { reg_r(gspca_dev, 0x8803, 1); if (!gspca_dev->usb_buf[0]) return; msleep(10); } while (--retry); } static int i2c_read(struct gspca_dev *gspca_dev, __u16 reg, __u8 mode) { int retry = 60; __u8 value; reg_w_val(gspca_dev, 0x8804, 0x92); reg_w_val(gspca_dev, 0x8801, reg); reg_w_val(gspca_dev, 0x8802, mode | 0x01); do { reg_r(gspca_dev, 0x8803, 1); if (!gspca_dev->usb_buf[0]) { reg_r(gspca_dev, 0x8800, 1); value = gspca_dev->usb_buf[0]; reg_r(gspca_dev, 0x8805, 1); return ((int) value << 8) | gspca_dev->usb_buf[0]; } msleep(10); } while (--retry); return -1; } static void sensor_mapwrite(struct gspca_dev *gspca_dev, const __u16 (*sensormap)[2]) { while ((*sensormap)[0]) { gspca_dev->usb_buf[0] = (*sensormap)[1]; gspca_dev->usb_buf[1] = (*sensormap)[1] >> 8; reg_w_buf(gspca_dev, (*sensormap)[0], 2); sensormap++; } } static void write_sensor_72a(struct gspca_dev *gspca_dev, const __u16 (*sensor)[2]) { while ((*sensor)[0]) { i2c_write(gspca_dev, (*sensor)[1], (*sensor)[0]); sensor++; } } static void init_161rev12A(struct gspca_dev *gspca_dev) { write_vector(gspca_dev, spca561_161rev12A_data1); sensor_mapwrite(gspca_dev, Pb100_1map8300); /*fixme: should be in sd_start*/ write_vector(gspca_dev, spca561_161rev12A_data2); sensor_mapwrite(gspca_dev, Pb100_2map8300); } /* this function is called at probe time */ static int sd_config(struct gspca_dev *gspca_dev, const struct usb_device_id *id) { struct sd *sd = (struct sd *) gspca_dev; struct cam *cam; __u16 vendor, product; __u8 data1, data2; /* Read frm global register the USB product and vendor IDs, just to * prove that we can communicate with the device. This works, which * confirms at we are communicating properly and that the device * is a 561. */ reg_r(gspca_dev, 0x8104, 1); data1 = gspca_dev->usb_buf[0]; reg_r(gspca_dev, 0x8105, 1); data2 = gspca_dev->usb_buf[0]; vendor = (data2 << 8) | data1; reg_r(gspca_dev, 0x8106, 1); data1 = gspca_dev->usb_buf[0]; reg_r(gspca_dev, 0x8107, 1); data2 = gspca_dev->usb_buf[0]; product = (data2 << 8) | data1; if (vendor != id->idVendor || product != id->idProduct) { gspca_dbg(gspca_dev, D_PROBE, "Bad vendor / product from device\n"); return -EINVAL; } cam = &gspca_dev->cam; cam->needs_full_bandwidth = 1; sd->chip_revision = id->driver_info; if (sd->chip_revision == Rev012A) { cam->cam_mode = sif_012a_mode; cam->nmodes = ARRAY_SIZE(sif_012a_mode); } else { cam->cam_mode = sif_072a_mode; cam->nmodes = ARRAY_SIZE(sif_072a_mode); } sd->expo12a = EXPO12A_DEF; return 0; } /* this function is called at probe and resume time */ static int sd_init_12a(struct gspca_dev *gspca_dev) { gspca_dbg(gspca_dev, D_STREAM, "Chip revision: 012a\n"); init_161rev12A(gspca_dev); return 0; } static int sd_init_72a(struct gspca_dev *gspca_dev) { gspca_dbg(gspca_dev, D_STREAM, "Chip revision: 072a\n"); write_vector(gspca_dev, rev72a_reset); msleep(200); write_vector(gspca_dev, rev72a_init_data1); write_sensor_72a(gspca_dev, rev72a_init_sensor1); write_vector(gspca_dev, rev72a_init_data2); write_sensor_72a(gspca_dev, rev72a_init_sensor2); reg_w_val(gspca_dev, 0x8112, 0x30); return 0; } static void setbrightness(struct gspca_dev *gspca_dev, s32 val) { struct sd *sd = (struct sd *) gspca_dev; __u16 reg; if (sd->chip_revision == Rev012A) reg = 0x8610; else reg = 0x8611; reg_w_val(gspca_dev, reg + 0, val); /* R */ reg_w_val(gspca_dev, reg + 1, val); /* Gr */ reg_w_val(gspca_dev, reg + 2, val); /* B */ reg_w_val(gspca_dev, reg + 3, val); /* Gb */ } static void setwhite(struct gspca_dev *gspca_dev, s32 white, s32 contrast) { struct sd *sd = (struct sd *) gspca_dev; __u8 blue, red; __u16 reg; /* try to emulate MS-win as possible */ red = 0x20 + white * 3 / 8; blue = 0x90 - white * 5 / 8; if (sd->chip_revision == Rev012A) { reg = 0x8614; } else { reg = 0x8651; red += contrast - 0x20; blue += contrast - 0x20; reg_w_val(gspca_dev, 0x8652, contrast + 0x20); /* Gr */ reg_w_val(gspca_dev, 0x8654, contrast + 0x20); /* Gb */ } reg_w_val(gspca_dev, reg, red); reg_w_val(gspca_dev, reg + 2, blue); } /* rev 12a only */ static void setexposure(struct gspca_dev *gspca_dev, s32 val) { int i, expo = 0; /* Register 0x8309 controls exposure for the spca561, the basic exposure setting goes from 1-2047, where 1 is completely dark and 2047 is very bright. It not only influences exposure but also the framerate (to allow for longer exposure) from 1 - 300 it only raises the exposure time then from 300 - 600 it halves the framerate to be able to further raise the exposure time and for every 300 more it halves the framerate again. This allows for a maximum exposure time of circa 0.2 - 0.25 seconds (30 / (2000/3000) fps). Sometimes this is not enough, the 1-2047 uses bits 0-10, bits 11-12 configure a divider for the base framerate which us used at the exposure setting of 1-300. These bits configure the base framerate according to the following formula: fps = 60 / (value + 2) */ /* We choose to use the high bits setting the fixed framerate divisor asap, as setting high basic exposure setting without the fixed divider in combination with high gains makes the cam stop */ static const int table[] = { 0, 450, 550, 625, EXPOSURE_MAX }; for (i = 0; i < ARRAY_SIZE(table) - 1; i++) { if (val <= table[i + 1]) { expo = val - table[i]; if (i) expo += 300; expo |= i << 11; break; } } gspca_dev->usb_buf[0] = expo; gspca_dev->usb_buf[1] = expo >> 8; reg_w_buf(gspca_dev, 0x8309, 2); } /* rev 12a only */ static void setgain(struct gspca_dev *gspca_dev, s32 val) { /* gain reg low 6 bits 0-63 gain, bit 6 and 7, both double the sensitivity when set, so 31 + one of them set == 63, and 15 with both of them set == 63 */ if (val < 64) gspca_dev->usb_buf[0] = val; else if (val < 128) gspca_dev->usb_buf[0] = (val / 2) | 0x40; else gspca_dev->usb_buf[0] = (val / 4) | 0xc0; gspca_dev->usb_buf[1] = 0; reg_w_buf(gspca_dev, 0x8335, 2); } static void setautogain(struct gspca_dev *gspca_dev, s32 val) { struct sd *sd = (struct sd *) gspca_dev; if (val) sd->ag_cnt = AG_CNT_START; else sd->ag_cnt = -1; } static int sd_start_12a(struct gspca_dev *gspca_dev) { int mode; static const __u8 Reg8391[8] = {0x92, 0x30, 0x20, 0x00, 0x0c, 0x00, 0x00, 0x00}; mode = gspca_dev->cam.cam_mode[(int) gspca_dev->curr_mode].priv; if (mode <= 1) { /* Use compression on 320x240 and above */ reg_w_val(gspca_dev, 0x8500, 0x10 | mode); } else { /* I couldn't get the compression to work below 320x240 * Fortunately at these resolutions the bandwidth * is sufficient to push raw frames at ~20fps */ reg_w_val(gspca_dev, 0x8500, mode); } /* -- qq@kuku.eu.org */ gspca_dev->usb_buf[0] = 0xaa; gspca_dev->usb_buf[1] = 0x00; reg_w_buf(gspca_dev, 0x8307, 2); /* clock - lower 0x8X values lead to fps > 30 */ reg_w_val(gspca_dev, 0x8700, 0x8a); /* 0x8f 0x85 0x27 clock */ reg_w_val(gspca_dev, 0x8112, 0x1e | 0x20); reg_w_val(gspca_dev, 0x850b, 0x03); memcpy(gspca_dev->usb_buf, Reg8391, 8); reg_w_buf(gspca_dev, 0x8391, 8); reg_w_buf(gspca_dev, 0x8390, 8); /* Led ON (bit 3 -> 0 */ reg_w_val(gspca_dev, 0x8114, 0x00); return 0; } static int sd_start_72a(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; int Clck; int mode; write_vector(gspca_dev, rev72a_reset); msleep(200); write_vector(gspca_dev, rev72a_init_data1); write_sensor_72a(gspca_dev, rev72a_init_sensor1); mode = gspca_dev->cam.cam_mode[(int) gspca_dev->curr_mode].priv; switch (mode) { default: case 0: Clck = 0x27; /* ms-win 0x87 */ break; case 1: Clck = 0x25; break; case 2: Clck = 0x22; break; case 3: Clck = 0x21; break; } reg_w_val(gspca_dev, 0x8700, Clck); /* 0x27 clock */ reg_w_val(gspca_dev, 0x8702, 0x81); reg_w_val(gspca_dev, 0x8500, mode); /* mode */ write_sensor_72a(gspca_dev, rev72a_init_sensor2); setwhite(gspca_dev, v4l2_ctrl_g_ctrl(sd->hue), v4l2_ctrl_g_ctrl(sd->contrast)); /* setbrightness(gspca_dev); * fixme: bad values */ setautogain(gspca_dev, v4l2_ctrl_g_ctrl(sd->autogain)); reg_w_val(gspca_dev, 0x8112, 0x10 | 0x20); return 0; } static void sd_stopN(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; if (sd->chip_revision == Rev012A) { reg_w_val(gspca_dev, 0x8112, 0x0e); /* Led Off (bit 3 -> 1 */ reg_w_val(gspca_dev, 0x8114, 0x08); } else { reg_w_val(gspca_dev, 0x8112, 0x20); /* reg_w_val(gspca_dev, 0x8102, 0x00); ?? */ } } static void do_autogain(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; int expotimes; int pixelclk; int gainG; __u8 R, Gr, Gb, B; int y; __u8 luma_mean = 110; __u8 luma_delta = 20; __u8 spring = 4; if (sd->ag_cnt < 0) return; if (--sd->ag_cnt >= 0) return; sd->ag_cnt = AG_CNT_START; switch (sd->chip_revision) { case Rev072A: reg_r(gspca_dev, 0x8621, 1); Gr = gspca_dev->usb_buf[0]; reg_r(gspca_dev, 0x8622, 1); R = gspca_dev->usb_buf[0]; reg_r(gspca_dev, 0x8623, 1); B = gspca_dev->usb_buf[0]; reg_r(gspca_dev, 0x8624, 1); Gb = gspca_dev->usb_buf[0]; y = (77 * R + 75 * (Gr + Gb) + 29 * B) >> 8; /* u= (128*B-(43*(Gr+Gb+R))) >> 8; */ /* v= (128*R-(53*(Gr+Gb))-21*B) >> 8; */ if (y < luma_mean - luma_delta || y > luma_mean + luma_delta) { expotimes = i2c_read(gspca_dev, 0x09, 0x10); pixelclk = 0x0800; expotimes = expotimes & 0x07ff; gainG = i2c_read(gspca_dev, 0x35, 0x10); expotimes += (luma_mean - y) >> spring; gainG += (luma_mean - y) / 50; if (gainG > 0x3f) gainG = 0x3f; else if (gainG < 3) gainG = 3; i2c_write(gspca_dev, gainG, 0x35); if (expotimes > 0x0256) expotimes = 0x0256; else if (expotimes < 3) expotimes = 3; i2c_write(gspca_dev, expotimes | pixelclk, 0x09); } break; } } static void sd_pkt_scan(struct gspca_dev *gspca_dev, u8 *data, /* isoc packet */ int len) /* iso packet length */ { struct sd *sd = (struct sd *) gspca_dev; len--; switch (*data++) { /* sequence number */ case 0: /* start of frame */ gspca_frame_add(gspca_dev, LAST_PACKET, NULL, 0); /* This should never happen */ if (len < 2) { gspca_err(gspca_dev, "Short SOF packet, ignoring\n\n\n\n\n"); gspca_dev->last_packet_type = DISCARD_PACKET; return; } #if IS_ENABLED(CONFIG_INPUT) if (data[0] & 0x20) { input_report_key(gspca_dev->input_dev, KEY_CAMERA, 1); input_sync(gspca_dev->input_dev); input_report_key(gspca_dev->input_dev, KEY_CAMERA, 0); input_sync(gspca_dev->input_dev); } #endif if (data[1] & 0x10) { /* compressed bayer */ gspca_frame_add(gspca_dev, FIRST_PACKET, data, len); } else { /* raw bayer (with a header, which we skip) */ if (sd->chip_revision == Rev012A) { data += 20; len -= 20; } else { data += 16; len -= 16; } gspca_frame_add(gspca_dev, FIRST_PACKET, data, len); } return; case 0xff: /* drop (empty mpackets) */ return; } gspca_frame_add(gspca_dev, INTER_PACKET, data, len); } static int sd_s_ctrl(struct v4l2_ctrl *ctrl) { struct gspca_dev *gspca_dev = container_of(ctrl->handler, struct gspca_dev, ctrl_handler); struct sd *sd = (struct sd *)gspca_dev; gspca_dev->usb_err = 0; if (!gspca_dev->streaming) return 0; switch (ctrl->id) { case V4L2_CID_BRIGHTNESS: setbrightness(gspca_dev, ctrl->val); break; case V4L2_CID_CONTRAST: /* hue/contrast control cluster for 72a */ setwhite(gspca_dev, sd->hue->val, ctrl->val); break; case V4L2_CID_HUE: /* just plain hue control for 12a */ setwhite(gspca_dev, ctrl->val, 0); break; case V4L2_CID_EXPOSURE: setexposure(gspca_dev, ctrl->val); break; case V4L2_CID_GAIN: setgain(gspca_dev, ctrl->val); break; case V4L2_CID_AUTOGAIN: setautogain(gspca_dev, ctrl->val); break; } return gspca_dev->usb_err; } static const struct v4l2_ctrl_ops sd_ctrl_ops = { .s_ctrl = sd_s_ctrl, }; static int sd_init_controls_12a(struct gspca_dev *gspca_dev) { struct v4l2_ctrl_handler *hdl = &gspca_dev->ctrl_handler; gspca_dev->vdev.ctrl_handler = hdl; v4l2_ctrl_handler_init(hdl, 3); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_HUE, 1, 0x7f, 1, 0x40); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_BRIGHTNESS, -128, 127, 1, 0); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_EXPOSURE, 1, EXPOSURE_MAX, 1, 700); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_GAIN, 0, 255, 1, 63); if (hdl->error) { pr_err("Could not initialize controls\n"); return hdl->error; } return 0; } static int sd_init_controls_72a(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *)gspca_dev; struct v4l2_ctrl_handler *hdl = &gspca_dev->ctrl_handler; gspca_dev->vdev.ctrl_handler = hdl; v4l2_ctrl_handler_init(hdl, 4); sd->contrast = v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_CONTRAST, 0, 0x3f, 1, 0x20); sd->hue = v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_HUE, 1, 0x7f, 1, 0x40); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_BRIGHTNESS, 0, 0x3f, 1, 0x20); sd->autogain = v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_AUTOGAIN, 0, 1, 1, 1); if (hdl->error) { pr_err("Could not initialize controls\n"); return hdl->error; } v4l2_ctrl_cluster(2, &sd->contrast); return 0; } /* sub-driver description */ static const struct sd_desc sd_desc_12a = { .name = MODULE_NAME, .init_controls = sd_init_controls_12a, .config = sd_config, .init = sd_init_12a, .start = sd_start_12a, .stopN = sd_stopN, .pkt_scan = sd_pkt_scan, #if IS_ENABLED(CONFIG_INPUT) .other_input = 1, #endif }; static const struct sd_desc sd_desc_72a = { .name = MODULE_NAME, .init_controls = sd_init_controls_72a, .config = sd_config, .init = sd_init_72a, .start = sd_start_72a, .stopN = sd_stopN, .pkt_scan = sd_pkt_scan, .dq_callback = do_autogain, #if IS_ENABLED(CONFIG_INPUT) .other_input = 1, #endif }; static const struct sd_desc *sd_desc[2] = { &sd_desc_12a, &sd_desc_72a }; /* -- module initialisation -- */ static const struct usb_device_id device_table[] = { {USB_DEVICE(0x041e, 0x401a), .driver_info = Rev072A}, {USB_DEVICE(0x041e, 0x403b), .driver_info = Rev012A}, {USB_DEVICE(0x0458, 0x7004), .driver_info = Rev072A}, {USB_DEVICE(0x0461, 0x0815), .driver_info = Rev072A}, {USB_DEVICE(0x046d, 0x0928), .driver_info = Rev012A}, {USB_DEVICE(0x046d, 0x0929), .driver_info = Rev012A}, {USB_DEVICE(0x046d, 0x092a), .driver_info = Rev012A}, {USB_DEVICE(0x046d, 0x092b), .driver_info = Rev012A}, {USB_DEVICE(0x046d, 0x092c), .driver_info = Rev012A}, {USB_DEVICE(0x046d, 0x092d), .driver_info = Rev012A}, {USB_DEVICE(0x046d, 0x092e), .driver_info = Rev012A}, {USB_DEVICE(0x046d, 0x092f), .driver_info = Rev012A}, {USB_DEVICE(0x04fc, 0x0561), .driver_info = Rev072A}, {USB_DEVICE(0x060b, 0xa001), .driver_info = Rev072A}, {USB_DEVICE(0x10fd, 0x7e50), .driver_info = Rev072A}, {USB_DEVICE(0xabcd, 0xcdee), .driver_info = Rev072A}, {} }; MODULE_DEVICE_TABLE(usb, device_table); /* -- device connect -- */ static int sd_probe(struct usb_interface *intf, const struct usb_device_id *id) { return gspca_dev_probe(intf, id, sd_desc[id->driver_info], sizeof(struct sd), THIS_MODULE); } static struct usb_driver sd_driver = { .name = MODULE_NAME, .id_table = device_table, .probe = sd_probe, .disconnect = gspca_disconnect, #ifdef CONFIG_PM .suspend = gspca_suspend, .resume = gspca_resume, .reset_resume = gspca_resume, #endif }; module_usb_driver(sd_driver);
8 1 6 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 // SPDX-License-Identifier: GPL-2.0-only /* * xt_u32 - kernel module to match u32 packet content * * Original author: Don Cohen <don@isis.cs3-inc.com> * (C) CC Computer Consultants GmbH, 2007 */ #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/spinlock.h> #include <linux/skbuff.h> #include <linux/types.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_u32.h> static bool u32_match_it(const struct xt_u32 *data, const struct sk_buff *skb) { const struct xt_u32_test *ct; unsigned int testind; unsigned int nnums; unsigned int nvals; unsigned int i; __be32 n; u_int32_t pos; u_int32_t val; u_int32_t at; /* * Small example: "0 >> 28 == 4 && 8 & 0xFF0000 >> 16 = 6, 17" * (=IPv4 and (TCP or UDP)). Outer loop runs over the "&&" operands. */ for (testind = 0; testind < data->ntests; ++testind) { ct = &data->tests[testind]; at = 0; pos = ct->location[0].number; if (skb->len < 4 || pos > skb->len - 4) return false; if (skb_copy_bits(skb, pos, &n, sizeof(n)) < 0) BUG(); val = ntohl(n); nnums = ct->nnums; /* Inner loop runs over "&", "<<", ">>" and "@" operands */ for (i = 1; i < nnums; ++i) { u_int32_t number = ct->location[i].number; switch (ct->location[i].nextop) { case XT_U32_AND: val &= number; break; case XT_U32_LEFTSH: val <<= number; break; case XT_U32_RIGHTSH: val >>= number; break; case XT_U32_AT: if (at + val < at) return false; at += val; pos = number; if (at + 4 < at || skb->len < at + 4 || pos > skb->len - at - 4) return false; if (skb_copy_bits(skb, at + pos, &n, sizeof(n)) < 0) BUG(); val = ntohl(n); break; } } /* Run over the "," and ":" operands */ nvals = ct->nvalues; for (i = 0; i < nvals; ++i) if (ct->value[i].min <= val && val <= ct->value[i].max) break; if (i >= ct->nvalues) return false; } return true; } static bool u32_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_u32 *data = par->matchinfo; bool ret; ret = u32_match_it(data, skb); return ret ^ data->invert; } static int u32_mt_checkentry(const struct xt_mtchk_param *par) { const struct xt_u32 *data = par->matchinfo; const struct xt_u32_test *ct; unsigned int i; if (data->ntests > ARRAY_SIZE(data->tests)) return -EINVAL; for (i = 0; i < data->ntests; ++i) { ct = &data->tests[i]; if (ct->nnums > ARRAY_SIZE(ct->location) || ct->nvalues > ARRAY_SIZE(ct->value)) return -EINVAL; } return 0; } static struct xt_match xt_u32_mt_reg __read_mostly = { .name = "u32", .revision = 0, .family = NFPROTO_UNSPEC, .match = u32_mt, .checkentry = u32_mt_checkentry, .matchsize = sizeof(struct xt_u32), .me = THIS_MODULE, }; static int __init u32_mt_init(void) { return xt_register_match(&xt_u32_mt_reg); } static void __exit u32_mt_exit(void) { xt_unregister_match(&xt_u32_mt_reg); } module_init(u32_mt_init); module_exit(u32_mt_exit); MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>"); MODULE_DESCRIPTION("Xtables: arbitrary byte matching"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_u32"); MODULE_ALIAS("ip6t_u32");
6 1 1 4 1 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 // SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> #include <linux/errno.h> #include <linux/file.h> #include <linux/fs.h> #include <linux/uaccess.h> #include <linux/io_uring.h> #include <linux/eventpoll.h> #include <uapi/linux/io_uring.h> #include "io_uring.h" #include "epoll.h" #if defined(CONFIG_EPOLL) struct io_epoll { struct file *file; int epfd; int op; int fd; struct epoll_event event; }; int io_epoll_ctl_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_epoll *epoll = io_kiocb_to_cmd(req, struct io_epoll); if (sqe->buf_index || sqe->splice_fd_in) return -EINVAL; epoll->epfd = READ_ONCE(sqe->fd); epoll->op = READ_ONCE(sqe->len); epoll->fd = READ_ONCE(sqe->off); if (ep_op_has_event(epoll->op)) { struct epoll_event __user *ev; ev = u64_to_user_ptr(READ_ONCE(sqe->addr)); if (copy_from_user(&epoll->event, ev, sizeof(*ev))) return -EFAULT; } return 0; } int io_epoll_ctl(struct io_kiocb *req, unsigned int issue_flags) { struct io_epoll *ie = io_kiocb_to_cmd(req, struct io_epoll); int ret; bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; ret = do_epoll_ctl(ie->epfd, ie->op, ie->fd, &ie->event, force_nonblock); if (force_nonblock && ret == -EAGAIN) return -EAGAIN; if (ret < 0) req_set_fail(req); io_req_set_res(req, ret, 0); return IOU_OK; } #endif
6 2 2 2 15 3 1 2 5 2 2 3 1 1 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 // SPDX-License-Identifier: GPL-2.0-only /* * * Generic part shared by ipv4 and ipv6 backends. */ #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_tables.h> #include <net/netfilter/nft_fib.h> #define NFTA_FIB_F_ALL (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR | \ NFTA_FIB_F_MARK | NFTA_FIB_F_IIF | NFTA_FIB_F_OIF | \ NFTA_FIB_F_PRESENT) const struct nla_policy nft_fib_policy[NFTA_FIB_MAX + 1] = { [NFTA_FIB_DREG] = { .type = NLA_U32 }, [NFTA_FIB_RESULT] = { .type = NLA_U32 }, [NFTA_FIB_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NFTA_FIB_F_ALL), }; EXPORT_SYMBOL(nft_fib_policy); int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr) { const struct nft_fib *priv = nft_expr_priv(expr); unsigned int hooks; switch (priv->result) { case NFT_FIB_RESULT_OIF: case NFT_FIB_RESULT_OIFNAME: hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD); break; case NFT_FIB_RESULT_ADDRTYPE: if (priv->flags & NFTA_FIB_F_IIF) hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD); else if (priv->flags & NFTA_FIB_F_OIF) hooks = (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_FORWARD); else hooks = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) | (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_POST_ROUTING); break; default: return -EINVAL; } return nft_chain_validate_hooks(ctx->chain, hooks); } EXPORT_SYMBOL_GPL(nft_fib_validate); int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_fib *priv = nft_expr_priv(expr); unsigned int len; int err; if (!tb[NFTA_FIB_DREG] || !tb[NFTA_FIB_RESULT] || !tb[NFTA_FIB_FLAGS]) return -EINVAL; priv->flags = ntohl(nla_get_be32(tb[NFTA_FIB_FLAGS])); if (priv->flags == 0) return -EINVAL; if ((priv->flags & (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR)) == (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR)) return -EINVAL; if ((priv->flags & (NFTA_FIB_F_IIF | NFTA_FIB_F_OIF)) == (NFTA_FIB_F_IIF | NFTA_FIB_F_OIF)) return -EINVAL; if ((priv->flags & (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR)) == 0) return -EINVAL; priv->result = ntohl(nla_get_be32(tb[NFTA_FIB_RESULT])); switch (priv->result) { case NFT_FIB_RESULT_OIF: if (priv->flags & NFTA_FIB_F_OIF) return -EINVAL; len = sizeof(int); break; case NFT_FIB_RESULT_OIFNAME: if (priv->flags & NFTA_FIB_F_OIF) return -EINVAL; len = IFNAMSIZ; break; case NFT_FIB_RESULT_ADDRTYPE: len = sizeof(u32); break; default: return -EINVAL; } err = nft_parse_register_store(ctx, tb[NFTA_FIB_DREG], &priv->dreg, NULL, NFT_DATA_VALUE, len); if (err < 0) return err; return 0; } EXPORT_SYMBOL_GPL(nft_fib_init); int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { const struct nft_fib *priv = nft_expr_priv(expr); if (nft_dump_register(skb, NFTA_FIB_DREG, priv->dreg)) return -1; if (nla_put_be32(skb, NFTA_FIB_RESULT, htonl(priv->result))) return -1; if (nla_put_be32(skb, NFTA_FIB_FLAGS, htonl(priv->flags))) return -1; return 0; } EXPORT_SYMBOL_GPL(nft_fib_dump); void nft_fib_store_result(void *reg, const struct nft_fib *priv, const struct net_device *dev) { u32 *dreg = reg; int index; switch (priv->result) { case NFT_FIB_RESULT_OIF: index = dev ? dev->ifindex : 0; if (priv->flags & NFTA_FIB_F_PRESENT) nft_reg_store8(dreg, !!index); else *dreg = index; break; case NFT_FIB_RESULT_OIFNAME: if (priv->flags & NFTA_FIB_F_PRESENT) nft_reg_store8(dreg, !!dev); else strscpy_pad(reg, dev ? dev->name : "", IFNAMSIZ); break; default: WARN_ON_ONCE(1); *dreg = 0; break; } } EXPORT_SYMBOL_GPL(nft_fib_store_result); bool nft_fib_reduce(struct nft_regs_track *track, const struct nft_expr *expr) { const struct nft_fib *priv = nft_expr_priv(expr); unsigned int len = NFT_REG32_SIZE; const struct nft_fib *fib; switch (priv->result) { case NFT_FIB_RESULT_OIF: break; case NFT_FIB_RESULT_OIFNAME: if (priv->flags & NFTA_FIB_F_PRESENT) len = NFT_REG32_SIZE; else len = IFNAMSIZ; break; case NFT_FIB_RESULT_ADDRTYPE: break; default: WARN_ON_ONCE(1); break; } if (!nft_reg_track_cmp(track, expr, priv->dreg)) { nft_reg_track_update(track, expr, priv->dreg, len); return false; } fib = nft_expr_priv(track->regs[priv->dreg].selector); if (priv->result != fib->result || priv->flags != fib->flags) { nft_reg_track_update(track, expr, priv->dreg, len); return false; } if (!track->regs[priv->dreg].bitwise) return true; return false; } EXPORT_SYMBOL_GPL(nft_fib_reduce); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Query routing table from nftables"); MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
24 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 /* * Copyright (c) 2007-2011 Atheros Communications Inc. * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include "core.h" #include "debug.h" #include "hif-ops.h" #define HTC_PACKET_CONTAINER_ALLOCATION 32 #define HTC_CONTROL_BUFFER_SIZE (HTC_MAX_CTRL_MSG_LEN + HTC_HDR_LENGTH) static int ath6kl_htc_pipe_tx(struct htc_target *handle, struct htc_packet *packet); static void ath6kl_htc_pipe_cleanup(struct htc_target *handle); /* htc pipe tx path */ static inline void restore_tx_packet(struct htc_packet *packet) { if (packet->info.tx.flags & HTC_FLAGS_TX_FIXUP_NETBUF) { skb_pull(packet->skb, sizeof(struct htc_frame_hdr)); packet->info.tx.flags &= ~HTC_FLAGS_TX_FIXUP_NETBUF; } } static void do_send_completion(struct htc_endpoint *ep, struct list_head *queue_to_indicate) { struct htc_packet *packet; if (list_empty(queue_to_indicate)) { /* nothing to indicate */ return; } if (ep->ep_cb.tx_comp_multi != NULL) { ath6kl_dbg(ATH6KL_DBG_HTC, "%s: calling ep %d, send complete multiple callback (%d pkts)\n", __func__, ep->eid, get_queue_depth(queue_to_indicate)); /* * a multiple send complete handler is being used, * pass the queue to the handler */ ep->ep_cb.tx_comp_multi(ep->target, queue_to_indicate); /* * all packets are now owned by the callback, * reset queue to be safe */ INIT_LIST_HEAD(queue_to_indicate); } else { /* using legacy EpTxComplete */ do { packet = list_first_entry(queue_to_indicate, struct htc_packet, list); list_del(&packet->list); ath6kl_dbg(ATH6KL_DBG_HTC, "%s: calling ep %d send complete callback on packet 0x%p\n", __func__, ep->eid, packet); ep->ep_cb.tx_complete(ep->target, packet); } while (!list_empty(queue_to_indicate)); } } static void send_packet_completion(struct htc_target *target, struct htc_packet *packet) { struct htc_endpoint *ep = &target->endpoint[packet->endpoint]; struct list_head container; restore_tx_packet(packet); INIT_LIST_HEAD(&container); list_add_tail(&packet->list, &container); /* do completion */ do_send_completion(ep, &container); } static void get_htc_packet_credit_based(struct htc_target *target, struct htc_endpoint *ep, struct list_head *queue) { int credits_required; int remainder; u8 send_flags; struct htc_packet *packet; unsigned int transfer_len; /* NOTE : the TX lock is held when this function is called */ /* loop until we can grab as many packets out of the queue as we can */ while (true) { send_flags = 0; if (list_empty(&ep->txq)) break; /* get packet at head, but don't remove it */ packet = list_first_entry(&ep->txq, struct htc_packet, list); ath6kl_dbg(ATH6KL_DBG_HTC, "%s: got head packet:0x%p , queue depth: %d\n", __func__, packet, get_queue_depth(&ep->txq)); transfer_len = packet->act_len + HTC_HDR_LENGTH; if (transfer_len <= target->tgt_cred_sz) { credits_required = 1; } else { /* figure out how many credits this message requires */ credits_required = transfer_len / target->tgt_cred_sz; remainder = transfer_len % target->tgt_cred_sz; if (remainder) credits_required++; } ath6kl_dbg(ATH6KL_DBG_HTC, "%s: creds required:%d got:%d\n", __func__, credits_required, ep->cred_dist.credits); if (ep->eid == ENDPOINT_0) { /* * endpoint 0 is special, it always has a credit and * does not require credit based flow control */ credits_required = 0; } else { if (ep->cred_dist.credits < credits_required) break; ep->cred_dist.credits -= credits_required; ep->ep_st.cred_cosumd += credits_required; /* check if we need credits back from the target */ if (ep->cred_dist.credits < ep->cred_dist.cred_per_msg) { /* tell the target we need credits ASAP! */ send_flags |= HTC_FLAGS_NEED_CREDIT_UPDATE; ep->ep_st.cred_low_indicate += 1; ath6kl_dbg(ATH6KL_DBG_HTC, "%s: host needs credits\n", __func__); } } /* now we can fully dequeue */ packet = list_first_entry(&ep->txq, struct htc_packet, list); list_del(&packet->list); /* save the number of credits this packet consumed */ packet->info.tx.cred_used = credits_required; /* save send flags */ packet->info.tx.flags = send_flags; packet->info.tx.seqno = ep->seqno; ep->seqno++; /* queue this packet into the caller's queue */ list_add_tail(&packet->list, queue); } } static void get_htc_packet(struct htc_target *target, struct htc_endpoint *ep, struct list_head *queue, int resources) { struct htc_packet *packet; /* NOTE : the TX lock is held when this function is called */ /* loop until we can grab as many packets out of the queue as we can */ while (resources) { if (list_empty(&ep->txq)) break; packet = list_first_entry(&ep->txq, struct htc_packet, list); list_del(&packet->list); ath6kl_dbg(ATH6KL_DBG_HTC, "%s: got packet:0x%p , new queue depth: %d\n", __func__, packet, get_queue_depth(&ep->txq)); packet->info.tx.seqno = ep->seqno; packet->info.tx.flags = 0; packet->info.tx.cred_used = 0; ep->seqno++; /* queue this packet into the caller's queue */ list_add_tail(&packet->list, queue); resources--; } } static int htc_issue_packets(struct htc_target *target, struct htc_endpoint *ep, struct list_head *pkt_queue) { int status = 0; u16 payload_len; struct sk_buff *skb; struct htc_frame_hdr *htc_hdr; struct htc_packet *packet; ath6kl_dbg(ATH6KL_DBG_HTC, "%s: queue: 0x%p, pkts %d\n", __func__, pkt_queue, get_queue_depth(pkt_queue)); while (!list_empty(pkt_queue)) { packet = list_first_entry(pkt_queue, struct htc_packet, list); list_del(&packet->list); skb = packet->skb; if (!skb) { WARN_ON_ONCE(1); status = -EINVAL; break; } payload_len = packet->act_len; /* setup HTC frame header */ htc_hdr = skb_push(skb, sizeof(*htc_hdr)); if (!htc_hdr) { WARN_ON_ONCE(1); status = -EINVAL; break; } packet->info.tx.flags |= HTC_FLAGS_TX_FIXUP_NETBUF; put_unaligned_le16(payload_len, &htc_hdr->payld_len); htc_hdr->flags = packet->info.tx.flags; htc_hdr->eid = (u8) packet->endpoint; htc_hdr->ctrl[0] = 0; htc_hdr->ctrl[1] = (u8) packet->info.tx.seqno; spin_lock_bh(&target->tx_lock); /* store in look up queue to match completions */ list_add_tail(&packet->list, &ep->pipe.tx_lookup_queue); ep->ep_st.tx_issued += 1; spin_unlock_bh(&target->tx_lock); status = ath6kl_hif_pipe_send(target->dev->ar, ep->pipe.pipeid_ul, NULL, skb); if (status != 0) { if (status != -ENOMEM) { /* TODO: if more than 1 endpoint maps to the * same PipeID, it is possible to run out of * resources in the HIF layer. * Don't emit the error */ ath6kl_dbg(ATH6KL_DBG_HTC, "%s: failed status:%d\n", __func__, status); } spin_lock_bh(&target->tx_lock); list_del(&packet->list); /* reclaim credits */ ep->cred_dist.credits += packet->info.tx.cred_used; spin_unlock_bh(&target->tx_lock); /* put it back into the callers queue */ list_add(&packet->list, pkt_queue); break; } } if (status != 0) { while (!list_empty(pkt_queue)) { if (status != -ENOMEM) { ath6kl_dbg(ATH6KL_DBG_HTC, "%s: failed pkt:0x%p status:%d\n", __func__, packet, status); } packet = list_first_entry(pkt_queue, struct htc_packet, list); list_del(&packet->list); packet->status = status; send_packet_completion(target, packet); } } return status; } static enum htc_send_queue_result htc_try_send(struct htc_target *target, struct htc_endpoint *ep, struct list_head *txq) { struct list_head send_queue; /* temp queue to hold packets */ struct htc_packet *packet, *tmp_pkt; struct ath6kl *ar = target->dev->ar; enum htc_send_full_action action; int tx_resources, overflow, txqueue_depth, i, good_pkts; u8 pipeid; ath6kl_dbg(ATH6KL_DBG_HTC, "%s: (queue:0x%p depth:%d)\n", __func__, txq, (txq == NULL) ? 0 : get_queue_depth(txq)); /* init the local send queue */ INIT_LIST_HEAD(&send_queue); /* * txq equals to NULL means * caller didn't provide a queue, just wants us to * check queues and send */ if (txq != NULL) { if (list_empty(txq)) { /* empty queue */ return HTC_SEND_QUEUE_DROP; } spin_lock_bh(&target->tx_lock); txqueue_depth = get_queue_depth(&ep->txq); spin_unlock_bh(&target->tx_lock); if (txqueue_depth >= ep->max_txq_depth) { /* we've already overflowed */ overflow = get_queue_depth(txq); } else { /* get how much we will overflow by */ overflow = txqueue_depth; overflow += get_queue_depth(txq); /* get how much we will overflow the TX queue by */ overflow -= ep->max_txq_depth; } /* if overflow is negative or zero, we are okay */ if (overflow > 0) { ath6kl_dbg(ATH6KL_DBG_HTC, "%s: Endpoint %d, TX queue will overflow :%d, Tx Depth:%d, Max:%d\n", __func__, ep->eid, overflow, txqueue_depth, ep->max_txq_depth); } if ((overflow <= 0) || (ep->ep_cb.tx_full == NULL)) { /* * all packets will fit or caller did not provide send * full indication handler -- just move all of them * to the local send_queue object */ list_splice_tail_init(txq, &send_queue); } else { good_pkts = get_queue_depth(txq) - overflow; if (good_pkts < 0) { WARN_ON_ONCE(1); return HTC_SEND_QUEUE_DROP; } /* we have overflowed, and a callback is provided */ /* dequeue all non-overflow packets to the sendqueue */ for (i = 0; i < good_pkts; i++) { /* pop off caller's queue */ packet = list_first_entry(txq, struct htc_packet, list); /* move to local queue */ list_move_tail(&packet->list, &send_queue); } /* * the caller's queue has all the packets that won't fit * walk through the caller's queue and indicate each to * the send full handler */ list_for_each_entry_safe(packet, tmp_pkt, txq, list) { ath6kl_dbg(ATH6KL_DBG_HTC, "%s: Indicate overflowed TX pkts: %p\n", __func__, packet); action = ep->ep_cb.tx_full(ep->target, packet); if (action == HTC_SEND_FULL_DROP) { /* callback wants the packet dropped */ ep->ep_st.tx_dropped += 1; /* leave this one in the caller's queue * for cleanup */ } else { /* callback wants to keep this packet, * move from caller's queue to the send * queue */ list_move_tail(&packet->list, &send_queue); } } if (list_empty(&send_queue)) { /* no packets made it in, caller will cleanup */ return HTC_SEND_QUEUE_DROP; } } } if (!ep->pipe.tx_credit_flow_enabled) { tx_resources = ath6kl_hif_pipe_get_free_queue_number(ar, ep->pipe.pipeid_ul); } else { tx_resources = 0; } spin_lock_bh(&target->tx_lock); if (!list_empty(&send_queue)) { /* transfer packets to tail */ list_splice_tail_init(&send_queue, &ep->txq); if (!list_empty(&send_queue)) { WARN_ON_ONCE(1); spin_unlock_bh(&target->tx_lock); return HTC_SEND_QUEUE_DROP; } INIT_LIST_HEAD(&send_queue); } /* increment tx processing count on entry */ ep->tx_proc_cnt++; if (ep->tx_proc_cnt > 1) { /* * Another thread or task is draining the TX queues on this * endpoint that thread will reset the tx processing count * when the queue is drained. */ ep->tx_proc_cnt--; spin_unlock_bh(&target->tx_lock); return HTC_SEND_QUEUE_OK; } /***** beyond this point only 1 thread may enter ******/ /* * Now drain the endpoint TX queue for transmission as long as we have * enough transmit resources. */ while (true) { if (get_queue_depth(&ep->txq) == 0) break; if (ep->pipe.tx_credit_flow_enabled) { /* * Credit based mechanism provides flow control * based on target transmit resource availability, * we assume that the HIF layer will always have * bus resources greater than target transmit * resources. */ get_htc_packet_credit_based(target, ep, &send_queue); } else { /* * Get all packets for this endpoint that we can * for this pass. */ get_htc_packet(target, ep, &send_queue, tx_resources); } if (get_queue_depth(&send_queue) == 0) { /* * Didn't get packets due to out of resources or TX * queue was drained. */ break; } spin_unlock_bh(&target->tx_lock); /* send what we can */ htc_issue_packets(target, ep, &send_queue); if (!ep->pipe.tx_credit_flow_enabled) { pipeid = ep->pipe.pipeid_ul; tx_resources = ath6kl_hif_pipe_get_free_queue_number(ar, pipeid); } spin_lock_bh(&target->tx_lock); } /* done with this endpoint, we can clear the count */ ep->tx_proc_cnt = 0; spin_unlock_bh(&target->tx_lock); return HTC_SEND_QUEUE_OK; } /* htc control packet manipulation */ static void destroy_htc_txctrl_packet(struct htc_packet *packet) { struct sk_buff *skb; skb = packet->skb; dev_kfree_skb(skb); kfree(packet); } static struct htc_packet *build_htc_txctrl_packet(void) { struct htc_packet *packet = NULL; struct sk_buff *skb; packet = kzalloc(sizeof(struct htc_packet), GFP_KERNEL); if (packet == NULL) return NULL; skb = __dev_alloc_skb(HTC_CONTROL_BUFFER_SIZE, GFP_KERNEL); if (skb == NULL) { kfree(packet); return NULL; } packet->skb = skb; return packet; } static void htc_free_txctrl_packet(struct htc_target *target, struct htc_packet *packet) { destroy_htc_txctrl_packet(packet); } static struct htc_packet *htc_alloc_txctrl_packet(struct htc_target *target) { return build_htc_txctrl_packet(); } static void htc_txctrl_complete(struct htc_target *target, struct htc_packet *packet) { htc_free_txctrl_packet(target, packet); } #define MAX_MESSAGE_SIZE 1536 static int htc_setup_target_buffer_assignments(struct htc_target *target) { int status, credits, credit_per_maxmsg, i; struct htc_pipe_txcredit_alloc *entry; unsigned int hif_usbaudioclass = 0; credit_per_maxmsg = MAX_MESSAGE_SIZE / target->tgt_cred_sz; if (MAX_MESSAGE_SIZE % target->tgt_cred_sz) credit_per_maxmsg++; /* TODO, this should be configured by the caller! */ credits = target->tgt_creds; entry = &target->pipe.txcredit_alloc[0]; status = -ENOMEM; /* FIXME: hif_usbaudioclass is always zero */ if (hif_usbaudioclass) { ath6kl_dbg(ATH6KL_DBG_HTC, "%s: For USB Audio Class- Total:%d\n", __func__, credits); entry++; entry++; /* Setup VO Service To have Max Credits */ entry->service_id = WMI_DATA_VO_SVC; entry->credit_alloc = (credits - 6); if (entry->credit_alloc == 0) entry->credit_alloc++; credits -= (int) entry->credit_alloc; if (credits <= 0) return status; entry++; entry->service_id = WMI_CONTROL_SVC; entry->credit_alloc = credit_per_maxmsg; credits -= (int) entry->credit_alloc; if (credits <= 0) return status; /* leftovers go to best effort */ entry++; entry++; entry->service_id = WMI_DATA_BE_SVC; entry->credit_alloc = (u8) credits; status = 0; } else { entry++; entry->service_id = WMI_DATA_VI_SVC; entry->credit_alloc = credits / 4; if (entry->credit_alloc == 0) entry->credit_alloc++; credits -= (int) entry->credit_alloc; if (credits <= 0) return status; entry++; entry->service_id = WMI_DATA_VO_SVC; entry->credit_alloc = credits / 4; if (entry->credit_alloc == 0) entry->credit_alloc++; credits -= (int) entry->credit_alloc; if (credits <= 0) return status; entry++; entry->service_id = WMI_CONTROL_SVC; entry->credit_alloc = credit_per_maxmsg; credits -= (int) entry->credit_alloc; if (credits <= 0) return status; entry++; entry->service_id = WMI_DATA_BK_SVC; entry->credit_alloc = credit_per_maxmsg; credits -= (int) entry->credit_alloc; if (credits <= 0) return status; /* leftovers go to best effort */ entry++; entry->service_id = WMI_DATA_BE_SVC; entry->credit_alloc = (u8) credits; status = 0; } if (status == 0) { for (i = 0; i < ENDPOINT_MAX; i++) { if (target->pipe.txcredit_alloc[i].service_id != 0) { ath6kl_dbg(ATH6KL_DBG_HTC, "HTC Service Index : %d TX : 0x%2.2X : alloc:%d\n", i, target->pipe.txcredit_alloc[i]. service_id, target->pipe.txcredit_alloc[i]. credit_alloc); } } } return status; } /* process credit reports and call distribution function */ static void htc_process_credit_report(struct htc_target *target, struct htc_credit_report *rpt, int num_entries, enum htc_endpoint_id from_ep) { int total_credits = 0, i; struct htc_endpoint *ep; /* lock out TX while we update credits */ spin_lock_bh(&target->tx_lock); for (i = 0; i < num_entries; i++, rpt++) { if (rpt->eid >= ENDPOINT_MAX) { WARN_ON_ONCE(1); spin_unlock_bh(&target->tx_lock); return; } ep = &target->endpoint[rpt->eid]; ep->cred_dist.credits += rpt->credits; if (ep->cred_dist.credits && get_queue_depth(&ep->txq)) { spin_unlock_bh(&target->tx_lock); htc_try_send(target, ep, NULL); spin_lock_bh(&target->tx_lock); } total_credits += rpt->credits; } ath6kl_dbg(ATH6KL_DBG_HTC, "Report indicated %d credits to distribute\n", total_credits); spin_unlock_bh(&target->tx_lock); } /* flush endpoint TX queue */ static void htc_flush_tx_endpoint(struct htc_target *target, struct htc_endpoint *ep, u16 tag) { struct htc_packet *packet; spin_lock_bh(&target->tx_lock); while (get_queue_depth(&ep->txq)) { packet = list_first_entry(&ep->txq, struct htc_packet, list); list_del(&packet->list); packet->status = 0; send_packet_completion(target, packet); } spin_unlock_bh(&target->tx_lock); } /* * In the adapted HIF layer, struct sk_buff * are passed between HIF and HTC, * since upper layers expects struct htc_packet containers we use the completed * skb and lookup it's corresponding HTC packet buffer from a lookup list. * This is extra overhead that can be fixed by re-aligning HIF interfaces with * HTC. */ static struct htc_packet *htc_lookup_tx_packet(struct htc_target *target, struct htc_endpoint *ep, struct sk_buff *skb) { struct htc_packet *packet, *tmp_pkt, *found_packet = NULL; spin_lock_bh(&target->tx_lock); /* * interate from the front of tx lookup queue * this lookup should be fast since lower layers completes in-order and * so the completed packet should be at the head of the list generally */ list_for_each_entry_safe(packet, tmp_pkt, &ep->pipe.tx_lookup_queue, list) { /* check for removal */ if (skb == packet->skb) { /* found it */ list_del(&packet->list); found_packet = packet; break; } } spin_unlock_bh(&target->tx_lock); return found_packet; } static int ath6kl_htc_pipe_tx_complete(struct ath6kl *ar, struct sk_buff *skb) { struct htc_target *target = ar->htc_target; struct htc_frame_hdr *htc_hdr; struct htc_endpoint *ep; struct htc_packet *packet; u8 ep_id, *netdata; netdata = skb->data; htc_hdr = (struct htc_frame_hdr *) netdata; ep_id = htc_hdr->eid; ep = &target->endpoint[ep_id]; packet = htc_lookup_tx_packet(target, ep, skb); if (packet == NULL) { /* may have already been flushed and freed */ ath6kl_err("HTC TX lookup failed!\n"); } else { /* will be giving this buffer back to upper layers */ packet->status = 0; send_packet_completion(target, packet); } skb = NULL; if (!ep->pipe.tx_credit_flow_enabled) { /* * note: when using TX credit flow, the re-checking of queues * happens when credits flow back from the target. in the * non-TX credit case, we recheck after the packet completes */ htc_try_send(target, ep, NULL); } return 0; } static int htc_send_packets_multiple(struct htc_target *target, struct list_head *pkt_queue) { struct htc_endpoint *ep; struct htc_packet *packet, *tmp_pkt; if (list_empty(pkt_queue)) return -EINVAL; /* get first packet to find out which ep the packets will go into */ packet = list_first_entry(pkt_queue, struct htc_packet, list); if (packet->endpoint >= ENDPOINT_MAX) { WARN_ON_ONCE(1); return -EINVAL; } ep = &target->endpoint[packet->endpoint]; htc_try_send(target, ep, pkt_queue); /* do completion on any packets that couldn't get in */ if (!list_empty(pkt_queue)) { list_for_each_entry_safe(packet, tmp_pkt, pkt_queue, list) { packet->status = -ENOMEM; } do_send_completion(ep, pkt_queue); } return 0; } /* htc pipe rx path */ static struct htc_packet *alloc_htc_packet_container(struct htc_target *target) { struct htc_packet *packet; spin_lock_bh(&target->rx_lock); if (target->pipe.htc_packet_pool == NULL) { spin_unlock_bh(&target->rx_lock); return NULL; } packet = target->pipe.htc_packet_pool; target->pipe.htc_packet_pool = (struct htc_packet *) packet->list.next; spin_unlock_bh(&target->rx_lock); packet->list.next = NULL; return packet; } static void free_htc_packet_container(struct htc_target *target, struct htc_packet *packet) { struct list_head *lh; spin_lock_bh(&target->rx_lock); if (target->pipe.htc_packet_pool == NULL) { target->pipe.htc_packet_pool = packet; packet->list.next = NULL; } else { lh = (struct list_head *) target->pipe.htc_packet_pool; packet->list.next = lh; target->pipe.htc_packet_pool = packet; } spin_unlock_bh(&target->rx_lock); } static int htc_process_trailer(struct htc_target *target, u8 *buffer, int len, enum htc_endpoint_id from_ep) { struct htc_credit_report *report; struct htc_record_hdr *record; u8 *record_buf; int status = 0; while (len > 0) { if (len < sizeof(struct htc_record_hdr)) { status = -EINVAL; break; } /* these are byte aligned structs */ record = (struct htc_record_hdr *) buffer; len -= sizeof(struct htc_record_hdr); buffer += sizeof(struct htc_record_hdr); if (record->len > len) { /* no room left in buffer for record */ ath6kl_dbg(ATH6KL_DBG_HTC, "invalid length: %d (id:%d) buffer has: %d bytes left\n", record->len, record->rec_id, len); status = -EINVAL; break; } /* start of record follows the header */ record_buf = buffer; switch (record->rec_id) { case HTC_RECORD_CREDITS: if (record->len < sizeof(struct htc_credit_report)) { WARN_ON_ONCE(1); return -EINVAL; } report = (struct htc_credit_report *) record_buf; htc_process_credit_report(target, report, record->len / sizeof(*report), from_ep); break; default: ath6kl_dbg(ATH6KL_DBG_HTC, "unhandled record: id:%d length:%d\n", record->rec_id, record->len); break; } /* advance buffer past this record for next time around */ buffer += record->len; len -= record->len; } return status; } static void do_recv_completion(struct htc_endpoint *ep, struct list_head *queue_to_indicate) { struct htc_packet *packet; if (list_empty(queue_to_indicate)) { /* nothing to indicate */ return; } /* using legacy EpRecv */ while (!list_empty(queue_to_indicate)) { packet = list_first_entry(queue_to_indicate, struct htc_packet, list); list_del(&packet->list); ep->ep_cb.rx(ep->target, packet); } return; } static void recv_packet_completion(struct htc_target *target, struct htc_endpoint *ep, struct htc_packet *packet) { struct list_head container; INIT_LIST_HEAD(&container); list_add_tail(&packet->list, &container); /* do completion */ do_recv_completion(ep, &container); } static int ath6kl_htc_pipe_rx_complete(struct ath6kl *ar, struct sk_buff *skb, u8 pipeid) { struct htc_target *target = ar->htc_target; u8 *netdata, *trailer, hdr_info; struct htc_frame_hdr *htc_hdr; u32 netlen, trailerlen = 0; struct htc_packet *packet; struct htc_endpoint *ep; u16 payload_len; int status = 0; /* * ar->htc_target can be NULL due to a race condition that can occur * during driver initialization(we do 'ath6kl_hif_power_on' before * initializing 'ar->htc_target' via 'ath6kl_htc_create'). * 'ath6kl_hif_power_on' assigns 'ath6kl_recv_complete' as * usb_complete_t/callback function for 'usb_fill_bulk_urb'. * Thus the possibility of ar->htc_target being NULL * via ath6kl_recv_complete -> ath6kl_usb_io_comp_work. */ if (!target) { ath6kl_dbg(ATH6KL_DBG_HTC, "Target not yet initialized\n"); status = -EINVAL; goto free_skb; } netdata = skb->data; netlen = skb->len; htc_hdr = (struct htc_frame_hdr *) netdata; if (htc_hdr->eid >= ENDPOINT_MAX) { ath6kl_dbg(ATH6KL_DBG_HTC, "HTC Rx: invalid EndpointID=%d\n", htc_hdr->eid); status = -EINVAL; goto free_skb; } ep = &target->endpoint[htc_hdr->eid]; payload_len = le16_to_cpu(get_unaligned(&htc_hdr->payld_len)); if (netlen < (payload_len + HTC_HDR_LENGTH)) { ath6kl_dbg(ATH6KL_DBG_HTC, "HTC Rx: insufficient length, got:%d expected =%zu\n", netlen, payload_len + HTC_HDR_LENGTH); status = -EINVAL; goto free_skb; } /* get flags to check for trailer */ hdr_info = htc_hdr->flags; if (hdr_info & HTC_FLG_RX_TRAILER) { /* extract the trailer length */ hdr_info = htc_hdr->ctrl[0]; if ((hdr_info < sizeof(struct htc_record_hdr)) || (hdr_info > payload_len)) { ath6kl_dbg(ATH6KL_DBG_HTC, "invalid header: payloadlen should be %d, CB[0]: %d\n", payload_len, hdr_info); status = -EINVAL; goto free_skb; } trailerlen = hdr_info; /* process trailer after hdr/apps payload */ trailer = (u8 *) htc_hdr + HTC_HDR_LENGTH + payload_len - hdr_info; status = htc_process_trailer(target, trailer, hdr_info, htc_hdr->eid); if (status != 0) goto free_skb; } if (((int) payload_len - (int) trailerlen) <= 0) { /* zero length packet with trailer, just drop these */ goto free_skb; } if (htc_hdr->eid == ENDPOINT_0) { /* handle HTC control message */ if (target->htc_flags & HTC_OP_STATE_SETUP_COMPLETE) { /* * fatal: target should not send unsolicited * messageson the endpoint 0 */ ath6kl_dbg(ATH6KL_DBG_HTC, "HTC ignores Rx Ctrl after setup complete\n"); status = -EINVAL; goto free_skb; } /* remove HTC header */ skb_pull(skb, HTC_HDR_LENGTH); netdata = skb->data; netlen = skb->len; spin_lock_bh(&target->rx_lock); target->pipe.ctrl_response_valid = true; target->pipe.ctrl_response_len = min_t(int, netlen, HTC_MAX_CTRL_MSG_LEN); memcpy(target->pipe.ctrl_response_buf, netdata, target->pipe.ctrl_response_len); spin_unlock_bh(&target->rx_lock); dev_kfree_skb(skb); skb = NULL; goto free_skb; } /* * TODO: the message based HIF architecture allocates net bufs * for recv packets since it bridges that HIF to upper layers, * which expects HTC packets, we form the packets here */ packet = alloc_htc_packet_container(target); if (packet == NULL) { status = -ENOMEM; goto free_skb; } packet->status = 0; packet->endpoint = htc_hdr->eid; packet->pkt_cntxt = skb; /* TODO: for backwards compatibility */ packet->buf = skb_push(skb, 0) + HTC_HDR_LENGTH; packet->act_len = netlen - HTC_HDR_LENGTH - trailerlen; /* * TODO: this is a hack because the driver layer will set the * actual len of the skb again which will just double the len */ skb_trim(skb, 0); recv_packet_completion(target, ep, packet); /* recover the packet container */ free_htc_packet_container(target, packet); skb = NULL; free_skb: dev_kfree_skb(skb); return status; } static void htc_flush_rx_queue(struct htc_target *target, struct htc_endpoint *ep) { struct list_head container; struct htc_packet *packet; spin_lock_bh(&target->rx_lock); while (1) { if (list_empty(&ep->rx_bufq)) break; packet = list_first_entry(&ep->rx_bufq, struct htc_packet, list); list_del(&packet->list); spin_unlock_bh(&target->rx_lock); packet->status = -ECANCELED; packet->act_len = 0; ath6kl_dbg(ATH6KL_DBG_HTC, "Flushing RX packet:0x%p, length:%d, ep:%d\n", packet, packet->buf_len, packet->endpoint); INIT_LIST_HEAD(&container); list_add_tail(&packet->list, &container); /* give the packet back */ do_recv_completion(ep, &container); spin_lock_bh(&target->rx_lock); } spin_unlock_bh(&target->rx_lock); } /* polling routine to wait for a control packet to be received */ static int htc_wait_recv_ctrl_message(struct htc_target *target) { int count = HTC_TARGET_RESPONSE_POLL_COUNT; while (count > 0) { spin_lock_bh(&target->rx_lock); if (target->pipe.ctrl_response_valid) { target->pipe.ctrl_response_valid = false; spin_unlock_bh(&target->rx_lock); break; } spin_unlock_bh(&target->rx_lock); count--; msleep_interruptible(HTC_TARGET_RESPONSE_POLL_WAIT); } if (count <= 0) { ath6kl_warn("htc pipe control receive timeout!\n"); return -ETIMEDOUT; } return 0; } static void htc_rxctrl_complete(struct htc_target *context, struct htc_packet *packet) { struct sk_buff *skb = packet->skb; if (packet->endpoint == ENDPOINT_0 && packet->status == -ECANCELED && skb != NULL) dev_kfree_skb(skb); } /* htc pipe initialization */ static void reset_endpoint_states(struct htc_target *target) { struct htc_endpoint *ep; int i; for (i = ENDPOINT_0; i < ENDPOINT_MAX; i++) { ep = &target->endpoint[i]; ep->svc_id = 0; ep->len_max = 0; ep->max_txq_depth = 0; ep->eid = i; INIT_LIST_HEAD(&ep->txq); INIT_LIST_HEAD(&ep->pipe.tx_lookup_queue); INIT_LIST_HEAD(&ep->rx_bufq); ep->target = target; ep->pipe.tx_credit_flow_enabled = true; } } /* start HTC, this is called after all services are connected */ static int htc_config_target_hif_pipe(struct htc_target *target) { return 0; } /* htc service functions */ static u8 htc_get_credit_alloc(struct htc_target *target, u16 service_id) { u8 allocation = 0; int i; for (i = 0; i < ENDPOINT_MAX; i++) { if (target->pipe.txcredit_alloc[i].service_id == service_id) allocation = target->pipe.txcredit_alloc[i].credit_alloc; } if (allocation == 0) { ath6kl_dbg(ATH6KL_DBG_HTC, "HTC Service TX : 0x%2.2X : allocation is zero!\n", service_id); } return allocation; } static int ath6kl_htc_pipe_conn_service(struct htc_target *target, struct htc_service_connect_req *conn_req, struct htc_service_connect_resp *conn_resp) { struct ath6kl *ar = target->dev->ar; struct htc_packet *packet = NULL; struct htc_conn_service_resp *resp_msg; struct htc_conn_service_msg *conn_msg; enum htc_endpoint_id assigned_epid = ENDPOINT_MAX; bool disable_credit_flowctrl = false; unsigned int max_msg_size = 0; struct htc_endpoint *ep; int length, status = 0; struct sk_buff *skb; u8 tx_alloc; u16 flags; if (conn_req->svc_id == 0) { WARN_ON_ONCE(1); status = -EINVAL; goto free_packet; } if (conn_req->svc_id == HTC_CTRL_RSVD_SVC) { /* special case for pseudo control service */ assigned_epid = ENDPOINT_0; max_msg_size = HTC_MAX_CTRL_MSG_LEN; tx_alloc = 0; } else { tx_alloc = htc_get_credit_alloc(target, conn_req->svc_id); if (tx_alloc == 0) { status = -ENOMEM; goto free_packet; } /* allocate a packet to send to the target */ packet = htc_alloc_txctrl_packet(target); if (packet == NULL) { WARN_ON_ONCE(1); status = -ENOMEM; goto free_packet; } skb = packet->skb; length = sizeof(struct htc_conn_service_msg); /* assemble connect service message */ conn_msg = skb_put(skb, length); if (conn_msg == NULL) { WARN_ON_ONCE(1); status = -EINVAL; goto free_packet; } memset(conn_msg, 0, sizeof(struct htc_conn_service_msg)); conn_msg->msg_id = cpu_to_le16(HTC_MSG_CONN_SVC_ID); conn_msg->svc_id = cpu_to_le16(conn_req->svc_id); conn_msg->conn_flags = cpu_to_le16(conn_req->conn_flags & ~HTC_CONN_FLGS_SET_RECV_ALLOC_MASK); /* tell target desired recv alloc for this ep */ flags = tx_alloc << HTC_CONN_FLGS_SET_RECV_ALLOC_SHIFT; conn_msg->conn_flags |= cpu_to_le16(flags); if (conn_req->conn_flags & HTC_CONN_FLGS_DISABLE_CRED_FLOW_CTRL) { disable_credit_flowctrl = true; } set_htc_pkt_info(packet, NULL, (u8 *) conn_msg, length, ENDPOINT_0, HTC_SERVICE_TX_PACKET_TAG); status = ath6kl_htc_pipe_tx(target, packet); /* we don't own it anymore */ packet = NULL; if (status != 0) goto free_packet; /* wait for response */ status = htc_wait_recv_ctrl_message(target); if (status != 0) goto free_packet; /* we controlled the buffer creation so it has to be * properly aligned */ resp_msg = (struct htc_conn_service_resp *) target->pipe.ctrl_response_buf; if (resp_msg->msg_id != cpu_to_le16(HTC_MSG_CONN_SVC_RESP_ID) || (target->pipe.ctrl_response_len < sizeof(*resp_msg))) { /* this message is not valid */ WARN_ON_ONCE(1); status = -EINVAL; goto free_packet; } ath6kl_dbg(ATH6KL_DBG_TRC, "%s: service 0x%X conn resp: status: %d ep: %d\n", __func__, resp_msg->svc_id, resp_msg->status, resp_msg->eid); conn_resp->resp_code = resp_msg->status; /* check response status */ if (resp_msg->status != HTC_SERVICE_SUCCESS) { ath6kl_dbg(ATH6KL_DBG_HTC, "Target failed service 0x%X connect request (status:%d)\n", resp_msg->svc_id, resp_msg->status); status = -EINVAL; goto free_packet; } assigned_epid = (enum htc_endpoint_id) resp_msg->eid; max_msg_size = le16_to_cpu(resp_msg->max_msg_sz); } /* the rest are parameter checks so set the error status */ status = -EINVAL; if (assigned_epid >= ENDPOINT_MAX) { WARN_ON_ONCE(1); goto free_packet; } if (max_msg_size == 0) { WARN_ON_ONCE(1); goto free_packet; } ep = &target->endpoint[assigned_epid]; ep->eid = assigned_epid; if (ep->svc_id != 0) { /* endpoint already in use! */ WARN_ON_ONCE(1); goto free_packet; } /* return assigned endpoint to caller */ conn_resp->endpoint = assigned_epid; conn_resp->len_max = max_msg_size; /* setup the endpoint */ ep->svc_id = conn_req->svc_id; /* this marks ep in use */ ep->max_txq_depth = conn_req->max_txq_depth; ep->len_max = max_msg_size; ep->cred_dist.credits = tx_alloc; ep->cred_dist.cred_sz = target->tgt_cred_sz; ep->cred_dist.cred_per_msg = max_msg_size / target->tgt_cred_sz; if (max_msg_size % target->tgt_cred_sz) ep->cred_dist.cred_per_msg++; /* copy all the callbacks */ ep->ep_cb = conn_req->ep_cb; /* initialize tx_drop_packet_threshold */ ep->tx_drop_packet_threshold = MAX_HI_COOKIE_NUM; status = ath6kl_hif_pipe_map_service(ar, ep->svc_id, &ep->pipe.pipeid_ul, &ep->pipe.pipeid_dl); if (status != 0) goto free_packet; ath6kl_dbg(ATH6KL_DBG_HTC, "SVC Ready: 0x%4.4X: ULpipe:%d DLpipe:%d id:%d\n", ep->svc_id, ep->pipe.pipeid_ul, ep->pipe.pipeid_dl, ep->eid); if (disable_credit_flowctrl && ep->pipe.tx_credit_flow_enabled) { ep->pipe.tx_credit_flow_enabled = false; ath6kl_dbg(ATH6KL_DBG_HTC, "SVC: 0x%4.4X ep:%d TX flow control off\n", ep->svc_id, assigned_epid); } free_packet: if (packet != NULL) htc_free_txctrl_packet(target, packet); return status; } /* htc export functions */ static void *ath6kl_htc_pipe_create(struct ath6kl *ar) { int status = 0; struct htc_endpoint *ep = NULL; struct htc_target *target = NULL; struct htc_packet *packet; int i; target = kzalloc(sizeof(struct htc_target), GFP_KERNEL); if (target == NULL) { ath6kl_err("htc create unable to allocate memory\n"); status = -ENOMEM; goto fail_htc_create; } spin_lock_init(&target->htc_lock); spin_lock_init(&target->rx_lock); spin_lock_init(&target->tx_lock); reset_endpoint_states(target); for (i = 0; i < HTC_PACKET_CONTAINER_ALLOCATION; i++) { packet = kzalloc(sizeof(struct htc_packet), GFP_KERNEL); if (packet != NULL) free_htc_packet_container(target, packet); } target->dev = kzalloc(sizeof(*target->dev), GFP_KERNEL); if (!target->dev) { ath6kl_err("unable to allocate memory\n"); status = -ENOMEM; goto fail_htc_create; } target->dev->ar = ar; target->dev->htc_cnxt = target; /* Get HIF default pipe for HTC message exchange */ ep = &target->endpoint[ENDPOINT_0]; ath6kl_hif_pipe_get_default(ar, &ep->pipe.pipeid_ul, &ep->pipe.pipeid_dl); return target; fail_htc_create: if (status != 0) { if (target != NULL) ath6kl_htc_pipe_cleanup(target); target = NULL; } return target; } /* cleanup the HTC instance */ static void ath6kl_htc_pipe_cleanup(struct htc_target *target) { struct htc_packet *packet; while (true) { packet = alloc_htc_packet_container(target); if (packet == NULL) break; kfree(packet); } kfree(target->dev); /* kfree our instance */ kfree(target); } static int ath6kl_htc_pipe_start(struct htc_target *target) { struct sk_buff *skb; struct htc_setup_comp_ext_msg *setup; struct htc_packet *packet; htc_config_target_hif_pipe(target); /* allocate a buffer to send */ packet = htc_alloc_txctrl_packet(target); if (packet == NULL) { WARN_ON_ONCE(1); return -ENOMEM; } skb = packet->skb; /* assemble setup complete message */ setup = skb_put(skb, sizeof(*setup)); memset(setup, 0, sizeof(struct htc_setup_comp_ext_msg)); setup->msg_id = cpu_to_le16(HTC_MSG_SETUP_COMPLETE_EX_ID); ath6kl_dbg(ATH6KL_DBG_HTC, "HTC using TX credit flow control\n"); set_htc_pkt_info(packet, NULL, (u8 *) setup, sizeof(struct htc_setup_comp_ext_msg), ENDPOINT_0, HTC_SERVICE_TX_PACKET_TAG); target->htc_flags |= HTC_OP_STATE_SETUP_COMPLETE; return ath6kl_htc_pipe_tx(target, packet); } static void ath6kl_htc_pipe_stop(struct htc_target *target) { int i; struct htc_endpoint *ep; /* cleanup endpoints */ for (i = 0; i < ENDPOINT_MAX; i++) { ep = &target->endpoint[i]; htc_flush_rx_queue(target, ep); htc_flush_tx_endpoint(target, ep, HTC_TX_PACKET_TAG_ALL); } reset_endpoint_states(target); target->htc_flags &= ~HTC_OP_STATE_SETUP_COMPLETE; } static int ath6kl_htc_pipe_get_rxbuf_num(struct htc_target *target, enum htc_endpoint_id endpoint) { int num; spin_lock_bh(&target->rx_lock); num = get_queue_depth(&(target->endpoint[endpoint].rx_bufq)); spin_unlock_bh(&target->rx_lock); return num; } static int ath6kl_htc_pipe_tx(struct htc_target *target, struct htc_packet *packet) { struct list_head queue; ath6kl_dbg(ATH6KL_DBG_HTC, "%s: endPointId: %d, buffer: 0x%p, length: %d\n", __func__, packet->endpoint, packet->buf, packet->act_len); INIT_LIST_HEAD(&queue); list_add_tail(&packet->list, &queue); return htc_send_packets_multiple(target, &queue); } static int ath6kl_htc_pipe_wait_target(struct htc_target *target) { struct htc_ready_ext_msg *ready_msg; struct htc_service_connect_req connect; struct htc_service_connect_resp resp; int status = 0; status = htc_wait_recv_ctrl_message(target); if (status != 0) return status; if (target->pipe.ctrl_response_len < sizeof(*ready_msg)) { ath6kl_warn("invalid htc pipe ready msg len: %d\n", target->pipe.ctrl_response_len); return -ECOMM; } ready_msg = (struct htc_ready_ext_msg *) target->pipe.ctrl_response_buf; if (ready_msg->ver2_0_info.msg_id != cpu_to_le16(HTC_MSG_READY_ID)) { ath6kl_warn("invalid htc pipe ready msg: 0x%x\n", ready_msg->ver2_0_info.msg_id); return -ECOMM; } ath6kl_dbg(ATH6KL_DBG_HTC, "Target Ready! : transmit resources : %d size:%d\n", ready_msg->ver2_0_info.cred_cnt, ready_msg->ver2_0_info.cred_sz); target->tgt_creds = le16_to_cpu(ready_msg->ver2_0_info.cred_cnt); target->tgt_cred_sz = le16_to_cpu(ready_msg->ver2_0_info.cred_sz); if ((target->tgt_creds == 0) || (target->tgt_cred_sz == 0)) return -ECOMM; htc_setup_target_buffer_assignments(target); /* setup our pseudo HTC control endpoint connection */ memset(&connect, 0, sizeof(connect)); memset(&resp, 0, sizeof(resp)); connect.ep_cb.tx_complete = htc_txctrl_complete; connect.ep_cb.rx = htc_rxctrl_complete; connect.max_txq_depth = NUM_CONTROL_TX_BUFFERS; connect.svc_id = HTC_CTRL_RSVD_SVC; /* connect fake service */ status = ath6kl_htc_pipe_conn_service(target, &connect, &resp); return status; } static void ath6kl_htc_pipe_flush_txep(struct htc_target *target, enum htc_endpoint_id endpoint, u16 tag) { struct htc_endpoint *ep = &target->endpoint[endpoint]; if (ep->svc_id == 0) { WARN_ON_ONCE(1); /* not in use.. */ return; } htc_flush_tx_endpoint(target, ep, tag); } static int ath6kl_htc_pipe_add_rxbuf_multiple(struct htc_target *target, struct list_head *pkt_queue) { struct htc_packet *packet, *tmp_pkt, *first; struct htc_endpoint *ep; int status = 0; if (list_empty(pkt_queue)) return -EINVAL; first = list_first_entry(pkt_queue, struct htc_packet, list); if (first->endpoint >= ENDPOINT_MAX) { WARN_ON_ONCE(1); return -EINVAL; } ath6kl_dbg(ATH6KL_DBG_HTC, "%s: epid: %d, cnt:%d, len: %d\n", __func__, first->endpoint, get_queue_depth(pkt_queue), first->buf_len); ep = &target->endpoint[first->endpoint]; spin_lock_bh(&target->rx_lock); /* store receive packets */ list_splice_tail_init(pkt_queue, &ep->rx_bufq); spin_unlock_bh(&target->rx_lock); if (status != 0) { /* walk through queue and mark each one canceled */ list_for_each_entry_safe(packet, tmp_pkt, pkt_queue, list) { packet->status = -ECANCELED; } do_recv_completion(ep, pkt_queue); } return status; } static void ath6kl_htc_pipe_activity_changed(struct htc_target *target, enum htc_endpoint_id ep, bool active) { /* TODO */ } static void ath6kl_htc_pipe_flush_rx_buf(struct htc_target *target) { struct htc_endpoint *endpoint; struct htc_packet *packet, *tmp_pkt; int i; for (i = ENDPOINT_0; i < ENDPOINT_MAX; i++) { endpoint = &target->endpoint[i]; spin_lock_bh(&target->rx_lock); list_for_each_entry_safe(packet, tmp_pkt, &endpoint->rx_bufq, list) { list_del(&packet->list); spin_unlock_bh(&target->rx_lock); ath6kl_dbg(ATH6KL_DBG_HTC, "htc rx flush pkt 0x%p len %d ep %d\n", packet, packet->buf_len, packet->endpoint); dev_kfree_skb(packet->pkt_cntxt); spin_lock_bh(&target->rx_lock); } spin_unlock_bh(&target->rx_lock); } } static int ath6kl_htc_pipe_credit_setup(struct htc_target *target, struct ath6kl_htc_credit_info *info) { return 0; } static const struct ath6kl_htc_ops ath6kl_htc_pipe_ops = { .create = ath6kl_htc_pipe_create, .wait_target = ath6kl_htc_pipe_wait_target, .start = ath6kl_htc_pipe_start, .conn_service = ath6kl_htc_pipe_conn_service, .tx = ath6kl_htc_pipe_tx, .stop = ath6kl_htc_pipe_stop, .cleanup = ath6kl_htc_pipe_cleanup, .flush_txep = ath6kl_htc_pipe_flush_txep, .flush_rx_buf = ath6kl_htc_pipe_flush_rx_buf, .activity_changed = ath6kl_htc_pipe_activity_changed, .get_rxbuf_num = ath6kl_htc_pipe_get_rxbuf_num, .add_rxbuf_multiple = ath6kl_htc_pipe_add_rxbuf_multiple, .credit_setup = ath6kl_htc_pipe_credit_setup, .tx_complete = ath6kl_htc_pipe_tx_complete, .rx_complete = ath6kl_htc_pipe_rx_complete, }; void ath6kl_htc_pipe_attach(struct ath6kl *ar) { ar->htc_ops = &ath6kl_htc_pipe_ops; }
11 11 5 107 508 39 11 3 9 40 34 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_FIB_RULES_H #define __NET_FIB_RULES_H #include <linux/types.h> #include <linux/slab.h> #include <linux/netdevice.h> #include <linux/fib_rules.h> #include <linux/refcount.h> #include <net/flow.h> #include <net/rtnetlink.h> #include <net/fib_notifier.h> #include <linux/indirect_call_wrapper.h> struct fib_kuid_range { kuid_t start; kuid_t end; }; struct fib_rule { struct list_head list; int iifindex; int oifindex; u32 mark; u32 mark_mask; u32 flags; u32 table; u8 action; u8 l3mdev; u8 proto; u8 ip_proto; u32 target; __be64 tun_id; struct fib_rule __rcu *ctarget; struct net *fr_net; refcount_t refcnt; u32 pref; int suppress_ifgroup; int suppress_prefixlen; char iifname[IFNAMSIZ]; char oifname[IFNAMSIZ]; struct fib_kuid_range uid_range; struct fib_rule_port_range sport_range; struct fib_rule_port_range dport_range; struct rcu_head rcu; }; struct fib_lookup_arg { void *lookup_ptr; const void *lookup_data; void *result; struct fib_rule *rule; u32 table; int flags; #define FIB_LOOKUP_NOREF 1 #define FIB_LOOKUP_IGNORE_LINKSTATE 2 }; struct fib_rules_ops { int family; struct list_head list; int rule_size; int addr_size; int unresolved_rules; int nr_goto_rules; unsigned int fib_rules_seq; int (*action)(struct fib_rule *, struct flowi *, int, struct fib_lookup_arg *); bool (*suppress)(struct fib_rule *, int, struct fib_lookup_arg *); int (*match)(struct fib_rule *, struct flowi *, int); int (*configure)(struct fib_rule *, struct sk_buff *, struct fib_rule_hdr *, struct nlattr **, struct netlink_ext_ack *); int (*delete)(struct fib_rule *); int (*compare)(struct fib_rule *, struct fib_rule_hdr *, struct nlattr **); int (*fill)(struct fib_rule *, struct sk_buff *, struct fib_rule_hdr *); size_t (*nlmsg_payload)(struct fib_rule *); /* Called after modifications to the rules set, must flush * the route cache if one exists. */ void (*flush_cache)(struct fib_rules_ops *ops); int nlgroup; struct list_head rules_list; struct module *owner; struct net *fro_net; struct rcu_head rcu; }; struct fib_rule_notifier_info { struct fib_notifier_info info; /* must be first */ struct fib_rule *rule; }; static inline void fib_rule_get(struct fib_rule *rule) { refcount_inc(&rule->refcnt); } static inline void fib_rule_put(struct fib_rule *rule) { if (refcount_dec_and_test(&rule->refcnt)) kfree_rcu(rule, rcu); } #ifdef CONFIG_NET_L3_MASTER_DEV static inline u32 fib_rule_get_table(struct fib_rule *rule, struct fib_lookup_arg *arg) { return rule->l3mdev ? arg->table : rule->table; } #else static inline u32 fib_rule_get_table(struct fib_rule *rule, struct fib_lookup_arg *arg) { return rule->table; } #endif static inline u32 frh_get_table(struct fib_rule_hdr *frh, struct nlattr **nla) { if (nla[FRA_TABLE]) return nla_get_u32(nla[FRA_TABLE]); return frh->table; } static inline bool fib_rule_port_range_set(const struct fib_rule_port_range *range) { return range->start != 0 && range->end != 0; } static inline bool fib_rule_port_inrange(const struct fib_rule_port_range *a, __be16 port) { return ntohs(port) >= a->start && ntohs(port) <= a->end; } static inline bool fib_rule_port_range_valid(const struct fib_rule_port_range *a) { return a->start != 0 && a->end != 0 && a->end < 0xffff && a->start <= a->end; } static inline bool fib_rule_port_range_compare(struct fib_rule_port_range *a, struct fib_rule_port_range *b) { return a->start == b->start && a->end == b->end; } static inline bool fib_rule_requires_fldissect(struct fib_rule *rule) { return rule->iifindex != LOOPBACK_IFINDEX && (rule->ip_proto || fib_rule_port_range_set(&rule->sport_range) || fib_rule_port_range_set(&rule->dport_range)); } struct fib_rules_ops *fib_rules_register(const struct fib_rules_ops *, struct net *); void fib_rules_unregister(struct fib_rules_ops *); int fib_rules_lookup(struct fib_rules_ops *, struct flowi *, int flags, struct fib_lookup_arg *); int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table); bool fib_rule_matchall(const struct fib_rule *rule); int fib_rules_dump(struct net *net, struct notifier_block *nb, int family, struct netlink_ext_ack *extack); unsigned int fib_rules_seq_read(struct net *net, int family); int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack); int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack); INDIRECT_CALLABLE_DECLARE(int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)); INDIRECT_CALLABLE_DECLARE(int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)); INDIRECT_CALLABLE_DECLARE(int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, int flags, struct fib_lookup_arg *arg)); INDIRECT_CALLABLE_DECLARE(int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, int flags, struct fib_lookup_arg *arg)); INDIRECT_CALLABLE_DECLARE(bool fib6_rule_suppress(struct fib_rule *rule, int flags, struct fib_lookup_arg *arg)); INDIRECT_CALLABLE_DECLARE(bool fib4_rule_suppress(struct fib_rule *rule, int flags, struct fib_lookup_arg *arg)); #endif
12 4 8 9 259 73 22 23 4 20 8 52 8 38 38 31 16 9 33 33 25 18 31 31 17 261 252 5 1 31 56 259 4 25 8 17 1 259 46 252 55 54 36 28 29 29 17 17 3 12 1 19 24 24 79 3 73 23 23 8 66 13 53 18 42 24 53 6 8 22 1 1 73 1 6 28 11 28 8 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/hfsplus/extents.c * * Copyright (C) 2001 * Brad Boyer (flar@allandria.com) * (C) 2003 Ardis Technologies <roman@ardistech.com> * * Handling of Extents both in catalog and extents overflow trees */ #include <linux/errno.h> #include <linux/fs.h> #include <linux/pagemap.h> #include "hfsplus_fs.h" #include "hfsplus_raw.h" /* Compare two extents keys, returns 0 on same, pos/neg for difference */ int hfsplus_ext_cmp_key(const hfsplus_btree_key *k1, const hfsplus_btree_key *k2) { __be32 k1id, k2id; __be32 k1s, k2s; k1id = k1->ext.cnid; k2id = k2->ext.cnid; if (k1id != k2id) return be32_to_cpu(k1id) < be32_to_cpu(k2id) ? -1 : 1; if (k1->ext.fork_type != k2->ext.fork_type) return k1->ext.fork_type < k2->ext.fork_type ? -1 : 1; k1s = k1->ext.start_block; k2s = k2->ext.start_block; if (k1s == k2s) return 0; return be32_to_cpu(k1s) < be32_to_cpu(k2s) ? -1 : 1; } static void hfsplus_ext_build_key(hfsplus_btree_key *key, u32 cnid, u32 block, u8 type) { key->key_len = cpu_to_be16(HFSPLUS_EXT_KEYLEN - 2); key->ext.cnid = cpu_to_be32(cnid); key->ext.start_block = cpu_to_be32(block); key->ext.fork_type = type; key->ext.pad = 0; } static u32 hfsplus_ext_find_block(struct hfsplus_extent *ext, u32 off) { int i; u32 count; for (i = 0; i < 8; ext++, i++) { count = be32_to_cpu(ext->block_count); if (off < count) return be32_to_cpu(ext->start_block) + off; off -= count; } /* panic? */ return 0; } static int hfsplus_ext_block_count(struct hfsplus_extent *ext) { int i; u32 count = 0; for (i = 0; i < 8; ext++, i++) count += be32_to_cpu(ext->block_count); return count; } static u32 hfsplus_ext_lastblock(struct hfsplus_extent *ext) { int i; ext += 7; for (i = 0; i < 7; ext--, i++) if (ext->block_count) break; return be32_to_cpu(ext->start_block) + be32_to_cpu(ext->block_count); } static int __hfsplus_ext_write_extent(struct inode *inode, struct hfs_find_data *fd) { struct hfsplus_inode_info *hip = HFSPLUS_I(inode); int res; WARN_ON(!mutex_is_locked(&hip->extents_lock)); hfsplus_ext_build_key(fd->search_key, inode->i_ino, hip->cached_start, HFSPLUS_IS_RSRC(inode) ? HFSPLUS_TYPE_RSRC : HFSPLUS_TYPE_DATA); res = hfs_brec_find(fd, hfs_find_rec_by_key); if (hip->extent_state & HFSPLUS_EXT_NEW) { if (res != -ENOENT) return res; /* Fail early and avoid ENOSPC during the btree operation */ res = hfs_bmap_reserve(fd->tree, fd->tree->depth + 1); if (res) return res; hfs_brec_insert(fd, hip->cached_extents, sizeof(hfsplus_extent_rec)); hip->extent_state &= ~(HFSPLUS_EXT_DIRTY | HFSPLUS_EXT_NEW); } else { if (res) return res; hfs_bnode_write(fd->bnode, hip->cached_extents, fd->entryoffset, fd->entrylength); hip->extent_state &= ~HFSPLUS_EXT_DIRTY; } /* * We can't just use hfsplus_mark_inode_dirty here, because we * also get called from hfsplus_write_inode, which should not * redirty the inode. Instead the callers have to be careful * to explicily mark the inode dirty, too. */ set_bit(HFSPLUS_I_EXT_DIRTY, &hip->flags); return 0; } static int hfsplus_ext_write_extent_locked(struct inode *inode) { int res = 0; if (HFSPLUS_I(inode)->extent_state & HFSPLUS_EXT_DIRTY) { struct hfs_find_data fd; res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd); if (res) return res; res = __hfsplus_ext_write_extent(inode, &fd); hfs_find_exit(&fd); } return res; } int hfsplus_ext_write_extent(struct inode *inode) { int res; mutex_lock(&HFSPLUS_I(inode)->extents_lock); res = hfsplus_ext_write_extent_locked(inode); mutex_unlock(&HFSPLUS_I(inode)->extents_lock); return res; } static inline int __hfsplus_ext_read_extent(struct hfs_find_data *fd, struct hfsplus_extent *extent, u32 cnid, u32 block, u8 type) { int res; hfsplus_ext_build_key(fd->search_key, cnid, block, type); fd->key->ext.cnid = 0; res = hfs_brec_find(fd, hfs_find_rec_by_key); if (res && res != -ENOENT) return res; if (fd->key->ext.cnid != fd->search_key->ext.cnid || fd->key->ext.fork_type != fd->search_key->ext.fork_type) return -ENOENT; if (fd->entrylength != sizeof(hfsplus_extent_rec)) return -EIO; hfs_bnode_read(fd->bnode, extent, fd->entryoffset, sizeof(hfsplus_extent_rec)); return 0; } static inline int __hfsplus_ext_cache_extent(struct hfs_find_data *fd, struct inode *inode, u32 block) { struct hfsplus_inode_info *hip = HFSPLUS_I(inode); int res; WARN_ON(!mutex_is_locked(&hip->extents_lock)); if (hip->extent_state & HFSPLUS_EXT_DIRTY) { res = __hfsplus_ext_write_extent(inode, fd); if (res) return res; } res = __hfsplus_ext_read_extent(fd, hip->cached_extents, inode->i_ino, block, HFSPLUS_IS_RSRC(inode) ? HFSPLUS_TYPE_RSRC : HFSPLUS_TYPE_DATA); if (!res) { hip->cached_start = be32_to_cpu(fd->key->ext.start_block); hip->cached_blocks = hfsplus_ext_block_count(hip->cached_extents); } else { hip->cached_start = hip->cached_blocks = 0; hip->extent_state &= ~(HFSPLUS_EXT_DIRTY | HFSPLUS_EXT_NEW); } return res; } static int hfsplus_ext_read_extent(struct inode *inode, u32 block) { struct hfsplus_inode_info *hip = HFSPLUS_I(inode); struct hfs_find_data fd; int res; if (block >= hip->cached_start && block < hip->cached_start + hip->cached_blocks) return 0; res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd); if (!res) { res = __hfsplus_ext_cache_extent(&fd, inode, block); hfs_find_exit(&fd); } return res; } /* Get a block at iblock for inode, possibly allocating if create */ int hfsplus_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { struct super_block *sb = inode->i_sb; struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); struct hfsplus_inode_info *hip = HFSPLUS_I(inode); int res = -EIO; u32 ablock, dblock, mask; sector_t sector; int was_dirty = 0; /* Convert inode block to disk allocation block */ ablock = iblock >> sbi->fs_shift; if (iblock >= hip->fs_blocks) { if (!create) return 0; if (iblock > hip->fs_blocks) return -EIO; if (ablock >= hip->alloc_blocks) { res = hfsplus_file_extend(inode, false); if (res) return res; } } else create = 0; if (ablock < hip->first_blocks) { dblock = hfsplus_ext_find_block(hip->first_extents, ablock); goto done; } if (inode->i_ino == HFSPLUS_EXT_CNID) return -EIO; mutex_lock(&hip->extents_lock); /* * hfsplus_ext_read_extent will write out a cached extent into * the extents btree. In that case we may have to mark the inode * dirty even for a pure read of an extent here. */ was_dirty = (hip->extent_state & HFSPLUS_EXT_DIRTY); res = hfsplus_ext_read_extent(inode, ablock); if (res) { mutex_unlock(&hip->extents_lock); return -EIO; } dblock = hfsplus_ext_find_block(hip->cached_extents, ablock - hip->cached_start); mutex_unlock(&hip->extents_lock); done: hfs_dbg(EXTENT, "get_block(%lu): %llu - %u\n", inode->i_ino, (long long)iblock, dblock); mask = (1 << sbi->fs_shift) - 1; sector = ((sector_t)dblock << sbi->fs_shift) + sbi->blockoffset + (iblock & mask); map_bh(bh_result, sb, sector); if (create) { set_buffer_new(bh_result); hip->phys_size += sb->s_blocksize; hip->fs_blocks++; inode_add_bytes(inode, sb->s_blocksize); } if (create || was_dirty) mark_inode_dirty(inode); return 0; } static void hfsplus_dump_extent(struct hfsplus_extent *extent) { int i; hfs_dbg(EXTENT, " "); for (i = 0; i < 8; i++) hfs_dbg_cont(EXTENT, " %u:%u", be32_to_cpu(extent[i].start_block), be32_to_cpu(extent[i].block_count)); hfs_dbg_cont(EXTENT, "\n"); } static int hfsplus_add_extent(struct hfsplus_extent *extent, u32 offset, u32 alloc_block, u32 block_count) { u32 count, start; int i; hfsplus_dump_extent(extent); for (i = 0; i < 8; extent++, i++) { count = be32_to_cpu(extent->block_count); if (offset == count) { start = be32_to_cpu(extent->start_block); if (alloc_block != start + count) { if (++i >= 8) return -ENOSPC; extent++; extent->start_block = cpu_to_be32(alloc_block); } else block_count += count; extent->block_count = cpu_to_be32(block_count); return 0; } else if (offset < count) break; offset -= count; } /* panic? */ return -EIO; } static int hfsplus_free_extents(struct super_block *sb, struct hfsplus_extent *extent, u32 offset, u32 block_nr) { u32 count, start; int i; int err = 0; /* Mapping the allocation file may lock the extent tree */ WARN_ON(mutex_is_locked(&HFSPLUS_SB(sb)->ext_tree->tree_lock)); hfsplus_dump_extent(extent); for (i = 0; i < 8; extent++, i++) { count = be32_to_cpu(extent->block_count); if (offset == count) goto found; else if (offset < count) break; offset -= count; } /* panic? */ return -EIO; found: for (;;) { start = be32_to_cpu(extent->start_block); if (count <= block_nr) { err = hfsplus_block_free(sb, start, count); if (err) { pr_err("can't free extent\n"); hfs_dbg(EXTENT, " start: %u count: %u\n", start, count); } extent->block_count = 0; extent->start_block = 0; block_nr -= count; } else { count -= block_nr; err = hfsplus_block_free(sb, start + count, block_nr); if (err) { pr_err("can't free extent\n"); hfs_dbg(EXTENT, " start: %u count: %u\n", start, count); } extent->block_count = cpu_to_be32(count); block_nr = 0; } if (!block_nr || !i) { /* * Try to free all extents and * return only last error */ return err; } i--; extent--; count = be32_to_cpu(extent->block_count); } } int hfsplus_free_fork(struct super_block *sb, u32 cnid, struct hfsplus_fork_raw *fork, int type) { struct hfs_find_data fd; hfsplus_extent_rec ext_entry; u32 total_blocks, blocks, start; int res, i; total_blocks = be32_to_cpu(fork->total_blocks); if (!total_blocks) return 0; blocks = 0; for (i = 0; i < 8; i++) blocks += be32_to_cpu(fork->extents[i].block_count); res = hfsplus_free_extents(sb, fork->extents, blocks, blocks); if (res) return res; if (total_blocks == blocks) return 0; res = hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd); if (res) return res; do { res = __hfsplus_ext_read_extent(&fd, ext_entry, cnid, total_blocks, type); if (res) break; start = be32_to_cpu(fd.key->ext.start_block); hfs_brec_remove(&fd); mutex_unlock(&fd.tree->tree_lock); hfsplus_free_extents(sb, ext_entry, total_blocks - start, total_blocks); total_blocks = start; mutex_lock_nested(&fd.tree->tree_lock, hfsplus_btree_lock_class(fd.tree)); } while (total_blocks > blocks); hfs_find_exit(&fd); return res; } int hfsplus_file_extend(struct inode *inode, bool zeroout) { struct super_block *sb = inode->i_sb; struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); struct hfsplus_inode_info *hip = HFSPLUS_I(inode); u32 start, len, goal; int res; if (sbi->alloc_file->i_size * 8 < sbi->total_blocks - sbi->free_blocks + 8) { /* extend alloc file */ pr_err_ratelimited("extend alloc file! (%llu,%u,%u)\n", sbi->alloc_file->i_size * 8, sbi->total_blocks, sbi->free_blocks); return -ENOSPC; } mutex_lock(&hip->extents_lock); if (hip->alloc_blocks == hip->first_blocks) goal = hfsplus_ext_lastblock(hip->first_extents); else { res = hfsplus_ext_read_extent(inode, hip->alloc_blocks); if (res) goto out; goal = hfsplus_ext_lastblock(hip->cached_extents); } len = hip->clump_blocks; start = hfsplus_block_allocate(sb, sbi->total_blocks, goal, &len); if (start >= sbi->total_blocks) { start = hfsplus_block_allocate(sb, goal, 0, &len); if (start >= goal) { res = -ENOSPC; goto out; } } if (zeroout) { res = sb_issue_zeroout(sb, start, len, GFP_NOFS); if (res) goto out; } hfs_dbg(EXTENT, "extend %lu: %u,%u\n", inode->i_ino, start, len); if (hip->alloc_blocks <= hip->first_blocks) { if (!hip->first_blocks) { hfs_dbg(EXTENT, "first extents\n"); /* no extents yet */ hip->first_extents[0].start_block = cpu_to_be32(start); hip->first_extents[0].block_count = cpu_to_be32(len); res = 0; } else { /* try to append to extents in inode */ res = hfsplus_add_extent(hip->first_extents, hip->alloc_blocks, start, len); if (res == -ENOSPC) goto insert_extent; } if (!res) { hfsplus_dump_extent(hip->first_extents); hip->first_blocks += len; } } else { res = hfsplus_add_extent(hip->cached_extents, hip->alloc_blocks - hip->cached_start, start, len); if (!res) { hfsplus_dump_extent(hip->cached_extents); hip->extent_state |= HFSPLUS_EXT_DIRTY; hip->cached_blocks += len; } else if (res == -ENOSPC) goto insert_extent; } out: if (!res) { hip->alloc_blocks += len; mutex_unlock(&hip->extents_lock); hfsplus_mark_inode_dirty(inode, HFSPLUS_I_ALLOC_DIRTY); return 0; } mutex_unlock(&hip->extents_lock); return res; insert_extent: hfs_dbg(EXTENT, "insert new extent\n"); res = hfsplus_ext_write_extent_locked(inode); if (res) goto out; memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec)); hip->cached_extents[0].start_block = cpu_to_be32(start); hip->cached_extents[0].block_count = cpu_to_be32(len); hfsplus_dump_extent(hip->cached_extents); hip->extent_state |= HFSPLUS_EXT_DIRTY | HFSPLUS_EXT_NEW; hip->cached_start = hip->alloc_blocks; hip->cached_blocks = len; res = 0; goto out; } void hfsplus_file_truncate(struct inode *inode) { struct super_block *sb = inode->i_sb; struct hfsplus_inode_info *hip = HFSPLUS_I(inode); struct hfs_find_data fd; u32 alloc_cnt, blk_cnt, start; int res; hfs_dbg(INODE, "truncate: %lu, %llu -> %llu\n", inode->i_ino, (long long)hip->phys_size, inode->i_size); if (inode->i_size > hip->phys_size) { struct address_space *mapping = inode->i_mapping; struct folio *folio; void *fsdata = NULL; loff_t size = inode->i_size; res = hfsplus_write_begin(NULL, mapping, size, 0, &folio, &fsdata); if (res) return; res = generic_write_end(NULL, mapping, size, 0, 0, folio, fsdata); if (res < 0) return; mark_inode_dirty(inode); return; } else if (inode->i_size == hip->phys_size) return; blk_cnt = (inode->i_size + HFSPLUS_SB(sb)->alloc_blksz - 1) >> HFSPLUS_SB(sb)->alloc_blksz_shift; mutex_lock(&hip->extents_lock); alloc_cnt = hip->alloc_blocks; if (blk_cnt == alloc_cnt) goto out_unlock; res = hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd); if (res) { mutex_unlock(&hip->extents_lock); /* XXX: We lack error handling of hfsplus_file_truncate() */ return; } while (1) { if (alloc_cnt == hip->first_blocks) { mutex_unlock(&fd.tree->tree_lock); hfsplus_free_extents(sb, hip->first_extents, alloc_cnt, alloc_cnt - blk_cnt); hfsplus_dump_extent(hip->first_extents); hip->first_blocks = blk_cnt; mutex_lock_nested(&fd.tree->tree_lock, hfsplus_btree_lock_class(fd.tree)); break; } res = __hfsplus_ext_cache_extent(&fd, inode, alloc_cnt); if (res) break; start = hip->cached_start; if (blk_cnt <= start) hfs_brec_remove(&fd); mutex_unlock(&fd.tree->tree_lock); hfsplus_free_extents(sb, hip->cached_extents, alloc_cnt - start, alloc_cnt - blk_cnt); hfsplus_dump_extent(hip->cached_extents); mutex_lock_nested(&fd.tree->tree_lock, hfsplus_btree_lock_class(fd.tree)); if (blk_cnt > start) { hip->extent_state |= HFSPLUS_EXT_DIRTY; break; } alloc_cnt = start; hip->cached_start = hip->cached_blocks = 0; hip->extent_state &= ~(HFSPLUS_EXT_DIRTY | HFSPLUS_EXT_NEW); } hfs_find_exit(&fd); hip->alloc_blocks = blk_cnt; out_unlock: mutex_unlock(&hip->extents_lock); hip->phys_size = inode->i_size; hip->fs_blocks = (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; inode_set_bytes(inode, hip->fs_blocks << sb->s_blocksize_bits); hfsplus_mark_inode_dirty(inode, HFSPLUS_I_ALLOC_DIRTY); }
40 40 40 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 // SPDX-License-Identifier: GPL-2.0-or-later /* rxrpc network namespace handling. * * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #include <linux/proc_fs.h> #include "ar-internal.h" unsigned int rxrpc_net_id; static void rxrpc_service_conn_reap_timeout(struct timer_list *timer) { struct rxrpc_net *rxnet = container_of(timer, struct rxrpc_net, service_conn_reap_timer); if (rxnet->live) rxrpc_queue_work(&rxnet->service_conn_reaper); } static void rxrpc_peer_keepalive_timeout(struct timer_list *timer) { struct rxrpc_net *rxnet = container_of(timer, struct rxrpc_net, peer_keepalive_timer); if (rxnet->live) rxrpc_queue_work(&rxnet->peer_keepalive_work); } /* * Initialise a per-network namespace record. */ static __net_init int rxrpc_init_net(struct net *net) { struct rxrpc_net *rxnet = rxrpc_net(net); int ret, i; rxnet->live = true; get_random_bytes(&rxnet->epoch, sizeof(rxnet->epoch)); rxnet->epoch |= RXRPC_RANDOM_EPOCH; INIT_LIST_HEAD(&rxnet->calls); spin_lock_init(&rxnet->call_lock); atomic_set(&rxnet->nr_calls, 1); atomic_set(&rxnet->nr_conns, 1); INIT_LIST_HEAD(&rxnet->bundle_proc_list); INIT_LIST_HEAD(&rxnet->conn_proc_list); INIT_LIST_HEAD(&rxnet->service_conns); rwlock_init(&rxnet->conn_lock); INIT_WORK(&rxnet->service_conn_reaper, rxrpc_service_connection_reaper); timer_setup(&rxnet->service_conn_reap_timer, rxrpc_service_conn_reap_timeout, 0); atomic_set(&rxnet->nr_client_conns, 0); INIT_HLIST_HEAD(&rxnet->local_endpoints); mutex_init(&rxnet->local_mutex); hash_init(rxnet->peer_hash); spin_lock_init(&rxnet->peer_hash_lock); for (i = 0; i < ARRAY_SIZE(rxnet->peer_keepalive); i++) INIT_LIST_HEAD(&rxnet->peer_keepalive[i]); INIT_LIST_HEAD(&rxnet->peer_keepalive_new); timer_setup(&rxnet->peer_keepalive_timer, rxrpc_peer_keepalive_timeout, 0); INIT_WORK(&rxnet->peer_keepalive_work, rxrpc_peer_keepalive_worker); rxnet->peer_keepalive_base = ktime_get_seconds(); ret = -ENOMEM; rxnet->proc_net = proc_net_mkdir(net, "rxrpc", net->proc_net); if (!rxnet->proc_net) goto err_proc; proc_create_net("calls", 0444, rxnet->proc_net, &rxrpc_call_seq_ops, sizeof(struct seq_net_private)); proc_create_net("conns", 0444, rxnet->proc_net, &rxrpc_connection_seq_ops, sizeof(struct seq_net_private)); proc_create_net("bundles", 0444, rxnet->proc_net, &rxrpc_bundle_seq_ops, sizeof(struct seq_net_private)); proc_create_net("peers", 0444, rxnet->proc_net, &rxrpc_peer_seq_ops, sizeof(struct seq_net_private)); proc_create_net("locals", 0444, rxnet->proc_net, &rxrpc_local_seq_ops, sizeof(struct seq_net_private)); proc_create_net_single_write("stats", S_IFREG | 0644, rxnet->proc_net, rxrpc_stats_show, rxrpc_stats_clear, NULL); return 0; err_proc: rxnet->live = false; return ret; } /* * Clean up a per-network namespace record. */ static __net_exit void rxrpc_exit_net(struct net *net) { struct rxrpc_net *rxnet = rxrpc_net(net); rxnet->live = false; del_timer_sync(&rxnet->peer_keepalive_timer); cancel_work_sync(&rxnet->peer_keepalive_work); /* Remove the timer again as the worker may have restarted it. */ del_timer_sync(&rxnet->peer_keepalive_timer); rxrpc_destroy_all_calls(rxnet); rxrpc_destroy_all_connections(rxnet); rxrpc_destroy_all_peers(rxnet); rxrpc_destroy_all_locals(rxnet); proc_remove(rxnet->proc_net); } struct pernet_operations rxrpc_net_ops = { .init = rxrpc_init_net, .exit = rxrpc_exit_net, .id = &rxrpc_net_id, .size = sizeof(struct rxrpc_net), };
7 7 10 10 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 // SPDX-License-Identifier: GPL-2.0 #include <linux/export.h> #include <linux/preempt.h> #include <linux/smp.h> #include <linux/completion.h> #include <asm/msr.h> static void __rdmsr_on_cpu(void *info) { struct msr_info *rv = info; struct msr *reg; if (rv->msrs) reg = this_cpu_ptr(rv->msrs); else reg = &rv->reg; rdmsr(rv->msr_no, reg->l, reg->h); } static void __wrmsr_on_cpu(void *info) { struct msr_info *rv = info; struct msr *reg; if (rv->msrs) reg = this_cpu_ptr(rv->msrs); else reg = &rv->reg; wrmsr(rv->msr_no, reg->l, reg->h); } int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) { int err; struct msr_info rv; memset(&rv, 0, sizeof(rv)); rv.msr_no = msr_no; err = smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1); *l = rv.reg.l; *h = rv.reg.h; return err; } EXPORT_SYMBOL(rdmsr_on_cpu); int rdmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 *q) { int err; struct msr_info rv; memset(&rv, 0, sizeof(rv)); rv.msr_no = msr_no; err = smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1); *q = rv.reg.q; return err; } EXPORT_SYMBOL(rdmsrl_on_cpu); int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) { int err; struct msr_info rv; memset(&rv, 0, sizeof(rv)); rv.msr_no = msr_no; rv.reg.l = l; rv.reg.h = h; err = smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1); return err; } EXPORT_SYMBOL(wrmsr_on_cpu); int wrmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 q) { int err; struct msr_info rv; memset(&rv, 0, sizeof(rv)); rv.msr_no = msr_no; rv.reg.q = q; err = smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1); return err; } EXPORT_SYMBOL(wrmsrl_on_cpu); static void __rwmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr __percpu *msrs, void (*msr_func) (void *info)) { struct msr_info rv; int this_cpu; memset(&rv, 0, sizeof(rv)); rv.msrs = msrs; rv.msr_no = msr_no; this_cpu = get_cpu(); if (cpumask_test_cpu(this_cpu, mask)) msr_func(&rv); smp_call_function_many(mask, msr_func, &rv, 1); put_cpu(); } /* rdmsr on a bunch of CPUs * * @mask: which CPUs * @msr_no: which MSR * @msrs: array of MSR values * */ void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr __percpu *msrs) { __rwmsr_on_cpus(mask, msr_no, msrs, __rdmsr_on_cpu); } EXPORT_SYMBOL(rdmsr_on_cpus); /* * wrmsr on a bunch of CPUs * * @mask: which CPUs * @msr_no: which MSR * @msrs: array of MSR values * */ void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr __percpu *msrs) { __rwmsr_on_cpus(mask, msr_no, msrs, __wrmsr_on_cpu); } EXPORT_SYMBOL(wrmsr_on_cpus); struct msr_info_completion { struct msr_info msr; struct completion done; }; /* These "safe" variants are slower and should be used when the target MSR may not actually exist. */ static void __rdmsr_safe_on_cpu(void *info) { struct msr_info_completion *rv = info; rv->msr.err = rdmsr_safe(rv->msr.msr_no, &rv->msr.reg.l, &rv->msr.reg.h); complete(&rv->done); } static void __wrmsr_safe_on_cpu(void *info) { struct msr_info *rv = info; rv->err = wrmsr_safe(rv->msr_no, rv->reg.l, rv->reg.h); } int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) { struct msr_info_completion rv; call_single_data_t csd; int err; INIT_CSD(&csd, __rdmsr_safe_on_cpu, &rv); memset(&rv, 0, sizeof(rv)); init_completion(&rv.done); rv.msr.msr_no = msr_no; err = smp_call_function_single_async(cpu, &csd); if (!err) { wait_for_completion(&rv.done); err = rv.msr.err; } *l = rv.msr.reg.l; *h = rv.msr.reg.h; return err; } EXPORT_SYMBOL(rdmsr_safe_on_cpu); int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) { int err; struct msr_info rv; memset(&rv, 0, sizeof(rv)); rv.msr_no = msr_no; rv.reg.l = l; rv.reg.h = h; err = smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 1); return err ? err : rv.err; } EXPORT_SYMBOL(wrmsr_safe_on_cpu); int wrmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 q) { int err; struct msr_info rv; memset(&rv, 0, sizeof(rv)); rv.msr_no = msr_no; rv.reg.q = q; err = smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 1); return err ? err : rv.err; } EXPORT_SYMBOL(wrmsrl_safe_on_cpu); int rdmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 *q) { u32 low, high; int err; err = rdmsr_safe_on_cpu(cpu, msr_no, &low, &high); *q = (u64)high << 32 | low; return err; } EXPORT_SYMBOL(rdmsrl_safe_on_cpu); /* * These variants are significantly slower, but allows control over * the entire 32-bit GPR set. */ static void __rdmsr_safe_regs_on_cpu(void *info) { struct msr_regs_info *rv = info; rv->err = rdmsr_safe_regs(rv->regs); } static void __wrmsr_safe_regs_on_cpu(void *info) { struct msr_regs_info *rv = info; rv->err = wrmsr_safe_regs(rv->regs); } int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]) { int err; struct msr_regs_info rv; rv.regs = regs; rv.err = -EIO; err = smp_call_function_single(cpu, __rdmsr_safe_regs_on_cpu, &rv, 1); return err ? err : rv.err; } EXPORT_SYMBOL(rdmsr_safe_regs_on_cpu); int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]) { int err; struct msr_regs_info rv; rv.regs = regs; rv.err = -EIO; err = smp_call_function_single(cpu, __wrmsr_safe_regs_on_cpu, &rv, 1); return err ? err : rv.err; } EXPORT_SYMBOL(wrmsr_safe_regs_on_cpu);
2 7 20 2 2 9 4 7 9 32 15 2 2 2 2 8 8 7 2 6 6 8 6 6 7 7 5 2 3 2 2 5 2 2 16 17 10 17 1 1 7 7 1 6 1 8 1 1 2 22 1 9 9 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 // SPDX-License-Identifier: GPL-2.0-only /* * Sync File validation framework * * Copyright (C) 2012 Google, Inc. */ #include <linux/file.h> #include <linux/fs.h> #include <linux/uaccess.h> #include <linux/slab.h> #include <linux/sync_file.h> #include "sync_debug.h" #define CREATE_TRACE_POINTS #include "sync_trace.h" /* * SW SYNC validation framework * * A sync object driver that uses a 32bit counter to coordinate * synchronization. Useful when there is no hardware primitive backing * the synchronization. * * To start the framework just open: * * <debugfs>/sync/sw_sync * * That will create a sync timeline, all fences created under this timeline * file descriptor will belong to the this timeline. * * The 'sw_sync' file can be opened many times as to create different * timelines. * * Fences can be created with SW_SYNC_IOC_CREATE_FENCE ioctl with struct * sw_sync_create_fence_data as parameter. * * To increment the timeline counter, SW_SYNC_IOC_INC ioctl should be used * with the increment as u32. This will update the last signaled value * from the timeline and signal any fence that has a seqno smaller or equal * to it. * * struct sw_sync_create_fence_data * @value: the seqno to initialise the fence with * @name: the name of the new sync point * @fence: return the fd of the new sync_file with the created fence */ struct sw_sync_create_fence_data { __u32 value; char name[32]; __s32 fence; /* fd of new fence */ }; /** * struct sw_sync_get_deadline - get the deadline hint of a sw_sync fence * @deadline_ns: absolute time of the deadline * @pad: must be zero * @fence_fd: the sw_sync fence fd (in) * * Return the earliest deadline set on the fence. The timebase for the * deadline is CLOCK_MONOTONIC (same as vblank). If there is no deadline * set on the fence, this ioctl will return -ENOENT. */ struct sw_sync_get_deadline { __u64 deadline_ns; __u32 pad; __s32 fence_fd; }; #define SW_SYNC_IOC_MAGIC 'W' #define SW_SYNC_IOC_CREATE_FENCE _IOWR(SW_SYNC_IOC_MAGIC, 0,\ struct sw_sync_create_fence_data) #define SW_SYNC_IOC_INC _IOW(SW_SYNC_IOC_MAGIC, 1, __u32) #define SW_SYNC_GET_DEADLINE _IOWR(SW_SYNC_IOC_MAGIC, 2, \ struct sw_sync_get_deadline) #define SW_SYNC_HAS_DEADLINE_BIT DMA_FENCE_FLAG_USER_BITS static const struct dma_fence_ops timeline_fence_ops; static inline struct sync_pt *dma_fence_to_sync_pt(struct dma_fence *fence) { if (fence->ops != &timeline_fence_ops) return NULL; return container_of(fence, struct sync_pt, base); } /** * sync_timeline_create() - creates a sync object * @name: sync_timeline name * * Creates a new sync_timeline. Returns the sync_timeline object or NULL in * case of error. */ static struct sync_timeline *sync_timeline_create(const char *name) { struct sync_timeline *obj; obj = kzalloc(sizeof(*obj), GFP_KERNEL); if (!obj) return NULL; kref_init(&obj->kref); obj->context = dma_fence_context_alloc(1); strscpy(obj->name, name, sizeof(obj->name)); obj->pt_tree = RB_ROOT; INIT_LIST_HEAD(&obj->pt_list); spin_lock_init(&obj->lock); sync_timeline_debug_add(obj); return obj; } static void sync_timeline_free(struct kref *kref) { struct sync_timeline *obj = container_of(kref, struct sync_timeline, kref); sync_timeline_debug_remove(obj); kfree(obj); } static void sync_timeline_get(struct sync_timeline *obj) { kref_get(&obj->kref); } static void sync_timeline_put(struct sync_timeline *obj) { kref_put(&obj->kref, sync_timeline_free); } static const char *timeline_fence_get_driver_name(struct dma_fence *fence) { return "sw_sync"; } static const char *timeline_fence_get_timeline_name(struct dma_fence *fence) { struct sync_timeline *parent = dma_fence_parent(fence); return parent->name; } static void timeline_fence_release(struct dma_fence *fence) { struct sync_pt *pt = dma_fence_to_sync_pt(fence); struct sync_timeline *parent = dma_fence_parent(fence); unsigned long flags; spin_lock_irqsave(fence->lock, flags); if (!list_empty(&pt->link)) { list_del(&pt->link); rb_erase(&pt->node, &parent->pt_tree); } spin_unlock_irqrestore(fence->lock, flags); sync_timeline_put(parent); dma_fence_free(fence); } static bool timeline_fence_signaled(struct dma_fence *fence) { struct sync_timeline *parent = dma_fence_parent(fence); return !__dma_fence_is_later(fence->seqno, parent->value, fence->ops); } static bool timeline_fence_enable_signaling(struct dma_fence *fence) { return true; } static void timeline_fence_value_str(struct dma_fence *fence, char *str, int size) { snprintf(str, size, "%lld", fence->seqno); } static void timeline_fence_timeline_value_str(struct dma_fence *fence, char *str, int size) { struct sync_timeline *parent = dma_fence_parent(fence); snprintf(str, size, "%d", parent->value); } static void timeline_fence_set_deadline(struct dma_fence *fence, ktime_t deadline) { struct sync_pt *pt = dma_fence_to_sync_pt(fence); unsigned long flags; spin_lock_irqsave(fence->lock, flags); if (test_bit(SW_SYNC_HAS_DEADLINE_BIT, &fence->flags)) { if (ktime_before(deadline, pt->deadline)) pt->deadline = deadline; } else { pt->deadline = deadline; __set_bit(SW_SYNC_HAS_DEADLINE_BIT, &fence->flags); } spin_unlock_irqrestore(fence->lock, flags); } static const struct dma_fence_ops timeline_fence_ops = { .get_driver_name = timeline_fence_get_driver_name, .get_timeline_name = timeline_fence_get_timeline_name, .enable_signaling = timeline_fence_enable_signaling, .signaled = timeline_fence_signaled, .release = timeline_fence_release, .fence_value_str = timeline_fence_value_str, .timeline_value_str = timeline_fence_timeline_value_str, .set_deadline = timeline_fence_set_deadline, }; /** * sync_timeline_signal() - signal a status change on a sync_timeline * @obj: sync_timeline to signal * @inc: num to increment on timeline->value * * A sync implementation should call this any time one of it's fences * has signaled or has an error condition. */ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc) { LIST_HEAD(signalled); struct sync_pt *pt, *next; trace_sync_timeline(obj); spin_lock_irq(&obj->lock); obj->value += inc; list_for_each_entry_safe(pt, next, &obj->pt_list, link) { if (!timeline_fence_signaled(&pt->base)) break; dma_fence_get(&pt->base); list_move_tail(&pt->link, &signalled); rb_erase(&pt->node, &obj->pt_tree); dma_fence_signal_locked(&pt->base); } spin_unlock_irq(&obj->lock); list_for_each_entry_safe(pt, next, &signalled, link) { list_del_init(&pt->link); dma_fence_put(&pt->base); } } /** * sync_pt_create() - creates a sync pt * @obj: parent sync_timeline * @value: value of the fence * * Creates a new sync_pt (fence) as a child of @parent. @size bytes will be * allocated allowing for implementation specific data to be kept after * the generic sync_timeline struct. Returns the sync_pt object or * NULL in case of error. */ static struct sync_pt *sync_pt_create(struct sync_timeline *obj, unsigned int value) { struct sync_pt *pt; pt = kzalloc(sizeof(*pt), GFP_KERNEL); if (!pt) return NULL; sync_timeline_get(obj); dma_fence_init(&pt->base, &timeline_fence_ops, &obj->lock, obj->context, value); INIT_LIST_HEAD(&pt->link); spin_lock_irq(&obj->lock); if (!dma_fence_is_signaled_locked(&pt->base)) { struct rb_node **p = &obj->pt_tree.rb_node; struct rb_node *parent = NULL; while (*p) { struct sync_pt *other; int cmp; parent = *p; other = rb_entry(parent, typeof(*pt), node); cmp = value - other->base.seqno; if (cmp > 0) { p = &parent->rb_right; } else if (cmp < 0) { p = &parent->rb_left; } else { if (dma_fence_get_rcu(&other->base)) { sync_timeline_put(obj); kfree(pt); pt = other; goto unlock; } p = &parent->rb_left; } } rb_link_node(&pt->node, parent, p); rb_insert_color(&pt->node, &obj->pt_tree); parent = rb_next(&pt->node); list_add_tail(&pt->link, parent ? &rb_entry(parent, typeof(*pt), node)->link : &obj->pt_list); } unlock: spin_unlock_irq(&obj->lock); return pt; } /* * *WARNING* * * improper use of this can result in deadlocking kernel drivers from userspace. */ /* opening sw_sync create a new sync obj */ static int sw_sync_debugfs_open(struct inode *inode, struct file *file) { struct sync_timeline *obj; char task_comm[TASK_COMM_LEN]; get_task_comm(task_comm, current); obj = sync_timeline_create(task_comm); if (!obj) return -ENOMEM; file->private_data = obj; return 0; } static int sw_sync_debugfs_release(struct inode *inode, struct file *file) { struct sync_timeline *obj = file->private_data; struct sync_pt *pt, *next; spin_lock_irq(&obj->lock); list_for_each_entry_safe(pt, next, &obj->pt_list, link) { dma_fence_set_error(&pt->base, -ENOENT); dma_fence_signal_locked(&pt->base); } spin_unlock_irq(&obj->lock); sync_timeline_put(obj); return 0; } static long sw_sync_ioctl_create_fence(struct sync_timeline *obj, unsigned long arg) { int fd = get_unused_fd_flags(O_CLOEXEC); int err; struct sync_pt *pt; struct sync_file *sync_file; struct sw_sync_create_fence_data data; if (fd < 0) return fd; if (copy_from_user(&data, (void __user *)arg, sizeof(data))) { err = -EFAULT; goto err; } pt = sync_pt_create(obj, data.value); if (!pt) { err = -ENOMEM; goto err; } sync_file = sync_file_create(&pt->base); dma_fence_put(&pt->base); if (!sync_file) { err = -ENOMEM; goto err; } data.fence = fd; if (copy_to_user((void __user *)arg, &data, sizeof(data))) { fput(sync_file->file); err = -EFAULT; goto err; } fd_install(fd, sync_file->file); return 0; err: put_unused_fd(fd); return err; } static long sw_sync_ioctl_inc(struct sync_timeline *obj, unsigned long arg) { u32 value; if (copy_from_user(&value, (void __user *)arg, sizeof(value))) return -EFAULT; while (value > INT_MAX) { sync_timeline_signal(obj, INT_MAX); value -= INT_MAX; } sync_timeline_signal(obj, value); return 0; } static int sw_sync_ioctl_get_deadline(struct sync_timeline *obj, unsigned long arg) { struct sw_sync_get_deadline data; struct dma_fence *fence; unsigned long flags; struct sync_pt *pt; int ret = 0; if (copy_from_user(&data, (void __user *)arg, sizeof(data))) return -EFAULT; if (data.deadline_ns || data.pad) return -EINVAL; fence = sync_file_get_fence(data.fence_fd); if (!fence) return -EINVAL; pt = dma_fence_to_sync_pt(fence); if (!pt) return -EINVAL; spin_lock_irqsave(fence->lock, flags); if (test_bit(SW_SYNC_HAS_DEADLINE_BIT, &fence->flags)) { data.deadline_ns = ktime_to_ns(pt->deadline); } else { ret = -ENOENT; } spin_unlock_irqrestore(fence->lock, flags); dma_fence_put(fence); if (ret) return ret; if (copy_to_user((void __user *)arg, &data, sizeof(data))) return -EFAULT; return 0; } static long sw_sync_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct sync_timeline *obj = file->private_data; switch (cmd) { case SW_SYNC_IOC_CREATE_FENCE: return sw_sync_ioctl_create_fence(obj, arg); case SW_SYNC_IOC_INC: return sw_sync_ioctl_inc(obj, arg); case SW_SYNC_GET_DEADLINE: return sw_sync_ioctl_get_deadline(obj, arg); default: return -ENOTTY; } } const struct file_operations sw_sync_debugfs_fops = { .open = sw_sync_debugfs_open, .release = sw_sync_debugfs_release, .unlocked_ioctl = sw_sync_ioctl, .compat_ioctl = compat_ptr_ioctl, };
10 9 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 // SPDX-License-Identifier: GPL-2.0 /* * Verification of builtin signatures * * Copyright 2019 Google LLC */ /* * This file implements verification of fs-verity builtin signatures. Please * take great care before using this feature. It is not the only way to do * signatures with fs-verity, and the alternatives (such as userspace signature * verification, and IMA appraisal) can be much better. For details about the * limitations of this feature, see Documentation/filesystems/fsverity.rst. */ #include "fsverity_private.h" #include <linux/cred.h> #include <linux/key.h> #include <linux/security.h> #include <linux/slab.h> #include <linux/verification.h> /* * /proc/sys/fs/verity/require_signatures * If 1, all verity files must have a valid builtin signature. */ int fsverity_require_signatures; /* * Keyring that contains the trusted X.509 certificates. * * Only root (kuid=0) can modify this. Also, root may use * keyctl_restrict_keyring() to prevent any more additions. */ static struct key *fsverity_keyring; /** * fsverity_verify_signature() - check a verity file's signature * @vi: the file's fsverity_info * @signature: the file's built-in signature * @sig_size: size of signature in bytes, or 0 if no signature * * If the file includes a signature of its fs-verity file digest, verify it * against the certificates in the fs-verity keyring. Note that signatures * are verified regardless of the state of the 'fsverity_require_signatures' * variable and the LSM subsystem relies on this behavior to help enforce * file integrity policies. Please discuss changes with the LSM list * (thank you!). * * Return: 0 on success (signature valid or not required); -errno on failure */ int fsverity_verify_signature(const struct fsverity_info *vi, const u8 *signature, size_t sig_size) { const struct inode *inode = vi->inode; const struct fsverity_hash_alg *hash_alg = vi->tree_params.hash_alg; struct fsverity_formatted_digest *d; int err; if (sig_size == 0) { if (fsverity_require_signatures) { fsverity_err(inode, "require_signatures=1, rejecting unsigned file!"); return -EPERM; } return 0; } if (fsverity_keyring->keys.nr_leaves_on_tree == 0) { /* * The ".fs-verity" keyring is empty, due to builtin signatures * being supported by the kernel but not actually being used. * In this case, verify_pkcs7_signature() would always return an * error, usually ENOKEY. It could also be EBADMSG if the * PKCS#7 is malformed, but that isn't very important to * distinguish. So, just skip to ENOKEY to avoid the attack * surface of the PKCS#7 parser, which would otherwise be * reachable by any task able to execute FS_IOC_ENABLE_VERITY. */ fsverity_err(inode, "fs-verity keyring is empty, rejecting signed file!"); return -ENOKEY; } d = kzalloc(sizeof(*d) + hash_alg->digest_size, GFP_KERNEL); if (!d) return -ENOMEM; memcpy(d->magic, "FSVerity", 8); d->digest_algorithm = cpu_to_le16(hash_alg - fsverity_hash_algs); d->digest_size = cpu_to_le16(hash_alg->digest_size); memcpy(d->digest, vi->file_digest, hash_alg->digest_size); err = verify_pkcs7_signature(d, sizeof(*d) + hash_alg->digest_size, signature, sig_size, fsverity_keyring, VERIFYING_UNSPECIFIED_SIGNATURE, NULL, NULL); kfree(d); if (err) { if (err == -ENOKEY) fsverity_err(inode, "File's signing cert isn't in the fs-verity keyring"); else if (err == -EKEYREJECTED) fsverity_err(inode, "Incorrect file signature"); else if (err == -EBADMSG) fsverity_err(inode, "Malformed file signature"); else fsverity_err(inode, "Error %d verifying file signature", err); return err; } err = security_inode_setintegrity(inode, LSM_INT_FSVERITY_BUILTINSIG_VALID, signature, sig_size); if (err) { fsverity_err(inode, "Error %d exposing file signature to LSMs", err); return err; } return 0; } void __init fsverity_init_signature(void) { fsverity_keyring = keyring_alloc(".fs-verity", KUIDT_INIT(0), KGIDT_INIT(0), current_cred(), KEY_POS_SEARCH | KEY_USR_VIEW | KEY_USR_READ | KEY_USR_WRITE | KEY_USR_SEARCH | KEY_USR_SETATTR, KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL); if (IS_ERR(fsverity_keyring)) panic("failed to allocate \".fs-verity\" keyring"); }
1 1 1 1 2 2 3 1 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 // SPDX-License-Identifier: GPL-2.0+ /* * mxuport.c - MOXA UPort series driver * * Copyright (c) 2006 Moxa Technologies Co., Ltd. * Copyright (c) 2013 Andrew Lunn <andrew@lunn.ch> * * Supports the following Moxa USB to serial converters: * 2 ports : UPort 1250, UPort 1250I * 4 ports : UPort 1410, UPort 1450, UPort 1450I * 8 ports : UPort 1610-8, UPort 1650-8 * 16 ports : UPort 1610-16, UPort 1650-16 */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/firmware.h> #include <linux/jiffies.h> #include <linux/serial.h> #include <linux/serial_reg.h> #include <linux/slab.h> #include <linux/tty.h> #include <linux/tty_driver.h> #include <linux/tty_flip.h> #include <linux/uaccess.h> #include <linux/usb.h> #include <linux/usb/serial.h> #include <linux/unaligned.h> /* Definitions for the vendor ID and device ID */ #define MX_USBSERIAL_VID 0x110A #define MX_UPORT1250_PID 0x1250 #define MX_UPORT1251_PID 0x1251 #define MX_UPORT1410_PID 0x1410 #define MX_UPORT1450_PID 0x1450 #define MX_UPORT1451_PID 0x1451 #define MX_UPORT1618_PID 0x1618 #define MX_UPORT1658_PID 0x1658 #define MX_UPORT1613_PID 0x1613 #define MX_UPORT1653_PID 0x1653 /* Definitions for USB info */ #define HEADER_SIZE 4 #define EVENT_LENGTH 8 #define DOWN_BLOCK_SIZE 64 /* Definitions for firmware info */ #define VER_ADDR_1 0x20 #define VER_ADDR_2 0x24 #define VER_ADDR_3 0x28 /* Definitions for USB vendor request */ #define RQ_VENDOR_NONE 0x00 #define RQ_VENDOR_SET_BAUD 0x01 /* Set baud rate */ #define RQ_VENDOR_SET_LINE 0x02 /* Set line status */ #define RQ_VENDOR_SET_CHARS 0x03 /* Set Xon/Xoff chars */ #define RQ_VENDOR_SET_RTS 0x04 /* Set RTS */ #define RQ_VENDOR_SET_DTR 0x05 /* Set DTR */ #define RQ_VENDOR_SET_XONXOFF 0x06 /* Set auto Xon/Xoff */ #define RQ_VENDOR_SET_RX_HOST_EN 0x07 /* Set RX host enable */ #define RQ_VENDOR_SET_OPEN 0x08 /* Set open/close port */ #define RQ_VENDOR_PURGE 0x09 /* Purge Rx/Tx buffer */ #define RQ_VENDOR_SET_MCR 0x0A /* Set MCR register */ #define RQ_VENDOR_SET_BREAK 0x0B /* Set Break signal */ #define RQ_VENDOR_START_FW_DOWN 0x0C /* Start firmware download */ #define RQ_VENDOR_STOP_FW_DOWN 0x0D /* Stop firmware download */ #define RQ_VENDOR_QUERY_FW_READY 0x0E /* Query if new firmware ready */ #define RQ_VENDOR_SET_FIFO_DISABLE 0x0F /* Set fifo disable */ #define RQ_VENDOR_SET_INTERFACE 0x10 /* Set interface */ #define RQ_VENDOR_SET_HIGH_PERFOR 0x11 /* Set hi-performance */ #define RQ_VENDOR_ERASE_BLOCK 0x12 /* Erase flash block */ #define RQ_VENDOR_WRITE_PAGE 0x13 /* Write flash page */ #define RQ_VENDOR_PREPARE_WRITE 0x14 /* Prepare write flash */ #define RQ_VENDOR_CONFIRM_WRITE 0x15 /* Confirm write flash */ #define RQ_VENDOR_LOCATE 0x16 /* Locate the device */ #define RQ_VENDOR_START_ROM_DOWN 0x17 /* Start firmware download */ #define RQ_VENDOR_ROM_DATA 0x18 /* Rom file data */ #define RQ_VENDOR_STOP_ROM_DOWN 0x19 /* Stop firmware download */ #define RQ_VENDOR_FW_DATA 0x20 /* Firmware data */ #define RQ_VENDOR_RESET_DEVICE 0x23 /* Try to reset the device */ #define RQ_VENDOR_QUERY_FW_CONFIG 0x24 #define RQ_VENDOR_GET_VERSION 0x81 /* Get firmware version */ #define RQ_VENDOR_GET_PAGE 0x82 /* Read flash page */ #define RQ_VENDOR_GET_ROM_PROC 0x83 /* Get ROM process state */ #define RQ_VENDOR_GET_INQUEUE 0x84 /* Data in input buffer */ #define RQ_VENDOR_GET_OUTQUEUE 0x85 /* Data in output buffer */ #define RQ_VENDOR_GET_MSR 0x86 /* Get modem status register */ /* Definitions for UPort event type */ #define UPORT_EVENT_NONE 0 /* None */ #define UPORT_EVENT_TXBUF_THRESHOLD 1 /* Tx buffer threshold */ #define UPORT_EVENT_SEND_NEXT 2 /* Send next */ #define UPORT_EVENT_MSR 3 /* Modem status */ #define UPORT_EVENT_LSR 4 /* Line status */ #define UPORT_EVENT_MCR 5 /* Modem control */ /* Definitions for serial event type */ #define SERIAL_EV_CTS 0x0008 /* CTS changed state */ #define SERIAL_EV_DSR 0x0010 /* DSR changed state */ #define SERIAL_EV_RLSD 0x0020 /* RLSD changed state */ /* Definitions for modem control event type */ #define SERIAL_EV_XOFF 0x40 /* XOFF received */ /* Definitions for line control of communication */ #define MX_WORDLENGTH_5 5 #define MX_WORDLENGTH_6 6 #define MX_WORDLENGTH_7 7 #define MX_WORDLENGTH_8 8 #define MX_PARITY_NONE 0 #define MX_PARITY_ODD 1 #define MX_PARITY_EVEN 2 #define MX_PARITY_MARK 3 #define MX_PARITY_SPACE 4 #define MX_STOP_BITS_1 0 #define MX_STOP_BITS_1_5 1 #define MX_STOP_BITS_2 2 #define MX_RTS_DISABLE 0x0 #define MX_RTS_ENABLE 0x1 #define MX_RTS_HW 0x2 #define MX_RTS_NO_CHANGE 0x3 /* Flag, not valid register value*/ #define MX_INT_RS232 0 #define MX_INT_2W_RS485 1 #define MX_INT_RS422 2 #define MX_INT_4W_RS485 3 /* Definitions for holding reason */ #define MX_WAIT_FOR_CTS 0x0001 #define MX_WAIT_FOR_DSR 0x0002 #define MX_WAIT_FOR_DCD 0x0004 #define MX_WAIT_FOR_XON 0x0008 #define MX_WAIT_FOR_START_TX 0x0010 #define MX_WAIT_FOR_UNTHROTTLE 0x0020 #define MX_WAIT_FOR_LOW_WATER 0x0040 #define MX_WAIT_FOR_SEND_NEXT 0x0080 #define MX_UPORT_2_PORT BIT(0) #define MX_UPORT_4_PORT BIT(1) #define MX_UPORT_8_PORT BIT(2) #define MX_UPORT_16_PORT BIT(3) /* This structure holds all of the local port information */ struct mxuport_port { u8 mcr_state; /* Last MCR state */ u8 msr_state; /* Last MSR state */ struct mutex mutex; /* Protects mcr_state */ spinlock_t spinlock; /* Protects msr_state */ }; /* Table of devices that work with this driver */ static const struct usb_device_id mxuport_idtable[] = { { USB_DEVICE(MX_USBSERIAL_VID, MX_UPORT1250_PID), .driver_info = MX_UPORT_2_PORT }, { USB_DEVICE(MX_USBSERIAL_VID, MX_UPORT1251_PID), .driver_info = MX_UPORT_2_PORT }, { USB_DEVICE(MX_USBSERIAL_VID, MX_UPORT1410_PID), .driver_info = MX_UPORT_4_PORT }, { USB_DEVICE(MX_USBSERIAL_VID, MX_UPORT1450_PID), .driver_info = MX_UPORT_4_PORT }, { USB_DEVICE(MX_USBSERIAL_VID, MX_UPORT1451_PID), .driver_info = MX_UPORT_4_PORT }, { USB_DEVICE(MX_USBSERIAL_VID, MX_UPORT1618_PID), .driver_info = MX_UPORT_8_PORT }, { USB_DEVICE(MX_USBSERIAL_VID, MX_UPORT1658_PID), .driver_info = MX_UPORT_8_PORT }, { USB_DEVICE(MX_USBSERIAL_VID, MX_UPORT1613_PID), .driver_info = MX_UPORT_16_PORT }, { USB_DEVICE(MX_USBSERIAL_VID, MX_UPORT1653_PID), .driver_info = MX_UPORT_16_PORT }, {} /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, mxuport_idtable); /* * Add a four byte header containing the port number and the number of * bytes of data in the message. Return the number of bytes in the * buffer. */ static int mxuport_prepare_write_buffer(struct usb_serial_port *port, void *dest, size_t size) { u8 *buf = dest; int count; count = kfifo_out_locked(&port->write_fifo, buf + HEADER_SIZE, size - HEADER_SIZE, &port->lock); put_unaligned_be16(port->port_number, buf); put_unaligned_be16(count, buf + 2); dev_dbg(&port->dev, "%s - size %zd count %d\n", __func__, size, count); return count + HEADER_SIZE; } /* Read the given buffer in from the control pipe. */ static int mxuport_recv_ctrl_urb(struct usb_serial *serial, u8 request, u16 value, u16 index, u8 *data, size_t size) { int status; status = usb_control_msg(serial->dev, usb_rcvctrlpipe(serial->dev, 0), request, (USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE), value, index, data, size, USB_CTRL_GET_TIMEOUT); if (status < 0) { dev_err(&serial->interface->dev, "%s - usb_control_msg failed (%d)\n", __func__, status); return status; } if (status != size) { dev_err(&serial->interface->dev, "%s - short read (%d / %zd)\n", __func__, status, size); return -EIO; } return status; } /* Write the given buffer out to the control pipe. */ static int mxuport_send_ctrl_data_urb(struct usb_serial *serial, u8 request, u16 value, u16 index, u8 *data, size_t size) { int status; status = usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0), request, (USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE), value, index, data, size, USB_CTRL_SET_TIMEOUT); if (status < 0) { dev_err(&serial->interface->dev, "%s - usb_control_msg failed (%d)\n", __func__, status); return status; } return 0; } /* Send a vendor request without any data */ static int mxuport_send_ctrl_urb(struct usb_serial *serial, u8 request, u16 value, u16 index) { return mxuport_send_ctrl_data_urb(serial, request, value, index, NULL, 0); } /* * mxuport_throttle - throttle function of driver * * This function is called by the tty driver when it wants to stop the * data being read from the port. Since all the data comes over one * bulk in endpoint, we cannot stop submitting urbs by setting * port->throttle. Instead tell the device to stop sending us data for * the port. */ static void mxuport_throttle(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct usb_serial *serial = port->serial; dev_dbg(&port->dev, "%s\n", __func__); mxuport_send_ctrl_urb(serial, RQ_VENDOR_SET_RX_HOST_EN, 0, port->port_number); } /* * mxuport_unthrottle - unthrottle function of driver * * This function is called by the tty driver when it wants to resume * the data being read from the port. Tell the device it can resume * sending us received data from the port. */ static void mxuport_unthrottle(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct usb_serial *serial = port->serial; dev_dbg(&port->dev, "%s\n", __func__); mxuport_send_ctrl_urb(serial, RQ_VENDOR_SET_RX_HOST_EN, 1, port->port_number); } /* * Processes one chunk of data received for a port. Mostly a copy of * usb_serial_generic_process_read_urb(). */ static void mxuport_process_read_urb_data(struct usb_serial_port *port, char *data, int size) { int i; if (port->sysrq) { for (i = 0; i < size; i++, data++) { if (!usb_serial_handle_sysrq_char(port, *data)) tty_insert_flip_char(&port->port, *data, TTY_NORMAL); } } else { tty_insert_flip_string(&port->port, data, size); } tty_flip_buffer_push(&port->port); } static void mxuport_msr_event(struct usb_serial_port *port, u8 buf[4]) { struct mxuport_port *mxport = usb_get_serial_port_data(port); u8 rcv_msr_hold = buf[2] & 0xF0; u16 rcv_msr_event = get_unaligned_be16(buf); unsigned long flags; if (rcv_msr_event == 0) return; /* Update MSR status */ spin_lock_irqsave(&mxport->spinlock, flags); dev_dbg(&port->dev, "%s - current MSR status = 0x%x\n", __func__, mxport->msr_state); if (rcv_msr_hold & UART_MSR_CTS) { mxport->msr_state |= UART_MSR_CTS; dev_dbg(&port->dev, "%s - CTS high\n", __func__); } else { mxport->msr_state &= ~UART_MSR_CTS; dev_dbg(&port->dev, "%s - CTS low\n", __func__); } if (rcv_msr_hold & UART_MSR_DSR) { mxport->msr_state |= UART_MSR_DSR; dev_dbg(&port->dev, "%s - DSR high\n", __func__); } else { mxport->msr_state &= ~UART_MSR_DSR; dev_dbg(&port->dev, "%s - DSR low\n", __func__); } if (rcv_msr_hold & UART_MSR_DCD) { mxport->msr_state |= UART_MSR_DCD; dev_dbg(&port->dev, "%s - DCD high\n", __func__); } else { mxport->msr_state &= ~UART_MSR_DCD; dev_dbg(&port->dev, "%s - DCD low\n", __func__); } spin_unlock_irqrestore(&mxport->spinlock, flags); if (rcv_msr_event & (SERIAL_EV_CTS | SERIAL_EV_DSR | SERIAL_EV_RLSD)) { if (rcv_msr_event & SERIAL_EV_CTS) { port->icount.cts++; dev_dbg(&port->dev, "%s - CTS change\n", __func__); } if (rcv_msr_event & SERIAL_EV_DSR) { port->icount.dsr++; dev_dbg(&port->dev, "%s - DSR change\n", __func__); } if (rcv_msr_event & SERIAL_EV_RLSD) { port->icount.dcd++; dev_dbg(&port->dev, "%s - DCD change\n", __func__); } wake_up_interruptible(&port->port.delta_msr_wait); } } static void mxuport_lsr_event(struct usb_serial_port *port, u8 buf[4]) { u8 lsr_event = buf[2]; if (lsr_event & UART_LSR_BI) { port->icount.brk++; dev_dbg(&port->dev, "%s - break error\n", __func__); } if (lsr_event & UART_LSR_FE) { port->icount.frame++; dev_dbg(&port->dev, "%s - frame error\n", __func__); } if (lsr_event & UART_LSR_PE) { port->icount.parity++; dev_dbg(&port->dev, "%s - parity error\n", __func__); } if (lsr_event & UART_LSR_OE) { port->icount.overrun++; dev_dbg(&port->dev, "%s - overrun error\n", __func__); } } /* * When something interesting happens, modem control lines XON/XOFF * etc, the device sends an event. Process these events. */ static void mxuport_process_read_urb_event(struct usb_serial_port *port, u8 buf[4], u32 event) { dev_dbg(&port->dev, "%s - receive event : %04x\n", __func__, event); switch (event) { case UPORT_EVENT_SEND_NEXT: /* * Sent as part of the flow control on device buffers. * Not currently used. */ break; case UPORT_EVENT_MSR: mxuport_msr_event(port, buf); break; case UPORT_EVENT_LSR: mxuport_lsr_event(port, buf); break; case UPORT_EVENT_MCR: /* * Event to indicate a change in XON/XOFF from the * peer. Currently not used. We just continue * sending the device data and it will buffer it if * needed. This event could be used for flow control * between the host and the device. */ break; default: dev_dbg(&port->dev, "Unexpected event\n"); break; } } /* * One URB can contain data for multiple ports. Demultiplex the data, * checking the port exists, is opened and the message is valid. */ static void mxuport_process_read_urb_demux_data(struct urb *urb) { struct usb_serial_port *port = urb->context; struct usb_serial *serial = port->serial; u8 *data = urb->transfer_buffer; u8 *end = data + urb->actual_length; struct usb_serial_port *demux_port; u8 *ch; u16 rcv_port; u16 rcv_len; while (data < end) { if (data + HEADER_SIZE > end) { dev_warn(&port->dev, "%s - message with short header\n", __func__); return; } rcv_port = get_unaligned_be16(data); if (rcv_port >= serial->num_ports) { dev_warn(&port->dev, "%s - message for invalid port\n", __func__); return; } demux_port = serial->port[rcv_port]; rcv_len = get_unaligned_be16(data + 2); if (!rcv_len || data + HEADER_SIZE + rcv_len > end) { dev_warn(&port->dev, "%s - short data\n", __func__); return; } if (tty_port_initialized(&demux_port->port)) { ch = data + HEADER_SIZE; mxuport_process_read_urb_data(demux_port, ch, rcv_len); } else { dev_dbg(&demux_port->dev, "%s - data for closed port\n", __func__); } data += HEADER_SIZE + rcv_len; } } /* * One URB can contain events for multiple ports. Demultiplex the event, * checking the port exists, and is opened. */ static void mxuport_process_read_urb_demux_event(struct urb *urb) { struct usb_serial_port *port = urb->context; struct usb_serial *serial = port->serial; u8 *data = urb->transfer_buffer; u8 *end = data + urb->actual_length; struct usb_serial_port *demux_port; u8 *ch; u16 rcv_port; u16 rcv_event; while (data < end) { if (data + EVENT_LENGTH > end) { dev_warn(&port->dev, "%s - message with short event\n", __func__); return; } rcv_port = get_unaligned_be16(data); if (rcv_port >= serial->num_ports) { dev_warn(&port->dev, "%s - message for invalid port\n", __func__); return; } demux_port = serial->port[rcv_port]; if (tty_port_initialized(&demux_port->port)) { ch = data + HEADER_SIZE; rcv_event = get_unaligned_be16(data + 2); mxuport_process_read_urb_event(demux_port, ch, rcv_event); } else { dev_dbg(&demux_port->dev, "%s - event for closed port\n", __func__); } data += EVENT_LENGTH; } } /* * This is called when we have received data on the bulk in * endpoint. Depending on which port it was received on, it can * contain serial data or events. */ static void mxuport_process_read_urb(struct urb *urb) { struct usb_serial_port *port = urb->context; struct usb_serial *serial = port->serial; if (port == serial->port[0]) mxuport_process_read_urb_demux_data(urb); if (port == serial->port[1]) mxuport_process_read_urb_demux_event(urb); } /* * Ask the device how many bytes it has queued to be sent out. If * there are none, return true. */ static bool mxuport_tx_empty(struct usb_serial_port *port) { struct usb_serial *serial = port->serial; bool is_empty = true; u32 txlen; u8 *len_buf; int err; len_buf = kzalloc(4, GFP_KERNEL); if (!len_buf) goto out; err = mxuport_recv_ctrl_urb(serial, RQ_VENDOR_GET_OUTQUEUE, 0, port->port_number, len_buf, 4); if (err < 0) goto out; txlen = get_unaligned_be32(len_buf); dev_dbg(&port->dev, "%s - tx len = %u\n", __func__, txlen); if (txlen != 0) is_empty = false; out: kfree(len_buf); return is_empty; } static int mxuport_set_mcr(struct usb_serial_port *port, u8 mcr_state) { struct usb_serial *serial = port->serial; int err; dev_dbg(&port->dev, "%s - %02x\n", __func__, mcr_state); err = mxuport_send_ctrl_urb(serial, RQ_VENDOR_SET_MCR, mcr_state, port->port_number); if (err) dev_err(&port->dev, "%s - failed to change MCR\n", __func__); return err; } static int mxuport_set_dtr(struct usb_serial_port *port, int on) { struct mxuport_port *mxport = usb_get_serial_port_data(port); struct usb_serial *serial = port->serial; int err; mutex_lock(&mxport->mutex); err = mxuport_send_ctrl_urb(serial, RQ_VENDOR_SET_DTR, !!on, port->port_number); if (!err) { if (on) mxport->mcr_state |= UART_MCR_DTR; else mxport->mcr_state &= ~UART_MCR_DTR; } mutex_unlock(&mxport->mutex); return err; } static int mxuport_set_rts(struct usb_serial_port *port, u8 state) { struct mxuport_port *mxport = usb_get_serial_port_data(port); struct usb_serial *serial = port->serial; int err; u8 mcr_state; mutex_lock(&mxport->mutex); mcr_state = mxport->mcr_state; switch (state) { case MX_RTS_DISABLE: mcr_state &= ~UART_MCR_RTS; break; case MX_RTS_ENABLE: mcr_state |= UART_MCR_RTS; break; case MX_RTS_HW: /* * Do not update mxport->mcr_state when doing hardware * flow control. */ break; default: /* * Should not happen, but somebody might try passing * MX_RTS_NO_CHANGE, which is not valid. */ err = -EINVAL; goto out; } err = mxuport_send_ctrl_urb(serial, RQ_VENDOR_SET_RTS, state, port->port_number); if (!err) mxport->mcr_state = mcr_state; out: mutex_unlock(&mxport->mutex); return err; } static void mxuport_dtr_rts(struct usb_serial_port *port, int on) { struct mxuport_port *mxport = usb_get_serial_port_data(port); u8 mcr_state; int err; mutex_lock(&mxport->mutex); mcr_state = mxport->mcr_state; if (on) mcr_state |= (UART_MCR_RTS | UART_MCR_DTR); else mcr_state &= ~(UART_MCR_RTS | UART_MCR_DTR); err = mxuport_set_mcr(port, mcr_state); if (!err) mxport->mcr_state = mcr_state; mutex_unlock(&mxport->mutex); } static int mxuport_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear) { struct usb_serial_port *port = tty->driver_data; struct mxuport_port *mxport = usb_get_serial_port_data(port); int err; u8 mcr_state; mutex_lock(&mxport->mutex); mcr_state = mxport->mcr_state; if (set & TIOCM_RTS) mcr_state |= UART_MCR_RTS; if (set & TIOCM_DTR) mcr_state |= UART_MCR_DTR; if (clear & TIOCM_RTS) mcr_state &= ~UART_MCR_RTS; if (clear & TIOCM_DTR) mcr_state &= ~UART_MCR_DTR; err = mxuport_set_mcr(port, mcr_state); if (!err) mxport->mcr_state = mcr_state; mutex_unlock(&mxport->mutex); return err; } static int mxuport_tiocmget(struct tty_struct *tty) { struct mxuport_port *mxport; struct usb_serial_port *port = tty->driver_data; unsigned int result; unsigned long flags; unsigned int msr; unsigned int mcr; mxport = usb_get_serial_port_data(port); mutex_lock(&mxport->mutex); spin_lock_irqsave(&mxport->spinlock, flags); msr = mxport->msr_state; mcr = mxport->mcr_state; spin_unlock_irqrestore(&mxport->spinlock, flags); mutex_unlock(&mxport->mutex); result = (((mcr & UART_MCR_DTR) ? TIOCM_DTR : 0) | /* 0x002 */ ((mcr & UART_MCR_RTS) ? TIOCM_RTS : 0) | /* 0x004 */ ((msr & UART_MSR_CTS) ? TIOCM_CTS : 0) | /* 0x020 */ ((msr & UART_MSR_DCD) ? TIOCM_CAR : 0) | /* 0x040 */ ((msr & UART_MSR_RI) ? TIOCM_RI : 0) | /* 0x080 */ ((msr & UART_MSR_DSR) ? TIOCM_DSR : 0)); /* 0x100 */ dev_dbg(&port->dev, "%s - 0x%04x\n", __func__, result); return result; } static int mxuport_set_termios_flow(struct tty_struct *tty, const struct ktermios *old_termios, struct usb_serial_port *port, struct usb_serial *serial) { u8 xon = START_CHAR(tty); u8 xoff = STOP_CHAR(tty); int enable; int err; u8 *buf; u8 rts; buf = kmalloc(2, GFP_KERNEL); if (!buf) return -ENOMEM; /* S/W flow control settings */ if (I_IXOFF(tty) || I_IXON(tty)) { enable = 1; buf[0] = xon; buf[1] = xoff; err = mxuport_send_ctrl_data_urb(serial, RQ_VENDOR_SET_CHARS, 0, port->port_number, buf, 2); if (err) goto out; dev_dbg(&port->dev, "%s - XON = 0x%02x, XOFF = 0x%02x\n", __func__, xon, xoff); } else { enable = 0; } err = mxuport_send_ctrl_urb(serial, RQ_VENDOR_SET_XONXOFF, enable, port->port_number); if (err) goto out; rts = MX_RTS_NO_CHANGE; /* H/W flow control settings */ if (!old_termios || C_CRTSCTS(tty) != (old_termios->c_cflag & CRTSCTS)) { if (C_CRTSCTS(tty)) rts = MX_RTS_HW; else rts = MX_RTS_ENABLE; } if (C_BAUD(tty)) { if (old_termios && (old_termios->c_cflag & CBAUD) == B0) { /* Raise DTR and RTS */ if (C_CRTSCTS(tty)) rts = MX_RTS_HW; else rts = MX_RTS_ENABLE; mxuport_set_dtr(port, 1); } } else { /* Drop DTR and RTS */ rts = MX_RTS_DISABLE; mxuport_set_dtr(port, 0); } if (rts != MX_RTS_NO_CHANGE) err = mxuport_set_rts(port, rts); out: kfree(buf); return err; } static void mxuport_set_termios(struct tty_struct *tty, struct usb_serial_port *port, const struct ktermios *old_termios) { struct usb_serial *serial = port->serial; u8 *buf; u8 data_bits; u8 stop_bits; u8 parity; int baud; int err; if (old_termios && !tty_termios_hw_change(&tty->termios, old_termios) && tty->termios.c_iflag == old_termios->c_iflag) { dev_dbg(&port->dev, "%s - nothing to change\n", __func__); return; } buf = kmalloc(4, GFP_KERNEL); if (!buf) return; /* Set data bit of termios */ switch (C_CSIZE(tty)) { case CS5: data_bits = MX_WORDLENGTH_5; break; case CS6: data_bits = MX_WORDLENGTH_6; break; case CS7: data_bits = MX_WORDLENGTH_7; break; case CS8: default: data_bits = MX_WORDLENGTH_8; break; } /* Set parity of termios */ if (C_PARENB(tty)) { if (C_CMSPAR(tty)) { if (C_PARODD(tty)) parity = MX_PARITY_MARK; else parity = MX_PARITY_SPACE; } else { if (C_PARODD(tty)) parity = MX_PARITY_ODD; else parity = MX_PARITY_EVEN; } } else { parity = MX_PARITY_NONE; } /* Set stop bit of termios */ if (C_CSTOPB(tty)) stop_bits = MX_STOP_BITS_2; else stop_bits = MX_STOP_BITS_1; buf[0] = data_bits; buf[1] = parity; buf[2] = stop_bits; buf[3] = 0; err = mxuport_send_ctrl_data_urb(serial, RQ_VENDOR_SET_LINE, 0, port->port_number, buf, 4); if (err) goto out; err = mxuport_set_termios_flow(tty, old_termios, port, serial); if (err) goto out; baud = tty_get_baud_rate(tty); if (!baud) baud = 9600; /* Note: Little Endian */ put_unaligned_le32(baud, buf); err = mxuport_send_ctrl_data_urb(serial, RQ_VENDOR_SET_BAUD, 0, port->port_number, buf, 4); if (err) goto out; dev_dbg(&port->dev, "baud_rate : %d\n", baud); dev_dbg(&port->dev, "data_bits : %d\n", data_bits); dev_dbg(&port->dev, "parity : %d\n", parity); dev_dbg(&port->dev, "stop_bits : %d\n", stop_bits); out: kfree(buf); } /* * Determine how many ports this device has dynamically. It will be * called after the probe() callback is called, but before attach(). */ static int mxuport_calc_num_ports(struct usb_serial *serial, struct usb_serial_endpoints *epds) { unsigned long features = (unsigned long)usb_get_serial_data(serial); int num_ports; int i; if (features & MX_UPORT_2_PORT) { num_ports = 2; } else if (features & MX_UPORT_4_PORT) { num_ports = 4; } else if (features & MX_UPORT_8_PORT) { num_ports = 8; } else if (features & MX_UPORT_16_PORT) { num_ports = 16; } else { dev_warn(&serial->interface->dev, "unknown device, assuming two ports\n"); num_ports = 2; } /* * Setup bulk-out endpoint multiplexing. All ports share the same * bulk-out endpoint. */ BUILD_BUG_ON(ARRAY_SIZE(epds->bulk_out) < 16); for (i = 1; i < num_ports; ++i) epds->bulk_out[i] = epds->bulk_out[0]; epds->num_bulk_out = num_ports; return num_ports; } /* Get the version of the firmware currently running. */ static int mxuport_get_fw_version(struct usb_serial *serial, u32 *version) { u8 *ver_buf; int err; ver_buf = kzalloc(4, GFP_KERNEL); if (!ver_buf) return -ENOMEM; /* Get firmware version from SDRAM */ err = mxuport_recv_ctrl_urb(serial, RQ_VENDOR_GET_VERSION, 0, 0, ver_buf, 4); if (err != 4) { err = -EIO; goto out; } *version = (ver_buf[0] << 16) | (ver_buf[1] << 8) | ver_buf[2]; err = 0; out: kfree(ver_buf); return err; } /* Given a firmware blob, download it to the device. */ static int mxuport_download_fw(struct usb_serial *serial, const struct firmware *fw_p) { u8 *fw_buf; size_t txlen; size_t fwidx; int err; fw_buf = kmalloc(DOWN_BLOCK_SIZE, GFP_KERNEL); if (!fw_buf) return -ENOMEM; dev_dbg(&serial->interface->dev, "Starting firmware download...\n"); err = mxuport_send_ctrl_urb(serial, RQ_VENDOR_START_FW_DOWN, 0, 0); if (err) goto out; fwidx = 0; do { txlen = min_t(size_t, (fw_p->size - fwidx), DOWN_BLOCK_SIZE); memcpy(fw_buf, &fw_p->data[fwidx], txlen); err = mxuport_send_ctrl_data_urb(serial, RQ_VENDOR_FW_DATA, 0, 0, fw_buf, txlen); if (err) { mxuport_send_ctrl_urb(serial, RQ_VENDOR_STOP_FW_DOWN, 0, 0); goto out; } fwidx += txlen; usleep_range(1000, 2000); } while (fwidx < fw_p->size); msleep(1000); err = mxuport_send_ctrl_urb(serial, RQ_VENDOR_STOP_FW_DOWN, 0, 0); if (err) goto out; msleep(1000); err = mxuport_send_ctrl_urb(serial, RQ_VENDOR_QUERY_FW_READY, 0, 0); out: kfree(fw_buf); return err; } static int mxuport_probe(struct usb_serial *serial, const struct usb_device_id *id) { u16 productid = le16_to_cpu(serial->dev->descriptor.idProduct); const struct firmware *fw_p = NULL; u32 version; int local_ver; char buf[32]; int err; /* Load our firmware */ err = mxuport_send_ctrl_urb(serial, RQ_VENDOR_QUERY_FW_CONFIG, 0, 0); if (err) { mxuport_send_ctrl_urb(serial, RQ_VENDOR_RESET_DEVICE, 0, 0); return err; } err = mxuport_get_fw_version(serial, &version); if (err < 0) return err; dev_dbg(&serial->interface->dev, "Device firmware version v%x.%x.%x\n", (version & 0xff0000) >> 16, (version & 0xff00) >> 8, (version & 0xff)); snprintf(buf, sizeof(buf) - 1, "moxa/moxa-%04x.fw", productid); err = request_firmware(&fw_p, buf, &serial->interface->dev); if (err) { dev_warn(&serial->interface->dev, "Firmware %s not found\n", buf); /* Use the firmware already in the device */ err = 0; } else { local_ver = ((fw_p->data[VER_ADDR_1] << 16) | (fw_p->data[VER_ADDR_2] << 8) | fw_p->data[VER_ADDR_3]); dev_dbg(&serial->interface->dev, "Available firmware version v%x.%x.%x\n", fw_p->data[VER_ADDR_1], fw_p->data[VER_ADDR_2], fw_p->data[VER_ADDR_3]); if (local_ver > version) { err = mxuport_download_fw(serial, fw_p); if (err) goto out; err = mxuport_get_fw_version(serial, &version); if (err < 0) goto out; } } dev_info(&serial->interface->dev, "Using device firmware version v%x.%x.%x\n", (version & 0xff0000) >> 16, (version & 0xff00) >> 8, (version & 0xff)); /* * Contains the features of this hardware. Store away for * later use, eg, number of ports. */ usb_set_serial_data(serial, (void *)id->driver_info); out: if (fw_p) release_firmware(fw_p); return err; } static int mxuport_port_probe(struct usb_serial_port *port) { struct usb_serial *serial = port->serial; struct mxuport_port *mxport; int err; mxport = devm_kzalloc(&port->dev, sizeof(struct mxuport_port), GFP_KERNEL); if (!mxport) return -ENOMEM; mutex_init(&mxport->mutex); spin_lock_init(&mxport->spinlock); /* Set the port private data */ usb_set_serial_port_data(port, mxport); /* Set FIFO (Enable) */ err = mxuport_send_ctrl_urb(serial, RQ_VENDOR_SET_FIFO_DISABLE, 0, port->port_number); if (err) return err; /* Set transmission mode (Hi-Performance) */ err = mxuport_send_ctrl_urb(serial, RQ_VENDOR_SET_HIGH_PERFOR, 0, port->port_number); if (err) return err; /* Set interface (RS-232) */ return mxuport_send_ctrl_urb(serial, RQ_VENDOR_SET_INTERFACE, MX_INT_RS232, port->port_number); } static int mxuport_attach(struct usb_serial *serial) { struct usb_serial_port *port0 = serial->port[0]; struct usb_serial_port *port1 = serial->port[1]; int err; /* * All data from the ports is received on the first bulk in * endpoint, with a multiplex header. The second bulk in is * used for events. * * Start to read from the device. */ err = usb_serial_generic_submit_read_urbs(port0, GFP_KERNEL); if (err) return err; err = usb_serial_generic_submit_read_urbs(port1, GFP_KERNEL); if (err) { usb_serial_generic_close(port0); return err; } return 0; } static void mxuport_release(struct usb_serial *serial) { struct usb_serial_port *port0 = serial->port[0]; struct usb_serial_port *port1 = serial->port[1]; usb_serial_generic_close(port1); usb_serial_generic_close(port0); } static int mxuport_open(struct tty_struct *tty, struct usb_serial_port *port) { struct mxuport_port *mxport = usb_get_serial_port_data(port); struct usb_serial *serial = port->serial; int err; /* Set receive host (enable) */ err = mxuport_send_ctrl_urb(serial, RQ_VENDOR_SET_RX_HOST_EN, 1, port->port_number); if (err) return err; err = mxuport_send_ctrl_urb(serial, RQ_VENDOR_SET_OPEN, 1, port->port_number); if (err) { mxuport_send_ctrl_urb(serial, RQ_VENDOR_SET_RX_HOST_EN, 0, port->port_number); return err; } /* Initial port termios */ if (tty) mxuport_set_termios(tty, port, NULL); /* * TODO: use RQ_VENDOR_GET_MSR, once we know what it * returns. */ mxport->msr_state = 0; return err; } static void mxuport_close(struct usb_serial_port *port) { struct usb_serial *serial = port->serial; mxuport_send_ctrl_urb(serial, RQ_VENDOR_SET_OPEN, 0, port->port_number); mxuport_send_ctrl_urb(serial, RQ_VENDOR_SET_RX_HOST_EN, 0, port->port_number); } /* Send a break to the port. */ static int mxuport_break_ctl(struct tty_struct *tty, int break_state) { struct usb_serial_port *port = tty->driver_data; struct usb_serial *serial = port->serial; int enable; if (break_state == -1) { enable = 1; dev_dbg(&port->dev, "%s - sending break\n", __func__); } else { enable = 0; dev_dbg(&port->dev, "%s - clearing break\n", __func__); } return mxuport_send_ctrl_urb(serial, RQ_VENDOR_SET_BREAK, enable, port->port_number); } static int mxuport_resume(struct usb_serial *serial) { struct usb_serial_port *port; int c = 0; int i; int r; for (i = 0; i < 2; i++) { port = serial->port[i]; r = usb_serial_generic_submit_read_urbs(port, GFP_NOIO); if (r < 0) c++; } for (i = 0; i < serial->num_ports; i++) { port = serial->port[i]; if (!tty_port_initialized(&port->port)) continue; r = usb_serial_generic_write_start(port, GFP_NOIO); if (r < 0) c++; } return c ? -EIO : 0; } static struct usb_serial_driver mxuport_device = { .driver = { .name = "mxuport", }, .description = "MOXA UPort", .id_table = mxuport_idtable, .num_bulk_in = 2, .num_bulk_out = 1, .probe = mxuport_probe, .port_probe = mxuport_port_probe, .attach = mxuport_attach, .release = mxuport_release, .calc_num_ports = mxuport_calc_num_ports, .open = mxuport_open, .close = mxuport_close, .set_termios = mxuport_set_termios, .break_ctl = mxuport_break_ctl, .tx_empty = mxuport_tx_empty, .tiocmiwait = usb_serial_generic_tiocmiwait, .get_icount = usb_serial_generic_get_icount, .throttle = mxuport_throttle, .unthrottle = mxuport_unthrottle, .tiocmget = mxuport_tiocmget, .tiocmset = mxuport_tiocmset, .dtr_rts = mxuport_dtr_rts, .process_read_urb = mxuport_process_read_urb, .prepare_write_buffer = mxuport_prepare_write_buffer, .resume = mxuport_resume, }; static struct usb_serial_driver *const serial_drivers[] = { &mxuport_device, NULL }; module_usb_serial_driver(serial_drivers, mxuport_idtable); MODULE_AUTHOR("Andrew Lunn <andrew@lunn.ch>"); MODULE_AUTHOR("<support@moxa.com>"); MODULE_DESCRIPTION("Moxa UPORT USB Serial driver"); MODULE_LICENSE("GPL");
79 28 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_RTNH_H #define __NET_RTNH_H #include <linux/rtnetlink.h> #include <net/netlink.h> static inline int rtnh_ok(const struct rtnexthop *rtnh, int remaining) { return remaining >= (int)sizeof(*rtnh) && rtnh->rtnh_len >= sizeof(*rtnh) && rtnh->rtnh_len <= remaining; } static inline struct rtnexthop *rtnh_next(const struct rtnexthop *rtnh, int *remaining) { int totlen = NLA_ALIGN(rtnh->rtnh_len); *remaining -= totlen; return (struct rtnexthop *) ((char *) rtnh + totlen); } static inline struct nlattr *rtnh_attrs(const struct rtnexthop *rtnh) { return (struct nlattr *) ((char *) rtnh + NLA_ALIGN(sizeof(*rtnh))); } static inline int rtnh_attrlen(const struct rtnexthop *rtnh) { return rtnh->rtnh_len - NLA_ALIGN(sizeof(*rtnh)); } #endif
1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 // SPDX-License-Identifier: GPL-2.0 /* * HID driver for Maltron L90 * * Copyright (c) 1999 Andreas Gal * Copyright (c) 2000-2005 Vojtech Pavlik <vojtech@suse.cz> * Copyright (c) 2005 Michael Haboustak <mike-@cinci.rr.com> for Concept2, Inc * Copyright (c) 2008 Jiri Slaby * Copyright (c) 2012 David Dillow <dave@thedillows.org> * Copyright (c) 2006-2013 Jiri Kosina * Copyright (c) 2013 Colin Leitner <colin.leitner@gmail.com> * Copyright (c) 2014-2016 Frank Praznik <frank.praznik@gmail.com> * Copyright (c) 2010 Richard Nauber <Richard.Nauber@gmail.com> * Copyright (c) 2016 Yuxuan Shui <yshuiv7@gmail.com> * Copyright (c) 2018 William Whistler <wtbw@wtbw.co.uk> */ #include <linux/device.h> #include <linux/hid.h> #include <linux/module.h> #include "hid-ids.h" /* The original buggy USB descriptor */ static const u8 maltron_rdesc_o[] = { 0x05, 0x01, /* Usage Page (Generic Desktop Ctrls) */ 0x09, 0x80, /* Usage (Sys Control) */ 0xA1, 0x01, /* Collection (Application) */ 0x85, 0x02, /* Report ID (2) */ 0x75, 0x01, /* Report Size (1) */ 0x95, 0x01, /* Report Count (1) */ 0x15, 0x00, /* Logical Minimum (0) */ 0x25, 0x01, /* Logical Maximum (1) */ 0x09, 0x82, /* Usage (Sys Sleep) */ 0x81, 0x06, /* Input (Data,Var,Rel) */ 0x09, 0x82, /* Usage (Sys Sleep) */ 0x81, 0x06, /* Input (Data,Var,Rel) */ 0x09, 0x83, /* Usage (Sys Wake Up) */ 0x81, 0x06, /* Input (Data,Var,Rel) */ 0x75, 0x05, /* Report Size (5) */ 0x81, 0x01, /* Input (Const,Array,Abs) */ 0xC0, /* End Collection */ 0x05, 0x0C, /* Usage Page (Consumer) */ 0x09, 0x01, /* Usage (Consumer Control) */ 0xA1, 0x01, /* Collection (Application) */ 0x85, 0x03, /* Report ID (3) */ 0x95, 0x01, /* Report Count (1) */ 0x75, 0x10, /* Report Size (16) */ 0x19, 0x00, /* Usage Minimum (Unassigned) */ 0x2A, 0xFF, 0x7F, /* Usage Maximum (0x7FFF) */ 0x81, 0x00, /* Input (Data,Array,Abs) */ 0xC0, /* End Collection */ 0x06, 0x7F, 0xFF, /* Usage Page (Vendor Defined 0xFF7F) */ 0x09, 0x01, /* Usage (0x01) */ 0xA1, 0x01, /* Collection (Application) */ 0x85, 0x04, /* Report ID (4) */ 0x95, 0x01, /* Report Count (1) */ 0x75, 0x10, /* Report Size (16) */ 0x19, 0x00, /* Usage Minimum (0x00) */ 0x2A, 0xFF, 0x7F, /* Usage Maximum (0x7FFF) */ 0x81, 0x00, /* Input (Data,Array,Abs) */ 0x75, 0x02, /* Report Size (2) */ 0x25, 0x02, /* Logical Maximum (2) */ 0x09, 0x90, /* Usage (0x90) */ 0xB1, 0x02, /* Feature (Data,Var,Abs) */ 0x75, 0x06, /* Report Size (6) */ 0xB1, 0x01, /* Feature (Const,Array,Abs) */ 0x75, 0x01, /* Report Size (1) */ 0x25, 0x01, /* Logical Maximum (1) */ 0x05, 0x08, /* Usage Page (LEDs) */ 0x09, 0x2A, /* Usage (On-Line) */ 0x91, 0x02, /* Output (Data,Var,Abs) */ 0x09, 0x4B, /* Usage (Generic Indicator) */ 0x91, 0x02, /* Output (Data,Var,Abs) */ 0x75, 0x06, /* Report Size (6) */ 0x95, 0x01, /* Report Count (1) */ 0x91, 0x01, /* Output (Const,Array,Abs) */ 0xC0 /* End Collection */ }; /* The patched descriptor, allowing media key events to be accepted as valid */ static const u8 maltron_rdesc[] = { 0x05, 0x01, /* Usage Page (Generic Desktop Ctrls) */ 0x09, 0x80, /* Usage (Sys Control) */ 0xA1, 0x01, /* Collection (Application) */ 0x85, 0x02, /* Report ID (2) */ 0x75, 0x01, /* Report Size (1) */ 0x95, 0x01, /* Report Count (1) */ 0x15, 0x00, /* Logical Minimum (0) */ 0x25, 0x01, /* Logical Maximum (1) */ 0x09, 0x82, /* Usage (Sys Sleep) */ 0x81, 0x06, /* Input (Data,Var,Rel) */ 0x09, 0x82, /* Usage (Sys Sleep) */ 0x81, 0x06, /* Input (Data,Var,Rel) */ 0x09, 0x83, /* Usage (Sys Wake Up) */ 0x81, 0x06, /* Input (Data,Var,Rel) */ 0x75, 0x05, /* Report Size (5) */ 0x81, 0x01, /* Input (Const,Array,Abs) */ 0xC0, /* End Collection */ 0x05, 0x0C, /* Usage Page (Consumer) */ 0x09, 0x01, /* Usage (Consumer Control) */ 0xA1, 0x01, /* Collection (Application) */ 0x85, 0x03, /* Report ID (3) */ 0x15, 0x00, /* Logical Minimum (0) - changed */ 0x26, 0xFF, 0x7F, /* Logical Maximum (32767) - changed */ 0x95, 0x01, /* Report Count (1) */ 0x75, 0x10, /* Report Size (16) */ 0x19, 0x00, /* Usage Minimum (Unassigned) */ 0x2A, 0xFF, 0x7F, /* Usage Maximum (0x7FFF) */ 0x81, 0x00, /* Input (Data,Array,Abs) */ 0xC0, /* End Collection */ 0x06, 0x7F, 0xFF, /* Usage Page (Vendor Defined 0xFF7F) */ 0x09, 0x01, /* Usage (0x01) */ 0xA1, 0x01, /* Collection (Application) */ 0x85, 0x04, /* Report ID (4) */ 0x95, 0x01, /* Report Count (1) */ 0x75, 0x10, /* Report Size (16) */ 0x19, 0x00, /* Usage Minimum (0x00) */ 0x2A, 0xFF, 0x7F, /* Usage Maximum (0x7FFF) */ 0x81, 0x00, /* Input (Data,Array,Abs) */ 0x75, 0x02, /* Report Size (2) */ 0x25, 0x02, /* Logical Maximum (2) */ 0x09, 0x90, /* Usage (0x90) */ 0xB1, 0x02, /* Feature (Data,Var,Abs) */ 0x75, 0x06, /* Report Size (6) */ 0xB1, 0x01, /* Feature (Const,Array,Abs) */ 0x75, 0x01, /* Report Size (1) */ 0x25, 0x01, /* Logical Maximum (1) */ 0x05, 0x08, /* Usage Page (LEDs) */ 0x09, 0x2A, /* Usage (On-Line) */ 0x91, 0x02, /* Output (Data,Var,Abs) */ 0x09, 0x4B, /* Usage (Generic Indicator) */ 0x91, 0x02, /* Output (Data,Var,Abs) */ 0x75, 0x06, /* Report Size (6) */ 0x95, 0x01, /* Report Count (1) */ 0x91, 0x01, /* Output (Const,Array,Abs) */ 0xC0 /* End Collection */ }; static const __u8 *maltron_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { if (*rsize == sizeof(maltron_rdesc_o) && !memcmp(maltron_rdesc_o, rdesc, sizeof(maltron_rdesc_o))) { hid_info(hdev, "Replacing Maltron L90 keyboard report descriptor\n"); *rsize = sizeof(maltron_rdesc); return maltron_rdesc; } return rdesc; } static const struct hid_device_id maltron_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ALCOR, USB_DEVICE_ID_ALCOR_MALTRON_KB)}, { } }; MODULE_DEVICE_TABLE(hid, maltron_devices); static struct hid_driver maltron_driver = { .name = "maltron", .id_table = maltron_devices, .report_fixup = maltron_report_fixup }; module_hid_driver(maltron_driver); MODULE_DESCRIPTION("HID driver for Maltron L90"); MODULE_LICENSE("GPL");
1 278 219 6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_UTSNAME_H #define _LINUX_UTSNAME_H #include <linux/sched.h> #include <linux/nsproxy.h> #include <linux/ns_common.h> #include <linux/err.h> #include <uapi/linux/utsname.h> enum uts_proc { UTS_PROC_ARCH, UTS_PROC_OSTYPE, UTS_PROC_OSRELEASE, UTS_PROC_VERSION, UTS_PROC_HOSTNAME, UTS_PROC_DOMAINNAME, }; struct user_namespace; extern struct user_namespace init_user_ns; struct uts_namespace { struct new_utsname name; struct user_namespace *user_ns; struct ucounts *ucounts; struct ns_common ns; } __randomize_layout; extern struct uts_namespace init_uts_ns; #ifdef CONFIG_UTS_NS static inline void get_uts_ns(struct uts_namespace *ns) { refcount_inc(&ns->ns.count); } extern struct uts_namespace *copy_utsname(unsigned long flags, struct user_namespace *user_ns, struct uts_namespace *old_ns); extern void free_uts_ns(struct uts_namespace *ns); static inline void put_uts_ns(struct uts_namespace *ns) { if (refcount_dec_and_test(&ns->ns.count)) free_uts_ns(ns); } void uts_ns_init(void); #else static inline void get_uts_ns(struct uts_namespace *ns) { } static inline void put_uts_ns(struct uts_namespace *ns) { } static inline struct uts_namespace *copy_utsname(unsigned long flags, struct user_namespace *user_ns, struct uts_namespace *old_ns) { if (flags & CLONE_NEWUTS) return ERR_PTR(-EINVAL); return old_ns; } static inline void uts_ns_init(void) { } #endif #ifdef CONFIG_PROC_SYSCTL extern void uts_proc_notify(enum uts_proc proc); #else static inline void uts_proc_notify(enum uts_proc proc) { } #endif static inline struct new_utsname *utsname(void) { return &current->nsproxy->uts_ns->name; } static inline struct new_utsname *init_utsname(void) { return &init_uts_ns.name; } extern struct rw_semaphore uts_sem; #endif /* _LINUX_UTSNAME_H */
2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 // SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/sch_blackhole.c Black hole queue * * Authors: Thomas Graf <tgraf@suug.ch> * * Note: Quantum tunneling is not supported. */ #include <linux/init.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/skbuff.h> #include <net/pkt_sched.h> static int blackhole_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { qdisc_drop(skb, sch, to_free); return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; } static struct sk_buff *blackhole_dequeue(struct Qdisc *sch) { return NULL; } static struct Qdisc_ops blackhole_qdisc_ops __read_mostly = { .id = "blackhole", .priv_size = 0, .enqueue = blackhole_enqueue, .dequeue = blackhole_dequeue, .peek = blackhole_dequeue, .owner = THIS_MODULE, }; static int __init blackhole_init(void) { return register_qdisc(&blackhole_qdisc_ops); } device_initcall(blackhole_init)
26 9 3 7 19 1 30 5 5 8 24 24 1 23 9 6 3 9 8 8 4 4 8 8 23 14 14 14 14 14 12 2 17 17 1 1 5 8 6 2 2 1 2 4 2 2 10 9 2 19 1 18 13 1 1 2 2 22 21 2 12 1 11 8 6 13 2 23 4 2 3 4 3 12 5 9 1 2 2 3 4 4 8 4 2 3 2 2 1 3 2 1 11 3 10 5 6 4 5 1 2 1 3 1 29 2 4 5 3 4 39 39 28 2 26 6 1 18 3 17 3 19 2 19 28 11 11 9 9 6 3 2 2 11 11 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 // SPDX-License-Identifier: GPL-2.0 /* * linux/ipc/shm.c * Copyright (C) 1992, 1993 Krishna Balasubramanian * Many improvements/fixes by Bruno Haible. * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994. * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli. * * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com> * BIGMEM support, Andrea Arcangeli <andrea@suse.de> * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr> * HIGHMEM support, Ingo Molnar <mingo@redhat.com> * Make shmmax, shmall, shmmni sysctl'able, Christoph Rohland <cr@sap.com> * Shared /dev/zero support, Kanoj Sarcar <kanoj@sgi.com> * Move the mm functionality over to mm/shmem.c, Christoph Rohland <cr@sap.com> * * support for audit of ipc object properties and permission changes * Dustin Kirkland <dustin.kirkland@us.ibm.com> * * namespaces support * OpenVZ, SWsoft Inc. * Pavel Emelianov <xemul@openvz.org> * * Better ipc lock (kern_ipc_perm.lock) handling * Davidlohr Bueso <davidlohr.bueso@hp.com>, June 2013. */ #include <linux/slab.h> #include <linux/mm.h> #include <linux/hugetlb.h> #include <linux/shm.h> #include <uapi/linux/shm.h> #include <linux/init.h> #include <linux/file.h> #include <linux/mman.h> #include <linux/shmem_fs.h> #include <linux/security.h> #include <linux/syscalls.h> #include <linux/audit.h> #include <linux/capability.h> #include <linux/ptrace.h> #include <linux/seq_file.h> #include <linux/rwsem.h> #include <linux/nsproxy.h> #include <linux/mount.h> #include <linux/ipc_namespace.h> #include <linux/rhashtable.h> #include <linux/uaccess.h> #include "util.h" struct shmid_kernel /* private to the kernel */ { struct kern_ipc_perm shm_perm; struct file *shm_file; unsigned long shm_nattch; unsigned long shm_segsz; time64_t shm_atim; time64_t shm_dtim; time64_t shm_ctim; struct pid *shm_cprid; struct pid *shm_lprid; struct ucounts *mlock_ucounts; /* * The task created the shm object, for * task_lock(shp->shm_creator) */ struct task_struct *shm_creator; /* * List by creator. task_lock(->shm_creator) required for read/write. * If list_empty(), then the creator is dead already. */ struct list_head shm_clist; struct ipc_namespace *ns; } __randomize_layout; /* shm_mode upper byte flags */ #define SHM_DEST 01000 /* segment will be destroyed on last detach */ #define SHM_LOCKED 02000 /* segment will not be swapped */ struct shm_file_data { int id; struct ipc_namespace *ns; struct file *file; const struct vm_operations_struct *vm_ops; }; #define shm_file_data(file) (*((struct shm_file_data **)&(file)->private_data)) static const struct file_operations shm_file_operations; static const struct vm_operations_struct shm_vm_ops; #define shm_ids(ns) ((ns)->ids[IPC_SHM_IDS]) #define shm_unlock(shp) \ ipc_unlock(&(shp)->shm_perm) static int newseg(struct ipc_namespace *, struct ipc_params *); static void shm_open(struct vm_area_struct *vma); static void shm_close(struct vm_area_struct *vma); static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp); #ifdef CONFIG_PROC_FS static int sysvipc_shm_proc_show(struct seq_file *s, void *it); #endif void shm_init_ns(struct ipc_namespace *ns) { ns->shm_ctlmax = SHMMAX; ns->shm_ctlall = SHMALL; ns->shm_ctlmni = SHMMNI; ns->shm_rmid_forced = 0; ns->shm_tot = 0; ipc_init_ids(&shm_ids(ns)); } /* * Called with shm_ids.rwsem (writer) and the shp structure locked. * Only shm_ids.rwsem remains locked on exit. */ static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) { struct shmid_kernel *shp; shp = container_of(ipcp, struct shmid_kernel, shm_perm); WARN_ON(ns != shp->ns); if (shp->shm_nattch) { shp->shm_perm.mode |= SHM_DEST; /* Do not find it any more */ ipc_set_key_private(&shm_ids(ns), &shp->shm_perm); shm_unlock(shp); } else shm_destroy(ns, shp); } #ifdef CONFIG_IPC_NS void shm_exit_ns(struct ipc_namespace *ns) { free_ipcs(ns, &shm_ids(ns), do_shm_rmid); idr_destroy(&ns->ids[IPC_SHM_IDS].ipcs_idr); rhashtable_destroy(&ns->ids[IPC_SHM_IDS].key_ht); } #endif static int __init ipc_ns_init(void) { shm_init_ns(&init_ipc_ns); return 0; } pure_initcall(ipc_ns_init); void __init shm_init(void) { ipc_init_proc_interface("sysvipc/shm", #if BITS_PER_LONG <= 32 " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime rss swap\n", #else " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime rss swap\n", #endif IPC_SHM_IDS, sysvipc_shm_proc_show); } static inline struct shmid_kernel *shm_obtain_object(struct ipc_namespace *ns, int id) { struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&shm_ids(ns), id); if (IS_ERR(ipcp)) return ERR_CAST(ipcp); return container_of(ipcp, struct shmid_kernel, shm_perm); } static inline struct shmid_kernel *shm_obtain_object_check(struct ipc_namespace *ns, int id) { struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&shm_ids(ns), id); if (IS_ERR(ipcp)) return ERR_CAST(ipcp); return container_of(ipcp, struct shmid_kernel, shm_perm); } /* * shm_lock_(check_) routines are called in the paths where the rwsem * is not necessarily held. */ static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id) { struct kern_ipc_perm *ipcp; rcu_read_lock(); ipcp = ipc_obtain_object_idr(&shm_ids(ns), id); if (IS_ERR(ipcp)) goto err; ipc_lock_object(ipcp); /* * ipc_rmid() may have already freed the ID while ipc_lock_object() * was spinning: here verify that the structure is still valid. * Upon races with RMID, return -EIDRM, thus indicating that * the ID points to a removed identifier. */ if (ipc_valid_object(ipcp)) { /* return a locked ipc object upon success */ return container_of(ipcp, struct shmid_kernel, shm_perm); } ipc_unlock_object(ipcp); ipcp = ERR_PTR(-EIDRM); err: rcu_read_unlock(); /* * Callers of shm_lock() must validate the status of the returned ipc * object pointer and error out as appropriate. */ return ERR_CAST(ipcp); } static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp) { rcu_read_lock(); ipc_lock_object(&ipcp->shm_perm); } static void shm_rcu_free(struct rcu_head *head) { struct kern_ipc_perm *ptr = container_of(head, struct kern_ipc_perm, rcu); struct shmid_kernel *shp = container_of(ptr, struct shmid_kernel, shm_perm); security_shm_free(&shp->shm_perm); kfree(shp); } /* * It has to be called with shp locked. * It must be called before ipc_rmid() */ static inline void shm_clist_rm(struct shmid_kernel *shp) { struct task_struct *creator; /* ensure that shm_creator does not disappear */ rcu_read_lock(); /* * A concurrent exit_shm may do a list_del_init() as well. * Just do nothing if exit_shm already did the work */ if (!list_empty(&shp->shm_clist)) { /* * shp->shm_creator is guaranteed to be valid *only* * if shp->shm_clist is not empty. */ creator = shp->shm_creator; task_lock(creator); /* * list_del_init() is a nop if the entry was already removed * from the list. */ list_del_init(&shp->shm_clist); task_unlock(creator); } rcu_read_unlock(); } static inline void shm_rmid(struct shmid_kernel *s) { shm_clist_rm(s); ipc_rmid(&shm_ids(s->ns), &s->shm_perm); } static int __shm_open(struct shm_file_data *sfd) { struct shmid_kernel *shp; shp = shm_lock(sfd->ns, sfd->id); if (IS_ERR(shp)) return PTR_ERR(shp); if (shp->shm_file != sfd->file) { /* ID was reused */ shm_unlock(shp); return -EINVAL; } shp->shm_atim = ktime_get_real_seconds(); ipc_update_pid(&shp->shm_lprid, task_tgid(current)); shp->shm_nattch++; shm_unlock(shp); return 0; } /* This is called by fork, once for every shm attach. */ static void shm_open(struct vm_area_struct *vma) { struct file *file = vma->vm_file; struct shm_file_data *sfd = shm_file_data(file); int err; /* Always call underlying open if present */ if (sfd->vm_ops->open) sfd->vm_ops->open(vma); err = __shm_open(sfd); /* * We raced in the idr lookup or with shm_destroy(). * Either way, the ID is busted. */ WARN_ON_ONCE(err); } /* * shm_destroy - free the struct shmid_kernel * * @ns: namespace * @shp: struct to free * * It has to be called with shp and shm_ids.rwsem (writer) locked, * but returns with shp unlocked and freed. */ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) { struct file *shm_file; shm_file = shp->shm_file; shp->shm_file = NULL; ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT; shm_rmid(shp); shm_unlock(shp); if (!is_file_hugepages(shm_file)) shmem_lock(shm_file, 0, shp->mlock_ucounts); fput(shm_file); ipc_update_pid(&shp->shm_cprid, NULL); ipc_update_pid(&shp->shm_lprid, NULL); ipc_rcu_putref(&shp->shm_perm, shm_rcu_free); } /* * shm_may_destroy - identifies whether shm segment should be destroyed now * * Returns true if and only if there are no active users of the segment and * one of the following is true: * * 1) shmctl(id, IPC_RMID, NULL) was called for this shp * * 2) sysctl kernel.shm_rmid_forced is set to 1. */ static bool shm_may_destroy(struct shmid_kernel *shp) { return (shp->shm_nattch == 0) && (shp->ns->shm_rmid_forced || (shp->shm_perm.mode & SHM_DEST)); } /* * remove the attach descriptor vma. * free memory for segment if it is marked destroyed. * The descriptor has already been removed from the current->mm->mmap list * and will later be kfree()d. */ static void __shm_close(struct shm_file_data *sfd) { struct shmid_kernel *shp; struct ipc_namespace *ns = sfd->ns; down_write(&shm_ids(ns).rwsem); /* remove from the list of attaches of the shm segment */ shp = shm_lock(ns, sfd->id); /* * We raced in the idr lookup or with shm_destroy(). * Either way, the ID is busted. */ if (WARN_ON_ONCE(IS_ERR(shp))) goto done; /* no-op */ ipc_update_pid(&shp->shm_lprid, task_tgid(current)); shp->shm_dtim = ktime_get_real_seconds(); shp->shm_nattch--; if (shm_may_destroy(shp)) shm_destroy(ns, shp); else shm_unlock(shp); done: up_write(&shm_ids(ns).rwsem); } static void shm_close(struct vm_area_struct *vma) { struct file *file = vma->vm_file; struct shm_file_data *sfd = shm_file_data(file); /* Always call underlying close if present */ if (sfd->vm_ops->close) sfd->vm_ops->close(vma); __shm_close(sfd); } /* Called with ns->shm_ids(ns).rwsem locked */ static int shm_try_destroy_orphaned(int id, void *p, void *data) { struct ipc_namespace *ns = data; struct kern_ipc_perm *ipcp = p; struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm); /* * We want to destroy segments without users and with already * exit'ed originating process. * * As shp->* are changed under rwsem, it's safe to skip shp locking. */ if (!list_empty(&shp->shm_clist)) return 0; if (shm_may_destroy(shp)) { shm_lock_by_ptr(shp); shm_destroy(ns, shp); } return 0; } void shm_destroy_orphaned(struct ipc_namespace *ns) { down_write(&shm_ids(ns).rwsem); if (shm_ids(ns).in_use) idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns); up_write(&shm_ids(ns).rwsem); } /* Locking assumes this will only be called with task == current */ void exit_shm(struct task_struct *task) { for (;;) { struct shmid_kernel *shp; struct ipc_namespace *ns; task_lock(task); if (list_empty(&task->sysvshm.shm_clist)) { task_unlock(task); break; } shp = list_first_entry(&task->sysvshm.shm_clist, struct shmid_kernel, shm_clist); /* * 1) Get pointer to the ipc namespace. It is worth to say * that this pointer is guaranteed to be valid because * shp lifetime is always shorter than namespace lifetime * in which shp lives. * We taken task_lock it means that shp won't be freed. */ ns = shp->ns; /* * 2) If kernel.shm_rmid_forced is not set then only keep track of * which shmids are orphaned, so that a later set of the sysctl * can clean them up. */ if (!ns->shm_rmid_forced) goto unlink_continue; /* * 3) get a reference to the namespace. * The refcount could be already 0. If it is 0, then * the shm objects will be free by free_ipc_work(). */ ns = get_ipc_ns_not_zero(ns); if (!ns) { unlink_continue: list_del_init(&shp->shm_clist); task_unlock(task); continue; } /* * 4) get a reference to shp. * This cannot fail: shm_clist_rm() is called before * ipc_rmid(), thus the refcount cannot be 0. */ WARN_ON(!ipc_rcu_getref(&shp->shm_perm)); /* * 5) unlink the shm segment from the list of segments * created by current. * This must be done last. After unlinking, * only the refcounts obtained above prevent IPC_RMID * from destroying the segment or the namespace. */ list_del_init(&shp->shm_clist); task_unlock(task); /* * 6) we have all references * Thus lock & if needed destroy shp. */ down_write(&shm_ids(ns).rwsem); shm_lock_by_ptr(shp); /* * rcu_read_lock was implicitly taken in shm_lock_by_ptr, it's * safe to call ipc_rcu_putref here */ ipc_rcu_putref(&shp->shm_perm, shm_rcu_free); if (ipc_valid_object(&shp->shm_perm)) { if (shm_may_destroy(shp)) shm_destroy(ns, shp); else shm_unlock(shp); } else { /* * Someone else deleted the shp from namespace * idr/kht while we have waited. * Just unlock and continue. */ shm_unlock(shp); } up_write(&shm_ids(ns).rwsem); put_ipc_ns(ns); /* paired with get_ipc_ns_not_zero */ } } static vm_fault_t shm_fault(struct vm_fault *vmf) { struct file *file = vmf->vma->vm_file; struct shm_file_data *sfd = shm_file_data(file); return sfd->vm_ops->fault(vmf); } static int shm_may_split(struct vm_area_struct *vma, unsigned long addr) { struct file *file = vma->vm_file; struct shm_file_data *sfd = shm_file_data(file); if (sfd->vm_ops->may_split) return sfd->vm_ops->may_split(vma, addr); return 0; } static unsigned long shm_pagesize(struct vm_area_struct *vma) { struct file *file = vma->vm_file; struct shm_file_data *sfd = shm_file_data(file); if (sfd->vm_ops->pagesize) return sfd->vm_ops->pagesize(vma); return PAGE_SIZE; } #ifdef CONFIG_NUMA static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *mpol) { struct shm_file_data *sfd = shm_file_data(vma->vm_file); int err = 0; if (sfd->vm_ops->set_policy) err = sfd->vm_ops->set_policy(vma, mpol); return err; } static struct mempolicy *shm_get_policy(struct vm_area_struct *vma, unsigned long addr, pgoff_t *ilx) { struct shm_file_data *sfd = shm_file_data(vma->vm_file); struct mempolicy *mpol = vma->vm_policy; if (sfd->vm_ops->get_policy) mpol = sfd->vm_ops->get_policy(vma, addr, ilx); return mpol; } #endif static int shm_mmap(struct file *file, struct vm_area_struct *vma) { struct shm_file_data *sfd = shm_file_data(file); int ret; /* * In case of remap_file_pages() emulation, the file can represent an * IPC ID that was removed, and possibly even reused by another shm * segment already. Propagate this case as an error to caller. */ ret = __shm_open(sfd); if (ret) return ret; ret = call_mmap(sfd->file, vma); if (ret) { __shm_close(sfd); return ret; } sfd->vm_ops = vma->vm_ops; #ifdef CONFIG_MMU WARN_ON(!sfd->vm_ops->fault); #endif vma->vm_ops = &shm_vm_ops; return 0; } static int shm_release(struct inode *ino, struct file *file) { struct shm_file_data *sfd = shm_file_data(file); put_ipc_ns(sfd->ns); fput(sfd->file); shm_file_data(file) = NULL; kfree(sfd); return 0; } static int shm_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct shm_file_data *sfd = shm_file_data(file); if (!sfd->file->f_op->fsync) return -EINVAL; return sfd->file->f_op->fsync(sfd->file, start, end, datasync); } static long shm_fallocate(struct file *file, int mode, loff_t offset, loff_t len) { struct shm_file_data *sfd = shm_file_data(file); if (!sfd->file->f_op->fallocate) return -EOPNOTSUPP; return sfd->file->f_op->fallocate(file, mode, offset, len); } static unsigned long shm_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { struct shm_file_data *sfd = shm_file_data(file); return sfd->file->f_op->get_unmapped_area(sfd->file, addr, len, pgoff, flags); } static const struct file_operations shm_file_operations = { .mmap = shm_mmap, .fsync = shm_fsync, .release = shm_release, .get_unmapped_area = shm_get_unmapped_area, .llseek = noop_llseek, .fallocate = shm_fallocate, }; /* * shm_file_operations_huge is now identical to shm_file_operations * except for fop_flags */ static const struct file_operations shm_file_operations_huge = { .mmap = shm_mmap, .fsync = shm_fsync, .release = shm_release, .get_unmapped_area = shm_get_unmapped_area, .llseek = noop_llseek, .fallocate = shm_fallocate, .fop_flags = FOP_HUGE_PAGES, }; static const struct vm_operations_struct shm_vm_ops = { .open = shm_open, /* callback for a new vm-area open */ .close = shm_close, /* callback for when the vm-area is released */ .fault = shm_fault, .may_split = shm_may_split, .pagesize = shm_pagesize, #if defined(CONFIG_NUMA) .set_policy = shm_set_policy, .get_policy = shm_get_policy, #endif }; /** * newseg - Create a new shared memory segment * @ns: namespace * @params: ptr to the structure that contains key, size and shmflg * * Called with shm_ids.rwsem held as a writer. */ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) { key_t key = params->key; int shmflg = params->flg; size_t size = params->u.size; int error; struct shmid_kernel *shp; size_t numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; struct file *file; char name[13]; vm_flags_t acctflag = 0; if (size < SHMMIN || size > ns->shm_ctlmax) return -EINVAL; if (numpages << PAGE_SHIFT < size) return -ENOSPC; if (ns->shm_tot + numpages < ns->shm_tot || ns->shm_tot + numpages > ns->shm_ctlall) return -ENOSPC; shp = kmalloc(sizeof(*shp), GFP_KERNEL_ACCOUNT); if (unlikely(!shp)) return -ENOMEM; shp->shm_perm.key = key; shp->shm_perm.mode = (shmflg & S_IRWXUGO); shp->mlock_ucounts = NULL; shp->shm_perm.security = NULL; error = security_shm_alloc(&shp->shm_perm); if (error) { kfree(shp); return error; } sprintf(name, "SYSV%08x", key); if (shmflg & SHM_HUGETLB) { struct hstate *hs; size_t hugesize; hs = hstate_sizelog((shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK); if (!hs) { error = -EINVAL; goto no_file; } hugesize = ALIGN(size, huge_page_size(hs)); /* hugetlb_file_setup applies strict accounting */ if (shmflg & SHM_NORESERVE) acctflag = VM_NORESERVE; file = hugetlb_file_setup(name, hugesize, acctflag, HUGETLB_SHMFS_INODE, (shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK); } else { /* * Do not allow no accounting for OVERCOMMIT_NEVER, even * if it's asked for. */ if ((shmflg & SHM_NORESERVE) && sysctl_overcommit_memory != OVERCOMMIT_NEVER) acctflag = VM_NORESERVE; file = shmem_kernel_file_setup(name, size, acctflag); } error = PTR_ERR(file); if (IS_ERR(file)) goto no_file; shp->shm_cprid = get_pid(task_tgid(current)); shp->shm_lprid = NULL; shp->shm_atim = shp->shm_dtim = 0; shp->shm_ctim = ktime_get_real_seconds(); shp->shm_segsz = size; shp->shm_nattch = 0; shp->shm_file = file; shp->shm_creator = current; /* ipc_addid() locks shp upon success. */ error = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni); if (error < 0) goto no_id; shp->ns = ns; task_lock(current); list_add(&shp->shm_clist, &current->sysvshm.shm_clist); task_unlock(current); /* * shmid gets reported as "inode#" in /proc/pid/maps. * proc-ps tools use this. Changing this will break them. */ file_inode(file)->i_ino = shp->shm_perm.id; ns->shm_tot += numpages; error = shp->shm_perm.id; ipc_unlock_object(&shp->shm_perm); rcu_read_unlock(); return error; no_id: ipc_update_pid(&shp->shm_cprid, NULL); ipc_update_pid(&shp->shm_lprid, NULL); fput(file); ipc_rcu_putref(&shp->shm_perm, shm_rcu_free); return error; no_file: call_rcu(&shp->shm_perm.rcu, shm_rcu_free); return error; } /* * Called with shm_ids.rwsem and ipcp locked. */ static int shm_more_checks(struct kern_ipc_perm *ipcp, struct ipc_params *params) { struct shmid_kernel *shp; shp = container_of(ipcp, struct shmid_kernel, shm_perm); if (shp->shm_segsz < params->u.size) return -EINVAL; return 0; } long ksys_shmget(key_t key, size_t size, int shmflg) { struct ipc_namespace *ns; static const struct ipc_ops shm_ops = { .getnew = newseg, .associate = security_shm_associate, .more_checks = shm_more_checks, }; struct ipc_params shm_params; ns = current->nsproxy->ipc_ns; shm_params.key = key; shm_params.flg = shmflg; shm_params.u.size = size; return ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params); } SYSCALL_DEFINE3(shmget, key_t, key, size_t, size, int, shmflg) { return ksys_shmget(key, size, shmflg); } static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version) { switch (version) { case IPC_64: return copy_to_user(buf, in, sizeof(*in)); case IPC_OLD: { struct shmid_ds out; memset(&out, 0, sizeof(out)); ipc64_perm_to_ipc_perm(&in->shm_perm, &out.shm_perm); out.shm_segsz = in->shm_segsz; out.shm_atime = in->shm_atime; out.shm_dtime = in->shm_dtime; out.shm_ctime = in->shm_ctime; out.shm_cpid = in->shm_cpid; out.shm_lpid = in->shm_lpid; out.shm_nattch = in->shm_nattch; return copy_to_user(buf, &out, sizeof(out)); } default: return -EINVAL; } } static inline unsigned long copy_shmid_from_user(struct shmid64_ds *out, void __user *buf, int version) { switch (version) { case IPC_64: if (copy_from_user(out, buf, sizeof(*out))) return -EFAULT; return 0; case IPC_OLD: { struct shmid_ds tbuf_old; if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old))) return -EFAULT; out->shm_perm.uid = tbuf_old.shm_perm.uid; out->shm_perm.gid = tbuf_old.shm_perm.gid; out->shm_perm.mode = tbuf_old.shm_perm.mode; return 0; } default: return -EINVAL; } } static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminfo64 *in, int version) { switch (version) { case IPC_64: return copy_to_user(buf, in, sizeof(*in)); case IPC_OLD: { struct shminfo out; if (in->shmmax > INT_MAX) out.shmmax = INT_MAX; else out.shmmax = (int)in->shmmax; out.shmmin = in->shmmin; out.shmmni = in->shmmni; out.shmseg = in->shmseg; out.shmall = in->shmall; return copy_to_user(buf, &out, sizeof(out)); } default: return -EINVAL; } } /* * Calculate and add used RSS and swap pages of a shm. * Called with shm_ids.rwsem held as a reader */ static void shm_add_rss_swap(struct shmid_kernel *shp, unsigned long *rss_add, unsigned long *swp_add) { struct inode *inode; inode = file_inode(shp->shm_file); if (is_file_hugepages(shp->shm_file)) { struct address_space *mapping = inode->i_mapping; struct hstate *h = hstate_file(shp->shm_file); *rss_add += pages_per_huge_page(h) * mapping->nrpages; } else { #ifdef CONFIG_SHMEM struct shmem_inode_info *info = SHMEM_I(inode); spin_lock_irq(&info->lock); *rss_add += inode->i_mapping->nrpages; *swp_add += info->swapped; spin_unlock_irq(&info->lock); #else *rss_add += inode->i_mapping->nrpages; #endif } } /* * Called with shm_ids.rwsem held as a reader */ static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss, unsigned long *swp) { int next_id; int total, in_use; *rss = 0; *swp = 0; in_use = shm_ids(ns).in_use; for (total = 0, next_id = 0; total < in_use; next_id++) { struct kern_ipc_perm *ipc; struct shmid_kernel *shp; ipc = idr_find(&shm_ids(ns).ipcs_idr, next_id); if (ipc == NULL) continue; shp = container_of(ipc, struct shmid_kernel, shm_perm); shm_add_rss_swap(shp, rss, swp); total++; } } /* * This function handles some shmctl commands which require the rwsem * to be held in write mode. * NOTE: no locks must be held, the rwsem is taken inside this function. */ static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd, struct shmid64_ds *shmid64) { struct kern_ipc_perm *ipcp; struct shmid_kernel *shp; int err; down_write(&shm_ids(ns).rwsem); rcu_read_lock(); ipcp = ipcctl_obtain_check(ns, &shm_ids(ns), shmid, cmd, &shmid64->shm_perm, 0); if (IS_ERR(ipcp)) { err = PTR_ERR(ipcp); goto out_unlock1; } shp = container_of(ipcp, struct shmid_kernel, shm_perm); err = security_shm_shmctl(&shp->shm_perm, cmd); if (err) goto out_unlock1; switch (cmd) { case IPC_RMID: ipc_lock_object(&shp->shm_perm); /* do_shm_rmid unlocks the ipc object and rcu */ do_shm_rmid(ns, ipcp); goto out_up; case IPC_SET: ipc_lock_object(&shp->shm_perm); err = ipc_update_perm(&shmid64->shm_perm, ipcp); if (err) goto out_unlock0; shp->shm_ctim = ktime_get_real_seconds(); break; default: err = -EINVAL; goto out_unlock1; } out_unlock0: ipc_unlock_object(&shp->shm_perm); out_unlock1: rcu_read_unlock(); out_up: up_write(&shm_ids(ns).rwsem); return err; } static int shmctl_ipc_info(struct ipc_namespace *ns, struct shminfo64 *shminfo) { int err = security_shm_shmctl(NULL, IPC_INFO); if (!err) { memset(shminfo, 0, sizeof(*shminfo)); shminfo->shmmni = shminfo->shmseg = ns->shm_ctlmni; shminfo->shmmax = ns->shm_ctlmax; shminfo->shmall = ns->shm_ctlall; shminfo->shmmin = SHMMIN; down_read(&shm_ids(ns).rwsem); err = ipc_get_maxidx(&shm_ids(ns)); up_read(&shm_ids(ns).rwsem); if (err < 0) err = 0; } return err; } static int shmctl_shm_info(struct ipc_namespace *ns, struct shm_info *shm_info) { int err = security_shm_shmctl(NULL, SHM_INFO); if (!err) { memset(shm_info, 0, sizeof(*shm_info)); down_read(&shm_ids(ns).rwsem); shm_info->used_ids = shm_ids(ns).in_use; shm_get_stat(ns, &shm_info->shm_rss, &shm_info->shm_swp); shm_info->shm_tot = ns->shm_tot; shm_info->swap_attempts = 0; shm_info->swap_successes = 0; err = ipc_get_maxidx(&shm_ids(ns)); up_read(&shm_ids(ns).rwsem); if (err < 0) err = 0; } return err; } static int shmctl_stat(struct ipc_namespace *ns, int shmid, int cmd, struct shmid64_ds *tbuf) { struct shmid_kernel *shp; int err; memset(tbuf, 0, sizeof(*tbuf)); rcu_read_lock(); if (cmd == SHM_STAT || cmd == SHM_STAT_ANY) { shp = shm_obtain_object(ns, shmid); if (IS_ERR(shp)) { err = PTR_ERR(shp); goto out_unlock; } } else { /* IPC_STAT */ shp = shm_obtain_object_check(ns, shmid); if (IS_ERR(shp)) { err = PTR_ERR(shp); goto out_unlock; } } /* * Semantically SHM_STAT_ANY ought to be identical to * that functionality provided by the /proc/sysvipc/ * interface. As such, only audit these calls and * do not do traditional S_IRUGO permission checks on * the ipc object. */ if (cmd == SHM_STAT_ANY) audit_ipc_obj(&shp->shm_perm); else { err = -EACCES; if (ipcperms(ns, &shp->shm_perm, S_IRUGO)) goto out_unlock; } err = security_shm_shmctl(&shp->shm_perm, cmd); if (err) goto out_unlock; ipc_lock_object(&shp->shm_perm); if (!ipc_valid_object(&shp->shm_perm)) { ipc_unlock_object(&shp->shm_perm); err = -EIDRM; goto out_unlock; } kernel_to_ipc64_perm(&shp->shm_perm, &tbuf->shm_perm); tbuf->shm_segsz = shp->shm_segsz; tbuf->shm_atime = shp->shm_atim; tbuf->shm_dtime = shp->shm_dtim; tbuf->shm_ctime = shp->shm_ctim; #ifndef CONFIG_64BIT tbuf->shm_atime_high = shp->shm_atim >> 32; tbuf->shm_dtime_high = shp->shm_dtim >> 32; tbuf->shm_ctime_high = shp->shm_ctim >> 32; #endif tbuf->shm_cpid = pid_vnr(shp->shm_cprid); tbuf->shm_lpid = pid_vnr(shp->shm_lprid); tbuf->shm_nattch = shp->shm_nattch; if (cmd == IPC_STAT) { /* * As defined in SUS: * Return 0 on success */ err = 0; } else { /* * SHM_STAT and SHM_STAT_ANY (both Linux specific) * Return the full id, including the sequence number */ err = shp->shm_perm.id; } ipc_unlock_object(&shp->shm_perm); out_unlock: rcu_read_unlock(); return err; } static int shmctl_do_lock(struct ipc_namespace *ns, int shmid, int cmd) { struct shmid_kernel *shp; struct file *shm_file; int err; rcu_read_lock(); shp = shm_obtain_object_check(ns, shmid); if (IS_ERR(shp)) { err = PTR_ERR(shp); goto out_unlock1; } audit_ipc_obj(&(shp->shm_perm)); err = security_shm_shmctl(&shp->shm_perm, cmd); if (err) goto out_unlock1; ipc_lock_object(&shp->shm_perm); /* check if shm_destroy() is tearing down shp */ if (!ipc_valid_object(&shp->shm_perm)) { err = -EIDRM; goto out_unlock0; } if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) { kuid_t euid = current_euid(); if (!uid_eq(euid, shp->shm_perm.uid) && !uid_eq(euid, shp->shm_perm.cuid)) { err = -EPERM; goto out_unlock0; } if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK)) { err = -EPERM; goto out_unlock0; } } shm_file = shp->shm_file; if (is_file_hugepages(shm_file)) goto out_unlock0; if (cmd == SHM_LOCK) { struct ucounts *ucounts = current_ucounts(); err = shmem_lock(shm_file, 1, ucounts); if (!err && !(shp->shm_perm.mode & SHM_LOCKED)) { shp->shm_perm.mode |= SHM_LOCKED; shp->mlock_ucounts = ucounts; } goto out_unlock0; } /* SHM_UNLOCK */ if (!(shp->shm_perm.mode & SHM_LOCKED)) goto out_unlock0; shmem_lock(shm_file, 0, shp->mlock_ucounts); shp->shm_perm.mode &= ~SHM_LOCKED; shp->mlock_ucounts = NULL; get_file(shm_file); ipc_unlock_object(&shp->shm_perm); rcu_read_unlock(); shmem_unlock_mapping(shm_file->f_mapping); fput(shm_file); return err; out_unlock0: ipc_unlock_object(&shp->shm_perm); out_unlock1: rcu_read_unlock(); return err; } static long ksys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf, int version) { int err; struct ipc_namespace *ns; struct shmid64_ds sem64; if (cmd < 0 || shmid < 0) return -EINVAL; ns = current->nsproxy->ipc_ns; switch (cmd) { case IPC_INFO: { struct shminfo64 shminfo; err = shmctl_ipc_info(ns, &shminfo); if (err < 0) return err; if (copy_shminfo_to_user(buf, &shminfo, version)) err = -EFAULT; return err; } case SHM_INFO: { struct shm_info shm_info; err = shmctl_shm_info(ns, &shm_info); if (err < 0) return err; if (copy_to_user(buf, &shm_info, sizeof(shm_info))) err = -EFAULT; return err; } case SHM_STAT: case SHM_STAT_ANY: case IPC_STAT: { err = shmctl_stat(ns, shmid, cmd, &sem64); if (err < 0) return err; if (copy_shmid_to_user(buf, &sem64, version)) err = -EFAULT; return err; } case IPC_SET: if (copy_shmid_from_user(&sem64, buf, version)) return -EFAULT; fallthrough; case IPC_RMID: return shmctl_down(ns, shmid, cmd, &sem64); case SHM_LOCK: case SHM_UNLOCK: return shmctl_do_lock(ns, shmid, cmd); default: return -EINVAL; } } SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) { return ksys_shmctl(shmid, cmd, buf, IPC_64); } #ifdef CONFIG_ARCH_WANT_IPC_PARSE_VERSION long ksys_old_shmctl(int shmid, int cmd, struct shmid_ds __user *buf) { int version = ipc_parse_version(&cmd); return ksys_shmctl(shmid, cmd, buf, version); } SYSCALL_DEFINE3(old_shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) { return ksys_old_shmctl(shmid, cmd, buf); } #endif #ifdef CONFIG_COMPAT struct compat_shmid_ds { struct compat_ipc_perm shm_perm; int shm_segsz; old_time32_t shm_atime; old_time32_t shm_dtime; old_time32_t shm_ctime; compat_ipc_pid_t shm_cpid; compat_ipc_pid_t shm_lpid; unsigned short shm_nattch; unsigned short shm_unused; compat_uptr_t shm_unused2; compat_uptr_t shm_unused3; }; struct compat_shminfo64 { compat_ulong_t shmmax; compat_ulong_t shmmin; compat_ulong_t shmmni; compat_ulong_t shmseg; compat_ulong_t shmall; compat_ulong_t __unused1; compat_ulong_t __unused2; compat_ulong_t __unused3; compat_ulong_t __unused4; }; struct compat_shm_info { compat_int_t used_ids; compat_ulong_t shm_tot, shm_rss, shm_swp; compat_ulong_t swap_attempts, swap_successes; }; static int copy_compat_shminfo_to_user(void __user *buf, struct shminfo64 *in, int version) { if (in->shmmax > INT_MAX) in->shmmax = INT_MAX; if (version == IPC_64) { struct compat_shminfo64 info; memset(&info, 0, sizeof(info)); info.shmmax = in->shmmax; info.shmmin = in->shmmin; info.shmmni = in->shmmni; info.shmseg = in->shmseg; info.shmall = in->shmall; return copy_to_user(buf, &info, sizeof(info)); } else { struct shminfo info; memset(&info, 0, sizeof(info)); info.shmmax = in->shmmax; info.shmmin = in->shmmin; info.shmmni = in->shmmni; info.shmseg = in->shmseg; info.shmall = in->shmall; return copy_to_user(buf, &info, sizeof(info)); } } static int put_compat_shm_info(struct shm_info *ip, struct compat_shm_info __user *uip) { struct compat_shm_info info; memset(&info, 0, sizeof(info)); info.used_ids = ip->used_ids; info.shm_tot = ip->shm_tot; info.shm_rss = ip->shm_rss; info.shm_swp = ip->shm_swp; info.swap_attempts = ip->swap_attempts; info.swap_successes = ip->swap_successes; return copy_to_user(uip, &info, sizeof(info)); } static int copy_compat_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version) { if (version == IPC_64) { struct compat_shmid64_ds v; memset(&v, 0, sizeof(v)); to_compat_ipc64_perm(&v.shm_perm, &in->shm_perm); v.shm_atime = lower_32_bits(in->shm_atime); v.shm_atime_high = upper_32_bits(in->shm_atime); v.shm_dtime = lower_32_bits(in->shm_dtime); v.shm_dtime_high = upper_32_bits(in->shm_dtime); v.shm_ctime = lower_32_bits(in->shm_ctime); v.shm_ctime_high = upper_32_bits(in->shm_ctime); v.shm_segsz = in->shm_segsz; v.shm_nattch = in->shm_nattch; v.shm_cpid = in->shm_cpid; v.shm_lpid = in->shm_lpid; return copy_to_user(buf, &v, sizeof(v)); } else { struct compat_shmid_ds v; memset(&v, 0, sizeof(v)); to_compat_ipc_perm(&v.shm_perm, &in->shm_perm); v.shm_perm.key = in->shm_perm.key; v.shm_atime = in->shm_atime; v.shm_dtime = in->shm_dtime; v.shm_ctime = in->shm_ctime; v.shm_segsz = in->shm_segsz; v.shm_nattch = in->shm_nattch; v.shm_cpid = in->shm_cpid; v.shm_lpid = in->shm_lpid; return copy_to_user(buf, &v, sizeof(v)); } } static int copy_compat_shmid_from_user(struct shmid64_ds *out, void __user *buf, int version) { memset(out, 0, sizeof(*out)); if (version == IPC_64) { struct compat_shmid64_ds __user *p = buf; return get_compat_ipc64_perm(&out->shm_perm, &p->shm_perm); } else { struct compat_shmid_ds __user *p = buf; return get_compat_ipc_perm(&out->shm_perm, &p->shm_perm); } } static long compat_ksys_shmctl(int shmid, int cmd, void __user *uptr, int version) { struct ipc_namespace *ns; struct shmid64_ds sem64; int err; ns = current->nsproxy->ipc_ns; if (cmd < 0 || shmid < 0) return -EINVAL; switch (cmd) { case IPC_INFO: { struct shminfo64 shminfo; err = shmctl_ipc_info(ns, &shminfo); if (err < 0) return err; if (copy_compat_shminfo_to_user(uptr, &shminfo, version)) err = -EFAULT; return err; } case SHM_INFO: { struct shm_info shm_info; err = shmctl_shm_info(ns, &shm_info); if (err < 0) return err; if (put_compat_shm_info(&shm_info, uptr)) err = -EFAULT; return err; } case IPC_STAT: case SHM_STAT_ANY: case SHM_STAT: err = shmctl_stat(ns, shmid, cmd, &sem64); if (err < 0) return err; if (copy_compat_shmid_to_user(uptr, &sem64, version)) err = -EFAULT; return err; case IPC_SET: if (copy_compat_shmid_from_user(&sem64, uptr, version)) return -EFAULT; fallthrough; case IPC_RMID: return shmctl_down(ns, shmid, cmd, &sem64); case SHM_LOCK: case SHM_UNLOCK: return shmctl_do_lock(ns, shmid, cmd); default: return -EINVAL; } return err; } COMPAT_SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, void __user *, uptr) { return compat_ksys_shmctl(shmid, cmd, uptr, IPC_64); } #ifdef CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION long compat_ksys_old_shmctl(int shmid, int cmd, void __user *uptr) { int version = compat_ipc_parse_version(&cmd); return compat_ksys_shmctl(shmid, cmd, uptr, version); } COMPAT_SYSCALL_DEFINE3(old_shmctl, int, shmid, int, cmd, void __user *, uptr) { return compat_ksys_old_shmctl(shmid, cmd, uptr); } #endif #endif /* * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists. * * NOTE! Despite the name, this is NOT a direct system call entrypoint. The * "raddr" thing points to kernel space, and there has to be a wrapper around * this. */ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr, unsigned long shmlba) { struct shmid_kernel *shp; unsigned long addr = (unsigned long)shmaddr; unsigned long size; struct file *file, *base; int err; unsigned long flags = MAP_SHARED; unsigned long prot; int acc_mode; struct ipc_namespace *ns; struct shm_file_data *sfd; int f_flags; unsigned long populate = 0; err = -EINVAL; if (shmid < 0) goto out; if (addr) { if (addr & (shmlba - 1)) { if (shmflg & SHM_RND) { addr &= ~(shmlba - 1); /* round down */ /* * Ensure that the round-down is non-nil * when remapping. This can happen for * cases when addr < shmlba. */ if (!addr && (shmflg & SHM_REMAP)) goto out; } else #ifndef __ARCH_FORCE_SHMLBA if (addr & ~PAGE_MASK) #endif goto out; } flags |= MAP_FIXED; } else if ((shmflg & SHM_REMAP)) goto out; if (shmflg & SHM_RDONLY) { prot = PROT_READ; acc_mode = S_IRUGO; f_flags = O_RDONLY; } else { prot = PROT_READ | PROT_WRITE; acc_mode = S_IRUGO | S_IWUGO; f_flags = O_RDWR; } if (shmflg & SHM_EXEC) { prot |= PROT_EXEC; acc_mode |= S_IXUGO; } /* * We cannot rely on the fs check since SYSV IPC does have an * additional creator id... */ ns = current->nsproxy->ipc_ns; rcu_read_lock(); shp = shm_obtain_object_check(ns, shmid); if (IS_ERR(shp)) { err = PTR_ERR(shp); goto out_unlock; } err = -EACCES; if (ipcperms(ns, &shp->shm_perm, acc_mode)) goto out_unlock; err = security_shm_shmat(&shp->shm_perm, shmaddr, shmflg); if (err) goto out_unlock; ipc_lock_object(&shp->shm_perm); /* check if shm_destroy() is tearing down shp */ if (!ipc_valid_object(&shp->shm_perm)) { ipc_unlock_object(&shp->shm_perm); err = -EIDRM; goto out_unlock; } /* * We need to take a reference to the real shm file to prevent the * pointer from becoming stale in cases where the lifetime of the outer * file extends beyond that of the shm segment. It's not usually * possible, but it can happen during remap_file_pages() emulation as * that unmaps the memory, then does ->mmap() via file reference only. * We'll deny the ->mmap() if the shm segment was since removed, but to * detect shm ID reuse we need to compare the file pointers. */ base = get_file(shp->shm_file); shp->shm_nattch++; size = i_size_read(file_inode(base)); ipc_unlock_object(&shp->shm_perm); rcu_read_unlock(); err = -ENOMEM; sfd = kzalloc(sizeof(*sfd), GFP_KERNEL); if (!sfd) { fput(base); goto out_nattch; } file = alloc_file_clone(base, f_flags, is_file_hugepages(base) ? &shm_file_operations_huge : &shm_file_operations); err = PTR_ERR(file); if (IS_ERR(file)) { kfree(sfd); fput(base); goto out_nattch; } sfd->id = shp->shm_perm.id; sfd->ns = get_ipc_ns(ns); sfd->file = base; sfd->vm_ops = NULL; file->private_data = sfd; err = security_mmap_file(file, prot, flags); if (err) goto out_fput; if (mmap_write_lock_killable(current->mm)) { err = -EINTR; goto out_fput; } if (addr && !(shmflg & SHM_REMAP)) { err = -EINVAL; if (addr + size < addr) goto invalid; if (find_vma_intersection(current->mm, addr, addr + size)) goto invalid; } addr = do_mmap(file, addr, size, prot, flags, 0, 0, &populate, NULL); *raddr = addr; err = 0; if (IS_ERR_VALUE(addr)) err = (long)addr; invalid: mmap_write_unlock(current->mm); if (populate) mm_populate(addr, populate); out_fput: fput(file); out_nattch: down_write(&shm_ids(ns).rwsem); shp = shm_lock(ns, shmid); shp->shm_nattch--; if (shm_may_destroy(shp)) shm_destroy(ns, shp); else shm_unlock(shp); up_write(&shm_ids(ns).rwsem); return err; out_unlock: rcu_read_unlock(); out: return err; } SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg) { unsigned long ret; long err; err = do_shmat(shmid, shmaddr, shmflg, &ret, SHMLBA); if (err) return err; force_successful_syscall_return(); return (long)ret; } #ifdef CONFIG_COMPAT #ifndef COMPAT_SHMLBA #define COMPAT_SHMLBA SHMLBA #endif COMPAT_SYSCALL_DEFINE3(shmat, int, shmid, compat_uptr_t, shmaddr, int, shmflg) { unsigned long ret; long err; err = do_shmat(shmid, compat_ptr(shmaddr), shmflg, &ret, COMPAT_SHMLBA); if (err) return err; force_successful_syscall_return(); return (long)ret; } #endif /* * detach and kill segment if marked destroyed. * The work is done in shm_close. */ long ksys_shmdt(char __user *shmaddr) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; unsigned long addr = (unsigned long)shmaddr; int retval = -EINVAL; #ifdef CONFIG_MMU loff_t size = 0; struct file *file; VMA_ITERATOR(vmi, mm, addr); #endif if (addr & ~PAGE_MASK) return retval; if (mmap_write_lock_killable(mm)) return -EINTR; /* * This function tries to be smart and unmap shm segments that * were modified by partial mlock or munmap calls: * - It first determines the size of the shm segment that should be * unmapped: It searches for a vma that is backed by shm and that * started at address shmaddr. It records it's size and then unmaps * it. * - Then it unmaps all shm vmas that started at shmaddr and that * are within the initially determined size and that are from the * same shm segment from which we determined the size. * Errors from do_munmap are ignored: the function only fails if * it's called with invalid parameters or if it's called to unmap * a part of a vma. Both calls in this function are for full vmas, * the parameters are directly copied from the vma itself and always * valid - therefore do_munmap cannot fail. (famous last words?) */ /* * If it had been mremap()'d, the starting address would not * match the usual checks anyway. So assume all vma's are * above the starting address given. */ #ifdef CONFIG_MMU for_each_vma(vmi, vma) { /* * Check if the starting address would match, i.e. it's * a fragment created by mprotect() and/or munmap(), or it * otherwise it starts at this address with no hassles. */ if ((vma->vm_ops == &shm_vm_ops) && (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) { /* * Record the file of the shm segment being * unmapped. With mremap(), someone could place * page from another segment but with equal offsets * in the range we are unmapping. */ file = vma->vm_file; size = i_size_read(file_inode(vma->vm_file)); do_vmi_align_munmap(&vmi, vma, mm, vma->vm_start, vma->vm_end, NULL, false); /* * We discovered the size of the shm segment, so * break out of here and fall through to the next * loop that uses the size information to stop * searching for matching vma's. */ retval = 0; vma = vma_next(&vmi); break; } } /* * We need look no further than the maximum address a fragment * could possibly have landed at. Also cast things to loff_t to * prevent overflows and make comparisons vs. equal-width types. */ size = PAGE_ALIGN(size); while (vma && (loff_t)(vma->vm_end - addr) <= size) { /* finding a matching vma now does not alter retval */ if ((vma->vm_ops == &shm_vm_ops) && ((vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) && (vma->vm_file == file)) { do_vmi_align_munmap(&vmi, vma, mm, vma->vm_start, vma->vm_end, NULL, false); } vma = vma_next(&vmi); } #else /* CONFIG_MMU */ vma = vma_lookup(mm, addr); /* under NOMMU conditions, the exact address to be destroyed must be * given */ if (vma && vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) { do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL); retval = 0; } #endif mmap_write_unlock(mm); return retval; } SYSCALL_DEFINE1(shmdt, char __user *, shmaddr) { return ksys_shmdt(shmaddr); } #ifdef CONFIG_PROC_FS static int sysvipc_shm_proc_show(struct seq_file *s, void *it) { struct pid_namespace *pid_ns = ipc_seq_pid_ns(s); struct user_namespace *user_ns = seq_user_ns(s); struct kern_ipc_perm *ipcp = it; struct shmid_kernel *shp; unsigned long rss = 0, swp = 0; shp = container_of(ipcp, struct shmid_kernel, shm_perm); shm_add_rss_swap(shp, &rss, &swp); #if BITS_PER_LONG <= 32 #define SIZE_SPEC "%10lu" #else #define SIZE_SPEC "%21lu" #endif seq_printf(s, "%10d %10d %4o " SIZE_SPEC " %5u %5u " "%5lu %5u %5u %5u %5u %10llu %10llu %10llu " SIZE_SPEC " " SIZE_SPEC "\n", shp->shm_perm.key, shp->shm_perm.id, shp->shm_perm.mode, shp->shm_segsz, pid_nr_ns(shp->shm_cprid, pid_ns), pid_nr_ns(shp->shm_lprid, pid_ns), shp->shm_nattch, from_kuid_munged(user_ns, shp->shm_perm.uid), from_kgid_munged(user_ns, shp->shm_perm.gid), from_kuid_munged(user_ns, shp->shm_perm.cuid), from_kgid_munged(user_ns, shp->shm_perm.cgid), shp->shm_atim, shp->shm_dtim, shp->shm_ctim, rss * PAGE_SIZE, swp * PAGE_SIZE); return 0; } #endif
8 8 8 8 8 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 // SPDX-License-Identifier: GPL-2.0 /* * transport_class.c - implementation of generic transport classes * using attribute_containers * * Copyright (c) 2005 - James Bottomley <James.Bottomley@steeleye.com> * * The basic idea here is to allow any "device controller" (which * would most often be a Host Bus Adapter to use the services of one * or more tranport classes for performing transport specific * services. Transport specific services are things that the generic * command layer doesn't want to know about (speed settings, line * condidtioning, etc), but which the user might be interested in. * Thus, the HBA's use the routines exported by the transport classes * to perform these functions. The transport classes export certain * values to the user via sysfs using attribute containers. * * Note: because not every HBA will care about every transport * attribute, there's a many to one relationship that goes like this: * * transport class<-----attribute container<----class device * * Usually the attribute container is per-HBA, but the design doesn't * mandate that. Although most of the services will be specific to * the actual external storage connection used by the HBA, the generic * transport class is framed entirely in terms of generic devices to * allow it to be used by any physical HBA in the system. */ #include <linux/export.h> #include <linux/attribute_container.h> #include <linux/transport_class.h> static int transport_remove_classdev(struct attribute_container *cont, struct device *dev, struct device *classdev); /** * transport_class_register - register an initial transport class * * @tclass: a pointer to the transport class structure to be initialised * * The transport class contains an embedded class which is used to * identify it. The caller should initialise this structure with * zeros and then generic class must have been initialised with the * actual transport class unique name. There's a macro * DECLARE_TRANSPORT_CLASS() to do this (declared classes still must * be registered). * * Returns 0 on success or error on failure. */ int transport_class_register(struct transport_class *tclass) { return class_register(&tclass->class); } EXPORT_SYMBOL_GPL(transport_class_register); /** * transport_class_unregister - unregister a previously registered class * * @tclass: The transport class to unregister * * Must be called prior to deallocating the memory for the transport * class. */ void transport_class_unregister(struct transport_class *tclass) { class_unregister(&tclass->class); } EXPORT_SYMBOL_GPL(transport_class_unregister); static int anon_transport_dummy_function(struct transport_container *tc, struct device *dev, struct device *cdev) { /* do nothing */ return 0; } /** * anon_transport_class_register - register an anonymous class * * @atc: The anon transport class to register * * The anonymous transport class contains both a transport class and a * container. The idea of an anonymous class is that it never * actually has any device attributes associated with it (and thus * saves on container storage). So it can only be used for triggering * events. Use prezero and then use DECLARE_ANON_TRANSPORT_CLASS() to * initialise the anon transport class storage. */ int anon_transport_class_register(struct anon_transport_class *atc) { int error; atc->container.class = &atc->tclass.class; attribute_container_set_no_classdevs(&atc->container); error = attribute_container_register(&atc->container); if (error) return error; atc->tclass.setup = anon_transport_dummy_function; atc->tclass.remove = anon_transport_dummy_function; return 0; } EXPORT_SYMBOL_GPL(anon_transport_class_register); /** * anon_transport_class_unregister - unregister an anon class * * @atc: Pointer to the anon transport class to unregister * * Must be called prior to deallocating the memory for the anon * transport class. */ void anon_transport_class_unregister(struct anon_transport_class *atc) { if (unlikely(attribute_container_unregister(&atc->container))) BUG(); } EXPORT_SYMBOL_GPL(anon_transport_class_unregister); static int transport_setup_classdev(struct attribute_container *cont, struct device *dev, struct device *classdev) { struct transport_class *tclass = class_to_transport_class(cont->class); struct transport_container *tcont = attribute_container_to_transport_container(cont); if (tclass->setup) tclass->setup(tcont, dev, classdev); return 0; } /** * transport_setup_device - declare a new dev for transport class association but don't make it visible yet. * @dev: the generic device representing the entity being added * * Usually, dev represents some component in the HBA system (either * the HBA itself or a device remote across the HBA bus). This * routine is simply a trigger point to see if any set of transport * classes wishes to associate with the added device. This allocates * storage for the class device and initialises it, but does not yet * add it to the system or add attributes to it (you do this with * transport_add_device). If you have no need for a separate setup * and add operations, use transport_register_device (see * transport_class.h). */ void transport_setup_device(struct device *dev) { attribute_container_add_device(dev, transport_setup_classdev); } EXPORT_SYMBOL_GPL(transport_setup_device); static int transport_add_class_device(struct attribute_container *cont, struct device *dev, struct device *classdev) { struct transport_class *tclass = class_to_transport_class(cont->class); int error = attribute_container_add_class_device(classdev); struct transport_container *tcont = attribute_container_to_transport_container(cont); if (error) goto err_remove; if (tcont->statistics) { error = sysfs_create_group(&classdev->kobj, tcont->statistics); if (error) goto err_del; } return 0; err_del: attribute_container_class_device_del(classdev); err_remove: if (tclass->remove) tclass->remove(tcont, dev, classdev); return error; } /** * transport_add_device - declare a new dev for transport class association * * @dev: the generic device representing the entity being added * * Usually, dev represents some component in the HBA system (either * the HBA itself or a device remote across the HBA bus). This * routine is simply a trigger point used to add the device to the * system and register attributes for it. */ int transport_add_device(struct device *dev) { return attribute_container_device_trigger_safe(dev, transport_add_class_device, transport_remove_classdev); } EXPORT_SYMBOL_GPL(transport_add_device); static int transport_configure(struct attribute_container *cont, struct device *dev, struct device *cdev) { struct transport_class *tclass = class_to_transport_class(cont->class); struct transport_container *tcont = attribute_container_to_transport_container(cont); if (tclass->configure) tclass->configure(tcont, dev, cdev); return 0; } /** * transport_configure_device - configure an already set up device * * @dev: generic device representing device to be configured * * The idea of configure is simply to provide a point within the setup * process to allow the transport class to extract information from a * device after it has been setup. This is used in SCSI because we * have to have a setup device to begin using the HBA, but after we * send the initial inquiry, we use configure to extract the device * parameters. The device need not have been added to be configured. */ void transport_configure_device(struct device *dev) { attribute_container_device_trigger(dev, transport_configure); } EXPORT_SYMBOL_GPL(transport_configure_device); static int transport_remove_classdev(struct attribute_container *cont, struct device *dev, struct device *classdev) { struct transport_container *tcont = attribute_container_to_transport_container(cont); struct transport_class *tclass = class_to_transport_class(cont->class); if (tclass->remove) tclass->remove(tcont, dev, classdev); if (tclass->remove != anon_transport_dummy_function) { if (tcont->statistics) sysfs_remove_group(&classdev->kobj, tcont->statistics); attribute_container_class_device_del(classdev); } return 0; } /** * transport_remove_device - remove the visibility of a device * * @dev: generic device to remove * * This call removes the visibility of the device (to the user from * sysfs), but does not destroy it. To eliminate a device entirely * you must also call transport_destroy_device. If you don't need to * do remove and destroy as separate operations, use * transport_unregister_device() (see transport_class.h) which will * perform both calls for you. */ void transport_remove_device(struct device *dev) { attribute_container_device_trigger(dev, transport_remove_classdev); } EXPORT_SYMBOL_GPL(transport_remove_device); static void transport_destroy_classdev(struct attribute_container *cont, struct device *dev, struct device *classdev) { struct transport_class *tclass = class_to_transport_class(cont->class); if (tclass->remove != anon_transport_dummy_function) put_device(classdev); } /** * transport_destroy_device - destroy a removed device * * @dev: device to eliminate from the transport class. * * This call triggers the elimination of storage associated with the * transport classdev. Note: all it really does is relinquish a * reference to the classdev. The memory will not be freed until the * last reference goes to zero. Note also that the classdev retains a * reference count on dev, so dev too will remain for as long as the * transport class device remains around. */ void transport_destroy_device(struct device *dev) { attribute_container_remove_device(dev, transport_destroy_classdev); } EXPORT_SYMBOL_GPL(transport_destroy_device);
415 413 416 236 215 39 38 38 217 4 2 146 27 155 5 205 204 150 6 132 13 203 203 53 158 19 1 18 56 55 55 1 1 55 55 55 54 179 56 137 88 89 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 // SPDX-License-Identifier: GPL-2.0-or-later /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Generic INET6 transport hashtables * * Authors: Lotsa people, from code originally in tcp, generalised here * by Arnaldo Carvalho de Melo <acme@mandriva.com> */ #include <linux/module.h> #include <linux/random.h> #include <net/addrconf.h> #include <net/hotdata.h> #include <net/inet_connection_sock.h> #include <net/inet_hashtables.h> #include <net/inet6_hashtables.h> #include <net/secure_seq.h> #include <net/ip.h> #include <net/sock_reuseport.h> #include <net/tcp.h> u32 inet6_ehashfn(const struct net *net, const struct in6_addr *laddr, const u16 lport, const struct in6_addr *faddr, const __be16 fport) { u32 lhash, fhash; net_get_random_once(&inet6_ehash_secret, sizeof(inet6_ehash_secret)); net_get_random_once(&tcp_ipv6_hash_secret, sizeof(tcp_ipv6_hash_secret)); lhash = (__force u32)laddr->s6_addr32[3]; fhash = __ipv6_addr_jhash(faddr, tcp_ipv6_hash_secret); return __inet6_ehashfn(lhash, lport, fhash, fport, inet6_ehash_secret + net_hash_mix(net)); } EXPORT_SYMBOL_GPL(inet6_ehashfn); /* * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM * * The sockhash lock must be held as a reader here. */ struct sock *__inet6_lookup_established(const struct net *net, struct inet_hashinfo *hashinfo, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const u16 hnum, const int dif, const int sdif) { struct sock *sk; const struct hlist_nulls_node *node; const __portpair ports = INET_COMBINED_PORTS(sport, hnum); /* Optimize here for direct hit, only listening connections can * have wildcards anyways. */ unsigned int hash = inet6_ehashfn(net, daddr, hnum, saddr, sport); unsigned int slot = hash & hashinfo->ehash_mask; struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; begin: sk_nulls_for_each_rcu(sk, node, &head->chain) { if (sk->sk_hash != hash) continue; if (!inet6_match(net, sk, saddr, daddr, ports, dif, sdif)) continue; if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt))) goto out; if (unlikely(!inet6_match(net, sk, saddr, daddr, ports, dif, sdif))) { sock_gen_put(sk); goto begin; } goto found; } if (get_nulls_value(node) != slot) goto begin; out: sk = NULL; found: return sk; } EXPORT_SYMBOL(__inet6_lookup_established); static inline int compute_score(struct sock *sk, const struct net *net, const unsigned short hnum, const struct in6_addr *daddr, const int dif, const int sdif) { int score = -1; if (net_eq(sock_net(sk), net) && inet_sk(sk)->inet_num == hnum && sk->sk_family == PF_INET6) { if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) return -1; if (!inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif)) return -1; score = sk->sk_bound_dev_if ? 2 : 1; if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) score++; } return score; } /** * inet6_lookup_reuseport() - execute reuseport logic on AF_INET6 socket if necessary. * @net: network namespace. * @sk: AF_INET6 socket, must be in TCP_LISTEN state for TCP or TCP_CLOSE for UDP. * @skb: context for a potential SK_REUSEPORT program. * @doff: header offset. * @saddr: source address. * @sport: source port. * @daddr: destination address. * @hnum: destination port in host byte order. * @ehashfn: hash function used to generate the fallback hash. * * Return: NULL if sk doesn't have SO_REUSEPORT set, otherwise a pointer to * the selected sock or an error. */ struct sock *inet6_lookup_reuseport(const struct net *net, struct sock *sk, struct sk_buff *skb, int doff, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, unsigned short hnum, inet6_ehashfn_t *ehashfn) { struct sock *reuse_sk = NULL; u32 phash; if (sk->sk_reuseport) { phash = INDIRECT_CALL_INET(ehashfn, udp6_ehashfn, inet6_ehashfn, net, daddr, hnum, saddr, sport); reuse_sk = reuseport_select_sock(sk, phash, skb, doff); } return reuse_sk; } EXPORT_SYMBOL_GPL(inet6_lookup_reuseport); /* called with rcu_read_lock() */ static struct sock *inet6_lhash2_lookup(const struct net *net, struct inet_listen_hashbucket *ilb2, struct sk_buff *skb, int doff, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const unsigned short hnum, const int dif, const int sdif) { struct sock *sk, *result = NULL; struct hlist_nulls_node *node; int score, hiscore = 0; sk_nulls_for_each_rcu(sk, node, &ilb2->nulls_head) { score = compute_score(sk, net, hnum, daddr, dif, sdif); if (score > hiscore) { result = inet6_lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, hnum, inet6_ehashfn); if (result) return result; result = sk; hiscore = score; } } return result; } struct sock *inet6_lookup_run_sk_lookup(const struct net *net, int protocol, struct sk_buff *skb, int doff, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const u16 hnum, const int dif, inet6_ehashfn_t *ehashfn) { struct sock *sk, *reuse_sk; bool no_reuseport; no_reuseport = bpf_sk_lookup_run_v6(net, protocol, saddr, sport, daddr, hnum, dif, &sk); if (no_reuseport || IS_ERR_OR_NULL(sk)) return sk; reuse_sk = inet6_lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, hnum, ehashfn); if (reuse_sk) sk = reuse_sk; return sk; } EXPORT_SYMBOL_GPL(inet6_lookup_run_sk_lookup); struct sock *inet6_lookup_listener(const struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const unsigned short hnum, const int dif, const int sdif) { struct inet_listen_hashbucket *ilb2; struct sock *result = NULL; unsigned int hash2; /* Lookup redirect from BPF */ if (static_branch_unlikely(&bpf_sk_lookup_enabled) && hashinfo == net->ipv4.tcp_death_row.hashinfo) { result = inet6_lookup_run_sk_lookup(net, IPPROTO_TCP, skb, doff, saddr, sport, daddr, hnum, dif, inet6_ehashfn); if (result) goto done; } hash2 = ipv6_portaddr_hash(net, daddr, hnum); ilb2 = inet_lhash2_bucket(hashinfo, hash2); result = inet6_lhash2_lookup(net, ilb2, skb, doff, saddr, sport, daddr, hnum, dif, sdif); if (result) goto done; /* Lookup lhash2 with in6addr_any */ hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum); ilb2 = inet_lhash2_bucket(hashinfo, hash2); result = inet6_lhash2_lookup(net, ilb2, skb, doff, saddr, sport, &in6addr_any, hnum, dif, sdif); done: if (IS_ERR(result)) return NULL; return result; } EXPORT_SYMBOL_GPL(inet6_lookup_listener); struct sock *inet6_lookup(const struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const __be16 dport, const int dif) { struct sock *sk; bool refcounted; sk = __inet6_lookup(net, hashinfo, skb, doff, saddr, sport, daddr, ntohs(dport), dif, 0, &refcounted); if (sk && !refcounted && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; return sk; } EXPORT_SYMBOL_GPL(inet6_lookup); static int __inet6_check_established(struct inet_timewait_death_row *death_row, struct sock *sk, const __u16 lport, struct inet_timewait_sock **twp) { struct inet_hashinfo *hinfo = death_row->hashinfo; struct inet_sock *inet = inet_sk(sk); const struct in6_addr *daddr = &sk->sk_v6_rcv_saddr; const struct in6_addr *saddr = &sk->sk_v6_daddr; const int dif = sk->sk_bound_dev_if; struct net *net = sock_net(sk); const int sdif = l3mdev_master_ifindex_by_index(net, dif); const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport); const unsigned int hash = inet6_ehashfn(net, daddr, lport, saddr, inet->inet_dport); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); spinlock_t *lock = inet_ehash_lockp(hinfo, hash); struct sock *sk2; const struct hlist_nulls_node *node; struct inet_timewait_sock *tw = NULL; spin_lock(lock); sk_nulls_for_each(sk2, node, &head->chain) { if (sk2->sk_hash != hash) continue; if (likely(inet6_match(net, sk2, saddr, daddr, ports, dif, sdif))) { if (sk2->sk_state == TCP_TIME_WAIT) { tw = inet_twsk(sk2); if (sk->sk_protocol == IPPROTO_TCP && tcp_twsk_unique(sk, sk2, twp)) break; } goto not_unique; } } /* Must record num and sport now. Otherwise we will see * in hash table socket with a funny identity. */ inet->inet_num = lport; inet->inet_sport = htons(lport); sk->sk_hash = hash; WARN_ON(!sk_unhashed(sk)); __sk_nulls_add_node_rcu(sk, &head->chain); if (tw) { sk_nulls_del_node_init_rcu((struct sock *)tw); __NET_INC_STATS(net, LINUX_MIB_TIMEWAITRECYCLED); } spin_unlock(lock); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); if (twp) { *twp = tw; } else if (tw) { /* Silly. Should hash-dance instead... */ inet_twsk_deschedule_put(tw); } return 0; not_unique: spin_unlock(lock); return -EADDRNOTAVAIL; } static u64 inet6_sk_port_offset(const struct sock *sk) { const struct inet_sock *inet = inet_sk(sk); return secure_ipv6_port_ephemeral(sk->sk_v6_rcv_saddr.s6_addr32, sk->sk_v6_daddr.s6_addr32, inet->inet_dport); } int inet6_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk) { u64 port_offset = 0; if (!inet_sk(sk)->inet_num) port_offset = inet6_sk_port_offset(sk); return __inet_hash_connect(death_row, sk, port_offset, __inet6_check_established); } EXPORT_SYMBOL_GPL(inet6_hash_connect); int inet6_hash(struct sock *sk) { int err = 0; if (sk->sk_state != TCP_CLOSE) err = __inet_hash(sk, NULL); return err; } EXPORT_SYMBOL_GPL(inet6_hash);
797 515 1625 1 50 26 4 24 865 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_TIME64_H #define _LINUX_TIME64_H #include <linux/math64.h> #include <vdso/time64.h> typedef __s64 time64_t; typedef __u64 timeu64_t; #include <uapi/linux/time.h> struct timespec64 { time64_t tv_sec; /* seconds */ long tv_nsec; /* nanoseconds */ }; struct itimerspec64 { struct timespec64 it_interval; struct timespec64 it_value; }; /* Parameters used to convert the timespec values: */ #define PSEC_PER_NSEC 1000L /* Located here for timespec[64]_valid_strict */ #define TIME64_MAX ((s64)~((u64)1 << 63)) #define TIME64_MIN (-TIME64_MAX - 1) #define KTIME_MAX ((s64)~((u64)1 << 63)) #define KTIME_MIN (-KTIME_MAX - 1) #define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC) #define KTIME_SEC_MIN (KTIME_MIN / NSEC_PER_SEC) /* * Limits for settimeofday(): * * To prevent setting the time close to the wraparound point time setting * is limited so a reasonable uptime can be accomodated. Uptime of 30 years * should be really sufficient, which means the cutoff is 2232. At that * point the cutoff is just a small part of the larger problem. */ #define TIME_UPTIME_SEC_MAX (30LL * 365 * 24 *3600) #define TIME_SETTOD_SEC_MAX (KTIME_SEC_MAX - TIME_UPTIME_SEC_MAX) static inline int timespec64_equal(const struct timespec64 *a, const struct timespec64 *b) { return (a->tv_sec == b->tv_sec) && (a->tv_nsec == b->tv_nsec); } /* * lhs < rhs: return <0 * lhs == rhs: return 0 * lhs > rhs: return >0 */ static inline int timespec64_compare(const struct timespec64 *lhs, const struct timespec64 *rhs) { if (lhs->tv_sec < rhs->tv_sec) return -1; if (lhs->tv_sec > rhs->tv_sec) return 1; return lhs->tv_nsec - rhs->tv_nsec; } extern void set_normalized_timespec64(struct timespec64 *ts, time64_t sec, s64 nsec); static inline struct timespec64 timespec64_add(struct timespec64 lhs, struct timespec64 rhs) { struct timespec64 ts_delta; set_normalized_timespec64(&ts_delta, lhs.tv_sec + rhs.tv_sec, lhs.tv_nsec + rhs.tv_nsec); return ts_delta; } /* * sub = lhs - rhs, in normalized form */ static inline struct timespec64 timespec64_sub(struct timespec64 lhs, struct timespec64 rhs) { struct timespec64 ts_delta; set_normalized_timespec64(&ts_delta, lhs.tv_sec - rhs.tv_sec, lhs.tv_nsec - rhs.tv_nsec); return ts_delta; } /* * Returns true if the timespec64 is norm, false if denorm: */ static inline bool timespec64_valid(const struct timespec64 *ts) { /* Dates before 1970 are bogus */ if (ts->tv_sec < 0) return false; /* Can't have more nanoseconds then a second */ if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC) return false; return true; } static inline bool timespec64_valid_strict(const struct timespec64 *ts) { if (!timespec64_valid(ts)) return false; /* Disallow values that could overflow ktime_t */ if ((unsigned long long)ts->tv_sec >= KTIME_SEC_MAX) return false; return true; } static inline bool timespec64_valid_settod(const struct timespec64 *ts) { if (!timespec64_valid(ts)) return false; /* Disallow values which cause overflow issues vs. CLOCK_REALTIME */ if ((unsigned long long)ts->tv_sec >= TIME_SETTOD_SEC_MAX) return false; return true; } /** * timespec64_to_ns - Convert timespec64 to nanoseconds * @ts: pointer to the timespec64 variable to be converted * * Returns the scalar nanosecond representation of the timespec64 * parameter. */ static inline s64 timespec64_to_ns(const struct timespec64 *ts) { /* Prevent multiplication overflow / underflow */ if (ts->tv_sec >= KTIME_SEC_MAX) return KTIME_MAX; if (ts->tv_sec <= KTIME_SEC_MIN) return KTIME_MIN; return ((s64) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec; } /** * ns_to_timespec64 - Convert nanoseconds to timespec64 * @nsec: the nanoseconds value to be converted * * Returns the timespec64 representation of the nsec parameter. */ extern struct timespec64 ns_to_timespec64(s64 nsec); /** * timespec64_add_ns - Adds nanoseconds to a timespec64 * @a: pointer to timespec64 to be incremented * @ns: unsigned nanoseconds value to be added * * This must always be inlined because its used from the x86-64 vdso, * which cannot call other kernel functions. */ static __always_inline void timespec64_add_ns(struct timespec64 *a, u64 ns) { a->tv_sec += __iter_div_u64_rem(a->tv_nsec + ns, NSEC_PER_SEC, &ns); a->tv_nsec = ns; } /* * timespec64_add_safe assumes both values are positive and checks for * overflow. It will return TIME64_MAX in case of overflow. */ extern struct timespec64 timespec64_add_safe(const struct timespec64 lhs, const struct timespec64 rhs); #endif /* _LINUX_TIME64_H */
1 7 3 1 6 2 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 /* SPDX-License-Identifier: GPL-2.0-only */ /* * cec - HDMI Consumer Electronics Control support header * * Copyright 2016 Cisco Systems, Inc. and/or its affiliates. All rights reserved. */ #ifndef _MEDIA_CEC_H #define _MEDIA_CEC_H #include <linux/poll.h> #include <linux/fs.h> #include <linux/debugfs.h> #include <linux/device.h> #include <linux/cdev.h> #include <linux/kthread.h> #include <linux/timer.h> #include <linux/cec-funcs.h> #include <media/rc-core.h> #define CEC_CAP_DEFAULTS (CEC_CAP_LOG_ADDRS | CEC_CAP_TRANSMIT | \ CEC_CAP_PASSTHROUGH | CEC_CAP_RC) /** * struct cec_devnode - cec device node * @dev: cec device * @cdev: cec character device * @minor: device node minor number * @lock: lock to serialize open/release and registration * @registered: the device was correctly registered * @unregistered: the device was unregistered * @lock_fhs: lock to control access to @fhs * @fhs: the list of open filehandles (cec_fh) * * This structure represents a cec-related device node. * * To add or remove filehandles from @fhs the @lock must be taken first, * followed by @lock_fhs. It is safe to access @fhs if either lock is held. * * The @parent is a physical device. It must be set by core or device drivers * before registering the node. */ struct cec_devnode { /* sysfs */ struct device dev; struct cdev cdev; /* device info */ int minor; /* serialize open/release and registration */ struct mutex lock; bool registered; bool unregistered; /* protect access to fhs */ struct mutex lock_fhs; struct list_head fhs; }; struct cec_adapter; struct cec_data; struct cec_pin; struct cec_notifier; struct cec_data { struct list_head list; struct list_head xfer_list; struct cec_adapter *adap; struct cec_msg msg; u8 match_len; u8 match_reply[5]; struct cec_fh *fh; struct delayed_work work; struct completion c; u8 attempts; bool blocking; bool completed; }; struct cec_msg_entry { struct list_head list; struct cec_msg msg; }; struct cec_event_entry { struct list_head list; struct cec_event ev; }; #define CEC_NUM_CORE_EVENTS 2 #define CEC_NUM_EVENTS CEC_EVENT_PIN_5V_HIGH struct cec_fh { struct list_head list; struct list_head xfer_list; struct cec_adapter *adap; u8 mode_initiator; u8 mode_follower; /* Events */ wait_queue_head_t wait; struct mutex lock; struct list_head events[CEC_NUM_EVENTS]; /* queued events */ u16 queued_events[CEC_NUM_EVENTS]; unsigned int total_queued_events; struct cec_event_entry core_events[CEC_NUM_CORE_EVENTS]; struct list_head msgs; /* queued messages */ unsigned int queued_msgs; }; #define CEC_SIGNAL_FREE_TIME_RETRY 3 #define CEC_SIGNAL_FREE_TIME_NEW_INITIATOR 5 #define CEC_SIGNAL_FREE_TIME_NEXT_XFER 7 /* The nominal data bit period is 2.4 ms */ #define CEC_FREE_TIME_TO_USEC(ft) ((ft) * 2400) struct cec_adap_ops { /* Low-level callbacks, called with adap->lock held */ int (*adap_enable)(struct cec_adapter *adap, bool enable); int (*adap_monitor_all_enable)(struct cec_adapter *adap, bool enable); int (*adap_monitor_pin_enable)(struct cec_adapter *adap, bool enable); int (*adap_log_addr)(struct cec_adapter *adap, u8 logical_addr); void (*adap_unconfigured)(struct cec_adapter *adap); int (*adap_transmit)(struct cec_adapter *adap, u8 attempts, u32 signal_free_time, struct cec_msg *msg); void (*adap_nb_transmit_canceled)(struct cec_adapter *adap, const struct cec_msg *msg); void (*adap_status)(struct cec_adapter *adap, struct seq_file *file); void (*adap_free)(struct cec_adapter *adap); /* Error injection callbacks, called without adap->lock held */ int (*error_inj_show)(struct cec_adapter *adap, struct seq_file *sf); bool (*error_inj_parse_line)(struct cec_adapter *adap, char *line); /* High-level CEC message callback, called without adap->lock held */ void (*configured)(struct cec_adapter *adap); int (*received)(struct cec_adapter *adap, struct cec_msg *msg); }; /* * The minimum message length you can receive (excepting poll messages) is 2. * With a transfer rate of at most 36 bytes per second this makes 18 messages * per second worst case. * * We queue at most 3 seconds worth of received messages. The CEC specification * requires that messages are replied to within a second, so 3 seconds should * give more than enough margin. Since most messages are actually more than 2 * bytes, this is in practice a lot more than 3 seconds. */ #define CEC_MAX_MSG_RX_QUEUE_SZ (18 * 3) /* * The transmit queue is limited to 1 second worth of messages (worst case). * Messages can be transmitted by userspace and kernel space. But for both it * makes no sense to have a lot of messages queued up. One second seems * reasonable. */ #define CEC_MAX_MSG_TX_QUEUE_SZ (18 * 1) /** * struct cec_adapter - cec adapter structure * @owner: module owner * @name: name of the CEC adapter * @devnode: device node for the /dev/cecX device * @lock: mutex controlling access to this structure * @rc: remote control device * @transmit_queue: queue of pending transmits * @transmit_queue_sz: number of pending transmits * @wait_queue: queue of transmits waiting for a reply * @transmitting: CEC messages currently being transmitted * @transmit_in_progress: true if a transmit is in progress * @transmit_in_progress_aborted: true if a transmit is in progress is to be * aborted. This happens if the logical address is * invalidated while the transmit is ongoing. In that * case the transmit will finish, but will not retransmit * and be marked as ABORTED. * @xfer_timeout_ms: the transfer timeout in ms. * If 0, then timeout after 2100 ms. * @kthread_config: kthread used to configure a CEC adapter * @config_completion: used to signal completion of the config kthread * @kthread: main CEC processing thread * @kthread_waitq: main CEC processing wait_queue * @ops: cec adapter ops * @priv: cec driver's private data * @capabilities: cec adapter capabilities * @available_log_addrs: maximum number of available logical addresses * @phys_addr: the current physical address * @needs_hpd: if true, then the HDMI HotPlug Detect pin must be high * in order to transmit or receive CEC messages. This is usually a HW * limitation. * @is_enabled: the CEC adapter is enabled * @is_claiming_log_addrs: true if cec_claim_log_addrs() is running * @is_configuring: the CEC adapter is configuring (i.e. claiming LAs) * @must_reconfigure: while configuring, the PA changed, so reclaim LAs * @is_configured: the CEC adapter is configured (i.e. has claimed LAs) * @cec_pin_is_high: if true then the CEC pin is high. Only used with the * CEC pin framework. * @adap_controls_phys_addr: if true, then the CEC adapter controls the * physical address, i.e. the CEC hardware can detect HPD changes and * read the EDID and is not dependent on an external HDMI driver. * Drivers that need this can set this field to true after the * cec_allocate_adapter() call. * @last_initiator: the initiator of the last transmitted message. * @monitor_all_cnt: number of filehandles monitoring all msgs * @monitor_pin_cnt: number of filehandles monitoring pin changes * @follower_cnt: number of filehandles in follower mode * @cec_follower: filehandle of the exclusive follower * @cec_initiator: filehandle of the exclusive initiator * @passthrough: if true, then the exclusive follower is in * passthrough mode. * @log_addrs: current logical addresses * @conn_info: current connector info * @tx_timeout_cnt: count the number of Timed Out transmits. * Reset to 0 when this is reported in cec_adap_status(). * @tx_low_drive_cnt: count the number of Low Drive transmits. * Reset to 0 when this is reported in cec_adap_status(). * @tx_error_cnt: count the number of Error transmits. * Reset to 0 when this is reported in cec_adap_status(). * @tx_arb_lost_cnt: count the number of Arb Lost transmits. * Reset to 0 when this is reported in cec_adap_status(). * @tx_low_drive_log_cnt: number of logged Low Drive transmits since the * adapter was enabled. Used to avoid flooding the kernel * log if this happens a lot. * @tx_error_log_cnt: number of logged Error transmits since the adapter was * enabled. Used to avoid flooding the kernel log if this * happens a lot. * @notifier: CEC notifier * @pin: CEC pin status struct * @cec_dir: debugfs cec directory * @sequence: transmit sequence counter * @input_phys: remote control input_phys name * * This structure represents a cec adapter. */ struct cec_adapter { struct module *owner; char name[32]; struct cec_devnode devnode; struct mutex lock; struct rc_dev *rc; struct list_head transmit_queue; unsigned int transmit_queue_sz; struct list_head wait_queue; struct cec_data *transmitting; bool transmit_in_progress; bool transmit_in_progress_aborted; unsigned int xfer_timeout_ms; struct task_struct *kthread_config; struct completion config_completion; struct task_struct *kthread; wait_queue_head_t kthread_waitq; const struct cec_adap_ops *ops; void *priv; u32 capabilities; u8 available_log_addrs; u16 phys_addr; bool needs_hpd; bool is_enabled; bool is_claiming_log_addrs; bool is_configuring; bool must_reconfigure; bool is_configured; bool cec_pin_is_high; bool adap_controls_phys_addr; u8 last_initiator; u32 monitor_all_cnt; u32 monitor_pin_cnt; u32 follower_cnt; struct cec_fh *cec_follower; struct cec_fh *cec_initiator; bool passthrough; struct cec_log_addrs log_addrs; struct cec_connector_info conn_info; u32 tx_timeout_cnt; u32 tx_low_drive_cnt; u32 tx_error_cnt; u32 tx_arb_lost_cnt; u32 tx_low_drive_log_cnt; u32 tx_error_log_cnt; #ifdef CONFIG_CEC_NOTIFIER struct cec_notifier *notifier; #endif #ifdef CONFIG_CEC_PIN struct cec_pin *pin; #endif struct dentry *cec_dir; u32 sequence; char input_phys[40]; }; static inline int cec_get_device(struct cec_adapter *adap) { struct cec_devnode *devnode = &adap->devnode; /* * Check if the cec device is available. This needs to be done with * the devnode->lock held to prevent an open/unregister race: * without the lock, the device could be unregistered and freed between * the devnode->registered check and get_device() calls, leading to * a crash. */ mutex_lock(&devnode->lock); /* * return ENODEV if the cec device has been removed * already or if it is not registered anymore. */ if (!devnode->registered) { mutex_unlock(&devnode->lock); return -ENODEV; } /* and increase the device refcount */ get_device(&devnode->dev); mutex_unlock(&devnode->lock); return 0; } static inline void cec_put_device(struct cec_adapter *adap) { put_device(&adap->devnode.dev); } static inline void *cec_get_drvdata(const struct cec_adapter *adap) { return adap->priv; } static inline bool cec_has_log_addr(const struct cec_adapter *adap, u8 log_addr) { return adap->log_addrs.log_addr_mask & (1 << log_addr); } static inline bool cec_is_sink(const struct cec_adapter *adap) { return adap->phys_addr == 0; } /** * cec_is_registered() - is the CEC adapter registered? * * @adap: the CEC adapter, may be NULL. * * Return: true if the adapter is registered, false otherwise. */ static inline bool cec_is_registered(const struct cec_adapter *adap) { return adap && adap->devnode.registered; } #define cec_phys_addr_exp(pa) \ ((pa) >> 12), ((pa) >> 8) & 0xf, ((pa) >> 4) & 0xf, (pa) & 0xf struct edid; struct drm_connector; #if IS_REACHABLE(CONFIG_CEC_CORE) struct cec_adapter *cec_allocate_adapter(const struct cec_adap_ops *ops, void *priv, const char *name, u32 caps, u8 available_las); int cec_register_adapter(struct cec_adapter *adap, struct device *parent); void cec_unregister_adapter(struct cec_adapter *adap); void cec_delete_adapter(struct cec_adapter *adap); int cec_s_log_addrs(struct cec_adapter *adap, struct cec_log_addrs *log_addrs, bool block); void cec_s_phys_addr(struct cec_adapter *adap, u16 phys_addr, bool block); void cec_s_phys_addr_from_edid(struct cec_adapter *adap, const struct edid *edid); void cec_s_conn_info(struct cec_adapter *adap, const struct cec_connector_info *conn_info); int cec_transmit_msg(struct cec_adapter *adap, struct cec_msg *msg, bool block); /* Called by the adapter */ void cec_transmit_done_ts(struct cec_adapter *adap, u8 status, u8 arb_lost_cnt, u8 nack_cnt, u8 low_drive_cnt, u8 error_cnt, ktime_t ts); static inline void cec_transmit_done(struct cec_adapter *adap, u8 status, u8 arb_lost_cnt, u8 nack_cnt, u8 low_drive_cnt, u8 error_cnt) { cec_transmit_done_ts(adap, status, arb_lost_cnt, nack_cnt, low_drive_cnt, error_cnt, ktime_get()); } /* * Simplified version of cec_transmit_done for hardware that doesn't retry * failed transmits. So this is always just one attempt in which case * the status is sufficient. */ void cec_transmit_attempt_done_ts(struct cec_adapter *adap, u8 status, ktime_t ts); static inline void cec_transmit_attempt_done(struct cec_adapter *adap, u8 status) { cec_transmit_attempt_done_ts(adap, status, ktime_get()); } void cec_received_msg_ts(struct cec_adapter *adap, struct cec_msg *msg, ktime_t ts); static inline void cec_received_msg(struct cec_adapter *adap, struct cec_msg *msg) { cec_received_msg_ts(adap, msg, ktime_get()); } /** * cec_queue_pin_cec_event() - queue a CEC pin event with a given timestamp. * * @adap: pointer to the cec adapter * @is_high: when true the CEC pin is high, otherwise it is low * @dropped_events: when true some events were dropped * @ts: the timestamp for this event * */ void cec_queue_pin_cec_event(struct cec_adapter *adap, bool is_high, bool dropped_events, ktime_t ts); /** * cec_queue_pin_hpd_event() - queue a pin event with a given timestamp. * * @adap: pointer to the cec adapter * @is_high: when true the HPD pin is high, otherwise it is low * @ts: the timestamp for this event * */ void cec_queue_pin_hpd_event(struct cec_adapter *adap, bool is_high, ktime_t ts); /** * cec_queue_pin_5v_event() - queue a pin event with a given timestamp. * * @adap: pointer to the cec adapter * @is_high: when true the 5V pin is high, otherwise it is low * @ts: the timestamp for this event * */ void cec_queue_pin_5v_event(struct cec_adapter *adap, bool is_high, ktime_t ts); /** * cec_get_edid_phys_addr() - find and return the physical address * * @edid: pointer to the EDID data * @size: size in bytes of the EDID data * @offset: If not %NULL then the location of the physical address * bytes in the EDID will be returned here. This is set to 0 * if there is no physical address found. * * Return: the physical address or CEC_PHYS_ADDR_INVALID if there is none. */ u16 cec_get_edid_phys_addr(const u8 *edid, unsigned int size, unsigned int *offset); void cec_fill_conn_info_from_drm(struct cec_connector_info *conn_info, const struct drm_connector *connector); #else static inline int cec_register_adapter(struct cec_adapter *adap, struct device *parent) { return 0; } static inline void cec_unregister_adapter(struct cec_adapter *adap) { } static inline void cec_delete_adapter(struct cec_adapter *adap) { } static inline void cec_s_phys_addr(struct cec_adapter *adap, u16 phys_addr, bool block) { } static inline void cec_s_phys_addr_from_edid(struct cec_adapter *adap, const struct edid *edid) { } static inline u16 cec_get_edid_phys_addr(const u8 *edid, unsigned int size, unsigned int *offset) { if (offset) *offset = 0; return CEC_PHYS_ADDR_INVALID; } static inline void cec_s_conn_info(struct cec_adapter *adap, const struct cec_connector_info *conn_info) { } static inline void cec_fill_conn_info_from_drm(struct cec_connector_info *conn_info, const struct drm_connector *connector) { memset(conn_info, 0, sizeof(*conn_info)); } #endif /** * cec_phys_addr_invalidate() - set the physical address to INVALID * * @adap: the CEC adapter * * This is a simple helper function to invalidate the physical * address. */ static inline void cec_phys_addr_invalidate(struct cec_adapter *adap) { cec_s_phys_addr(adap, CEC_PHYS_ADDR_INVALID, false); } /** * cec_get_edid_spa_location() - find location of the Source Physical Address * * @edid: the EDID * @size: the size of the EDID * * This EDID is expected to be a CEA-861 compliant, which means that there are * at least two blocks and one or more of the extensions blocks are CEA-861 * blocks. * * The returned location is guaranteed to be <= size-2. * * This is an inline function since it is used by both CEC and V4L2. * Ideally this would go in a module shared by both, but it is overkill to do * that for just a single function. */ static inline unsigned int cec_get_edid_spa_location(const u8 *edid, unsigned int size) { unsigned int blocks = size / 128; unsigned int block; u8 d; /* Sanity check: at least 2 blocks and a multiple of the block size */ if (blocks < 2 || size % 128) return 0; /* * If there are fewer extension blocks than the size, then update * 'blocks'. It is allowed to have more extension blocks than the size, * since some hardware can only read e.g. 256 bytes of the EDID, even * though more blocks are present. The first CEA-861 extension block * should normally be in block 1 anyway. */ if (edid[0x7e] + 1 < blocks) blocks = edid[0x7e] + 1; for (block = 1; block < blocks; block++) { unsigned int offset = block * 128; /* Skip any non-CEA-861 extension blocks */ if (edid[offset] != 0x02 || edid[offset + 1] != 0x03) continue; /* search Vendor Specific Data Block (tag 3) */ d = edid[offset + 2] & 0x7f; /* Check if there are Data Blocks */ if (d <= 4) continue; if (d > 4) { unsigned int i = offset + 4; unsigned int end = offset + d; /* Note: 'end' is always < 'size' */ do { u8 tag = edid[i] >> 5; u8 len = edid[i] & 0x1f; if (tag == 3 && len >= 5 && i + len <= end && edid[i + 1] == 0x03 && edid[i + 2] == 0x0c && edid[i + 3] == 0x00) return i + 4; i += len + 1; } while (i < end); } } return 0; } #endif /* _MEDIA_CEC_H */
3 2 1 2 4 2 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 // SPDX-License-Identifier: GPL-2.0-only /* * (C) 2000-2001 Svenning Soerensen <svenning@post5.tele.dk> * Copyright (c) 2011 Patrick McHardy <kaber@trash.net> */ #include <linux/ip.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/netdevice.h> #include <linux/ipv6.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv4.h> #include <linux/netfilter_ipv6.h> #include <linux/netfilter/x_tables.h> #include <net/netfilter/nf_nat.h> static unsigned int netmap_tg6(struct sk_buff *skb, const struct xt_action_param *par) { const struct nf_nat_range2 *range = par->targinfo; struct nf_nat_range2 newrange; struct nf_conn *ct; enum ip_conntrack_info ctinfo; union nf_inet_addr new_addr, netmask; unsigned int i; ct = nf_ct_get(skb, &ctinfo); for (i = 0; i < ARRAY_SIZE(range->min_addr.ip6); i++) netmask.ip6[i] = ~(range->min_addr.ip6[i] ^ range->max_addr.ip6[i]); if (xt_hooknum(par) == NF_INET_PRE_ROUTING || xt_hooknum(par) == NF_INET_LOCAL_OUT) new_addr.in6 = ipv6_hdr(skb)->daddr; else new_addr.in6 = ipv6_hdr(skb)->saddr; for (i = 0; i < ARRAY_SIZE(new_addr.ip6); i++) { new_addr.ip6[i] &= ~netmask.ip6[i]; new_addr.ip6[i] |= range->min_addr.ip6[i] & netmask.ip6[i]; } newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; newrange.min_addr = new_addr; newrange.max_addr = new_addr; newrange.min_proto = range->min_proto; newrange.max_proto = range->max_proto; return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(xt_hooknum(par))); } static int netmap_tg6_checkentry(const struct xt_tgchk_param *par) { const struct nf_nat_range2 *range = par->targinfo; if (!(range->flags & NF_NAT_RANGE_MAP_IPS)) return -EINVAL; return nf_ct_netns_get(par->net, par->family); } static void netmap_tg_destroy(const struct xt_tgdtor_param *par) { nf_ct_netns_put(par->net, par->family); } static unsigned int netmap_tg4(struct sk_buff *skb, const struct xt_action_param *par) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; __be32 new_ip, netmask; const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; struct nf_nat_range2 newrange; WARN_ON(xt_hooknum(par) != NF_INET_PRE_ROUTING && xt_hooknum(par) != NF_INET_POST_ROUTING && xt_hooknum(par) != NF_INET_LOCAL_OUT && xt_hooknum(par) != NF_INET_LOCAL_IN); ct = nf_ct_get(skb, &ctinfo); netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip); if (xt_hooknum(par) == NF_INET_PRE_ROUTING || xt_hooknum(par) == NF_INET_LOCAL_OUT) new_ip = ip_hdr(skb)->daddr & ~netmask; else new_ip = ip_hdr(skb)->saddr & ~netmask; new_ip |= mr->range[0].min_ip & netmask; memset(&newrange.min_addr, 0, sizeof(newrange.min_addr)); memset(&newrange.max_addr, 0, sizeof(newrange.max_addr)); newrange.flags = mr->range[0].flags | NF_NAT_RANGE_MAP_IPS; newrange.min_addr.ip = new_ip; newrange.max_addr.ip = new_ip; newrange.min_proto = mr->range[0].min; newrange.max_proto = mr->range[0].max; /* Hand modified range to generic setup. */ return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(xt_hooknum(par))); } static int netmap_tg4_check(const struct xt_tgchk_param *par) { const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; if (!(mr->range[0].flags & NF_NAT_RANGE_MAP_IPS)) { pr_debug("bad MAP_IPS.\n"); return -EINVAL; } if (mr->rangesize != 1) { pr_debug("bad rangesize %u.\n", mr->rangesize); return -EINVAL; } return nf_ct_netns_get(par->net, par->family); } static struct xt_target netmap_tg_reg[] __read_mostly = { { .name = "NETMAP", .family = NFPROTO_IPV6, .revision = 0, .target = netmap_tg6, .targetsize = sizeof(struct nf_nat_range), .table = "nat", .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_LOCAL_IN), .checkentry = netmap_tg6_checkentry, .destroy = netmap_tg_destroy, .me = THIS_MODULE, }, { .name = "NETMAP", .family = NFPROTO_IPV4, .revision = 0, .target = netmap_tg4, .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), .table = "nat", .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_LOCAL_IN), .checkentry = netmap_tg4_check, .destroy = netmap_tg_destroy, .me = THIS_MODULE, }, }; static int __init netmap_tg_init(void) { return xt_register_targets(netmap_tg_reg, ARRAY_SIZE(netmap_tg_reg)); } static void netmap_tg_exit(void) { xt_unregister_targets(netmap_tg_reg, ARRAY_SIZE(netmap_tg_reg)); } module_init(netmap_tg_init); module_exit(netmap_tg_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Xtables: 1:1 NAT mapping of subnets"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_ALIAS("ip6t_NETMAP"); MODULE_ALIAS("ipt_NETMAP");
6929 6940 5871 800 797 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 // SPDX-License-Identifier: GPL-2.0 #include <linux/export.h> #include <linux/spinlock.h> #include <linux/atomic.h> /* * This is an implementation of the notion of "decrement a * reference count, and return locked if it decremented to zero". * * NOTE NOTE NOTE! This is _not_ equivalent to * * if (atomic_dec_and_test(&atomic)) { * spin_lock(&lock); * return 1; * } * return 0; * * because the spin-lock and the decrement must be * "atomic". */ int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) { /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */ if (atomic_add_unless(atomic, -1, 1)) return 0; /* Otherwise do it the slow way */ spin_lock(lock); if (atomic_dec_and_test(atomic)) return 1; spin_unlock(lock); return 0; } EXPORT_SYMBOL(_atomic_dec_and_lock); int _atomic_dec_and_lock_irqsave(atomic_t *atomic, spinlock_t *lock, unsigned long *flags) { /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */ if (atomic_add_unless(atomic, -1, 1)) return 0; /* Otherwise do it the slow way */ spin_lock_irqsave(lock, *flags); if (atomic_dec_and_test(atomic)) return 1; spin_unlock_irqrestore(lock, *flags); return 0; } EXPORT_SYMBOL(_atomic_dec_and_lock_irqsave); int _atomic_dec_and_raw_lock(atomic_t *atomic, raw_spinlock_t *lock) { /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */ if (atomic_add_unless(atomic, -1, 1)) return 0; /* Otherwise do it the slow way */ raw_spin_lock(lock); if (atomic_dec_and_test(atomic)) return 1; raw_spin_unlock(lock); return 0; } EXPORT_SYMBOL(_atomic_dec_and_raw_lock); int _atomic_dec_and_raw_lock_irqsave(atomic_t *atomic, raw_spinlock_t *lock, unsigned long *flags) { /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */ if (atomic_add_unless(atomic, -1, 1)) return 0; /* Otherwise do it the slow way */ raw_spin_lock_irqsave(lock, *flags); if (atomic_dec_and_test(atomic)) return 1; raw_spin_unlock_irqrestore(lock, *flags); return 0; } EXPORT_SYMBOL(_atomic_dec_and_raw_lock_irqsave);
4 9 7 2 2 1 2 4 3 13 1 2 2 1 1 6 3 2 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 // SPDX-License-Identifier: GPL-2.0-only /* * File: datagram.c * * Datagram (ISI) Phonet sockets * * Copyright (C) 2008 Nokia Corporation. * * Authors: Sakari Ailus <sakari.ailus@nokia.com> * Rémi Denis-Courmont */ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/socket.h> #include <asm/ioctls.h> #include <net/sock.h> #include <linux/phonet.h> #include <linux/export.h> #include <net/phonet/phonet.h> static int pn_backlog_rcv(struct sock *sk, struct sk_buff *skb); /* associated socket ceases to exist */ static void pn_sock_close(struct sock *sk, long timeout) { sk_common_release(sk); } static int pn_ioctl(struct sock *sk, int cmd, int *karg) { struct sk_buff *skb; switch (cmd) { case SIOCINQ: spin_lock_bh(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); *karg = skb ? skb->len : 0; spin_unlock_bh(&sk->sk_receive_queue.lock); return 0; case SIOCPNADDRESOURCE: case SIOCPNDELRESOURCE: { u32 res = *karg; if (res >= 256) return -EINVAL; if (cmd == SIOCPNADDRESOURCE) return pn_sock_bind_res(sk, res); else return pn_sock_unbind_res(sk, res); } } return -ENOIOCTLCMD; } /* Destroy socket. All references are gone. */ static void pn_destruct(struct sock *sk) { skb_queue_purge(&sk->sk_receive_queue); } static int pn_init(struct sock *sk) { sk->sk_destruct = pn_destruct; return 0; } static int pn_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { DECLARE_SOCKADDR(struct sockaddr_pn *, target, msg->msg_name); struct sk_buff *skb; int err; if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_EOR|MSG_NOSIGNAL| MSG_CMSG_COMPAT)) return -EOPNOTSUPP; if (target == NULL) return -EDESTADDRREQ; if (msg->msg_namelen < sizeof(struct sockaddr_pn)) return -EINVAL; if (target->spn_family != AF_PHONET) return -EAFNOSUPPORT; skb = sock_alloc_send_skb(sk, MAX_PHONET_HEADER + len, msg->msg_flags & MSG_DONTWAIT, &err); if (skb == NULL) return err; skb_reserve(skb, MAX_PHONET_HEADER); err = memcpy_from_msg((void *)skb_put(skb, len), msg, len); if (err < 0) { kfree_skb(skb); return err; } /* * Fill in the Phonet header and * finally pass the packet forwards. */ err = pn_skb_send(sk, skb, target); /* If ok, return len. */ return (err >= 0) ? len : err; } static int pn_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, int *addr_len) { struct sk_buff *skb = NULL; struct sockaddr_pn sa; int rval = -EOPNOTSUPP; int copylen; if (flags & ~(MSG_PEEK|MSG_TRUNC|MSG_DONTWAIT|MSG_NOSIGNAL| MSG_CMSG_COMPAT)) goto out_nofree; skb = skb_recv_datagram(sk, flags, &rval); if (skb == NULL) goto out_nofree; pn_skb_get_src_sockaddr(skb, &sa); copylen = skb->len; if (len < copylen) { msg->msg_flags |= MSG_TRUNC; copylen = len; } rval = skb_copy_datagram_msg(skb, 0, msg, copylen); if (rval) { rval = -EFAULT; goto out; } rval = (flags & MSG_TRUNC) ? skb->len : copylen; if (msg->msg_name != NULL) { __sockaddr_check_size(sizeof(sa)); memcpy(msg->msg_name, &sa, sizeof(sa)); *addr_len = sizeof(sa); } out: skb_free_datagram(sk, skb); out_nofree: return rval; } /* Queue an skb for a sock. */ static int pn_backlog_rcv(struct sock *sk, struct sk_buff *skb) { int err = sock_queue_rcv_skb(sk, skb); if (err < 0) kfree_skb(skb); return err ? NET_RX_DROP : NET_RX_SUCCESS; } /* Module registration */ static struct proto pn_proto = { .close = pn_sock_close, .ioctl = pn_ioctl, .init = pn_init, .sendmsg = pn_sendmsg, .recvmsg = pn_recvmsg, .backlog_rcv = pn_backlog_rcv, .hash = pn_sock_hash, .unhash = pn_sock_unhash, .get_port = pn_sock_get_port, .obj_size = sizeof(struct pn_sock), .owner = THIS_MODULE, .name = "PHONET", }; static const struct phonet_protocol pn_dgram_proto = { .ops = &phonet_dgram_ops, .prot = &pn_proto, .sock_type = SOCK_DGRAM, }; int __init isi_register(void) { return phonet_proto_register(PN_PROTO_PHONET, &pn_dgram_proto); } void __exit isi_unregister(void) { phonet_proto_unregister(PN_PROTO_PHONET, &pn_dgram_proto); }
6 2 6 6 16 16 9 11 1 11 6 6 6 6 6 10 11 11 13 13 13 7 7 1 12 12 4 4 4 8 8 8 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 8 8 6 2 8 8 7 7 7 7 7 7 7 14 8 7 10 10 10 9 7 7 9 8 1 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 10 9 10 10 10 10 10 10 10 10 10 10 9 10 10 10 9 10 10 10 10 10 10 10 10 10 7 7 7 7 7 7 7 7 13 7 2 9 5 5 5 5 5 5 5 5 5 5 5 5 7 7 7 7 7 9 9 9 2 7 7 7 7 7 7 7 2 5 5 2 9 9 9 9 5 5 5 5 5 5 5 5 5 5 4 5 4 5 5 5 5 5 5 5 5 5 11 10 7 7 7 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 7 7 6 6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2007 Oracle. All rights reserved. */ #include <linux/fs.h> #include <linux/blkdev.h> #include <linux/radix-tree.h> #include <linux/writeback.h> #include <linux/workqueue.h> #include <linux/kthread.h> #include <linux/slab.h> #include <linux/migrate.h> #include <linux/ratelimit.h> #include <linux/uuid.h> #include <linux/semaphore.h> #include <linux/error-injection.h> #include <linux/crc32c.h> #include <linux/sched/mm.h> #include <linux/unaligned.h> #include <crypto/hash.h> #include "ctree.h" #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" #include "bio.h" #include "print-tree.h" #include "locking.h" #include "tree-log.h" #include "free-space-cache.h" #include "free-space-tree.h" #include "dev-replace.h" #include "raid56.h" #include "sysfs.h" #include "qgroup.h" #include "compression.h" #include "tree-checker.h" #include "ref-verify.h" #include "block-group.h" #include "discard.h" #include "space-info.h" #include "zoned.h" #include "subpage.h" #include "fs.h" #include "accessors.h" #include "extent-tree.h" #include "root-tree.h" #include "defrag.h" #include "uuid-tree.h" #include "relocation.h" #include "scrub.h" #include "super.h" #define BTRFS_SUPER_FLAG_SUPP (BTRFS_HEADER_FLAG_WRITTEN |\ BTRFS_HEADER_FLAG_RELOC |\ BTRFS_SUPER_FLAG_ERROR |\ BTRFS_SUPER_FLAG_SEEDING |\ BTRFS_SUPER_FLAG_METADUMP |\ BTRFS_SUPER_FLAG_METADUMP_V2) static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info); static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info); static void btrfs_free_csum_hash(struct btrfs_fs_info *fs_info) { if (fs_info->csum_shash) crypto_free_shash(fs_info->csum_shash); } /* * Compute the csum of a btree block and store the result to provided buffer. */ static void csum_tree_block(struct extent_buffer *buf, u8 *result) { struct btrfs_fs_info *fs_info = buf->fs_info; int num_pages; u32 first_page_part; SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); char *kaddr; int i; shash->tfm = fs_info->csum_shash; crypto_shash_init(shash); if (buf->addr) { /* Pages are contiguous, handle them as a big one. */ kaddr = buf->addr; first_page_part = fs_info->nodesize; num_pages = 1; } else { kaddr = folio_address(buf->folios[0]); first_page_part = min_t(u32, PAGE_SIZE, fs_info->nodesize); num_pages = num_extent_pages(buf); } crypto_shash_update(shash, kaddr + BTRFS_CSUM_SIZE, first_page_part - BTRFS_CSUM_SIZE); /* * Multiple single-page folios case would reach here. * * nodesize <= PAGE_SIZE and large folio all handled by above * crypto_shash_update() already. */ for (i = 1; i < num_pages && INLINE_EXTENT_BUFFER_PAGES > 1; i++) { kaddr = folio_address(buf->folios[i]); crypto_shash_update(shash, kaddr, PAGE_SIZE); } memset(result, 0, BTRFS_CSUM_SIZE); crypto_shash_final(shash, result); } /* * we can't consider a given block up to date unless the transid of the * block matches the transid in the parent node's pointer. This is how we * detect blocks that either didn't get written at all or got written * in the wrong place. */ int btrfs_buffer_uptodate(struct extent_buffer *eb, u64 parent_transid, int atomic) { if (!extent_buffer_uptodate(eb)) return 0; if (!parent_transid || btrfs_header_generation(eb) == parent_transid) return 1; if (atomic) return -EAGAIN; if (!extent_buffer_uptodate(eb) || btrfs_header_generation(eb) != parent_transid) { btrfs_err_rl(eb->fs_info, "parent transid verify failed on logical %llu mirror %u wanted %llu found %llu", eb->start, eb->read_mirror, parent_transid, btrfs_header_generation(eb)); clear_extent_buffer_uptodate(eb); return 0; } return 1; } static bool btrfs_supported_super_csum(u16 csum_type) { switch (csum_type) { case BTRFS_CSUM_TYPE_CRC32: case BTRFS_CSUM_TYPE_XXHASH: case BTRFS_CSUM_TYPE_SHA256: case BTRFS_CSUM_TYPE_BLAKE2: return true; default: return false; } } /* * Return 0 if the superblock checksum type matches the checksum value of that * algorithm. Pass the raw disk superblock data. */ int btrfs_check_super_csum(struct btrfs_fs_info *fs_info, const struct btrfs_super_block *disk_sb) { char result[BTRFS_CSUM_SIZE]; SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); shash->tfm = fs_info->csum_shash; /* * The super_block structure does not span the whole * BTRFS_SUPER_INFO_SIZE range, we expect that the unused space is * filled with zeros and is included in the checksum. */ crypto_shash_digest(shash, (const u8 *)disk_sb + BTRFS_CSUM_SIZE, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE, result); if (memcmp(disk_sb->csum, result, fs_info->csum_size)) return 1; return 0; } static int btrfs_repair_eb_io_failure(const struct extent_buffer *eb, int mirror_num) { struct btrfs_fs_info *fs_info = eb->fs_info; int num_folios = num_extent_folios(eb); int ret = 0; if (sb_rdonly(fs_info->sb)) return -EROFS; for (int i = 0; i < num_folios; i++) { struct folio *folio = eb->folios[i]; u64 start = max_t(u64, eb->start, folio_pos(folio)); u64 end = min_t(u64, eb->start + eb->len, folio_pos(folio) + eb->folio_size); u32 len = end - start; ret = btrfs_repair_io_failure(fs_info, 0, start, len, start, folio, offset_in_folio(folio, start), mirror_num); if (ret) break; } return ret; } /* * helper to read a given tree block, doing retries as required when * the checksums don't match and we have alternate mirrors to try. * * @check: expected tree parentness check, see the comments of the * structure for details. */ int btrfs_read_extent_buffer(struct extent_buffer *eb, const struct btrfs_tree_parent_check *check) { struct btrfs_fs_info *fs_info = eb->fs_info; int failed = 0; int ret; int num_copies = 0; int mirror_num = 0; int failed_mirror = 0; ASSERT(check); while (1) { clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); ret = read_extent_buffer_pages(eb, WAIT_COMPLETE, mirror_num, check); if (!ret) break; num_copies = btrfs_num_copies(fs_info, eb->start, eb->len); if (num_copies == 1) break; if (!failed_mirror) { failed = 1; failed_mirror = eb->read_mirror; } mirror_num++; if (mirror_num == failed_mirror) mirror_num++; if (mirror_num > num_copies) break; } if (failed && !ret && failed_mirror) btrfs_repair_eb_io_failure(eb, failed_mirror); return ret; } /* * Checksum a dirty tree block before IO. */ blk_status_t btree_csum_one_bio(struct btrfs_bio *bbio) { struct extent_buffer *eb = bbio->private; struct btrfs_fs_info *fs_info = eb->fs_info; u64 found_start = btrfs_header_bytenr(eb); u64 last_trans; u8 result[BTRFS_CSUM_SIZE]; int ret; /* Btree blocks are always contiguous on disk. */ if (WARN_ON_ONCE(bbio->file_offset != eb->start)) return BLK_STS_IOERR; if (WARN_ON_ONCE(bbio->bio.bi_iter.bi_size != eb->len)) return BLK_STS_IOERR; /* * If an extent_buffer is marked as EXTENT_BUFFER_ZONED_ZEROOUT, don't * checksum it but zero-out its content. This is done to preserve * ordering of I/O without unnecessarily writing out data. */ if (test_bit(EXTENT_BUFFER_ZONED_ZEROOUT, &eb->bflags)) { memzero_extent_buffer(eb, 0, eb->len); return BLK_STS_OK; } if (WARN_ON_ONCE(found_start != eb->start)) return BLK_STS_IOERR; if (WARN_ON(!btrfs_folio_test_uptodate(fs_info, eb->folios[0], eb->start, eb->len))) return BLK_STS_IOERR; ASSERT(memcmp_extent_buffer(eb, fs_info->fs_devices->metadata_uuid, offsetof(struct btrfs_header, fsid), BTRFS_FSID_SIZE) == 0); csum_tree_block(eb, result); if (btrfs_header_level(eb)) ret = btrfs_check_node(eb); else ret = btrfs_check_leaf(eb); if (ret < 0) goto error; /* * Also check the generation, the eb reached here must be newer than * last committed. Or something seriously wrong happened. */ last_trans = btrfs_get_last_trans_committed(fs_info); if (unlikely(btrfs_header_generation(eb) <= last_trans)) { ret = -EUCLEAN; btrfs_err(fs_info, "block=%llu bad generation, have %llu expect > %llu", eb->start, btrfs_header_generation(eb), last_trans); goto error; } write_extent_buffer(eb, result, 0, fs_info->csum_size); return BLK_STS_OK; error: btrfs_print_tree(eb, 0); btrfs_err(fs_info, "block=%llu write time tree block corruption detected", eb->start); /* * Be noisy if this is an extent buffer from a log tree. We don't abort * a transaction in case there's a bad log tree extent buffer, we just * fallback to a transaction commit. Still we want to know when there is * a bad log tree extent buffer, as that may signal a bug somewhere. */ WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG) || btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID); return errno_to_blk_status(ret); } static bool check_tree_block_fsid(struct extent_buffer *eb) { struct btrfs_fs_info *fs_info = eb->fs_info; struct btrfs_fs_devices *fs_devices = fs_info->fs_devices, *seed_devs; u8 fsid[BTRFS_FSID_SIZE]; read_extent_buffer(eb, fsid, offsetof(struct btrfs_header, fsid), BTRFS_FSID_SIZE); /* * alloc_fsid_devices() copies the fsid into fs_devices::metadata_uuid. * This is then overwritten by metadata_uuid if it is present in the * device_list_add(). The same true for a seed device as well. So use of * fs_devices::metadata_uuid is appropriate here. */ if (memcmp(fsid, fs_info->fs_devices->metadata_uuid, BTRFS_FSID_SIZE) == 0) return false; list_for_each_entry(seed_devs, &fs_devices->seed_list, seed_list) if (!memcmp(fsid, seed_devs->fsid, BTRFS_FSID_SIZE)) return false; return true; } /* Do basic extent buffer checks at read time */ int btrfs_validate_extent_buffer(struct extent_buffer *eb, const struct btrfs_tree_parent_check *check) { struct btrfs_fs_info *fs_info = eb->fs_info; u64 found_start; const u32 csum_size = fs_info->csum_size; u8 found_level; u8 result[BTRFS_CSUM_SIZE]; const u8 *header_csum; int ret = 0; const bool ignore_csum = btrfs_test_opt(fs_info, IGNOREMETACSUMS); ASSERT(check); found_start = btrfs_header_bytenr(eb); if (found_start != eb->start) { btrfs_err_rl(fs_info, "bad tree block start, mirror %u want %llu have %llu", eb->read_mirror, eb->start, found_start); ret = -EIO; goto out; } if (check_tree_block_fsid(eb)) { btrfs_err_rl(fs_info, "bad fsid on logical %llu mirror %u", eb->start, eb->read_mirror); ret = -EIO; goto out; } found_level = btrfs_header_level(eb); if (found_level >= BTRFS_MAX_LEVEL) { btrfs_err(fs_info, "bad tree block level, mirror %u level %d on logical %llu", eb->read_mirror, btrfs_header_level(eb), eb->start); ret = -EIO; goto out; } csum_tree_block(eb, result); header_csum = folio_address(eb->folios[0]) + get_eb_offset_in_folio(eb, offsetof(struct btrfs_header, csum)); if (memcmp(result, header_csum, csum_size) != 0) { btrfs_warn_rl(fs_info, "checksum verify failed on logical %llu mirror %u wanted " CSUM_FMT " found " CSUM_FMT " level %d%s", eb->start, eb->read_mirror, CSUM_FMT_VALUE(csum_size, header_csum), CSUM_FMT_VALUE(csum_size, result), btrfs_header_level(eb), ignore_csum ? ", ignored" : ""); if (!ignore_csum) { ret = -EUCLEAN; goto out; } } if (found_level != check->level) { btrfs_err(fs_info, "level verify failed on logical %llu mirror %u wanted %u found %u", eb->start, eb->read_mirror, check->level, found_level); ret = -EIO; goto out; } if (unlikely(check->transid && btrfs_header_generation(eb) != check->transid)) { btrfs_err_rl(eb->fs_info, "parent transid verify failed on logical %llu mirror %u wanted %llu found %llu", eb->start, eb->read_mirror, check->transid, btrfs_header_generation(eb)); ret = -EIO; goto out; } if (check->has_first_key) { const struct btrfs_key *expect_key = &check->first_key; struct btrfs_key found_key; if (found_level) btrfs_node_key_to_cpu(eb, &found_key, 0); else btrfs_item_key_to_cpu(eb, &found_key, 0); if (unlikely(btrfs_comp_cpu_keys(expect_key, &found_key))) { btrfs_err(fs_info, "tree first key mismatch detected, bytenr=%llu parent_transid=%llu key expected=(%llu,%u,%llu) has=(%llu,%u,%llu)", eb->start, check->transid, expect_key->objectid, expect_key->type, expect_key->offset, found_key.objectid, found_key.type, found_key.offset); ret = -EUCLEAN; goto out; } } if (check->owner_root) { ret = btrfs_check_eb_owner(eb, check->owner_root); if (ret < 0) goto out; } /* * If this is a leaf block and it is corrupt, set the corrupt bit so * that we don't try and read the other copies of this block, just * return -EIO. */ if (found_level == 0 && btrfs_check_leaf(eb)) { set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); ret = -EIO; } if (found_level > 0 && btrfs_check_node(eb)) ret = -EIO; if (ret) btrfs_err(fs_info, "read time tree block corruption detected on logical %llu mirror %u", eb->start, eb->read_mirror); out: return ret; } #ifdef CONFIG_MIGRATION static int btree_migrate_folio(struct address_space *mapping, struct folio *dst, struct folio *src, enum migrate_mode mode) { /* * we can't safely write a btree page from here, * we haven't done the locking hook */ if (folio_test_dirty(src)) return -EAGAIN; /* * Buffers may be managed in a filesystem specific way. * We must have no buffers or drop them. */ if (folio_get_private(src) && !filemap_release_folio(src, GFP_KERNEL)) return -EAGAIN; return migrate_folio(mapping, dst, src, mode); } #else #define btree_migrate_folio NULL #endif static int btree_writepages(struct address_space *mapping, struct writeback_control *wbc) { int ret; if (wbc->sync_mode == WB_SYNC_NONE) { struct btrfs_fs_info *fs_info; if (wbc->for_kupdate) return 0; fs_info = inode_to_fs_info(mapping->host); /* this is a bit racy, but that's ok */ ret = __percpu_counter_compare(&fs_info->dirty_metadata_bytes, BTRFS_DIRTY_METADATA_THRESH, fs_info->dirty_metadata_batch); if (ret < 0) return 0; } return btree_write_cache_pages(mapping, wbc); } static bool btree_release_folio(struct folio *folio, gfp_t gfp_flags) { if (folio_test_writeback(folio) || folio_test_dirty(folio)) return false; return try_release_extent_buffer(folio); } static void btree_invalidate_folio(struct folio *folio, size_t offset, size_t length) { struct extent_io_tree *tree; tree = &folio_to_inode(folio)->io_tree; extent_invalidate_folio(tree, folio, offset); btree_release_folio(folio, GFP_NOFS); if (folio_get_private(folio)) { btrfs_warn(folio_to_fs_info(folio), "folio private not zero on folio %llu", (unsigned long long)folio_pos(folio)); folio_detach_private(folio); } } #ifdef DEBUG static bool btree_dirty_folio(struct address_space *mapping, struct folio *folio) { struct btrfs_fs_info *fs_info = inode_to_fs_info(mapping->host); struct btrfs_subpage_info *spi = fs_info->subpage_info; struct btrfs_subpage *subpage; struct extent_buffer *eb; int cur_bit = 0; u64 page_start = folio_pos(folio); if (fs_info->sectorsize == PAGE_SIZE) { eb = folio_get_private(folio); BUG_ON(!eb); BUG_ON(!test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); BUG_ON(!atomic_read(&eb->refs)); btrfs_assert_tree_write_locked(eb); return filemap_dirty_folio(mapping, folio); } ASSERT(spi); subpage = folio_get_private(folio); for (cur_bit = spi->dirty_offset; cur_bit < spi->dirty_offset + spi->bitmap_nr_bits; cur_bit++) { unsigned long flags; u64 cur; spin_lock_irqsave(&subpage->lock, flags); if (!test_bit(cur_bit, subpage->bitmaps)) { spin_unlock_irqrestore(&subpage->lock, flags); continue; } spin_unlock_irqrestore(&subpage->lock, flags); cur = page_start + cur_bit * fs_info->sectorsize; eb = find_extent_buffer(fs_info, cur); ASSERT(eb); ASSERT(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); ASSERT(atomic_read(&eb->refs)); btrfs_assert_tree_write_locked(eb); free_extent_buffer(eb); cur_bit += (fs_info->nodesize >> fs_info->sectorsize_bits) - 1; } return filemap_dirty_folio(mapping, folio); } #else #define btree_dirty_folio filemap_dirty_folio #endif static const struct address_space_operations btree_aops = { .writepages = btree_writepages, .release_folio = btree_release_folio, .invalidate_folio = btree_invalidate_folio, .migrate_folio = btree_migrate_folio, .dirty_folio = btree_dirty_folio, }; struct extent_buffer *btrfs_find_create_tree_block( struct btrfs_fs_info *fs_info, u64 bytenr, u64 owner_root, int level) { if (btrfs_is_testing(fs_info)) return alloc_test_extent_buffer(fs_info, bytenr); return alloc_extent_buffer(fs_info, bytenr, owner_root, level); } /* * Read tree block at logical address @bytenr and do variant basic but critical * verification. * * @check: expected tree parentness check, see comments of the * structure for details. */ struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr, struct btrfs_tree_parent_check *check) { struct extent_buffer *buf = NULL; int ret; ASSERT(check); buf = btrfs_find_create_tree_block(fs_info, bytenr, check->owner_root, check->level); if (IS_ERR(buf)) return buf; ret = btrfs_read_extent_buffer(buf, check); if (ret) { free_extent_buffer_stale(buf); return ERR_PTR(ret); } return buf; } static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info, u64 objectid) { bool dummy = btrfs_is_testing(fs_info); memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); root->fs_info = fs_info; root->root_key.objectid = objectid; root->node = NULL; root->commit_root = NULL; root->state = 0; RB_CLEAR_NODE(&root->rb_node); btrfs_set_root_last_trans(root, 0); root->free_objectid = 0; root->nr_delalloc_inodes = 0; root->nr_ordered_extents = 0; xa_init(&root->inodes); xa_init(&root->delayed_nodes); btrfs_init_root_block_rsv(root); INIT_LIST_HEAD(&root->dirty_list); INIT_LIST_HEAD(&root->root_list); INIT_LIST_HEAD(&root->delalloc_inodes); INIT_LIST_HEAD(&root->delalloc_root); INIT_LIST_HEAD(&root->ordered_extents); INIT_LIST_HEAD(&root->ordered_root); INIT_LIST_HEAD(&root->reloc_dirty_list); spin_lock_init(&root->delalloc_lock); spin_lock_init(&root->ordered_extent_lock); spin_lock_init(&root->accounting_lock); spin_lock_init(&root->qgroup_meta_rsv_lock); mutex_init(&root->objectid_mutex); mutex_init(&root->log_mutex); mutex_init(&root->ordered_extent_mutex); mutex_init(&root->delalloc_mutex); init_waitqueue_head(&root->qgroup_flush_wait); init_waitqueue_head(&root->log_writer_wait); init_waitqueue_head(&root->log_commit_wait[0]); init_waitqueue_head(&root->log_commit_wait[1]); INIT_LIST_HEAD(&root->log_ctxs[0]); INIT_LIST_HEAD(&root->log_ctxs[1]); atomic_set(&root->log_commit[0], 0); atomic_set(&root->log_commit[1], 0); atomic_set(&root->log_writers, 0); atomic_set(&root->log_batch, 0); refcount_set(&root->refs, 1); atomic_set(&root->snapshot_force_cow, 0); atomic_set(&root->nr_swapfiles, 0); btrfs_set_root_log_transid(root, 0); root->log_transid_committed = -1; btrfs_set_root_last_log_commit(root, 0); root->anon_dev = 0; if (!dummy) { extent_io_tree_init(fs_info, &root->dirty_log_pages, IO_TREE_ROOT_DIRTY_LOG_PAGES); extent_io_tree_init(fs_info, &root->log_csum_range, IO_TREE_LOG_CSUM_RANGE); } spin_lock_init(&root->root_item_lock); btrfs_qgroup_init_swapped_blocks(&root->swapped_blocks); #ifdef CONFIG_BTRFS_DEBUG INIT_LIST_HEAD(&root->leak_list); spin_lock(&fs_info->fs_roots_radix_lock); list_add_tail(&root->leak_list, &fs_info->allocated_roots); spin_unlock(&fs_info->fs_roots_radix_lock); #endif } static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info, u64 objectid, gfp_t flags) { struct btrfs_root *root = kzalloc(sizeof(*root), flags); if (root) __setup_root(root, fs_info, objectid); return root; } #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS /* Should only be used by the testing infrastructure */ struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info) { struct btrfs_root *root; if (!fs_info) return ERR_PTR(-EINVAL); root = btrfs_alloc_root(fs_info, BTRFS_ROOT_TREE_OBJECTID, GFP_KERNEL); if (!root) return ERR_PTR(-ENOMEM); /* We don't use the stripesize in selftest, set it as sectorsize */ root->alloc_bytenr = 0; return root; } #endif static int global_root_cmp(struct rb_node *a_node, const struct rb_node *b_node) { const struct btrfs_root *a = rb_entry(a_node, struct btrfs_root, rb_node); const struct btrfs_root *b = rb_entry(b_node, struct btrfs_root, rb_node); return btrfs_comp_cpu_keys(&a->root_key, &b->root_key); } static int global_root_key_cmp(const void *k, const struct rb_node *node) { const struct btrfs_key *key = k; const struct btrfs_root *root = rb_entry(node, struct btrfs_root, rb_node); return btrfs_comp_cpu_keys(key, &root->root_key); } int btrfs_global_root_insert(struct btrfs_root *root) { struct btrfs_fs_info *fs_info = root->fs_info; struct rb_node *tmp; int ret = 0; write_lock(&fs_info->global_root_lock); tmp = rb_find_add(&root->rb_node, &fs_info->global_root_tree, global_root_cmp); write_unlock(&fs_info->global_root_lock); if (tmp) { ret = -EEXIST; btrfs_warn(fs_info, "global root %llu %llu already exists", btrfs_root_id(root), root->root_key.offset); } return ret; } void btrfs_global_root_delete(struct btrfs_root *root) { struct btrfs_fs_info *fs_info = root->fs_info; write_lock(&fs_info->global_root_lock); rb_erase(&root->rb_node, &fs_info->global_root_tree); write_unlock(&fs_info->global_root_lock); } struct btrfs_root *btrfs_global_root(struct btrfs_fs_info *fs_info, struct btrfs_key *key) { struct rb_node *node; struct btrfs_root *root = NULL; read_lock(&fs_info->global_root_lock); node = rb_find(key, &fs_info->global_root_tree, global_root_key_cmp); if (node) root = container_of(node, struct btrfs_root, rb_node); read_unlock(&fs_info->global_root_lock); return root; } static u64 btrfs_global_root_id(struct btrfs_fs_info *fs_info, u64 bytenr) { struct btrfs_block_group *block_group; u64 ret; if (!btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) return 0; if (bytenr) block_group = btrfs_lookup_block_group(fs_info, bytenr); else block_group = btrfs_lookup_first_block_group(fs_info, bytenr); ASSERT(block_group); if (!block_group) return 0; ret = block_group->global_root_id; btrfs_put_block_group(block_group); return ret; } struct btrfs_root *btrfs_csum_root(struct btrfs_fs_info *fs_info, u64 bytenr) { struct btrfs_key key = { .objectid = BTRFS_CSUM_TREE_OBJECTID, .type = BTRFS_ROOT_ITEM_KEY, .offset = btrfs_global_root_id(fs_info, bytenr), }; return btrfs_global_root(fs_info, &key); } struct btrfs_root *btrfs_extent_root(struct btrfs_fs_info *fs_info, u64 bytenr) { struct btrfs_key key = { .objectid = BTRFS_EXTENT_TREE_OBJECTID, .type = BTRFS_ROOT_ITEM_KEY, .offset = btrfs_global_root_id(fs_info, bytenr), }; return btrfs_global_root(fs_info, &key); } struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, u64 objectid) { struct btrfs_fs_info *fs_info = trans->fs_info; struct extent_buffer *leaf; struct btrfs_root *tree_root = fs_info->tree_root; struct btrfs_root *root; struct btrfs_key key; unsigned int nofs_flag; int ret = 0; /* * We're holding a transaction handle, so use a NOFS memory allocation * context to avoid deadlock if reclaim happens. */ nofs_flag = memalloc_nofs_save(); root = btrfs_alloc_root(fs_info, objectid, GFP_KERNEL); memalloc_nofs_restore(nofs_flag); if (!root) return ERR_PTR(-ENOMEM); root->root_key.objectid = objectid; root->root_key.type = BTRFS_ROOT_ITEM_KEY; root->root_key.offset = 0; leaf = btrfs_alloc_tree_block(trans, root, 0, objectid, NULL, 0, 0, 0, 0, BTRFS_NESTING_NORMAL); if (IS_ERR(leaf)) { ret = PTR_ERR(leaf); leaf = NULL; goto fail; } root->node = leaf; btrfs_mark_buffer_dirty(trans, leaf); root->commit_root = btrfs_root_node(root); set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); btrfs_set_root_flags(&root->root_item, 0); btrfs_set_root_limit(&root->root_item, 0); btrfs_set_root_bytenr(&root->root_item, leaf->start); btrfs_set_root_generation(&root->root_item, trans->transid); btrfs_set_root_level(&root->root_item, 0); btrfs_set_root_refs(&root->root_item, 1); btrfs_set_root_used(&root->root_item, leaf->len); btrfs_set_root_last_snapshot(&root->root_item, 0); btrfs_set_root_dirid(&root->root_item, 0); if (is_fstree(objectid)) generate_random_guid(root->root_item.uuid); else export_guid(root->root_item.uuid, &guid_null); btrfs_set_root_drop_level(&root->root_item, 0); btrfs_tree_unlock(leaf); key.objectid = objectid; key.type = BTRFS_ROOT_ITEM_KEY; key.offset = 0; ret = btrfs_insert_root(trans, tree_root, &key, &root->root_item); if (ret) goto fail; return root; fail: btrfs_put_root(root); return ERR_PTR(ret); } static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info) { struct btrfs_root *root; root = btrfs_alloc_root(fs_info, BTRFS_TREE_LOG_OBJECTID, GFP_NOFS); if (!root) return ERR_PTR(-ENOMEM); root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID; root->root_key.type = BTRFS_ROOT_ITEM_KEY; root->root_key.offset = BTRFS_TREE_LOG_OBJECTID; return root; } int btrfs_alloc_log_tree_node(struct btrfs_trans_handle *trans, struct btrfs_root *root) { struct extent_buffer *leaf; /* * DON'T set SHAREABLE bit for log trees. * * Log trees are not exposed to user space thus can't be snapshotted, * and they go away before a real commit is actually done. * * They do store pointers to file data extents, and those reference * counts still get updated (along with back refs to the log tree). */ leaf = btrfs_alloc_tree_block(trans, root, 0, BTRFS_TREE_LOG_OBJECTID, NULL, 0, 0, 0, 0, BTRFS_NESTING_NORMAL); if (IS_ERR(leaf)) return PTR_ERR(leaf); root->node = leaf; btrfs_mark_buffer_dirty(trans, root->node); btrfs_tree_unlock(root->node); return 0; } int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info) { struct btrfs_root *log_root; log_root = alloc_log_tree(trans, fs_info); if (IS_ERR(log_root)) return PTR_ERR(log_root); if (!btrfs_is_zoned(fs_info)) { int ret = btrfs_alloc_log_tree_node(trans, log_root); if (ret) { btrfs_put_root(log_root); return ret; } } WARN_ON(fs_info->log_root_tree); fs_info->log_root_tree = log_root; return 0; } int btrfs_add_log_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root) { struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_root *log_root; struct btrfs_inode_item *inode_item; int ret; log_root = alloc_log_tree(trans, fs_info); if (IS_ERR(log_root)) return PTR_ERR(log_root); ret = btrfs_alloc_log_tree_node(trans, log_root); if (ret) { btrfs_put_root(log_root); return ret; } btrfs_set_root_last_trans(log_root, trans->transid); log_root->root_key.offset = btrfs_root_id(root); inode_item = &log_root->root_item.inode; btrfs_set_stack_inode_generation(inode_item, 1); btrfs_set_stack_inode_size(inode_item, 3); btrfs_set_stack_inode_nlink(inode_item, 1); btrfs_set_stack_inode_nbytes(inode_item, fs_info->nodesize); btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755); btrfs_set_root_node(&log_root->root_item, log_root->node); WARN_ON(root->log_root); root->log_root = log_root; btrfs_set_root_log_transid(root, 0); root->log_transid_committed = -1; btrfs_set_root_last_log_commit(root, 0); return 0; } static struct btrfs_root *read_tree_root_path(struct btrfs_root *tree_root, struct btrfs_path *path, const struct btrfs_key *key) { struct btrfs_root *root; struct btrfs_tree_parent_check check = { 0 }; struct btrfs_fs_info *fs_info = tree_root->fs_info; u64 generation; int ret; int level; root = btrfs_alloc_root(fs_info, key->objectid, GFP_NOFS); if (!root) return ERR_PTR(-ENOMEM); ret = btrfs_find_root(tree_root, key, path, &root->root_item, &root->root_key); if (ret) { if (ret > 0) ret = -ENOENT; goto fail; } generation = btrfs_root_generation(&root->root_item); level = btrfs_root_level(&root->root_item); check.level = level; check.transid = generation; check.owner_root = key->objectid; root->node = read_tree_block(fs_info, btrfs_root_bytenr(&root->root_item), &check); if (IS_ERR(root->node)) { ret = PTR_ERR(root->node); root->node = NULL; goto fail; } if (!btrfs_buffer_uptodate(root->node, generation, 0)) { ret = -EIO; goto fail; } /* * For real fs, and not log/reloc trees, root owner must * match its root node owner */ if (!btrfs_is_testing(fs_info) && btrfs_root_id(root) != BTRFS_TREE_LOG_OBJECTID && btrfs_root_id(root) != BTRFS_TREE_RELOC_OBJECTID && btrfs_root_id(root) != btrfs_header_owner(root->node)) { btrfs_crit(fs_info, "root=%llu block=%llu, tree root owner mismatch, have %llu expect %llu", btrfs_root_id(root), root->node->start, btrfs_header_owner(root->node), btrfs_root_id(root)); ret = -EUCLEAN; goto fail; } root->commit_root = btrfs_root_node(root); return root; fail: btrfs_put_root(root); return ERR_PTR(ret); } struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, const struct btrfs_key *key) { struct btrfs_root *root; struct btrfs_path *path; path = btrfs_alloc_path(); if (!path) return ERR_PTR(-ENOMEM); root = read_tree_root_path(tree_root, path, key); btrfs_free_path(path); return root; } /* * Initialize subvolume root in-memory structure * * @anon_dev: anonymous device to attach to the root, if zero, allocate new */ static int btrfs_init_fs_root(struct btrfs_root *root, dev_t anon_dev) { int ret; btrfs_drew_lock_init(&root->snapshot_lock); if (btrfs_root_id(root) != BTRFS_TREE_LOG_OBJECTID && !btrfs_is_data_reloc_root(root) && is_fstree(btrfs_root_id(root))) { set_bit(BTRFS_ROOT_SHAREABLE, &root->state); btrfs_check_and_init_root_item(&root->root_item); } /* * Don't assign anonymous block device to roots that are not exposed to * userspace, the id pool is limited to 1M */ if (is_fstree(btrfs_root_id(root)) && btrfs_root_refs(&root->root_item) > 0) { if (!anon_dev) { ret = get_anon_bdev(&root->anon_dev); if (ret) goto fail; } else { root->anon_dev = anon_dev; } } mutex_lock(&root->objectid_mutex); ret = btrfs_init_root_free_objectid(root); if (ret) { mutex_unlock(&root->objectid_mutex); goto fail; } ASSERT(root->free_objectid <= BTRFS_LAST_FREE_OBJECTID); mutex_unlock(&root->objectid_mutex); return 0; fail: /* The caller is responsible to call btrfs_free_fs_root */ return ret; } static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, u64 root_id) { struct btrfs_root *root; spin_lock(&fs_info->fs_roots_radix_lock); root = radix_tree_lookup(&fs_info->fs_roots_radix, (unsigned long)root_id); root = btrfs_grab_root(root); spin_unlock(&fs_info->fs_roots_radix_lock); return root; } static struct btrfs_root *btrfs_get_global_root(struct btrfs_fs_info *fs_info, u64 objectid) { struct btrfs_key key = { .objectid = objectid, .type = BTRFS_ROOT_ITEM_KEY, .offset = 0, }; switch (objectid) { case BTRFS_ROOT_TREE_OBJECTID: return btrfs_grab_root(fs_info->tree_root); case BTRFS_EXTENT_TREE_OBJECTID: return btrfs_grab_root(btrfs_global_root(fs_info, &key)); case BTRFS_CHUNK_TREE_OBJECTID: return btrfs_grab_root(fs_info->chunk_root); case BTRFS_DEV_TREE_OBJECTID: return btrfs_grab_root(fs_info->dev_root); case BTRFS_CSUM_TREE_OBJECTID: return btrfs_grab_root(btrfs_global_root(fs_info, &key)); case BTRFS_QUOTA_TREE_OBJECTID: return btrfs_grab_root(fs_info->quota_root); case BTRFS_UUID_TREE_OBJECTID: return btrfs_grab_root(fs_info->uuid_root); case BTRFS_BLOCK_GROUP_TREE_OBJECTID: return btrfs_grab_root(fs_info->block_group_root); case BTRFS_FREE_SPACE_TREE_OBJECTID: return btrfs_grab_root(btrfs_global_root(fs_info, &key)); case BTRFS_RAID_STRIPE_TREE_OBJECTID: return btrfs_grab_root(fs_info->stripe_root); default: return NULL; } } int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) { int ret; ret = radix_tree_preload(GFP_NOFS); if (ret) return ret; spin_lock(&fs_info->fs_roots_radix_lock); ret = radix_tree_insert(&fs_info->fs_roots_radix, (unsigned long)btrfs_root_id(root), root); if (ret == 0) { btrfs_grab_root(root); set_bit(BTRFS_ROOT_IN_RADIX, &root->state); } spin_unlock(&fs_info->fs_roots_radix_lock); radix_tree_preload_end(); return ret; } void btrfs_check_leaked_roots(const struct btrfs_fs_info *fs_info) { #ifdef CONFIG_BTRFS_DEBUG struct btrfs_root *root; while (!list_empty(&fs_info->allocated_roots)) { char buf[BTRFS_ROOT_NAME_BUF_LEN]; root = list_first_entry(&fs_info->allocated_roots, struct btrfs_root, leak_list); btrfs_err(fs_info, "leaked root %s refcount %d", btrfs_root_name(&root->root_key, buf), refcount_read(&root->refs)); WARN_ON_ONCE(1); while (refcount_read(&root->refs) > 1) btrfs_put_root(root); btrfs_put_root(root); } #endif } static void free_global_roots(struct btrfs_fs_info *fs_info) { struct btrfs_root *root; struct rb_node *node; while ((node = rb_first_postorder(&fs_info->global_root_tree)) != NULL) { root = rb_entry(node, struct btrfs_root, rb_node); rb_erase(&root->rb_node, &fs_info->global_root_tree); btrfs_put_root(root); } } void btrfs_free_fs_info(struct btrfs_fs_info *fs_info) { struct percpu_counter *em_counter = &fs_info->evictable_extent_maps; percpu_counter_destroy(&fs_info->dirty_metadata_bytes); percpu_counter_destroy(&fs_info->delalloc_bytes); percpu_counter_destroy(&fs_info->ordered_bytes); if (percpu_counter_initialized(em_counter)) ASSERT(percpu_counter_sum_positive(em_counter) == 0); percpu_counter_destroy(em_counter); percpu_counter_destroy(&fs_info->dev_replace.bio_counter); btrfs_free_csum_hash(fs_info); btrfs_free_stripe_hash_table(fs_info); btrfs_free_ref_cache(fs_info); kfree(fs_info->balance_ctl); kfree(fs_info->delayed_root); free_global_roots(fs_info); btrfs_put_root(fs_info->tree_root); btrfs_put_root(fs_info->chunk_root); btrfs_put_root(fs_info->dev_root); btrfs_put_root(fs_info->quota_root); btrfs_put_root(fs_info->uuid_root); btrfs_put_root(fs_info->fs_root); btrfs_put_root(fs_info->data_reloc_root); btrfs_put_root(fs_info->block_group_root); btrfs_put_root(fs_info->stripe_root); btrfs_check_leaked_roots(fs_info); btrfs_extent_buffer_leak_debug_check(fs_info); kfree(fs_info->super_copy); kfree(fs_info->super_for_commit); kvfree(fs_info); } /* * Get an in-memory reference of a root structure. * * For essential trees like root/extent tree, we grab it from fs_info directly. * For subvolume trees, we check the cached filesystem roots first. If not * found, then read it from disk and add it to cached fs roots. * * Caller should release the root by calling btrfs_put_root() after the usage. * * NOTE: Reloc and log trees can't be read by this function as they share the * same root objectid. * * @objectid: root id * @anon_dev: preallocated anonymous block device number for new roots, * pass NULL for a new allocation. * @check_ref: whether to check root item references, If true, return -ENOENT * for orphan roots */ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info, u64 objectid, dev_t *anon_dev, bool check_ref) { struct btrfs_root *root; struct btrfs_path *path; struct btrfs_key key; int ret; root = btrfs_get_global_root(fs_info, objectid); if (root) return root; /* * If we're called for non-subvolume trees, and above function didn't * find one, do not try to read it from disk. * * This is namely for free-space-tree and quota tree, which can change * at runtime and should only be grabbed from fs_info. */ if (!is_fstree(objectid) && objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) return ERR_PTR(-ENOENT); again: root = btrfs_lookup_fs_root(fs_info, objectid); if (root) { /* * Some other caller may have read out the newly inserted * subvolume already (for things like backref walk etc). Not * that common but still possible. In that case, we just need * to free the anon_dev. */ if (unlikely(anon_dev && *anon_dev)) { free_anon_bdev(*anon_dev); *anon_dev = 0; } if (check_ref && btrfs_root_refs(&root->root_item) == 0) { btrfs_put_root(root); return ERR_PTR(-ENOENT); } return root; } key.objectid = objectid; key.type = BTRFS_ROOT_ITEM_KEY; key.offset = (u64)-1; root = btrfs_read_tree_root(fs_info->tree_root, &key); if (IS_ERR(root)) return root; if (check_ref && btrfs_root_refs(&root->root_item) == 0) { ret = -ENOENT; goto fail; } ret = btrfs_init_fs_root(root, anon_dev ? *anon_dev : 0); if (ret) goto fail; path = btrfs_alloc_path(); if (!path) { ret = -ENOMEM; goto fail; } key.objectid = BTRFS_ORPHAN_OBJECTID; key.type = BTRFS_ORPHAN_ITEM_KEY; key.offset = objectid; ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0); btrfs_free_path(path); if (ret < 0) goto fail; if (ret == 0) set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state); ret = btrfs_insert_fs_root(fs_info, root); if (ret) { if (ret == -EEXIST) { btrfs_put_root(root); goto again; } goto fail; } return root; fail: /* * If our caller provided us an anonymous device, then it's his * responsibility to free it in case we fail. So we have to set our * root's anon_dev to 0 to avoid a double free, once by btrfs_put_root() * and once again by our caller. */ if (anon_dev && *anon_dev) root->anon_dev = 0; btrfs_put_root(root); return ERR_PTR(ret); } /* * Get in-memory reference of a root structure * * @objectid: tree objectid * @check_ref: if set, verify that the tree exists and the item has at least * one reference */ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, u64 objectid, bool check_ref) { return btrfs_get_root_ref(fs_info, objectid, NULL, check_ref); } /* * Get in-memory reference of a root structure, created as new, optionally pass * the anonymous block device id * * @objectid: tree objectid * @anon_dev: if NULL, allocate a new anonymous block device or use the * parameter value if not NULL */ struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info, u64 objectid, dev_t *anon_dev) { return btrfs_get_root_ref(fs_info, objectid, anon_dev, true); } /* * Return a root for the given objectid. * * @fs_info: the fs_info * @objectid: the objectid we need to lookup * * This is exclusively used for backref walking, and exists specifically because * of how qgroups does lookups. Qgroups will do a backref lookup at delayed ref * creation time, which means we may have to read the tree_root in order to look * up a fs root that is not in memory. If the root is not in memory we will * read the tree root commit root and look up the fs root from there. This is a * temporary root, it will not be inserted into the radix tree as it doesn't * have the most uptodate information, it'll simply be discarded once the * backref code is finished using the root. */ struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info, struct btrfs_path *path, u64 objectid) { struct btrfs_root *root; struct btrfs_key key; ASSERT(path->search_commit_root && path->skip_locking); /* * This can return -ENOENT if we ask for a root that doesn't exist, but * since this is called via the backref walking code we won't be looking * up a root that doesn't exist, unless there's corruption. So if root * != NULL just return it. */ root = btrfs_get_global_root(fs_info, objectid); if (root) return root; root = btrfs_lookup_fs_root(fs_info, objectid); if (root) return root; key.objectid = objectid; key.type = BTRFS_ROOT_ITEM_KEY; key.offset = (u64)-1; root = read_tree_root_path(fs_info->tree_root, path, &key); btrfs_release_path(path); return root; } static int cleaner_kthread(void *arg) { struct btrfs_fs_info *fs_info = arg; int again; while (1) { again = 0; set_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags); /* Make the cleaner go to sleep early. */ if (btrfs_need_cleaner_sleep(fs_info)) goto sleep; /* * Do not do anything if we might cause open_ctree() to block * before we have finished mounting the filesystem. */ if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags)) goto sleep; if (!mutex_trylock(&fs_info->cleaner_mutex)) goto sleep; /* * Avoid the problem that we change the status of the fs * during the above check and trylock. */ if (btrfs_need_cleaner_sleep(fs_info)) { mutex_unlock(&fs_info->cleaner_mutex); goto sleep; } if (test_and_clear_bit(BTRFS_FS_FEATURE_CHANGED, &fs_info->flags)) btrfs_sysfs_feature_update(fs_info); btrfs_run_delayed_iputs(fs_info); again = btrfs_clean_one_deleted_snapshot(fs_info); mutex_unlock(&fs_info->cleaner_mutex); /* * The defragger has dealt with the R/O remount and umount, * needn't do anything special here. */ btrfs_run_defrag_inodes(fs_info); /* * Acquires fs_info->reclaim_bgs_lock to avoid racing * with relocation (btrfs_relocate_chunk) and relocation * acquires fs_info->cleaner_mutex (btrfs_relocate_block_group) * after acquiring fs_info->reclaim_bgs_lock. So we * can't hold, nor need to, fs_info->cleaner_mutex when deleting * unused block groups. */ btrfs_delete_unused_bgs(fs_info); /* * Reclaim block groups in the reclaim_bgs list after we deleted * all unused block_groups. This possibly gives us some more free * space. */ btrfs_reclaim_bgs(fs_info); sleep: clear_and_wake_up_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags); if (kthread_should_park()) kthread_parkme(); if (kthread_should_stop()) return 0; if (!again) { set_current_state(TASK_INTERRUPTIBLE); schedule(); __set_current_state(TASK_RUNNING); } } } static int transaction_kthread(void *arg) { struct btrfs_root *root = arg; struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_trans_handle *trans; struct btrfs_transaction *cur; u64 transid; time64_t delta; unsigned long delay; bool cannot_commit; do { cannot_commit = false; delay = msecs_to_jiffies(fs_info->commit_interval * 1000); mutex_lock(&fs_info->transaction_kthread_mutex); spin_lock(&fs_info->trans_lock); cur = fs_info->running_transaction; if (!cur) { spin_unlock(&fs_info->trans_lock); goto sleep; } delta = ktime_get_seconds() - cur->start_time; if (!test_and_clear_bit(BTRFS_FS_COMMIT_TRANS, &fs_info->flags) && cur->state < TRANS_STATE_COMMIT_PREP && delta < fs_info->commit_interval) { spin_unlock(&fs_info->trans_lock); delay -= msecs_to_jiffies((delta - 1) * 1000); delay = min(delay, msecs_to_jiffies(fs_info->commit_interval * 1000)); goto sleep; } transid = cur->transid; spin_unlock(&fs_info->trans_lock); /* If the file system is aborted, this will always fail. */ trans = btrfs_attach_transaction(root); if (IS_ERR(trans)) { if (PTR_ERR(trans) != -ENOENT) cannot_commit = true; goto sleep; } if (transid == trans->transid) { btrfs_commit_transaction(trans); } else { btrfs_end_transaction(trans); } sleep: wake_up_process(fs_info->cleaner_kthread); mutex_unlock(&fs_info->transaction_kthread_mutex); if (BTRFS_FS_ERROR(fs_info)) btrfs_cleanup_transaction(fs_info); if (!kthread_should_stop() && (!btrfs_transaction_blocked(fs_info) || cannot_commit)) schedule_timeout_interruptible(delay); } while (!kthread_should_stop()); return 0; } /* * This will find the highest generation in the array of root backups. The * index of the highest array is returned, or -EINVAL if we can't find * anything. * * We check to make sure the array is valid by comparing the * generation of the latest root in the array with the generation * in the super block. If they don't match we pitch it. */ static int find_newest_super_backup(struct btrfs_fs_info *info) { const u64 newest_gen = btrfs_super_generation(info->super_copy); u64 cur; struct btrfs_root_backup *root_backup; int i; for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS; i++) { root_backup = info->super_copy->super_roots + i; cur = btrfs_backup_tree_root_gen(root_backup); if (cur == newest_gen) return i; } return -EINVAL; } /* * copy all the root pointers into the super backup array. * this will bump the backup pointer by one when it is * done */ static void backup_super_roots(struct btrfs_fs_info *info) { const int next_backup = info->backup_root_index; struct btrfs_root_backup *root_backup; root_backup = info->super_for_commit->super_roots + next_backup; /* * make sure all of our padding and empty slots get zero filled * regardless of which ones we use today */ memset(root_backup, 0, sizeof(*root_backup)); info->backup_root_index = (next_backup + 1) % BTRFS_NUM_BACKUP_ROOTS; btrfs_set_backup_tree_root(root_backup, info->tree_root->node->start); btrfs_set_backup_tree_root_gen(root_backup, btrfs_header_generation(info->tree_root->node)); btrfs_set_backup_tree_root_level(root_backup, btrfs_header_level(info->tree_root->node)); btrfs_set_backup_chunk_root(root_backup, info->chunk_root->node->start); btrfs_set_backup_chunk_root_gen(root_backup, btrfs_header_generation(info->chunk_root->node)); btrfs_set_backup_chunk_root_level(root_backup, btrfs_header_level(info->chunk_root->node)); if (!btrfs_fs_compat_ro(info, BLOCK_GROUP_TREE)) { struct btrfs_root *extent_root = btrfs_extent_root(info, 0); struct btrfs_root *csum_root = btrfs_csum_root(info, 0); btrfs_set_backup_extent_root(root_backup, extent_root->node->start); btrfs_set_backup_extent_root_gen(root_backup, btrfs_header_generation(extent_root->node)); btrfs_set_backup_extent_root_level(root_backup, btrfs_header_level(extent_root->node)); btrfs_set_backup_csum_root(root_backup, csum_root->node->start); btrfs_set_backup_csum_root_gen(root_backup, btrfs_header_generation(csum_root->node)); btrfs_set_backup_csum_root_level(root_backup, btrfs_header_level(csum_root->node)); } /* * we might commit during log recovery, which happens before we set * the fs_root. Make sure it is valid before we fill it in. */ if (info->fs_root && info->fs_root->node) { btrfs_set_backup_fs_root(root_backup, info->fs_root->node->start); btrfs_set_backup_fs_root_gen(root_backup, btrfs_header_generation(info->fs_root->node)); btrfs_set_backup_fs_root_level(root_backup, btrfs_header_level(info->fs_root->node)); } btrfs_set_backup_dev_root(root_backup, info->dev_root->node->start); btrfs_set_backup_dev_root_gen(root_backup, btrfs_header_generation(info->dev_root->node)); btrfs_set_backup_dev_root_level(root_backup, btrfs_header_level(info->dev_root->node)); btrfs_set_backup_total_bytes(root_backup, btrfs_super_total_bytes(info->super_copy)); btrfs_set_backup_bytes_used(root_backup, btrfs_super_bytes_used(info->super_copy)); btrfs_set_backup_num_devices(root_backup, btrfs_super_num_devices(info->super_copy)); /* * if we don't copy this out to the super_copy, it won't get remembered * for the next commit */ memcpy(&info->super_copy->super_roots, &info->super_for_commit->super_roots, sizeof(*root_backup) * BTRFS_NUM_BACKUP_ROOTS); } /* * Reads a backup root based on the passed priority. Prio 0 is the newest, prio * 1/2/3 are 2nd newest/3rd newest/4th (oldest) backup roots * * @fs_info: filesystem whose backup roots need to be read * @priority: priority of backup root required * * Returns backup root index on success and -EINVAL otherwise. */ static int read_backup_root(struct btrfs_fs_info *fs_info, u8 priority) { int backup_index = find_newest_super_backup(fs_info); struct btrfs_super_block *super = fs_info->super_copy; struct btrfs_root_backup *root_backup; if (priority < BTRFS_NUM_BACKUP_ROOTS && backup_index >= 0) { if (priority == 0) return backup_index; backup_index = backup_index + BTRFS_NUM_BACKUP_ROOTS - priority; backup_index %= BTRFS_NUM_BACKUP_ROOTS; } else { return -EINVAL; } root_backup = super->super_roots + backup_index; btrfs_set_super_generation(super, btrfs_backup_tree_root_gen(root_backup)); btrfs_set_super_root(super, btrfs_backup_tree_root(root_backup)); btrfs_set_super_root_level(super, btrfs_backup_tree_root_level(root_backup)); btrfs_set_super_bytes_used(super, btrfs_backup_bytes_used(root_backup)); /* * Fixme: the total bytes and num_devices need to match or we should * need a fsck */ btrfs_set_super_total_bytes(super, btrfs_backup_total_bytes(root_backup)); btrfs_set_super_num_devices(super, btrfs_backup_num_devices(root_backup)); return backup_index; } /* helper to cleanup workers */ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) { btrfs_destroy_workqueue(fs_info->fixup_workers); btrfs_destroy_workqueue(fs_info->delalloc_workers); btrfs_destroy_workqueue(fs_info->workers); if (fs_info->endio_workers) destroy_workqueue(fs_info->endio_workers); if (fs_info->rmw_workers) destroy_workqueue(fs_info->rmw_workers); if (fs_info->compressed_write_workers) destroy_workqueue(fs_info->compressed_write_workers); btrfs_destroy_workqueue(fs_info->endio_write_workers); btrfs_destroy_workqueue(fs_info->endio_freespace_worker); btrfs_destroy_workqueue(fs_info->delayed_workers); btrfs_destroy_workqueue(fs_info->caching_workers); btrfs_destroy_workqueue(fs_info->flush_workers); btrfs_destroy_workqueue(fs_info->qgroup_rescan_workers); if (fs_info->discard_ctl.discard_workers) destroy_workqueue(fs_info->discard_ctl.discard_workers); /* * Now that all other work queues are destroyed, we can safely destroy * the queues used for metadata I/O, since tasks from those other work * queues can do metadata I/O operations. */ if (fs_info->endio_meta_workers) destroy_workqueue(fs_info->endio_meta_workers); } static void free_root_extent_buffers(struct btrfs_root *root) { if (root) { free_extent_buffer(root->node); free_extent_buffer(root->commit_root); root->node = NULL; root->commit_root = NULL; } } static void free_global_root_pointers(struct btrfs_fs_info *fs_info) { struct btrfs_root *root, *tmp; rbtree_postorder_for_each_entry_safe(root, tmp, &fs_info->global_root_tree, rb_node) free_root_extent_buffers(root); } /* helper to cleanup tree roots */ static void free_root_pointers(struct btrfs_fs_info *info, bool free_chunk_root) { free_root_extent_buffers(info->tree_root); free_global_root_pointers(info); free_root_extent_buffers(info->dev_root); free_root_extent_buffers(info->quota_root); free_root_extent_buffers(info->uuid_root); free_root_extent_buffers(info->fs_root); free_root_extent_buffers(info->data_reloc_root); free_root_extent_buffers(info->block_group_root); free_root_extent_buffers(info->stripe_root); if (free_chunk_root) free_root_extent_buffers(info->chunk_root); } void btrfs_put_root(struct btrfs_root *root) { if (!root) return; if (refcount_dec_and_test(&root->refs)) { if (WARN_ON(!xa_empty(&root->inodes))) xa_destroy(&root->inodes); WARN_ON(test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state)); if (root->anon_dev) free_anon_bdev(root->anon_dev); free_root_extent_buffers(root); #ifdef CONFIG_BTRFS_DEBUG spin_lock(&root->fs_info->fs_roots_radix_lock); list_del_init(&root->leak_list); spin_unlock(&root->fs_info->fs_roots_radix_lock); #endif kfree(root); } } void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info) { int ret; struct btrfs_root *gang[8]; int i; while (!list_empty(&fs_info->dead_roots)) { gang[0] = list_entry(fs_info->dead_roots.next, struct btrfs_root, root_list); list_del(&gang[0]->root_list); if (test_bit(BTRFS_ROOT_IN_RADIX, &gang[0]->state)) btrfs_drop_and_free_fs_root(fs_info, gang[0]); btrfs_put_root(gang[0]); } while (1) { ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix, (void **)gang, 0, ARRAY_SIZE(gang)); if (!ret) break; for (i = 0; i < ret; i++) btrfs_drop_and_free_fs_root(fs_info, gang[i]); } } static void btrfs_init_scrub(struct btrfs_fs_info *fs_info) { mutex_init(&fs_info->scrub_lock); atomic_set(&fs_info->scrubs_running, 0); atomic_set(&fs_info->scrub_pause_req, 0); atomic_set(&fs_info->scrubs_paused, 0); atomic_set(&fs_info->scrub_cancel_req, 0); init_waitqueue_head(&fs_info->scrub_pause_wait); refcount_set(&fs_info->scrub_workers_refcnt, 0); } static void btrfs_init_balance(struct btrfs_fs_info *fs_info) { spin_lock_init(&fs_info->balance_lock); mutex_init(&fs_info->balance_mutex); atomic_set(&fs_info->balance_pause_req, 0); atomic_set(&fs_info->balance_cancel_req, 0); fs_info->balance_ctl = NULL; init_waitqueue_head(&fs_info->balance_wait_q); atomic_set(&fs_info->reloc_cancel_req, 0); } static int btrfs_init_btree_inode(struct super_block *sb) { struct btrfs_fs_info *fs_info = btrfs_sb(sb); unsigned long hash = btrfs_inode_hash(BTRFS_BTREE_INODE_OBJECTID, fs_info->tree_root); struct inode *inode; inode = new_inode(sb); if (!inode) return -ENOMEM; btrfs_set_inode_number(BTRFS_I(inode), BTRFS_BTREE_INODE_OBJECTID); set_nlink(inode, 1); /* * we set the i_size on the btree inode to the max possible int. * the real end of the address space is determined by all of * the devices in the system */ inode->i_size = OFFSET_MAX; inode->i_mapping->a_ops = &btree_aops; mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); extent_io_tree_init(fs_info, &BTRFS_I(inode)->io_tree, IO_TREE_BTREE_INODE_IO); extent_map_tree_init(&BTRFS_I(inode)->extent_tree); BTRFS_I(inode)->root = btrfs_grab_root(fs_info->tree_root); set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags); __insert_inode_hash(inode, hash); fs_info->btree_inode = inode; return 0; } static void btrfs_init_dev_replace_locks(struct btrfs_fs_info *fs_info) { mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount); init_rwsem(&fs_info->dev_replace.rwsem); init_waitqueue_head(&fs_info->dev_replace.replace_wait); } static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info) { spin_lock_init(&fs_info->qgroup_lock); mutex_init(&fs_info->qgroup_ioctl_lock); fs_info->qgroup_tree = RB_ROOT; INIT_LIST_HEAD(&fs_info->dirty_qgroups); fs_info->qgroup_seq = 1; fs_info->qgroup_ulist = NULL; fs_info->qgroup_rescan_running = false; fs_info->qgroup_drop_subtree_thres = BTRFS_MAX_LEVEL; mutex_init(&fs_info->qgroup_rescan_lock); } static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info) { u32 max_active = fs_info->thread_pool_size; unsigned int flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND; unsigned int ordered_flags = WQ_MEM_RECLAIM | WQ_FREEZABLE; fs_info->workers = btrfs_alloc_workqueue(fs_info, "worker", flags, max_active, 16); fs_info->delalloc_workers = btrfs_alloc_workqueue(fs_info, "delalloc", flags, max_active, 2); fs_info->flush_workers = btrfs_alloc_workqueue(fs_info, "flush_delalloc", flags, max_active, 0); fs_info->caching_workers = btrfs_alloc_workqueue(fs_info, "cache", flags, max_active, 0); fs_info->fixup_workers = btrfs_alloc_ordered_workqueue(fs_info, "fixup", ordered_flags); fs_info->endio_workers = alloc_workqueue("btrfs-endio", flags, max_active); fs_info->endio_meta_workers = alloc_workqueue("btrfs-endio-meta", flags, max_active); fs_info->rmw_workers = alloc_workqueue("btrfs-rmw", flags, max_active); fs_info->endio_write_workers = btrfs_alloc_workqueue(fs_info, "endio-write", flags, max_active, 2); fs_info->compressed_write_workers = alloc_workqueue("btrfs-compressed-write", flags, max_active); fs_info->endio_freespace_worker = btrfs_alloc_workqueue(fs_info, "freespace-write", flags, max_active, 0); fs_info->delayed_workers = btrfs_alloc_workqueue(fs_info, "delayed-meta", flags, max_active, 0); fs_info->qgroup_rescan_workers = btrfs_alloc_ordered_workqueue(fs_info, "qgroup-rescan", ordered_flags); fs_info->discard_ctl.discard_workers = alloc_ordered_workqueue("btrfs_discard", WQ_FREEZABLE); if (!(fs_info->workers && fs_info->delalloc_workers && fs_info->flush_workers && fs_info->endio_workers && fs_info->endio_meta_workers && fs_info->compressed_write_workers && fs_info->endio_write_workers && fs_info->endio_freespace_worker && fs_info->rmw_workers && fs_info->caching_workers && fs_info->fixup_workers && fs_info->delayed_workers && fs_info->qgroup_rescan_workers && fs_info->discard_ctl.discard_workers)) { return -ENOMEM; } return 0; } static int btrfs_init_csum_hash(struct btrfs_fs_info *fs_info, u16 csum_type) { struct crypto_shash *csum_shash; const char *csum_driver = btrfs_super_csum_driver(csum_type); csum_shash = crypto_alloc_shash(csum_driver, 0, 0); if (IS_ERR(csum_shash)) { btrfs_err(fs_info, "error allocating %s hash for checksum", csum_driver); return PTR_ERR(csum_shash); } fs_info->csum_shash = csum_shash; /* * Check if the checksum implementation is a fast accelerated one. * As-is this is a bit of a hack and should be replaced once the csum * implementations provide that information themselves. */ switch (csum_type) { case BTRFS_CSUM_TYPE_CRC32: if (!strstr(crypto_shash_driver_name(csum_shash), "generic")) set_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags); break; case BTRFS_CSUM_TYPE_XXHASH: set_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags); break; default: break; } btrfs_info(fs_info, "using %s (%s) checksum algorithm", btrfs_super_csum_name(csum_type), crypto_shash_driver_name(csum_shash)); return 0; } static int btrfs_replay_log(struct btrfs_fs_info *fs_info, struct btrfs_fs_devices *fs_devices) { int ret; struct btrfs_tree_parent_check check = { 0 }; struct btrfs_root *log_tree_root; struct btrfs_super_block *disk_super = fs_info->super_copy; u64 bytenr = btrfs_super_log_root(disk_super); int level = btrfs_super_log_root_level(disk_super); if (fs_devices->rw_devices == 0) { btrfs_warn(fs_info, "log replay required on RO media"); return -EIO; } log_tree_root = btrfs_alloc_root(fs_info, BTRFS_TREE_LOG_OBJECTID, GFP_KERNEL); if (!log_tree_root) return -ENOMEM; check.level = level; check.transid = fs_info->generation + 1; check.owner_root = BTRFS_TREE_LOG_OBJECTID; log_tree_root->node = read_tree_block(fs_info, bytenr, &check); if (IS_ERR(log_tree_root->node)) { btrfs_warn(fs_info, "failed to read log tree"); ret = PTR_ERR(log_tree_root->node); log_tree_root->node = NULL; btrfs_put_root(log_tree_root); return ret; } if (!extent_buffer_uptodate(log_tree_root->node)) { btrfs_err(fs_info, "failed to read log tree"); btrfs_put_root(log_tree_root); return -EIO; } /* returns with log_tree_root freed on success */ ret = btrfs_recover_log_trees(log_tree_root); if (ret) { btrfs_handle_fs_error(fs_info, ret, "Failed to recover log tree"); btrfs_put_root(log_tree_root); return ret; } if (sb_rdonly(fs_info->sb)) { ret = btrfs_commit_super(fs_info); if (ret) return ret; } return 0; } static int load_global_roots_objectid(struct btrfs_root *tree_root, struct btrfs_path *path, u64 objectid, const char *name) { struct btrfs_fs_info *fs_info = tree_root->fs_info; struct btrfs_root *root; u64 max_global_id = 0; int ret; struct btrfs_key key = { .objectid = objectid, .type = BTRFS_ROOT_ITEM_KEY, .offset = 0, }; bool found = false; /* If we have IGNOREDATACSUMS skip loading these roots. */ if (objectid == BTRFS_CSUM_TREE_OBJECTID && btrfs_test_opt(fs_info, IGNOREDATACSUMS)) { set_bit(BTRFS_FS_STATE_NO_DATA_CSUMS, &fs_info->fs_state); return 0; } while (1) { ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0); if (ret < 0) break; if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { ret = btrfs_next_leaf(tree_root, path); if (ret) { if (ret > 0) ret = 0; break; } } ret = 0; btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); if (key.objectid != objectid) break; btrfs_release_path(path); /* * Just worry about this for extent tree, it'll be the same for * everybody. */ if (objectid == BTRFS_EXTENT_TREE_OBJECTID) max_global_id = max(max_global_id, key.offset); found = true; root = read_tree_root_path(tree_root, path, &key); if (IS_ERR(root)) { if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) ret = PTR_ERR(root); break; } set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); ret = btrfs_global_root_insert(root); if (ret) { btrfs_put_root(root); break; } key.offset++; } btrfs_release_path(path); if (objectid == BTRFS_EXTENT_TREE_OBJECTID) fs_info->nr_global_roots = max_global_id + 1; if (!found || ret) { if (objectid == BTRFS_CSUM_TREE_OBJECTID) set_bit(BTRFS_FS_STATE_NO_DATA_CSUMS, &fs_info->fs_state); if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) ret = ret ? ret : -ENOENT; else ret = 0; btrfs_err(fs_info, "failed to load root %s", name); } return ret; } static int load_global_roots(struct btrfs_root *tree_root) { struct btrfs_path *path; int ret = 0; path = btrfs_alloc_path(); if (!path) return -ENOMEM; ret = load_global_roots_objectid(tree_root, path, BTRFS_EXTENT_TREE_OBJECTID, "extent"); if (ret) goto out; ret = load_global_roots_objectid(tree_root, path, BTRFS_CSUM_TREE_OBJECTID, "csum"); if (ret) goto out; if (!btrfs_fs_compat_ro(tree_root->fs_info, FREE_SPACE_TREE)) goto out; ret = load_global_roots_objectid(tree_root, path, BTRFS_FREE_SPACE_TREE_OBJECTID, "free space"); out: btrfs_free_path(path); return ret; } static int btrfs_read_roots(struct btrfs_fs_info *fs_info) { struct btrfs_root *tree_root = fs_info->tree_root; struct btrfs_root *root; struct btrfs_key location; int ret; ASSERT(fs_info->tree_root); ret = load_global_roots(tree_root); if (ret) return ret; location.type = BTRFS_ROOT_ITEM_KEY; location.offset = 0; if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE)) { location.objectid = BTRFS_BLOCK_GROUP_TREE_OBJECTID; root = btrfs_read_tree_root(tree_root, &location); if (IS_ERR(root)) { if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) { ret = PTR_ERR(root); goto out; } } else { set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); fs_info->block_group_root = root; } } location.objectid = BTRFS_DEV_TREE_OBJECTID; root = btrfs_read_tree_root(tree_root, &location); if (IS_ERR(root)) { if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) { ret = PTR_ERR(root); goto out; } } else { set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); fs_info->dev_root = root; } /* Initialize fs_info for all devices in any case */ ret = btrfs_init_devices_late(fs_info); if (ret) goto out; /* * This tree can share blocks with some other fs tree during relocation * and we need a proper setup by btrfs_get_fs_root */ root = btrfs_get_fs_root(tree_root->fs_info, BTRFS_DATA_RELOC_TREE_OBJECTID, true); if (IS_ERR(root)) { if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) { ret = PTR_ERR(root); goto out; } } else { set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); fs_info->data_reloc_root = root; } location.objectid = BTRFS_QUOTA_TREE_OBJECTID; root = btrfs_read_tree_root(tree_root, &location); if (!IS_ERR(root)) { set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); fs_info->quota_root = root; } location.objectid = BTRFS_UUID_TREE_OBJECTID; root = btrfs_read_tree_root(tree_root, &location); if (IS_ERR(root)) { if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) { ret = PTR_ERR(root); if (ret != -ENOENT) goto out; } } else { set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); fs_info->uuid_root = root; } if (btrfs_fs_incompat(fs_info, RAID_STRIPE_TREE)) { location.objectid = BTRFS_RAID_STRIPE_TREE_OBJECTID; root = btrfs_read_tree_root(tree_root, &location); if (IS_ERR(root)) { if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) { ret = PTR_ERR(root); goto out; } } else { set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); fs_info->stripe_root = root; } } return 0; out: btrfs_warn(fs_info, "failed to read root (objectid=%llu): %d", location.objectid, ret); return ret; } /* * Real super block validation * NOTE: super csum type and incompat features will not be checked here. * * @sb: super block to check * @mirror_num: the super block number to check its bytenr: * 0 the primary (1st) sb * 1, 2 2nd and 3rd backup copy * -1 skip bytenr check */ int btrfs_validate_super(const struct btrfs_fs_info *fs_info, const struct btrfs_super_block *sb, int mirror_num) { u64 nodesize = btrfs_super_nodesize(sb); u64 sectorsize = btrfs_super_sectorsize(sb); int ret = 0; const bool ignore_flags = btrfs_test_opt(fs_info, IGNORESUPERFLAGS); if (btrfs_super_magic(sb) != BTRFS_MAGIC) { btrfs_err(fs_info, "no valid FS found"); ret = -EINVAL; } if ((btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP)) { if (!ignore_flags) { btrfs_err(fs_info, "unrecognized or unsupported super flag 0x%llx", btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP); ret = -EINVAL; } else { btrfs_info(fs_info, "unrecognized or unsupported super flags: 0x%llx, ignored", btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP); } } if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) { btrfs_err(fs_info, "tree_root level too big: %d >= %d", btrfs_super_root_level(sb), BTRFS_MAX_LEVEL); ret = -EINVAL; } if (btrfs_super_chunk_root_level(sb) >= BTRFS_MAX_LEVEL) { btrfs_err(fs_info, "chunk_root level too big: %d >= %d", btrfs_super_chunk_root_level(sb), BTRFS_MAX_LEVEL); ret = -EINVAL; } if (btrfs_super_log_root_level(sb) >= BTRFS_MAX_LEVEL) { btrfs_err(fs_info, "log_root level too big: %d >= %d", btrfs_super_log_root_level(sb), BTRFS_MAX_LEVEL); ret = -EINVAL; } /* * Check sectorsize and nodesize first, other check will need it. * Check all possible sectorsize(4K, 8K, 16K, 32K, 64K) here. */ if (!is_power_of_2(sectorsize) || sectorsize < 4096 || sectorsize > BTRFS_MAX_METADATA_BLOCKSIZE) { btrfs_err(fs_info, "invalid sectorsize %llu", sectorsize); ret = -EINVAL; } /* * We only support at most two sectorsizes: 4K and PAGE_SIZE. * * We can support 16K sectorsize with 64K page size without problem, * but such sectorsize/pagesize combination doesn't make much sense. * 4K will be our future standard, PAGE_SIZE is supported from the very * beginning. */ if (sectorsize > PAGE_SIZE || (sectorsize != SZ_4K && sectorsize != PAGE_SIZE)) { btrfs_err(fs_info, "sectorsize %llu not yet supported for page size %lu", sectorsize, PAGE_SIZE); ret = -EINVAL; } if (!is_power_of_2(nodesize) || nodesize < sectorsize || nodesize > BTRFS_MAX_METADATA_BLOCKSIZE) { btrfs_err(fs_info, "invalid nodesize %llu", nodesize); ret = -EINVAL; } if (nodesize != le32_to_cpu(sb->__unused_leafsize)) { btrfs_err(fs_info, "invalid leafsize %u, should be %llu", le32_to_cpu(sb->__unused_leafsize), nodesize); ret = -EINVAL; } /* Root alignment check */ if (!IS_ALIGNED(btrfs_super_root(sb), sectorsize)) { btrfs_warn(fs_info, "tree_root block unaligned: %llu", btrfs_super_root(sb)); ret = -EINVAL; } if (!IS_ALIGNED(btrfs_super_chunk_root(sb), sectorsize)) { btrfs_warn(fs_info, "chunk_root block unaligned: %llu", btrfs_super_chunk_root(sb)); ret = -EINVAL; } if (!IS_ALIGNED(btrfs_super_log_root(sb), sectorsize)) { btrfs_warn(fs_info, "log_root block unaligned: %llu", btrfs_super_log_root(sb)); ret = -EINVAL; } if (!fs_info->fs_devices->temp_fsid && memcmp(fs_info->fs_devices->fsid, sb->fsid, BTRFS_FSID_SIZE) != 0) { btrfs_err(fs_info, "superblock fsid doesn't match fsid of fs_devices: %pU != %pU", sb->fsid, fs_info->fs_devices->fsid); ret = -EINVAL; } if (memcmp(fs_info->fs_devices->metadata_uuid, btrfs_sb_fsid_ptr(sb), BTRFS_FSID_SIZE) != 0) { btrfs_err(fs_info, "superblock metadata_uuid doesn't match metadata uuid of fs_devices: %pU != %pU", btrfs_sb_fsid_ptr(sb), fs_info->fs_devices->metadata_uuid); ret = -EINVAL; } if (memcmp(fs_info->fs_devices->metadata_uuid, sb->dev_item.fsid, BTRFS_FSID_SIZE) != 0) { btrfs_err(fs_info, "dev_item UUID does not match metadata fsid: %pU != %pU", fs_info->fs_devices->metadata_uuid, sb->dev_item.fsid); ret = -EINVAL; } /* * Artificial requirement for block-group-tree to force newer features * (free-space-tree, no-holes) so the test matrix is smaller. */ if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE) && (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID) || !btrfs_fs_incompat(fs_info, NO_HOLES))) { btrfs_err(fs_info, "block-group-tree feature requires free-space-tree and no-holes"); ret = -EINVAL; } /* * Hint to catch really bogus numbers, bitflips or so, more exact checks are * done later */ if (btrfs_super_bytes_used(sb) < 6 * btrfs_super_nodesize(sb)) { btrfs_err(fs_info, "bytes_used is too small %llu", btrfs_super_bytes_used(sb)); ret = -EINVAL; } if (!is_power_of_2(btrfs_super_stripesize(sb))) { btrfs_err(fs_info, "invalid stripesize %u", btrfs_super_stripesize(sb)); ret = -EINVAL; } if (btrfs_super_num_devices(sb) > (1UL << 31)) btrfs_warn(fs_info, "suspicious number of devices: %llu", btrfs_super_num_devices(sb)); if (btrfs_super_num_devices(sb) == 0) { btrfs_err(fs_info, "number of devices is 0"); ret = -EINVAL; } if (mirror_num >= 0 && btrfs_super_bytenr(sb) != btrfs_sb_offset(mirror_num)) { btrfs_err(fs_info, "super offset mismatch %llu != %u", btrfs_super_bytenr(sb), BTRFS_SUPER_INFO_OFFSET); ret = -EINVAL; } /* * Obvious sys_chunk_array corruptions, it must hold at least one key * and one chunk */ if (btrfs_super_sys_array_size(sb) > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) { btrfs_err(fs_info, "system chunk array too big %u > %u", btrfs_super_sys_array_size(sb), BTRFS_SYSTEM_CHUNK_ARRAY_SIZE); ret = -EINVAL; } if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key) + sizeof(struct btrfs_chunk)) { btrfs_err(fs_info, "system chunk array too small %u < %zu", btrfs_super_sys_array_size(sb), sizeof(struct btrfs_disk_key) + sizeof(struct btrfs_chunk)); ret = -EINVAL; } /* * The generation is a global counter, we'll trust it more than the others * but it's still possible that it's the one that's wrong. */ if (btrfs_super_generation(sb) < btrfs_super_chunk_root_generation(sb)) btrfs_warn(fs_info, "suspicious: generation < chunk_root_generation: %llu < %llu", btrfs_super_generation(sb), btrfs_super_chunk_root_generation(sb)); if (btrfs_super_generation(sb) < btrfs_super_cache_generation(sb) && btrfs_super_cache_generation(sb) != (u64)-1) btrfs_warn(fs_info, "suspicious: generation < cache_generation: %llu < %llu", btrfs_super_generation(sb), btrfs_super_cache_generation(sb)); return ret; } /* * Validation of super block at mount time. * Some checks already done early at mount time, like csum type and incompat * flags will be skipped. */ static int btrfs_validate_mount_super(struct btrfs_fs_info *fs_info) { return btrfs_validate_super(fs_info, fs_info->super_copy, 0); } /* * Validation of super block at write time. * Some checks like bytenr check will be skipped as their values will be * overwritten soon. * Extra checks like csum type and incompat flags will be done here. */ static int btrfs_validate_write_super(struct btrfs_fs_info *fs_info, struct btrfs_super_block *sb) { int ret; ret = btrfs_validate_super(fs_info, sb, -1); if (ret < 0) goto out; if (!btrfs_supported_super_csum(btrfs_super_csum_type(sb))) { ret = -EUCLEAN; btrfs_err(fs_info, "invalid csum type, has %u want %u", btrfs_super_csum_type(sb), BTRFS_CSUM_TYPE_CRC32); goto out; } if (btrfs_super_incompat_flags(sb) & ~BTRFS_FEATURE_INCOMPAT_SUPP) { ret = -EUCLEAN; btrfs_err(fs_info, "invalid incompat flags, has 0x%llx valid mask 0x%llx", btrfs_super_incompat_flags(sb), (unsigned long long)BTRFS_FEATURE_INCOMPAT_SUPP); goto out; } out: if (ret < 0) btrfs_err(fs_info, "super block corruption detected before writing it to disk"); return ret; } static int load_super_root(struct btrfs_root *root, u64 bytenr, u64 gen, int level) { struct btrfs_tree_parent_check check = { .level = level, .transid = gen, .owner_root = btrfs_root_id(root) }; int ret = 0; root->node = read_tree_block(root->fs_info, bytenr, &check); if (IS_ERR(root->node)) { ret = PTR_ERR(root->node); root->node = NULL; return ret; } if (!extent_buffer_uptodate(root->node)) { free_extent_buffer(root->node); root->node = NULL; return -EIO; } btrfs_set_root_node(&root->root_item, root->node); root->commit_root = btrfs_root_node(root); btrfs_set_root_refs(&root->root_item, 1); return ret; } static int load_important_roots(struct btrfs_fs_info *fs_info) { struct btrfs_super_block *sb = fs_info->super_copy; u64 gen, bytenr; int level, ret; bytenr = btrfs_super_root(sb); gen = btrfs_super_generation(sb); level = btrfs_super_root_level(sb); ret = load_super_root(fs_info->tree_root, bytenr, gen, level); if (ret) { btrfs_warn(fs_info, "couldn't read tree root"); return ret; } return 0; } static int __cold init_tree_roots(struct btrfs_fs_info *fs_info) { int backup_index = find_newest_super_backup(fs_info); struct btrfs_super_block *sb = fs_info->super_copy; struct btrfs_root *tree_root = fs_info->tree_root; bool handle_error = false; int ret = 0; int i; for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS; i++) { if (handle_error) { if (!IS_ERR(tree_root->node)) free_extent_buffer(tree_root->node); tree_root->node = NULL; if (!btrfs_test_opt(fs_info, USEBACKUPROOT)) break; free_root_pointers(fs_info, 0); /* * Don't use the log in recovery mode, it won't be * valid */ btrfs_set_super_log_root(sb, 0); btrfs_warn(fs_info, "try to load backup roots slot %d", i); ret = read_backup_root(fs_info, i); backup_index = ret; if (ret < 0) return ret; } ret = load_important_roots(fs_info); if (ret) { handle_error = true; continue; } /* * No need to hold btrfs_root::objectid_mutex since the fs * hasn't been fully initialised and we are the only user */ ret = btrfs_init_root_free_objectid(tree_root); if (ret < 0) { handle_error = true; continue; } ASSERT(tree_root->free_objectid <= BTRFS_LAST_FREE_OBJECTID); ret = btrfs_read_roots(fs_info); if (ret < 0) { handle_error = true; continue; } /* All successful */ fs_info->generation = btrfs_header_generation(tree_root->node); btrfs_set_last_trans_committed(fs_info, fs_info->generation); fs_info->last_reloc_trans = 0; /* Always begin writing backup roots after the one being used */ if (backup_index < 0) { fs_info->backup_root_index = 0; } else { fs_info->backup_root_index = backup_index + 1; fs_info->backup_root_index %= BTRFS_NUM_BACKUP_ROOTS; } break; } return ret; } void btrfs_init_fs_info(struct btrfs_fs_info *fs_info) { INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC); INIT_LIST_HEAD(&fs_info->trans_list); INIT_LIST_HEAD(&fs_info->dead_roots); INIT_LIST_HEAD(&fs_info->delayed_iputs); INIT_LIST_HEAD(&fs_info->delalloc_roots); INIT_LIST_HEAD(&fs_info->caching_block_groups); spin_lock_init(&fs_info->delalloc_root_lock); spin_lock_init(&fs_info->trans_lock); spin_lock_init(&fs_info->fs_roots_radix_lock); spin_lock_init(&fs_info->delayed_iput_lock); spin_lock_init(&fs_info->defrag_inodes_lock); spin_lock_init(&fs_info->super_lock); spin_lock_init(&fs_info->buffer_lock); spin_lock_init(&fs_info->unused_bgs_lock); spin_lock_init(&fs_info->treelog_bg_lock); spin_lock_init(&fs_info->zone_active_bgs_lock); spin_lock_init(&fs_info->relocation_bg_lock); rwlock_init(&fs_info->tree_mod_log_lock); rwlock_init(&fs_info->global_root_lock); mutex_init(&fs_info->unused_bg_unpin_mutex); mutex_init(&fs_info->reclaim_bgs_lock); mutex_init(&fs_info->reloc_mutex); mutex_init(&fs_info->delalloc_root_mutex); mutex_init(&fs_info->zoned_meta_io_lock); mutex_init(&fs_info->zoned_data_reloc_io_lock); seqlock_init(&fs_info->profiles_lock); btrfs_lockdep_init_map(fs_info, btrfs_trans_num_writers); btrfs_lockdep_init_map(fs_info, btrfs_trans_num_extwriters); btrfs_lockdep_init_map(fs_info, btrfs_trans_pending_ordered); btrfs_lockdep_init_map(fs_info, btrfs_ordered_extent); btrfs_state_lockdep_init_map(fs_info, btrfs_trans_commit_prep, BTRFS_LOCKDEP_TRANS_COMMIT_PREP); btrfs_state_lockdep_init_map(fs_info, btrfs_trans_unblocked, BTRFS_LOCKDEP_TRANS_UNBLOCKED); btrfs_state_lockdep_init_map(fs_info, btrfs_trans_super_committed, BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED); btrfs_state_lockdep_init_map(fs_info, btrfs_trans_completed, BTRFS_LOCKDEP_TRANS_COMPLETED); INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); INIT_LIST_HEAD(&fs_info->space_info); INIT_LIST_HEAD(&fs_info->tree_mod_seq_list); INIT_LIST_HEAD(&fs_info->unused_bgs); INIT_LIST_HEAD(&fs_info->reclaim_bgs); INIT_LIST_HEAD(&fs_info->zone_active_bgs); #ifdef CONFIG_BTRFS_DEBUG INIT_LIST_HEAD(&fs_info->allocated_roots); INIT_LIST_HEAD(&fs_info->allocated_ebs); spin_lock_init(&fs_info->eb_leak_lock); #endif fs_info->mapping_tree = RB_ROOT_CACHED; rwlock_init(&fs_info->mapping_tree_lock); btrfs_init_block_rsv(&fs_info->global_block_rsv, BTRFS_BLOCK_RSV_GLOBAL); btrfs_init_block_rsv(&fs_info->trans_block_rsv, BTRFS_BLOCK_RSV_TRANS); btrfs_init_block_rsv(&fs_info->chunk_block_rsv, BTRFS_BLOCK_RSV_CHUNK); btrfs_init_block_rsv(&fs_info->empty_block_rsv, BTRFS_BLOCK_RSV_EMPTY); btrfs_init_block_rsv(&fs_info->delayed_block_rsv, BTRFS_BLOCK_RSV_DELOPS); btrfs_init_block_rsv(&fs_info->delayed_refs_rsv, BTRFS_BLOCK_RSV_DELREFS); atomic_set(&fs_info->async_delalloc_pages, 0); atomic_set(&fs_info->defrag_running, 0); atomic_set(&fs_info->nr_delayed_iputs, 0); atomic64_set(&fs_info->tree_mod_seq, 0); fs_info->global_root_tree = RB_ROOT; fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE; fs_info->metadata_ratio = 0; fs_info->defrag_inodes = RB_ROOT; atomic64_set(&fs_info->free_chunk_space, 0); fs_info->tree_mod_log = RB_ROOT; fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL; btrfs_init_ref_verify(fs_info); fs_info->thread_pool_size = min_t(unsigned long, num_online_cpus() + 2, 8); INIT_LIST_HEAD(&fs_info->ordered_roots); spin_lock_init(&fs_info->ordered_root_lock); btrfs_init_scrub(fs_info); btrfs_init_balance(fs_info); btrfs_init_async_reclaim_work(fs_info); rwlock_init(&fs_info->block_group_cache_lock); fs_info->block_group_cache_tree = RB_ROOT_CACHED; extent_io_tree_init(fs_info, &fs_info->excluded_extents, IO_TREE_FS_EXCLUDED_EXTENTS); mutex_init(&fs_info->ordered_operations_mutex); mutex_init(&fs_info->tree_log_mutex); mutex_init(&fs_info->chunk_mutex); mutex_init(&fs_info->transaction_kthread_mutex); mutex_init(&fs_info->cleaner_mutex); mutex_init(&fs_info->ro_block_group_mutex); init_rwsem(&fs_info->commit_root_sem); init_rwsem(&fs_info->cleanup_work_sem); init_rwsem(&fs_info->subvol_sem); sema_init(&fs_info->uuid_tree_rescan_sem, 1); btrfs_init_dev_replace_locks(fs_info); btrfs_init_qgroup(fs_info); btrfs_discard_init(fs_info); btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); btrfs_init_free_cluster(&fs_info->data_alloc_cluster); init_waitqueue_head(&fs_info->transaction_throttle); init_waitqueue_head(&fs_info->transaction_wait); init_waitqueue_head(&fs_info->transaction_blocked_wait); init_waitqueue_head(&fs_info->async_submit_wait); init_waitqueue_head(&fs_info->delayed_iputs_wait); /* Usable values until the real ones are cached from the superblock */ fs_info->nodesize = 4096; fs_info->sectorsize = 4096; fs_info->sectorsize_bits = ilog2(4096); fs_info->stripesize = 4096; /* Default compress algorithm when user does -o compress */ fs_info->compress_type = BTRFS_COMPRESS_ZLIB; fs_info->max_extent_size = BTRFS_MAX_EXTENT_SIZE; spin_lock_init(&fs_info->swapfile_pins_lock); fs_info->swapfile_pins = RB_ROOT; fs_info->bg_reclaim_threshold = BTRFS_DEFAULT_RECLAIM_THRESH; INIT_WORK(&fs_info->reclaim_bgs_work, btrfs_reclaim_bgs_work); } static int init_mount_fs_info(struct btrfs_fs_info *fs_info, struct super_block *sb) { int ret; fs_info->sb = sb; /* Temporary fixed values for block size until we read the superblock. */ sb->s_blocksize = BTRFS_BDEV_BLOCKSIZE; sb->s_blocksize_bits = blksize_bits(BTRFS_BDEV_BLOCKSIZE); ret = percpu_counter_init(&fs_info->ordered_bytes, 0, GFP_KERNEL); if (ret) return ret; ret = percpu_counter_init(&fs_info->evictable_extent_maps, 0, GFP_KERNEL); if (ret) return ret; spin_lock_init(&fs_info->extent_map_shrinker_lock); ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0, GFP_KERNEL); if (ret) return ret; fs_info->dirty_metadata_batch = PAGE_SIZE * (1 + ilog2(nr_cpu_ids)); ret = percpu_counter_init(&fs_info->delalloc_bytes, 0, GFP_KERNEL); if (ret) return ret; ret = percpu_counter_init(&fs_info->dev_replace.bio_counter, 0, GFP_KERNEL); if (ret) return ret; fs_info->delayed_root = kmalloc(sizeof(struct btrfs_delayed_root), GFP_KERNEL); if (!fs_info->delayed_root) return -ENOMEM; btrfs_init_delayed_root(fs_info->delayed_root); if (sb_rdonly(sb)) set_bit(BTRFS_FS_STATE_RO, &fs_info->fs_state); if (btrfs_test_opt(fs_info, IGNOREMETACSUMS)) set_bit(BTRFS_FS_STATE_SKIP_META_CSUMS, &fs_info->fs_state); return btrfs_alloc_stripe_hash_table(fs_info); } static int btrfs_uuid_rescan_kthread(void *data) { struct btrfs_fs_info *fs_info = data; int ret; /* * 1st step is to iterate through the existing UUID tree and * to delete all entries that contain outdated data. * 2nd step is to add all missing entries to the UUID tree. */ ret = btrfs_uuid_tree_iterate(fs_info); if (ret < 0) { if (ret != -EINTR) btrfs_warn(fs_info, "iterating uuid_tree failed %d", ret); up(&fs_info->uuid_tree_rescan_sem); return ret; } return btrfs_uuid_scan_kthread(data); } static int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info) { struct task_struct *task; down(&fs_info->uuid_tree_rescan_sem); task = kthread_run(btrfs_uuid_rescan_kthread, fs_info, "btrfs-uuid"); if (IS_ERR(task)) { /* fs_info->update_uuid_tree_gen remains 0 in all error case */ btrfs_warn(fs_info, "failed to start uuid_rescan task"); up(&fs_info->uuid_tree_rescan_sem); return PTR_ERR(task); } return 0; } static int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) { u64 root_objectid = 0; struct btrfs_root *gang[8]; int ret = 0; while (1) { unsigned int found; spin_lock(&fs_info->fs_roots_radix_lock); found = radix_tree_gang_lookup(&fs_info->fs_roots_radix, (void **)gang, root_objectid, ARRAY_SIZE(gang)); if (!found) { spin_unlock(&fs_info->fs_roots_radix_lock); break; } root_objectid = btrfs_root_id(gang[found - 1]) + 1; for (int i = 0; i < found; i++) { /* Avoid to grab roots in dead_roots. */ if (btrfs_root_refs(&gang[i]->root_item) == 0) { gang[i] = NULL; continue; } /* Grab all the search result for later use. */ gang[i] = btrfs_grab_root(gang[i]); } spin_unlock(&fs_info->fs_roots_radix_lock); for (int i = 0; i < found; i++) { if (!gang[i]) continue; root_objectid = btrfs_root_id(gang[i]); /* * Continue to release the remaining roots after the first * error without cleanup and preserve the first error * for the return. */ if (!ret) ret = btrfs_orphan_cleanup(gang[i]); btrfs_put_root(gang[i]); } if (ret) break; root_objectid++; } return ret; } /* * Mounting logic specific to read-write file systems. Shared by open_ctree * and btrfs_remount when remounting from read-only to read-write. */ int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info) { int ret; const bool cache_opt = btrfs_test_opt(fs_info, SPACE_CACHE); bool rebuild_free_space_tree = false; if (btrfs_test_opt(fs_info, CLEAR_CACHE) && btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) { if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) btrfs_warn(fs_info, "'clear_cache' option is ignored with extent tree v2"); else rebuild_free_space_tree = true; } else if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) && !btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID)) { btrfs_warn(fs_info, "free space tree is invalid"); rebuild_free_space_tree = true; } if (rebuild_free_space_tree) { btrfs_info(fs_info, "rebuilding free space tree"); ret = btrfs_rebuild_free_space_tree(fs_info); if (ret) { btrfs_warn(fs_info, "failed to rebuild free space tree: %d", ret); goto out; } } if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) && !btrfs_test_opt(fs_info, FREE_SPACE_TREE)) { btrfs_info(fs_info, "disabling free space tree"); ret = btrfs_delete_free_space_tree(fs_info); if (ret) { btrfs_warn(fs_info, "failed to disable free space tree: %d", ret); goto out; } } /* * btrfs_find_orphan_roots() is responsible for finding all the dead * roots (with 0 refs), flag them with BTRFS_ROOT_DEAD_TREE and load * them into the fs_info->fs_roots_radix tree. This must be done before * calling btrfs_orphan_cleanup() on the tree root. If we don't do it * first, then btrfs_orphan_cleanup() will delete a dead root's orphan * item before the root's tree is deleted - this means that if we unmount * or crash before the deletion completes, on the next mount we will not * delete what remains of the tree because the orphan item does not * exists anymore, which is what tells us we have a pending deletion. */ ret = btrfs_find_orphan_roots(fs_info); if (ret) goto out; ret = btrfs_cleanup_fs_roots(fs_info); if (ret) goto out; down_read(&fs_info->cleanup_work_sem); if ((ret = btrfs_orphan_cleanup(fs_info->fs_root)) || (ret = btrfs_orphan_cleanup(fs_info->tree_root))) { up_read(&fs_info->cleanup_work_sem); goto out; } up_read(&fs_info->cleanup_work_sem); mutex_lock(&fs_info->cleaner_mutex); ret = btrfs_recover_relocation(fs_info); mutex_unlock(&fs_info->cleaner_mutex); if (ret < 0) { btrfs_warn(fs_info, "failed to recover relocation: %d", ret); goto out; } if (btrfs_test_opt(fs_info, FREE_SPACE_TREE) && !btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) { btrfs_info(fs_info, "creating free space tree"); ret = btrfs_create_free_space_tree(fs_info); if (ret) { btrfs_warn(fs_info, "failed to create free space tree: %d", ret); goto out; } } if (cache_opt != btrfs_free_space_cache_v1_active(fs_info)) { ret = btrfs_set_free_space_cache_v1_active(fs_info, cache_opt); if (ret) goto out; } ret = btrfs_resume_balance_async(fs_info); if (ret) goto out; ret = btrfs_resume_dev_replace_async(fs_info); if (ret) { btrfs_warn(fs_info, "failed to resume dev_replace"); goto out; } btrfs_qgroup_rescan_resume(fs_info); if (!fs_info->uuid_root) { btrfs_info(fs_info, "creating UUID tree"); ret = btrfs_create_uuid_tree(fs_info); if (ret) { btrfs_warn(fs_info, "failed to create the UUID tree %d", ret); goto out; } } out: return ret; } /* * Do various sanity and dependency checks of different features. * * @is_rw_mount: If the mount is read-write. * * This is the place for less strict checks (like for subpage or artificial * feature dependencies). * * For strict checks or possible corruption detection, see * btrfs_validate_super(). * * This should be called after btrfs_parse_options(), as some mount options * (space cache related) can modify on-disk format like free space tree and * screw up certain feature dependencies. */ int btrfs_check_features(struct btrfs_fs_info *fs_info, bool is_rw_mount) { struct btrfs_super_block *disk_super = fs_info->super_copy; u64 incompat = btrfs_super_incompat_flags(disk_super); const u64 compat_ro = btrfs_super_compat_ro_flags(disk_super); const u64 compat_ro_unsupp = (compat_ro & ~BTRFS_FEATURE_COMPAT_RO_SUPP); if (incompat & ~BTRFS_FEATURE_INCOMPAT_SUPP) { btrfs_err(fs_info, "cannot mount because of unknown incompat features (0x%llx)", incompat); return -EINVAL; } /* Runtime limitation for mixed block groups. */ if ((incompat & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) && (fs_info->sectorsize != fs_info->nodesize)) { btrfs_err(fs_info, "unequal nodesize/sectorsize (%u != %u) are not allowed for mixed block groups", fs_info->nodesize, fs_info->sectorsize); return -EINVAL; } /* Mixed backref is an always-enabled feature. */ incompat |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; /* Set compression related flags just in case. */ if (fs_info->compress_type == BTRFS_COMPRESS_LZO) incompat |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; else if (fs_info->compress_type == BTRFS_COMPRESS_ZSTD) incompat |= BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD; /* * An ancient flag, which should really be marked deprecated. * Such runtime limitation doesn't really need a incompat flag. */ if (btrfs_super_nodesize(disk_super) > PAGE_SIZE) incompat |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA; if (compat_ro_unsupp && is_rw_mount) { btrfs_err(fs_info, "cannot mount read-write because of unknown compat_ro features (0x%llx)", compat_ro); return -EINVAL; } /* * We have unsupported RO compat features, although RO mounted, we * should not cause any metadata writes, including log replay. * Or we could screw up whatever the new feature requires. */ if (compat_ro_unsupp && btrfs_super_log_root(disk_super) && !btrfs_test_opt(fs_info, NOLOGREPLAY)) { btrfs_err(fs_info, "cannot replay dirty log with unsupported compat_ro features (0x%llx), try rescue=nologreplay", compat_ro); return -EINVAL; } /* * Artificial limitations for block group tree, to force * block-group-tree to rely on no-holes and free-space-tree. */ if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE) && (!btrfs_fs_incompat(fs_info, NO_HOLES) || !btrfs_test_opt(fs_info, FREE_SPACE_TREE))) { btrfs_err(fs_info, "block-group-tree feature requires no-holes and free-space-tree features"); return -EINVAL; } /* * Subpage runtime limitation on v1 cache. * * V1 space cache still has some hard codeed PAGE_SIZE usage, while * we're already defaulting to v2 cache, no need to bother v1 as it's * going to be deprecated anyway. */ if (fs_info->sectorsize < PAGE_SIZE && btrfs_test_opt(fs_info, SPACE_CACHE)) { btrfs_warn(fs_info, "v1 space cache is not supported for page size %lu with sectorsize %u", PAGE_SIZE, fs_info->sectorsize); return -EINVAL; } /* This can be called by remount, we need to protect the super block. */ spin_lock(&fs_info->super_lock); btrfs_set_super_incompat_flags(disk_super, incompat); spin_unlock(&fs_info->super_lock); return 0; } int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices, const char *options) { u32 sectorsize; u32 nodesize; u32 stripesize; u64 generation; u16 csum_type; struct btrfs_super_block *disk_super; struct btrfs_fs_info *fs_info = btrfs_sb(sb); struct btrfs_root *tree_root; struct btrfs_root *chunk_root; int ret; int level; ret = init_mount_fs_info(fs_info, sb); if (ret) goto fail; /* These need to be init'ed before we start creating inodes and such. */ tree_root = btrfs_alloc_root(fs_info, BTRFS_ROOT_TREE_OBJECTID, GFP_KERNEL); fs_info->tree_root = tree_root; chunk_root = btrfs_alloc_root(fs_info, BTRFS_CHUNK_TREE_OBJECTID, GFP_KERNEL); fs_info->chunk_root = chunk_root; if (!tree_root || !chunk_root) { ret = -ENOMEM; goto fail; } ret = btrfs_init_btree_inode(sb); if (ret) goto fail; invalidate_bdev(fs_devices->latest_dev->bdev); /* * Read super block and check the signature bytes only */ disk_super = btrfs_read_dev_super(fs_devices->latest_dev->bdev); if (IS_ERR(disk_super)) { ret = PTR_ERR(disk_super); goto fail_alloc; } btrfs_info(fs_info, "first mount of filesystem %pU", disk_super->fsid); /* * Verify the type first, if that or the checksum value are * corrupted, we'll find out */ csum_type = btrfs_super_csum_type(disk_super); if (!btrfs_supported_super_csum(csum_type)) { btrfs_err(fs_info, "unsupported checksum algorithm: %u", csum_type); ret = -EINVAL; btrfs_release_disk_super(disk_super); goto fail_alloc; } fs_info->csum_size = btrfs_super_csum_size(disk_super); ret = btrfs_init_csum_hash(fs_info, csum_type); if (ret) { btrfs_release_disk_super(disk_super); goto fail_alloc; } /* * We want to check superblock checksum, the type is stored inside. * Pass the whole disk block of size BTRFS_SUPER_INFO_SIZE (4k). */ if (btrfs_check_super_csum(fs_info, disk_super)) { btrfs_err(fs_info, "superblock checksum mismatch"); ret = -EINVAL; btrfs_release_disk_super(disk_super); goto fail_alloc; } /* * super_copy is zeroed at allocation time and we never touch the * following bytes up to INFO_SIZE, the checksum is calculated from * the whole block of INFO_SIZE */ memcpy(fs_info->super_copy, disk_super, sizeof(*fs_info->super_copy)); btrfs_release_disk_super(disk_super); disk_super = fs_info->super_copy; memcpy(fs_info->super_for_commit, fs_info->super_copy, sizeof(*fs_info->super_for_commit)); ret = btrfs_validate_mount_super(fs_info); if (ret) { btrfs_err(fs_info, "superblock contains fatal errors"); ret = -EINVAL; goto fail_alloc; } if (!btrfs_super_root(disk_super)) { btrfs_err(fs_info, "invalid superblock tree root bytenr"); ret = -EINVAL; goto fail_alloc; } /* check FS state, whether FS is broken. */ if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_ERROR) WRITE_ONCE(fs_info->fs_error, -EUCLEAN); /* Set up fs_info before parsing mount options */ nodesize = btrfs_super_nodesize(disk_super); sectorsize = btrfs_super_sectorsize(disk_super); stripesize = sectorsize; fs_info->dirty_metadata_batch = nodesize * (1 + ilog2(nr_cpu_ids)); fs_info->delalloc_batch = sectorsize * 512 * (1 + ilog2(nr_cpu_ids)); fs_info->nodesize = nodesize; fs_info->sectorsize = sectorsize; fs_info->sectorsize_bits = ilog2(sectorsize); fs_info->sectors_per_page = (PAGE_SIZE >> fs_info->sectorsize_bits); fs_info->csums_per_leaf = BTRFS_MAX_ITEM_SIZE(fs_info) / fs_info->csum_size; fs_info->stripesize = stripesize; /* * Handle the space caching options appropriately now that we have the * super block loaded and validated. */ btrfs_set_free_space_cache_settings(fs_info); if (!btrfs_check_options(fs_info, &fs_info->mount_opt, sb->s_flags)) { ret = -EINVAL; goto fail_alloc; } ret = btrfs_check_features(fs_info, !sb_rdonly(sb)); if (ret < 0) goto fail_alloc; /* * At this point our mount options are validated, if we set ->max_inline * to something non-standard make sure we truncate it to sectorsize. */ fs_info->max_inline = min_t(u64, fs_info->max_inline, fs_info->sectorsize); if (sectorsize < PAGE_SIZE) btrfs_warn(fs_info, "read-write for sector size %u with page size %lu is experimental", sectorsize, PAGE_SIZE); ret = btrfs_init_workqueues(fs_info); if (ret) goto fail_sb_buffer; sb->s_bdi->ra_pages *= btrfs_super_num_devices(disk_super); sb->s_bdi->ra_pages = max(sb->s_bdi->ra_pages, SZ_4M / PAGE_SIZE); /* Update the values for the current filesystem. */ sb->s_blocksize = sectorsize; sb->s_blocksize_bits = blksize_bits(sectorsize); memcpy(&sb->s_uuid, fs_info->fs_devices->fsid, BTRFS_FSID_SIZE); mutex_lock(&fs_info->chunk_mutex); ret = btrfs_read_sys_array(fs_info); mutex_unlock(&fs_info->chunk_mutex); if (ret) { btrfs_err(fs_info, "failed to read the system array: %d", ret); goto fail_sb_buffer; } generation = btrfs_super_chunk_root_generation(disk_super); level = btrfs_super_chunk_root_level(disk_super); ret = load_super_root(chunk_root, btrfs_super_chunk_root(disk_super), generation, level); if (ret) { btrfs_err(fs_info, "failed to read chunk root"); goto fail_tree_roots; } read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid, offsetof(struct btrfs_header, chunk_tree_uuid), BTRFS_UUID_SIZE); ret = btrfs_read_chunk_tree(fs_info); if (ret) { btrfs_err(fs_info, "failed to read chunk tree: %d", ret); goto fail_tree_roots; } /* * At this point we know all the devices that make this filesystem, * including the seed devices but we don't know yet if the replace * target is required. So free devices that are not part of this * filesystem but skip the replace target device which is checked * below in btrfs_init_dev_replace(). */ btrfs_free_extra_devids(fs_devices); if (!fs_devices->latest_dev->bdev) { btrfs_err(fs_info, "failed to read devices"); ret = -EIO; goto fail_tree_roots; } ret = init_tree_roots(fs_info); if (ret) goto fail_tree_roots; /* * Get zone type information of zoned block devices. This will also * handle emulation of a zoned filesystem if a regular device has the * zoned incompat feature flag set. */ ret = btrfs_get_dev_zone_info_all_devices(fs_info); if (ret) { btrfs_err(fs_info, "zoned: failed to read device zone info: %d", ret); goto fail_block_groups; } /* * If we have a uuid root and we're not being told to rescan we need to * check the generation here so we can set the * BTRFS_FS_UPDATE_UUID_TREE_GEN bit. Otherwise we could commit the * transaction during a balance or the log replay without updating the * uuid generation, and then if we crash we would rescan the uuid tree, * even though it was perfectly fine. */ if (fs_info->uuid_root && !btrfs_test_opt(fs_info, RESCAN_UUID_TREE) && fs_info->generation == btrfs_super_uuid_tree_generation(disk_super)) set_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags); ret = btrfs_verify_dev_extents(fs_info); if (ret) { btrfs_err(fs_info, "failed to verify dev extents against chunks: %d", ret); goto fail_block_groups; } ret = btrfs_recover_balance(fs_info); if (ret) { btrfs_err(fs_info, "failed to recover balance: %d", ret); goto fail_block_groups; } ret = btrfs_init_dev_stats(fs_info); if (ret) { btrfs_err(fs_info, "failed to init dev_stats: %d", ret); goto fail_block_groups; } ret = btrfs_init_dev_replace(fs_info); if (ret) { btrfs_err(fs_info, "failed to init dev_replace: %d", ret); goto fail_block_groups; } ret = btrfs_check_zoned_mode(fs_info); if (ret) { btrfs_err(fs_info, "failed to initialize zoned mode: %d", ret); goto fail_block_groups; } ret = btrfs_sysfs_add_fsid(fs_devices); if (ret) { btrfs_err(fs_info, "failed to init sysfs fsid interface: %d", ret); goto fail_block_groups; } ret = btrfs_sysfs_add_mounted(fs_info); if (ret) { btrfs_err(fs_info, "failed to init sysfs interface: %d", ret); goto fail_fsdev_sysfs; } ret = btrfs_init_space_info(fs_info); if (ret) { btrfs_err(fs_info, "failed to initialize space info: %d", ret); goto fail_sysfs; } ret = btrfs_read_block_groups(fs_info); if (ret) { btrfs_err(fs_info, "failed to read block groups: %d", ret); goto fail_sysfs; } btrfs_free_zone_cache(fs_info); btrfs_check_active_zone_reservation(fs_info); if (!sb_rdonly(sb) && fs_info->fs_devices->missing_devices && !btrfs_check_rw_degradable(fs_info, NULL)) { btrfs_warn(fs_info, "writable mount is not allowed due to too many missing devices"); ret = -EINVAL; goto fail_sysfs; } fs_info->cleaner_kthread = kthread_run(cleaner_kthread, fs_info, "btrfs-cleaner"); if (IS_ERR(fs_info->cleaner_kthread)) { ret = PTR_ERR(fs_info->cleaner_kthread); goto fail_sysfs; } fs_info->transaction_kthread = kthread_run(transaction_kthread, tree_root, "btrfs-transaction"); if (IS_ERR(fs_info->transaction_kthread)) { ret = PTR_ERR(fs_info->transaction_kthread); goto fail_cleaner; } ret = btrfs_read_qgroup_config(fs_info); if (ret) goto fail_trans_kthread; if (btrfs_build_ref_tree(fs_info)) btrfs_err(fs_info, "couldn't build ref tree"); /* do not make disk changes in broken FS or nologreplay is given */ if (btrfs_super_log_root(disk_super) != 0 && !btrfs_test_opt(fs_info, NOLOGREPLAY)) { btrfs_info(fs_info, "start tree-log replay"); ret = btrfs_replay_log(fs_info, fs_devices); if (ret) goto fail_qgroup; } fs_info->fs_root = btrfs_get_fs_root(fs_info, BTRFS_FS_TREE_OBJECTID, true); if (IS_ERR(fs_info->fs_root)) { ret = PTR_ERR(fs_info->fs_root); btrfs_warn(fs_info, "failed to read fs tree: %d", ret); fs_info->fs_root = NULL; goto fail_qgroup; } if (sb_rdonly(sb)) return 0; ret = btrfs_start_pre_rw_mount(fs_info); if (ret) { close_ctree(fs_info); return ret; } btrfs_discard_resume(fs_info); if (fs_info->uuid_root && (btrfs_test_opt(fs_info, RESCAN_UUID_TREE) || fs_info->generation != btrfs_super_uuid_tree_generation(disk_super))) { btrfs_info(fs_info, "checking UUID tree"); ret = btrfs_check_uuid_tree(fs_info); if (ret) { btrfs_warn(fs_info, "failed to check the UUID tree: %d", ret); close_ctree(fs_info); return ret; } } set_bit(BTRFS_FS_OPEN, &fs_info->flags); /* Kick the cleaner thread so it'll start deleting snapshots. */ if (test_bit(BTRFS_FS_UNFINISHED_DROPS, &fs_info->flags)) wake_up_process(fs_info->cleaner_kthread); return 0; fail_qgroup: btrfs_free_qgroup_config(fs_info); fail_trans_kthread: kthread_stop(fs_info->transaction_kthread); btrfs_cleanup_transaction(fs_info); btrfs_free_fs_roots(fs_info); fail_cleaner: kthread_stop(fs_info->cleaner_kthread); /* * make sure we're done with the btree inode before we stop our * kthreads */ filemap_write_and_wait(fs_info->btree_inode->i_mapping); fail_sysfs: btrfs_sysfs_remove_mounted(fs_info); fail_fsdev_sysfs: btrfs_sysfs_remove_fsid(fs_info->fs_devices); fail_block_groups: btrfs_put_block_group_cache(fs_info); fail_tree_roots: if (fs_info->data_reloc_root) btrfs_drop_and_free_fs_root(fs_info, fs_info->data_reloc_root); free_root_pointers(fs_info, true); invalidate_inode_pages2(fs_info->btree_inode->i_mapping); fail_sb_buffer: btrfs_stop_all_workers(fs_info); btrfs_free_block_groups(fs_info); fail_alloc: btrfs_mapping_tree_free(fs_info); iput(fs_info->btree_inode); fail: btrfs_close_devices(fs_info->fs_devices); ASSERT(ret < 0); return ret; } ALLOW_ERROR_INJECTION(open_ctree, ERRNO); static void btrfs_end_super_write(struct bio *bio) { struct btrfs_device *device = bio->bi_private; struct folio_iter fi; bio_for_each_folio_all(fi, bio) { if (bio->bi_status) { btrfs_warn_rl_in_rcu(device->fs_info, "lost super block write due to IO error on %s (%d)", btrfs_dev_name(device), blk_status_to_errno(bio->bi_status)); btrfs_dev_stat_inc_and_print(device, BTRFS_DEV_STAT_WRITE_ERRS); /* Ensure failure if the primary sb fails. */ if (bio->bi_opf & REQ_FUA) atomic_add(BTRFS_SUPER_PRIMARY_WRITE_ERROR, &device->sb_write_errors); else atomic_inc(&device->sb_write_errors); } folio_unlock(fi.folio); folio_put(fi.folio); } bio_put(bio); } struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev, int copy_num, bool drop_cache) { struct btrfs_super_block *super; struct page *page; u64 bytenr, bytenr_orig; struct address_space *mapping = bdev->bd_mapping; int ret; bytenr_orig = btrfs_sb_offset(copy_num); ret = btrfs_sb_log_location_bdev(bdev, copy_num, READ, &bytenr); if (ret == -ENOENT) return ERR_PTR(-EINVAL); else if (ret) return ERR_PTR(ret); if (bytenr + BTRFS_SUPER_INFO_SIZE >= bdev_nr_bytes(bdev)) return ERR_PTR(-EINVAL); if (drop_cache) { /* This should only be called with the primary sb. */ ASSERT(copy_num == 0); /* * Drop the page of the primary superblock, so later read will * always read from the device. */ invalidate_inode_pages2_range(mapping, bytenr >> PAGE_SHIFT, (bytenr + BTRFS_SUPER_INFO_SIZE) >> PAGE_SHIFT); } page = read_cache_page_gfp(mapping, bytenr >> PAGE_SHIFT, GFP_NOFS); if (IS_ERR(page)) return ERR_CAST(page); super = page_address(page); if (btrfs_super_magic(super) != BTRFS_MAGIC) { btrfs_release_disk_super(super); return ERR_PTR(-ENODATA); } if (btrfs_super_bytenr(super) != bytenr_orig) { btrfs_release_disk_super(super); return ERR_PTR(-EINVAL); } return super; } struct btrfs_super_block *btrfs_read_dev_super(struct block_device *bdev) { struct btrfs_super_block *super, *latest = NULL; int i; u64 transid = 0; /* we would like to check all the supers, but that would make * a btrfs mount succeed after a mkfs from a different FS. * So, we need to add a special mount option to scan for * later supers, using BTRFS_SUPER_MIRROR_MAX instead */ for (i = 0; i < 1; i++) { super = btrfs_read_dev_one_super(bdev, i, false); if (IS_ERR(super)) continue; if (!latest || btrfs_super_generation(super) > transid) { if (latest) btrfs_release_disk_super(super); latest = super; transid = btrfs_super_generation(super); } } return super; } /* * Write superblock @sb to the @device. Do not wait for completion, all the * folios we use for writing are locked. * * Write @max_mirrors copies of the superblock, where 0 means default that fit * the expected device size at commit time. Note that max_mirrors must be * same for write and wait phases. * * Return number of errors when folio is not found or submission fails. */ static int write_dev_supers(struct btrfs_device *device, struct btrfs_super_block *sb, int max_mirrors) { struct btrfs_fs_info *fs_info = device->fs_info; struct address_space *mapping = device->bdev->bd_mapping; SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); int i; int ret; u64 bytenr, bytenr_orig; atomic_set(&device->sb_write_errors, 0); if (max_mirrors == 0) max_mirrors = BTRFS_SUPER_MIRROR_MAX; shash->tfm = fs_info->csum_shash; for (i = 0; i < max_mirrors; i++) { struct folio *folio; struct bio *bio; struct btrfs_super_block *disk_super; size_t offset; bytenr_orig = btrfs_sb_offset(i); ret = btrfs_sb_log_location(device, i, WRITE, &bytenr); if (ret == -ENOENT) { continue; } else if (ret < 0) { btrfs_err(device->fs_info, "couldn't get super block location for mirror %d", i); atomic_inc(&device->sb_write_errors); continue; } if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->commit_total_bytes) break; btrfs_set_super_bytenr(sb, bytenr_orig); crypto_shash_digest(shash, (const char *)sb + BTRFS_CSUM_SIZE, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE, sb->csum); folio = __filemap_get_folio(mapping, bytenr >> PAGE_SHIFT, FGP_LOCK | FGP_ACCESSED | FGP_CREAT, GFP_NOFS); if (IS_ERR(folio)) { btrfs_err(device->fs_info, "couldn't get super block page for bytenr %llu", bytenr); atomic_inc(&device->sb_write_errors); continue; } ASSERT(folio_order(folio) == 0); offset = offset_in_folio(folio, bytenr); disk_super = folio_address(folio) + offset; memcpy(disk_super, sb, BTRFS_SUPER_INFO_SIZE); /* * Directly use bios here instead of relying on the page cache * to do I/O, so we don't lose the ability to do integrity * checking. */ bio = bio_alloc(device->bdev, 1, REQ_OP_WRITE | REQ_SYNC | REQ_META | REQ_PRIO, GFP_NOFS); bio->bi_iter.bi_sector = bytenr >> SECTOR_SHIFT; bio->bi_private = device; bio->bi_end_io = btrfs_end_super_write; bio_add_folio_nofail(bio, folio, BTRFS_SUPER_INFO_SIZE, offset); /* * We FUA only the first super block. The others we allow to * go down lazy and there's a short window where the on-disk * copies might still contain the older version. */ if (i == 0 && !btrfs_test_opt(device->fs_info, NOBARRIER)) bio->bi_opf |= REQ_FUA; submit_bio(bio); if (btrfs_advance_sb_log(device, i)) atomic_inc(&device->sb_write_errors); } return atomic_read(&device->sb_write_errors) < i ? 0 : -1; } /* * Wait for write completion of superblocks done by write_dev_supers, * @max_mirrors same for write and wait phases. * * Return -1 if primary super block write failed or when there were no super block * copies written. Otherwise 0. */ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors) { int i; int errors = 0; bool primary_failed = false; int ret; u64 bytenr; if (max_mirrors == 0) max_mirrors = BTRFS_SUPER_MIRROR_MAX; for (i = 0; i < max_mirrors; i++) { struct folio *folio; ret = btrfs_sb_log_location(device, i, READ, &bytenr); if (ret == -ENOENT) { break; } else if (ret < 0) { errors++; if (i == 0) primary_failed = true; continue; } if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->commit_total_bytes) break; folio = filemap_get_folio(device->bdev->bd_mapping, bytenr >> PAGE_SHIFT); /* If the folio has been removed, then we know it completed. */ if (IS_ERR(folio)) continue; ASSERT(folio_order(folio) == 0); /* Folio will be unlocked once the write completes. */ folio_wait_locked(folio); folio_put(folio); } errors += atomic_read(&device->sb_write_errors); if (errors >= BTRFS_SUPER_PRIMARY_WRITE_ERROR) primary_failed = true; if (primary_failed) { btrfs_err(device->fs_info, "error writing primary super block to device %llu", device->devid); return -1; } return errors < i ? 0 : -1; } /* * endio for the write_dev_flush, this will wake anyone waiting * for the barrier when it is done */ static void btrfs_end_empty_barrier(struct bio *bio) { bio_uninit(bio); complete(bio->bi_private); } /* * Submit a flush request to the device if it supports it. Error handling is * done in the waiting counterpart. */ static void write_dev_flush(struct btrfs_device *device) { struct bio *bio = &device->flush_bio; device->last_flush_error = BLK_STS_OK; bio_init(bio, device->bdev, NULL, 0, REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH); bio->bi_end_io = btrfs_end_empty_barrier; init_completion(&device->flush_wait); bio->bi_private = &device->flush_wait; submit_bio(bio); set_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state); } /* * If the flush bio has been submitted by write_dev_flush, wait for it. * Return true for any error, and false otherwise. */ static bool wait_dev_flush(struct btrfs_device *device) { struct bio *bio = &device->flush_bio; if (!test_and_clear_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state)) return false; wait_for_completion_io(&device->flush_wait); if (bio->bi_status) { device->last_flush_error = bio->bi_status; btrfs_dev_stat_inc_and_print(device, BTRFS_DEV_STAT_FLUSH_ERRS); return true; } return false; } /* * send an empty flush down to each device in parallel, * then wait for them */ static int barrier_all_devices(struct btrfs_fs_info *info) { struct list_head *head; struct btrfs_device *dev; int errors_wait = 0; lockdep_assert_held(&info->fs_devices->device_list_mutex); /* send down all the barriers */ head = &info->fs_devices->devices; list_for_each_entry(dev, head, dev_list) { if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state)) continue; if (!dev->bdev) continue; if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) || !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) continue; write_dev_flush(dev); } /* wait for all the barriers */ list_for_each_entry(dev, head, dev_list) { if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state)) continue; if (!dev->bdev) { errors_wait++; continue; } if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) || !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) continue; if (wait_dev_flush(dev)) errors_wait++; } /* * Checks last_flush_error of disks in order to determine the device * state. */ if (errors_wait && !btrfs_check_rw_degradable(info, NULL)) return -EIO; return 0; } int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags) { int raid_type; int min_tolerated = INT_MAX; if ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 || (flags & BTRFS_AVAIL_ALLOC_BIT_SINGLE)) min_tolerated = min_t(int, min_tolerated, btrfs_raid_array[BTRFS_RAID_SINGLE]. tolerated_failures); for (raid_type = 0; raid_type < BTRFS_NR_RAID_TYPES; raid_type++) { if (raid_type == BTRFS_RAID_SINGLE) continue; if (!(flags & btrfs_raid_array[raid_type].bg_flag)) continue; min_tolerated = min_t(int, min_tolerated, btrfs_raid_array[raid_type]. tolerated_failures); } if (min_tolerated == INT_MAX) { pr_warn("BTRFS: unknown raid flag: %llu", flags); min_tolerated = 0; } return min_tolerated; } int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors) { struct list_head *head; struct btrfs_device *dev; struct btrfs_super_block *sb; struct btrfs_dev_item *dev_item; int ret; int do_barriers; int max_errors; int total_errors = 0; u64 flags; do_barriers = !btrfs_test_opt(fs_info, NOBARRIER); /* * max_mirrors == 0 indicates we're from commit_transaction, * not from fsync where the tree roots in fs_info have not * been consistent on disk. */ if (max_mirrors == 0) backup_super_roots(fs_info); sb = fs_info->super_for_commit; dev_item = &sb->dev_item; mutex_lock(&fs_info->fs_devices->device_list_mutex); head = &fs_info->fs_devices->devices; max_errors = btrfs_super_num_devices(fs_info->super_copy) - 1; if (do_barriers) { ret = barrier_all_devices(fs_info); if (ret) { mutex_unlock( &fs_info->fs_devices->device_list_mutex); btrfs_handle_fs_error(fs_info, ret, "errors while submitting device barriers."); return ret; } } list_for_each_entry(dev, head, dev_list) { if (!dev->bdev) { total_errors++; continue; } if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) || !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) continue; btrfs_set_stack_device_generation(dev_item, 0); btrfs_set_stack_device_type(dev_item, dev->type); btrfs_set_stack_device_id(dev_item, dev->devid); btrfs_set_stack_device_total_bytes(dev_item, dev->commit_total_bytes); btrfs_set_stack_device_bytes_used(dev_item, dev->commit_bytes_used); btrfs_set_stack_device_io_align(dev_item, dev->io_align); btrfs_set_stack_device_io_width(dev_item, dev->io_width); btrfs_set_stack_device_sector_size(dev_item, dev->sector_size); memcpy(dev_item->uuid, dev->uuid, BTRFS_UUID_SIZE); memcpy(dev_item->fsid, dev->fs_devices->metadata_uuid, BTRFS_FSID_SIZE); flags = btrfs_super_flags(sb); btrfs_set_super_flags(sb, flags | BTRFS_HEADER_FLAG_WRITTEN); ret = btrfs_validate_write_super(fs_info, sb); if (ret < 0) { mutex_unlock(&fs_info->fs_devices->device_list_mutex); btrfs_handle_fs_error(fs_info, -EUCLEAN, "unexpected superblock corruption detected"); return -EUCLEAN; } ret = write_dev_supers(dev, sb, max_mirrors); if (ret) total_errors++; } if (total_errors > max_errors) { btrfs_err(fs_info, "%d errors while writing supers", total_errors); mutex_unlock(&fs_info->fs_devices->device_list_mutex); /* FUA is masked off if unsupported and can't be the reason */ btrfs_handle_fs_error(fs_info, -EIO, "%d errors while writing supers", total_errors); return -EIO; } total_errors = 0; list_for_each_entry(dev, head, dev_list) { if (!dev->bdev) continue; if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) || !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) continue; ret = wait_dev_supers(dev, max_mirrors); if (ret) total_errors++; } mutex_unlock(&fs_info->fs_devices->device_list_mutex); if (total_errors > max_errors) { btrfs_handle_fs_error(fs_info, -EIO, "%d errors while writing supers", total_errors); return -EIO; } return 0; } /* Drop a fs root from the radix tree and free it. */ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) { bool drop_ref = false; spin_lock(&fs_info->fs_roots_radix_lock); radix_tree_delete(&fs_info->fs_roots_radix, (unsigned long)btrfs_root_id(root)); if (test_and_clear_bit(BTRFS_ROOT_IN_RADIX, &root->state)) drop_ref = true; spin_unlock(&fs_info->fs_roots_radix_lock); if (BTRFS_FS_ERROR(fs_info)) { ASSERT(root->log_root == NULL); if (root->reloc_root) { btrfs_put_root(root->reloc_root); root->reloc_root = NULL; } } if (drop_ref) btrfs_put_root(root); } int btrfs_commit_super(struct btrfs_fs_info *fs_info) { mutex_lock(&fs_info->cleaner_mutex); btrfs_run_delayed_iputs(fs_info); mutex_unlock(&fs_info->cleaner_mutex); wake_up_process(fs_info->cleaner_kthread); /* wait until ongoing cleanup work done */ down_write(&fs_info->cleanup_work_sem); up_write(&fs_info->cleanup_work_sem); return btrfs_commit_current_transaction(fs_info->tree_root); } static void warn_about_uncommitted_trans(struct btrfs_fs_info *fs_info) { struct btrfs_transaction *trans; struct btrfs_transaction *tmp; bool found = false; /* * This function is only called at the very end of close_ctree(), * thus no other running transaction, no need to take trans_lock. */ ASSERT(test_bit(BTRFS_FS_CLOSING_DONE, &fs_info->flags)); list_for_each_entry_safe(trans, tmp, &fs_info->trans_list, list) { struct extent_state *cached = NULL; u64 dirty_bytes = 0; u64 cur = 0; u64 found_start; u64 found_end; found = true; while (find_first_extent_bit(&trans->dirty_pages, cur, &found_start, &found_end, EXTENT_DIRTY, &cached)) { dirty_bytes += found_end + 1 - found_start; cur = found_end + 1; } btrfs_warn(fs_info, "transaction %llu (with %llu dirty metadata bytes) is not committed", trans->transid, dirty_bytes); btrfs_cleanup_one_transaction(trans, fs_info); if (trans == fs_info->running_transaction) fs_info->running_transaction = NULL; list_del_init(&trans->list); btrfs_put_transaction(trans); trace_btrfs_transaction_commit(fs_info); } ASSERT(!found); } void __cold close_ctree(struct btrfs_fs_info *fs_info) { int ret; set_bit(BTRFS_FS_CLOSING_START, &fs_info->flags); /* * If we had UNFINISHED_DROPS we could still be processing them, so * clear that bit and wake up relocation so it can stop. * We must do this before stopping the block group reclaim task, because * at btrfs_relocate_block_group() we wait for this bit, and after the * wait we stop with -EINTR if btrfs_fs_closing() returns non-zero - we * have just set BTRFS_FS_CLOSING_START, so btrfs_fs_closing() will * return 1. */ btrfs_wake_unfinished_drop(fs_info); /* * We may have the reclaim task running and relocating a data block group, * in which case it may create delayed iputs. So stop it before we park * the cleaner kthread otherwise we can get new delayed iputs after * parking the cleaner, and that can make the async reclaim task to hang * if it's waiting for delayed iputs to complete, since the cleaner is * parked and can not run delayed iputs - this will make us hang when * trying to stop the async reclaim task. */ cancel_work_sync(&fs_info->reclaim_bgs_work); /* * We don't want the cleaner to start new transactions, add more delayed * iputs, etc. while we're closing. We can't use kthread_stop() yet * because that frees the task_struct, and the transaction kthread might * still try to wake up the cleaner. */ kthread_park(fs_info->cleaner_kthread); /* wait for the qgroup rescan worker to stop */ btrfs_qgroup_wait_for_completion(fs_info, false); /* wait for the uuid_scan task to finish */ down(&fs_info->uuid_tree_rescan_sem); /* avoid complains from lockdep et al., set sem back to initial state */ up(&fs_info->uuid_tree_rescan_sem); /* pause restriper - we want to resume on mount */ btrfs_pause_balance(fs_info); btrfs_dev_replace_suspend_for_unmount(fs_info); btrfs_scrub_cancel(fs_info); /* wait for any defraggers to finish */ wait_event(fs_info->transaction_wait, (atomic_read(&fs_info->defrag_running) == 0)); /* clear out the rbtree of defraggable inodes */ btrfs_cleanup_defrag_inodes(fs_info); /* * Wait for any fixup workers to complete. * If we don't wait for them here and they are still running by the time * we call kthread_stop() against the cleaner kthread further below, we * get an use-after-free on the cleaner because the fixup worker adds an * inode to the list of delayed iputs and then attempts to wakeup the * cleaner kthread, which was already stopped and destroyed. We parked * already the cleaner, but below we run all pending delayed iputs. */ btrfs_flush_workqueue(fs_info->fixup_workers); /* * After we parked the cleaner kthread, ordered extents may have * completed and created new delayed iputs. If one of the async reclaim * tasks is running and in the RUN_DELAYED_IPUTS flush state, then we * can hang forever trying to stop it, because if a delayed iput is * added after it ran btrfs_run_delayed_iputs() and before it called * btrfs_wait_on_delayed_iputs(), it will hang forever since there is * no one else to run iputs. * * So wait for all ongoing ordered extents to complete and then run * delayed iputs. This works because once we reach this point no one * can either create new ordered extents nor create delayed iputs * through some other means. * * Also note that btrfs_wait_ordered_roots() is not safe here, because * it waits for BTRFS_ORDERED_COMPLETE to be set on an ordered extent, * but the delayed iput for the respective inode is made only when doing * the final btrfs_put_ordered_extent() (which must happen at * btrfs_finish_ordered_io() when we are unmounting). */ btrfs_flush_workqueue(fs_info->endio_write_workers); /* Ordered extents for free space inodes. */ btrfs_flush_workqueue(fs_info->endio_freespace_worker); btrfs_run_delayed_iputs(fs_info); cancel_work_sync(&fs_info->async_reclaim_work); cancel_work_sync(&fs_info->async_data_reclaim_work); cancel_work_sync(&fs_info->preempt_reclaim_work); /* Cancel or finish ongoing discard work */ btrfs_discard_cleanup(fs_info); if (!sb_rdonly(fs_info->sb)) { /* * The cleaner kthread is stopped, so do one final pass over * unused block groups. */ btrfs_delete_unused_bgs(fs_info); /* * There might be existing delayed inode workers still running * and holding an empty delayed inode item. We must wait for * them to complete first because they can create a transaction. * This happens when someone calls btrfs_balance_delayed_items() * and then a transaction commit runs the same delayed nodes * before any delayed worker has done something with the nodes. * We must wait for any worker here and not at transaction * commit time since that could cause a deadlock. * This is a very rare case. */ btrfs_flush_workqueue(fs_info->delayed_workers); ret = btrfs_commit_super(fs_info); if (ret) btrfs_err(fs_info, "commit super ret %d", ret); } if (BTRFS_FS_ERROR(fs_info)) btrfs_error_commit_super(fs_info); kthread_stop(fs_info->transaction_kthread); kthread_stop(fs_info->cleaner_kthread); ASSERT(list_empty(&fs_info->delayed_iputs)); set_bit(BTRFS_FS_CLOSING_DONE, &fs_info->flags); if (btrfs_check_quota_leak(fs_info)) { WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG)); btrfs_err(fs_info, "qgroup reserved space leaked"); } btrfs_free_qgroup_config(fs_info); ASSERT(list_empty(&fs_info->delalloc_roots)); if (percpu_counter_sum(&fs_info->delalloc_bytes)) { btrfs_info(fs_info, "at unmount delalloc count %lld", percpu_counter_sum(&fs_info->delalloc_bytes)); } if (percpu_counter_sum(&fs_info->ordered_bytes)) btrfs_info(fs_info, "at unmount dio bytes count %lld", percpu_counter_sum(&fs_info->ordered_bytes)); btrfs_sysfs_remove_mounted(fs_info); btrfs_sysfs_remove_fsid(fs_info->fs_devices); btrfs_put_block_group_cache(fs_info); /* * we must make sure there is not any read request to * submit after we stopping all workers. */ invalidate_inode_pages2(fs_info->btree_inode->i_mapping); btrfs_stop_all_workers(fs_info); /* We shouldn't have any transaction open at this point */ warn_about_uncommitted_trans(fs_info); clear_bit(BTRFS_FS_OPEN, &fs_info->flags); free_root_pointers(fs_info, true); btrfs_free_fs_roots(fs_info); /* * We must free the block groups after dropping the fs_roots as we could * have had an IO error and have left over tree log blocks that aren't * cleaned up until the fs roots are freed. This makes the block group * accounting appear to be wrong because there's pending reserved bytes, * so make sure we do the block group cleanup afterwards. */ btrfs_free_block_groups(fs_info); iput(fs_info->btree_inode); btrfs_mapping_tree_free(fs_info); btrfs_close_devices(fs_info->fs_devices); } void btrfs_mark_buffer_dirty(struct btrfs_trans_handle *trans, struct extent_buffer *buf) { struct btrfs_fs_info *fs_info = buf->fs_info; u64 transid = btrfs_header_generation(buf); #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS /* * This is a fast path so only do this check if we have sanity tests * enabled. Normal people shouldn't be using unmapped buffers as dirty * outside of the sanity tests. */ if (unlikely(test_bit(EXTENT_BUFFER_UNMAPPED, &buf->bflags))) return; #endif /* This is an active transaction (its state < TRANS_STATE_UNBLOCKED). */ ASSERT(trans->transid == fs_info->generation); btrfs_assert_tree_write_locked(buf); if (unlikely(transid != fs_info->generation)) { btrfs_abort_transaction(trans, -EUCLEAN); btrfs_crit(fs_info, "dirty buffer transid mismatch, logical %llu found transid %llu running transid %llu", buf->start, transid, fs_info->generation); } set_extent_buffer_dirty(buf); } static void __btrfs_btree_balance_dirty(struct btrfs_fs_info *fs_info, int flush_delayed) { /* * looks as though older kernels can get into trouble with * this code, they end up stuck in balance_dirty_pages forever */ int ret; if (current->flags & PF_MEMALLOC) return; if (flush_delayed) btrfs_balance_delayed_items(fs_info); ret = __percpu_counter_compare(&fs_info->dirty_metadata_bytes, BTRFS_DIRTY_METADATA_THRESH, fs_info->dirty_metadata_batch); if (ret > 0) { balance_dirty_pages_ratelimited(fs_info->btree_inode->i_mapping); } } void btrfs_btree_balance_dirty(struct btrfs_fs_info *fs_info) { __btrfs_btree_balance_dirty(fs_info, 1); } void btrfs_btree_balance_dirty_nodelay(struct btrfs_fs_info *fs_info) { __btrfs_btree_balance_dirty(fs_info, 0); } static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info) { /* cleanup FS via transaction */ btrfs_cleanup_transaction(fs_info); mutex_lock(&fs_info->cleaner_mutex); btrfs_run_delayed_iputs(fs_info); mutex_unlock(&fs_info->cleaner_mutex); down_write(&fs_info->cleanup_work_sem); up_write(&fs_info->cleanup_work_sem); } static void btrfs_drop_all_logs(struct btrfs_fs_info *fs_info) { struct btrfs_root *gang[8]; u64 root_objectid = 0; int ret; spin_lock(&fs_info->fs_roots_radix_lock); while ((ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix, (void **)gang, root_objectid, ARRAY_SIZE(gang))) != 0) { int i; for (i = 0; i < ret; i++) gang[i] = btrfs_grab_root(gang[i]); spin_unlock(&fs_info->fs_roots_radix_lock); for (i = 0; i < ret; i++) { if (!gang[i]) continue; root_objectid = btrfs_root_id(gang[i]); btrfs_free_log(NULL, gang[i]); btrfs_put_root(gang[i]); } root_objectid++; spin_lock(&fs_info->fs_roots_radix_lock); } spin_unlock(&fs_info->fs_roots_radix_lock); btrfs_free_log_root_tree(NULL, fs_info); } static void btrfs_destroy_ordered_extents(struct btrfs_root *root) { struct btrfs_ordered_extent *ordered; spin_lock(&root->ordered_extent_lock); /* * This will just short circuit the ordered completion stuff which will * make sure the ordered extent gets properly cleaned up. */ list_for_each_entry(ordered, &root->ordered_extents, root_extent_list) set_bit(BTRFS_ORDERED_IOERR, &ordered->flags); spin_unlock(&root->ordered_extent_lock); } static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info) { struct btrfs_root *root; LIST_HEAD(splice); spin_lock(&fs_info->ordered_root_lock); list_splice_init(&fs_info->ordered_roots, &splice); while (!list_empty(&splice)) { root = list_first_entry(&splice, struct btrfs_root, ordered_root); list_move_tail(&root->ordered_root, &fs_info->ordered_roots); spin_unlock(&fs_info->ordered_root_lock); btrfs_destroy_ordered_extents(root); cond_resched(); spin_lock(&fs_info->ordered_root_lock); } spin_unlock(&fs_info->ordered_root_lock); /* * We need this here because if we've been flipped read-only we won't * get sync() from the umount, so we need to make sure any ordered * extents that haven't had their dirty pages IO start writeout yet * actually get run and error out properly. */ btrfs_wait_ordered_roots(fs_info, U64_MAX, NULL); } static void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, struct btrfs_fs_info *fs_info) { struct rb_node *node; struct btrfs_delayed_ref_root *delayed_refs = &trans->delayed_refs; struct btrfs_delayed_ref_node *ref; spin_lock(&delayed_refs->lock); while ((node = rb_first_cached(&delayed_refs->href_root)) != NULL) { struct btrfs_delayed_ref_head *head; struct rb_node *n; bool pin_bytes = false; head = rb_entry(node, struct btrfs_delayed_ref_head, href_node); if (btrfs_delayed_ref_lock(delayed_refs, head)) continue; spin_lock(&head->lock); while ((n = rb_first_cached(&head->ref_tree)) != NULL) { ref = rb_entry(n, struct btrfs_delayed_ref_node, ref_node); rb_erase_cached(&ref->ref_node, &head->ref_tree); RB_CLEAR_NODE(&ref->ref_node); if (!list_empty(&ref->add_list)) list_del(&ref->add_list); atomic_dec(&delayed_refs->num_entries); btrfs_put_delayed_ref(ref); btrfs_delayed_refs_rsv_release(fs_info, 1, 0); } if (head->must_insert_reserved) pin_bytes = true; btrfs_free_delayed_extent_op(head->extent_op); btrfs_delete_ref_head(delayed_refs, head); spin_unlock(&head->lock); spin_unlock(&delayed_refs->lock); mutex_unlock(&head->mutex); if (pin_bytes) { struct btrfs_block_group *cache; cache = btrfs_lookup_block_group(fs_info, head->bytenr); BUG_ON(!cache); spin_lock(&cache->space_info->lock); spin_lock(&cache->lock); cache->pinned += head->num_bytes; btrfs_space_info_update_bytes_pinned(fs_info, cache->space_info, head->num_bytes); cache->reserved -= head->num_bytes; cache->space_info->bytes_reserved -= head->num_bytes; spin_unlock(&cache->lock); spin_unlock(&cache->space_info->lock); btrfs_put_block_group(cache); btrfs_error_unpin_extent_range(fs_info, head->bytenr, head->bytenr + head->num_bytes - 1); } btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head); btrfs_put_delayed_ref_head(head); cond_resched(); spin_lock(&delayed_refs->lock); } btrfs_qgroup_destroy_extent_records(trans); spin_unlock(&delayed_refs->lock); } static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root) { struct btrfs_inode *btrfs_inode; LIST_HEAD(splice); spin_lock(&root->delalloc_lock); list_splice_init(&root->delalloc_inodes, &splice); while (!list_empty(&splice)) { struct inode *inode = NULL; btrfs_inode = list_first_entry(&splice, struct btrfs_inode, delalloc_inodes); btrfs_del_delalloc_inode(btrfs_inode); spin_unlock(&root->delalloc_lock); /* * Make sure we get a live inode and that it'll not disappear * meanwhile. */ inode = igrab(&btrfs_inode->vfs_inode); if (inode) { unsigned int nofs_flag; nofs_flag = memalloc_nofs_save(); invalidate_inode_pages2(inode->i_mapping); memalloc_nofs_restore(nofs_flag); iput(inode); } spin_lock(&root->delalloc_lock); } spin_unlock(&root->delalloc_lock); } static void btrfs_destroy_all_delalloc_inodes(struct btrfs_fs_info *fs_info) { struct btrfs_root *root; LIST_HEAD(splice); spin_lock(&fs_info->delalloc_root_lock); list_splice_init(&fs_info->delalloc_roots, &splice); while (!list_empty(&splice)) { root = list_first_entry(&splice, struct btrfs_root, delalloc_root); root = btrfs_grab_root(root); BUG_ON(!root); spin_unlock(&fs_info->delalloc_root_lock); btrfs_destroy_delalloc_inodes(root); btrfs_put_root(root); spin_lock(&fs_info->delalloc_root_lock); } spin_unlock(&fs_info->delalloc_root_lock); } static void btrfs_destroy_marked_extents(struct btrfs_fs_info *fs_info, struct extent_io_tree *dirty_pages, int mark) { struct extent_buffer *eb; u64 start = 0; u64 end; while (find_first_extent_bit(dirty_pages, start, &start, &end, mark, NULL)) { clear_extent_bits(dirty_pages, start, end, mark); while (start <= end) { eb = find_extent_buffer(fs_info, start); start += fs_info->nodesize; if (!eb) continue; btrfs_tree_lock(eb); wait_on_extent_buffer_writeback(eb); btrfs_clear_buffer_dirty(NULL, eb); btrfs_tree_unlock(eb); free_extent_buffer_stale(eb); } } } static void btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info, struct extent_io_tree *unpin) { u64 start; u64 end; while (1) { struct extent_state *cached_state = NULL; /* * The btrfs_finish_extent_commit() may get the same range as * ours between find_first_extent_bit and clear_extent_dirty. * Hence, hold the unused_bg_unpin_mutex to avoid double unpin * the same extent range. */ mutex_lock(&fs_info->unused_bg_unpin_mutex); if (!find_first_extent_bit(unpin, 0, &start, &end, EXTENT_DIRTY, &cached_state)) { mutex_unlock(&fs_info->unused_bg_unpin_mutex); break; } clear_extent_dirty(unpin, start, end, &cached_state); free_extent_state(cached_state); btrfs_error_unpin_extent_range(fs_info, start, end); mutex_unlock(&fs_info->unused_bg_unpin_mutex); cond_resched(); } } static void btrfs_cleanup_bg_io(struct btrfs_block_group *cache) { struct inode *inode; inode = cache->io_ctl.inode; if (inode) { unsigned int nofs_flag; nofs_flag = memalloc_nofs_save(); invalidate_inode_pages2(inode->i_mapping); memalloc_nofs_restore(nofs_flag); BTRFS_I(inode)->generation = 0; cache->io_ctl.inode = NULL; iput(inode); } ASSERT(cache->io_ctl.pages == NULL); btrfs_put_block_group(cache); } void btrfs_cleanup_dirty_bgs(struct btrfs_transaction *cur_trans, struct btrfs_fs_info *fs_info) { struct btrfs_block_group *cache; spin_lock(&cur_trans->dirty_bgs_lock); while (!list_empty(&cur_trans->dirty_bgs)) { cache = list_first_entry(&cur_trans->dirty_bgs, struct btrfs_block_group, dirty_list); if (!list_empty(&cache->io_list)) { spin_unlock(&cur_trans->dirty_bgs_lock); list_del_init(&cache->io_list); btrfs_cleanup_bg_io(cache); spin_lock(&cur_trans->dirty_bgs_lock); } list_del_init(&cache->dirty_list); spin_lock(&cache->lock); cache->disk_cache_state = BTRFS_DC_ERROR; spin_unlock(&cache->lock); spin_unlock(&cur_trans->dirty_bgs_lock); btrfs_put_block_group(cache); btrfs_dec_delayed_refs_rsv_bg_updates(fs_info); spin_lock(&cur_trans->dirty_bgs_lock); } spin_unlock(&cur_trans->dirty_bgs_lock); /* * Refer to the definition of io_bgs member for details why it's safe * to use it without any locking */ while (!list_empty(&cur_trans->io_bgs)) { cache = list_first_entry(&cur_trans->io_bgs, struct btrfs_block_group, io_list); list_del_init(&cache->io_list); spin_lock(&cache->lock); cache->disk_cache_state = BTRFS_DC_ERROR; spin_unlock(&cache->lock); btrfs_cleanup_bg_io(cache); } } static void btrfs_free_all_qgroup_pertrans(struct btrfs_fs_info *fs_info) { struct btrfs_root *gang[8]; int i; int ret; spin_lock(&fs_info->fs_roots_radix_lock); while (1) { ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix, (void **)gang, 0, ARRAY_SIZE(gang), BTRFS_ROOT_TRANS_TAG); if (ret == 0) break; for (i = 0; i < ret; i++) { struct btrfs_root *root = gang[i]; btrfs_qgroup_free_meta_all_pertrans(root); radix_tree_tag_clear(&fs_info->fs_roots_radix, (unsigned long)btrfs_root_id(root), BTRFS_ROOT_TRANS_TAG); } } spin_unlock(&fs_info->fs_roots_radix_lock); } void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, struct btrfs_fs_info *fs_info) { struct btrfs_device *dev, *tmp; btrfs_cleanup_dirty_bgs(cur_trans, fs_info); ASSERT(list_empty(&cur_trans->dirty_bgs)); ASSERT(list_empty(&cur_trans->io_bgs)); list_for_each_entry_safe(dev, tmp, &cur_trans->dev_update_list, post_commit_list) { list_del_init(&dev->post_commit_list); } btrfs_destroy_delayed_refs(cur_trans, fs_info); cur_trans->state = TRANS_STATE_COMMIT_START; wake_up(&fs_info->transaction_blocked_wait); cur_trans->state = TRANS_STATE_UNBLOCKED; wake_up(&fs_info->transaction_wait); btrfs_destroy_marked_extents(fs_info, &cur_trans->dirty_pages, EXTENT_DIRTY); btrfs_destroy_pinned_extent(fs_info, &cur_trans->pinned_extents); cur_trans->state =TRANS_STATE_COMPLETED; wake_up(&cur_trans->commit_wait); } static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info) { struct btrfs_transaction *t; mutex_lock(&fs_info->transaction_kthread_mutex); spin_lock(&fs_info->trans_lock); while (!list_empty(&fs_info->trans_list)) { t = list_first_entry(&fs_info->trans_list, struct btrfs_transaction, list); if (t->state >= TRANS_STATE_COMMIT_PREP) { refcount_inc(&t->use_count); spin_unlock(&fs_info->trans_lock); btrfs_wait_for_commit(fs_info, t->transid); btrfs_put_transaction(t); spin_lock(&fs_info->trans_lock); continue; } if (t == fs_info->running_transaction) { t->state = TRANS_STATE_COMMIT_DOING; spin_unlock(&fs_info->trans_lock); /* * We wait for 0 num_writers since we don't hold a trans * handle open currently for this transaction. */ wait_event(t->writer_wait, atomic_read(&t->num_writers) == 0); } else { spin_unlock(&fs_info->trans_lock); } btrfs_cleanup_one_transaction(t, fs_info); spin_lock(&fs_info->trans_lock); if (t == fs_info->running_transaction) fs_info->running_transaction = NULL; list_del_init(&t->list); spin_unlock(&fs_info->trans_lock); btrfs_put_transaction(t); trace_btrfs_transaction_commit(fs_info); spin_lock(&fs_info->trans_lock); } spin_unlock(&fs_info->trans_lock); btrfs_destroy_all_ordered_extents(fs_info); btrfs_destroy_delayed_inodes(fs_info); btrfs_assert_delayed_root_empty(fs_info); btrfs_destroy_all_delalloc_inodes(fs_info); btrfs_drop_all_logs(fs_info); btrfs_free_all_qgroup_pertrans(fs_info); mutex_unlock(&fs_info->transaction_kthread_mutex); return 0; } int btrfs_init_root_free_objectid(struct btrfs_root *root) { struct btrfs_path *path; int ret; struct extent_buffer *l; struct btrfs_key search_key; struct btrfs_key found_key; int slot; path = btrfs_alloc_path(); if (!path) return -ENOMEM; search_key.objectid = BTRFS_LAST_FREE_OBJECTID; search_key.type = -1; search_key.offset = (u64)-1; ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); if (ret < 0) goto error; if (ret == 0) { /* * Key with offset -1 found, there would have to exist a root * with such id, but this is out of valid range. */ ret = -EUCLEAN; goto error; } if (path->slots[0] > 0) { slot = path->slots[0] - 1; l = path->nodes[0]; btrfs_item_key_to_cpu(l, &found_key, slot); root->free_objectid = max_t(u64, found_key.objectid + 1, BTRFS_FIRST_FREE_OBJECTID); } else { root->free_objectid = BTRFS_FIRST_FREE_OBJECTID; } ret = 0; error: btrfs_free_path(path); return ret; } int btrfs_get_free_objectid(struct btrfs_root *root, u64 *objectid) { int ret; mutex_lock(&root->objectid_mutex); if (unlikely(root->free_objectid >= BTRFS_LAST_FREE_OBJECTID)) { btrfs_warn(root->fs_info, "the objectid of root %llu reaches its highest value", btrfs_root_id(root)); ret = -ENOSPC; goto out; } *objectid = root->free_objectid++; ret = 0; out: mutex_unlock(&root->objectid_mutex); return ret; }
8 9 8 8 8 8 8 8 8 8 8 8 8 8 8 8 62 62 62 62 62 8 54 54 8 62 62 62 62 62 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 // SPDX-License-Identifier: GPL-2.0-only /* * hosts.c Copyright (C) 1992 Drew Eckhardt * Copyright (C) 1993, 1994, 1995 Eric Youngdale * Copyright (C) 2002-2003 Christoph Hellwig * * mid to lowlevel SCSI driver interface * Initial versions: Drew Eckhardt * Subsequent revisions: Eric Youngdale * * <drew@colorado.edu> * * Jiffies wrap fixes (host->resetting), 3 Dec 1998 Andrea Arcangeli * Added QLOGIC QLA1280 SCSI controller kernel host support. * August 4, 1999 Fred Lewis, Intel DuPont * * Updated to reflect the new initialization scheme for the higher * level of scsi drivers (sd/sr/st) * September 17, 2000 Torben Mathiasen <tmm@image.dk> * * Restructured scsi_host lists and associated functions. * September 04, 2002 Mike Anderson (andmike@us.ibm.com) */ #include <linux/module.h> #include <linux/blkdev.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/kthread.h> #include <linux/string.h> #include <linux/mm.h> #include <linux/init.h> #include <linux/completion.h> #include <linux/transport_class.h> #include <linux/platform_device.h> #include <linux/pm_runtime.h> #include <linux/idr.h> #include <scsi/scsi_device.h> #include <scsi/scsi_host.h> #include <scsi/scsi_transport.h> #include <scsi/scsi_cmnd.h> #include "scsi_priv.h" #include "scsi_logging.h" static int shost_eh_deadline = -1; module_param_named(eh_deadline, shost_eh_deadline, int, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(eh_deadline, "SCSI EH timeout in seconds (should be between 0 and 2^31-1)"); static DEFINE_IDA(host_index_ida); static void scsi_host_cls_release(struct device *dev) { put_device(&class_to_shost(dev)->shost_gendev); } static struct class shost_class = { .name = "scsi_host", .dev_release = scsi_host_cls_release, .dev_groups = scsi_shost_groups, }; /** * scsi_host_set_state - Take the given host through the host state model. * @shost: scsi host to change the state of. * @state: state to change to. * * Returns zero if unsuccessful or an error if the requested * transition is illegal. **/ int scsi_host_set_state(struct Scsi_Host *shost, enum scsi_host_state state) { enum scsi_host_state oldstate = shost->shost_state; if (state == oldstate) return 0; switch (state) { case SHOST_CREATED: /* There are no legal states that come back to * created. This is the manually initialised start * state */ goto illegal; case SHOST_RUNNING: switch (oldstate) { case SHOST_CREATED: case SHOST_RECOVERY: break; default: goto illegal; } break; case SHOST_RECOVERY: switch (oldstate) { case SHOST_RUNNING: break; default: goto illegal; } break; case SHOST_CANCEL: switch (oldstate) { case SHOST_CREATED: case SHOST_RUNNING: case SHOST_CANCEL_RECOVERY: break; default: goto illegal; } break; case SHOST_DEL: switch (oldstate) { case SHOST_CANCEL: case SHOST_DEL_RECOVERY: break; default: goto illegal; } break; case SHOST_CANCEL_RECOVERY: switch (oldstate) { case SHOST_CANCEL: case SHOST_RECOVERY: break; default: goto illegal; } break; case SHOST_DEL_RECOVERY: switch (oldstate) { case SHOST_CANCEL_RECOVERY: break; default: goto illegal; } break; } shost->shost_state = state; return 0; illegal: SCSI_LOG_ERROR_RECOVERY(1, shost_printk(KERN_ERR, shost, "Illegal host state transition" "%s->%s\n", scsi_host_state_name(oldstate), scsi_host_state_name(state))); return -EINVAL; } /** * scsi_remove_host - remove a scsi host * @shost: a pointer to a scsi host to remove **/ void scsi_remove_host(struct Scsi_Host *shost) { unsigned long flags; mutex_lock(&shost->scan_mutex); spin_lock_irqsave(shost->host_lock, flags); if (scsi_host_set_state(shost, SHOST_CANCEL)) if (scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY)) { spin_unlock_irqrestore(shost->host_lock, flags); mutex_unlock(&shost->scan_mutex); return; } spin_unlock_irqrestore(shost->host_lock, flags); scsi_autopm_get_host(shost);