/*
 * NET		Generic infrastructure for Network protocols.
 *
 *		Definitions for request_sock
 *
 * Authors:	Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *		From code originally in include/net/tcp.h
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
#ifndef _REQUEST_SOCK_H
#define _REQUEST_SOCK_H

#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/bug.h>
#include <linux/refcount.h>

#include <net/sock.h>

struct request_sock;
struct sk_buff;
struct dst_entry;
struct proto;

struct request_sock_ops {
	int		family;
	unsigned int	obj_size;
	struct kmem_cache	*slab;
	char		*slab_name;
	int		(*rtx_syn_ack)(const struct sock *sk,
				       struct request_sock *req);
	void		(*send_ack)(const struct sock *sk, struct sk_buff *skb,
				    struct request_sock *req);
	void		(*send_reset)(const struct sock *sk,
				      struct sk_buff *skb);
	void		(*destructor)(struct request_sock *req);
	void		(*syn_ack_timeout)(const struct request_sock *req);
};

int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req);

/* struct request_sock - mini sock to represent a connection request
 */
struct request_sock {
	struct sock_common		__req_common;
#define rsk_refcnt			__req_common.skc_refcnt
#define rsk_hash			__req_common.skc_hash
#define rsk_listener			__req_common.skc_listener
#define rsk_window_clamp		__req_common.skc_window_clamp
#define rsk_rcv_wnd			__req_common.skc_rcv_wnd

	struct request_sock		*dl_next;
	u16				mss;
	u8				num_retrans; /* number of retransmits */
	u8				cookie_ts:1; /* syncookie: encode tcpopts in timestamp */
	u8				num_timeout:7; /* number of timeouts */
	u32				ts_recent;
	struct timer_list		rsk_timer;
	const struct request_sock_ops	*rsk_ops;
	struct sock			*sk;
	u32				*saved_syn;
	u32				secid;
	u32				peer_secid;
};

static inline struct request_sock *inet_reqsk(const struct sock *sk)
{
	return (struct request_sock *)sk;
}

static inline struct sock *req_to_sk(struct request_sock *req)
{
	return (struct sock *)req;
}

static inline struct request_sock *
reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener,
	    bool attach_listener)
{
	struct request_sock *req;

	req = kmem_cache_alloc(ops->slab, GFP_ATOMIC | __GFP_NOWARN);
	if (!req)
		return NULL;
	req->rsk_listener = NULL;
	if (attach_listener) {
		if (unlikely(!refcount_inc_not_zero(&sk_listener->sk_refcnt))) {
			kmem_cache_free(ops->slab, req);
			return NULL;
		}
		req->rsk_listener = sk_listener;
	}
	req->rsk_ops = ops;
	req_to_sk(req)->sk_prot = sk_listener->sk_prot;
	sk_node_init(&req_to_sk(req)->sk_node);
	sk_tx_queue_clear(req_to_sk(req));
	req->saved_syn = NULL;
	refcount_set(&req->rsk_refcnt, 0);

	return req;
}

static inline void reqsk_free(struct request_sock *req)
{
	/* temporary debugging */
	WARN_ON_ONCE(refcount_read(&req->rsk_refcnt) != 0);

	req->rsk_ops->destructor(req);
	if (req->rsk_listener)
		sock_put(req->rsk_listener);
	kfree(req->saved_syn);
	kmem_cache_free(req->rsk_ops->slab, req);
}

static inline void reqsk_put(struct request_sock *req)
{
	if (refcount_dec_and_test(&req->rsk_refcnt))
		reqsk_free(req);
}

/*
 * For a TCP Fast Open listener -
 *	lock - protects the access to all the reqsk, which is co-owned by
 *		the listener and the child socket.
 *	qlen - pending TFO requests (still in TCP_SYN_RECV).
 *	max_qlen - max TFO reqs allowed before TFO is disabled.
 *
 *	XXX (TFO) - ideally these fields can be made as part of "listen_sock"
 *	structure above. But there is some implementation difficulty due to
 *	listen_sock being part of request_sock_queue hence will be freed when
 *	a listener is stopped. But TFO related fields may continue to be
 *	accessed even after a listener is closed, until its sk_refcnt drops
 *	to 0 implying no more outstanding TFO reqs. One solution is to keep
 *	listen_opt around until sk_refcnt drops to 0. But there is some other
 *	complexity that needs to be resolved. E.g., a listener can be disabled
 *	temporarily through shutdown()->tcp_disconnect(), and re-enabled later.
 */
struct fastopen_queue {
	struct request_sock	*rskq_rst_head; /* Keep track of past TFO */
	struct request_sock	*rskq_rst_tail; /* requests that caused RST.
						 * This is part of the defense
						 * against spoofing attack.
						 */
	spinlock_t	lock;
	int		qlen;		/* # of pending (TCP_SYN_RECV) reqs */
	int		max_qlen;	/* != 0 iff TFO is currently enabled */

	struct tcp_fastopen_context __rcu *ctx; /* cipher context for cookie */
};

/** struct request_sock_queue - queue of request_socks
 *
 * @rskq_accept_head - FIFO head of established children
 * @rskq_accept_tail - FIFO tail of established children
 * @rskq_defer_accept - User waits for some data after accept()
 *
 */
struct request_sock_queue {
	spinlock_t		rskq_lock;
	u8			rskq_defer_accept;

	u32			synflood_warned;
	atomic_t		qlen;
	atomic_t		young;

	struct request_sock	*rskq_accept_head;
	struct request_sock	*rskq_accept_tail;
	struct fastopen_queue	fastopenq;  /* Check max_qlen != 0 to determine
					     * if TFO is enabled.
					     */
};

void reqsk_queue_alloc(struct request_sock_queue *queue);

void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
			   bool reset);

static inline bool reqsk_queue_empty(const struct request_sock_queue *queue)
{
	return READ_ONCE(queue->rskq_accept_head) == NULL;
}

static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue *queue,
						      struct sock *parent)
{
	struct request_sock *req;

	spin_lock_bh(&queue->rskq_lock);
	req = queue->rskq_accept_head;
	if (req) {
		sk_acceptq_removed(parent);
		WRITE_ONCE(queue->rskq_accept_head, req->dl_next);
		if (queue->rskq_accept_head == NULL)
			queue->rskq_accept_tail = NULL;
	}
	spin_unlock_bh(&queue->rskq_lock);
	return req;
}

static inline void reqsk_queue_removed(struct request_sock_queue *queue,
				       const struct request_sock *req)
{
	if (req->num_timeout == 0)
		atomic_dec(&queue->young);
	atomic_dec(&queue->qlen);
}

static inline void reqsk_queue_added(struct request_sock_queue *queue)
{
	atomic_inc(&queue->young);
	atomic_inc(&queue->qlen);
}

static inline int reqsk_queue_len(const struct request_sock_queue *queue)
{
	return atomic_read(&queue->qlen);
}

static inline int reqsk_queue_len_young(const struct request_sock_queue *queue)
{
	return atomic_read(&queue->young);
}

#endif /* _REQUEST_SOCK_H */
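A minimal usage sketch (not part of this header): how a protocol might allocate a request_sock against a listener, account for it, and release it again. The names example_new_request and my_ops are hypothetical, and the accept-queue access assumes <net/inet_connection_sock.h>; a real protocol (see tcp_conn_request()) would also fill in protocol state, hash the request and arm rsk_timer before dropping its reference.

/* Hypothetical illustration only -- not from request_sock.h. */
static int example_new_request(struct sock *listener,
			       const struct request_sock_ops *my_ops)
{
	struct request_sock_queue *queue = &inet_csk(listener)->icsk_accept_queue;
	struct request_sock *req;

	/* Takes a reference on the listener when attach_listener is true. */
	req = reqsk_alloc(my_ops, listener, true);
	if (!req)
		return -ENOMEM;

	req->num_retrans = 0;
	req->num_timeout = 0;
	/* reqsk_alloc() leaves rsk_refcnt at 0; take the caller's reference. */
	refcount_set(&req->rsk_refcnt, 1);

	/* Account the request as pending and "young" on the listener. */
	reqsk_queue_added(queue);

	/* ... normally: hash the request, start rsk_timer, send the SYN-ACK ... */

	/* On drop (or once the child socket is created), undo the accounting. */
	reqsk_queue_removed(queue, req);
	reqsk_put(req);		/* refcount hits 0 -> reqsk_free() */
	return 0;
}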
// SPDX-License-Identifier: GPL-2.0-only
/*
 * vivid-ctrls.c - control support functions.
 *
 * Copyright 2014 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/videodev2.h>
#include <media/v4l2-event.h>
#include <media/v4l2-common.h>

#include "vivid-core.h"
#include "vivid-vid-cap.h"
#include "vivid-vid-out.h"
#include "vivid-vid-common.h"
#include "vivid-radio-common.h"
#include "vivid-osd.h"
#include "vivid-ctrls.h"

#define VIVID_CID_CUSTOM_BASE		(V4L2_CID_USER_BASE | 0xf000)
#define VIVID_CID_BUTTON		(VIVID_CID_CUSTOM_BASE + 0)
#define VIVID_CID_BOOLEAN		(VIVID_CID_CUSTOM_BASE + 1)
#define VIVID_CID_INTEGER		(VIVID_CID_CUSTOM_BASE + 2)
#define VIVID_CID_INTEGER64		(VIVID_CID_CUSTOM_BASE + 3)
#define VIVID_CID_MENU			(VIVID_CID_CUSTOM_BASE + 4)
#define VIVID_CID_STRING		(VIVID_CID_CUSTOM_BASE + 5)
#define VIVID_CID_BITMASK		(VIVID_CID_CUSTOM_BASE + 6)
#define VIVID_CID_INTMENU		(VIVID_CID_CUSTOM_BASE + 7)
#define VIVID_CID_U32_ARRAY		(VIVID_CID_CUSTOM_BASE + 8)
#define VIVID_CID_U16_MATRIX		(VIVID_CID_CUSTOM_BASE + 9)
#define VIVID_CID_U8_4D_ARRAY		(VIVID_CID_CUSTOM_BASE + 10)

#define VIVID_CID_VIVID_BASE		(0x00f00000 | 0xf000)
#define VIVID_CID_VIVID_CLASS		(0x00f00000 | 1)
#define VIVID_CID_TEST_PATTERN		(VIVID_CID_VIVID_BASE + 0)
#define VIVID_CID_OSD_TEXT_MODE		(VIVID_CID_VIVID_BASE + 1)
#define VIVID_CID_HOR_MOVEMENT		(VIVID_CID_VIVID_BASE + 2)
#define VIVID_CID_VERT_MOVEMENT		(VIVID_CID_VIVID_BASE + 3)
#define VIVID_CID_SHOW_BORDER		(VIVID_CID_VIVID_BASE + 4)
#define VIVID_CID_SHOW_SQUARE		(VIVID_CID_VIVID_BASE + 5)
#define VIVID_CID_INSERT_SAV		(VIVID_CID_VIVID_BASE + 6)
#define VIVID_CID_INSERT_EAV		(VIVID_CID_VIVID_BASE + 7)
#define VIVID_CID_VBI_CAP_INTERLACED	(VIVID_CID_VIVID_BASE + 8)

#define VIVID_CID_HFLIP			(VIVID_CID_VIVID_BASE + 20)
#define VIVID_CID_VFLIP			(VIVID_CID_VIVID_BASE + 21)
#define VIVID_CID_STD_ASPECT_RATIO	(VIVID_CID_VIVID_BASE + 22)
#define VIVID_CID_DV_TIMINGS_ASPECT_RATIO	(VIVID_CID_VIVID_BASE + 23)
#define VIVID_CID_TSTAMP_SRC		(VIVID_CID_VIVID_BASE + 24)
#define VIVID_CID_COLORSPACE		(VIVID_CID_VIVID_BASE + 25)
#define VIVID_CID_XFER_FUNC		(VIVID_CID_VIVID_BASE + 26)
#define VIVID_CID_YCBCR_ENC		(VIVID_CID_VIVID_BASE + 27)
#define VIVID_CID_QUANTIZATION		(VIVID_CID_VIVID_BASE + 28)
#define VIVID_CID_LIMITED_RGB_RANGE	(VIVID_CID_VIVID_BASE + 29)
#define VIVID_CID_ALPHA_MODE		(VIVID_CID_VIVID_BASE + 30)
#define VIVID_CID_HAS_CROP_CAP		(VIVID_CID_VIVID_BASE + 31)
#define VIVID_CID_HAS_COMPOSE_CAP	(VIVID_CID_VIVID_BASE + 32)
#define VIVID_CID_HAS_SCALER_CAP	(VIVID_CID_VIVID_BASE + 33)
#define VIVID_CID_HAS_CROP_OUT		(VIVID_CID_VIVID_BASE + 34)
#define VIVID_CID_HAS_COMPOSE_OUT	(VIVID_CID_VIVID_BASE + 35)
#define VIVID_CID_HAS_SCALER_OUT	(VIVID_CID_VIVID_BASE + 36)
#define VIVID_CID_LOOP_VIDEO		(VIVID_CID_VIVID_BASE + 37)
#define VIVID_CID_SEQ_WRAP		(VIVID_CID_VIVID_BASE + 38)
#define VIVID_CID_TIME_WRAP		(VIVID_CID_VIVID_BASE + 39)
#define VIVID_CID_MAX_EDID_BLOCKS	(VIVID_CID_VIVID_BASE + 40)
#define
VIVID_CID_PERCENTAGE_FILL (VIVID_CID_VIVID_BASE + 41) #define VIVID_CID_REDUCED_FPS (VIVID_CID_VIVID_BASE + 42) #define VIVID_CID_HSV_ENC (VIVID_CID_VIVID_BASE + 43) #define VIVID_CID_STD_SIGNAL_MODE (VIVID_CID_VIVID_BASE + 60) #define VIVID_CID_STANDARD (VIVID_CID_VIVID_BASE + 61) #define VIVID_CID_DV_TIMINGS_SIGNAL_MODE (VIVID_CID_VIVID_BASE + 62) #define VIVID_CID_DV_TIMINGS (VIVID_CID_VIVID_BASE + 63) #define VIVID_CID_PERC_DROPPED (VIVID_CID_VIVID_BASE + 64) #define VIVID_CID_DISCONNECT (VIVID_CID_VIVID_BASE + 65) #define VIVID_CID_DQBUF_ERROR (VIVID_CID_VIVID_BASE + 66) #define VIVID_CID_QUEUE_SETUP_ERROR (VIVID_CID_VIVID_BASE + 67) #define VIVID_CID_BUF_PREPARE_ERROR (VIVID_CID_VIVID_BASE + 68) #define VIVID_CID_START_STR_ERROR (VIVID_CID_VIVID_BASE + 69) #define VIVID_CID_QUEUE_ERROR (VIVID_CID_VIVID_BASE + 70) #define VIVID_CID_CLEAR_FB (VIVID_CID_VIVID_BASE + 71) #define VIVID_CID_RADIO_SEEK_MODE (VIVID_CID_VIVID_BASE + 90) #define VIVID_CID_RADIO_SEEK_PROG_LIM (VIVID_CID_VIVID_BASE + 91) #define VIVID_CID_RADIO_RX_RDS_RBDS (VIVID_CID_VIVID_BASE + 92) #define VIVID_CID_RADIO_RX_RDS_BLOCKIO (VIVID_CID_VIVID_BASE + 93) #define VIVID_CID_RADIO_TX_RDS_BLOCKIO (VIVID_CID_VIVID_BASE + 94) #define VIVID_CID_SDR_CAP_FM_DEVIATION (VIVID_CID_VIVID_BASE + 110) /* General User Controls */ static int vivid_user_gen_s_ctrl(struct v4l2_ctrl *ctrl) { struct vivid_dev *dev = container_of(ctrl->handler, struct vivid_dev, ctrl_hdl_user_gen); switch (ctrl->id) { case VIVID_CID_DISCONNECT: v4l2_info(&dev->v4l2_dev, "disconnect\n"); clear_bit(V4L2_FL_REGISTERED, &dev->vid_cap_dev.flags); clear_bit(V4L2_FL_REGISTERED, &dev->vid_out_dev.flags); clear_bit(V4L2_FL_REGISTERED, &dev->vbi_cap_dev.flags); clear_bit(V4L2_FL_REGISTERED, &dev->vbi_out_dev.flags); clear_bit(V4L2_FL_REGISTERED, &dev->sdr_cap_dev.flags); clear_bit(V4L2_FL_REGISTERED, &dev->radio_rx_dev.flags); clear_bit(V4L2_FL_REGISTERED, &dev->radio_tx_dev.flags); break; case VIVID_CID_BUTTON: dev->button_pressed = 30; break; } return 0; } static const struct v4l2_ctrl_ops vivid_user_gen_ctrl_ops = { .s_ctrl = vivid_user_gen_s_ctrl, }; static const struct v4l2_ctrl_config vivid_ctrl_button = { .ops = &vivid_user_gen_ctrl_ops, .id = VIVID_CID_BUTTON, .name = "Button", .type = V4L2_CTRL_TYPE_BUTTON, }; static const struct v4l2_ctrl_config vivid_ctrl_boolean = { .ops = &vivid_user_gen_ctrl_ops, .id = VIVID_CID_BOOLEAN, .name = "Boolean", .type = V4L2_CTRL_TYPE_BOOLEAN, .min = 0, .max = 1, .step = 1, .def = 1, }; static const struct v4l2_ctrl_config vivid_ctrl_int32 = { .ops = &vivid_user_gen_ctrl_ops, .id = VIVID_CID_INTEGER, .name = "Integer 32 Bits", .type = V4L2_CTRL_TYPE_INTEGER, .min = 0xffffffff80000000ULL, .max = 0x7fffffff, .step = 1, }; static const struct v4l2_ctrl_config vivid_ctrl_int64 = { .ops = &vivid_user_gen_ctrl_ops, .id = VIVID_CID_INTEGER64, .name = "Integer 64 Bits", .type = V4L2_CTRL_TYPE_INTEGER64, .min = 0x8000000000000000ULL, .max = 0x7fffffffffffffffLL, .step = 1, }; static const struct v4l2_ctrl_config vivid_ctrl_u32_array = { .ops = &vivid_user_gen_ctrl_ops, .id = VIVID_CID_U32_ARRAY, .name = "U32 1 Element Array", .type = V4L2_CTRL_TYPE_U32, .def = 0x18, .min = 0x10, .max = 0x20000, .step = 1, .dims = { 1 }, }; static const struct v4l2_ctrl_config vivid_ctrl_u16_matrix = { .ops = &vivid_user_gen_ctrl_ops, .id = VIVID_CID_U16_MATRIX, .name = "U16 8x16 Matrix", .type = V4L2_CTRL_TYPE_U16, .def = 0x18, .min = 0x10, .max = 0x2000, .step = 1, .dims = { 8, 16 }, }; static const struct v4l2_ctrl_config vivid_ctrl_u8_4d_array = 
{ .ops = &vivid_user_gen_ctrl_ops, .id = VIVID_CID_U8_4D_ARRAY, .name = "U8 2x3x4x5 Array", .type = V4L2_CTRL_TYPE_U8, .def = 0x18, .min = 0x10, .max = 0x20, .step = 1, .dims = { 2, 3, 4, 5 }, }; static const char * const vivid_ctrl_menu_strings[] = { "Menu Item 0 (Skipped)", "Menu Item 1", "Menu Item 2 (Skipped)", "Menu Item 3", "Menu Item 4", "Menu Item 5 (Skipped)", NULL, }; static const struct v4l2_ctrl_config vivid_ctrl_menu = { .ops = &vivid_user_gen_ctrl_ops, .id = VIVID_CID_MENU, .name = "Menu", .type = V4L2_CTRL_TYPE_MENU, .min = 1, .max = 4, .def = 3, .menu_skip_mask = 0x04, .qmenu = vivid_ctrl_menu_strings, }; static const struct v4l2_ctrl_config vivid_ctrl_string = { .ops = &vivid_user_gen_ctrl_ops, .id = VIVID_CID_STRING, .name = "String", .type = V4L2_CTRL_TYPE_STRING, .min = 2, .max = 4, .step = 1, }; static const struct v4l2_ctrl_config vivid_ctrl_bitmask = { .ops = &vivid_user_gen_ctrl_ops, .id = VIVID_CID_BITMASK, .name = "Bitmask", .type = V4L2_CTRL_TYPE_BITMASK, .def = 0x80002000, .min = 0, .max = 0x80402010, .step = 0, }; static const s64 vivid_ctrl_int_menu_values[] = { 1, 1, 2, 3, 5, 8, 13, 21, 42, }; static const struct v4l2_ctrl_config vivid_ctrl_int_menu = { .ops = &vivid_user_gen_ctrl_ops, .id = VIVID_CID_INTMENU, .name = "Integer Menu", .type = V4L2_CTRL_TYPE_INTEGER_MENU, .min = 1, .max = 8, .def = 4, .menu_skip_mask = 0x02, .qmenu_int = vivid_ctrl_int_menu_values, }; static const struct v4l2_ctrl_config vivid_ctrl_disconnect = { .ops = &vivid_user_gen_ctrl_ops, .id = VIVID_CID_DISCONNECT, .name = "Disconnect", .type = V4L2_CTRL_TYPE_BUTTON, }; /* Framebuffer Controls */ static int vivid_fb_s_ctrl(struct v4l2_ctrl *ctrl) { struct vivid_dev *dev = container_of(ctrl->handler, struct vivid_dev, ctrl_hdl_fb); switch (ctrl->id) { case VIVID_CID_CLEAR_FB: vivid_clear_fb(dev); break; } return 0; } static const struct v4l2_ctrl_ops vivid_fb_ctrl_ops = { .s_ctrl = vivid_fb_s_ctrl, }; static const struct v4l2_ctrl_config vivid_ctrl_clear_fb = { .ops = &vivid_fb_ctrl_ops, .id = VIVID_CID_CLEAR_FB, .name = "Clear Framebuffer", .type = V4L2_CTRL_TYPE_BUTTON, }; /* Video User Controls */ static int vivid_user_vid_g_volatile_ctrl(struct v4l2_ctrl *ctrl) { struct vivid_dev *dev = container_of(ctrl->handler, struct vivid_dev, ctrl_hdl_user_vid); switch (ctrl->id) { case V4L2_CID_AUTOGAIN: dev->gain->val = (jiffies_to_msecs(jiffies) / 1000) & 0xff; break; } return 0; } static int vivid_user_vid_s_ctrl(struct v4l2_ctrl *ctrl) { struct vivid_dev *dev = container_of(ctrl->handler, struct vivid_dev, ctrl_hdl_user_vid); switch (ctrl->id) { case V4L2_CID_BRIGHTNESS: dev->input_brightness[dev->input] = ctrl->val - dev->input * 128; tpg_s_brightness(&dev->tpg, dev->input_brightness[dev->input]); break; case V4L2_CID_CONTRAST: tpg_s_contrast(&dev->tpg, ctrl->val); break; case V4L2_CID_SATURATION: tpg_s_saturation(&dev->tpg, ctrl->val); break; case V4L2_CID_HUE: tpg_s_hue(&dev->tpg, ctrl->val); break; case V4L2_CID_HFLIP: dev->hflip = ctrl->val; tpg_s_hflip(&dev->tpg, dev->sensor_hflip ^ dev->hflip); break; case V4L2_CID_VFLIP: dev->vflip = ctrl->val; tpg_s_vflip(&dev->tpg, dev->sensor_vflip ^ dev->vflip); break; case V4L2_CID_ALPHA_COMPONENT: tpg_s_alpha_component(&dev->tpg, ctrl->val); break; } return 0; } static const struct v4l2_ctrl_ops vivid_user_vid_ctrl_ops = { .g_volatile_ctrl = vivid_user_vid_g_volatile_ctrl, .s_ctrl = vivid_user_vid_s_ctrl, }; /* Video Capture Controls */ static int vivid_vid_cap_s_ctrl(struct v4l2_ctrl *ctrl) { static const u32 colorspaces[] = { 
V4L2_COLORSPACE_SMPTE170M, V4L2_COLORSPACE_REC709, V4L2_COLORSPACE_SRGB, V4L2_COLORSPACE_OPRGB, V4L2_COLORSPACE_BT2020, V4L2_COLORSPACE_DCI_P3, V4L2_COLORSPACE_SMPTE240M, V4L2_COLORSPACE_470_SYSTEM_M, V4L2_COLORSPACE_470_SYSTEM_BG, }; struct vivid_dev *dev = container_of(ctrl->handler, struct vivid_dev, ctrl_hdl_vid_cap); unsigned i; switch (ctrl->id) { case VIVID_CID_TEST_PATTERN: vivid_update_quality(dev); tpg_s_pattern(&dev->tpg, ctrl->val); break; case VIVID_CID_COLORSPACE: tpg_s_colorspace(&dev->tpg, colorspaces[ctrl->val]); vivid_send_source_change(dev, TV); vivid_send_source_change(dev, SVID); vivid_send_source_change(dev, HDMI); vivid_send_source_change(dev, WEBCAM); break; case VIVID_CID_XFER_FUNC: tpg_s_xfer_func(&dev->tpg, ctrl->val); vivid_send_source_change(dev, TV); vivid_send_source_change(dev, SVID); vivid_send_source_change(dev, HDMI); vivid_send_source_change(dev, WEBCAM); break; case VIVID_CID_YCBCR_ENC: tpg_s_ycbcr_enc(&dev->tpg, ctrl->val); vivid_send_source_change(dev, TV); vivid_send_source_change(dev, SVID); vivid_send_source_change(dev, HDMI); vivid_send_source_change(dev, WEBCAM); break; case VIVID_CID_HSV_ENC: tpg_s_hsv_enc(&dev->tpg, ctrl->val ? V4L2_HSV_ENC_256 : V4L2_HSV_ENC_180); vivid_send_source_change(dev, TV); vivid_send_source_change(dev, SVID); vivid_send_source_change(dev, HDMI); vivid_send_source_change(dev, WEBCAM); break; case VIVID_CID_QUANTIZATION: tpg_s_quantization(&dev->tpg, ctrl->val); vivid_send_source_change(dev, TV); vivid_send_source_change(dev, SVID); vivid_send_source_change(dev, HDMI); vivid_send_source_change(dev, WEBCAM); break; case V4L2_CID_DV_RX_RGB_RANGE: if (!vivid_is_hdmi_cap(dev)) break; tpg_s_rgb_range(&dev->tpg, ctrl->val); break; case VIVID_CID_LIMITED_RGB_RANGE: tpg_s_real_rgb_range(&dev->tpg, ctrl->val ? 
V4L2_DV_RGB_RANGE_LIMITED : V4L2_DV_RGB_RANGE_FULL); break; case VIVID_CID_ALPHA_MODE: tpg_s_alpha_mode(&dev->tpg, ctrl->val); break; case VIVID_CID_HOR_MOVEMENT: tpg_s_mv_hor_mode(&dev->tpg, ctrl->val); break; case VIVID_CID_VERT_MOVEMENT: tpg_s_mv_vert_mode(&dev->tpg, ctrl->val); break; case VIVID_CID_OSD_TEXT_MODE: dev->osd_mode = ctrl->val; break; case VIVID_CID_PERCENTAGE_FILL: tpg_s_perc_fill(&dev->tpg, ctrl->val); for (i = 0; i < VIDEO_MAX_FRAME; i++) dev->must_blank[i] = ctrl->val < 100; break; case VIVID_CID_INSERT_SAV: tpg_s_insert_sav(&dev->tpg, ctrl->val); break; case VIVID_CID_INSERT_EAV: tpg_s_insert_eav(&dev->tpg, ctrl->val); break; case VIVID_CID_HFLIP: dev->sensor_hflip = ctrl->val; tpg_s_hflip(&dev->tpg, dev->sensor_hflip ^ dev->hflip); break; case VIVID_CID_VFLIP: dev->sensor_vflip = ctrl->val; tpg_s_vflip(&dev->tpg, dev->sensor_vflip ^ dev->vflip); break; case VIVID_CID_REDUCED_FPS: dev->reduced_fps = ctrl->val; vivid_update_format_cap(dev, true); break; case VIVID_CID_HAS_CROP_CAP: dev->has_crop_cap = ctrl->val; vivid_update_format_cap(dev, true); break; case VIVID_CID_HAS_COMPOSE_CAP: dev->has_compose_cap = ctrl->val; vivid_update_format_cap(dev, true); break; case VIVID_CID_HAS_SCALER_CAP: dev->has_scaler_cap = ctrl->val; vivid_update_format_cap(dev, true); break; case VIVID_CID_SHOW_BORDER: tpg_s_show_border(&dev->tpg, ctrl->val); break; case VIVID_CID_SHOW_SQUARE: tpg_s_show_square(&dev->tpg, ctrl->val); break; case VIVID_CID_STD_ASPECT_RATIO: dev->std_aspect_ratio = ctrl->val; tpg_s_video_aspect(&dev->tpg, vivid_get_video_aspect(dev)); break; case VIVID_CID_DV_TIMINGS_SIGNAL_MODE: dev->dv_timings_signal_mode = dev->ctrl_dv_timings_signal_mode->val; if (dev->dv_timings_signal_mode == SELECTED_DV_TIMINGS) dev->query_dv_timings = dev->ctrl_dv_timings->val; v4l2_ctrl_activate(dev->ctrl_dv_timings, dev->dv_timings_signal_mode == SELECTED_DV_TIMINGS); vivid_update_quality(dev); vivid_send_source_change(dev, HDMI); break; case VIVID_CID_DV_TIMINGS_ASPECT_RATIO: dev->dv_timings_aspect_ratio = ctrl->val; tpg_s_video_aspect(&dev->tpg, vivid_get_video_aspect(dev)); break; case VIVID_CID_TSTAMP_SRC: dev->tstamp_src_is_soe = ctrl->val; dev->vb_vid_cap_q.timestamp_flags &= ~V4L2_BUF_FLAG_TSTAMP_SRC_MASK; if (dev->tstamp_src_is_soe) dev->vb_vid_cap_q.timestamp_flags |= V4L2_BUF_FLAG_TSTAMP_SRC_SOE; break; case VIVID_CID_MAX_EDID_BLOCKS: dev->edid_max_blocks = ctrl->val; if (dev->edid_blocks > dev->edid_max_blocks) dev->edid_blocks = dev->edid_max_blocks; break; } return 0; } static const struct v4l2_ctrl_ops vivid_vid_cap_ctrl_ops = { .s_ctrl = vivid_vid_cap_s_ctrl, }; static const char * const vivid_ctrl_hor_movement_strings[] = { "Move Left Fast", "Move Left", "Move Left Slow", "No Movement", "Move Right Slow", "Move Right", "Move Right Fast", NULL, }; static const struct v4l2_ctrl_config vivid_ctrl_hor_movement = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_HOR_MOVEMENT, .name = "Horizontal Movement", .type = V4L2_CTRL_TYPE_MENU, .max = TPG_MOVE_POS_FAST, .def = TPG_MOVE_NONE, .qmenu = vivid_ctrl_hor_movement_strings, }; static const char * const vivid_ctrl_vert_movement_strings[] = { "Move Up Fast", "Move Up", "Move Up Slow", "No Movement", "Move Down Slow", "Move Down", "Move Down Fast", NULL, }; static const struct v4l2_ctrl_config vivid_ctrl_vert_movement = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_VERT_MOVEMENT, .name = "Vertical Movement", .type = V4L2_CTRL_TYPE_MENU, .max = TPG_MOVE_POS_FAST, .def = TPG_MOVE_NONE, .qmenu = vivid_ctrl_vert_movement_strings, 
}; static const struct v4l2_ctrl_config vivid_ctrl_show_border = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_SHOW_BORDER, .name = "Show Border", .type = V4L2_CTRL_TYPE_BOOLEAN, .max = 1, .step = 1, }; static const struct v4l2_ctrl_config vivid_ctrl_show_square = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_SHOW_SQUARE, .name = "Show Square", .type = V4L2_CTRL_TYPE_BOOLEAN, .max = 1, .step = 1, }; static const char * const vivid_ctrl_osd_mode_strings[] = { "All", "Counters Only", "None", NULL, }; static const struct v4l2_ctrl_config vivid_ctrl_osd_mode = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_OSD_TEXT_MODE, .name = "OSD Text Mode", .type = V4L2_CTRL_TYPE_MENU, .max = ARRAY_SIZE(vivid_ctrl_osd_mode_strings) - 2, .qmenu = vivid_ctrl_osd_mode_strings, }; static const struct v4l2_ctrl_config vivid_ctrl_perc_fill = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_PERCENTAGE_FILL, .name = "Fill Percentage of Frame", .type = V4L2_CTRL_TYPE_INTEGER, .min = 0, .max = 100, .def = 100, .step = 1, }; static const struct v4l2_ctrl_config vivid_ctrl_insert_sav = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_INSERT_SAV, .name = "Insert SAV Code in Image", .type = V4L2_CTRL_TYPE_BOOLEAN, .max = 1, .step = 1, }; static const struct v4l2_ctrl_config vivid_ctrl_insert_eav = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_INSERT_EAV, .name = "Insert EAV Code in Image", .type = V4L2_CTRL_TYPE_BOOLEAN, .max = 1, .step = 1, }; static const struct v4l2_ctrl_config vivid_ctrl_hflip = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_HFLIP, .name = "Sensor Flipped Horizontally", .type = V4L2_CTRL_TYPE_BOOLEAN, .max = 1, .step = 1, }; static const struct v4l2_ctrl_config vivid_ctrl_vflip = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_VFLIP, .name = "Sensor Flipped Vertically", .type = V4L2_CTRL_TYPE_BOOLEAN, .max = 1, .step = 1, }; static const struct v4l2_ctrl_config vivid_ctrl_reduced_fps = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_REDUCED_FPS, .name = "Reduced Framerate", .type = V4L2_CTRL_TYPE_BOOLEAN, .max = 1, .step = 1, }; static const struct v4l2_ctrl_config vivid_ctrl_has_crop_cap = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_HAS_CROP_CAP, .name = "Enable Capture Cropping", .type = V4L2_CTRL_TYPE_BOOLEAN, .max = 1, .def = 1, .step = 1, }; static const struct v4l2_ctrl_config vivid_ctrl_has_compose_cap = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_HAS_COMPOSE_CAP, .name = "Enable Capture Composing", .type = V4L2_CTRL_TYPE_BOOLEAN, .max = 1, .def = 1, .step = 1, }; static const struct v4l2_ctrl_config vivid_ctrl_has_scaler_cap = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_HAS_SCALER_CAP, .name = "Enable Capture Scaler", .type = V4L2_CTRL_TYPE_BOOLEAN, .max = 1, .def = 1, .step = 1, }; static const char * const vivid_ctrl_tstamp_src_strings[] = { "End of Frame", "Start of Exposure", NULL, }; static const struct v4l2_ctrl_config vivid_ctrl_tstamp_src = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_TSTAMP_SRC, .name = "Timestamp Source", .type = V4L2_CTRL_TYPE_MENU, .max = ARRAY_SIZE(vivid_ctrl_tstamp_src_strings) - 2, .qmenu = vivid_ctrl_tstamp_src_strings, }; static const struct v4l2_ctrl_config vivid_ctrl_std_aspect_ratio = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_STD_ASPECT_RATIO, .name = "Standard Aspect Ratio", .type = V4L2_CTRL_TYPE_MENU, .min = 1, .max = 4, .def = 1, .qmenu = tpg_aspect_strings, }; static const char * const vivid_ctrl_dv_timings_signal_mode_strings[] = { "Current DV Timings", "No Signal", "No Lock", "Out of Range", 
"Selected DV Timings", "Cycle Through All DV Timings", "Custom DV Timings", NULL, }; static const struct v4l2_ctrl_config vivid_ctrl_dv_timings_signal_mode = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_DV_TIMINGS_SIGNAL_MODE, .name = "DV Timings Signal Mode", .type = V4L2_CTRL_TYPE_MENU, .max = 5, .qmenu = vivid_ctrl_dv_timings_signal_mode_strings, }; static const struct v4l2_ctrl_config vivid_ctrl_dv_timings_aspect_ratio = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_DV_TIMINGS_ASPECT_RATIO, .name = "DV Timings Aspect Ratio", .type = V4L2_CTRL_TYPE_MENU, .max = 3, .qmenu = tpg_aspect_strings, }; static const struct v4l2_ctrl_config vivid_ctrl_max_edid_blocks = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_MAX_EDID_BLOCKS, .name = "Maximum EDID Blocks", .type = V4L2_CTRL_TYPE_INTEGER, .min = 1, .max = 256, .def = 2, .step = 1, }; static const char * const vivid_ctrl_colorspace_strings[] = { "SMPTE 170M", "Rec. 709", "sRGB", "opRGB", "BT.2020", "DCI-P3", "SMPTE 240M", "470 System M", "470 System BG", NULL, }; static const struct v4l2_ctrl_config vivid_ctrl_colorspace = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_COLORSPACE, .name = "Colorspace", .type = V4L2_CTRL_TYPE_MENU, .max = ARRAY_SIZE(vivid_ctrl_colorspace_strings) - 2, .def = 2, .qmenu = vivid_ctrl_colorspace_strings, }; static const char * const vivid_ctrl_xfer_func_strings[] = { "Default", "Rec. 709", "sRGB", "opRGB", "SMPTE 240M", "None", "DCI-P3", "SMPTE 2084", NULL, }; static const struct v4l2_ctrl_config vivid_ctrl_xfer_func = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_XFER_FUNC, .name = "Transfer Function", .type = V4L2_CTRL_TYPE_MENU, .max = ARRAY_SIZE(vivid_ctrl_xfer_func_strings) - 2, .qmenu = vivid_ctrl_xfer_func_strings, }; static const char * const vivid_ctrl_ycbcr_enc_strings[] = { "Default", "ITU-R 601", "Rec. 
709", "xvYCC 601", "xvYCC 709", "", "BT.2020", "BT.2020 Constant Luminance", "SMPTE 240M", NULL, }; static const struct v4l2_ctrl_config vivid_ctrl_ycbcr_enc = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_YCBCR_ENC, .name = "Y'CbCr Encoding", .type = V4L2_CTRL_TYPE_MENU, .menu_skip_mask = 1 << 5, .max = ARRAY_SIZE(vivid_ctrl_ycbcr_enc_strings) - 2, .qmenu = vivid_ctrl_ycbcr_enc_strings, }; static const char * const vivid_ctrl_hsv_enc_strings[] = { "Hue 0-179", "Hue 0-256", NULL, }; static const struct v4l2_ctrl_config vivid_ctrl_hsv_enc = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_HSV_ENC, .name = "HSV Encoding", .type = V4L2_CTRL_TYPE_MENU, .max = ARRAY_SIZE(vivid_ctrl_hsv_enc_strings) - 2, .qmenu = vivid_ctrl_hsv_enc_strings, }; static const char * const vivid_ctrl_quantization_strings[] = { "Default", "Full Range", "Limited Range", NULL, }; static const struct v4l2_ctrl_config vivid_ctrl_quantization = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_QUANTIZATION, .name = "Quantization", .type = V4L2_CTRL_TYPE_MENU, .max = ARRAY_SIZE(vivid_ctrl_quantization_strings) - 2, .qmenu = vivid_ctrl_quantization_strings, }; static const struct v4l2_ctrl_config vivid_ctrl_alpha_mode = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_ALPHA_MODE, .name = "Apply Alpha To Red Only", .type = V4L2_CTRL_TYPE_BOOLEAN, .max = 1, .step = 1, }; static const struct v4l2_ctrl_config vivid_ctrl_limited_rgb_range = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_LIMITED_RGB_RANGE, .name = "Limited RGB Range (16-235)", .type = V4L2_CTRL_TYPE_BOOLEAN, .max = 1, .step = 1, }; /* Video Loop Control */ static int vivid_loop_cap_s_ctrl(struct v4l2_ctrl *ctrl) { struct vivid_dev *dev = container_of(ctrl->handler, struct vivid_dev, ctrl_hdl_loop_cap); switch (ctrl->id) { case VIVID_CID_LOOP_VIDEO: dev->loop_video = ctrl->val; vivid_update_quality(dev); vivid_send_source_change(dev, SVID); vivid_send_source_change(dev, HDMI); break; } return 0; } static const struct v4l2_ctrl_ops vivid_loop_cap_ctrl_ops = { .s_ctrl = vivid_loop_cap_s_ctrl, }; static const struct v4l2_ctrl_config vivid_ctrl_loop_video = { .ops = &vivid_loop_cap_ctrl_ops, .id = VIVID_CID_LOOP_VIDEO, .name = "Loop Video", .type = V4L2_CTRL_TYPE_BOOLEAN, .max = 1, .step = 1, }; /* VBI Capture Control */ static int vivid_vbi_cap_s_ctrl(struct v4l2_ctrl *ctrl) { struct vivid_dev *dev = container_of(ctrl->handler, struct vivid_dev, ctrl_hdl_vbi_cap); switch (ctrl->id) { case VIVID_CID_VBI_CAP_INTERLACED: dev->vbi_cap_interlaced = ctrl->val; break; } return 0; } static const struct v4l2_ctrl_ops vivid_vbi_cap_ctrl_ops = { .s_ctrl = vivid_vbi_cap_s_ctrl, }; static const struct v4l2_ctrl_config vivid_ctrl_vbi_cap_interlaced = { .ops = &vivid_vbi_cap_ctrl_ops, .id = VIVID_CID_VBI_CAP_INTERLACED, .name = "Interlaced VBI Format", .type = V4L2_CTRL_TYPE_BOOLEAN, .max = 1, .step = 1, }; /* Video Output Controls */ static int vivid_vid_out_s_ctrl(struct v4l2_ctrl *ctrl) { struct vivid_dev *dev = container_of(ctrl->handler, struct vivid_dev, ctrl_hdl_vid_out); struct v4l2_bt_timings *bt = &dev->dv_timings_out.bt; switch (ctrl->id) { case VIVID_CID_HAS_CROP_OUT: dev->has_crop_out = ctrl->val; vivid_update_format_out(dev); break; case VIVID_CID_HAS_COMPOSE_OUT: dev->has_compose_out = ctrl->val; vivid_update_format_out(dev); break; case VIVID_CID_HAS_SCALER_OUT: dev->has_scaler_out = ctrl->val; vivid_update_format_out(dev); break; case V4L2_CID_DV_TX_MODE: dev->dvi_d_out = ctrl->val == V4L2_DV_TX_MODE_DVI_D; if (!vivid_is_hdmi_out(dev)) break; if 
(!dev->dvi_d_out && (bt->flags & V4L2_DV_FL_IS_CE_VIDEO)) { if (bt->width == 720 && bt->height <= 576) dev->colorspace_out = V4L2_COLORSPACE_SMPTE170M; else dev->colorspace_out = V4L2_COLORSPACE_REC709; dev->quantization_out = V4L2_QUANTIZATION_DEFAULT; } else { dev->colorspace_out = V4L2_COLORSPACE_SRGB; dev->quantization_out = dev->dvi_d_out ? V4L2_QUANTIZATION_LIM_RANGE : V4L2_QUANTIZATION_DEFAULT; } if (dev->loop_video) vivid_send_source_change(dev, HDMI); break; } return 0; } static const struct v4l2_ctrl_ops vivid_vid_out_ctrl_ops = { .s_ctrl = vivid_vid_out_s_ctrl, }; static const struct v4l2_ctrl_config vivid_ctrl_has_crop_out = { .ops = &vivid_vid_out_ctrl_ops, .id = VIVID_CID_HAS_CROP_OUT, .name = "Enable Output Cropping", .type = V4L2_CTRL_TYPE_BOOLEAN, .max = 1, .def = 1, .step = 1, }; static const struct v4l2_ctrl_config vivid_ctrl_has_compose_out = { .ops = &vivid_vid_out_ctrl_ops, .id = VIVID_CID_HAS_COMPOSE_OUT, .name = "Enable Output Composing", .type = V4L2_CTRL_TYPE_BOOLEAN, .max = 1, .def = 1, .step = 1, }; static const struct v4l2_ctrl_config vivid_ctrl_has_scaler_out = { .ops = &vivid_vid_out_ctrl_ops, .id = VIVID_CID_HAS_SCALER_OUT, .name = "Enable Output Scaler", .type = V4L2_CTRL_TYPE_BOOLEAN, .max = 1, .def = 1, .step = 1, }; /* Streaming Controls */ static int vivid_streaming_s_ctrl(struct v4l2_ctrl *ctrl) { struct vivid_dev *dev = container_of(ctrl->handler, struct vivid_dev, ctrl_hdl_streaming); u64 rem; switch (ctrl->id) { case VIVID_CID_DQBUF_ERROR: dev->dqbuf_error = true; break; case VIVID_CID_PERC_DROPPED: dev->perc_dropped_buffers = ctrl->val; break; case VIVID_CID_QUEUE_SETUP_ERROR: dev->queue_setup_error = true; break; case VIVID_CID_BUF_PREPARE_ERROR: dev->buf_prepare_error = true; break; case VIVID_CID_START_STR_ERROR: dev->start_streaming_error = true; break; case VIVID_CID_QUEUE_ERROR: if (vb2_start_streaming_called(&dev->vb_vid_cap_q)) vb2_queue_error(&dev->vb_vid_cap_q); if (vb2_start_streaming_called(&dev->vb_vbi_cap_q)) vb2_queue_error(&dev->vb_vbi_cap_q); if (vb2_start_streaming_called(&dev->vb_vid_out_q)) vb2_queue_error(&dev->vb_vid_out_q); if (vb2_start_streaming_called(&dev->vb_vbi_out_q)) vb2_queue_error(&dev->vb_vbi_out_q); if (vb2_start_streaming_called(&dev->vb_sdr_cap_q)) vb2_queue_error(&dev->vb_sdr_cap_q); break; case VIVID_CID_SEQ_WRAP: dev->seq_wrap = ctrl->val; break; case VIVID_CID_TIME_WRAP: dev->time_wrap = ctrl->val; if (ctrl->val == 0) { dev->time_wrap_offset = 0; break; } /* * We want to set the time 16 seconds before the 32 bit tv_sec * value of struct timeval would wrap around. So first we * calculate ktime_get_ns() % ((1 << 32) * NSEC_PER_SEC), and * then we set the offset to ((1 << 32) - 16) * NSEC_PER_SEC). 
*/ div64_u64_rem(ktime_get_ns(), 0x100000000ULL * NSEC_PER_SEC, &rem); dev->time_wrap_offset = (0x100000000ULL - 16) * NSEC_PER_SEC - rem; break; } return 0; } static const struct v4l2_ctrl_ops vivid_streaming_ctrl_ops = { .s_ctrl = vivid_streaming_s_ctrl, }; static const struct v4l2_ctrl_config vivid_ctrl_dqbuf_error = { .ops = &vivid_streaming_ctrl_ops, .id = VIVID_CID_DQBUF_ERROR, .name = "Inject V4L2_BUF_FLAG_ERROR", .type = V4L2_CTRL_TYPE_BUTTON, }; static const struct v4l2_ctrl_config vivid_ctrl_perc_dropped = { .ops = &vivid_streaming_ctrl_ops, .id = VIVID_CID_PERC_DROPPED, .name = "Percentage of Dropped Buffers", .type = V4L2_CTRL_TYPE_INTEGER, .min = 0, .max = 100, .step = 1, }; static const struct v4l2_ctrl_config vivid_ctrl_queue_setup_error = { .ops = &vivid_streaming_ctrl_ops, .id = VIVID_CID_QUEUE_SETUP_ERROR, .name = "Inject VIDIOC_REQBUFS Error", .type = V4L2_CTRL_TYPE_BUTTON, }; static const struct v4l2_ctrl_config vivid_ctrl_buf_prepare_error = { .ops = &vivid_streaming_ctrl_ops, .id = VIVID_CID_BUF_PREPARE_ERROR, .name = "Inject VIDIOC_QBUF Error", .type = V4L2_CTRL_TYPE_BUTTON, }; static const struct v4l2_ctrl_config vivid_ctrl_start_streaming_error = { .ops = &vivid_streaming_ctrl_ops, .id = VIVID_CID_START_STR_ERROR, .name = "Inject VIDIOC_STREAMON Error", .type = V4L2_CTRL_TYPE_BUTTON, }; static const struct v4l2_ctrl_config vivid_ctrl_queue_error = { .ops = &vivid_streaming_ctrl_ops, .id = VIVID_CID_QUEUE_ERROR, .name = "Inject Fatal Streaming Error", .type = V4L2_CTRL_TYPE_BUTTON, }; static const struct v4l2_ctrl_config vivid_ctrl_seq_wrap = { .ops = &vivid_streaming_ctrl_ops, .id = VIVID_CID_SEQ_WRAP, .name = "Wrap Sequence Number", .type = V4L2_CTRL_TYPE_BOOLEAN, .max = 1, .step = 1, }; static const struct v4l2_ctrl_config vivid_ctrl_time_wrap = { .ops = &vivid_streaming_ctrl_ops, .id = VIVID_CID_TIME_WRAP, .name = "Wrap Timestamp", .type = V4L2_CTRL_TYPE_BOOLEAN, .max = 1, .step = 1, }; /* SDTV Capture Controls */ static int vivid_sdtv_cap_s_ctrl(struct v4l2_ctrl *ctrl) { struct vivid_dev *dev = container_of(ctrl->handler, struct vivid_dev, ctrl_hdl_sdtv_cap); switch (ctrl->id) { case VIVID_CID_STD_SIGNAL_MODE: dev->std_signal_mode = dev->ctrl_std_signal_mode->val; if (dev->std_signal_mode == SELECTED_STD) dev->query_std = vivid_standard[dev->ctrl_standard->val]; v4l2_ctrl_activate(dev->ctrl_standard, dev->std_signal_mode == SELECTED_STD); vivid_update_quality(dev); vivid_send_source_change(dev, TV); vivid_send_source_change(dev, SVID); break; } return 0; } static const struct v4l2_ctrl_ops vivid_sdtv_cap_ctrl_ops = { .s_ctrl = vivid_sdtv_cap_s_ctrl, }; static const char * const vivid_ctrl_std_signal_mode_strings[] = { "Current Standard", "No Signal", "No Lock", "", "Selected Standard", "Cycle Through All Standards", NULL, }; static const struct v4l2_ctrl_config vivid_ctrl_std_signal_mode = { .ops = &vivid_sdtv_cap_ctrl_ops, .id = VIVID_CID_STD_SIGNAL_MODE, .name = "Standard Signal Mode", .type = V4L2_CTRL_TYPE_MENU, .max = ARRAY_SIZE(vivid_ctrl_std_signal_mode_strings) - 2, .menu_skip_mask = 1 << 3, .qmenu = vivid_ctrl_std_signal_mode_strings, }; static const struct v4l2_ctrl_config vivid_ctrl_standard = { .ops = &vivid_sdtv_cap_ctrl_ops, .id = VIVID_CID_STANDARD, .name = "Standard", .type = V4L2_CTRL_TYPE_MENU, .max = 14, .qmenu = vivid_ctrl_standard_strings, }; /* Radio Receiver Controls */ static int vivid_radio_rx_s_ctrl(struct v4l2_ctrl *ctrl) { struct vivid_dev *dev = container_of(ctrl->handler, struct vivid_dev, ctrl_hdl_radio_rx); switch (ctrl->id) { 
case VIVID_CID_RADIO_SEEK_MODE: dev->radio_rx_hw_seek_mode = ctrl->val; break; case VIVID_CID_RADIO_SEEK_PROG_LIM: dev->radio_rx_hw_seek_prog_lim = ctrl->val; break; case VIVID_CID_RADIO_RX_RDS_RBDS: dev->rds_gen.use_rbds = ctrl->val; break; case VIVID_CID_RADIO_RX_RDS_BLOCKIO: dev->radio_rx_rds_controls = ctrl->val; dev->radio_rx_caps &= ~V4L2_CAP_READWRITE; dev->radio_rx_rds_use_alternates = false; if (!dev->radio_rx_rds_controls) { dev->radio_rx_caps |= V4L2_CAP_READWRITE; __v4l2_ctrl_s_ctrl(dev->radio_rx_rds_pty, 0); __v4l2_ctrl_s_ctrl(dev->radio_rx_rds_ta, 0); __v4l2_ctrl_s_ctrl(dev->radio_rx_rds_tp, 0); __v4l2_ctrl_s_ctrl(dev->radio_rx_rds_ms, 0); __v4l2_ctrl_s_ctrl_string(dev->radio_rx_rds_psname, ""); __v4l2_ctrl_s_ctrl_string(dev->radio_rx_rds_radiotext, ""); } v4l2_ctrl_activate(dev->radio_rx_rds_pty, dev->radio_rx_rds_controls); v4l2_ctrl_activate(dev->radio_rx_rds_psname, dev->radio_rx_rds_controls); v4l2_ctrl_activate(dev->radio_rx_rds_radiotext, dev->radio_rx_rds_controls); v4l2_ctrl_activate(dev->radio_rx_rds_ta, dev->radio_rx_rds_controls); v4l2_ctrl_activate(dev->radio_rx_rds_tp, dev->radio_rx_rds_controls); v4l2_ctrl_activate(dev->radio_rx_rds_ms, dev->radio_rx_rds_controls); dev->radio_rx_dev.device_caps = dev->radio_rx_caps; break; case V4L2_CID_RDS_RECEPTION: dev->radio_rx_rds_enabled = ctrl->val; break; } return 0; } static const struct v4l2_ctrl_ops vivid_radio_rx_ctrl_ops = { .s_ctrl = vivid_radio_rx_s_ctrl, }; static const char * const vivid_ctrl_radio_rds_mode_strings[] = { "Block I/O", "Controls", NULL, }; static const struct v4l2_ctrl_config vivid_ctrl_radio_rx_rds_blockio = { .ops = &vivid_radio_rx_ctrl_ops, .id = VIVID_CID_RADIO_RX_RDS_BLOCKIO, .name = "RDS Rx I/O Mode", .type = V4L2_CTRL_TYPE_MENU, .qmenu = vivid_ctrl_radio_rds_mode_strings, .max = 1, }; static const struct v4l2_ctrl_config vivid_ctrl_radio_rx_rds_rbds = { .ops = &vivid_radio_rx_ctrl_ops, .id = VIVID_CID_RADIO_RX_RDS_RBDS, .name = "Generate RBDS Instead of RDS", .type = V4L2_CTRL_TYPE_BOOLEAN, .max = 1, .step = 1, }; static const char * const vivid_ctrl_radio_hw_seek_mode_strings[] = { "Bounded", "Wrap Around", "Both", NULL, }; static const struct v4l2_ctrl_config vivid_ctrl_radio_hw_seek_mode = { .ops = &vivid_radio_rx_ctrl_ops, .id = VIVID_CID_RADIO_SEEK_MODE, .name = "Radio HW Seek Mode", .type = V4L2_CTRL_TYPE_MENU, .max = 2, .qmenu = vivid_ctrl_radio_hw_seek_mode_strings, }; static const struct v4l2_ctrl_config vivid_ctrl_radio_hw_seek_prog_lim = { .ops = &vivid_radio_rx_ctrl_ops, .id = VIVID_CID_RADIO_SEEK_PROG_LIM, .name = "Radio Programmable HW Seek", .type = V4L2_CTRL_TYPE_BOOLEAN, .max = 1, .step = 1, }; /* Radio Transmitter Controls */ static int vivid_radio_tx_s_ctrl(struct v4l2_ctrl *ctrl) { struct vivid_dev *dev = container_of(ctrl->handler, struct vivid_dev, ctrl_hdl_radio_tx); switch (ctrl->id) { case VIVID_CID_RADIO_TX_RDS_BLOCKIO: dev->radio_tx_rds_controls = ctrl->val; dev->radio_tx_caps &= ~V4L2_CAP_READWRITE; if (!dev->radio_tx_rds_controls) dev->radio_tx_caps |= V4L2_CAP_READWRITE; dev->radio_tx_dev.device_caps = dev->radio_tx_caps; break; case V4L2_CID_RDS_TX_PTY: if (dev->radio_rx_rds_controls) v4l2_ctrl_s_ctrl(dev->radio_rx_rds_pty, ctrl->val); break; case V4L2_CID_RDS_TX_PS_NAME: if (dev->radio_rx_rds_controls) v4l2_ctrl_s_ctrl_string(dev->radio_rx_rds_psname, ctrl->p_new.p_char); break; case V4L2_CID_RDS_TX_RADIO_TEXT: if (dev->radio_rx_rds_controls) v4l2_ctrl_s_ctrl_string(dev->radio_rx_rds_radiotext, ctrl->p_new.p_char); break; case 
V4L2_CID_RDS_TX_TRAFFIC_ANNOUNCEMENT: if (dev->radio_rx_rds_controls) v4l2_ctrl_s_ctrl(dev->radio_rx_rds_ta, ctrl->val); break; case V4L2_CID_RDS_TX_TRAFFIC_PROGRAM: if (dev->radio_rx_rds_controls) v4l2_ctrl_s_ctrl(dev->radio_rx_rds_tp, ctrl->val); break; case V4L2_CID_RDS_TX_MUSIC_SPEECH: if (dev->radio_rx_rds_controls) v4l2_ctrl_s_ctrl(dev->radio_rx_rds_ms, ctrl->val); break; } return 0; } static const struct v4l2_ctrl_ops vivid_radio_tx_ctrl_ops = { .s_ctrl = vivid_radio_tx_s_ctrl, }; static const struct v4l2_ctrl_config vivid_ctrl_radio_tx_rds_blockio = { .ops = &vivid_radio_tx_ctrl_ops, .id = VIVID_CID_RADIO_TX_RDS_BLOCKIO, .name = "RDS Tx I/O Mode", .type = V4L2_CTRL_TYPE_MENU, .qmenu = vivid_ctrl_radio_rds_mode_strings, .max = 1, .def = 1, }; /* SDR Capture Controls */ static int vivid_sdr_cap_s_ctrl(struct v4l2_ctrl *ctrl) { struct vivid_dev *dev = container_of(ctrl->handler, struct vivid_dev, ctrl_hdl_sdr_cap); switch (ctrl->id) { case VIVID_CID_SDR_CAP_FM_DEVIATION: dev->sdr_fm_deviation = ctrl->val; break; } return 0; } static const struct v4l2_ctrl_ops vivid_sdr_cap_ctrl_ops = { .s_ctrl = vivid_sdr_cap_s_ctrl, }; static const struct v4l2_ctrl_config vivid_ctrl_sdr_cap_fm_deviation = { .ops = &vivid_sdr_cap_ctrl_ops, .id = VIVID_CID_SDR_CAP_FM_DEVIATION, .name = "FM Deviation", .type = V4L2_CTRL_TYPE_INTEGER, .min = 100, .max = 200000, .def = 75000, .step = 1, }; static const struct v4l2_ctrl_config vivid_ctrl_class = { .ops = &vivid_user_gen_ctrl_ops, .flags = V4L2_CTRL_FLAG_READ_ONLY | V4L2_CTRL_FLAG_WRITE_ONLY, .id = VIVID_CID_VIVID_CLASS, .name = "Vivid Controls", .type = V4L2_CTRL_TYPE_CTRL_CLASS, }; int vivid_create_controls(struct vivid_dev *dev, bool show_ccs_cap, bool show_ccs_out, bool no_error_inj, bool has_sdtv, bool has_hdmi) { struct v4l2_ctrl_handler *hdl_user_gen = &dev->ctrl_hdl_user_gen; struct v4l2_ctrl_handler *hdl_user_vid = &dev->ctrl_hdl_user_vid; struct v4l2_ctrl_handler *hdl_user_aud = &dev->ctrl_hdl_user_aud; struct v4l2_ctrl_handler *hdl_streaming = &dev->ctrl_hdl_streaming; struct v4l2_ctrl_handler *hdl_sdtv_cap = &dev->ctrl_hdl_sdtv_cap; struct v4l2_ctrl_handler *hdl_loop_cap = &dev->ctrl_hdl_loop_cap; struct v4l2_ctrl_handler *hdl_fb = &dev->ctrl_hdl_fb; struct v4l2_ctrl_handler *hdl_vid_cap = &dev->ctrl_hdl_vid_cap; struct v4l2_ctrl_handler *hdl_vid_out = &dev->ctrl_hdl_vid_out; struct v4l2_ctrl_handler *hdl_vbi_cap = &dev->ctrl_hdl_vbi_cap; struct v4l2_ctrl_handler *hdl_vbi_out = &dev->ctrl_hdl_vbi_out; struct v4l2_ctrl_handler *hdl_radio_rx = &dev->ctrl_hdl_radio_rx; struct v4l2_ctrl_handler *hdl_radio_tx = &dev->ctrl_hdl_radio_tx; struct v4l2_ctrl_handler *hdl_sdr_cap = &dev->ctrl_hdl_sdr_cap; struct v4l2_ctrl_config vivid_ctrl_dv_timings = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_DV_TIMINGS, .name = "DV Timings", .type = V4L2_CTRL_TYPE_MENU, }; int i; v4l2_ctrl_handler_init(hdl_user_gen, 10); v4l2_ctrl_new_custom(hdl_user_gen, &vivid_ctrl_class, NULL); v4l2_ctrl_handler_init(hdl_user_vid, 9); v4l2_ctrl_new_custom(hdl_user_vid, &vivid_ctrl_class, NULL); v4l2_ctrl_handler_init(hdl_user_aud, 2); v4l2_ctrl_new_custom(hdl_user_aud, &vivid_ctrl_class, NULL); v4l2_ctrl_handler_init(hdl_streaming, 8); v4l2_ctrl_new_custom(hdl_streaming, &vivid_ctrl_class, NULL); v4l2_ctrl_handler_init(hdl_sdtv_cap, 2); v4l2_ctrl_new_custom(hdl_sdtv_cap, &vivid_ctrl_class, NULL); v4l2_ctrl_handler_init(hdl_loop_cap, 1); v4l2_ctrl_new_custom(hdl_loop_cap, &vivid_ctrl_class, NULL); v4l2_ctrl_handler_init(hdl_fb, 1); v4l2_ctrl_new_custom(hdl_fb, &vivid_ctrl_class, 
NULL); v4l2_ctrl_handler_init(hdl_vid_cap, 55); v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_class, NULL); v4l2_ctrl_handler_init(hdl_vid_out, 26); if (!no_error_inj || dev->has_fb) v4l2_ctrl_new_custom(hdl_vid_out, &vivid_ctrl_class, NULL); v4l2_ctrl_handler_init(hdl_vbi_cap, 21); v4l2_ctrl_new_custom(hdl_vbi_cap, &vivid_ctrl_class, NULL); v4l2_ctrl_handler_init(hdl_vbi_out, 19); if (!no_error_inj) v4l2_ctrl_new_custom(hdl_vbi_out, &vivid_ctrl_class, NULL); v4l2_ctrl_handler_init(hdl_radio_rx, 17); v4l2_ctrl_new_custom(hdl_radio_rx, &vivid_ctrl_class, NULL); v4l2_ctrl_handler_init(hdl_radio_tx, 17); v4l2_ctrl_new_custom(hdl_radio_tx, &vivid_ctrl_class, NULL); v4l2_ctrl_handler_init(hdl_sdr_cap, 19); v4l2_ctrl_new_custom(hdl_sdr_cap, &vivid_ctrl_class, NULL); /* User Controls */ dev->volume = v4l2_ctrl_new_std(hdl_user_aud, NULL, V4L2_CID_AUDIO_VOLUME, 0, 255, 1, 200); dev->mute = v4l2_ctrl_new_std(hdl_user_aud, NULL, V4L2_CID_AUDIO_MUTE, 0, 1, 1, 0); if (dev->has_vid_cap) { dev->brightness = v4l2_ctrl_new_std(hdl_user_vid, &vivid_user_vid_ctrl_ops, V4L2_CID_BRIGHTNESS, 0, 255, 1, 128); for (i = 0; i < MAX_INPUTS; i++) dev->input_brightness[i] = 128; dev->contrast = v4l2_ctrl_new_std(hdl_user_vid, &vivid_user_vid_ctrl_ops, V4L2_CID_CONTRAST, 0, 255, 1, 128); dev->saturation = v4l2_ctrl_new_std(hdl_user_vid, &vivid_user_vid_ctrl_ops, V4L2_CID_SATURATION, 0, 255, 1, 128); dev->hue = v4l2_ctrl_new_std(hdl_user_vid, &vivid_user_vid_ctrl_ops, V4L2_CID_HUE, -128, 128, 1, 0); v4l2_ctrl_new_std(hdl_user_vid, &vivid_user_vid_ctrl_ops, V4L2_CID_HFLIP, 0, 1, 1, 0); v4l2_ctrl_new_std(hdl_user_vid, &vivid_user_vid_ctrl_ops, V4L2_CID_VFLIP, 0, 1, 1, 0); dev->autogain = v4l2_ctrl_new_std(hdl_user_vid, &vivid_user_vid_ctrl_ops, V4L2_CID_AUTOGAIN, 0, 1, 1, 1); dev->gain = v4l2_ctrl_new_std(hdl_user_vid, &vivid_user_vid_ctrl_ops, V4L2_CID_GAIN, 0, 255, 1, 100); dev->alpha = v4l2_ctrl_new_std(hdl_user_vid, &vivid_user_vid_ctrl_ops, V4L2_CID_ALPHA_COMPONENT, 0, 255, 1, 0); } dev->button = v4l2_ctrl_new_custom(hdl_user_gen, &vivid_ctrl_button, NULL); dev->int32 = v4l2_ctrl_new_custom(hdl_user_gen, &vivid_ctrl_int32, NULL); dev->int64 = v4l2_ctrl_new_custom(hdl_user_gen, &vivid_ctrl_int64, NULL); dev->boolean = v4l2_ctrl_new_custom(hdl_user_gen, &vivid_ctrl_boolean, NULL); dev->menu = v4l2_ctrl_new_custom(hdl_user_gen, &vivid_ctrl_menu, NULL); dev->string = v4l2_ctrl_new_custom(hdl_user_gen, &vivid_ctrl_string, NULL); dev->bitmask = v4l2_ctrl_new_custom(hdl_user_gen, &vivid_ctrl_bitmask, NULL); dev->int_menu = v4l2_ctrl_new_custom(hdl_user_gen, &vivid_ctrl_int_menu, NULL); v4l2_ctrl_new_custom(hdl_user_gen, &vivid_ctrl_u32_array, NULL); v4l2_ctrl_new_custom(hdl_user_gen, &vivid_ctrl_u16_matrix, NULL); v4l2_ctrl_new_custom(hdl_user_gen, &vivid_ctrl_u8_4d_array, NULL); if (dev->has_vid_cap) { /* Image Processing Controls */ struct v4l2_ctrl_config vivid_ctrl_test_pattern = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_TEST_PATTERN, .name = "Test Pattern", .type = V4L2_CTRL_TYPE_MENU, .max = TPG_PAT_NOISE, .qmenu = tpg_pattern_strings, }; dev->test_pattern = v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_test_pattern, NULL); v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_perc_fill, NULL); v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_hor_movement, NULL); v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_vert_movement, NULL); v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_osd_mode, NULL); v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_show_border, NULL); v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_show_square, 
NULL); v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_hflip, NULL); v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_vflip, NULL); v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_insert_sav, NULL); v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_insert_eav, NULL); v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_reduced_fps, NULL); if (show_ccs_cap) { dev->ctrl_has_crop_cap = v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_has_crop_cap, NULL); dev->ctrl_has_compose_cap = v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_has_compose_cap, NULL); dev->ctrl_has_scaler_cap = v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_has_scaler_cap, NULL); } v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_tstamp_src, NULL); dev->colorspace = v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_colorspace, NULL); v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_xfer_func, NULL); v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_ycbcr_enc, NULL); v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_hsv_enc, NULL); v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_quantization, NULL); v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_alpha_mode, NULL); } if (dev->has_vid_out && show_ccs_out) { dev->ctrl_has_crop_out = v4l2_ctrl_new_custom(hdl_vid_out, &vivid_ctrl_has_crop_out, NULL); dev->ctrl_has_compose_out = v4l2_ctrl_new_custom(hdl_vid_out, &vivid_ctrl_has_compose_out, NULL); dev->ctrl_has_scaler_out = v4l2_ctrl_new_custom(hdl_vid_out, &vivid_ctrl_has_scaler_out, NULL); } /* * Testing this driver with v4l2-compliance will trigger the error * injection controls, and after that nothing will work as expected. * So we have a module option to drop these error injecting controls * allowing us to run v4l2_compliance again. */ if (!no_error_inj) { v4l2_ctrl_new_custom(hdl_user_gen, &vivid_ctrl_disconnect, NULL); v4l2_ctrl_new_custom(hdl_streaming, &vivid_ctrl_dqbuf_error, NULL); v4l2_ctrl_new_custom(hdl_streaming, &vivid_ctrl_perc_dropped, NULL); v4l2_ctrl_new_custom(hdl_streaming, &vivid_ctrl_queue_setup_error, NULL); v4l2_ctrl_new_custom(hdl_streaming, &vivid_ctrl_buf_prepare_error, NULL); v4l2_ctrl_new_custom(hdl_streaming, &vivid_ctrl_start_streaming_error, NULL); v4l2_ctrl_new_custom(hdl_streaming, &vivid_ctrl_queue_error, NULL); v4l2_ctrl_new_custom(hdl_streaming, &vivid_ctrl_seq_wrap, NULL); v4l2_ctrl_new_custom(hdl_streaming, &vivid_ctrl_time_wrap, NULL); } if (has_sdtv && (dev->has_vid_cap || dev->has_vbi_cap)) { if (dev->has_vid_cap) v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_std_aspect_ratio, NULL); dev->ctrl_std_signal_mode = v4l2_ctrl_new_custom(hdl_sdtv_cap, &vivid_ctrl_std_signal_mode, NULL); dev->ctrl_standard = v4l2_ctrl_new_custom(hdl_sdtv_cap, &vivid_ctrl_standard, NULL); if (dev->ctrl_std_signal_mode) v4l2_ctrl_cluster(2, &dev->ctrl_std_signal_mode); if (dev->has_raw_vbi_cap) v4l2_ctrl_new_custom(hdl_vbi_cap, &vivid_ctrl_vbi_cap_interlaced, NULL); } if (has_hdmi && dev->has_vid_cap) { dev->ctrl_dv_timings_signal_mode = v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_dv_timings_signal_mode, NULL); vivid_ctrl_dv_timings.max = dev->query_dv_timings_size - 1; vivid_ctrl_dv_timings.qmenu = (const char * const *)dev->query_dv_timings_qmenu; dev->ctrl_dv_timings = v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_dv_timings, NULL); if (dev->ctrl_dv_timings_signal_mode) v4l2_ctrl_cluster(2, &dev->ctrl_dv_timings_signal_mode); v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_dv_timings_aspect_ratio, NULL); v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_max_edid_blocks, NULL); dev->real_rgb_range_cap = v4l2_ctrl_new_custom(hdl_vid_cap, 
&vivid_ctrl_limited_rgb_range, NULL); dev->rgb_range_cap = v4l2_ctrl_new_std_menu(hdl_vid_cap, &vivid_vid_cap_ctrl_ops, V4L2_CID_DV_RX_RGB_RANGE, V4L2_DV_RGB_RANGE_FULL, 0, V4L2_DV_RGB_RANGE_AUTO); } if (has_hdmi && dev->has_vid_out) { /* * We aren't doing anything with this at the moment, but * HDMI outputs typically have these controls. */ dev->ctrl_tx_rgb_range = v4l2_ctrl_new_std_menu(hdl_vid_out, NULL, V4L2_CID_DV_TX_RGB_RANGE, V4L2_DV_RGB_RANGE_FULL, 0, V4L2_DV_RGB_RANGE_AUTO); dev->ctrl_tx_mode = v4l2_ctrl_new_std_menu(hdl_vid_out, NULL, V4L2_CID_DV_TX_MODE, V4L2_DV_TX_MODE_HDMI, 0, V4L2_DV_TX_MODE_HDMI); } if ((dev->has_vid_cap && dev->has_vid_out) || (dev->has_vbi_cap && dev->has_vbi_out)) v4l2_ctrl_new_custom(hdl_loop_cap, &vivid_ctrl_loop_video, NULL); if (dev->has_fb) v4l2_ctrl_new_custom(hdl_fb, &vivid_ctrl_clear_fb, NULL); if (dev->has_radio_rx) { v4l2_ctrl_new_custom(hdl_radio_rx, &vivid_ctrl_radio_hw_seek_mode, NULL); v4l2_ctrl_new_custom(hdl_radio_rx, &vivid_ctrl_radio_hw_seek_prog_lim, NULL); v4l2_ctrl_new_custom(hdl_radio_rx, &vivid_ctrl_radio_rx_rds_blockio, NULL); v4l2_ctrl_new_custom(hdl_radio_rx, &vivid_ctrl_radio_rx_rds_rbds, NULL); v4l2_ctrl_new_std(hdl_radio_rx, &vivid_radio_rx_ctrl_ops, V4L2_CID_RDS_RECEPTION, 0, 1, 1, 1); dev->radio_rx_rds_pty = v4l2_ctrl_new_std(hdl_radio_rx, &vivid_radio_rx_ctrl_ops, V4L2_CID_RDS_RX_PTY, 0, 31, 1, 0); dev->radio_rx_rds_psname = v4l2_ctrl_new_std(hdl_radio_rx, &vivid_radio_rx_ctrl_ops, V4L2_CID_RDS_RX_PS_NAME, 0, 8, 8, 0); dev->radio_rx_rds_radiotext = v4l2_ctrl_new_std(hdl_radio_rx, &vivid_radio_rx_ctrl_ops, V4L2_CID_RDS_RX_RADIO_TEXT, 0, 64, 64, 0); dev->radio_rx_rds_ta = v4l2_ctrl_new_std(hdl_radio_rx, &vivid_radio_rx_ctrl_ops, V4L2_CID_RDS_RX_TRAFFIC_ANNOUNCEMENT, 0, 1, 1, 0); dev->radio_rx_rds_tp = v4l2_ctrl_new_std(hdl_radio_rx, &vivid_radio_rx_ctrl_ops, V4L2_CID_RDS_RX_TRAFFIC_PROGRAM, 0, 1, 1, 0); dev->radio_rx_rds_ms = v4l2_ctrl_new_std(hdl_radio_rx, &vivid_radio_rx_ctrl_ops, V4L2_CID_RDS_RX_MUSIC_SPEECH, 0, 1, 1, 1); } if (dev->has_radio_tx) { v4l2_ctrl_new_custom(hdl_radio_tx, &vivid_ctrl_radio_tx_rds_blockio, NULL); dev->radio_tx_rds_pi = v4l2_ctrl_new_std(hdl_radio_tx, &vivid_radio_tx_ctrl_ops, V4L2_CID_RDS_TX_PI, 0, 0xffff, 1, 0x8088); dev->radio_tx_rds_pty = v4l2_ctrl_new_std(hdl_radio_tx, &vivid_radio_tx_ctrl_ops, V4L2_CID_RDS_TX_PTY, 0, 31, 1, 3); dev->radio_tx_rds_psname = v4l2_ctrl_new_std(hdl_radio_tx, &vivid_radio_tx_ctrl_ops, V4L2_CID_RDS_TX_PS_NAME, 0, 8, 8, 0); if (dev->radio_tx_rds_psname) v4l2_ctrl_s_ctrl_string(dev->radio_tx_rds_psname, "VIVID-TX"); dev->radio_tx_rds_radiotext = v4l2_ctrl_new_std(hdl_radio_tx, &vivid_radio_tx_ctrl_ops, V4L2_CID_RDS_TX_RADIO_TEXT, 0, 64 * 2, 64, 0); if (dev->radio_tx_rds_radiotext) v4l2_ctrl_s_ctrl_string(dev->radio_tx_rds_radiotext, "This is a VIVID default Radio Text template text, change at will"); dev->radio_tx_rds_mono_stereo = v4l2_ctrl_new_std(hdl_radio_tx, &vivid_radio_tx_ctrl_ops, V4L2_CID_RDS_TX_MONO_STEREO, 0, 1, 1, 1); dev->radio_tx_rds_art_head = v4l2_ctrl_new_std(hdl_radio_tx, &vivid_radio_tx_ctrl_ops, V4L2_CID_RDS_TX_ARTIFICIAL_HEAD, 0, 1, 1, 0); dev->radio_tx_rds_compressed = v4l2_ctrl_new_std(hdl_radio_tx, &vivid_radio_tx_ctrl_ops, V4L2_CID_RDS_TX_COMPRESSED, 0, 1, 1, 0); dev->radio_tx_rds_dyn_pty = v4l2_ctrl_new_std(hdl_radio_tx, &vivid_radio_tx_ctrl_ops, V4L2_CID_RDS_TX_DYNAMIC_PTY, 0, 1, 1, 0); dev->radio_tx_rds_ta = v4l2_ctrl_new_std(hdl_radio_tx, &vivid_radio_tx_ctrl_ops, V4L2_CID_RDS_TX_TRAFFIC_ANNOUNCEMENT, 0, 1, 1, 0); dev->radio_tx_rds_tp =
v4l2_ctrl_new_std(hdl_radio_tx, &vivid_radio_tx_ctrl_ops, V4L2_CID_RDS_TX_TRAFFIC_PROGRAM, 0, 1, 1, 1); dev->radio_tx_rds_ms = v4l2_ctrl_new_std(hdl_radio_tx, &vivid_radio_tx_ctrl_ops, V4L2_CID_RDS_TX_MUSIC_SPEECH, 0, 1, 1, 1); } if (dev->has_sdr_cap) { v4l2_ctrl_new_custom(hdl_sdr_cap, &vivid_ctrl_sdr_cap_fm_deviation, NULL); } if (hdl_user_gen->error) return hdl_user_gen->error; if (hdl_user_vid->error) return hdl_user_vid->error; if (hdl_user_aud->error) return hdl_user_aud->error; if (hdl_streaming->error) return hdl_streaming->error; if (hdl_sdr_cap->error) return hdl_sdr_cap->error; if (hdl_loop_cap->error) return hdl_loop_cap->error; if (dev->autogain) v4l2_ctrl_auto_cluster(2, &dev->autogain, 0, true); if (dev->has_vid_cap) { v4l2_ctrl_add_handler(hdl_vid_cap, hdl_user_gen, NULL); v4l2_ctrl_add_handler(hdl_vid_cap, hdl_user_vid, NULL); v4l2_ctrl_add_handler(hdl_vid_cap, hdl_user_aud, NULL); v4l2_ctrl_add_handler(hdl_vid_cap, hdl_streaming, NULL); v4l2_ctrl_add_handler(hdl_vid_cap, hdl_sdtv_cap, NULL); v4l2_ctrl_add_handler(hdl_vid_cap, hdl_loop_cap, NULL); v4l2_ctrl_add_handler(hdl_vid_cap, hdl_fb, NULL); if (hdl_vid_cap->error) return hdl_vid_cap->error; dev->vid_cap_dev.ctrl_handler = hdl_vid_cap; } if (dev->has_vid_out) { v4l2_ctrl_add_handler(hdl_vid_out, hdl_user_gen, NULL); v4l2_ctrl_add_handler(hdl_vid_out, hdl_user_aud, NULL); v4l2_ctrl_add_handler(hdl_vid_out, hdl_streaming, NULL); v4l2_ctrl_add_handler(hdl_vid_out, hdl_fb, NULL); if (hdl_vid_out->error) return hdl_vid_out->error; dev->vid_out_dev.ctrl_handler = hdl_vid_out; } if (dev->has_vbi_cap) { v4l2_ctrl_add_handler(hdl_vbi_cap, hdl_user_gen, NULL); v4l2_ctrl_add_handler(hdl_vbi_cap, hdl_streaming, NULL); v4l2_ctrl_add_handler(hdl_vbi_cap, hdl_sdtv_cap, NULL); v4l2_ctrl_add_handler(hdl_vbi_cap, hdl_loop_cap, NULL); if (hdl_vbi_cap->error) return hdl_vbi_cap->error; dev->vbi_cap_dev.ctrl_handler = hdl_vbi_cap; } if (dev->has_vbi_out) { v4l2_ctrl_add_handler(hdl_vbi_out, hdl_user_gen, NULL); v4l2_ctrl_add_handler(hdl_vbi_out, hdl_streaming, NULL); if (hdl_vbi_out->error) return hdl_vbi_out->error; dev->vbi_out_dev.ctrl_handler = hdl_vbi_out; } if (dev->has_radio_rx) { v4l2_ctrl_add_handler(hdl_radio_rx, hdl_user_gen, NULL); v4l2_ctrl_add_handler(hdl_radio_rx, hdl_user_aud, NULL); if (hdl_radio_rx->error) return hdl_radio_rx->error; dev->radio_rx_dev.ctrl_handler = hdl_radio_rx; } if (dev->has_radio_tx) { v4l2_ctrl_add_handler(hdl_radio_tx, hdl_user_gen, NULL); v4l2_ctrl_add_handler(hdl_radio_tx, hdl_user_aud, NULL); if (hdl_radio_tx->error) return hdl_radio_tx->error; dev->radio_tx_dev.ctrl_handler = hdl_radio_tx; } if (dev->has_sdr_cap) { v4l2_ctrl_add_handler(hdl_sdr_cap, hdl_user_gen, NULL); v4l2_ctrl_add_handler(hdl_sdr_cap, hdl_streaming, NULL); if (hdl_sdr_cap->error) return hdl_sdr_cap->error; dev->sdr_cap_dev.ctrl_handler = hdl_sdr_cap; } return 0; } void vivid_free_controls(struct vivid_dev *dev) { v4l2_ctrl_handler_free(&dev->ctrl_hdl_vid_cap); v4l2_ctrl_handler_free(&dev->ctrl_hdl_vid_out); v4l2_ctrl_handler_free(&dev->ctrl_hdl_vbi_cap); v4l2_ctrl_handler_free(&dev->ctrl_hdl_vbi_out); v4l2_ctrl_handler_free(&dev->ctrl_hdl_radio_rx); v4l2_ctrl_handler_free(&dev->ctrl_hdl_radio_tx); v4l2_ctrl_handler_free(&dev->ctrl_hdl_sdr_cap); v4l2_ctrl_handler_free(&dev->ctrl_hdl_user_gen); v4l2_ctrl_handler_free(&dev->ctrl_hdl_user_vid); v4l2_ctrl_handler_free(&dev->ctrl_hdl_user_aud); v4l2_ctrl_handler_free(&dev->ctrl_hdl_streaming); v4l2_ctrl_handler_free(&dev->ctrl_hdl_sdtv_cap); 
v4l2_ctrl_handler_free(&dev->ctrl_hdl_loop_cap); v4l2_ctrl_handler_free(&dev->ctrl_hdl_fb); }
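/*
 * Minimal sketch of the control-handler pattern that vivid_create_controls()
 * above follows: initialize a handler, create the controls (creation errors
 * accumulate in hdl->error), check the error once at the end and attach the
 * handler to the video device.  The names my_setup_controls, my_ctrl_ops and
 * vdev are hypothetical and not part of the vivid driver.
 */
#include <media/v4l2-ctrls.h>
#include <media/v4l2-dev.h>

static int my_setup_controls(struct v4l2_ctrl_handler *hdl,
			     const struct v4l2_ctrl_ops *my_ctrl_ops,
			     struct video_device *vdev)
{
	v4l2_ctrl_handler_init(hdl, 2);	/* hint: roughly 2 controls follow */
	v4l2_ctrl_new_std(hdl, my_ctrl_ops, V4L2_CID_BRIGHTNESS, 0, 255, 1, 128);
	v4l2_ctrl_new_std(hdl, my_ctrl_ops, V4L2_CID_CONTRAST, 0, 255, 1, 128);
	if (hdl->error) {
		int err = hdl->error;

		v4l2_ctrl_handler_free(hdl);
		return err;
	}
	vdev->ctrl_handler = hdl;
	return 0;
}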
/* * videobuf2-memops.c - generic memory handling routines for videobuf2 * * Copyright (C) 2010 Samsung Electronics * * Author: Pawel Osciak <pawel@osciak.com> * Marek Szyprowski <m.szyprowski@samsung.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation. */ #include <linux/slab.h> #include <linux/module.h> #include <linux/dma-mapping.h> #include <linux/vmalloc.h> #include <linux/mm.h> #include <linux/sched.h> #include <linux/file.h> #include <media/videobuf2-v4l2.h> #include <media/videobuf2-memops.h> /** * vb2_create_framevec() - map virtual addresses to pfns * @start: Virtual user address where we start mapping * @length: Length of a range to map * @write: Should we map for writing into the area * * This function allocates and fills in a vector with pfns corresponding to * virtual address range passed in arguments. If pfns have corresponding pages, * page references are also grabbed to pin pages in memory. The function * returns pointer to the vector on success and error pointer in case of * failure. Returned vector needs to be freed via vb2_destroy_framevec(). */ struct frame_vector *vb2_create_framevec(unsigned long start, unsigned long length, bool write) { int ret; unsigned long first, last; unsigned long nr; struct frame_vector *vec; unsigned int flags = FOLL_FORCE; if (write) flags |= FOLL_WRITE; first = start >> PAGE_SHIFT; last = (start + length - 1) >> PAGE_SHIFT; nr = last - first + 1; vec = frame_vector_create(nr); if (!vec) return ERR_PTR(-ENOMEM); ret = get_vaddr_frames(start & PAGE_MASK, nr, flags, vec); if (ret < 0) goto out_destroy; /* We accept only complete set of PFNs */ if (ret != nr) { ret = -EFAULT; goto out_release; } return vec; out_release: put_vaddr_frames(vec); out_destroy: frame_vector_destroy(vec); return ERR_PTR(ret); } EXPORT_SYMBOL(vb2_create_framevec); /** * vb2_destroy_framevec() - release vector of mapped pfns * @vec: vector of pfns / pages to release * * This releases references to all pages in the vector @vec (if corresponding * pfns are backed by pages) and frees the passed vector. */ void vb2_destroy_framevec(struct frame_vector *vec) { put_vaddr_frames(vec); frame_vector_destroy(vec); } EXPORT_SYMBOL(vb2_destroy_framevec); /** * vb2_common_vm_open() - increase refcount of the vma * @vma: virtual memory region for the mapping * * This function adds another user to the provided vma. It expects * struct vb2_vmarea_handler pointer in vma->vm_private_data. */ static void vb2_common_vm_open(struct vm_area_struct *vma) { struct vb2_vmarea_handler *h = vma->vm_private_data; pr_debug("%s: %p, refcount: %d, vma: %08lx-%08lx\n", __func__, h, refcount_read(h->refcount), vma->vm_start, vma->vm_end); refcount_inc(h->refcount); } /** * vb2_common_vm_close() - decrease refcount of the vma * @vma: virtual memory region for the mapping * * This function releases the user from the provided vma. It expects * struct vb2_vmarea_handler pointer in vma->vm_private_data.
*/ static void vb2_common_vm_close(struct vm_area_struct *vma) { struct vb2_vmarea_handler *h = vma->vm_private_data; pr_debug("%s: %p, refcount: %d, vma: %08lx-%08lx\n", __func__, h, refcount_read(h->refcount), vma->vm_start, vma->vm_end); h->put(h->arg); } /* * vb2_common_vm_ops - common vm_ops used for tracking refcount of mmaped * video buffers */ const struct vm_operations_struct vb2_common_vm_ops = { .open = vb2_common_vm_open, .close = vb2_common_vm_close, }; EXPORT_SYMBOL_GPL(vb2_common_vm_ops); MODULE_DESCRIPTION("common memory handling routines for videobuf2"); MODULE_AUTHOR("Pawel Osciak <pawel@osciak.com>"); MODULE_LICENSE("GPL");
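/*
 * Minimal usage sketch for the helpers above (the caller, my_pin_user_range,
 * is hypothetical and not part of videobuf2-memops.c): pin a userspace range,
 * use the pinned pages, then release them with vb2_destroy_framevec().
 */
#include <linux/err.h>
#include <linux/mm.h>
#include <media/videobuf2-memops.h>

static int my_pin_user_range(unsigned long vaddr, unsigned long size)
{
	struct frame_vector *vec;

	vec = vb2_create_framevec(vaddr, size, true /* map for writing */);
	if (IS_ERR(vec))
		return PTR_ERR(vec);

	/* ... access the pinned range, e.g. via frame_vector_pages(vec) ... */

	vb2_destroy_framevec(vec);
	return 0;
}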
/* * Internet Control Message Protocol (ICMPv6) * Linux INET6 implementation * * Authors: * Pedro Roque <roque@di.fc.ul.pt> * * Based on net/ipv4/icmp.c * * RFC 1885 * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ /* * Changes: * * Andi Kleen : exception handling * Andi Kleen add rate limits. never reply to an icmp. * add more length checks and other fixes. * yoshfuji : ensure to send parameter problem for * fragments. * YOSHIFUJI Hideaki @USAGI: added sysctl for icmp rate limit. * Randy Dunlap and * YOSHIFUJI Hideaki @USAGI: Per-interface statistics support * Kazunori MIYAZAWA @USAGI: change output process to use ip6_append_data */ #define pr_fmt(fmt) "IPv6: " fmt #include <linux/module.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/in.h> #include <linux/kernel.h> #include <linux/sockios.h> #include <linux/net.h> #include <linux/skbuff.h> #include <linux/init.h> #include <linux/netfilter.h> #include <linux/slab.h> #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> #endif #include <linux/inet.h> #include <linux/netdevice.h> #include <linux/icmpv6.h> #include <net/ip.h> #include <net/sock.h> #include <net/ipv6.h> #include <net/ip6_checksum.h> #include <net/ping.h> #include <net/protocol.h> #include <net/raw.h> #include <net/rawv6.h> #include <net/transp_v6.h> #include <net/ip6_route.h> #include <net/addrconf.h> #include <net/icmp.h> #include <net/xfrm.h> #include <net/inet_common.h> #include <net/dsfield.h> #include <net/l3mdev.h> #include <linux/uaccess.h> /* * The ICMP socket(s). This is the most convenient way to flow control * our ICMP output as well as maintain a clean interface throughout * all layers. All Socketless IP sends will soon be gone. * * On SMP we have one ICMP socket per-cpu.
*/ static inline struct sock *icmpv6_sk(struct net *net) { return net->ipv6.icmp_sk[smp_processor_id()]; } static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { /* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */ struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset); struct net *net = dev_net(skb->dev); if (type == ICMPV6_PKT_TOOBIG) ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0, sock_net_uid(net, NULL)); else if (type == NDISC_REDIRECT) ip6_redirect(skb, net, skb->dev->ifindex, 0, sock_net_uid(net, NULL)); if (!(type & ICMPV6_INFOMSG_MASK)) if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST) ping_err(skb, offset, ntohl(info)); } static int icmpv6_rcv(struct sk_buff *skb); static const struct inet6_protocol icmpv6_protocol = { .handler = icmpv6_rcv, .err_handler = icmpv6_err, .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; /* Called with BH disabled */ static __inline__ struct sock *icmpv6_xmit_lock(struct net *net) { struct sock *sk; sk = icmpv6_sk(net); if (unlikely(!spin_trylock(&sk->sk_lock.slock))) { /* This can happen if the output path (f.e. SIT or * ip6ip6 tunnel) signals dst_link_failure() for an * outgoing ICMP6 packet. */ return NULL; } return sk; } static __inline__ void icmpv6_xmit_unlock(struct sock *sk) { spin_unlock(&sk->sk_lock.slock); } /* * Figure out, may we reply to this packet with icmp error. * * We do not reply, if: * - it was icmp error message. * - it is truncated, so that it is known, that protocol is ICMPV6 * (i.e. in the middle of some exthdr) * * --ANK (980726) */ static bool is_ineligible(const struct sk_buff *skb) { int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data; int len = skb->len - ptr; __u8 nexthdr = ipv6_hdr(skb)->nexthdr; __be16 frag_off; if (len < 0) return true; ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off); if (ptr < 0) return false; if (nexthdr == IPPROTO_ICMPV6) { u8 _type, *tp; tp = skb_header_pointer(skb, ptr+offsetof(struct icmp6hdr, icmp6_type), sizeof(_type), &_type); if (!tp || !(*tp & ICMPV6_INFOMSG_MASK)) return true; } return false; } static bool icmpv6_mask_allow(int type) { /* Informational messages are not limited. */ if (type & ICMPV6_INFOMSG_MASK) return true; /* Do not limit pmtu discovery, it would break it. */ if (type == ICMPV6_PKT_TOOBIG) return true; return false; } static bool icmpv6_global_allow(int type) { if (icmpv6_mask_allow(type)) return true; if (icmp_global_allow()) return true; return false; } /* * Check the ICMP output rate limit */ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type, struct flowi6 *fl6) { struct net *net = sock_net(sk); struct dst_entry *dst; bool res = false; if (icmpv6_mask_allow(type)) return true; /* * Look up the output route. * XXX: perhaps the expire for routing entries cloned by * this lookup should be more aggressive (not longer than timeout). */ dst = ip6_route_output(net, sk, fl6); if (dst->error) { IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) { res = true; } else { struct rt6_info *rt = (struct rt6_info *)dst; int tmo = net->ipv6.sysctl.icmpv6_time; struct inet_peer *peer; /* Give more bandwidth to wider prefixes. 
*/ if (rt->rt6i_dst.plen < 128) tmo >>= ((128 - rt->rt6i_dst.plen)>>5); peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1); res = inet_peer_xrlim_allow(peer, tmo); if (peer) inet_putpeer(peer); } dst_release(dst); return res; } /* * an inline helper for the "simple" if statement below * checks if parameter problem report is caused by an * unrecognized IPv6 option that has the Option Type * highest-order two bits set to 10 */ static bool opt_unrec(struct sk_buff *skb, __u32 offset) { u8 _optval, *op; offset += skb_network_offset(skb); op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval); if (!op) return true; return (*op & 0xC0) == 0x80; } void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, struct icmp6hdr *thdr, int len) { struct sk_buff *skb; struct icmp6hdr *icmp6h; skb = skb_peek(&sk->sk_write_queue); if (!skb) return; icmp6h = icmp6_hdr(skb); memcpy(icmp6h, thdr, sizeof(struct icmp6hdr)); icmp6h->icmp6_cksum = 0; if (skb_queue_len(&sk->sk_write_queue) == 1) { skb->csum = csum_partial(icmp6h, sizeof(struct icmp6hdr), skb->csum); icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr, &fl6->daddr, len, fl6->flowi6_proto, skb->csum); } else { __wsum tmp_csum = 0; skb_queue_walk(&sk->sk_write_queue, skb) { tmp_csum = csum_add(tmp_csum, skb->csum); } tmp_csum = csum_partial(icmp6h, sizeof(struct icmp6hdr), tmp_csum); icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr, &fl6->daddr, len, fl6->flowi6_proto, tmp_csum); } ip6_push_pending_frames(sk); } struct icmpv6_msg { struct sk_buff *skb; int offset; uint8_t type; }; static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb) { struct icmpv6_msg *msg = (struct icmpv6_msg *) from; struct sk_buff *org_skb = msg->skb; __wsum csum = 0; csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset, to, len, csum); skb->csum = csum_block_add(skb->csum, csum, odd); if (!(msg->type & ICMPV6_INFOMSG_MASK)) nf_ct_attach(skb, org_skb); return 0; } #if IS_ENABLED(CONFIG_IPV6_MIP6) static void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt) { struct ipv6hdr *iph = ipv6_hdr(skb); struct ipv6_destopt_hao *hao; struct in6_addr tmp; int off; if (opt->dsthao) { off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO); if (likely(off >= 0)) { hao = (struct ipv6_destopt_hao *) (skb_network_header(skb) + off); tmp = iph->saddr; iph->saddr = hao->addr; hao->addr = tmp; } } } #else static inline void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt) {} #endif static struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb, struct sock *sk, struct flowi6 *fl6) { struct dst_entry *dst, *dst2; struct flowi6 fl2; int err; err = ip6_dst_lookup(net, sk, &dst, fl6); if (err) return ERR_PTR(err); /* * We won't send icmp if the destination is known * anycast. */ if (ipv6_anycast_destination(dst, &fl6->daddr)) { net_dbg_ratelimited("icmp6_send: acast source\n"); dst_release(dst); return ERR_PTR(-EINVAL); } /* No need to clone since we're just using its address. 
*/ dst2 = dst; dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0); if (!IS_ERR(dst)) { if (dst != dst2) return dst; } else { if (PTR_ERR(dst) == -EPERM) dst = NULL; else return dst; } err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6); if (err) goto relookup_failed; err = ip6_dst_lookup(net, sk, &dst2, &fl2); if (err) goto relookup_failed; dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP); if (!IS_ERR(dst2)) { dst_release(dst); dst = dst2; } else { err = PTR_ERR(dst2); if (err == -EPERM) { dst_release(dst); return dst2; } else goto relookup_failed; } relookup_failed: if (dst) return dst; return ERR_PTR(err); } static int icmp6_iif(const struct sk_buff *skb) { int iif = skb->dev->ifindex; /* for local traffic to local address, skb dev is the loopback * device. Check if there is a dst attached to the skb and if so * get the real device index. Same is needed for replies to a link * local address on a device enslaved to an L3 master device */ if (unlikely(iif == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) { const struct rt6_info *rt6 = skb_rt6_info(skb); if (rt6) iif = rt6->rt6i_idev->dev->ifindex; } return iif; } /* * Send an ICMP message in response to a packet in error */ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, const struct in6_addr *force_saddr, const struct inet6_skb_parm *parm) { struct inet6_dev *idev = NULL; struct ipv6hdr *hdr = ipv6_hdr(skb); struct sock *sk; struct net *net; struct ipv6_pinfo *np; const struct in6_addr *saddr = NULL; struct dst_entry *dst; struct icmp6hdr tmp_hdr; struct flowi6 fl6; struct icmpv6_msg msg; struct ipcm6_cookie ipc6; int iif = 0; int addr_type = 0; int len; u32 mark; if ((u8 *)hdr < skb->head || (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb)) return; if (!skb->dev) return; net = dev_net(skb->dev); mark = IP6_REPLY_MARK(net, skb->mark); /* * Make sure we respect the rules * i.e. RFC 1885 2.4(e) * Rule (e.1) is enforced by not using icmp6_send * in any code that processes icmp errors. */ addr_type = ipv6_addr_type(&hdr->daddr); if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) || ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr)) saddr = &hdr->daddr; /* * Dest addr check */ if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) { if (type != ICMPV6_PKT_TOOBIG && !(type == ICMPV6_PARAMPROB && code == ICMPV6_UNK_OPTION && (opt_unrec(skb, info)))) return; saddr = NULL; } addr_type = ipv6_addr_type(&hdr->saddr); /* * Source addr check */ if (__ipv6_addr_needs_scope_id(addr_type)) { iif = icmp6_iif(skb); } else { dst = skb_dst(skb); iif = l3mdev_master_ifindex(dst ? dst->dev : skb->dev); } /* * Must not send error if the source does not uniquely * identify a single node (RFC2463 Section 2.4). * We check unspecified / multicast addresses here, * and anycast addresses will be checked later. */ if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) { net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n", &hdr->saddr, &hdr->daddr); return; } /* * Never answer to a ICMP packet. 
*/ if (is_ineligible(skb)) { net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n", &hdr->saddr, &hdr->daddr); return; } /* Needed by both icmp_global_allow and icmpv6_xmit_lock */ local_bh_disable(); /* Check global sysctl_icmp_msgs_per_sec ratelimit */ if (!(skb->dev->flags&IFF_LOOPBACK) && !icmpv6_global_allow(type)) goto out_bh_enable; mip6_addr_swap(skb, parm); memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_ICMPV6; fl6.daddr = hdr->saddr; if (force_saddr) saddr = force_saddr; if (saddr) fl6.saddr = *saddr; fl6.flowi6_mark = mark; fl6.flowi6_oif = iif; fl6.fl6_icmp_type = type; fl6.fl6_icmp_code = code; fl6.flowi6_uid = sock_net_uid(net, NULL); fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL); security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); sk = icmpv6_xmit_lock(net); if (!sk) goto out_bh_enable; sk->sk_mark = mark; np = inet6_sk(sk); if (!icmpv6_xrlim_allow(sk, type, &fl6)) goto out; tmp_hdr.icmp6_type = type; tmp_hdr.icmp6_code = code; tmp_hdr.icmp6_cksum = 0; tmp_hdr.icmp6_pointer = htonl(info); if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) fl6.flowi6_oif = np->mcast_oif; else if (!fl6.flowi6_oif) fl6.flowi6_oif = np->ucast_oif; ipcm6_init_sk(&ipc6, np); fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel); dst = icmpv6_route_lookup(net, skb, sk, &fl6); if (IS_ERR(dst)) goto out; ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); msg.skb = skb; msg.offset = skb_network_offset(skb); msg.type = type; len = skb->len - msg.offset; len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr)); if (len < 0) { net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n", &hdr->saddr, &hdr->daddr); goto out_dst_release; } rcu_read_lock(); idev = __in6_dev_get(skb->dev); if (ip6_append_data(sk, icmpv6_getfrag, &msg, len + sizeof(struct icmp6hdr), sizeof(struct icmp6hdr), &ipc6, &fl6, (struct rt6_info *)dst, MSG_DONTWAIT)) { ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS); ip6_flush_pending_frames(sk); } else { icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, len + sizeof(struct icmp6hdr)); } rcu_read_unlock(); out_dst_release: dst_release(dst); out: icmpv6_xmit_unlock(sk); out_bh_enable: local_bh_enable(); } EXPORT_SYMBOL(icmp6_send); /* Slightly more convenient version of icmp6_send. */ void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos) { icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL, IP6CB(skb)); kfree_skb(skb); } /* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH * if sufficient data bytes are available * @nhs is the size of the tunnel header(s) : * Either an IPv4 header for SIT encap * an IPv4 header + GRE header for GRE encap */ int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type, unsigned int data_len) { struct in6_addr temp_saddr; struct rt6_info *rt; struct sk_buff *skb2; u32 info = 0; if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8)) return 1; /* RFC 4884 (partial) support for ICMP extensions */ if (data_len < 128 || (data_len & 7) || skb->len < data_len) data_len = 0; skb2 = data_len ? 
skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC); if (!skb2) return 1; skb_dst_drop(skb2); skb_pull(skb2, nhs); skb_reset_network_header(skb2); rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, skb, 0); if (rt && rt->dst.dev) skb2->dev = rt->dst.dev; ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr); if (data_len) { /* RFC 4884 (partial) support : * insert 0 padding at the end, before the extensions */ __skb_push(skb2, nhs); skb_reset_network_header(skb2); memmove(skb2->data, skb2->data + nhs, data_len - nhs); memset(skb2->data + data_len - nhs, 0, nhs); /* RFC 4884 4.5 : Length is measured in 64-bit words, * and stored in reserved[0] */ info = (data_len/8) << 24; } if (type == ICMP_TIME_EXCEEDED) icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, info, &temp_saddr, IP6CB(skb2)); else icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, info, &temp_saddr, IP6CB(skb2)); if (rt) ip6_rt_put(rt); kfree_skb(skb2); return 0; } EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach); static void icmpv6_echo_reply(struct sk_buff *skb) { struct net *net = dev_net(skb->dev); struct sock *sk; struct inet6_dev *idev; struct ipv6_pinfo *np; const struct in6_addr *saddr = NULL; struct icmp6hdr *icmph = icmp6_hdr(skb); struct icmp6hdr tmp_hdr; struct flowi6 fl6; struct icmpv6_msg msg; struct dst_entry *dst; struct ipcm6_cookie ipc6; u32 mark = IP6_REPLY_MARK(net, skb->mark); saddr = &ipv6_hdr(skb)->daddr; if (!ipv6_unicast_destination(skb) && !(net->ipv6.sysctl.anycast_src_echo_reply && ipv6_anycast_destination(skb_dst(skb), saddr))) saddr = NULL; memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr)); tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY; memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_ICMPV6; fl6.daddr = ipv6_hdr(skb)->saddr; if (saddr) fl6.saddr = *saddr; fl6.flowi6_oif = icmp6_iif(skb); fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY; fl6.flowi6_mark = mark; fl6.flowi6_uid = sock_net_uid(net, NULL); security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); local_bh_disable(); sk = icmpv6_xmit_lock(net); if (!sk) goto out_bh_enable; sk->sk_mark = mark; np = inet6_sk(sk); if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) fl6.flowi6_oif = np->mcast_oif; else if (!fl6.flowi6_oif) fl6.flowi6_oif = np->ucast_oif; if (ip6_dst_lookup(net, sk, &dst, &fl6)) goto out; dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0); if (IS_ERR(dst)) goto out; idev = __in6_dev_get(skb->dev); msg.skb = skb; msg.offset = 0; msg.type = ICMPV6_ECHO_REPLY; ipcm6_init_sk(&ipc6, np); ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb)); if (ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr), sizeof(struct icmp6hdr), &ipc6, &fl6, (struct rt6_info *)dst, MSG_DONTWAIT)) { __ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS); ip6_flush_pending_frames(sk); } else { icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, skb->len + sizeof(struct icmp6hdr)); } dst_release(dst); out: icmpv6_xmit_unlock(sk); out_bh_enable: local_bh_enable(); } void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info) { const struct inet6_protocol *ipprot; int inner_offset; __be16 frag_off; u8 nexthdr; struct net *net = dev_net(skb->dev); if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto out; nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr; if (ipv6_ext_hdr(nexthdr)) { /* now skip over extension headers */ inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, &frag_off); if (inner_offset < 0) goto out; } else { inner_offset = sizeof(struct 
ipv6hdr); } /* Checking header including 8 bytes of inner protocol header. */ if (!pskb_may_pull(skb, inner_offset+8)) goto out; /* BUGGG_FUTURE: we should try to parse exthdrs in this packet. Without this we will not be able, e.g., to make source routed pmtu discovery. Corresponding argument (opt) to notifiers is already added. --ANK (980726) */ ipprot = rcu_dereference(inet6_protos[nexthdr]); if (ipprot && ipprot->err_handler) ipprot->err_handler(skb, NULL, type, code, inner_offset, info); raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info); return; out: __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); } /* * Handle icmp messages */ static int icmpv6_rcv(struct sk_buff *skb) { struct net *net = dev_net(skb->dev); struct net_device *dev = skb->dev; struct inet6_dev *idev = __in6_dev_get(dev); const struct in6_addr *saddr, *daddr; struct icmp6hdr *hdr; u8 type; bool success = false; if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { struct sec_path *sp = skb_sec_path(skb); int nh; if (!(sp && sp->xvec[sp->len - 1]->props.flags & XFRM_STATE_ICMP)) goto drop_no_count; if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr))) goto drop_no_count; nh = skb_network_offset(skb); skb_set_network_header(skb, sizeof(*hdr)); if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb)) goto drop_no_count; skb_set_network_header(skb, nh); } __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS); saddr = &ipv6_hdr(skb)->saddr; daddr = &ipv6_hdr(skb)->daddr; if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) { net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n", saddr, daddr); goto csum_error; } if (!pskb_pull(skb, sizeof(*hdr))) goto discard_it; hdr = icmp6_hdr(skb); type = hdr->icmp6_type; ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type); switch (type) { case ICMPV6_ECHO_REQUEST: if (!net->ipv6.sysctl.icmpv6_echo_ignore_all) icmpv6_echo_reply(skb); break; case ICMPV6_ECHO_REPLY: success = ping_rcv(skb); break; case ICMPV6_PKT_TOOBIG: /* BUGGG_FUTURE: if packet contains rthdr, we cannot update standard destination cache. Seems, only "advanced" destination cache will allow to solve this problem --ANK (980726) */ if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto discard_it; hdr = icmp6_hdr(skb); /* to notify */ /* fall through */ case ICMPV6_DEST_UNREACH: case ICMPV6_TIME_EXCEED: case ICMPV6_PARAMPROB: icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu); break; case NDISC_ROUTER_SOLICITATION: case NDISC_ROUTER_ADVERTISEMENT: case NDISC_NEIGHBOUR_SOLICITATION: case NDISC_NEIGHBOUR_ADVERTISEMENT: case NDISC_REDIRECT: ndisc_rcv(skb); break; case ICMPV6_MGM_QUERY: igmp6_event_query(skb); break; case ICMPV6_MGM_REPORT: igmp6_event_report(skb); break; case ICMPV6_MGM_REDUCTION: case ICMPV6_NI_QUERY: case ICMPV6_NI_REPLY: case ICMPV6_MLD2_REPORT: case ICMPV6_DHAAD_REQUEST: case ICMPV6_DHAAD_REPLY: case ICMPV6_MOBILE_PREFIX_SOL: case ICMPV6_MOBILE_PREFIX_ADV: break; default: /* informational */ if (type & ICMPV6_INFOMSG_MASK) break; net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n", saddr, daddr); /* * error of unknown type.
* must pass to upper level */ icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu); } /* until the v6 path can be better sorted assume failure and * preserve the status quo behaviour for the rest of the paths to here */ if (success) consume_skb(skb); else kfree_skb(skb); return 0; csum_error: __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS); discard_it: __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS); drop_no_count: kfree_skb(skb); return 0; } void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6, u8 type, const struct in6_addr *saddr, const struct in6_addr *daddr, int oif) { memset(fl6, 0, sizeof(*fl6)); fl6->saddr = *saddr; fl6->daddr = *daddr; fl6->flowi6_proto = IPPROTO_ICMPV6; fl6->fl6_icmp_type = type; fl6->fl6_icmp_code = 0; fl6->flowi6_oif = oif; security_sk_classify_flow(sk, flowi6_to_flowi(fl6)); } static int __net_init icmpv6_sk_init(struct net *net) { struct sock *sk; int err, i, j; net->ipv6.icmp_sk = kcalloc(nr_cpu_ids, sizeof(struct sock *), GFP_KERNEL); if (!net->ipv6.icmp_sk) return -ENOMEM; for_each_possible_cpu(i) { err = inet_ctl_sock_create(&sk, PF_INET6, SOCK_RAW, IPPROTO_ICMPV6, net); if (err < 0) { pr_err("Failed to initialize the ICMP6 control socket (err %d)\n", err); goto fail; } net->ipv6.icmp_sk[i] = sk; /* Enough space for 2 64K ICMP packets, including * sk_buff struct overhead. */ sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024); } return 0; fail: for (j = 0; j < i; j++) inet_ctl_sock_destroy(net->ipv6.icmp_sk[j]); kfree(net->ipv6.icmp_sk); return err; } static void __net_exit icmpv6_sk_exit(struct net *net) { int i; for_each_possible_cpu(i) { inet_ctl_sock_destroy(net->ipv6.icmp_sk[i]); } kfree(net->ipv6.icmp_sk); } static struct pernet_operations icmpv6_sk_ops = { .init = icmpv6_sk_init, .exit = icmpv6_sk_exit, }; int __init icmpv6_init(void) { int err; err = register_pernet_subsys(&icmpv6_sk_ops); if (err < 0) return err; err = -EAGAIN; if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0) goto fail; err = inet6_register_icmp_sender(icmp6_send); if (err) goto sender_reg_err; return 0; sender_reg_err: inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6); fail: pr_err("Failed to register ICMP6 protocol\n"); unregister_pernet_subsys(&icmpv6_sk_ops); return err; } void icmpv6_cleanup(void) { inet6_unregister_icmp_sender(icmp6_send); unregister_pernet_subsys(&icmpv6_sk_ops); inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6); } static const struct icmp6_err { int err; int fatal; } tab_unreach[] = { { /* NOROUTE */ .err = ENETUNREACH, .fatal = 0, }, { /* ADM_PROHIBITED */ .err = EACCES, .fatal = 1, }, { /* Was NOT_NEIGHBOUR, now reserved */ .err = EHOSTUNREACH, .fatal = 0, }, { /* ADDR_UNREACH */ .err = EHOSTUNREACH, .fatal = 0, }, { /* PORT_UNREACH */ .err = ECONNREFUSED, .fatal = 1, }, { /* POLICY_FAIL */ .err = EACCES, .fatal = 1, }, { /* REJECT_ROUTE */ .err = EACCES, .fatal = 1, }, }; int icmpv6_err_convert(u8 type, u8 code, int *err) { int fatal = 0; *err = EPROTO; switch (type) { case ICMPV6_DEST_UNREACH: fatal = 1; if (code < ARRAY_SIZE(tab_unreach)) { *err = tab_unreach[code].err; fatal = tab_unreach[code].fatal; } break; case ICMPV6_PKT_TOOBIG: *err = EMSGSIZE; break; case ICMPV6_PARAMPROB: *err = EPROTO; fatal = 1; break; case ICMPV6_TIME_EXCEED: *err = EHOSTUNREACH; break; } return fatal; } EXPORT_SYMBOL(icmpv6_err_convert); #ifdef CONFIG_SYSCTL static struct ctl_table ipv6_icmp_table_template[] = { { .procname = "ratelimit", .data = &init_net.ipv6.sysctl.icmpv6_time, .maxlen = sizeof(int), .mode = 0644, 
.proc_handler = proc_dointvec_ms_jiffies, }, { .procname = "echo_ignore_all", .data = &init_net.ipv6.sysctl.icmpv6_echo_ignore_all, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { }, }; struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net) { struct ctl_table *table; table = kmemdup(ipv6_icmp_table_template, sizeof(ipv6_icmp_table_template), GFP_KERNEL); if (table) { table[0].data = &net->ipv6.sysctl.icmpv6_time; table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all; } return table; } #endif
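/*
 * Worked example (illustrative only, not part of icmp.c) of the prefix-length
 * scaling done in icmpv6_xrlim_allow() above: the per-peer rate-limit interval
 * starts at net->ipv6.sysctl.icmpv6_time and is halved for each full 32 bits
 * the destination prefix falls short of /128.
 */
static inline int icmpv6_example_scaled_tmo(int tmo, int plen)
{
	if (plen < 128)
		tmo >>= ((128 - plen) >> 5);
	return tmo;
}
/* With tmo = 1000 jiffies: /128 -> 1000, /96 -> 500, /64 -> 250, /0 -> 62 */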
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com * Copyright (c) 2016,2017 Facebook * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation.
* * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. */ #include <linux/bpf.h> #include <linux/btf.h> #include <linux/err.h> #include <linux/slab.h> #include <linux/mm.h> #include <linux/filter.h> #include <linux/perf_event.h> #include <uapi/linux/btf.h> #include "map_in_map.h" #define ARRAY_CREATE_FLAG_MASK \ (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY) static void bpf_array_free_percpu(struct bpf_array *array) { int i; for (i = 0; i < array->map.max_entries; i++) { free_percpu(array->pptrs[i]); cond_resched(); } } static int bpf_array_alloc_percpu(struct bpf_array *array) { void __percpu *ptr; int i; for (i = 0; i < array->map.max_entries; i++) { ptr = __alloc_percpu_gfp(array->elem_size, 8, GFP_USER | __GFP_NOWARN); if (!ptr) { bpf_array_free_percpu(array); return -ENOMEM; } array->pptrs[i] = ptr; cond_resched(); } return 0; } /* Called from syscall */ int array_map_alloc_check(union bpf_attr *attr) { bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; int numa_node = bpf_map_attr_numa_node(attr); /* check sanity of attributes */ if (attr->max_entries == 0 || attr->key_size != 4 || attr->value_size == 0 || attr->map_flags & ~ARRAY_CREATE_FLAG_MASK || (percpu && numa_node != NUMA_NO_NODE)) return -EINVAL; if (attr->value_size > KMALLOC_MAX_SIZE) /* if value_size is bigger, the user space won't be able to * access the elements. */ return -E2BIG; return 0; } static struct bpf_map *array_map_alloc(union bpf_attr *attr) { bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; int ret, numa_node = bpf_map_attr_numa_node(attr); u32 elem_size, index_mask, max_entries; bool unpriv = !capable(CAP_SYS_ADMIN); u64 cost, array_size, mask64; struct bpf_array *array; elem_size = round_up(attr->value_size, 8); max_entries = attr->max_entries; /* On 32 bit archs roundup_pow_of_two() with max_entries that has * upper most bit set in u32 space is undefined behavior due to * resulting 1U << 32, so do it manually here in u64 space. */ mask64 = fls_long(max_entries - 1); mask64 = 1ULL << mask64; mask64 -= 1; index_mask = mask64; if (unpriv) { /* round up array size to nearest power of 2, * since cpu will speculate within index_mask limits */ max_entries = index_mask + 1; /* Check for overflows. 
*/ if (max_entries < attr->max_entries) return ERR_PTR(-E2BIG); } array_size = sizeof(*array); if (percpu) array_size += (u64) max_entries * sizeof(void *); else array_size += (u64) max_entries * elem_size; /* make sure there is no u32 overflow later in round_up() */ cost = array_size; if (cost >= U32_MAX - PAGE_SIZE) return ERR_PTR(-ENOMEM); if (percpu) { cost += (u64)attr->max_entries * elem_size * num_possible_cpus(); if (cost >= U32_MAX - PAGE_SIZE) return ERR_PTR(-ENOMEM); } cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; ret = bpf_map_precharge_memlock(cost); if (ret < 0) return ERR_PTR(ret); /* allocate all map elements and zero-initialize them */ array = bpf_map_area_alloc(array_size, numa_node); if (!array) return ERR_PTR(-ENOMEM); array->index_mask = index_mask; array->map.unpriv_array = unpriv; /* copy mandatory map attributes */ bpf_map_init_from_attr(&array->map, attr); array->map.pages = cost; array->elem_size = elem_size; if (percpu && bpf_array_alloc_percpu(array)) { bpf_map_area_free(array); return ERR_PTR(-ENOMEM); } return &array->map; } /* Called from syscall or from eBPF program */ static void *array_map_lookup_elem(struct bpf_map *map, void *key) { struct bpf_array *array = container_of(map, struct bpf_array, map); u32 index = *(u32 *)key; if (unlikely(index >= array->map.max_entries)) return NULL; return array->value + array->elem_size * (index & array->index_mask); } /* emit BPF instructions equivalent to C code of array_map_lookup_elem() */ static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) { struct bpf_array *array = container_of(map, struct bpf_array, map); struct bpf_insn *insn = insn_buf; u32 elem_size = round_up(map->value_size, 8); const int ret = BPF_REG_0; const int map_ptr = BPF_REG_1; const int index = BPF_REG_2; *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value)); *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0); if (map->unpriv_array) { *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4); *insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask); } else { *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3); } if (is_power_of_2(elem_size)) { *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size)); } else { *insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size); } *insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr); *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1); *insn++ = BPF_MOV64_IMM(ret, 0); return insn - insn_buf; } /* Called from eBPF program */ static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key) { struct bpf_array *array = container_of(map, struct bpf_array, map); u32 index = *(u32 *)key; if (unlikely(index >= array->map.max_entries)) return NULL; return this_cpu_ptr(array->pptrs[index & array->index_mask]); } int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value) { struct bpf_array *array = container_of(map, struct bpf_array, map); u32 index = *(u32 *)key; void __percpu *pptr; int cpu, off = 0; u32 size; if (unlikely(index >= array->map.max_entries)) return -ENOENT; /* per_cpu areas are zero-filled and bpf programs can only * access 'value_size' of them, so copying rounded areas * will not leak any kernel data */ size = round_up(map->value_size, 8); rcu_read_lock(); pptr = array->pptrs[index & array->index_mask]; for_each_possible_cpu(cpu) { bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size); off += size; } rcu_read_unlock(); return 0; } /* Called from syscall */ static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key) { struct bpf_array 
*array = container_of(map, struct bpf_array, map); u32 index = key ? *(u32 *)key : U32_MAX; u32 *next = (u32 *)next_key; if (index >= array->map.max_entries) { *next = 0; return 0; } if (index == array->map.max_entries - 1) return -ENOENT; *next = index + 1; return 0; } /* Called from syscall or from eBPF program */ static int array_map_update_elem(struct bpf_map *map, void *key, void *value, u64 map_flags) { struct bpf_array *array = container_of(map, struct bpf_array, map); u32 index = *(u32 *)key; if (unlikely(map_flags > BPF_EXIST)) /* unknown flags */ return -EINVAL; if (unlikely(index >= array->map.max_entries)) /* all elements were pre-allocated, cannot insert a new one */ return -E2BIG; if (unlikely(map_flags == BPF_NOEXIST)) /* all elements already exist */ return -EEXIST; if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]), value, map->value_size); else memcpy(array->value + array->elem_size * (index & array->index_mask), value, map->value_size); return 0; } int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value, u64 map_flags) { struct bpf_array *array = container_of(map, struct bpf_array, map); u32 index = *(u32 *)key; void __percpu *pptr; int cpu, off = 0; u32 size; if (unlikely(map_flags > BPF_EXIST)) /* unknown flags */ return -EINVAL; if (unlikely(index >= array->map.max_entries)) /* all elements were pre-allocated, cannot insert a new one */ return -E2BIG; if (unlikely(map_flags == BPF_NOEXIST)) /* all elements already exist */ return -EEXIST; /* the user space will provide round_up(value_size, 8) bytes that * will be copied into per-cpu area. bpf programs can only access * value_size of it. During lookup the same extra bytes will be * returned or zeros which were zero-filled by percpu_alloc, * so no kernel data leaks possible */ size = round_up(map->value_size, 8); rcu_read_lock(); pptr = array->pptrs[index & array->index_mask]; for_each_possible_cpu(cpu) { bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size); off += size; } rcu_read_unlock(); return 0; } /* Called from syscall or from eBPF program */ static int array_map_delete_elem(struct bpf_map *map, void *key) { return -EINVAL; } /* Called when map->refcnt goes to zero, either from workqueue or from syscall */ static void array_map_free(struct bpf_map *map) { struct bpf_array *array = container_of(map, struct bpf_array, map); /* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0, * so the programs (can be more than one that used this map) were * disconnected from events. Wait for outstanding programs to complete * and free the array */ synchronize_rcu(); if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) bpf_array_free_percpu(array); bpf_map_area_free(array); } static void array_map_seq_show_elem(struct bpf_map *map, void *key, struct seq_file *m) { void *value; rcu_read_lock(); value = array_map_lookup_elem(map, key); if (!value) { rcu_read_unlock(); return; } seq_printf(m, "%u: ", *(u32 *)key); btf_type_seq_show(map->btf, map->btf_value_type_id, value, m); seq_puts(m, "\n"); rcu_read_unlock(); } static int array_map_check_btf(const struct bpf_map *map, const struct btf_type *key_type, const struct btf_type *value_type) { u32 int_data; if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT) return -EINVAL; int_data = *(u32 *)(key_type + 1); /* bpf array can only take a u32 key. This check makes sure * that the btf matches the attr used during map_create. 
*/ if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data)) return -EINVAL; return 0; } const struct bpf_map_ops array_map_ops = { .map_alloc_check = array_map_alloc_check, .map_alloc = array_map_alloc, .map_free = array_map_free, .map_get_next_key = array_map_get_next_key, .map_lookup_elem = array_map_lookup_elem, .map_update_elem = array_map_update_elem, .map_delete_elem = array_map_delete_elem, .map_gen_lookup = array_map_gen_lookup, .map_seq_show_elem = array_map_seq_show_elem, .map_check_btf = array_map_check_btf, }; const struct bpf_map_ops percpu_array_map_ops = { .map_alloc_check = array_map_alloc_check, .map_alloc = array_map_alloc, .map_free = array_map_free, .map_get_next_key = array_map_get_next_key, .map_lookup_elem = percpu_array_map_lookup_elem, .map_update_elem = array_map_update_elem, .map_delete_elem = array_map_delete_elem, .map_check_btf = array_map_check_btf, }; static int fd_array_map_alloc_check(union bpf_attr *attr) { /* only file descriptors can be stored in this type of map */ if (attr->value_size != sizeof(u32)) return -EINVAL; return array_map_alloc_check(attr); } static void fd_array_map_free(struct bpf_map *map) { struct bpf_array *array = container_of(map, struct bpf_array, map); int i; synchronize_rcu(); /* make sure it's empty */ for (i = 0; i < array->map.max_entries; i++) BUG_ON(array->ptrs[i] != NULL); bpf_map_area_free(array); } static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key) { return NULL; } /* only called from syscall */ int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value) { void **elem, *ptr; int ret = 0; if (!map->ops->map_fd_sys_lookup_elem) return -ENOTSUPP; rcu_read_lock(); elem = array_map_lookup_elem(map, key); if (elem && (ptr = READ_ONCE(*elem))) *value = map->ops->map_fd_sys_lookup_elem(ptr); else ret = -ENOENT; rcu_read_unlock(); return ret; } /* only called from syscall */ int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file, void *key, void *value, u64 map_flags) { struct bpf_array *array = container_of(map, struct bpf_array, map); void *new_ptr, *old_ptr; u32 index = *(u32 *)key, ufd; if (map_flags != BPF_ANY) return -EINVAL; if (index >= array->map.max_entries) return -E2BIG; ufd = *(u32 *)value; new_ptr = map->ops->map_fd_get_ptr(map, map_file, ufd); if (IS_ERR(new_ptr)) return PTR_ERR(new_ptr); old_ptr = xchg(array->ptrs + index, new_ptr); if (old_ptr) map->ops->map_fd_put_ptr(old_ptr); return 0; } static int fd_array_map_delete_elem(struct bpf_map *map, void *key) { struct bpf_array *array = container_of(map, struct bpf_array, map); void *old_ptr; u32 index = *(u32 *)key; if (index >= array->map.max_entries) return -E2BIG; old_ptr = xchg(array->ptrs + index, NULL); if (old_ptr) { map->ops->map_fd_put_ptr(old_ptr); return 0; } else { return -ENOENT; } } static void *prog_fd_array_get_ptr(struct bpf_map *map, struct file *map_file, int fd) { struct bpf_array *array = container_of(map, struct bpf_array, map); struct bpf_prog *prog = bpf_prog_get(fd); if (IS_ERR(prog)) return prog; if (!bpf_prog_array_compatible(array, prog)) { bpf_prog_put(prog); return ERR_PTR(-EINVAL); } return prog; } static void prog_fd_array_put_ptr(void *ptr) { bpf_prog_put(ptr); } static u32 prog_fd_array_sys_lookup_elem(void *ptr) { return ((struct bpf_prog *)ptr)->aux->id; } /* decrement refcnt of all bpf_progs that are stored in this map */ static void bpf_fd_array_map_clear(struct bpf_map *map) { struct bpf_array *array = container_of(map, struct bpf_array, map); int i; for (i = 0; i < 
array->map.max_entries; i++) fd_array_map_delete_elem(map, &i); } const struct bpf_map_ops prog_array_map_ops = { .map_alloc_check = fd_array_map_alloc_check, .map_alloc = array_map_alloc, .map_free = fd_array_map_free, .map_get_next_key = array_map_get_next_key, .map_lookup_elem = fd_array_map_lookup_elem, .map_delete_elem = fd_array_map_delete_elem, .map_fd_get_ptr = prog_fd_array_get_ptr, .map_fd_put_ptr = prog_fd_array_put_ptr, .map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem, .map_release_uref = bpf_fd_array_map_clear, .map_check_btf = map_check_no_btf, }; static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file, struct file *map_file) { struct bpf_event_entry *ee; ee = kzalloc(sizeof(*ee), GFP_ATOMIC); if (ee) { ee->event = perf_file->private_data; ee->perf_file = perf_file; ee->map_file = map_file; } return ee; } static void __bpf_event_entry_free(struct rcu_head *rcu) { struct bpf_event_entry *ee; ee = container_of(rcu, struct bpf_event_entry, rcu); fput(ee->perf_file); kfree(ee); } static void bpf_event_entry_free_rcu(struct bpf_event_entry *ee) { call_rcu(&ee->rcu, __bpf_event_entry_free); } static void *perf_event_fd_array_get_ptr(struct bpf_map *map, struct file *map_file, int fd) { struct bpf_event_entry *ee; struct perf_event *event; struct file *perf_file; u64 value; perf_file = perf_event_get(fd); if (IS_ERR(perf_file)) return perf_file; ee = ERR_PTR(-EOPNOTSUPP); event = perf_file->private_data; if (perf_event_read_local(event, &value, NULL, NULL) == -EOPNOTSUPP) goto err_out; ee = bpf_event_entry_gen(perf_file, map_file); if (ee) return ee; ee = ERR_PTR(-ENOMEM); err_out: fput(perf_file); return ee; } static void perf_event_fd_array_put_ptr(void *ptr) { bpf_event_entry_free_rcu(ptr); } static void perf_event_fd_array_release(struct bpf_map *map, struct file *map_file) { struct bpf_array *array = container_of(map, struct bpf_array, map); struct bpf_event_entry *ee; int i; rcu_read_lock(); for (i = 0; i < array->map.max_entries; i++) { ee = READ_ONCE(array->ptrs[i]); if (ee && ee->map_file == map_file) fd_array_map_delete_elem(map, &i); } rcu_read_unlock(); } const struct bpf_map_ops perf_event_array_map_ops = { .map_alloc_check = fd_array_map_alloc_check, .map_alloc = array_map_alloc, .map_free = fd_array_map_free, .map_get_next_key = array_map_get_next_key, .map_lookup_elem = fd_array_map_lookup_elem, .map_delete_elem = fd_array_map_delete_elem, .map_fd_get_ptr = perf_event_fd_array_get_ptr, .map_fd_put_ptr = perf_event_fd_array_put_ptr, .map_release = perf_event_fd_array_release, .map_check_btf = map_check_no_btf, }; #ifdef CONFIG_CGROUPS static void *cgroup_fd_array_get_ptr(struct bpf_map *map, struct file *map_file /* not used */, int fd) { return cgroup_get_from_fd(fd); } static void cgroup_fd_array_put_ptr(void *ptr) { /* cgroup_put free cgrp after a rcu grace period */ cgroup_put(ptr); } static void cgroup_fd_array_free(struct bpf_map *map) { bpf_fd_array_map_clear(map); fd_array_map_free(map); } const struct bpf_map_ops cgroup_array_map_ops = { .map_alloc_check = fd_array_map_alloc_check, .map_alloc = array_map_alloc, .map_free = cgroup_fd_array_free, .map_get_next_key = array_map_get_next_key, .map_lookup_elem = fd_array_map_lookup_elem, .map_delete_elem = fd_array_map_delete_elem, .map_fd_get_ptr = cgroup_fd_array_get_ptr, .map_fd_put_ptr = cgroup_fd_array_put_ptr, .map_check_btf = map_check_no_btf, }; #endif static struct bpf_map *array_of_map_alloc(union bpf_attr *attr) { struct bpf_map *map, *inner_map_meta; inner_map_meta = 
bpf_map_meta_alloc(attr->inner_map_fd); if (IS_ERR(inner_map_meta)) return inner_map_meta; map = array_map_alloc(attr); if (IS_ERR(map)) { bpf_map_meta_free(inner_map_meta); return map; } map->inner_map_meta = inner_map_meta; return map; } static void array_of_map_free(struct bpf_map *map) { /* map->inner_map_meta is only accessed by syscall which * is protected by fdget/fdput. */ bpf_map_meta_free(map->inner_map_meta); bpf_fd_array_map_clear(map); fd_array_map_free(map); } static void *array_of_map_lookup_elem(struct bpf_map *map, void *key) { struct bpf_map **inner_map = array_map_lookup_elem(map, key); if (!inner_map) return NULL; return READ_ONCE(*inner_map); } static u32 array_of_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) { struct bpf_array *array = container_of(map, struct bpf_array, map); u32 elem_size = round_up(map->value_size, 8); struct bpf_insn *insn = insn_buf; const int ret = BPF_REG_0; const int map_ptr = BPF_REG_1; const int index = BPF_REG_2; *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value)); *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0); if (map->unpriv_array) { *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6); *insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask); } else { *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5); } if (is_power_of_2(elem_size)) *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size)); else *insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size); *insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr); *insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0); *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1); *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1); *insn++ = BPF_MOV64_IMM(ret, 0); return insn - insn_buf; } const struct bpf_map_ops array_of_maps_map_ops = { .map_alloc_check = fd_array_map_alloc_check, .map_alloc = array_of_map_alloc, .map_free = array_of_map_free, .map_get_next_key = array_map_get_next_key, .map_lookup_elem = array_of_map_lookup_elem, .map_delete_elem = fd_array_map_delete_elem, .map_fd_get_ptr = bpf_map_fd_get_ptr, .map_fd_put_ptr = bpf_map_fd_put_ptr, .map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem, .map_gen_lookup = array_of_map_gen_lookup, .map_check_btf = map_check_no_btf, };
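/*
 * Illustrative userspace sketch, not part of the kernel sources above: it
 * shows one way to exercise the fd-array update path from the code above
 * (bpf_fd_array_map_update_elem() -> prog_fd_array_get_ptr()) by creating a
 * BPF_MAP_TYPE_PROG_ARRAY and storing a program fd in slot 0 through the raw
 * bpf(2) syscall. prog_fd is assumed to be the fd of an already-loaded,
 * compatible BPF program; error handling is minimal.
 */
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

static int prog_array_store(int prog_fd)
{
	union bpf_attr attr;
	uint32_t key = 0, value = prog_fd;
	int map_fd;

	/* value_size must be sizeof(u32), as fd_array_map_alloc_check() requires */
	memset(&attr, 0, sizeof(attr));
	attr.map_type = BPF_MAP_TYPE_PROG_ARRAY;
	attr.key_size = sizeof(uint32_t);
	attr.value_size = sizeof(uint32_t);
	attr.max_entries = 4;
	map_fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
	if (map_fd < 0)
		return -1;

	/* only BPF_ANY is accepted by bpf_fd_array_map_update_elem() */
	memset(&attr, 0, sizeof(attr));
	attr.map_fd = map_fd;
	attr.key = (uintptr_t)&key;
	attr.value = (uintptr_t)&value;
	attr.flags = BPF_ANY;
	if (syscall(__NR_bpf, BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)) < 0)
		return -1;

	return map_fd;
}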
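/*
 * A second illustrative sketch, again not part of the kernel sources above:
 * creating a BPF_MAP_TYPE_ARRAY_OF_MAPS. attr.inner_map_fd names a template
 * map whose metadata array_of_map_alloc() captures via bpf_map_meta_alloc();
 * inner_fd is assumed to be the fd of an already-created map of the type the
 * inner maps should have.
 */
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

static int create_array_of_maps(int inner_fd)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.map_type = BPF_MAP_TYPE_ARRAY_OF_MAPS;
	attr.key_size = sizeof(uint32_t);
	attr.value_size = sizeof(uint32_t);	/* each value is a map fd */
	attr.max_entries = 8;
	attr.inner_map_fd = inner_fd;		/* template for inner maps */

	return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
}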
/* Userspace key control operations * * Copyright (C) 2004-5 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ #include <linux/module.h> #include <linux/init.h> #include <linux/sched.h> #include <linux/sched/task.h> #include <linux/slab.h> #include <linux/syscalls.h> #include <linux/key.h> #include <linux/keyctl.h> #include <linux/fs.h> #include <linux/capability.h> #include <linux/cred.h> #include <linux/string.h> #include <linux/err.h> #include <linux/vmalloc.h> #include <linux/security.h> #include <linux/uio.h> #include <linux/uaccess.h> #include <keys/request_key_auth-type.h> #include "internal.h" #define KEY_MAX_DESC_SIZE 4096 static int key_get_type_from_user(char *type, const char __user *_type, unsigned len) { int ret; ret = strncpy_from_user(type, _type, len); if (ret < 0) return ret; if (ret == 0 || ret >= len) return -EINVAL; if (type[0] == '.') return -EPERM; type[len - 1] = '\0'; return 0; } /* * Extract the description of a new key from userspace and either add it as a * new key to the specified keyring or update a matching key in that keyring. * * If the description is NULL or an empty string, the key type is asked to * generate one from the payload. * * The keyring must be writable so that we can attach the key to it. * * If successful, the new key's serial number is returned, otherwise an error * code is returned.
*/ SYSCALL_DEFINE5(add_key, const char __user *, _type, const char __user *, _description, const void __user *, _payload, size_t, plen, key_serial_t, ringid) { key_ref_t keyring_ref, key_ref; char type[32], *description; void *payload; long ret; ret = -EINVAL; if (plen > 1024 * 1024 - 1) goto error; /* draw all the data into kernel space */ ret = key_get_type_from_user(type, _type, sizeof(type)); if (ret < 0) goto error; description = NULL; if (_description) { description = strndup_user(_description, KEY_MAX_DESC_SIZE); if (IS_ERR(description)) { ret = PTR_ERR(description); goto error; } if (!*description) { kfree(description); description = NULL; } else if ((description[0] == '.') && (strncmp(type, "keyring", 7) == 0)) { ret = -EPERM; goto error2; } } /* pull the payload in if one was supplied */ payload = NULL; if (plen) { ret = -ENOMEM; payload = kvmalloc(plen, GFP_KERNEL); if (!payload) goto error2; ret = -EFAULT; if (copy_from_user(payload, _payload, plen) != 0) goto error3; } /* find the target keyring (which must be writable) */ keyring_ref = lookup_user_key(ringid, KEY_LOOKUP_CREATE, KEY_NEED_WRITE); if (IS_ERR(keyring_ref)) { ret = PTR_ERR(keyring_ref); goto error3; } /* create or update the requested key and add it to the target * keyring */ key_ref = key_create_or_update(keyring_ref, type, description, payload, plen, KEY_PERM_UNDEF, KEY_ALLOC_IN_QUOTA); if (!IS_ERR(key_ref)) { ret = key_ref_to_ptr(key_ref)->serial; key_ref_put(key_ref); } else { ret = PTR_ERR(key_ref); } key_ref_put(keyring_ref); error3: kvfree_sensitive(payload, plen); error2: kfree(description); error: return ret; } /* * Search the process keyrings and keyring trees linked from those for a * matching key. Keyrings must have appropriate Search permission to be * searched. * * If a key is found, it will be attached to the destination keyring if there's * one specified and the serial number of the key will be returned. * * If no key is found, /sbin/request-key will be invoked if _callout_info is * non-NULL in an attempt to create a key. The _callout_info string will be * passed to /sbin/request-key to aid with completing the request. If the * _callout_info string is "" then it will be changed to "-". 
*/ SYSCALL_DEFINE4(request_key, const char __user *, _type, const char __user *, _description, const char __user *, _callout_info, key_serial_t, destringid) { struct key_type *ktype; struct key *key; key_ref_t dest_ref; size_t callout_len; char type[32], *description, *callout_info; long ret; /* pull the type into kernel space */ ret = key_get_type_from_user(type, _type, sizeof(type)); if (ret < 0) goto error; /* pull the description into kernel space */ description = strndup_user(_description, KEY_MAX_DESC_SIZE); if (IS_ERR(description)) { ret = PTR_ERR(description); goto error; } /* pull the callout info into kernel space */ callout_info = NULL; callout_len = 0; if (_callout_info) { callout_info = strndup_user(_callout_info, PAGE_SIZE); if (IS_ERR(callout_info)) { ret = PTR_ERR(callout_info); goto error2; } callout_len = strlen(callout_info); } /* get the destination keyring if specified */ dest_ref = NULL; if (destringid) { dest_ref = lookup_user_key(destringid, KEY_LOOKUP_CREATE, KEY_NEED_WRITE); if (IS_ERR(dest_ref)) { ret = PTR_ERR(dest_ref); goto error3; } } /* find the key type */ ktype = key_type_lookup(type); if (IS_ERR(ktype)) { ret = PTR_ERR(ktype); goto error4; } /* do the search */ key = request_key_and_link(ktype, description, callout_info, callout_len, NULL, key_ref_to_ptr(dest_ref), KEY_ALLOC_IN_QUOTA); if (IS_ERR(key)) { ret = PTR_ERR(key); goto error5; } /* wait for the key to finish being constructed */ ret = wait_for_key_construction(key, 1); if (ret < 0) goto error6; ret = key->serial; error6: key_put(key); error5: key_type_put(ktype); error4: key_ref_put(dest_ref); error3: kfree(callout_info); error2: kfree(description); error: return ret; } /* * Get the ID of the specified process keyring. * * The requested keyring must have search permission to be found. * * If successful, the ID of the requested keyring will be returned. */ long keyctl_get_keyring_ID(key_serial_t id, int create) { key_ref_t key_ref; unsigned long lflags; long ret; lflags = create ? KEY_LOOKUP_CREATE : 0; key_ref = lookup_user_key(id, lflags, KEY_NEED_SEARCH); if (IS_ERR(key_ref)) { ret = PTR_ERR(key_ref); goto error; } ret = key_ref_to_ptr(key_ref)->serial; key_ref_put(key_ref); error: return ret; } /* * Join a (named) session keyring. * * Create and join an anonymous session keyring or join a named session * keyring, creating it if necessary. A named session keyring must have Search * permission for it to be joined. Session keyrings without this permit will * be skipped over. It is not permitted for userspace to create or join * keyrings whose name begin with a dot. * * If successful, the ID of the joined session keyring will be returned. */ long keyctl_join_session_keyring(const char __user *_name) { char *name; long ret; /* fetch the name from userspace */ name = NULL; if (_name) { name = strndup_user(_name, KEY_MAX_DESC_SIZE); if (IS_ERR(name)) { ret = PTR_ERR(name); goto error; } ret = -EPERM; if (name[0] == '.') goto error_name; } /* join the session */ ret = join_session_keyring(name); error_name: kfree(name); error: return ret; } /* * Update a key's data payload from the given data. * * The key must grant the caller Write permission and the key type must support * updating for this to work. A negative key can be positively instantiated * with this call. * * If successful, 0 will be returned. If the key type does not support * updating, then -EOPNOTSUPP will be returned. 
*/ long keyctl_update_key(key_serial_t id, const void __user *_payload, size_t plen) { key_ref_t key_ref; void *payload; long ret; ret = -EINVAL; if (plen > PAGE_SIZE) goto error; /* pull the payload in if one was supplied */ payload = NULL; if (plen) { ret = -ENOMEM; payload = kvmalloc(plen, GFP_KERNEL); if (!payload) goto error; ret = -EFAULT; if (copy_from_user(payload, _payload, plen) != 0) goto error2; } /* find the target key (which must be writable) */ key_ref = lookup_user_key(id, 0, KEY_NEED_WRITE); if (IS_ERR(key_ref)) { ret = PTR_ERR(key_ref); goto error2; } /* update the key */ ret = key_update(key_ref, payload, plen); key_ref_put(key_ref); error2: kvfree_sensitive(payload, plen); error: return ret; } /* * Revoke a key. * * The key must be grant the caller Write or Setattr permission for this to * work. The key type should give up its quota claim when revoked. The key * and any links to the key will be automatically garbage collected after a * certain amount of time (/proc/sys/kernel/keys/gc_delay). * * Keys with KEY_FLAG_KEEP set should not be revoked. * * If successful, 0 is returned. */ long keyctl_revoke_key(key_serial_t id) { key_ref_t key_ref; struct key *key; long ret; key_ref = lookup_user_key(id, 0, KEY_NEED_WRITE); if (IS_ERR(key_ref)) { ret = PTR_ERR(key_ref); if (ret != -EACCES) goto error; key_ref = lookup_user_key(id, 0, KEY_NEED_SETATTR); if (IS_ERR(key_ref)) { ret = PTR_ERR(key_ref); goto error; } } key = key_ref_to_ptr(key_ref); ret = 0; if (test_bit(KEY_FLAG_KEEP, &key->flags)) ret = -EPERM; else key_revoke(key); key_ref_put(key_ref); error: return ret; } /* * Invalidate a key. * * The key must be grant the caller Invalidate permission for this to work. * The key and any links to the key will be automatically garbage collected * immediately. * * Keys with KEY_FLAG_KEEP set should not be invalidated. * * If successful, 0 is returned. */ long keyctl_invalidate_key(key_serial_t id) { key_ref_t key_ref; struct key *key; long ret; kenter("%d", id); key_ref = lookup_user_key(id, 0, KEY_NEED_SEARCH); if (IS_ERR(key_ref)) { ret = PTR_ERR(key_ref); /* Root is permitted to invalidate certain special keys */ if (capable(CAP_SYS_ADMIN)) { key_ref = lookup_user_key(id, 0, 0); if (IS_ERR(key_ref)) goto error; if (test_bit(KEY_FLAG_ROOT_CAN_INVAL, &key_ref_to_ptr(key_ref)->flags)) goto invalidate; goto error_put; } goto error; } invalidate: key = key_ref_to_ptr(key_ref); ret = 0; if (test_bit(KEY_FLAG_KEEP, &key->flags)) ret = -EPERM; else key_invalidate(key); error_put: key_ref_put(key_ref); error: kleave(" = %ld", ret); return ret; } /* * Clear the specified keyring, creating an empty process keyring if one of the * special keyring IDs is used. * * The keyring must grant the caller Write permission and not have * KEY_FLAG_KEEP set for this to work. If successful, 0 will be returned. 
*/ long keyctl_keyring_clear(key_serial_t ringid) { key_ref_t keyring_ref; struct key *keyring; long ret; keyring_ref = lookup_user_key(ringid, KEY_LOOKUP_CREATE, KEY_NEED_WRITE); if (IS_ERR(keyring_ref)) { ret = PTR_ERR(keyring_ref); /* Root is permitted to invalidate certain special keyrings */ if (capable(CAP_SYS_ADMIN)) { keyring_ref = lookup_user_key(ringid, 0, 0); if (IS_ERR(keyring_ref)) goto error; if (test_bit(KEY_FLAG_ROOT_CAN_CLEAR, &key_ref_to_ptr(keyring_ref)->flags)) goto clear; goto error_put; } goto error; } clear: keyring = key_ref_to_ptr(keyring_ref); if (test_bit(KEY_FLAG_KEEP, &keyring->flags)) ret = -EPERM; else ret = keyring_clear(keyring); error_put: key_ref_put(keyring_ref); error: return ret; } /* * Create a link from a keyring to a key if there's no matching key in the * keyring, otherwise replace the link to the matching key with a link to the * new key. * * The key must grant the caller Link permission and the the keyring must grant * the caller Write permission. Furthermore, if an additional link is created, * the keyring's quota will be extended. * * If successful, 0 will be returned. */ long keyctl_keyring_link(key_serial_t id, key_serial_t ringid) { key_ref_t keyring_ref, key_ref; long ret; keyring_ref = lookup_user_key(ringid, KEY_LOOKUP_CREATE, KEY_NEED_WRITE); if (IS_ERR(keyring_ref)) { ret = PTR_ERR(keyring_ref); goto error; } key_ref = lookup_user_key(id, KEY_LOOKUP_CREATE, KEY_NEED_LINK); if (IS_ERR(key_ref)) { ret = PTR_ERR(key_ref); goto error2; } ret = key_link(key_ref_to_ptr(keyring_ref), key_ref_to_ptr(key_ref)); key_ref_put(key_ref); error2: key_ref_put(keyring_ref); error: return ret; } /* * Unlink a key from a keyring. * * The keyring must grant the caller Write permission for this to work; the key * itself need not grant the caller anything. If the last link to a key is * removed then that key will be scheduled for destruction. * * Keys or keyrings with KEY_FLAG_KEEP set should not be unlinked. * * If successful, 0 will be returned. */ long keyctl_keyring_unlink(key_serial_t id, key_serial_t ringid) { key_ref_t keyring_ref, key_ref; struct key *keyring, *key; long ret; keyring_ref = lookup_user_key(ringid, 0, KEY_NEED_WRITE); if (IS_ERR(keyring_ref)) { ret = PTR_ERR(keyring_ref); goto error; } key_ref = lookup_user_key(id, KEY_LOOKUP_FOR_UNLINK, 0); if (IS_ERR(key_ref)) { ret = PTR_ERR(key_ref); goto error2; } keyring = key_ref_to_ptr(keyring_ref); key = key_ref_to_ptr(key_ref); if (test_bit(KEY_FLAG_KEEP, &keyring->flags) && test_bit(KEY_FLAG_KEEP, &key->flags)) ret = -EPERM; else ret = key_unlink(keyring, key); key_ref_put(key_ref); error2: key_ref_put(keyring_ref); error: return ret; } /* * Return a description of a key to userspace. * * The key must grant the caller View permission for this to work. * * If there's a buffer, we place up to buflen bytes of data into it formatted * in the following way: * * type;uid;gid;perm;description<NUL> * * If successful, we return the amount of description available, irrespective * of how much we may have copied into the buffer. 
*/ long keyctl_describe_key(key_serial_t keyid, char __user *buffer, size_t buflen) { struct key *key, *instkey; key_ref_t key_ref; char *infobuf; long ret; int desclen, infolen; key_ref = lookup_user_key(keyid, KEY_LOOKUP_PARTIAL, KEY_NEED_VIEW); if (IS_ERR(key_ref)) { /* viewing a key under construction is permitted if we have the * authorisation token handy */ if (PTR_ERR(key_ref) == -EACCES) { instkey = key_get_instantiation_authkey(keyid); if (!IS_ERR(instkey)) { key_put(instkey); key_ref = lookup_user_key(keyid, KEY_LOOKUP_PARTIAL, 0); if (!IS_ERR(key_ref)) goto okay; } } ret = PTR_ERR(key_ref); goto error; } okay: key = key_ref_to_ptr(key_ref); desclen = strlen(key->description); /* calculate how much information we're going to return */ ret = -ENOMEM; infobuf = kasprintf(GFP_KERNEL, "%s;%d;%d;%08x;", key->type->name, from_kuid_munged(current_user_ns(), key->uid), from_kgid_munged(current_user_ns(), key->gid), key->perm); if (!infobuf) goto error2; infolen = strlen(infobuf); ret = infolen + desclen + 1; /* consider returning the data */ if (buffer && buflen >= ret) { if (copy_to_user(buffer, infobuf, infolen) != 0 || copy_to_user(buffer + infolen, key->description, desclen + 1) != 0) ret = -EFAULT; } kfree(infobuf); error2: key_ref_put(key_ref); error: return ret; } /* * Search the specified keyring and any keyrings it links to for a matching * key. Only keyrings that grant the caller Search permission will be searched * (this includes the starting keyring). Only keys with Search permission can * be found. * * If successful, the found key will be linked to the destination keyring if * supplied and the key has Link permission, and the found key ID will be * returned. */ long keyctl_keyring_search(key_serial_t ringid, const char __user *_type, const char __user *_description, key_serial_t destringid) { struct key_type *ktype; key_ref_t keyring_ref, key_ref, dest_ref; char type[32], *description; long ret; /* pull the type and description into kernel space */ ret = key_get_type_from_user(type, _type, sizeof(type)); if (ret < 0) goto error; description = strndup_user(_description, KEY_MAX_DESC_SIZE); if (IS_ERR(description)) { ret = PTR_ERR(description); goto error; } /* get the keyring at which to begin the search */ keyring_ref = lookup_user_key(ringid, 0, KEY_NEED_SEARCH); if (IS_ERR(keyring_ref)) { ret = PTR_ERR(keyring_ref); goto error2; } /* get the destination keyring if specified */ dest_ref = NULL; if (destringid) { dest_ref = lookup_user_key(destringid, KEY_LOOKUP_CREATE, KEY_NEED_WRITE); if (IS_ERR(dest_ref)) { ret = PTR_ERR(dest_ref); goto error3; } } /* find the key type */ ktype = key_type_lookup(type); if (IS_ERR(ktype)) { ret = PTR_ERR(ktype); goto error4; } /* do the search */ key_ref = keyring_search(keyring_ref, ktype, description); if (IS_ERR(key_ref)) { ret = PTR_ERR(key_ref); /* treat lack or presence of a negative key the same */ if (ret == -EAGAIN) ret = -ENOKEY; goto error5; } /* link the resulting key to the destination keyring if we can */ if (dest_ref) { ret = key_permission(key_ref, KEY_NEED_LINK); if (ret < 0) goto error6; ret = key_link(key_ref_to_ptr(dest_ref), key_ref_to_ptr(key_ref)); if (ret < 0) goto error6; } ret = key_ref_to_ptr(key_ref)->serial; error6: key_ref_put(key_ref); error5: key_type_put(ktype); error4: key_ref_put(dest_ref); error3: key_ref_put(keyring_ref); error2: kfree(description); error: return ret; } /* * Call the read method */ static long __keyctl_read_key(struct key *key, char *buffer, size_t buflen) { long ret; 
down_read(&key->sem); ret = key_validate(key); if (ret == 0) ret = key->type->read(key, buffer, buflen); up_read(&key->sem); return ret; } /* * Read a key's payload. * * The key must either grant the caller Read permission, or it must grant the * caller Search permission when searched for from the process keyrings. * * If successful, we place up to buflen bytes of data into the buffer, if one * is provided, and return the amount of data that is available in the key, * irrespective of how much we copied into the buffer. */ long keyctl_read_key(key_serial_t keyid, char __user *buffer, size_t buflen) { struct key *key; key_ref_t key_ref; long ret; char *key_data = NULL; size_t key_data_len; /* find the key first */ key_ref = lookup_user_key(keyid, 0, 0); if (IS_ERR(key_ref)) { ret = -ENOKEY; goto out; } key = key_ref_to_ptr(key_ref); ret = key_read_state(key); if (ret < 0) goto key_put_out; /* Negatively instantiated */ /* see if we can read it directly */ ret = key_permission(key_ref, KEY_NEED_READ); if (ret == 0) goto can_read_key; if (ret != -EACCES) goto key_put_out; /* we can't; see if it's searchable from this process's keyrings * - we automatically take account of the fact that it may be * dangling off an instantiation key */ if (!is_key_possessed(key_ref)) { ret = -EACCES; goto key_put_out; } /* the key is probably readable - now try to read it */ can_read_key: if (!key->type->read) { ret = -EOPNOTSUPP; goto key_put_out; } if (!buffer || !buflen) { /* Get the key length from the read method */ ret = __keyctl_read_key(key, NULL, 0); goto key_put_out; } /* * Read the data with the semaphore held (since we might sleep) * to protect against the key being updated or revoked. * * Allocating a temporary buffer to hold the keys before * transferring them to user buffer to avoid potential * deadlock involving page fault and mmap_sem. * * key_data_len = (buflen <= PAGE_SIZE) * ? buflen : actual length of key data * * This prevents allocating arbitrary large buffer which can * be much larger than the actual key length. In the latter case, * at least 2 passes of this loop is required. */ key_data_len = (buflen <= PAGE_SIZE) ? buflen : 0; for (;;) { if (key_data_len) { key_data = kvmalloc(key_data_len, GFP_KERNEL); if (!key_data) { ret = -ENOMEM; goto key_put_out; } } ret = __keyctl_read_key(key, key_data, key_data_len); /* * Read methods will just return the required length without * any copying if the provided length isn't large enough. */ if (ret <= 0 || ret > buflen) break; /* * The key may change (unlikely) in between 2 consecutive * __keyctl_read_key() calls. In this case, we reallocate * a larger buffer and redo the key read when * key_data_len < ret <= buflen. */ if (ret > key_data_len) { if (unlikely(key_data)) kvfree_sensitive(key_data, key_data_len); key_data_len = ret; continue; /* Allocate buffer */ } if (copy_to_user(buffer, key_data, ret)) ret = -EFAULT; break; } kvfree_sensitive(key_data, key_data_len); key_put_out: key_put(key); out: return ret; } /* * Change the ownership of a key * * The key must grant the caller Setattr permission for this to work, though * the key need not be fully instantiated yet. For the UID to be changed, or * for the GID to be changed to a group the caller is not a member of, the * caller must have sysadmin capability. If either uid or gid is -1 then that * attribute is not changed. * * If the UID is to be changed, the new user must have sufficient quota to * accept the key. 
The quota deduction will be removed from the old user to * the new user should the attribute be changed. * * If successful, 0 will be returned. */ long keyctl_chown_key(key_serial_t id, uid_t user, gid_t group) { struct key_user *newowner, *zapowner = NULL; struct key *key; key_ref_t key_ref; long ret; kuid_t uid; kgid_t gid; uid = make_kuid(current_user_ns(), user); gid = make_kgid(current_user_ns(), group); ret = -EINVAL; if ((user != (uid_t) -1) && !uid_valid(uid)) goto error; if ((group != (gid_t) -1) && !gid_valid(gid)) goto error; ret = 0; if (user == (uid_t) -1 && group == (gid_t) -1) goto error; key_ref = lookup_user_key(id, KEY_LOOKUP_CREATE | KEY_LOOKUP_PARTIAL, KEY_NEED_SETATTR); if (IS_ERR(key_ref)) { ret = PTR_ERR(key_ref); goto error; } key = key_ref_to_ptr(key_ref); /* make the changes with the locks held to prevent chown/chown races */ ret = -EACCES; down_write(&key->sem); if (!capable(CAP_SYS_ADMIN)) { /* only the sysadmin can chown a key to some other UID */ if (user != (uid_t) -1 && !uid_eq(key->uid, uid)) goto error_put; /* only the sysadmin can set the key's GID to a group other * than one of those that the current process subscribes to */ if (group != (gid_t) -1 && !gid_eq(gid, key->gid) && !in_group_p(gid)) goto error_put; } /* change the UID */ if (user != (uid_t) -1 && !uid_eq(uid, key->uid)) { ret = -ENOMEM; newowner = key_user_lookup(uid); if (!newowner) goto error_put; /* transfer the quota burden to the new user */ if (test_bit(KEY_FLAG_IN_QUOTA, &key->flags)) { unsigned maxkeys = uid_eq(uid, GLOBAL_ROOT_UID) ? key_quota_root_maxkeys : key_quota_maxkeys; unsigned maxbytes = uid_eq(uid, GLOBAL_ROOT_UID) ? key_quota_root_maxbytes : key_quota_maxbytes; spin_lock(&newowner->lock); if (newowner->qnkeys + 1 > maxkeys || newowner->qnbytes + key->quotalen > maxbytes || newowner->qnbytes + key->quotalen < newowner->qnbytes) goto quota_overrun; newowner->qnkeys++; newowner->qnbytes += key->quotalen; spin_unlock(&newowner->lock); spin_lock(&key->user->lock); key->user->qnkeys--; key->user->qnbytes -= key->quotalen; spin_unlock(&key->user->lock); } atomic_dec(&key->user->nkeys); atomic_inc(&newowner->nkeys); if (key->state != KEY_IS_UNINSTANTIATED) { atomic_dec(&key->user->nikeys); atomic_inc(&newowner->nikeys); } zapowner = key->user; key->user = newowner; key->uid = uid; } /* change the GID */ if (group != (gid_t) -1) key->gid = gid; ret = 0; error_put: up_write(&key->sem); key_put(key); if (zapowner) key_user_put(zapowner); error: return ret; quota_overrun: spin_unlock(&newowner->lock); zapowner = newowner; ret = -EDQUOT; goto error_put; } /* * Change the permission mask on a key. * * The key must grant the caller Setattr permission for this to work, though * the key need not be fully instantiated yet. If the caller does not have * sysadmin capability, it may only change the permission on keys that it owns. 
*/ long keyctl_setperm_key(key_serial_t id, key_perm_t perm) { struct key *key; key_ref_t key_ref; long ret; ret = -EINVAL; if (perm & ~(KEY_POS_ALL | KEY_USR_ALL | KEY_GRP_ALL | KEY_OTH_ALL)) goto error; key_ref = lookup_user_key(id, KEY_LOOKUP_CREATE | KEY_LOOKUP_PARTIAL, KEY_NEED_SETATTR); if (IS_ERR(key_ref)) { ret = PTR_ERR(key_ref); goto error; } key = key_ref_to_ptr(key_ref); /* make the changes with the locks held to prevent chown/chmod races */ ret = -EACCES; down_write(&key->sem); /* if we're not the sysadmin, we can only change a key that we own */ if (capable(CAP_SYS_ADMIN) || uid_eq(key->uid, current_fsuid())) { key->perm = perm; ret = 0; } up_write(&key->sem); key_put(key); error: return ret; } /* * Get the destination keyring for instantiation and check that the caller has * Write permission on it. */ static long get_instantiation_keyring(key_serial_t ringid, struct request_key_auth *rka, struct key **_dest_keyring) { key_ref_t dkref; *_dest_keyring = NULL; /* just return a NULL pointer if we weren't asked to make a link */ if (ringid == 0) return 0; /* if a specific keyring is nominated by ID, then use that */ if (ringid > 0) { dkref = lookup_user_key(ringid, KEY_LOOKUP_CREATE, KEY_NEED_WRITE); if (IS_ERR(dkref)) return PTR_ERR(dkref); *_dest_keyring = key_ref_to_ptr(dkref); return 0; } if (ringid == KEY_SPEC_REQKEY_AUTH_KEY) return -EINVAL; /* otherwise specify the destination keyring recorded in the * authorisation key (any KEY_SPEC_*_KEYRING) */ if (ringid >= KEY_SPEC_REQUESTOR_KEYRING) { *_dest_keyring = key_get(rka->dest_keyring); return 0; } return -ENOKEY; } /* * Change the request_key authorisation key on the current process. */ static int keyctl_change_reqkey_auth(struct key *key) { struct cred *new; new = prepare_creds(); if (!new) return -ENOMEM; key_put(new->request_key_auth); new->request_key_auth = key_get(key); return commit_creds(new); } /* * Instantiate a key with the specified payload and link the key into the * destination keyring if one is given. * * The caller must have the appropriate instantiation permit set for this to * work (see keyctl_assume_authority). No other permissions are required. * * If successful, 0 will be returned. */ long keyctl_instantiate_key_common(key_serial_t id, struct iov_iter *from, key_serial_t ringid) { const struct cred *cred = current_cred(); struct request_key_auth *rka; struct key *instkey, *dest_keyring; size_t plen = from ? 
iov_iter_count(from) : 0; void *payload; long ret; kenter("%d,,%zu,%d", id, plen, ringid); if (!plen) from = NULL; ret = -EINVAL; if (plen > 1024 * 1024 - 1) goto error; /* the appropriate instantiation authorisation key must have been * assumed before calling this */ ret = -EPERM; instkey = cred->request_key_auth; if (!instkey) goto error; rka = instkey->payload.data[0]; if (rka->target_key->serial != id) goto error; /* pull the payload in if one was supplied */ payload = NULL; if (from) { ret = -ENOMEM; payload = kvmalloc(plen, GFP_KERNEL); if (!payload) goto error; ret = -EFAULT; if (!copy_from_iter_full(payload, plen, from)) goto error2; } /* find the destination keyring amongst those belonging to the * requesting task */ ret = get_instantiation_keyring(ringid, rka, &dest_keyring); if (ret < 0) goto error2; /* instantiate the key and link it into a keyring */ ret = key_instantiate_and_link(rka->target_key, payload, plen, dest_keyring, instkey); key_put(dest_keyring); /* discard the assumed authority if it's just been disabled by * instantiation of the key */ if (ret == 0) keyctl_change_reqkey_auth(NULL); error2: kvfree_sensitive(payload, plen); error: return ret; } /* * Instantiate a key with the specified payload and link the key into the * destination keyring if one is given. * * The caller must have the appropriate instantiation permit set for this to * work (see keyctl_assume_authority). No other permissions are required. * * If successful, 0 will be returned. */ long keyctl_instantiate_key(key_serial_t id, const void __user *_payload, size_t plen, key_serial_t ringid) { if (_payload && plen) { struct iovec iov; struct iov_iter from; int ret; ret = import_single_range(WRITE, (void __user *)_payload, plen, &iov, &from); if (unlikely(ret)) return ret; return keyctl_instantiate_key_common(id, &from, ringid); } return keyctl_instantiate_key_common(id, NULL, ringid); } /* * Instantiate a key with the specified multipart payload and link the key into * the destination keyring if one is given. * * The caller must have the appropriate instantiation permit set for this to * work (see keyctl_assume_authority). No other permissions are required. * * If successful, 0 will be returned. */ long keyctl_instantiate_key_iov(key_serial_t id, const struct iovec __user *_payload_iov, unsigned ioc, key_serial_t ringid) { struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; struct iov_iter from; long ret; if (!_payload_iov) ioc = 0; ret = import_iovec(WRITE, _payload_iov, ioc, ARRAY_SIZE(iovstack), &iov, &from); if (ret < 0) return ret; ret = keyctl_instantiate_key_common(id, &from, ringid); kfree(iov); return ret; } /* * Negatively instantiate the key with the given timeout (in seconds) and link * the key into the destination keyring if one is given. * * The caller must have the appropriate instantiation permit set for this to * work (see keyctl_assume_authority). No other permissions are required. * * The key and any links to the key will be automatically garbage collected * after the timeout expires. * * Negative keys are used to rate limit repeated request_key() calls by causing * them to return -ENOKEY until the negative key expires. * * If successful, 0 will be returned. */ long keyctl_negate_key(key_serial_t id, unsigned timeout, key_serial_t ringid) { return keyctl_reject_key(id, timeout, ENOKEY, ringid); } /* * Negatively instantiate the key with the given timeout (in seconds) and error * code and link the key into the destination keyring if one is given. 
* * The caller must have the appropriate instantiation permit set for this to * work (see keyctl_assume_authority). No other permissions are required. * * The key and any links to the key will be automatically garbage collected * after the timeout expires. * * Negative keys are used to rate limit repeated request_key() calls by causing * them to return the specified error code until the negative key expires. * * If successful, 0 will be returned. */ long keyctl_reject_key(key_serial_t id, unsigned timeout, unsigned error, key_serial_t ringid) { const struct cred *cred = current_cred(); struct request_key_auth *rka; struct key *instkey, *dest_keyring; long ret; kenter("%d,%u,%u,%d", id, timeout, error, ringid); /* must be a valid error code and mustn't be a kernel special */ if (error <= 0 || error >= MAX_ERRNO || error == ERESTARTSYS || error == ERESTARTNOINTR || error == ERESTARTNOHAND || error == ERESTART_RESTARTBLOCK) return -EINVAL; /* the appropriate instantiation authorisation key must have been * assumed before calling this */ ret = -EPERM; instkey = cred->request_key_auth; if (!instkey) goto error; rka = instkey->payload.data[0]; if (rka->target_key->serial != id) goto error; /* find the destination keyring if present (which must also be * writable) */ ret = get_instantiation_keyring(ringid, rka, &dest_keyring); if (ret < 0) goto error; /* instantiate the key and link it into a keyring */ ret = key_reject_and_link(rka->target_key, timeout, error, dest_keyring, instkey); key_put(dest_keyring); /* discard the assumed authority if it's just been disabled by * instantiation of the key */ if (ret == 0) keyctl_change_reqkey_auth(NULL); error: return ret; } /* * Read or set the default keyring in which request_key() will cache keys and * return the old setting. * * If a thread or process keyring is specified then it will be created if it * doesn't yet exist. The old setting will be returned if successful. */ long keyctl_set_reqkey_keyring(int reqkey_defl) { struct cred *new; int ret, old_setting; old_setting = current_cred_xxx(jit_keyring); if (reqkey_defl == KEY_REQKEY_DEFL_NO_CHANGE) return old_setting; new = prepare_creds(); if (!new) return -ENOMEM; switch (reqkey_defl) { case KEY_REQKEY_DEFL_THREAD_KEYRING: ret = install_thread_keyring_to_cred(new); if (ret < 0) goto error; goto set; case KEY_REQKEY_DEFL_PROCESS_KEYRING: ret = install_process_keyring_to_cred(new); if (ret < 0) goto error; goto set; case KEY_REQKEY_DEFL_DEFAULT: case KEY_REQKEY_DEFL_SESSION_KEYRING: case KEY_REQKEY_DEFL_USER_KEYRING: case KEY_REQKEY_DEFL_USER_SESSION_KEYRING: case KEY_REQKEY_DEFL_REQUESTOR_KEYRING: goto set; case KEY_REQKEY_DEFL_NO_CHANGE: case KEY_REQKEY_DEFL_GROUP_KEYRING: default: ret = -EINVAL; goto error; } set: new->jit_keyring = reqkey_defl; commit_creds(new); return old_setting; error: abort_creds(new); return ret; } /* * Set or clear the timeout on a key. * * Either the key must grant the caller Setattr permission or else the caller * must hold an instantiation authorisation token for the key. * * The timeout is either 0 to clear the timeout, or a number of seconds from * the current time. The key and any links to the key will be automatically * garbage collected after the timeout expires. * * Keys with KEY_FLAG_KEEP set should not be timed out. * * If successful, 0 is returned. 
*/ long keyctl_set_timeout(key_serial_t id, unsigned timeout) { struct key *key, *instkey; key_ref_t key_ref; long ret; key_ref = lookup_user_key(id, KEY_LOOKUP_CREATE | KEY_LOOKUP_PARTIAL, KEY_NEED_SETATTR); if (IS_ERR(key_ref)) { /* setting the timeout on a key under construction is permitted * if we have the authorisation token handy */ if (PTR_ERR(key_ref) == -EACCES) { instkey = key_get_instantiation_authkey(id); if (!IS_ERR(instkey)) { key_put(instkey); key_ref = lookup_user_key(id, KEY_LOOKUP_PARTIAL, 0); if (!IS_ERR(key_ref)) goto okay; } } ret = PTR_ERR(key_ref); goto error; } okay: key = key_ref_to_ptr(key_ref); ret = 0; if (test_bit(KEY_FLAG_KEEP, &key->flags)) ret = -EPERM; else key_set_timeout(key, timeout); key_put(key); error: return ret; } /* * Assume (or clear) the authority to instantiate the specified key. * * This sets the authoritative token currently in force for key instantiation. * This must be done for a key to be instantiated. It has the effect of making * available all the keys from the caller of the request_key() that created a * key to request_key() calls made by the caller of this function. * * The caller must have the instantiation key in their process keyrings with a * Search permission grant available to the caller. * * If the ID given is 0, then the setting will be cleared and 0 returned. * * If the ID given has a matching an authorisation key, then that key will be * set and its ID will be returned. The authorisation key can be read to get * the callout information passed to request_key(). */ long keyctl_assume_authority(key_serial_t id) { struct key *authkey; long ret; /* special key IDs aren't permitted */ ret = -EINVAL; if (id < 0) goto error; /* we divest ourselves of authority if given an ID of 0 */ if (id == 0) { ret = keyctl_change_reqkey_auth(NULL); goto error; } /* attempt to assume the authority temporarily granted to us whilst we * instantiate the specified key * - the authorisation key must be in the current task's keyrings * somewhere */ authkey = key_get_instantiation_authkey(id); if (IS_ERR(authkey)) { ret = PTR_ERR(authkey); goto error; } ret = keyctl_change_reqkey_auth(authkey); if (ret == 0) ret = authkey->serial; key_put(authkey); error: return ret; } /* * Get a key's the LSM security label. * * The key must grant the caller View permission for this to work. * * If there's a buffer, then up to buflen bytes of data will be placed into it. * * If successful, the amount of information available will be returned, * irrespective of how much was copied (including the terminal NUL). 
*/ long keyctl_get_security(key_serial_t keyid, char __user *buffer, size_t buflen) { struct key *key, *instkey; key_ref_t key_ref; char *context; long ret; key_ref = lookup_user_key(keyid, KEY_LOOKUP_PARTIAL, KEY_NEED_VIEW); if (IS_ERR(key_ref)) { if (PTR_ERR(key_ref) != -EACCES) return PTR_ERR(key_ref); /* viewing a key under construction is also permitted if we * have the authorisation token handy */ instkey = key_get_instantiation_authkey(keyid); if (IS_ERR(instkey)) return PTR_ERR(instkey); key_put(instkey); key_ref = lookup_user_key(keyid, KEY_LOOKUP_PARTIAL, 0); if (IS_ERR(key_ref)) return PTR_ERR(key_ref); } key = key_ref_to_ptr(key_ref); ret = security_key_getsecurity(key, &context); if (ret == 0) { /* if no information was returned, give userspace an empty * string */ ret = 1; if (buffer && buflen > 0 && copy_to_user(buffer, "", 1) != 0) ret = -EFAULT; } else if (ret > 0) { /* return as much data as there's room for */ if (buffer && buflen > 0) { if (buflen > ret) buflen = ret; if (copy_to_user(buffer, context, buflen) != 0) ret = -EFAULT; } kfree(context); } key_ref_put(key_ref); return ret; } /* * Attempt to install the calling process's session keyring on the process's * parent process. * * The keyring must exist and must grant the caller LINK permission, and the * parent process must be single-threaded and must have the same effective * ownership as this process and mustn't be SUID/SGID. * * The keyring will be emplaced on the parent when it next resumes userspace. * * If successful, 0 will be returned. */ long keyctl_session_to_parent(void) { struct task_struct *me, *parent; const struct cred *mycred, *pcred; struct callback_head *newwork, *oldwork; key_ref_t keyring_r; struct cred *cred; int ret; keyring_r = lookup_user_key(KEY_SPEC_SESSION_KEYRING, 0, KEY_NEED_LINK); if (IS_ERR(keyring_r)) return PTR_ERR(keyring_r); ret = -ENOMEM; /* our parent is going to need a new cred struct, a new tgcred struct * and new security data, so we allocate them here to prevent ENOMEM in * our parent */ cred = cred_alloc_blank(); if (!cred) goto error_keyring; newwork = &cred->rcu; cred->session_keyring = key_ref_to_ptr(keyring_r); keyring_r = NULL; init_task_work(newwork, key_change_session_keyring); me = current; rcu_read_lock(); write_lock_irq(&tasklist_lock); ret = -EPERM; oldwork = NULL; parent = me->real_parent; /* the parent mustn't be init and mustn't be a kernel thread */ if (parent->pid <= 1 || !parent->mm) goto unlock; /* the parent must be single threaded */ if (!thread_group_empty(parent)) goto unlock; /* the parent and the child must have different session keyrings or * there's no point */ mycred = current_cred(); pcred = __task_cred(parent); if (mycred == pcred || mycred->session_keyring == pcred->session_keyring) { ret = 0; goto unlock; } /* the parent must have the same effective ownership and mustn't be * SUID/SGID */ if (!uid_eq(pcred->uid, mycred->euid) || !uid_eq(pcred->euid, mycred->euid) || !uid_eq(pcred->suid, mycred->euid) || !gid_eq(pcred->gid, mycred->egid) || !gid_eq(pcred->egid, mycred->egid) || !gid_eq(pcred->sgid, mycred->egid)) goto unlock; /* the keyrings must have the same UID */ if ((pcred->session_keyring && !uid_eq(pcred->session_keyring->uid, mycred->euid)) || !uid_eq(mycred->session_keyring->uid, mycred->euid)) goto unlock; /* cancel an already pending keyring replacement */ oldwork = task_work_cancel(parent, key_change_session_keyring); /* the replacement session keyring is applied just prior to userspace * restarting */ ret = task_work_add(parent, 
newwork, true); if (!ret) newwork = NULL; unlock: write_unlock_irq(&tasklist_lock); rcu_read_unlock(); if (oldwork) put_cred(container_of(oldwork, struct cred, rcu)); if (newwork) put_cred(cred); return ret; error_keyring: key_ref_put(keyring_r); return ret; } /* * Apply a restriction to a given keyring. * * The caller must have Setattr permission to change keyring restrictions. * * The requested type name may be a NULL pointer to reject all attempts * to link to the keyring. In this case, _restriction must also be NULL. * Otherwise, both _type and _restriction must be non-NULL. * * Returns 0 if successful. */ long keyctl_restrict_keyring(key_serial_t id, const char __user *_type, const char __user *_restriction) { key_ref_t key_ref; char type[32]; char *restriction = NULL; long ret; key_ref = lookup_user_key(id, 0, KEY_NEED_SETATTR); if (IS_ERR(key_ref)) return PTR_ERR(key_ref); ret = -EINVAL; if (_type) { if (!_restriction) goto error; ret = key_get_type_from_user(type, _type, sizeof(type)); if (ret < 0) goto error; restriction = strndup_user(_restriction, PAGE_SIZE); if (IS_ERR(restriction)) { ret = PTR_ERR(restriction); goto error; } } else { if (_restriction) goto error; } ret = keyring_restrict(key_ref, _type ? type : NULL, restriction); kfree(restriction); error: key_ref_put(key_ref); return ret; } /* * The key control system call */ SYSCALL_DEFINE5(keyctl, int, option, unsigned long, arg2, unsigned long, arg3, unsigned long, arg4, unsigned long, arg5) { switch (option) { case KEYCTL_GET_KEYRING_ID: return keyctl_get_keyring_ID((key_serial_t) arg2, (int) arg3); case KEYCTL_JOIN_SESSION_KEYRING: return keyctl_join_session_keyring((const char __user *) arg2); case KEYCTL_UPDATE: return keyctl_update_key((key_serial_t) arg2, (const void __user *) arg3, (size_t) arg4); case KEYCTL_REVOKE: return keyctl_revoke_key((key_serial_t) arg2); case KEYCTL_DESCRIBE: return keyctl_describe_key((key_serial_t) arg2, (char __user *) arg3, (unsigned) arg4); case KEYCTL_CLEAR: return keyctl_keyring_clear((key_serial_t) arg2); case KEYCTL_LINK: return keyctl_keyring_link((key_serial_t) arg2, (key_serial_t) arg3); case KEYCTL_UNLINK: return keyctl_keyring_unlink((key_serial_t) arg2, (key_serial_t) arg3); case KEYCTL_SEARCH: return keyctl_keyring_search((key_serial_t) arg2, (const char __user *) arg3, (const char __user *) arg4, (key_serial_t) arg5); case KEYCTL_READ: return keyctl_read_key((key_serial_t) arg2, (char __user *) arg3, (size_t) arg4); case KEYCTL_CHOWN: return keyctl_chown_key((key_serial_t) arg2, (uid_t) arg3, (gid_t) arg4); case KEYCTL_SETPERM: return keyctl_setperm_key((key_serial_t) arg2, (key_perm_t) arg3); case KEYCTL_INSTANTIATE: return keyctl_instantiate_key((key_serial_t) arg2, (const void __user *) arg3, (size_t) arg4, (key_serial_t) arg5); case KEYCTL_NEGATE: return keyctl_negate_key((key_serial_t) arg2, (unsigned) arg3, (key_serial_t) arg4); case KEYCTL_SET_REQKEY_KEYRING: return keyctl_set_reqkey_keyring(arg2); case KEYCTL_SET_TIMEOUT: return keyctl_set_timeout((key_serial_t) arg2, (unsigned) arg3); case KEYCTL_ASSUME_AUTHORITY: return keyctl_assume_authority((key_serial_t) arg2); case KEYCTL_GET_SECURITY: return keyctl_get_security((key_serial_t) arg2, (char __user *) arg3, (size_t) arg4); case KEYCTL_SESSION_TO_PARENT: return keyctl_session_to_parent(); case KEYCTL_REJECT: return keyctl_reject_key((key_serial_t) arg2, (unsigned) arg3, (unsigned) arg4, (key_serial_t) arg5); case KEYCTL_INSTANTIATE_IOV: return keyctl_instantiate_key_iov( (key_serial_t) arg2, (const struct iovec 
__user *) arg3, (unsigned) arg4, (key_serial_t) arg5); case KEYCTL_INVALIDATE: return keyctl_invalidate_key((key_serial_t) arg2); case KEYCTL_GET_PERSISTENT: return keyctl_get_persistent((uid_t)arg2, (key_serial_t)arg3); case KEYCTL_DH_COMPUTE: return keyctl_dh_compute((struct keyctl_dh_params __user *) arg2, (char __user *) arg3, (size_t) arg4, (struct keyctl_kdf_params __user *) arg5); case KEYCTL_RESTRICT_KEYRING: return keyctl_restrict_keyring((key_serial_t) arg2, (const char __user *) arg3, (const char __user *) arg4); default: return -EOPNOTSUPP; } }
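/*
 * Illustrative userspace sketch, not part of the kernel file above: adding a
 * "user" key to the session keyring with add_key(2) and reading its payload
 * back with keyctl(2)/KEYCTL_READ, i.e. the sys_add_key() and
 * keyctl_read_key() paths shown earlier. Raw syscall numbers are used here
 * only to keep the example self-contained; the keyutils library wrappers
 * would normally be preferred.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/keyctl.h>

int main(void)
{
	char buf[64];
	long serial, len;

	/* add_key(type, description, payload, plen, destination keyring) */
	serial = syscall(__NR_add_key, "user", "example:token",
			 "secret-payload", strlen("secret-payload"),
			 KEY_SPEC_SESSION_KEYRING);
	if (serial < 0) {
		perror("add_key");
		return 1;
	}

	/* KEYCTL_READ returns the full payload length even when the buffer
	 * is too small to take a copy (see keyctl_read_key() above) */
	len = syscall(__NR_keyctl, KEYCTL_READ, serial, buf, sizeof(buf));
	if (len < 0) {
		perror("keyctl(KEYCTL_READ)");
		return 1;
	}

	printf("key %ld holds %ld bytes of payload\n", serial, len);
	return 0;
}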
/* SCTP kernel implementation * (C) Copyright IBM Corp. 2001, 2004 * Copyright (c) 1999-2000 Cisco, Inc. * Copyright (c) 1999-2001 Motorola, Inc. * Copyright (c) 2001-2002 Intel Corp. * * This file is part of the SCTP kernel implementation * * These functions work with the state functions in sctp_sm_statefuns.c * to implement the state operations. These functions implement the * steps which require modifying existing data structures. * * This SCTP implementation is free software; * you can redistribute it and/or modify it under the terms of * the GNU General Public License as published by * the Free Software Foundation; either version 2, or (at your option) * any later version. * * This SCTP implementation is distributed in the hope that it * will be useful, but WITHOUT ANY WARRANTY; without even the implied * ************************ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with GNU CC; see the file COPYING. If not, see * <http://www.gnu.org/licenses/>. * * Please send any bug reports or fixes you make to the * email address(es): * lksctp developers <linux-sctp@vger.kernel.org> * * Written or modified by: * La Monte H.P. Yarroll <piggy@acm.org> * Karl Knutson <karl@athena.chicago.il.us> * C.
Robin <chris@hundredacre.ac.uk> * Jon Grimm <jgrimm@us.ibm.com> * Xingang Guo <xingang.guo@intel.com> * Dajiang Zhang <dajiang.zhang@nokia.com> * Sridhar Samudrala <sri@us.ibm.com> * Daisy Chang <daisyc@us.ibm.com> * Ardelle Fan <ardelle.fan@intel.com> * Kevin Gao <kevin.gao@intel.com> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <crypto/hash.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/net.h> #include <linux/inet.h> #include <linux/scatterlist.h> #include <linux/slab.h> #include <net/sock.h> #include <linux/skbuff.h> #include <linux/random.h> /* for get_random_bytes */ #include <net/sctp/sctp.h> #include <net/sctp/sm.h> static struct sctp_chunk *sctp_make_control(const struct sctp_association *asoc, __u8 type, __u8 flags, int paylen, gfp_t gfp); static struct sctp_chunk *sctp_make_data(const struct sctp_association *asoc, __u8 flags, int paylen, gfp_t gfp); static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc, __u8 type, __u8 flags, int paylen, gfp_t gfp); static struct sctp_cookie_param *sctp_pack_cookie( const struct sctp_endpoint *ep, const struct sctp_association *asoc, const struct sctp_chunk *init_chunk, int *cookie_len, const __u8 *raw_addrs, int addrs_len); static int sctp_process_param(struct sctp_association *asoc, union sctp_params param, const union sctp_addr *peer_addr, gfp_t gfp); static void *sctp_addto_param(struct sctp_chunk *chunk, int len, const void *data); /* Control chunk destructor */ static void sctp_control_release_owner(struct sk_buff *skb) { struct sctp_chunk *chunk = skb_shinfo(skb)->destructor_arg; if (chunk->shkey) { struct sctp_shared_key *shkey = chunk->shkey; struct sctp_association *asoc = chunk->asoc; /* refcnt == 2 and !list_empty mean after this release, it's * not being used anywhere, and it's time to notify userland * that this shkey can be freed if it's been deactivated. */ if (shkey->deactivated && !list_empty(&shkey->key_list) && refcount_read(&shkey->refcnt) == 2) { struct sctp_ulpevent *ev; ev = sctp_ulpevent_make_authkey(asoc, shkey->key_id, SCTP_AUTH_FREE_KEY, GFP_KERNEL); if (ev) asoc->stream.si->enqueue_event(&asoc->ulpq, ev); } sctp_auth_shkey_release(chunk->shkey); } } static void sctp_control_set_owner_w(struct sctp_chunk *chunk) { struct sctp_association *asoc = chunk->asoc; struct sk_buff *skb = chunk->skb; /* TODO: properly account for control chunks. * To do it right we'll need: * 1) endpoint if association isn't known. * 2) proper memory accounting. * * For now don't do anything for now. */ if (chunk->auth) { chunk->shkey = asoc->shkey; sctp_auth_shkey_hold(chunk->shkey); } skb->sk = asoc ? asoc->base.sk : NULL; skb_shinfo(skb)->destructor_arg = chunk; skb->destructor = sctp_control_release_owner; } /* What was the inbound interface for this chunk? */ int sctp_chunk_iif(const struct sctp_chunk *chunk) { struct sk_buff *skb = chunk->skb; return SCTP_INPUT_CB(skb)->af->skb_iif(skb); } /* RFC 2960 3.3.2 Initiation (INIT) (1) * * Note 2: The ECN capable field is reserved for future use of * Explicit Congestion Notification. */ static const struct sctp_paramhdr ecap_param = { SCTP_PARAM_ECN_CAPABLE, cpu_to_be16(sizeof(struct sctp_paramhdr)), }; static const struct sctp_paramhdr prsctp_param = { SCTP_PARAM_FWD_TSN_SUPPORT, cpu_to_be16(sizeof(struct sctp_paramhdr)), }; /* A helper to initialize an op error inside a provided chunk, as most * cause codes will be embedded inside an abort chunk. 
*/ int sctp_init_cause(struct sctp_chunk *chunk, __be16 cause_code, size_t paylen) { struct sctp_errhdr err; __u16 len; /* Cause code constants are now defined in network order. */ err.cause = cause_code; len = sizeof(err) + paylen; err.length = htons(len); if (skb_tailroom(chunk->skb) < len) return -ENOSPC; chunk->subh.err_hdr = sctp_addto_chunk(chunk, sizeof(err), &err); return 0; } /* 3.3.2 Initiation (INIT) (1) * * This chunk is used to initiate a SCTP association between two * endpoints. The format of the INIT chunk is shown below: * * 0 1 2 3 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Type = 1 | Chunk Flags | Chunk Length | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Initiate Tag | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Advertised Receiver Window Credit (a_rwnd) | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Number of Outbound Streams | Number of Inbound Streams | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Initial TSN | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * \ \ * / Optional/Variable-Length Parameters / * \ \ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * * * The INIT chunk contains the following parameters. Unless otherwise * noted, each parameter MUST only be included once in the INIT chunk. * * Fixed Parameters Status * ---------------------------------------------- * Initiate Tag Mandatory * Advertised Receiver Window Credit Mandatory * Number of Outbound Streams Mandatory * Number of Inbound Streams Mandatory * Initial TSN Mandatory * * Variable Parameters Status Type Value * ------------------------------------------------------------- * IPv4 Address (Note 1) Optional 5 * IPv6 Address (Note 1) Optional 6 * Cookie Preservative Optional 9 * Reserved for ECN Capable (Note 2) Optional 32768 (0x8000) * Host Name Address (Note 3) Optional 11 * Supported Address Types (Note 4) Optional 12 */ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, const struct sctp_bind_addr *bp, gfp_t gfp, int vparam_len) { struct net *net = sock_net(asoc->base.sk); struct sctp_supported_ext_param ext_param; struct sctp_adaptation_ind_param aiparam; struct sctp_paramhdr *auth_chunks = NULL; struct sctp_paramhdr *auth_hmacs = NULL; struct sctp_supported_addrs_param sat; struct sctp_endpoint *ep = asoc->ep; struct sctp_chunk *retval = NULL; int num_types, addrs_len = 0; struct sctp_inithdr init; union sctp_params addrs; struct sctp_sock *sp; __u8 extensions[5]; size_t chunksize; __be16 types[2]; int num_ext = 0; /* RFC 2960 3.3.2 Initiation (INIT) (1) * * Note 1: The INIT chunks can contain multiple addresses that * can be IPv4 and/or IPv6 in any combination. */ /* Convert the provided bind address list to raw format. */ addrs = sctp_bind_addrs_to_raw(bp, &addrs_len, gfp); init.init_tag = htonl(asoc->c.my_vtag); init.a_rwnd = htonl(asoc->rwnd); init.num_outbound_streams = htons(asoc->c.sinit_num_ostreams); init.num_inbound_streams = htons(asoc->c.sinit_max_instreams); init.initial_tsn = htonl(asoc->c.initial_tsn); /* How many address types are needed? 
*/ sp = sctp_sk(asoc->base.sk); num_types = sp->pf->supported_addrs(sp, types); chunksize = sizeof(init) + addrs_len; chunksize += SCTP_PAD4(SCTP_SAT_LEN(num_types)); chunksize += sizeof(ecap_param); if (asoc->prsctp_enable) chunksize += sizeof(prsctp_param); /* ADDIP: Section 4.2.7: * An implementation supporting this extension [ADDIP] MUST list * the ASCONF,the ASCONF-ACK, and the AUTH chunks in its INIT and * INIT-ACK parameters. */ if (net->sctp.addip_enable) { extensions[num_ext] = SCTP_CID_ASCONF; extensions[num_ext+1] = SCTP_CID_ASCONF_ACK; num_ext += 2; } if (asoc->reconf_enable) { extensions[num_ext] = SCTP_CID_RECONF; num_ext += 1; } if (sp->adaptation_ind) chunksize += sizeof(aiparam); if (sp->strm_interleave) { extensions[num_ext] = SCTP_CID_I_DATA; num_ext += 1; } chunksize += vparam_len; /* Account for AUTH related parameters */ if (ep->auth_enable) { /* Add random parameter length*/ chunksize += sizeof(asoc->c.auth_random); /* Add HMACS parameter length if any were defined */ auth_hmacs = (struct sctp_paramhdr *)asoc->c.auth_hmacs; if (auth_hmacs->length) chunksize += SCTP_PAD4(ntohs(auth_hmacs->length)); else auth_hmacs = NULL; /* Add CHUNKS parameter length */ auth_chunks = (struct sctp_paramhdr *)asoc->c.auth_chunks; if (auth_chunks->length) chunksize += SCTP_PAD4(ntohs(auth_chunks->length)); else auth_chunks = NULL; extensions[num_ext] = SCTP_CID_AUTH; num_ext += 1; } /* If we have any extensions to report, account for that */ if (num_ext) chunksize += SCTP_PAD4(sizeof(ext_param) + num_ext); /* RFC 2960 3.3.2 Initiation (INIT) (1) * * Note 3: An INIT chunk MUST NOT contain more than one Host * Name address parameter. Moreover, the sender of the INIT * MUST NOT combine any other address types with the Host Name * address in the INIT. The receiver of INIT MUST ignore any * other address types if the Host Name address parameter is * present in the received INIT chunk. * * PLEASE DO NOT FIXME [This version does not support Host Name.] */ retval = sctp_make_control(asoc, SCTP_CID_INIT, 0, chunksize, gfp); if (!retval) goto nodata; retval->subh.init_hdr = sctp_addto_chunk(retval, sizeof(init), &init); retval->param_hdr.v = sctp_addto_chunk(retval, addrs_len, addrs.v); /* RFC 2960 3.3.2 Initiation (INIT) (1) * * Note 4: This parameter, when present, specifies all the * address types the sending endpoint can support. The absence * of this parameter indicates that the sending endpoint can * support any address type. */ sat.param_hdr.type = SCTP_PARAM_SUPPORTED_ADDRESS_TYPES; sat.param_hdr.length = htons(SCTP_SAT_LEN(num_types)); sctp_addto_chunk(retval, sizeof(sat), &sat); sctp_addto_chunk(retval, num_types * sizeof(__u16), &types); sctp_addto_chunk(retval, sizeof(ecap_param), &ecap_param); /* Add the supported extensions parameter. 
Be nice and add this * first before adding the parameters for the extensions themselves */ if (num_ext) { ext_param.param_hdr.type = SCTP_PARAM_SUPPORTED_EXT; ext_param.param_hdr.length = htons(sizeof(ext_param) + num_ext); sctp_addto_chunk(retval, sizeof(ext_param), &ext_param); sctp_addto_param(retval, num_ext, extensions); } if (asoc->prsctp_enable) sctp_addto_chunk(retval, sizeof(prsctp_param), &prsctp_param); if (sp->adaptation_ind) { aiparam.param_hdr.type = SCTP_PARAM_ADAPTATION_LAYER_IND; aiparam.param_hdr.length = htons(sizeof(aiparam)); aiparam.adaptation_ind = htonl(sp->adaptation_ind); sctp_addto_chunk(retval, sizeof(aiparam), &aiparam); } /* Add SCTP-AUTH chunks to the parameter list */ if (ep->auth_enable) { sctp_addto_chunk(retval, sizeof(asoc->c.auth_random), asoc->c.auth_random); if (auth_hmacs) sctp_addto_chunk(retval, ntohs(auth_hmacs->length), auth_hmacs); if (auth_chunks) sctp_addto_chunk(retval, ntohs(auth_chunks->length), auth_chunks); } nodata: kfree(addrs.v); return retval; } struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, const struct sctp_chunk *chunk, gfp_t gfp, int unkparam_len) { struct sctp_supported_ext_param ext_param; struct sctp_adaptation_ind_param aiparam; struct sctp_paramhdr *auth_chunks = NULL; struct sctp_paramhdr *auth_random = NULL; struct sctp_paramhdr *auth_hmacs = NULL; struct sctp_chunk *retval = NULL; struct sctp_cookie_param *cookie; struct sctp_inithdr initack; union sctp_params addrs; struct sctp_sock *sp; __u8 extensions[5]; size_t chunksize; int num_ext = 0; int cookie_len; int addrs_len; /* Note: there may be no addresses to embed. */ addrs = sctp_bind_addrs_to_raw(&asoc->base.bind_addr, &addrs_len, gfp); initack.init_tag = htonl(asoc->c.my_vtag); initack.a_rwnd = htonl(asoc->rwnd); initack.num_outbound_streams = htons(asoc->c.sinit_num_ostreams); initack.num_inbound_streams = htons(asoc->c.sinit_max_instreams); initack.initial_tsn = htonl(asoc->c.initial_tsn); /* FIXME: We really ought to build the cookie right * into the packet instead of allocating more fresh memory. */ cookie = sctp_pack_cookie(asoc->ep, asoc, chunk, &cookie_len, addrs.v, addrs_len); if (!cookie) goto nomem_cookie; /* Calculate the total size of allocation, include the reserved * space for reporting unknown parameters if it is specified. */ sp = sctp_sk(asoc->base.sk); chunksize = sizeof(initack) + addrs_len + cookie_len + unkparam_len; /* Tell peer that we'll do ECN only if peer advertised such cap.
*/ if (asoc->peer.ecn_capable) chunksize += sizeof(ecap_param); if (asoc->peer.prsctp_capable) chunksize += sizeof(prsctp_param); if (asoc->peer.asconf_capable) { extensions[num_ext] = SCTP_CID_ASCONF; extensions[num_ext+1] = SCTP_CID_ASCONF_ACK; num_ext += 2; } if (asoc->peer.reconf_capable) { extensions[num_ext] = SCTP_CID_RECONF; num_ext += 1; } if (sp->adaptation_ind) chunksize += sizeof(aiparam); if (asoc->intl_enable) { extensions[num_ext] = SCTP_CID_I_DATA; num_ext += 1; } if (asoc->peer.auth_capable) { auth_random = (struct sctp_paramhdr *)asoc->c.auth_random; chunksize += ntohs(auth_random->length); auth_hmacs = (struct sctp_paramhdr *)asoc->c.auth_hmacs; if (auth_hmacs->length) chunksize += SCTP_PAD4(ntohs(auth_hmacs->length)); else auth_hmacs = NULL; auth_chunks = (struct sctp_paramhdr *)asoc->c.auth_chunks; if (auth_chunks->length) chunksize += SCTP_PAD4(ntohs(auth_chunks->length)); else auth_chunks = NULL; extensions[num_ext] = SCTP_CID_AUTH; num_ext += 1; } if (num_ext) chunksize += SCTP_PAD4(sizeof(ext_param) + num_ext); /* Now allocate and fill out the chunk. */ retval = sctp_make_control(asoc, SCTP_CID_INIT_ACK, 0, chunksize, gfp); if (!retval) goto nomem_chunk; /* RFC 2960 6.4 Multi-homed SCTP Endpoints * * An endpoint SHOULD transmit reply chunks (e.g., SACK, * HEARTBEAT ACK, * etc.) to the same destination transport * address from which it received the DATA or control chunk * to which it is replying. * * [INIT ACK back to where the INIT came from.] */ if (chunk->transport) retval->transport = sctp_assoc_lookup_paddr(asoc, &chunk->transport->ipaddr); retval->subh.init_hdr = sctp_addto_chunk(retval, sizeof(initack), &initack); retval->param_hdr.v = sctp_addto_chunk(retval, addrs_len, addrs.v); sctp_addto_chunk(retval, cookie_len, cookie); if (asoc->peer.ecn_capable) sctp_addto_chunk(retval, sizeof(ecap_param), &ecap_param); if (num_ext) { ext_param.param_hdr.type = SCTP_PARAM_SUPPORTED_EXT; ext_param.param_hdr.length = htons(sizeof(ext_param) + num_ext); sctp_addto_chunk(retval, sizeof(ext_param), &ext_param); sctp_addto_param(retval, num_ext, extensions); } if (asoc->peer.prsctp_capable) sctp_addto_chunk(retval, sizeof(prsctp_param), &prsctp_param); if (sp->adaptation_ind) { aiparam.param_hdr.type = SCTP_PARAM_ADAPTATION_LAYER_IND; aiparam.param_hdr.length = htons(sizeof(aiparam)); aiparam.adaptation_ind = htonl(sp->adaptation_ind); sctp_addto_chunk(retval, sizeof(aiparam), &aiparam); } if (asoc->peer.auth_capable) { sctp_addto_chunk(retval, ntohs(auth_random->length), auth_random); if (auth_hmacs) sctp_addto_chunk(retval, ntohs(auth_hmacs->length), auth_hmacs); if (auth_chunks) sctp_addto_chunk(retval, ntohs(auth_chunks->length), auth_chunks); } /* We need to remove the const qualifier at this point. */ retval->asoc = (struct sctp_association *) asoc; nomem_chunk: kfree(cookie); nomem_cookie: kfree(addrs.v); return retval; } /* 3.3.11 Cookie Echo (COOKIE ECHO) (10): * * This chunk is used only during the initialization of an association. * It is sent by the initiator of an association to its peer to complete * the initialization process. This chunk MUST precede any DATA chunk * sent within the association, but MAY be bundled with one or more DATA * chunks in the same packet. 
* * 0 1 2 3 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Type = 10 |Chunk Flags | Length | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * / Cookie / * \ \ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * * Chunk Flags: 8 bit * * Set to zero on transmit and ignored on receipt. * * Length: 16 bits (unsigned integer) * * Set to the size of the chunk in bytes, including the 4 bytes of * the chunk header and the size of the Cookie. * * Cookie: variable size * * This field must contain the exact cookie received in the * State Cookie parameter from the previous INIT ACK. * * An implementation SHOULD make the cookie as small as possible * to insure interoperability. */ struct sctp_chunk *sctp_make_cookie_echo(const struct sctp_association *asoc, const struct sctp_chunk *chunk) { struct sctp_chunk *retval; int cookie_len; void *cookie; cookie = asoc->peer.cookie; cookie_len = asoc->peer.cookie_len; /* Build a cookie echo chunk. */ retval = sctp_make_control(asoc, SCTP_CID_COOKIE_ECHO, 0, cookie_len, GFP_ATOMIC); if (!retval) goto nodata; retval->subh.cookie_hdr = sctp_addto_chunk(retval, cookie_len, cookie); /* RFC 2960 6.4 Multi-homed SCTP Endpoints * * An endpoint SHOULD transmit reply chunks (e.g., SACK, * HEARTBEAT ACK, * etc.) to the same destination transport * address from which it * received the DATA or control chunk * to which it is replying. * * [COOKIE ECHO back to where the INIT ACK came from.] */ if (chunk) retval->transport = chunk->transport; nodata: return retval; } /* 3.3.12 Cookie Acknowledgement (COOKIE ACK) (11): * * This chunk is used only during the initialization of an * association. It is used to acknowledge the receipt of a COOKIE * ECHO chunk. This chunk MUST precede any DATA or SACK chunk sent * within the association, but MAY be bundled with one or more DATA * chunks or SACK chunk in the same SCTP packet. * * 0 1 2 3 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Type = 11 |Chunk Flags | Length = 4 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * * Chunk Flags: 8 bits * * Set to zero on transmit and ignored on receipt. */ struct sctp_chunk *sctp_make_cookie_ack(const struct sctp_association *asoc, const struct sctp_chunk *chunk) { struct sctp_chunk *retval; retval = sctp_make_control(asoc, SCTP_CID_COOKIE_ACK, 0, 0, GFP_ATOMIC); /* RFC 2960 6.4 Multi-homed SCTP Endpoints * * An endpoint SHOULD transmit reply chunks (e.g., SACK, * HEARTBEAT ACK, * etc.) to the same destination transport * address from which it * received the DATA or control chunk * to which it is replying. * * [COOKIE ACK back to where the COOKIE ECHO came from.] */ if (retval && chunk && chunk->transport) retval->transport = sctp_assoc_lookup_paddr(asoc, &chunk->transport->ipaddr); return retval; } /* * Appendix A: Explicit Congestion Notification: * CWR: * * RFC 2481 details a specific bit for a sender to send in the header of * its next outbound TCP segment to indicate to its peer that it has * reduced its congestion window. This is termed the CWR bit. For * SCTP the same indication is made by including the CWR chunk. * This chunk contains one data element, i.e. the TSN number that * was sent in the ECNE chunk. This element represents the lowest * TSN number in the datagram that was originally marked with the * CE bit. 
* * 0 1 2 3 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Chunk Type=13 | Flags=00000000| Chunk Length = 8 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Lowest TSN Number | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * * Note: The CWR is considered a Control chunk. */ struct sctp_chunk *sctp_make_cwr(const struct sctp_association *asoc, const __u32 lowest_tsn, const struct sctp_chunk *chunk) { struct sctp_chunk *retval; struct sctp_cwrhdr cwr; cwr.lowest_tsn = htonl(lowest_tsn); retval = sctp_make_control(asoc, SCTP_CID_ECN_CWR, 0, sizeof(cwr), GFP_ATOMIC); if (!retval) goto nodata; retval->subh.ecn_cwr_hdr = sctp_addto_chunk(retval, sizeof(cwr), &cwr); /* RFC 2960 6.4 Multi-homed SCTP Endpoints * * An endpoint SHOULD transmit reply chunks (e.g., SACK, * HEARTBEAT ACK, * etc.) to the same destination transport * address from which it * received the DATA or control chunk * to which it is replying. * * [Report a reduced congestion window back to where the ECNE * came from.] */ if (chunk) retval->transport = chunk->transport; nodata: return retval; } /* Make an ECNE chunk. This is a congestion experienced report. */ struct sctp_chunk *sctp_make_ecne(const struct sctp_association *asoc, const __u32 lowest_tsn) { struct sctp_chunk *retval; struct sctp_ecnehdr ecne; ecne.lowest_tsn = htonl(lowest_tsn); retval = sctp_make_control(asoc, SCTP_CID_ECN_ECNE, 0, sizeof(ecne), GFP_ATOMIC); if (!retval) goto nodata; retval->subh.ecne_hdr = sctp_addto_chunk(retval, sizeof(ecne), &ecne); nodata: return retval; } /* Make a DATA chunk for the given association from the provided * parameters. However, do not populate the data payload. */ struct sctp_chunk *sctp_make_datafrag_empty(const struct sctp_association *asoc, const struct sctp_sndrcvinfo *sinfo, int len, __u8 flags, gfp_t gfp) { struct sctp_chunk *retval; struct sctp_datahdr dp; /* We assign the TSN as LATE as possible, not here when * creating the chunk. */ memset(&dp, 0, sizeof(dp)); dp.ppid = sinfo->sinfo_ppid; dp.stream = htons(sinfo->sinfo_stream); /* Set the flags for an unordered send. */ if (sinfo->sinfo_flags & SCTP_UNORDERED) flags |= SCTP_DATA_UNORDERED; retval = sctp_make_data(asoc, flags, sizeof(dp) + len, gfp); if (!retval) return NULL; retval->subh.data_hdr = sctp_addto_chunk(retval, sizeof(dp), &dp); memcpy(&retval->sinfo, sinfo, sizeof(struct sctp_sndrcvinfo)); return retval; } /* Create a selective ackowledgement (SACK) for the given * association. This reports on which TSN's we've seen to date, * including duplicates and gaps. */ struct sctp_chunk *sctp_make_sack(struct sctp_association *asoc) { struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map; struct sctp_gap_ack_block gabs[SCTP_MAX_GABS]; __u16 num_gabs, num_dup_tsns; struct sctp_transport *trans; struct sctp_chunk *retval; struct sctp_sackhdr sack; __u32 ctsn; int len; memset(gabs, 0, sizeof(gabs)); ctsn = sctp_tsnmap_get_ctsn(map); pr_debug("%s: sackCTSNAck sent:0x%x\n", __func__, ctsn); /* How much room is needed in the chunk? */ num_gabs = sctp_tsnmap_num_gabs(map, gabs); num_dup_tsns = sctp_tsnmap_num_dups(map); /* Initialize the SACK header. */ sack.cum_tsn_ack = htonl(ctsn); sack.a_rwnd = htonl(asoc->a_rwnd); sack.num_gap_ack_blocks = htons(num_gabs); sack.num_dup_tsns = htons(num_dup_tsns); len = sizeof(sack) + sizeof(struct sctp_gap_ack_block) * num_gabs + sizeof(__u32) * num_dup_tsns; /* Create the chunk. 
*/ retval = sctp_make_control(asoc, SCTP_CID_SACK, 0, len, GFP_ATOMIC); if (!retval) goto nodata; /* RFC 2960 6.4 Multi-homed SCTP Endpoints * * An endpoint SHOULD transmit reply chunks (e.g., SACK, * HEARTBEAT ACK, etc.) to the same destination transport * address from which it received the DATA or control chunk to * which it is replying. This rule should also be followed if * the endpoint is bundling DATA chunks together with the * reply chunk. * * However, when acknowledging multiple DATA chunks received * in packets from different source addresses in a single * SACK, the SACK chunk may be transmitted to one of the * destination transport addresses from which the DATA or * control chunks being acknowledged were received. * * [BUG: We do not implement the following paragraph. * Perhaps we should remember the last transport we used for a * SACK and avoid that (if possible) if we have seen any * duplicates. --piggy] * * When a receiver of a duplicate DATA chunk sends a SACK to a * multi- homed endpoint it MAY be beneficial to vary the * destination address and not use the source address of the * DATA chunk. The reason being that receiving a duplicate * from a multi-homed endpoint might indicate that the return * path (as specified in the source address of the DATA chunk) * for the SACK is broken. * * [Send to the address from which we last received a DATA chunk.] */ retval->transport = asoc->peer.last_data_from; retval->subh.sack_hdr = sctp_addto_chunk(retval, sizeof(sack), &sack); /* Add the gap ack block information. */ if (num_gabs) sctp_addto_chunk(retval, sizeof(__u32) * num_gabs, gabs); /* Add the duplicate TSN information. */ if (num_dup_tsns) { asoc->stats.idupchunks += num_dup_tsns; sctp_addto_chunk(retval, sizeof(__u32) * num_dup_tsns, sctp_tsnmap_get_dups(map)); } /* Once we have a sack generated, check to see what our sack * generation is, if its 0, reset the transports to 0, and reset * the association generation to 1 * * The idea is that zero is never used as a valid generation for the * association so no transport will match after a wrap event like this, * Until the next sack */ if (++asoc->peer.sack_generation == 0) { list_for_each_entry(trans, &asoc->peer.transport_addr_list, transports) trans->sack_generation = 0; asoc->peer.sack_generation = 1; } nodata: return retval; } /* Make a SHUTDOWN chunk. */ struct sctp_chunk *sctp_make_shutdown(const struct sctp_association *asoc, const struct sctp_chunk *chunk) { struct sctp_shutdownhdr shut; struct sctp_chunk *retval; __u32 ctsn; if (chunk && chunk->asoc) ctsn = sctp_tsnmap_get_ctsn(&chunk->asoc->peer.tsn_map); else ctsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map); shut.cum_tsn_ack = htonl(ctsn); retval = sctp_make_control(asoc, SCTP_CID_SHUTDOWN, 0, sizeof(shut), GFP_ATOMIC); if (!retval) goto nodata; retval->subh.shutdown_hdr = sctp_addto_chunk(retval, sizeof(shut), &shut); if (chunk) retval->transport = chunk->transport; nodata: return retval; } struct sctp_chunk *sctp_make_shutdown_ack(const struct sctp_association *asoc, const struct sctp_chunk *chunk) { struct sctp_chunk *retval; retval = sctp_make_control(asoc, SCTP_CID_SHUTDOWN_ACK, 0, 0, GFP_ATOMIC); /* RFC 2960 6.4 Multi-homed SCTP Endpoints * * An endpoint SHOULD transmit reply chunks (e.g., SACK, * HEARTBEAT ACK, * etc.) to the same destination transport * address from which it * received the DATA or control chunk * to which it is replying. * * [ACK back to where the SHUTDOWN came from.] 
*/ if (retval && chunk) retval->transport = chunk->transport; return retval; } struct sctp_chunk *sctp_make_shutdown_complete( const struct sctp_association *asoc, const struct sctp_chunk *chunk) { struct sctp_chunk *retval; __u8 flags = 0; /* Set the T-bit if we have no association (vtag will be * reflected) */ flags |= asoc ? 0 : SCTP_CHUNK_FLAG_T; retval = sctp_make_control(asoc, SCTP_CID_SHUTDOWN_COMPLETE, flags, 0, GFP_ATOMIC); /* RFC 2960 6.4 Multi-homed SCTP Endpoints * * An endpoint SHOULD transmit reply chunks (e.g., SACK, * HEARTBEAT ACK, * etc.) to the same destination transport * address from which it * received the DATA or control chunk * to which it is replying. * * [Report SHUTDOWN COMPLETE back to where the SHUTDOWN ACK * came from.] */ if (retval && chunk) retval->transport = chunk->transport; return retval; } /* Create an ABORT. Note that we set the T bit if we have no * association, except when responding to an INIT (sctpimpguide 2.41). */ struct sctp_chunk *sctp_make_abort(const struct sctp_association *asoc, const struct sctp_chunk *chunk, const size_t hint) { struct sctp_chunk *retval; __u8 flags = 0; /* Set the T-bit if we have no association and 'chunk' is not * an INIT (vtag will be reflected). */ if (!asoc) { if (chunk && chunk->chunk_hdr && chunk->chunk_hdr->type == SCTP_CID_INIT) flags = 0; else flags = SCTP_CHUNK_FLAG_T; } retval = sctp_make_control(asoc, SCTP_CID_ABORT, flags, hint, GFP_ATOMIC); /* RFC 2960 6.4 Multi-homed SCTP Endpoints * * An endpoint SHOULD transmit reply chunks (e.g., SACK, * HEARTBEAT ACK, * etc.) to the same destination transport * address from which it * received the DATA or control chunk * to which it is replying. * * [ABORT back to where the offender came from.] */ if (retval && chunk) retval->transport = chunk->transport; return retval; } /* Helper to create ABORT with a NO_USER_DATA error. */ struct sctp_chunk *sctp_make_abort_no_data( const struct sctp_association *asoc, const struct sctp_chunk *chunk, __u32 tsn) { struct sctp_chunk *retval; __be32 payload; retval = sctp_make_abort(asoc, chunk, sizeof(struct sctp_errhdr) + sizeof(tsn)); if (!retval) goto no_mem; /* Put the tsn back into network byte order. */ payload = htonl(tsn); sctp_init_cause(retval, SCTP_ERROR_NO_DATA, sizeof(payload)); sctp_addto_chunk(retval, sizeof(payload), (const void *)&payload); /* RFC 2960 6.4 Multi-homed SCTP Endpoints * * An endpoint SHOULD transmit reply chunks (e.g., SACK, * HEARTBEAT ACK, * etc.) to the same destination transport * address from which it * received the DATA or control chunk * to which it is replying. * * [ABORT back to where the offender came from.] */ if (chunk) retval->transport = chunk->transport; no_mem: return retval; } /* Helper to create ABORT with a SCTP_ERROR_USER_ABORT error. */ struct sctp_chunk *sctp_make_abort_user(const struct sctp_association *asoc, struct msghdr *msg, size_t paylen) { struct sctp_chunk *retval; void *payload = NULL; int err; retval = sctp_make_abort(asoc, NULL, sizeof(struct sctp_errhdr) + paylen); if (!retval) goto err_chunk; if (paylen) { /* Put the msg_iov together into payload. 
*/ payload = kmalloc(paylen, GFP_KERNEL); if (!payload) goto err_payload; err = memcpy_from_msg(payload, msg, paylen); if (err < 0) goto err_copy; } sctp_init_cause(retval, SCTP_ERROR_USER_ABORT, paylen); sctp_addto_chunk(retval, paylen, payload); if (paylen) kfree(payload); return retval; err_copy: kfree(payload); err_payload: sctp_chunk_free(retval); retval = NULL; err_chunk: return retval; } /* Append bytes to the end of a parameter. Will panic if chunk is not big * enough. */ static void *sctp_addto_param(struct sctp_chunk *chunk, int len, const void *data) { int chunklen = ntohs(chunk->chunk_hdr->length); void *target; target = skb_put(chunk->skb, len); if (data) memcpy(target, data, len); else memset(target, 0, len); /* Adjust the chunk length field. */ chunk->chunk_hdr->length = htons(chunklen + len); chunk->chunk_end = skb_tail_pointer(chunk->skb); return target; } /* Make an ABORT chunk with a PROTOCOL VIOLATION cause code. */ struct sctp_chunk *sctp_make_abort_violation( const struct sctp_association *asoc, const struct sctp_chunk *chunk, const __u8 *payload, const size_t paylen) { struct sctp_chunk *retval; struct sctp_paramhdr phdr; retval = sctp_make_abort(asoc, chunk, sizeof(struct sctp_errhdr) + paylen + sizeof(phdr)); if (!retval) goto end; sctp_init_cause(retval, SCTP_ERROR_PROTO_VIOLATION, paylen + sizeof(phdr)); phdr.type = htons(chunk->chunk_hdr->type); phdr.length = chunk->chunk_hdr->length; sctp_addto_chunk(retval, paylen, payload); sctp_addto_param(retval, sizeof(phdr), &phdr); end: return retval; } struct sctp_chunk *sctp_make_violation_paramlen( const struct sctp_association *asoc, const struct sctp_chunk *chunk, struct sctp_paramhdr *param) { static const char error[] = "The following parameter had invalid length:"; size_t payload_len = sizeof(error) + sizeof(struct sctp_errhdr) + sizeof(*param); struct sctp_chunk *retval; retval = sctp_make_abort(asoc, chunk, payload_len); if (!retval) goto nodata; sctp_init_cause(retval, SCTP_ERROR_PROTO_VIOLATION, sizeof(error) + sizeof(*param)); sctp_addto_chunk(retval, sizeof(error), error); sctp_addto_param(retval, sizeof(*param), param); nodata: return retval; } struct sctp_chunk *sctp_make_violation_max_retrans( const struct sctp_association *asoc, const struct sctp_chunk *chunk) { static const char error[] = "Association exceeded its max_retrans count"; size_t payload_len = sizeof(error) + sizeof(struct sctp_errhdr); struct sctp_chunk *retval; retval = sctp_make_abort(asoc, chunk, payload_len); if (!retval) goto nodata; sctp_init_cause(retval, SCTP_ERROR_PROTO_VIOLATION, sizeof(error)); sctp_addto_chunk(retval, sizeof(error), error); nodata: return retval; } /* Make a HEARTBEAT chunk. */ struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc, const struct sctp_transport *transport) { struct sctp_sender_hb_info hbinfo; struct sctp_chunk *retval; retval = sctp_make_control(asoc, SCTP_CID_HEARTBEAT, 0, sizeof(hbinfo), GFP_ATOMIC); if (!retval) goto nodata; hbinfo.param_hdr.type = SCTP_PARAM_HEARTBEAT_INFO; hbinfo.param_hdr.length = htons(sizeof(hbinfo)); hbinfo.daddr = transport->ipaddr; hbinfo.sent_at = jiffies; hbinfo.hb_nonce = transport->hb_nonce; /* Cast away the 'const', as this is just telling the chunk * what transport it belongs to. 
*/ retval->transport = (struct sctp_transport *) transport; retval->subh.hbs_hdr = sctp_addto_chunk(retval, sizeof(hbinfo), &hbinfo); nodata: return retval; } struct sctp_chunk *sctp_make_heartbeat_ack(const struct sctp_association *asoc, const struct sctp_chunk *chunk, const void *payload, const size_t paylen) { struct sctp_chunk *retval; retval = sctp_make_control(asoc, SCTP_CID_HEARTBEAT_ACK, 0, paylen, GFP_ATOMIC); if (!retval) goto nodata; retval->subh.hbs_hdr = sctp_addto_chunk(retval, paylen, payload); /* RFC 2960 6.4 Multi-homed SCTP Endpoints * * An endpoint SHOULD transmit reply chunks (e.g., SACK, * HEARTBEAT ACK, * etc.) to the same destination transport * address from which it * received the DATA or control chunk * to which it is replying. * * [HBACK back to where the HEARTBEAT came from.] */ if (chunk) retval->transport = chunk->transport; nodata: return retval; } /* Create an Operation Error chunk with the specified space reserved. * This routine can be used for containing multiple causes in the chunk. */ static struct sctp_chunk *sctp_make_op_error_space( const struct sctp_association *asoc, const struct sctp_chunk *chunk, size_t size) { struct sctp_chunk *retval; retval = sctp_make_control(asoc, SCTP_CID_ERROR, 0, sizeof(struct sctp_errhdr) + size, GFP_ATOMIC); if (!retval) goto nodata; /* RFC 2960 6.4 Multi-homed SCTP Endpoints * * An endpoint SHOULD transmit reply chunks (e.g., SACK, * HEARTBEAT ACK, etc.) to the same destination transport * address from which it received the DATA or control chunk * to which it is replying. * */ if (chunk) retval->transport = chunk->transport; nodata: return retval; } /* Create an Operation Error chunk of a fixed size, specifically, * min(asoc->pathmtu, SCTP_DEFAULT_MAXSEGMENT) - overheads. * This is a helper function to allocate an error chunk for for those * invalid parameter codes in which we may not want to report all the * errors, if the incoming chunk is large. If it can't fit in a single * packet, we ignore it. */ static inline struct sctp_chunk *sctp_make_op_error_limited( const struct sctp_association *asoc, const struct sctp_chunk *chunk) { size_t size = SCTP_DEFAULT_MAXSEGMENT; struct sctp_sock *sp = NULL; if (asoc) { size = min_t(size_t, size, asoc->pathmtu); sp = sctp_sk(asoc->base.sk); } size = sctp_mtu_payload(sp, size, sizeof(struct sctp_errhdr)); return sctp_make_op_error_space(asoc, chunk, size); } /* Create an Operation Error chunk. 
*/ struct sctp_chunk *sctp_make_op_error(const struct sctp_association *asoc, const struct sctp_chunk *chunk, __be16 cause_code, const void *payload, size_t paylen, size_t reserve_tail) { struct sctp_chunk *retval; retval = sctp_make_op_error_space(asoc, chunk, paylen + reserve_tail); if (!retval) goto nodata; sctp_init_cause(retval, cause_code, paylen + reserve_tail); sctp_addto_chunk(retval, paylen, payload); if (reserve_tail) sctp_addto_param(retval, reserve_tail, NULL); nodata: return retval; } struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc, __u16 key_id) { struct sctp_authhdr auth_hdr; struct sctp_hmac *hmac_desc; struct sctp_chunk *retval; /* Get the first hmac that the peer told us to use */ hmac_desc = sctp_auth_asoc_get_hmac(asoc); if (unlikely(!hmac_desc)) return NULL; retval = sctp_make_control(asoc, SCTP_CID_AUTH, 0, hmac_desc->hmac_len + sizeof(auth_hdr), GFP_ATOMIC); if (!retval) return NULL; auth_hdr.hmac_id = htons(hmac_desc->hmac_id); auth_hdr.shkey_id = htons(key_id); retval->subh.auth_hdr = sctp_addto_chunk(retval, sizeof(auth_hdr), &auth_hdr); skb_put_zero(retval->skb, hmac_desc->hmac_len); /* Adjust the chunk header to include the empty MAC */ retval->chunk_hdr->length = htons(ntohs(retval->chunk_hdr->length) + hmac_desc->hmac_len); retval->chunk_end = skb_tail_pointer(retval->skb); return retval; } /******************************************************************** * 2nd Level Abstractions ********************************************************************/ /* Turn an skb into a chunk. * FIXME: Eventually move the structure directly inside the skb->cb[]. * * sctpimpguide-05.txt Section 2.8.2 * M1) Each time a new DATA chunk is transmitted * set the 'TSN.Missing.Report' count for that TSN to 0. The * 'TSN.Missing.Report' count will be used to determine missing chunks * and when to fast retransmit. * */ struct sctp_chunk *sctp_chunkify(struct sk_buff *skb, const struct sctp_association *asoc, struct sock *sk, gfp_t gfp) { struct sctp_chunk *retval; retval = kmem_cache_zalloc(sctp_chunk_cachep, gfp); if (!retval) goto nodata; if (!sk) pr_debug("%s: chunkifying skb:%p w/o an sk\n", __func__, skb); INIT_LIST_HEAD(&retval->list); retval->skb = skb; retval->asoc = (struct sctp_association *)asoc; retval->singleton = 1; retval->fast_retransmit = SCTP_CAN_FRTX; /* Polish the bead hole. */ INIT_LIST_HEAD(&retval->transmitted_list); INIT_LIST_HEAD(&retval->frag_list); SCTP_DBG_OBJCNT_INC(chunk); refcount_set(&retval->refcnt, 1); nodata: return retval; } /* Set chunk->source and dest based on the IP header in chunk->skb. */ void sctp_init_addrs(struct sctp_chunk *chunk, union sctp_addr *src, union sctp_addr *dest) { memcpy(&chunk->source, src, sizeof(union sctp_addr)); memcpy(&chunk->dest, dest, sizeof(union sctp_addr)); } /* Extract the source address from a chunk. */ const union sctp_addr *sctp_source(const struct sctp_chunk *chunk) { /* If we have a known transport, use that. */ if (chunk->transport) { return &chunk->transport->ipaddr; } else { /* Otherwise, extract it from the IP header. */ return &chunk->source; } } /* Create a new chunk, setting the type and flags headers from the * arguments, reserving enough space for a 'paylen' byte payload. 
*/ static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc, __u8 type, __u8 flags, int paylen, gfp_t gfp) { struct sctp_chunkhdr *chunk_hdr; struct sctp_chunk *retval; struct sk_buff *skb; struct sock *sk; int chunklen; chunklen = SCTP_PAD4(sizeof(*chunk_hdr) + paylen); if (chunklen > SCTP_MAX_CHUNK_LEN) goto nodata; /* No need to allocate LL here, as this is only a chunk. */ skb = alloc_skb(chunklen, gfp); if (!skb) goto nodata; /* Make room for the chunk header. */ chunk_hdr = (struct sctp_chunkhdr *)skb_put(skb, sizeof(*chunk_hdr)); chunk_hdr->type = type; chunk_hdr->flags = flags; chunk_hdr->length = htons(sizeof(*chunk_hdr)); sk = asoc ? asoc->base.sk : NULL; retval = sctp_chunkify(skb, asoc, sk, gfp); if (!retval) { kfree_skb(skb); goto nodata; } retval->chunk_hdr = chunk_hdr; retval->chunk_end = ((__u8 *)chunk_hdr) + sizeof(*chunk_hdr); /* Determine if the chunk needs to be authenticated */ if (sctp_auth_send_cid(type, asoc)) retval->auth = 1; return retval; nodata: return NULL; } static struct sctp_chunk *sctp_make_data(const struct sctp_association *asoc, __u8 flags, int paylen, gfp_t gfp) { return _sctp_make_chunk(asoc, SCTP_CID_DATA, flags, paylen, gfp); } struct sctp_chunk *sctp_make_idata(const struct sctp_association *asoc, __u8 flags, int paylen, gfp_t gfp) { return _sctp_make_chunk(asoc, SCTP_CID_I_DATA, flags, paylen, gfp); } static struct sctp_chunk *sctp_make_control(const struct sctp_association *asoc, __u8 type, __u8 flags, int paylen, gfp_t gfp) { struct sctp_chunk *chunk; chunk = _sctp_make_chunk(asoc, type, flags, paylen, gfp); if (chunk) sctp_control_set_owner_w(chunk); return chunk; } /* Release the memory occupied by a chunk. */ static void sctp_chunk_destroy(struct sctp_chunk *chunk) { BUG_ON(!list_empty(&chunk->list)); list_del_init(&chunk->transmitted_list); consume_skb(chunk->skb); consume_skb(chunk->auth_chunk); SCTP_DBG_OBJCNT_DEC(chunk); kmem_cache_free(sctp_chunk_cachep, chunk); } /* Possibly, free the chunk. */ void sctp_chunk_free(struct sctp_chunk *chunk) { /* Release our reference on the message tracker. */ if (chunk->msg) sctp_datamsg_put(chunk->msg); sctp_chunk_put(chunk); } /* Grab a reference to the chunk. */ void sctp_chunk_hold(struct sctp_chunk *ch) { refcount_inc(&ch->refcnt); } /* Release a reference to the chunk. */ void sctp_chunk_put(struct sctp_chunk *ch) { if (refcount_dec_and_test(&ch->refcnt)) sctp_chunk_destroy(ch); } /* Append bytes to the end of a chunk. Will panic if chunk is not big * enough. */ void *sctp_addto_chunk(struct sctp_chunk *chunk, int len, const void *data) { int chunklen = ntohs(chunk->chunk_hdr->length); int padlen = SCTP_PAD4(chunklen) - chunklen; void *target; skb_put_zero(chunk->skb, padlen); target = skb_put_data(chunk->skb, data, len); /* Adjust the chunk length field. */ chunk->chunk_hdr->length = htons(chunklen + padlen + len); chunk->chunk_end = skb_tail_pointer(chunk->skb); return target; } /* Append bytes from user space to the end of a chunk. Will panic if * chunk is not big enough. * Returns a kernel err value. */ int sctp_user_addto_chunk(struct sctp_chunk *chunk, int len, struct iov_iter *from) { void *target; /* Make room in chunk for data. */ target = skb_put(chunk->skb, len); /* Copy data (whole iovec) into chunk */ if (!copy_from_iter_full(target, len, from)) return -EFAULT; /* Adjust the chunk length field. 
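 * Note that, unlike sctp_addto_chunk() above, no padding bytes are
 * inserted here; the iovec data is appended as-is and the header
 * length is simply increased by 'len'.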
*/ chunk->chunk_hdr->length = htons(ntohs(chunk->chunk_hdr->length) + len); chunk->chunk_end = skb_tail_pointer(chunk->skb); return 0; } /* Helper function to assign a TSN if needed. This assumes that both * the data_hdr and association have already been assigned. */ void sctp_chunk_assign_ssn(struct sctp_chunk *chunk) { struct sctp_stream *stream; struct sctp_chunk *lchunk; struct sctp_datamsg *msg; __u16 ssn, sid; if (chunk->has_ssn) return; /* All fragments will be on the same stream */ sid = ntohs(chunk->subh.data_hdr->stream); stream = &chunk->asoc->stream; /* Now assign the sequence number to the entire message. * All fragments must have the same stream sequence number. */ msg = chunk->msg; list_for_each_entry(lchunk, &msg->chunks, frag_list) { if (lchunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) { ssn = 0; } else { if (lchunk->chunk_hdr->flags & SCTP_DATA_LAST_FRAG) ssn = sctp_ssn_next(stream, out, sid); else ssn = sctp_ssn_peek(stream, out, sid); } lchunk->subh.data_hdr->ssn = htons(ssn); lchunk->has_ssn = 1; } } /* Helper function to assign a TSN if needed. This assumes that both * the data_hdr and association have already been assigned. */ void sctp_chunk_assign_tsn(struct sctp_chunk *chunk) { if (!chunk->has_tsn) { /* This is the last possible instant to * assign a TSN. */ chunk->subh.data_hdr->tsn = htonl(sctp_association_get_next_tsn(chunk->asoc)); chunk->has_tsn = 1; } } /* Create a CLOSED association to use with an incoming packet. */ struct sctp_association *sctp_make_temp_asoc(const struct sctp_endpoint *ep, struct sctp_chunk *chunk, gfp_t gfp) { struct sctp_association *asoc; enum sctp_scope scope; struct sk_buff *skb; /* Create the bare association. */ scope = sctp_scope(sctp_source(chunk)); asoc = sctp_association_new(ep, ep->base.sk, scope, gfp); if (!asoc) goto nodata; asoc->temp = 1; skb = chunk->skb; /* Create an entry for the source address of the packet. */ SCTP_INPUT_CB(skb)->af->from_skb(&asoc->c.peer_addr, skb, 1); nodata: return asoc; } /* Build a cookie representing asoc. * This INCLUDES the param header needed to put the cookie in the INIT ACK. */ static struct sctp_cookie_param *sctp_pack_cookie( const struct sctp_endpoint *ep, const struct sctp_association *asoc, const struct sctp_chunk *init_chunk, int *cookie_len, const __u8 *raw_addrs, int addrs_len) { struct sctp_signed_cookie *cookie; struct sctp_cookie_param *retval; int headersize, bodysize; /* Header size is static data prior to the actual cookie, including * any padding. */ headersize = sizeof(struct sctp_paramhdr) + (sizeof(struct sctp_signed_cookie) - sizeof(struct sctp_cookie)); bodysize = sizeof(struct sctp_cookie) + ntohs(init_chunk->chunk_hdr->length) + addrs_len; /* Pad out the cookie to a multiple to make the signature * functions simpler to write. */ if (bodysize % SCTP_COOKIE_MULTIPLE) bodysize += SCTP_COOKIE_MULTIPLE - (bodysize % SCTP_COOKIE_MULTIPLE); *cookie_len = headersize + bodysize; /* Clear this memory since we are sending this data structure * out on the network. */ retval = kzalloc(*cookie_len, GFP_ATOMIC); if (!retval) goto nodata; cookie = (struct sctp_signed_cookie *) retval->body; /* Set up the parameter header. */ retval->p.type = SCTP_PARAM_STATE_COOKIE; retval->p.length = htons(*cookie_len); /* Copy the cookie part of the association itself. */ cookie->c = asoc->c; /* Save the raw address list length in the cookie. */ cookie->c.raw_addr_list_len = addrs_len; /* Remember PR-SCTP capability. 
*/ cookie->c.prsctp_capable = asoc->peer.prsctp_capable; /* Save adaptation indication in the cookie. */ cookie->c.adaptation_ind = asoc->peer.adaptation_ind; /* Set an expiration time for the cookie. */ cookie->c.expiration = ktime_add(asoc->cookie_life, ktime_get_real()); /* Copy the peer's init packet. */ memcpy(&cookie->c.peer_init[0], init_chunk->chunk_hdr, ntohs(init_chunk->chunk_hdr->length)); /* Copy the raw local address list of the association. */ memcpy((__u8 *)&cookie->c.peer_init[0] + ntohs(init_chunk->chunk_hdr->length), raw_addrs, addrs_len); if (sctp_sk(ep->base.sk)->hmac) { SHASH_DESC_ON_STACK(desc, sctp_sk(ep->base.sk)->hmac); int err; /* Sign the message. */ desc->tfm = sctp_sk(ep->base.sk)->hmac; desc->flags = 0; err = crypto_shash_setkey(desc->tfm, ep->secret_key, sizeof(ep->secret_key)) ?: crypto_shash_digest(desc, (u8 *)&cookie->c, bodysize, cookie->signature); shash_desc_zero(desc); if (err) goto free_cookie; } return retval; free_cookie: kfree(retval); nodata: *cookie_len = 0; return NULL; } /* Unpack the cookie from COOKIE ECHO chunk, recreating the association. */ struct sctp_association *sctp_unpack_cookie( const struct sctp_endpoint *ep, const struct sctp_association *asoc, struct sctp_chunk *chunk, gfp_t gfp, int *error, struct sctp_chunk **errp) { struct sctp_association *retval = NULL; int headersize, bodysize, fixed_size; struct sctp_signed_cookie *cookie; struct sk_buff *skb = chunk->skb; struct sctp_cookie *bear_cookie; __u8 *digest = ep->digest; enum sctp_scope scope; unsigned int len; ktime_t kt; /* Header size is static data prior to the actual cookie, including * any padding. */ headersize = sizeof(struct sctp_chunkhdr) + (sizeof(struct sctp_signed_cookie) - sizeof(struct sctp_cookie)); bodysize = ntohs(chunk->chunk_hdr->length) - headersize; fixed_size = headersize + sizeof(struct sctp_cookie); /* Verify that the chunk looks like it even has a cookie. * There must be enough room for our cookie and our peer's * INIT chunk. */ len = ntohs(chunk->chunk_hdr->length); if (len < fixed_size + sizeof(struct sctp_chunkhdr)) goto malformed; /* Verify that the cookie has been padded out. */ if (bodysize % SCTP_COOKIE_MULTIPLE) goto malformed; /* Process the cookie. */ cookie = chunk->subh.cookie_hdr; bear_cookie = &cookie->c; if (!sctp_sk(ep->base.sk)->hmac) goto no_hmac; /* Check the signature. */ { SHASH_DESC_ON_STACK(desc, sctp_sk(ep->base.sk)->hmac); int err; desc->tfm = sctp_sk(ep->base.sk)->hmac; desc->flags = 0; err = crypto_shash_setkey(desc->tfm, ep->secret_key, sizeof(ep->secret_key)) ?: crypto_shash_digest(desc, (u8 *)bear_cookie, bodysize, digest); shash_desc_zero(desc); if (err) { *error = -SCTP_IERROR_NOMEM; goto fail; } } if (memcmp(digest, cookie->signature, SCTP_SIGNATURE_SIZE)) { *error = -SCTP_IERROR_BAD_SIG; goto fail; } no_hmac: /* IG Section 2.35.2: * 3) Compare the port numbers and the verification tag contained * within the COOKIE ECHO chunk to the actual port numbers and the * verification tag within the SCTP common header of the received * packet. If these values do not match the packet MUST be silently * discarded, */ if (ntohl(chunk->sctp_hdr->vtag) != bear_cookie->my_vtag) { *error = -SCTP_IERROR_BAD_TAG; goto fail; } if (chunk->sctp_hdr->source != bear_cookie->peer_addr.v4.sin_port || ntohs(chunk->sctp_hdr->dest) != bear_cookie->my_port) { *error = -SCTP_IERROR_BAD_PORTS; goto fail; } /* Check to see if the cookie is stale. 
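 * If it is stale and no association exists yet, the ERROR chunk built
 * below carries a Stale Cookie Error cause whose payload is the measure
 * of staleness in microseconds, roughly (per RFC 4960 3.3.10.3):
 *
 *	Cause Code   = 3 (Stale Cookie Error)
 *	Cause Length = 8
 *	Measure of Staleness = htonl(kt - expiration), in usec
 *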
 * If there is already an association, there is no need to check the
 * cookie's expiration for the init collision case of a lost COOKIE ACK.
 * If the skb has been timestamped, then use the stamp, otherwise
 * use the current time.  This introduces a small possibility that
 * a cookie may be considered expired, but this would only slow
 * down the new association establishment instead of every packet.
 */
	if (sock_flag(ep->base.sk, SOCK_TIMESTAMP))
		kt = skb_get_ktime(skb);
	else
		kt = ktime_get_real();

	if (!asoc && ktime_before(bear_cookie->expiration, kt)) {
		suseconds_t usecs = ktime_to_us(ktime_sub(kt, bear_cookie->expiration));
		__be32 n = htonl(usecs);

		/*
		 * Section 3.3.10.3 Stale Cookie Error (3)
		 *
		 * Cause of error
		 * ---------------
		 * Stale Cookie Error:  Indicates the receipt of a valid State
		 * Cookie that has expired.
		 */
		*errp = sctp_make_op_error(asoc, chunk,
					   SCTP_ERROR_STALE_COOKIE, &n,
					   sizeof(n), 0);
		if (*errp)
			*error = -SCTP_IERROR_STALE_COOKIE;
		else
			*error = -SCTP_IERROR_NOMEM;

		goto fail;
	}

	/* Make a new base association. */
	scope = sctp_scope(sctp_source(chunk));
	retval = sctp_association_new(ep, ep->base.sk, scope, gfp);
	if (!retval) {
		*error = -SCTP_IERROR_NOMEM;
		goto fail;
	}

	/* Set up our peer's port number. */
	retval->peer.port = ntohs(chunk->sctp_hdr->source);

	/* Populate the association from the cookie. */
	memcpy(&retval->c, bear_cookie, sizeof(*bear_cookie));

	if (sctp_assoc_set_bind_addr_from_cookie(retval, bear_cookie,
						 GFP_ATOMIC) < 0) {
		*error = -SCTP_IERROR_NOMEM;
		goto fail;
	}

	/* Also, add the destination address. */
	if (list_empty(&retval->base.bind_addr.address_list)) {
		sctp_add_bind_addr(&retval->base.bind_addr, &chunk->dest,
				   sizeof(chunk->dest), SCTP_ADDR_SRC,
				   GFP_ATOMIC);
	}

	retval->next_tsn = retval->c.initial_tsn;
	retval->ctsn_ack_point = retval->next_tsn - 1;
	retval->addip_serial = retval->c.initial_tsn;
	retval->strreset_outseq = retval->c.initial_tsn;
	retval->adv_peer_ack_point = retval->ctsn_ack_point;
	retval->peer.prsctp_capable = retval->c.prsctp_capable;
	retval->peer.adaptation_ind = retval->c.adaptation_ind;

	/* The INIT stuff will be done by the side effects. */
	return retval;

fail:
	if (retval)
		sctp_association_free(retval);

	return NULL;

malformed:
	/* Yikes!  The packet is either corrupt or deliberately malformed. */
	*error = -SCTP_IERROR_MALFORMED;
	goto fail;
}

/********************************************************************
 * 3rd Level Abstractions
 ********************************************************************/

struct __sctp_missing {
	__be32 num_missing;
	__be16 type;
} __packed;

/*
 * Report a missing mandatory parameter.
 */
static int sctp_process_missing_param(const struct sctp_association *asoc,
				      enum sctp_param paramtype,
				      struct sctp_chunk *chunk,
				      struct sctp_chunk **errp)
{
	struct __sctp_missing report;
	__u16 len;

	len = SCTP_PAD4(sizeof(report));

	/* Make an ERROR chunk, preparing enough room for
	 * returning multiple unknown parameters.
	 */
	if (!*errp)
		*errp = sctp_make_op_error_space(asoc, chunk, len);

	if (*errp) {
		report.num_missing = htonl(1);
		report.type = paramtype;
		sctp_init_cause(*errp, SCTP_ERROR_MISS_PARAM,
				sizeof(report));
		sctp_addto_chunk(*errp, sizeof(report), &report);
	}

	/* Stop processing this chunk. */
	return 0;
}

/* Report an Invalid Mandatory Parameter. */
static int sctp_process_inv_mandatory(const struct sctp_association *asoc,
				      struct sctp_chunk *chunk,
				      struct sctp_chunk **errp)
{
	/* Invalid Mandatory Parameter Error has no payload.
*/ if (!*errp) *errp = sctp_make_op_error_space(asoc, chunk, 0); if (*errp) sctp_init_cause(*errp, SCTP_ERROR_INV_PARAM, 0); /* Stop processing this chunk. */ return 0; } static int sctp_process_inv_paramlength(const struct sctp_association *asoc, struct sctp_paramhdr *param, const struct sctp_chunk *chunk, struct sctp_chunk **errp) { /* This is a fatal error. Any accumulated non-fatal errors are * not reported. */ if (*errp) sctp_chunk_free(*errp); /* Create an error chunk and fill it in with our payload. */ *errp = sctp_make_violation_paramlen(asoc, chunk, param); return 0; } /* Do not attempt to handle the HOST_NAME parm. However, do * send back an indicator to the peer. */ static int sctp_process_hn_param(const struct sctp_association *asoc, union sctp_params param, struct sctp_chunk *chunk, struct sctp_chunk **errp) { __u16 len = ntohs(param.p->length); /* Processing of the HOST_NAME parameter will generate an * ABORT. If we've accumulated any non-fatal errors, they * would be unrecognized parameters and we should not include * them in the ABORT. */ if (*errp) sctp_chunk_free(*errp); *errp = sctp_make_op_error(asoc, chunk, SCTP_ERROR_DNS_FAILED, param.v, len, 0); /* Stop processing this chunk. */ return 0; } static int sctp_verify_ext_param(struct net *net, union sctp_params param) { __u16 num_ext = ntohs(param.p->length) - sizeof(struct sctp_paramhdr); int have_asconf = 0; int have_auth = 0; int i; for (i = 0; i < num_ext; i++) { switch (param.ext->chunks[i]) { case SCTP_CID_AUTH: have_auth = 1; break; case SCTP_CID_ASCONF: case SCTP_CID_ASCONF_ACK: have_asconf = 1; break; } } /* ADD-IP Security: The draft requires us to ABORT or ignore the * INIT/INIT-ACK if ADD-IP is listed, but AUTH is not. Do this * only if ADD-IP is turned on and we are not backward-compatible * mode. */ if (net->sctp.addip_noauth) return 1; if (net->sctp.addip_enable && !have_auth && have_asconf) return 0; return 1; } static void sctp_process_ext_param(struct sctp_association *asoc, union sctp_params param) { __u16 num_ext = ntohs(param.p->length) - sizeof(struct sctp_paramhdr); struct net *net = sock_net(asoc->base.sk); int i; for (i = 0; i < num_ext; i++) { switch (param.ext->chunks[i]) { case SCTP_CID_RECONF: if (asoc->reconf_enable && !asoc->peer.reconf_capable) asoc->peer.reconf_capable = 1; break; case SCTP_CID_FWD_TSN: if (asoc->prsctp_enable && !asoc->peer.prsctp_capable) asoc->peer.prsctp_capable = 1; break; case SCTP_CID_AUTH: /* if the peer reports AUTH, assume that he * supports AUTH. */ if (asoc->ep->auth_enable) asoc->peer.auth_capable = 1; break; case SCTP_CID_ASCONF: case SCTP_CID_ASCONF_ACK: if (net->sctp.addip_enable) asoc->peer.asconf_capable = 1; break; case SCTP_CID_I_DATA: if (sctp_sk(asoc->base.sk)->strm_interleave) asoc->intl_enable = 1; break; default: break; } } } /* RFC 3.2.1 & the Implementers Guide 2.2. * * The Parameter Types are encoded such that the * highest-order two bits specify the action that must be * taken if the processing endpoint does not recognize the * Parameter Type. * * 00 - Stop processing this parameter; do not process any further * parameters within this chunk * * 01 - Stop processing this parameter, do not process any further * parameters within this chunk, and report the unrecognized * parameter in an 'Unrecognized Parameter' ERROR chunk. * * 10 - Skip this parameter and continue processing. * * 11 - Skip this parameter and continue processing but * report the unrecognized parameter in an * 'Unrecognized Parameter' ERROR chunk. 
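 *
 * For example, the FWD-TSN-Supported parameter (type 0xC000) has both
 * high-order bits set, so an endpoint that does not recognize it skips
 * it, keeps processing, and reports it in an 'Unrecognized Parameter'
 * ERROR chunk.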
 *
 * Return value:
 *	SCTP_IERROR_NO_ERROR - continue with the chunk
 *	SCTP_IERROR_ERROR    - stop and report an error.
 *	SCTP_IERROR_NOMEM    - out of memory.
 */
static enum sctp_ierror sctp_process_unk_param(
					const struct sctp_association *asoc,
					union sctp_params param,
					struct sctp_chunk *chunk,
					struct sctp_chunk **errp)
{
	int retval = SCTP_IERROR_NO_ERROR;

	switch (param.p->type & SCTP_PARAM_ACTION_MASK) {
	case SCTP_PARAM_ACTION_DISCARD:
		retval = SCTP_IERROR_ERROR;
		break;
	case SCTP_PARAM_ACTION_SKIP:
		break;
	case SCTP_PARAM_ACTION_DISCARD_ERR:
		retval = SCTP_IERROR_ERROR;
		/* Fall through */
	case SCTP_PARAM_ACTION_SKIP_ERR:
		/* Make an ERROR chunk, preparing enough room for
		 * returning multiple unknown parameters.
		 */
		if (!*errp) {
			*errp = sctp_make_op_error_limited(asoc, chunk);
			if (!*errp) {
				/* If there is no memory for generating the
				 * ERROR report as specified, an ABORT will be
				 * triggered to the peer and the association
				 * won't be established.
				 */
				retval = SCTP_IERROR_NOMEM;
				break;
			}
		}

		if (!sctp_init_cause(*errp, SCTP_ERROR_UNKNOWN_PARAM,
				     ntohs(param.p->length)))
			sctp_addto_chunk(*errp, ntohs(param.p->length),
					 param.v);
		break;
	default:
		break;
	}

	return retval;
}

/* Verify variable length parameters
 * Return values:
 *	SCTP_IERROR_ABORT - trigger an ABORT
 *	SCTP_IERROR_NOMEM - out of memory (abort)
 *	SCTP_IERROR_ERROR - stop processing, trigger an ERROR
 *	SCTP_IERROR_NO_ERROR - continue with the chunk
 */
static enum sctp_ierror sctp_verify_param(struct net *net,
					  const struct sctp_endpoint *ep,
					  const struct sctp_association *asoc,
					  union sctp_params param,
					  enum sctp_cid cid,
					  struct sctp_chunk *chunk,
					  struct sctp_chunk **err_chunk)
{
	struct sctp_hmac_algo_param *hmacs;
	int retval = SCTP_IERROR_NO_ERROR;
	__u16 n_elt, id = 0;
	int i;

	/* FIXME - This routine is not looking at each parameter per the
	 * chunk type, i.e., unrecognized parameters should be further
	 * identified based on the chunk id.
	 */

	switch (param.p->type) {
	case SCTP_PARAM_IPV4_ADDRESS:
	case SCTP_PARAM_IPV6_ADDRESS:
	case SCTP_PARAM_COOKIE_PRESERVATIVE:
	case SCTP_PARAM_SUPPORTED_ADDRESS_TYPES:
	case SCTP_PARAM_STATE_COOKIE:
	case SCTP_PARAM_HEARTBEAT_INFO:
	case SCTP_PARAM_UNRECOGNIZED_PARAMETERS:
	case SCTP_PARAM_ECN_CAPABLE:
	case SCTP_PARAM_ADAPTATION_LAYER_IND:
		break;

	case SCTP_PARAM_SUPPORTED_EXT:
		if (!sctp_verify_ext_param(net, param))
			return SCTP_IERROR_ABORT;
		break;

	case SCTP_PARAM_SET_PRIMARY:
		if (!net->sctp.addip_enable)
			goto fallthrough;

		if (ntohs(param.p->length) < sizeof(struct sctp_addip_param) +
					     sizeof(struct sctp_paramhdr)) {
			sctp_process_inv_paramlength(asoc, param.p,
						     chunk, err_chunk);
			retval = SCTP_IERROR_ABORT;
		}
		break;

	case SCTP_PARAM_HOST_NAME_ADDRESS:
		/* Tell the peer, we won't support this param. */
		sctp_process_hn_param(asoc, param, chunk, err_chunk);
		retval = SCTP_IERROR_ABORT;
		break;

	case SCTP_PARAM_FWD_TSN_SUPPORT:
		if (ep->prsctp_enable)
			break;
		goto fallthrough;

	case SCTP_PARAM_RANDOM:
		if (!ep->auth_enable)
			goto fallthrough;

		/* SCTP-AUTH: Section 6.1
		 * If the random number is not 32 byte long the association
		 * MUST be aborted.  The ABORT chunk SHOULD contain the error
		 * cause 'Protocol Violation'.
		 */
		if (SCTP_AUTH_RANDOM_LENGTH !=
		    ntohs(param.p->length) - sizeof(struct sctp_paramhdr)) {
			sctp_process_inv_paramlength(asoc, param.p,
						     chunk, err_chunk);
			retval = SCTP_IERROR_ABORT;
		}
		break;

	case SCTP_PARAM_CHUNKS:
		if (!ep->auth_enable)
			goto fallthrough;

		/* SCTP-AUTH: Section 3.2
		 * The CHUNKS parameter MUST be included once in the INIT or
		 * INIT-ACK chunk if the sender wants to receive authenticated
		 * chunks.
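 * (The limit stated next works out to the 4-byte parameter header plus
 * at most 256 one-byte chunk types.)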
Its maximum length is 260 bytes. */ if (260 < ntohs(param.p->length)) { sctp_process_inv_paramlength(asoc, param.p, chunk, err_chunk); retval = SCTP_IERROR_ABORT; } break; case SCTP_PARAM_HMAC_ALGO: if (!ep->auth_enable) goto fallthrough; hmacs = (struct sctp_hmac_algo_param *)param.p; n_elt = (ntohs(param.p->length) - sizeof(struct sctp_paramhdr)) >> 1; /* SCTP-AUTH: Section 6.1 * The HMAC algorithm based on SHA-1 MUST be supported and * included in the HMAC-ALGO parameter. */ for (i = 0; i < n_elt; i++) { id = ntohs(hmacs->hmac_ids[i]); if (id == SCTP_AUTH_HMAC_ID_SHA1) break; } if (id != SCTP_AUTH_HMAC_ID_SHA1) { sctp_process_inv_paramlength(asoc, param.p, chunk, err_chunk); retval = SCTP_IERROR_ABORT; } break; fallthrough: default: pr_debug("%s: unrecognized param:%d for chunk:%d\n", __func__, ntohs(param.p->type), cid); retval = sctp_process_unk_param(asoc, param, chunk, err_chunk); break; } return retval; } /* Verify the INIT packet before we process it. */ int sctp_verify_init(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, enum sctp_cid cid, struct sctp_init_chunk *peer_init, struct sctp_chunk *chunk, struct sctp_chunk **errp) { union sctp_params param; bool has_cookie = false; int result; /* Check for missing mandatory parameters. Note: Initial TSN is * also mandatory, but is not checked here since the valid range * is 0..2**32-1. RFC4960, section 3.3.3. */ if (peer_init->init_hdr.num_outbound_streams == 0 || peer_init->init_hdr.num_inbound_streams == 0 || peer_init->init_hdr.init_tag == 0 || ntohl(peer_init->init_hdr.a_rwnd) < SCTP_DEFAULT_MINWINDOW) return sctp_process_inv_mandatory(asoc, chunk, errp); sctp_walk_params(param, peer_init, init_hdr.params) { if (param.p->type == SCTP_PARAM_STATE_COOKIE) has_cookie = true; } /* There is a possibility that a parameter length was bad and * in that case we would have stoped walking the parameters. * The current param.p would point at the bad one. * Current consensus on the mailing list is to generate a PROTOCOL * VIOLATION error. We build the ERROR chunk here and let the normal * error handling code build and send the packet. */ if (param.v != (void *)chunk->chunk_end) return sctp_process_inv_paramlength(asoc, param.p, chunk, errp); /* The only missing mandatory param possible today is * the state cookie for an INIT-ACK chunk. */ if ((SCTP_CID_INIT_ACK == cid) && !has_cookie) return sctp_process_missing_param(asoc, SCTP_PARAM_STATE_COOKIE, chunk, errp); /* Verify all the variable length parameters */ sctp_walk_params(param, peer_init, init_hdr.params) { result = sctp_verify_param(net, ep, asoc, param, cid, chunk, errp); switch (result) { case SCTP_IERROR_ABORT: case SCTP_IERROR_NOMEM: return 0; case SCTP_IERROR_ERROR: return 1; case SCTP_IERROR_NO_ERROR: default: break; } } /* for (loop through all parameters) */ return 1; } /* Unpack the parameters in an INIT packet into an association. * Returns 0 on failure, else success. * FIXME: This is an association method. */ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk, const union sctp_addr *peer_addr, struct sctp_init_chunk *peer_init, gfp_t gfp) { struct net *net = sock_net(asoc->base.sk); struct sctp_transport *transport; struct list_head *pos, *temp; union sctp_params param; union sctp_addr addr; struct sctp_af *af; int src_match = 0; /* We must include the address that the INIT packet came from. * This is the only address that matters for an INIT packet. 
* When processing a COOKIE ECHO, we retrieve the from address * of the INIT from the cookie. */ /* This implementation defaults to making the first transport * added as the primary transport. The source address seems to * be a a better choice than any of the embedded addresses. */ if (!sctp_assoc_add_peer(asoc, peer_addr, gfp, SCTP_ACTIVE)) goto nomem; if (sctp_cmp_addr_exact(sctp_source(chunk), peer_addr)) src_match = 1; /* Process the initialization parameters. */ sctp_walk_params(param, peer_init, init_hdr.params) { if (!src_match && (param.p->type == SCTP_PARAM_IPV4_ADDRESS || param.p->type == SCTP_PARAM_IPV6_ADDRESS)) { af = sctp_get_af_specific(param_type2af(param.p->type)); if (!af->from_addr_param(&addr, param.addr, chunk->sctp_hdr->source, 0)) continue; if (sctp_cmp_addr_exact(sctp_source(chunk), &addr)) src_match = 1; } if (!sctp_process_param(asoc, param, peer_addr, gfp)) goto clean_up; } /* source address of chunk may not match any valid address */ if (!src_match) goto clean_up; /* AUTH: After processing the parameters, make sure that we * have all the required info to potentially do authentications. */ if (asoc->peer.auth_capable && (!asoc->peer.peer_random || !asoc->peer.peer_hmacs)) asoc->peer.auth_capable = 0; /* In a non-backward compatible mode, if the peer claims * support for ADD-IP but not AUTH, the ADD-IP spec states * that we MUST ABORT the association. Section 6. The section * also give us an option to silently ignore the packet, which * is what we'll do here. */ if (!net->sctp.addip_noauth && (asoc->peer.asconf_capable && !asoc->peer.auth_capable)) { asoc->peer.addip_disabled_mask |= (SCTP_PARAM_ADD_IP | SCTP_PARAM_DEL_IP | SCTP_PARAM_SET_PRIMARY); asoc->peer.asconf_capable = 0; goto clean_up; } /* Walk list of transports, removing transports in the UNKNOWN state. */ list_for_each_safe(pos, temp, &asoc->peer.transport_addr_list) { transport = list_entry(pos, struct sctp_transport, transports); if (transport->state == SCTP_UNKNOWN) { sctp_assoc_rm_peer(asoc, transport); } } /* The fixed INIT headers are always in network byte * order. */ asoc->peer.i.init_tag = ntohl(peer_init->init_hdr.init_tag); asoc->peer.i.a_rwnd = ntohl(peer_init->init_hdr.a_rwnd); asoc->peer.i.num_outbound_streams = ntohs(peer_init->init_hdr.num_outbound_streams); asoc->peer.i.num_inbound_streams = ntohs(peer_init->init_hdr.num_inbound_streams); asoc->peer.i.initial_tsn = ntohl(peer_init->init_hdr.initial_tsn); asoc->strreset_inseq = asoc->peer.i.initial_tsn; /* Apply the upper bounds for output streams based on peer's * number of inbound streams. */ if (asoc->c.sinit_num_ostreams > ntohs(peer_init->init_hdr.num_inbound_streams)) { asoc->c.sinit_num_ostreams = ntohs(peer_init->init_hdr.num_inbound_streams); } if (asoc->c.sinit_max_instreams > ntohs(peer_init->init_hdr.num_outbound_streams)) { asoc->c.sinit_max_instreams = ntohs(peer_init->init_hdr.num_outbound_streams); } /* Copy Initiation tag from INIT to VT_peer in cookie. */ asoc->c.peer_vtag = asoc->peer.i.init_tag; /* Peer Rwnd : Current calculated value of the peer's rwnd. */ asoc->peer.rwnd = asoc->peer.i.a_rwnd; /* RFC 2960 7.2.1 The initial value of ssthresh MAY be arbitrarily * high (for example, implementations MAY use the size of the receiver * advertised window). */ list_for_each_entry(transport, &asoc->peer.transport_addr_list, transports) { transport->ssthresh = asoc->peer.i.a_rwnd; } /* Set up the TSN tracking pieces. 
*/ if (!sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_INITIAL, asoc->peer.i.initial_tsn, gfp)) goto clean_up; /* RFC 2960 6.5 Stream Identifier and Stream Sequence Number * * The stream sequence number in all the streams shall start * from 0 when the association is established. Also, when the * stream sequence number reaches the value 65535 the next * stream sequence number shall be set to 0. */ if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams, asoc->c.sinit_max_instreams, gfp)) goto clean_up; /* Update frag_point when stream_interleave may get changed. */ sctp_assoc_update_frag_point(asoc); if (!asoc->temp && sctp_assoc_set_id(asoc, gfp)) goto clean_up; /* ADDIP Section 4.1 ASCONF Chunk Procedures * * When an endpoint has an ASCONF signaled change to be sent to the * remote endpoint it should do the following: * ... * A2) A serial number should be assigned to the Chunk. The serial * number should be a monotonically increasing number. All serial * numbers are defined to be initialized at the start of the * association to the same value as the Initial TSN. */ asoc->peer.addip_serial = asoc->peer.i.initial_tsn - 1; return 1; clean_up: /* Release the transport structures. */ list_for_each_safe(pos, temp, &asoc->peer.transport_addr_list) { transport = list_entry(pos, struct sctp_transport, transports); if (transport->state != SCTP_ACTIVE) sctp_assoc_rm_peer(asoc, transport); } nomem: return 0; } /* Update asoc with the option described in param. * * RFC2960 3.3.2.1 Optional/Variable Length Parameters in INIT * * asoc is the association to update. * param is the variable length parameter to use for update. * cid tells us if this is an INIT, INIT ACK or COOKIE ECHO. * If the current packet is an INIT we want to minimize the amount of * work we do. In particular, we should not build transport * structures for the addresses. */ static int sctp_process_param(struct sctp_association *asoc, union sctp_params param, const union sctp_addr *peer_addr, gfp_t gfp) { struct net *net = sock_net(asoc->base.sk); struct sctp_endpoint *ep = asoc->ep; union sctp_addr_param *addr_param; struct sctp_transport *t; enum sctp_scope scope; union sctp_addr addr; struct sctp_af *af; int retval = 1, i; u32 stale; __u16 sat; /* We maintain all INIT parameters in network byte order all the * time. This allows us to not worry about whether the parameters * came from a fresh INIT, and INIT ACK, or were stored in a cookie. */ switch (param.p->type) { case SCTP_PARAM_IPV6_ADDRESS: if (PF_INET6 != asoc->base.sk->sk_family) break; goto do_addr_param; case SCTP_PARAM_IPV4_ADDRESS: /* v4 addresses are not allowed on v6-only socket */ if (ipv6_only_sock(asoc->base.sk)) break; do_addr_param: af = sctp_get_af_specific(param_type2af(param.p->type)); if (!af->from_addr_param(&addr, param.addr, htons(asoc->peer.port), 0)) break; scope = sctp_scope(peer_addr); if (sctp_in_scope(net, &addr, scope)) if (!sctp_assoc_add_peer(asoc, &addr, gfp, SCTP_UNCONFIRMED)) return 0; break; case SCTP_PARAM_COOKIE_PRESERVATIVE: if (!net->sctp.cookie_preserve_enable) break; stale = ntohl(param.life->lifespan_increment); /* Suggested Cookie Life span increment's unit is msec, * (1/1000sec). */ asoc->cookie_life = ktime_add_ms(asoc->cookie_life, stale); break; case SCTP_PARAM_HOST_NAME_ADDRESS: pr_debug("%s: unimplemented SCTP_HOST_NAME_ADDRESS\n", __func__); break; case SCTP_PARAM_SUPPORTED_ADDRESS_TYPES: /* Turn off the default values first so we'll know which * ones are really set by the peer. 
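 * (For instance, a peer that sends its INIT over IPv4 but lists only
 * IPv6 in the parameter still ends up with ipv4_address = 1, because
 * the address family the packet arrived with is always assumed below.)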
*/ asoc->peer.ipv4_address = 0; asoc->peer.ipv6_address = 0; /* Assume that peer supports the address family * by which it sends a packet. */ if (peer_addr->sa.sa_family == AF_INET6) asoc->peer.ipv6_address = 1; else if (peer_addr->sa.sa_family == AF_INET) asoc->peer.ipv4_address = 1; /* Cycle through address types; avoid divide by 0. */ sat = ntohs(param.p->length) - sizeof(struct sctp_paramhdr); if (sat) sat /= sizeof(__u16); for (i = 0; i < sat; ++i) { switch (param.sat->types[i]) { case SCTP_PARAM_IPV4_ADDRESS: asoc->peer.ipv4_address = 1; break; case SCTP_PARAM_IPV6_ADDRESS: if (PF_INET6 == asoc->base.sk->sk_family) asoc->peer.ipv6_address = 1; break; case SCTP_PARAM_HOST_NAME_ADDRESS: asoc->peer.hostname_address = 1; break; default: /* Just ignore anything else. */ break; } } break; case SCTP_PARAM_STATE_COOKIE: asoc->peer.cookie_len = ntohs(param.p->length) - sizeof(struct sctp_paramhdr); if (asoc->peer.cookie) kfree(asoc->peer.cookie); asoc->peer.cookie = kmemdup(param.cookie->body, asoc->peer.cookie_len, gfp); if (!asoc->peer.cookie) retval = 0; break; case SCTP_PARAM_HEARTBEAT_INFO: /* Would be odd to receive, but it causes no problems. */ break; case SCTP_PARAM_UNRECOGNIZED_PARAMETERS: /* Rejected during verify stage. */ break; case SCTP_PARAM_ECN_CAPABLE: asoc->peer.ecn_capable = 1; break; case SCTP_PARAM_ADAPTATION_LAYER_IND: asoc->peer.adaptation_ind = ntohl(param.aind->adaptation_ind); break; case SCTP_PARAM_SET_PRIMARY: if (!net->sctp.addip_enable) goto fall_through; addr_param = param.v + sizeof(struct sctp_addip_param); af = sctp_get_af_specific(param_type2af(addr_param->p.type)); if (!af) break; if (!af->from_addr_param(&addr, addr_param, htons(asoc->peer.port), 0)) break; if (!af->addr_valid(&addr, NULL, NULL)) break; t = sctp_assoc_lookup_paddr(asoc, &addr); if (!t) break; sctp_assoc_set_primary(asoc, t); break; case SCTP_PARAM_SUPPORTED_EXT: sctp_process_ext_param(asoc, param); break; case SCTP_PARAM_FWD_TSN_SUPPORT: if (asoc->prsctp_enable) { asoc->peer.prsctp_capable = 1; break; } /* Fall Through */ goto fall_through; case SCTP_PARAM_RANDOM: if (!ep->auth_enable) goto fall_through; /* Save peer's random parameter */ if (asoc->peer.peer_random) kfree(asoc->peer.peer_random); asoc->peer.peer_random = kmemdup(param.p, ntohs(param.p->length), gfp); if (!asoc->peer.peer_random) { retval = 0; break; } break; case SCTP_PARAM_HMAC_ALGO: if (!ep->auth_enable) goto fall_through; /* Save peer's HMAC list */ if (asoc->peer.peer_hmacs) kfree(asoc->peer.peer_hmacs); asoc->peer.peer_hmacs = kmemdup(param.p, ntohs(param.p->length), gfp); if (!asoc->peer.peer_hmacs) { retval = 0; break; } /* Set the default HMAC the peer requested*/ sctp_auth_asoc_set_default_hmac(asoc, param.hmac_algo); break; case SCTP_PARAM_CHUNKS: if (!ep->auth_enable) goto fall_through; if (asoc->peer.peer_chunks) kfree(asoc->peer.peer_chunks); asoc->peer.peer_chunks = kmemdup(param.p, ntohs(param.p->length), gfp); if (!asoc->peer.peer_chunks) retval = 0; break; fall_through: default: /* Any unrecognized parameters should have been caught * and handled by sctp_verify_param() which should be * called prior to this routine. Simply log the error * here. */ pr_debug("%s: ignoring param:%d for association:%p.\n", __func__, ntohs(param.p->type), asoc); break; } return retval; } /* Select a new verification tag. */ __u32 sctp_generate_tag(const struct sctp_endpoint *ep) { /* I believe that this random number generator complies with RFC1750. * A tag of 0 is reserved for special cases (e.g. INIT). 
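 * The loop below simply redraws until a non-zero value comes up, so the
 * reserved tag of 0 is never handed out (each draw retries with
 * probability 2^-32).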
*/ __u32 x; do { get_random_bytes(&x, sizeof(__u32)); } while (x == 0); return x; } /* Select an initial TSN to send during startup. */ __u32 sctp_generate_tsn(const struct sctp_endpoint *ep) { __u32 retval; get_random_bytes(&retval, sizeof(__u32)); return retval; } /* * ADDIP 3.1.1 Address Configuration Change Chunk (ASCONF) * 0 1 2 3 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Type = 0xC1 | Chunk Flags | Chunk Length | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Serial Number | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Address Parameter | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | ASCONF Parameter #1 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * \ \ * / .... / * \ \ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | ASCONF Parameter #N | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * * Address Parameter and other parameter will not be wrapped in this function */ static struct sctp_chunk *sctp_make_asconf(struct sctp_association *asoc, union sctp_addr *addr, int vparam_len) { struct sctp_addiphdr asconf; struct sctp_chunk *retval; int length = sizeof(asconf) + vparam_len; union sctp_addr_param addrparam; int addrlen; struct sctp_af *af = sctp_get_af_specific(addr->v4.sin_family); addrlen = af->to_addr_param(addr, &addrparam); if (!addrlen) return NULL; length += addrlen; /* Create the chunk. */ retval = sctp_make_control(asoc, SCTP_CID_ASCONF, 0, length, GFP_ATOMIC); if (!retval) return NULL; asconf.serial = htonl(asoc->addip_serial++); retval->subh.addip_hdr = sctp_addto_chunk(retval, sizeof(asconf), &asconf); retval->param_hdr.v = sctp_addto_chunk(retval, addrlen, &addrparam); return retval; } /* ADDIP * 3.2.1 Add IP Address * 0 1 2 3 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Type = 0xC001 | Length = Variable | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | ASCONF-Request Correlation ID | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Address Parameter | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * * 3.2.2 Delete IP Address * 0 1 2 3 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Type = 0xC002 | Length = Variable | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | ASCONF-Request Correlation ID | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Address Parameter | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * */ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc, union sctp_addr *laddr, struct sockaddr *addrs, int addrcnt, __be16 flags) { union sctp_addr_param addr_param; struct sctp_addip_param param; int paramlen = sizeof(param); struct sctp_chunk *retval; int addr_param_len = 0; union sctp_addr *addr; int totallen = 0, i; int del_pickup = 0; struct sctp_af *af; void *addr_buf; /* Get total length of all the address parameters. 
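 * Each address costs one sctp_addip_param header plus its address
 * parameter; e.g., assuming the usual layouts, a single IPv4 address
 * adds 8 + 8 = 16 bytes and an IPv6 address 8 + 20 = 28 bytes to
 * 'totallen'.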
*/ addr_buf = addrs; for (i = 0; i < addrcnt; i++) { addr = addr_buf; af = sctp_get_af_specific(addr->v4.sin_family); addr_param_len = af->to_addr_param(addr, &addr_param); totallen += paramlen; totallen += addr_param_len; addr_buf += af->sockaddr_len; if (asoc->asconf_addr_del_pending && !del_pickup) { /* reuse the parameter length from the same scope one */ totallen += paramlen; totallen += addr_param_len; del_pickup = 1; pr_debug("%s: picked same-scope del_pending addr, " "totallen for all addresses is %d\n", __func__, totallen); } } /* Create an asconf chunk with the required length. */ retval = sctp_make_asconf(asoc, laddr, totallen); if (!retval) return NULL; /* Add the address parameters to the asconf chunk. */ addr_buf = addrs; for (i = 0; i < addrcnt; i++) { addr = addr_buf; af = sctp_get_af_specific(addr->v4.sin_family); addr_param_len = af->to_addr_param(addr, &addr_param); param.param_hdr.type = flags; param.param_hdr.length = htons(paramlen + addr_param_len); param.crr_id = htonl(i); sctp_addto_chunk(retval, paramlen, &param); sctp_addto_chunk(retval, addr_param_len, &addr_param); addr_buf += af->sockaddr_len; } if (flags == SCTP_PARAM_ADD_IP && del_pickup) { addr = asoc->asconf_addr_del_pending; af = sctp_get_af_specific(addr->v4.sin_family); addr_param_len = af->to_addr_param(addr, &addr_param); param.param_hdr.type = SCTP_PARAM_DEL_IP; param.param_hdr.length = htons(paramlen + addr_param_len); param.crr_id = htonl(i); sctp_addto_chunk(retval, paramlen, &param); sctp_addto_chunk(retval, addr_param_len, &addr_param); } return retval; } /* ADDIP * 3.2.4 Set Primary IP Address * 0 1 2 3 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Type =0xC004 | Length = Variable | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | ASCONF-Request Correlation ID | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Address Parameter | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * * Create an ASCONF chunk with Set Primary IP address parameter. */ struct sctp_chunk *sctp_make_asconf_set_prim(struct sctp_association *asoc, union sctp_addr *addr) { struct sctp_af *af = sctp_get_af_specific(addr->v4.sin_family); union sctp_addr_param addrparam; struct sctp_addip_param param; struct sctp_chunk *retval; int len = sizeof(param); int addrlen; addrlen = af->to_addr_param(addr, &addrparam); if (!addrlen) return NULL; len += addrlen; /* Create the chunk and make asconf header. */ retval = sctp_make_asconf(asoc, addr, len); if (!retval) return NULL; param.param_hdr.type = SCTP_PARAM_SET_PRIMARY; param.param_hdr.length = htons(len); param.crr_id = 0; sctp_addto_chunk(retval, sizeof(param), &param); sctp_addto_chunk(retval, addrlen, &addrparam); return retval; } /* ADDIP 3.1.2 Address Configuration Acknowledgement Chunk (ASCONF-ACK) * 0 1 2 3 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Type = 0x80 | Chunk Flags | Chunk Length | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Serial Number | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | ASCONF Parameter Response#1 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * \ \ * / .... 
/ * \ \ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | ASCONF Parameter Response#N | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * * Create an ASCONF_ACK chunk with enough space for the parameter responses. */ static struct sctp_chunk *sctp_make_asconf_ack(const struct sctp_association *asoc, __u32 serial, int vparam_len) { struct sctp_addiphdr asconf; struct sctp_chunk *retval; int length = sizeof(asconf) + vparam_len; /* Create the chunk. */ retval = sctp_make_control(asoc, SCTP_CID_ASCONF_ACK, 0, length, GFP_ATOMIC); if (!retval) return NULL; asconf.serial = htonl(serial); retval->subh.addip_hdr = sctp_addto_chunk(retval, sizeof(asconf), &asconf); return retval; } /* Add response parameters to an ASCONF_ACK chunk. */ static void sctp_add_asconf_response(struct sctp_chunk *chunk, __be32 crr_id, __be16 err_code, struct sctp_addip_param *asconf_param) { struct sctp_addip_param ack_param; struct sctp_errhdr err_param; int asconf_param_len = 0; int err_param_len = 0; __be16 response_type; if (SCTP_ERROR_NO_ERROR == err_code) { response_type = SCTP_PARAM_SUCCESS_REPORT; } else { response_type = SCTP_PARAM_ERR_CAUSE; err_param_len = sizeof(err_param); if (asconf_param) asconf_param_len = ntohs(asconf_param->param_hdr.length); } /* Add Success Indication or Error Cause Indication parameter. */ ack_param.param_hdr.type = response_type; ack_param.param_hdr.length = htons(sizeof(ack_param) + err_param_len + asconf_param_len); ack_param.crr_id = crr_id; sctp_addto_chunk(chunk, sizeof(ack_param), &ack_param); if (SCTP_ERROR_NO_ERROR == err_code) return; /* Add Error Cause parameter. */ err_param.cause = err_code; err_param.length = htons(err_param_len + asconf_param_len); sctp_addto_chunk(chunk, err_param_len, &err_param); /* Add the failed TLV copied from ASCONF chunk. */ if (asconf_param) sctp_addto_chunk(chunk, asconf_param_len, asconf_param); } /* Process a asconf parameter. */ static __be16 sctp_process_asconf_param(struct sctp_association *asoc, struct sctp_chunk *asconf, struct sctp_addip_param *asconf_param) { union sctp_addr_param *addr_param; struct sctp_transport *peer; union sctp_addr addr; struct sctp_af *af; addr_param = (void *)asconf_param + sizeof(*asconf_param); if (asconf_param->param_hdr.type != SCTP_PARAM_ADD_IP && asconf_param->param_hdr.type != SCTP_PARAM_DEL_IP && asconf_param->param_hdr.type != SCTP_PARAM_SET_PRIMARY) return SCTP_ERROR_UNKNOWN_PARAM; switch (addr_param->p.type) { case SCTP_PARAM_IPV6_ADDRESS: if (!asoc->peer.ipv6_address) return SCTP_ERROR_DNS_FAILED; break; case SCTP_PARAM_IPV4_ADDRESS: if (!asoc->peer.ipv4_address) return SCTP_ERROR_DNS_FAILED; break; default: return SCTP_ERROR_DNS_FAILED; } af = sctp_get_af_specific(param_type2af(addr_param->p.type)); if (unlikely(!af)) return SCTP_ERROR_DNS_FAILED; if (!af->from_addr_param(&addr, addr_param, htons(asoc->peer.port), 0)) return SCTP_ERROR_DNS_FAILED; /* ADDIP 4.2.1 This parameter MUST NOT contain a broadcast * or multicast address. * (note: wildcard is permitted and requires special handling so * make sure we check for that) */ if (!af->is_any(&addr) && !af->addr_valid(&addr, NULL, asconf->skb)) return SCTP_ERROR_DNS_FAILED; switch (asconf_param->param_hdr.type) { case SCTP_PARAM_ADD_IP: /* Section 4.2.1: * If the address 0.0.0.0 or ::0 is provided, the source * address of the packet MUST be added. 
*/ if (af->is_any(&addr)) memcpy(&addr, &asconf->source, sizeof(addr)); if (security_sctp_bind_connect(asoc->ep->base.sk, SCTP_PARAM_ADD_IP, (struct sockaddr *)&addr, af->sockaddr_len)) return SCTP_ERROR_REQ_REFUSED; /* ADDIP 4.3 D9) If an endpoint receives an ADD IP address * request and does not have the local resources to add this * new address to the association, it MUST return an Error * Cause TLV set to the new error code 'Operation Refused * Due to Resource Shortage'. */ peer = sctp_assoc_add_peer(asoc, &addr, GFP_ATOMIC, SCTP_UNCONFIRMED); if (!peer) return SCTP_ERROR_RSRC_LOW; /* Start the heartbeat timer. */ sctp_transport_reset_hb_timer(peer); asoc->new_transport = peer; break; case SCTP_PARAM_DEL_IP: /* ADDIP 4.3 D7) If a request is received to delete the * last remaining IP address of a peer endpoint, the receiver * MUST send an Error Cause TLV with the error cause set to the * new error code 'Request to Delete Last Remaining IP Address'. */ if (asoc->peer.transport_count == 1) return SCTP_ERROR_DEL_LAST_IP; /* ADDIP 4.3 D8) If a request is received to delete an IP * address which is also the source address of the IP packet * which contained the ASCONF chunk, the receiver MUST reject * this request. To reject the request the receiver MUST send * an Error Cause TLV set to the new error code 'Request to * Delete Source IP Address' */ if (sctp_cmp_addr_exact(&asconf->source, &addr)) return SCTP_ERROR_DEL_SRC_IP; /* Section 4.2.2 * If the address 0.0.0.0 or ::0 is provided, all * addresses of the peer except the source address of the * packet MUST be deleted. */ if (af->is_any(&addr)) { sctp_assoc_set_primary(asoc, asconf->transport); sctp_assoc_del_nonprimary_peers(asoc, asconf->transport); return SCTP_ERROR_NO_ERROR; } /* If the address is not part of the association, the * ASCONF-ACK with Error Cause Indication Parameter * which including cause of Unresolvable Address should * be sent. */ peer = sctp_assoc_lookup_paddr(asoc, &addr); if (!peer) return SCTP_ERROR_DNS_FAILED; sctp_assoc_rm_peer(asoc, peer); break; case SCTP_PARAM_SET_PRIMARY: /* ADDIP Section 4.2.4 * If the address 0.0.0.0 or ::0 is provided, the receiver * MAY mark the source address of the packet as its * primary. */ if (af->is_any(&addr)) memcpy(&addr, sctp_source(asconf), sizeof(addr)); if (security_sctp_bind_connect(asoc->ep->base.sk, SCTP_PARAM_SET_PRIMARY, (struct sockaddr *)&addr, af->sockaddr_len)) return SCTP_ERROR_REQ_REFUSED; peer = sctp_assoc_lookup_paddr(asoc, &addr); if (!peer) return SCTP_ERROR_DNS_FAILED; sctp_assoc_set_primary(asoc, peer); break; } return SCTP_ERROR_NO_ERROR; } /* Verify the ASCONF packet before we process it. */ bool sctp_verify_asconf(const struct sctp_association *asoc, struct sctp_chunk *chunk, bool addr_param_needed, struct sctp_paramhdr **errp) { struct sctp_addip_chunk *addip; bool addr_param_seen = false; union sctp_params param; addip = (struct sctp_addip_chunk *)chunk->chunk_hdr; sctp_walk_params(param, addip, addip_hdr.params) { size_t length = ntohs(param.p->length); *errp = param.p; switch (param.p->type) { case SCTP_PARAM_ERR_CAUSE: break; case SCTP_PARAM_IPV4_ADDRESS: if (length != sizeof(struct sctp_ipv4addr_param)) return false; /* ensure there is only one addr param and it's in the * beginning of addip_hdr params, or we reject it. 
*/ if (param.v != addip->addip_hdr.params) return false; addr_param_seen = true; break; case SCTP_PARAM_IPV6_ADDRESS: if (length != sizeof(struct sctp_ipv6addr_param)) return false; if (param.v != addip->addip_hdr.params) return false; addr_param_seen = true; break; case SCTP_PARAM_ADD_IP: case SCTP_PARAM_DEL_IP: case SCTP_PARAM_SET_PRIMARY: /* In ASCONF chunks, these need to be first. */ if (addr_param_needed && !addr_param_seen) return false; length = ntohs(param.addip->param_hdr.length); if (length < sizeof(struct sctp_addip_param) + sizeof(**errp)) return false; break; case SCTP_PARAM_SUCCESS_REPORT: case SCTP_PARAM_ADAPTATION_LAYER_IND: if (length != sizeof(struct sctp_addip_param)) return false; break; default: /* This is unkown to us, reject! */ return false; } } /* Remaining sanity checks. */ if (addr_param_needed && !addr_param_seen) return false; if (!addr_param_needed && addr_param_seen) return false; if (param.v != chunk->chunk_end) return false; return true; } /* Process an incoming ASCONF chunk with the next expected serial no. and * return an ASCONF_ACK chunk to be sent in response. */ struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc, struct sctp_chunk *asconf) { union sctp_addr_param *addr_param; struct sctp_addip_chunk *addip; struct sctp_chunk *asconf_ack; bool all_param_pass = true; struct sctp_addiphdr *hdr; int length = 0, chunk_len; union sctp_params param; __be16 err_code; __u32 serial; addip = (struct sctp_addip_chunk *)asconf->chunk_hdr; chunk_len = ntohs(asconf->chunk_hdr->length) - sizeof(struct sctp_chunkhdr); hdr = (struct sctp_addiphdr *)asconf->skb->data; serial = ntohl(hdr->serial); /* Skip the addiphdr and store a pointer to address parameter. */ length = sizeof(*hdr); addr_param = (union sctp_addr_param *)(asconf->skb->data + length); chunk_len -= length; /* Skip the address parameter and store a pointer to the first * asconf parameter. */ length = ntohs(addr_param->p.length); chunk_len -= length; /* create an ASCONF_ACK chunk. * Based on the definitions of parameters, we know that the size of * ASCONF_ACK parameters are less than or equal to the fourfold of ASCONF * parameters. */ asconf_ack = sctp_make_asconf_ack(asoc, serial, chunk_len * 4); if (!asconf_ack) goto done; /* Process the TLVs contained within the ASCONF chunk. */ sctp_walk_params(param, addip, addip_hdr.params) { /* Skip preceeding address parameters. */ if (param.p->type == SCTP_PARAM_IPV4_ADDRESS || param.p->type == SCTP_PARAM_IPV6_ADDRESS) continue; err_code = sctp_process_asconf_param(asoc, asconf, param.addip); /* ADDIP 4.1 A7) * If an error response is received for a TLV parameter, * all TLVs with no response before the failed TLV are * considered successful if not reported. All TLVs after * the failed response are considered unsuccessful unless * a specific success indication is present for the parameter. */ if (err_code != SCTP_ERROR_NO_ERROR) all_param_pass = false; if (!all_param_pass) sctp_add_asconf_response(asconf_ack, param.addip->crr_id, err_code, param.addip); /* ADDIP 4.3 D11) When an endpoint receiving an ASCONF to add * an IP address sends an 'Out of Resource' in its response, it * MUST also fail any subsequent add or delete requests bundled * in the ASCONF. */ if (err_code == SCTP_ERROR_RSRC_LOW) goto done; } done: asoc->peer.addip_serial++; /* If we are sending a new ASCONF_ACK hold a reference to it in assoc * after freeing the reference to old asconf ack if any. 
*/ if (asconf_ack) { sctp_chunk_hold(asconf_ack); list_add_tail(&asconf_ack->transmitted_list, &asoc->asconf_ack_list); } return asconf_ack; } /* Process a asconf parameter that is successfully acked. */ static void sctp_asconf_param_success(struct sctp_association *asoc, struct sctp_addip_param *asconf_param) { struct sctp_bind_addr *bp = &asoc->base.bind_addr; union sctp_addr_param *addr_param; struct sctp_sockaddr_entry *saddr; struct sctp_transport *transport; union sctp_addr addr; struct sctp_af *af; addr_param = (void *)asconf_param + sizeof(*asconf_param); /* We have checked the packet before, so we do not check again. */ af = sctp_get_af_specific(param_type2af(addr_param->p.type)); if (!af->from_addr_param(&addr, addr_param, htons(bp->port), 0)) return; switch (asconf_param->param_hdr.type) { case SCTP_PARAM_ADD_IP: /* This is always done in BH context with a socket lock * held, so the list can not change. */ local_bh_disable(); list_for_each_entry(saddr, &bp->address_list, list) { if (sctp_cmp_addr_exact(&saddr->a, &addr)) saddr->state = SCTP_ADDR_SRC; } local_bh_enable(); list_for_each_entry(transport, &asoc->peer.transport_addr_list, transports) { sctp_transport_dst_release(transport); } break; case SCTP_PARAM_DEL_IP: local_bh_disable(); sctp_del_bind_addr(bp, &addr); if (asoc->asconf_addr_del_pending != NULL && sctp_cmp_addr_exact(asoc->asconf_addr_del_pending, &addr)) { kfree(asoc->asconf_addr_del_pending); asoc->asconf_addr_del_pending = NULL; } local_bh_enable(); list_for_each_entry(transport, &asoc->peer.transport_addr_list, transports) { sctp_transport_dst_release(transport); } break; default: break; } } /* Get the corresponding ASCONF response error code from the ASCONF_ACK chunk * for the given asconf parameter. If there is no response for this parameter, * return the error code based on the third argument 'no_err'. * ADDIP 4.1 * A7) If an error response is received for a TLV parameter, all TLVs with no * response before the failed TLV are considered successful if not reported. * All TLVs after the failed response are considered unsuccessful unless a * specific success indication is present for the parameter. */ static __be16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack, struct sctp_addip_param *asconf_param, int no_err) { struct sctp_addip_param *asconf_ack_param; struct sctp_errhdr *err_param; int asconf_ack_len; __be16 err_code; int length; if (no_err) err_code = SCTP_ERROR_NO_ERROR; else err_code = SCTP_ERROR_REQ_REFUSED; asconf_ack_len = ntohs(asconf_ack->chunk_hdr->length) - sizeof(struct sctp_chunkhdr); /* Skip the addiphdr from the asconf_ack chunk and store a pointer to * the first asconf_ack parameter. */ length = sizeof(struct sctp_addiphdr); asconf_ack_param = (struct sctp_addip_param *)(asconf_ack->skb->data + length); asconf_ack_len -= length; while (asconf_ack_len > 0) { if (asconf_ack_param->crr_id == asconf_param->crr_id) { switch (asconf_ack_param->param_hdr.type) { case SCTP_PARAM_SUCCESS_REPORT: return SCTP_ERROR_NO_ERROR; case SCTP_PARAM_ERR_CAUSE: length = sizeof(*asconf_ack_param); err_param = (void *)asconf_ack_param + length; asconf_ack_len -= length; if (asconf_ack_len > 0) return err_param->cause; else return SCTP_ERROR_INV_PARAM; break; default: return SCTP_ERROR_INV_PARAM; } } length = ntohs(asconf_ack_param->param_hdr.length); asconf_ack_param = (void *)asconf_ack_param + length; asconf_ack_len -= length; } return err_code; } /* Process an incoming ASCONF_ACK chunk against the cached last ASCONF chunk. 
*/ int sctp_process_asconf_ack(struct sctp_association *asoc, struct sctp_chunk *asconf_ack) { struct sctp_chunk *asconf = asoc->addip_last_asconf; struct sctp_addip_param *asconf_param; __be16 err_code = SCTP_ERROR_NO_ERROR; union sctp_addr_param *addr_param; int asconf_len = asconf->skb->len; int all_param_pass = 0; int length = 0; int no_err = 1; int retval = 0; /* Skip the chunkhdr and addiphdr from the last asconf sent and store * a pointer to address parameter. */ length = sizeof(struct sctp_addip_chunk); addr_param = (union sctp_addr_param *)(asconf->skb->data + length); asconf_len -= length; /* Skip the address parameter in the last asconf sent and store a * pointer to the first asconf parameter. */ length = ntohs(addr_param->p.length); asconf_param = (void *)addr_param + length; asconf_len -= length; /* ADDIP 4.1 * A8) If there is no response(s) to specific TLV parameter(s), and no * failures are indicated, then all request(s) are considered * successful. */ if (asconf_ack->skb->len == sizeof(struct sctp_addiphdr)) all_param_pass = 1; /* Process the TLVs contained in the last sent ASCONF chunk. */ while (asconf_len > 0) { if (all_param_pass) err_code = SCTP_ERROR_NO_ERROR; else { err_code = sctp_get_asconf_response(asconf_ack, asconf_param, no_err); if (no_err && (SCTP_ERROR_NO_ERROR != err_code)) no_err = 0; } switch (err_code) { case SCTP_ERROR_NO_ERROR: sctp_asconf_param_success(asoc, asconf_param); break; case SCTP_ERROR_RSRC_LOW: retval = 1; break; case SCTP_ERROR_UNKNOWN_PARAM: /* Disable sending this type of asconf parameter in * future. */ asoc->peer.addip_disabled_mask |= asconf_param->param_hdr.type; break; case SCTP_ERROR_REQ_REFUSED: case SCTP_ERROR_DEL_LAST_IP: case SCTP_ERROR_DEL_SRC_IP: default: break; } /* Skip the processed asconf parameter and move to the next * one. */ length = ntohs(asconf_param->param_hdr.length); asconf_param = (void *)asconf_param + length; asconf_len -= length; } if (no_err && asoc->src_out_of_asoc_ok) { asoc->src_out_of_asoc_ok = 0; sctp_transport_immediate_rtx(asoc->peer.primary_path); } /* Free the cached last sent asconf chunk. */ list_del_init(&asconf->transmitted_list); sctp_chunk_free(asconf); asoc->addip_last_asconf = NULL; return retval; } /* Make a FWD TSN chunk. 
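 *
 * The size hint passed to sctp_make_control() below is
 * (nstreams + 1) * sizeof(__u32): 4 bytes for the new cumulative TSN
 * plus 4 bytes (stream number + SSN) per skip entry, so two skipped
 * streams reserve 12 bytes of chunk payload, for example.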
*/ struct sctp_chunk *sctp_make_fwdtsn(const struct sctp_association *asoc, __u32 new_cum_tsn, size_t nstreams, struct sctp_fwdtsn_skip *skiplist) { struct sctp_chunk *retval = NULL; struct sctp_fwdtsn_hdr ftsn_hdr; struct sctp_fwdtsn_skip skip; size_t hint; int i; hint = (nstreams + 1) * sizeof(__u32); retval = sctp_make_control(asoc, SCTP_CID_FWD_TSN, 0, hint, GFP_ATOMIC); if (!retval) return NULL; ftsn_hdr.new_cum_tsn = htonl(new_cum_tsn); retval->subh.fwdtsn_hdr = sctp_addto_chunk(retval, sizeof(ftsn_hdr), &ftsn_hdr); for (i = 0; i < nstreams; i++) { skip.stream = skiplist[i].stream; skip.ssn = skiplist[i].ssn; sctp_addto_chunk(retval, sizeof(skip), &skip); } return retval; } struct sctp_chunk *sctp_make_ifwdtsn(const struct sctp_association *asoc, __u32 new_cum_tsn, size_t nstreams, struct sctp_ifwdtsn_skip *skiplist) { struct sctp_chunk *retval = NULL; struct sctp_ifwdtsn_hdr ftsn_hdr; size_t hint; hint = (nstreams + 1) * sizeof(__u32); retval = sctp_make_control(asoc, SCTP_CID_I_FWD_TSN, 0, hint, GFP_ATOMIC); if (!retval) return NULL; ftsn_hdr.new_cum_tsn = htonl(new_cum_tsn); retval->subh.ifwdtsn_hdr = sctp_addto_chunk(retval, sizeof(ftsn_hdr), &ftsn_hdr); sctp_addto_chunk(retval, nstreams * sizeof(skiplist[0]), skiplist); return retval; } /* RE-CONFIG 3.1 (RE-CONFIG chunk) * 0 1 2 3 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Type = 130 | Chunk Flags | Chunk Length | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * \ \ * / Re-configuration Parameter / * \ \ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * \ \ * / Re-configuration Parameter (optional) / * \ \ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ */ static struct sctp_chunk *sctp_make_reconf(const struct sctp_association *asoc, int length) { struct sctp_reconf_chunk *reconf; struct sctp_chunk *retval; retval = sctp_make_control(asoc, SCTP_CID_RECONF, 0, length, GFP_ATOMIC); if (!retval) return NULL; reconf = (struct sctp_reconf_chunk *)retval->chunk_hdr; retval->param_hdr.v = reconf->params; return retval; } /* RE-CONFIG 4.1 (STREAM OUT RESET) * 0 1 2 3 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Parameter Type = 13 | Parameter Length = 16 + 2 * N | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Re-configuration Request Sequence Number | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Re-configuration Response Sequence Number | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Sender's Last Assigned TSN | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Stream Number 1 (optional) | Stream Number 2 (optional) | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * / ...... 
/ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Stream Number N-1 (optional) | Stream Number N (optional) | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * * RE-CONFIG 4.2 (STREAM IN RESET) * 0 1 2 3 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Parameter Type = 14 | Parameter Length = 8 + 2 * N | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Re-configuration Request Sequence Number | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Stream Number 1 (optional) | Stream Number 2 (optional) | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * / ...... / * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Stream Number N-1 (optional) | Stream Number N (optional) | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ */ struct sctp_chunk *sctp_make_strreset_req( const struct sctp_association *asoc, __u16 stream_num, __be16 *stream_list, bool out, bool in) { __u16 stream_len = stream_num * sizeof(__u16); struct sctp_strreset_outreq outreq; struct sctp_strreset_inreq inreq; struct sctp_chunk *retval; __u16 outlen, inlen; outlen = (sizeof(outreq) + stream_len) * out; inlen = (sizeof(inreq) + stream_len) * in; retval = sctp_make_reconf(asoc, SCTP_PAD4(outlen) + SCTP_PAD4(inlen)); if (!retval) return NULL; if (outlen) { outreq.param_hdr.type = SCTP_PARAM_RESET_OUT_REQUEST; outreq.param_hdr.length = htons(outlen); outreq.request_seq = htonl(asoc->strreset_outseq); outreq.response_seq = htonl(asoc->strreset_inseq - 1); outreq.send_reset_at_tsn = htonl(asoc->next_tsn - 1); sctp_addto_chunk(retval, sizeof(outreq), &outreq); if (stream_len) sctp_addto_chunk(retval, stream_len, stream_list); } if (inlen) { inreq.param_hdr.type = SCTP_PARAM_RESET_IN_REQUEST; inreq.param_hdr.length = htons(inlen); inreq.request_seq = htonl(asoc->strreset_outseq + out); sctp_addto_chunk(retval, sizeof(inreq), &inreq); if (stream_len) sctp_addto_chunk(retval, stream_len, stream_list); } return retval; } /* RE-CONFIG 4.3 (SSN/TSN RESET ALL) * 0 1 2 3 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Parameter Type = 15 | Parameter Length = 8 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Re-configuration Request Sequence Number | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ */ struct sctp_chunk *sctp_make_strreset_tsnreq( const struct sctp_association *asoc) { struct sctp_strreset_tsnreq tsnreq; __u16 length = sizeof(tsnreq); struct sctp_chunk *retval; retval = sctp_make_reconf(asoc, length); if (!retval) return NULL; tsnreq.param_hdr.type = SCTP_PARAM_RESET_TSN_REQUEST; tsnreq.param_hdr.length = htons(length); tsnreq.request_seq = htonl(asoc->strreset_outseq); sctp_addto_chunk(retval, sizeof(tsnreq), &tsnreq); return retval; } /* RE-CONFIG 4.5/4.6 (ADD STREAM) * 0 1 2 3 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Parameter Type = 17 | Parameter Length = 12 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Re-configuration Request Sequence Number | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Number of new streams | Reserved | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ */ struct 
sctp_chunk *sctp_make_strreset_addstrm( const struct sctp_association *asoc, __u16 out, __u16 in) { struct sctp_strreset_addstrm addstrm; __u16 size = sizeof(addstrm); struct sctp_chunk *retval; retval = sctp_make_reconf(asoc, (!!out + !!in) * size); if (!retval) return NULL; if (out) { addstrm.param_hdr.type = SCTP_PARAM_RESET_ADD_OUT_STREAMS; addstrm.param_hdr.length = htons(size); addstrm.number_of_streams = htons(out); addstrm.request_seq = htonl(asoc->strreset_outseq); addstrm.reserved = 0; sctp_addto_chunk(retval, size, &addstrm); } if (in) { addstrm.param_hdr.type = SCTP_PARAM_RESET_ADD_IN_STREAMS; addstrm.param_hdr.length = htons(size); addstrm.number_of_streams = htons(in); addstrm.request_seq = htonl(asoc->strreset_outseq + !!out); addstrm.reserved = 0; sctp_addto_chunk(retval, size, &addstrm); } return retval; } /* RE-CONFIG 4.4 (RESP) * 0 1 2 3 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Parameter Type = 16 | Parameter Length | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Re-configuration Response Sequence Number | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Result | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ */ struct sctp_chunk *sctp_make_strreset_resp(const struct sctp_association *asoc, __u32 result, __u32 sn) { struct sctp_strreset_resp resp; __u16 length = sizeof(resp); struct sctp_chunk *retval; retval = sctp_make_reconf(asoc, length); if (!retval) return NULL; resp.param_hdr.type = SCTP_PARAM_RESET_RESPONSE; resp.param_hdr.length = htons(length); resp.response_seq = htonl(sn); resp.result = htonl(result); sctp_addto_chunk(retval, sizeof(resp), &resp); return retval; } /* RE-CONFIG 4.4 OPTIONAL (TSNRESP) * 0 1 2 3 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Parameter Type = 16 | Parameter Length | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Re-configuration Response Sequence Number | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Result | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Sender's Next TSN (optional) | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Receiver's Next TSN (optional) | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ */ struct sctp_chunk *sctp_make_strreset_tsnresp(struct sctp_association *asoc, __u32 result, __u32 sn, __u32 sender_tsn, __u32 receiver_tsn) { struct sctp_strreset_resptsn tsnresp; __u16 length = sizeof(tsnresp); struct sctp_chunk *retval; retval = sctp_make_reconf(asoc, length); if (!retval) return NULL; tsnresp.param_hdr.type = SCTP_PARAM_RESET_RESPONSE; tsnresp.param_hdr.length = htons(length); tsnresp.response_seq = htonl(sn); tsnresp.result = htonl(result); tsnresp.senders_next_tsn = htonl(sender_tsn); tsnresp.receivers_next_tsn = htonl(receiver_tsn); sctp_addto_chunk(retval, sizeof(tsnresp), &tsnresp); return retval; } bool sctp_verify_reconf(const struct sctp_association *asoc, struct sctp_chunk *chunk, struct sctp_paramhdr **errp) { struct sctp_reconf_chunk *hdr; union sctp_params param; __be16 last = 0; __u16 cnt = 0; hdr = (struct sctp_reconf_chunk *)chunk->chunk_hdr; sctp_walk_params(param, hdr, params) { __u16 length = ntohs(param.p->length); *errp = param.p; if (cnt++ > 2) return false; switch (param.p->type) { case 
SCTP_PARAM_RESET_OUT_REQUEST: if (length < sizeof(struct sctp_strreset_outreq) || (last && last != SCTP_PARAM_RESET_RESPONSE && last != SCTP_PARAM_RESET_IN_REQUEST)) return false; break; case SCTP_PARAM_RESET_IN_REQUEST: if (length < sizeof(struct sctp_strreset_inreq) || (last && last != SCTP_PARAM_RESET_OUT_REQUEST)) return false; break; case SCTP_PARAM_RESET_RESPONSE: if ((length != sizeof(struct sctp_strreset_resp) && length != sizeof(struct sctp_strreset_resptsn)) || (last && last != SCTP_PARAM_RESET_RESPONSE && last != SCTP_PARAM_RESET_OUT_REQUEST)) return false; break; case SCTP_PARAM_RESET_TSN_REQUEST: if (length != sizeof(struct sctp_strreset_tsnreq) || last) return false; break; case SCTP_PARAM_RESET_ADD_IN_STREAMS: if (length != sizeof(struct sctp_strreset_addstrm) || (last && last != SCTP_PARAM_RESET_ADD_OUT_STREAMS)) return false; break; case SCTP_PARAM_RESET_ADD_OUT_STREAMS: if (length != sizeof(struct sctp_strreset_addstrm) || (last && last != SCTP_PARAM_RESET_ADD_IN_STREAMS)) return false; break; default: return false; } last = param.p->type; } return true; }
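/*
 * Hedged usage sketch (not part of this file): the RECONF chunks built by
 * sctp_make_strreset_req() above are normally triggered from userspace via
 * the RFC 6525 socket API. This assumes a kernel and headers that expose
 * SCTP_RESET_STREAMS and struct sctp_reset_streams (uapi <linux/sctp.h> or a
 * recent lksctp-tools); "sd" is assumed to be a connected one-to-one SCTP
 * socket, and stream-reset support typically has to be negotiated/enabled
 * on the association first (e.g. via the SCTP_ENABLE_STREAM_RESET option).
 */
#include <stdint.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/sctp.h>

static int request_outgoing_reset(int sd, uint16_t stream)
{
	struct sctp_reset_streams *srs;
	size_t len = sizeof(*srs) + sizeof(uint16_t);
	int err;

	srs = calloc(1, len);
	if (!srs)
		return -1;

	srs->srs_assoc_id = 0;			/* current association on a 1-to-1 socket */
	srs->srs_flags = SCTP_STREAM_RESET_OUTGOING;
	srs->srs_number_streams = 1;		/* 0 would mean "reset all streams" */
	srs->srs_stream_list[0] = stream;

	/* The kernel answers this request by sending a RECONF chunk with an
	 * Outgoing SSN Reset Request parameter, as built above. */
	err = setsockopt(sd, IPPROTO_SCTP, SCTP_RESET_STREAMS, srs, len);
	free(srs);
	return err;
}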
/* SPDX-License-Identifier: GPL-2.0 */ /* * procfs namespace bits */ #ifndef _LINUX_PROC_NS_H #define _LINUX_PROC_NS_H #include <linux/ns_common.h> struct pid_namespace; struct nsproxy; struct path; struct task_struct; struct inode; struct proc_ns_operations { const char *name; const char *real_ns_name; int type; struct ns_common *(*get)(struct task_struct *task); void (*put)(struct ns_common *ns); int (*install)(struct nsproxy *nsproxy, struct ns_common *ns); struct user_namespace *(*owner)(struct ns_common *ns); struct ns_common *(*get_parent)(struct ns_common *ns); } __randomize_layout; extern const struct proc_ns_operations netns_operations; extern const struct proc_ns_operations utsns_operations; extern const struct proc_ns_operations ipcns_operations; extern const struct proc_ns_operations pidns_operations; extern const struct proc_ns_operations pidns_for_children_operations; extern const struct proc_ns_operations userns_operations; extern const struct proc_ns_operations mntns_operations; extern const struct proc_ns_operations cgroupns_operations; /* * We always define these enumerators */ enum { PROC_ROOT_INO = 1, PROC_IPC_INIT_INO = 0xEFFFFFFFU, PROC_UTS_INIT_INO = 0xEFFFFFFEU, PROC_USER_INIT_INO = 0xEFFFFFFDU, PROC_PID_INIT_INO = 0xEFFFFFFCU, PROC_CGROUP_INIT_INO = 0xEFFFFFFBU, }; #ifdef CONFIG_PROC_FS extern int pid_ns_prepare_proc(struct pid_namespace *ns); extern void pid_ns_release_proc(struct pid_namespace *ns); extern int proc_alloc_inum(unsigned int *pino); extern void proc_free_inum(unsigned int inum); #else /* CONFIG_PROC_FS */ static inline int pid_ns_prepare_proc(struct pid_namespace *ns) { return 0; } static inline void pid_ns_release_proc(struct pid_namespace *ns) {} static inline int proc_alloc_inum(unsigned int *inum) { *inum = 1; return 0; } static inline void proc_free_inum(unsigned int inum) {} #endif /* CONFIG_PROC_FS */ static inline int ns_alloc_inum(struct ns_common *ns) { atomic_long_set(&ns->stashed, 0); return proc_alloc_inum(&ns->inum); } #define ns_free_inum(ns) proc_free_inum((ns)->inum) extern struct file *proc_ns_fget(int fd); #define get_proc_ns(inode) ((struct ns_common *)(inode)->i_private) extern void *ns_get_path(struct path *path, struct task_struct *task, const struct proc_ns_operations *ns_ops); typedef struct ns_common *ns_get_path_helper_t(void *); extern void *ns_get_path_cb(struct path *path, ns_get_path_helper_t ns_get_cb, void *private_data); extern int ns_get_name(char *buf, size_t size, struct task_struct *task, const struct proc_ns_operations *ns_ops); extern void nsfs_init(void); #endif /* _LINUX_PROC_NS_H */
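/*
 * Hedged illustration (not part of this header): the proc_ns_operations
 * table above backs the /proc/<pid>/ns/* files, and its install hook is what
 * a setns(2) call ultimately reaches for the chosen namespace type. A
 * minimal userspace sketch of joining another task's network namespace; the
 * pid value is an assumption for the example, and the caller needs the
 * appropriate privileges (typically CAP_SYS_ADMIN in the owning user ns).
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <sched.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>

static int join_net_ns(pid_t pid)
{
	char path[64];
	int fd, err;

	snprintf(path, sizeof(path), "/proc/%d/ns/net", (int)pid);
	fd = open(path, O_RDONLY | O_CLOEXEC);
	if (fd < 0) {
		perror("open");
		return -1;
	}

	/* CLONE_NEWNET asks setns() to verify that the fd refers to a
	 * network namespace before installing it. */
	err = setns(fd, CLONE_NEWNET);
	close(fd);
	return err;
}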
/* Xtables module to match packets using a BPF filter. * Copyright 2013 Google Inc. * Written by Willem de Bruijn <willemb@google.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/syscalls.h> #include <linux/skbuff.h> #include <linux/filter.h> #include <linux/bpf.h> #include <linux/netfilter/xt_bpf.h> #include <linux/netfilter/x_tables.h> MODULE_AUTHOR("Willem de Bruijn <willemb@google.com>"); MODULE_DESCRIPTION("Xtables: BPF filter match"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_bpf"); MODULE_ALIAS("ip6t_bpf"); static int __bpf_mt_check_bytecode(struct sock_filter *insns, __u16 len, struct bpf_prog **ret) { struct sock_fprog_kern program; if (len > XT_BPF_MAX_NUM_INSTR) return -EINVAL; program.len = len; program.filter = insns; if (bpf_prog_create(ret, &program)) { pr_info_ratelimited("check failed: parse error\n"); return -EINVAL; } return 0; } static int __bpf_mt_check_fd(int fd, struct bpf_prog **ret) { struct bpf_prog *prog; prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER); if (IS_ERR(prog)) return PTR_ERR(prog); *ret = prog; return 0; } static int __bpf_mt_check_path(const char *path, struct bpf_prog **ret) { if (strnlen(path, XT_BPF_PATH_MAX) == XT_BPF_PATH_MAX) return -EINVAL; *ret = bpf_prog_get_type_path(path, BPF_PROG_TYPE_SOCKET_FILTER); return PTR_ERR_OR_ZERO(*ret); } static int bpf_mt_check(const struct xt_mtchk_param *par) { struct xt_bpf_info *info = par->matchinfo; return __bpf_mt_check_bytecode(info->bpf_program, info->bpf_program_num_elem, &info->filter); } static int bpf_mt_check_v1(const struct xt_mtchk_param *par) { struct xt_bpf_info_v1 *info = par->matchinfo; if (info->mode == XT_BPF_MODE_BYTECODE) return __bpf_mt_check_bytecode(info->bpf_program, info->bpf_program_num_elem, &info->filter); else if (info->mode == XT_BPF_MODE_FD_ELF) return __bpf_mt_check_fd(info->fd, &info->filter); else if (info->mode == XT_BPF_MODE_PATH_PINNED) return __bpf_mt_check_path(info->path, &info->filter); else return -EINVAL; } static bool bpf_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_bpf_info *info = par->matchinfo; return BPF_PROG_RUN(info->filter, skb); } static bool bpf_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_bpf_info_v1 *info = par->matchinfo; return !!bpf_prog_run_save_cb(info->filter, (struct sk_buff *) skb); } static void bpf_mt_destroy(const struct xt_mtdtor_param *par) { const struct xt_bpf_info *info = par->matchinfo; bpf_prog_destroy(info->filter); } static void bpf_mt_destroy_v1(const struct xt_mtdtor_param *par) { const struct xt_bpf_info_v1 *info = par->matchinfo; bpf_prog_destroy(info->filter); } static struct xt_match bpf_mt_reg[] __read_mostly = { { .name = "bpf", .revision = 0, .family = NFPROTO_UNSPEC, .checkentry =
bpf_mt_check, .match = bpf_mt, .destroy = bpf_mt_destroy, .matchsize = sizeof(struct xt_bpf_info), .usersize = offsetof(struct xt_bpf_info, filter), .me = THIS_MODULE, }, { .name = "bpf", .revision = 1, .family = NFPROTO_UNSPEC, .checkentry = bpf_mt_check_v1, .match = bpf_mt_v1, .destroy = bpf_mt_destroy_v1, .matchsize = sizeof(struct xt_bpf_info_v1), .usersize = offsetof(struct xt_bpf_info_v1, filter), .me = THIS_MODULE, }, }; static int __init bpf_mt_init(void) { return xt_register_matches(bpf_mt_reg, ARRAY_SIZE(bpf_mt_reg)); } static void __exit bpf_mt_exit(void) { xt_unregister_matches(bpf_mt_reg, ARRAY_SIZE(bpf_mt_reg)); } module_init(bpf_mt_init); module_exit(bpf_mt_exit);
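/*
 * Hedged illustration (not part of this module): the bytecode mode above
 * consumes a classic BPF (struct sock_filter) program that runs over the
 * packet starting at the network header, with a nonzero return value meaning
 * "match". This standalone userspace program builds the usual "match IP
 * protocol 6 (TCP)" filter and prints it in the comma-separated text form
 * that the iptables "bpf" match accepts; the instructions are the same kind
 * of program __bpf_mt_check_bytecode() hands to bpf_prog_create().
 */
#include <stdio.h>
#include <linux/filter.h>

int main(void)
{
	struct sock_filter insns[] = {
		{ 0x30, 0, 0, 9 },	/* ldb [9]        : IP protocol byte      */
		{ 0x15, 0, 1, 6 },	/* jeq #6, L1, L2 : is it TCP?            */
		{ 0x06, 0, 0, 1 },	/* L1: ret #1     : match                 */
		{ 0x06, 0, 0, 0 },	/* L2: ret #0     : no match              */
	};
	size_t i, n = sizeof(insns) / sizeof(insns[0]);

	/* Prints "4,48 0 0 9,21 0 1 6,6 0 0 1,6 0 0 0". */
	printf("%zu", n);
	for (i = 0; i < n; i++)
		printf(",%u %u %u %u", insns[i].code, insns[i].jt,
		       insns[i].jf, insns[i].k);
	printf("\n");
	return 0;
}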
/* * Copyright (c) International Business Machines Corp., 2006 * Copyright (c) Nokia Corporation, 2006, 2007 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * Author: Artem Bityutskiy (Битюцкий Артём) */ /* * UBI input/output sub-system. * * This sub-system provides a uniform way to work with all kinds of the * underlying MTD devices.
It also implements handy functions for reading and * writing UBI headers. * * We are trying to have a paranoid mindset and not to trust to what we read * from the flash media in order to be more secure and robust. So this * sub-system validates every single header it reads from the flash media. * * Some words about how the eraseblock headers are stored. * * The erase counter header is always stored at offset zero. By default, the * VID header is stored after the EC header at the closest aligned offset * (i.e. aligned to the minimum I/O unit size). Data starts next to the VID * header at the closest aligned offset. But this default layout may be * changed. For example, for different reasons (e.g., optimization) UBI may be * asked to put the VID header at further offset, and even at an unaligned * offset. Of course, if the offset of the VID header is unaligned, UBI adds * proper padding in front of it. Data offset may also be changed but it has to * be aligned. * * About minimal I/O units. In general, UBI assumes flash device model where * there is only one minimal I/O unit size. E.g., in case of NOR flash it is 1, * in case of NAND flash it is a NAND page, etc. This is reported by MTD in the * @ubi->mtd->writesize field. But as an exception, UBI admits use of another * (smaller) minimal I/O unit size for EC and VID headers to make it possible * to do different optimizations. * * This is extremely useful in case of NAND flashes which admit of several * write operations to one NAND page. In this case UBI can fit EC and VID * headers at one NAND page. Thus, UBI may use "sub-page" size as the minimal * I/O unit for the headers (the @ubi->hdrs_min_io_size field). But it still * reports NAND page size (@ubi->min_io_size) as a minimal I/O unit for the UBI * users. * * Example: some Samsung NANDs with 2KiB pages allow 4x 512-byte writes, so * although the minimal I/O unit is 2K, UBI uses 512 bytes for EC and VID * headers. * * Q: why not just to treat sub-page as a minimal I/O unit of this flash * device, e.g., make @ubi->min_io_size = 512 in the example above? * * A: because when writing a sub-page, MTD still writes a full 2K page but the * bytes which are not relevant to the sub-page are 0xFF. So, basically, * writing 4x512 sub-pages is 4 times slower than writing one 2KiB NAND page. * Thus, we prefer to use sub-pages only for EC and VID headers. * * As it was noted above, the VID header may start at a non-aligned offset. * For example, in case of a 2KiB page NAND flash with a 512 bytes sub-page, * the VID header may reside at offset 1984 which is the last 64 bytes of the * last sub-page (EC header is always at offset zero). This causes some * difficulties when reading and writing VID headers. * * Suppose we have a 64-byte buffer and we read a VID header at it. We change * the data and want to write this VID header out. As we can only write in * 512-byte chunks, we have to allocate one more buffer and copy our VID header * to offset 448 of this buffer. * * The I/O sub-system does the following trick in order to avoid this extra * copy. It always allocates a @ubi->vid_hdr_alsize bytes buffer for the VID * header and returns a pointer to offset @ubi->vid_hdr_shift of this buffer. * When the VID header is being written out, it shifts the VID header pointer * back and writes the whole sub-page. 
*/ #include <linux/crc32.h> #include <linux/err.h> #include <linux/slab.h> #include "ubi.h" static int self_check_not_bad(const struct ubi_device *ubi, int pnum); static int self_check_peb_ec_hdr(const struct ubi_device *ubi, int pnum); static int self_check_ec_hdr(const struct ubi_device *ubi, int pnum, const struct ubi_ec_hdr *ec_hdr); static int self_check_peb_vid_hdr(const struct ubi_device *ubi, int pnum); static int self_check_vid_hdr(const struct ubi_device *ubi, int pnum, const struct ubi_vid_hdr *vid_hdr); static int self_check_write(struct ubi_device *ubi, const void *buf, int pnum, int offset, int len); /** * ubi_io_read - read data from a physical eraseblock. * @ubi: UBI device description object * @buf: buffer where to store the read data * @pnum: physical eraseblock number to read from * @offset: offset within the physical eraseblock from where to read * @len: how many bytes to read * * This function reads data from offset @offset of physical eraseblock @pnum * and stores the read data in the @buf buffer. The following return codes are * possible: * * o %0 if all the requested data were successfully read; * o %UBI_IO_BITFLIPS if all the requested data were successfully read, but * correctable bit-flips were detected; this is harmless but may indicate * that this eraseblock may become bad soon (but does not have to); * o %-EBADMSG if the MTD subsystem reported data integrity problems, for * example an ECC error in case of NAND; this most probably means * that the data is corrupted; * o %-EIO if some I/O error occurred; * o other negative error codes in case of other errors. */ int ubi_io_read(const struct ubi_device *ubi, void *buf, int pnum, int offset, int len) { int err, retries = 0; size_t read; loff_t addr; dbg_io("read %d bytes from PEB %d:%d", len, pnum, offset); ubi_assert(pnum >= 0 && pnum < ubi->peb_count); ubi_assert(offset >= 0 && offset + len <= ubi->peb_size); ubi_assert(len > 0); err = self_check_not_bad(ubi, pnum); if (err) return err; /* * Deliberately corrupt the buffer to improve robustness. Indeed, if we * do not do this, the following may happen: * 1. The buffer contains data from a previous operation, e.g., read from * another PEB previously. The data looks as expected, so if we * just do not read anything and return - the caller would not * notice this. E.g., if we are reading a VID header, the buffer may * contain a valid VID header from another PEB. * 2. The driver is buggy and returns us success or -EBADMSG or * -EUCLEAN, but it does not actually put any data into the buffer. * * This may confuse UBI or upper layers - they may think the buffer * contains valid data while in fact it is just old data. This is * especially possible because UBI (and UBIFS) relies on CRC, and * treats data as correct even in case of ECC errors if the CRC is * correct. * * Try to prevent this situation by changing the first byte of the * buffer. */ *((uint8_t *)buf) ^= 0xFF; addr = (loff_t)pnum * ubi->peb_size + offset; retry: err = mtd_read(ubi->mtd, addr, len, &read, buf); if (err) { const char *errstr = mtd_is_eccerr(err) ? " (ECC error)" : ""; if (mtd_is_bitflip(err)) { /* * -EUCLEAN is reported if there was a bit-flip which * was corrected, so this is harmless. * * We do not report it here unless debugging is * enabled. A corresponding message will be printed * later, when it has been scrubbed.
*/ ubi_msg(ubi, "fixable bit-flip detected at PEB %d", pnum); ubi_assert(len == read); return UBI_IO_BITFLIPS; } if (retries++ < UBI_IO_RETRIES) { ubi_warn(ubi, "error %d%s while reading %d bytes from PEB %d:%d, read only %zd bytes, retry", err, errstr, len, pnum, offset, read); yield(); goto retry; } ubi_err(ubi, "error %d%s while reading %d bytes from PEB %d:%d, read %zd bytes", err, errstr, len, pnum, offset, read); dump_stack(); /* * The driver should never return -EBADMSG if it failed to read * all the requested data. But some buggy drivers might do * this, so we change it to -EIO. */ if (read != len && mtd_is_eccerr(err)) { ubi_assert(0); err = -EIO; } } else { ubi_assert(len == read); if (ubi_dbg_is_bitflip(ubi)) { dbg_gen("bit-flip (emulated)"); err = UBI_IO_BITFLIPS; } } return err; } /** * ubi_io_write - write data to a physical eraseblock. * @ubi: UBI device description object * @buf: buffer with the data to write * @pnum: physical eraseblock number to write to * @offset: offset within the physical eraseblock where to write * @len: how many bytes to write * * This function writes @len bytes of data from buffer @buf to offset @offset * of physical eraseblock @pnum. If all the data were successfully written, * zero is returned. If an error occurred, this function returns a negative * error code. If %-EIO is returned, the physical eraseblock most probably went * bad. * * Note, in case of an error, it is possible that something was still written * to the flash media, but may be some garbage. */ int ubi_io_write(struct ubi_device *ubi, const void *buf, int pnum, int offset, int len) { int err; size_t written; loff_t addr; dbg_io("write %d bytes to PEB %d:%d", len, pnum, offset); ubi_assert(pnum >= 0 && pnum < ubi->peb_count); ubi_assert(offset >= 0 && offset + len <= ubi->peb_size); ubi_assert(offset % ubi->hdrs_min_io_size == 0); ubi_assert(len > 0 && len % ubi->hdrs_min_io_size == 0); if (ubi->ro_mode) { ubi_err(ubi, "read-only mode"); return -EROFS; } err = self_check_not_bad(ubi, pnum); if (err) return err; /* The area we are writing to has to contain all 0xFF bytes */ err = ubi_self_check_all_ff(ubi, pnum, offset, len); if (err) return err; if (offset >= ubi->leb_start) { /* * We write to the data area of the physical eraseblock. Make * sure it has valid EC and VID headers. */ err = self_check_peb_ec_hdr(ubi, pnum); if (err) return err; err = self_check_peb_vid_hdr(ubi, pnum); if (err) return err; } if (ubi_dbg_is_write_failure(ubi)) { ubi_err(ubi, "cannot write %d bytes to PEB %d:%d (emulated)", len, pnum, offset); dump_stack(); return -EIO; } addr = (loff_t)pnum * ubi->peb_size + offset; err = mtd_write(ubi->mtd, addr, len, &written, buf); if (err) { ubi_err(ubi, "error %d while writing %d bytes to PEB %d:%d, written %zd bytes", err, len, pnum, offset, written); dump_stack(); ubi_dump_flash(ubi, pnum, offset, len); } else ubi_assert(written == len); if (!err) { err = self_check_write(ubi, buf, pnum, offset, len); if (err) return err; /* * Since we always write sequentially, the rest of the PEB has * to contain only 0xFF bytes. */ offset += len; len = ubi->peb_size - offset; if (len) err = ubi_self_check_all_ff(ubi, pnum, offset, len); } return err; } /** * do_sync_erase - synchronously erase a physical eraseblock. * @ubi: UBI device description object * @pnum: the physical eraseblock number to erase * * This function synchronously erases physical eraseblock @pnum and returns * zero in case of success and a negative error code in case of failure. 
If * %-EIO is returned, the physical eraseblock most probably went bad. */ static int do_sync_erase(struct ubi_device *ubi, int pnum) { int err, retries = 0; struct erase_info ei; dbg_io("erase PEB %d", pnum); ubi_assert(pnum >= 0 && pnum < ubi->peb_count); if (ubi->ro_mode) { ubi_err(ubi, "read-only mode"); return -EROFS; } retry: memset(&ei, 0, sizeof(struct erase_info)); ei.addr = (loff_t)pnum * ubi->peb_size; ei.len = ubi->peb_size; err = mtd_erase(ubi->mtd, &ei); if (err) { if (retries++ < UBI_IO_RETRIES) { ubi_warn(ubi, "error %d while erasing PEB %d, retry", err, pnum); yield(); goto retry; } ubi_err(ubi, "cannot erase PEB %d, error %d", pnum, err); dump_stack(); return err; } err = ubi_self_check_all_ff(ubi, pnum, 0, ubi->peb_size); if (err) return err; if (ubi_dbg_is_erase_failure(ubi)) { ubi_err(ubi, "cannot erase PEB %d (emulated)", pnum); return -EIO; } return 0; } /* Patterns to write to a physical eraseblock when torturing it */ static uint8_t patterns[] = {0xa5, 0x5a, 0x0}; /** * torture_peb - test a supposedly bad physical eraseblock. * @ubi: UBI device description object * @pnum: the physical eraseblock number to test * * This function returns %-EIO if the physical eraseblock did not pass the * test, a positive number of erase operations done if the test was * successfully passed, and other negative error codes in case of other errors. */ static int torture_peb(struct ubi_device *ubi, int pnum) { int err, i, patt_count; ubi_msg(ubi, "run torture test for PEB %d", pnum); patt_count = ARRAY_SIZE(patterns); ubi_assert(patt_count > 0); mutex_lock(&ubi->buf_mutex); for (i = 0; i < patt_count; i++) { err = do_sync_erase(ubi, pnum); if (err) goto out; /* Make sure the PEB contains only 0xFF bytes */ err = ubi_io_read(ubi, ubi->peb_buf, pnum, 0, ubi->peb_size); if (err) goto out; err = ubi_check_pattern(ubi->peb_buf, 0xFF, ubi->peb_size); if (err == 0) { ubi_err(ubi, "erased PEB %d, but a non-0xFF byte found", pnum); err = -EIO; goto out; } /* Write a pattern and check it */ memset(ubi->peb_buf, patterns[i], ubi->peb_size); err = ubi_io_write(ubi, ubi->peb_buf, pnum, 0, ubi->peb_size); if (err) goto out; memset(ubi->peb_buf, ~patterns[i], ubi->peb_size); err = ubi_io_read(ubi, ubi->peb_buf, pnum, 0, ubi->peb_size); if (err) goto out; err = ubi_check_pattern(ubi->peb_buf, patterns[i], ubi->peb_size); if (err == 0) { ubi_err(ubi, "pattern %x checking failed for PEB %d", patterns[i], pnum); err = -EIO; goto out; } } err = patt_count; ubi_msg(ubi, "PEB %d passed torture test, do not mark it as bad", pnum); out: mutex_unlock(&ubi->buf_mutex); if (err == UBI_IO_BITFLIPS || mtd_is_eccerr(err)) { /* * If a bit-flip or data integrity error was detected, the test * has not passed because it happened on a freshly erased * physical eraseblock which means something is wrong with it. */ ubi_err(ubi, "read problems on freshly erased PEB %d, must be bad", pnum); err = -EIO; } return err; } /** * nor_erase_prepare - prepare a NOR flash PEB for erasure. * @ubi: UBI device description object * @pnum: physical eraseblock number to prepare * * NOR flash, or at least some of them, have peculiar embedded PEB erasure * algorithm: the PEB is first filled with zeroes, then it is erased. And * filling with zeroes starts from the end of the PEB. This was observed with * Spansion S29GL512N NOR flash. * * This means that in case of a power cut we may end up with intact data at the * beginning of the PEB, and all zeroes at the end of PEB. 
In other words, the * EC and VID headers are OK, but a large chunk of data at the end of PEB is * zeroed. This makes UBI mistakenly treat this PEB as used and associate it * with an LEB, which leads to subsequent failures (e.g., UBIFS fails). * * This function is called before erasing NOR PEBs and it zeroes out EC and VID * magic numbers in order to invalidate them and prevent the failures. Returns * zero in case of success and a negative error code in case of failure. */ static int nor_erase_prepare(struct ubi_device *ubi, int pnum) { int err; size_t written; loff_t addr; uint32_t data = 0; struct ubi_ec_hdr ec_hdr; struct ubi_vid_io_buf vidb; /* * Note, we cannot generally define VID header buffers on stack, * because of the way we deal with these buffers (see the header * comment in this file). But we know this is a NOR-specific piece of * code, so we can do this. But yes, this is error-prone and we should * (pre-)allocate VID header buffer instead. */ struct ubi_vid_hdr vid_hdr; /* * If VID or EC is valid, we have to corrupt them before erasing. * It is important to first invalidate the EC header, and then the VID * header. Otherwise a power cut may lead to valid EC header and * invalid VID header, in which case UBI will treat this PEB as * corrupted and will try to preserve it, and print scary warnings. */ addr = (loff_t)pnum * ubi->peb_size; err = ubi_io_read_ec_hdr(ubi, pnum, &ec_hdr, 0); if (err != UBI_IO_BAD_HDR_EBADMSG && err != UBI_IO_BAD_HDR && err != UBI_IO_FF){ err = mtd_write(ubi->mtd, addr, 4, &written, (void *)&data); if(err) goto error; } ubi_init_vid_buf(ubi, &vidb, &vid_hdr); ubi_assert(&vid_hdr == ubi_get_vid_hdr(&vidb)); err = ubi_io_read_vid_hdr(ubi, pnum, &vidb, 0); if (err != UBI_IO_BAD_HDR_EBADMSG && err != UBI_IO_BAD_HDR && err != UBI_IO_FF){ addr += ubi->vid_hdr_aloffset; err = mtd_write(ubi->mtd, addr, 4, &written, (void *)&data); if (err) goto error; } return 0; error: /* * The PEB contains a valid VID or EC header, but we cannot invalidate * it. Supposedly the flash media or the driver is screwed up, so * return an error. */ ubi_err(ubi, "cannot invalidate PEB %d, write returned %d", pnum, err); ubi_dump_flash(ubi, pnum, 0, ubi->peb_size); return -EIO; } /** * ubi_io_sync_erase - synchronously erase a physical eraseblock. * @ubi: UBI device description object * @pnum: physical eraseblock number to erase * @torture: if this physical eraseblock has to be tortured * * This function synchronously erases physical eraseblock @pnum. If @torture * flag is not zero, the physical eraseblock is checked by means of writing * different patterns to it and reading them back. If the torturing is enabled, * the physical eraseblock is erased more than once. * * This function returns the number of erasures made in case of success, %-EIO * if the erasure failed or the torturing test failed, and other negative error * codes in case of other errors. Note, %-EIO means that the physical * eraseblock is bad. */ int ubi_io_sync_erase(struct ubi_device *ubi, int pnum, int torture) { int err, ret = 0; ubi_assert(pnum >= 0 && pnum < ubi->peb_count); err = self_check_not_bad(ubi, pnum); if (err != 0) return err; if (ubi->ro_mode) { ubi_err(ubi, "read-only mode"); return -EROFS; } if (ubi->nor_flash) { err = nor_erase_prepare(ubi, pnum); if (err) return err; } if (torture) { ret = torture_peb(ubi, pnum); if (ret < 0) return ret; } err = do_sync_erase(ubi, pnum); if (err) return err; return ret + 1; } /** * ubi_io_is_bad - check if a physical eraseblock is bad. 
* @ubi: UBI device description object * @pnum: the physical eraseblock number to check * * This function returns a positive number if the physical eraseblock is bad, * zero if not, and a negative error code if an error occurred. */ int ubi_io_is_bad(const struct ubi_device *ubi, int pnum) { struct mtd_info *mtd = ubi->mtd; ubi_assert(pnum >= 0 && pnum < ubi->peb_count); if (ubi->bad_allowed) { int ret; ret = mtd_block_isbad(mtd, (loff_t)pnum * ubi->peb_size); if (ret < 0) ubi_err(ubi, "error %d while checking if PEB %d is bad", ret, pnum); else if (ret) dbg_io("PEB %d is bad", pnum); return ret; } return 0; } /** * ubi_io_mark_bad - mark a physical eraseblock as bad. * @ubi: UBI device description object * @pnum: the physical eraseblock number to mark * * This function returns zero in case of success and a negative error code in * case of failure. */ int ubi_io_mark_bad(const struct ubi_device *ubi, int pnum) { int err; struct mtd_info *mtd = ubi->mtd; ubi_assert(pnum >= 0 && pnum < ubi->peb_count); if (ubi->ro_mode) { ubi_err(ubi, "read-only mode"); return -EROFS; } if (!ubi->bad_allowed) return 0; err = mtd_block_markbad(mtd, (loff_t)pnum * ubi->peb_size); if (err) ubi_err(ubi, "cannot mark PEB %d bad, error %d", pnum, err); return err; } /** * validate_ec_hdr - validate an erase counter header. * @ubi: UBI device description object * @ec_hdr: the erase counter header to check * * This function returns zero if the erase counter header is OK, and %1 if * not. */ static int validate_ec_hdr(const struct ubi_device *ubi, const struct ubi_ec_hdr *ec_hdr) { long long ec; int vid_hdr_offset, leb_start; ec = be64_to_cpu(ec_hdr->ec); vid_hdr_offset = be32_to_cpu(ec_hdr->vid_hdr_offset); leb_start = be32_to_cpu(ec_hdr->data_offset); if (ec_hdr->version != UBI_VERSION) { ubi_err(ubi, "node with incompatible UBI version found: this UBI version is %d, image version is %d", UBI_VERSION, (int)ec_hdr->version); goto bad; } if (vid_hdr_offset != ubi->vid_hdr_offset) { ubi_err(ubi, "bad VID header offset %d, expected %d", vid_hdr_offset, ubi->vid_hdr_offset); goto bad; } if (leb_start != ubi->leb_start) { ubi_err(ubi, "bad data offset %d, expected %d", leb_start, ubi->leb_start); goto bad; } if (ec < 0 || ec > UBI_MAX_ERASECOUNTER) { ubi_err(ubi, "bad erase counter %lld", ec); goto bad; } return 0; bad: ubi_err(ubi, "bad EC header"); ubi_dump_ec_hdr(ec_hdr); dump_stack(); return 1; } /** * ubi_io_read_ec_hdr - read and check an erase counter header. * @ubi: UBI device description object * @pnum: physical eraseblock to read from * @ec_hdr: a &struct ubi_ec_hdr object where to store the read erase counter * header * @verbose: be verbose if the header is corrupted or was not found * * This function reads erase counter header from physical eraseblock @pnum and * stores it in @ec_hdr. This function also checks CRC checksum of the read * erase counter header. The following codes may be returned: * * o %0 if the CRC checksum is correct and the header was successfully read; * o %UBI_IO_BITFLIPS if the CRC is correct, but bit-flips were detected * and corrected by the flash driver; this is harmless but may indicate that * this eraseblock may become bad soon (but may be not); * o %UBI_IO_BAD_HDR if the erase counter header is corrupted (a CRC error); * o %UBI_IO_BAD_HDR_EBADMSG is the same as %UBI_IO_BAD_HDR, but there also was * a data integrity error (uncorrectable ECC error in case of NAND); * o %UBI_IO_FF if only 0xFF bytes were read (the PEB is supposedly empty) * o a negative error code in case of failure. 
*/ int ubi_io_read_ec_hdr(struct ubi_device *ubi, int pnum, struct ubi_ec_hdr *ec_hdr, int verbose) { int err, read_err; uint32_t crc, magic, hdr_crc; dbg_io("read EC header from PEB %d", pnum); ubi_assert(pnum >= 0 && pnum < ubi->peb_count); read_err = ubi_io_read(ubi, ec_hdr, pnum, 0, UBI_EC_HDR_SIZE); if (read_err) { if (read_err != UBI_IO_BITFLIPS && !mtd_is_eccerr(read_err)) return read_err; /* * We read all the data, but either a correctable bit-flip * occurred, or MTD reported a data integrity error * (uncorrectable ECC error in case of NAND). The former is * harmless, the latter may mean that the read data is * corrupted. But we have a CRC check-sum and we will detect * this. If the EC header is still OK, we just report this as * if there was a bit-flip, to force scrubbing. */ } magic = be32_to_cpu(ec_hdr->magic); if (magic != UBI_EC_HDR_MAGIC) { if (mtd_is_eccerr(read_err)) return UBI_IO_BAD_HDR_EBADMSG; /* * The magic field is wrong. Let's check if we have read all * 0xFF. If yes, this physical eraseblock is assumed to be * empty. */ if (ubi_check_pattern(ec_hdr, 0xFF, UBI_EC_HDR_SIZE)) { /* The physical eraseblock is supposedly empty */ if (verbose) ubi_warn(ubi, "no EC header found at PEB %d, only 0xFF bytes", pnum); dbg_bld("no EC header found at PEB %d, only 0xFF bytes", pnum); if (!read_err) return UBI_IO_FF; else return UBI_IO_FF_BITFLIPS; } /* * This is not a valid erase counter header, and these are not * 0xFF bytes. Report that the header is corrupted. */ if (verbose) { ubi_warn(ubi, "bad magic number at PEB %d: %08x instead of %08x", pnum, magic, UBI_EC_HDR_MAGIC); ubi_dump_ec_hdr(ec_hdr); } dbg_bld("bad magic number at PEB %d: %08x instead of %08x", pnum, magic, UBI_EC_HDR_MAGIC); return UBI_IO_BAD_HDR; } crc = crc32(UBI_CRC32_INIT, ec_hdr, UBI_EC_HDR_SIZE_CRC); hdr_crc = be32_to_cpu(ec_hdr->hdr_crc); if (hdr_crc != crc) { if (verbose) { ubi_warn(ubi, "bad EC header CRC at PEB %d, calculated %#08x, read %#08x", pnum, crc, hdr_crc); ubi_dump_ec_hdr(ec_hdr); } dbg_bld("bad EC header CRC at PEB %d, calculated %#08x, read %#08x", pnum, crc, hdr_crc); if (!read_err) return UBI_IO_BAD_HDR; else return UBI_IO_BAD_HDR_EBADMSG; } /* And of course validate what has just been read from the media */ err = validate_ec_hdr(ubi, ec_hdr); if (err) { ubi_err(ubi, "validation failed for PEB %d", pnum); return -EINVAL; } /* * If there was %-EBADMSG, but the header CRC is still OK, report * a bit-flip to force scrubbing on this PEB. */ return read_err ? UBI_IO_BITFLIPS : 0; } /** * ubi_io_write_ec_hdr - write an erase counter header. * @ubi: UBI device description object * @pnum: physical eraseblock to write to * @ec_hdr: the erase counter header to write * * This function writes the erase counter header described by @ec_hdr to physical * eraseblock @pnum. It also fills most fields of @ec_hdr before writing, so * the caller does not have to fill them. Callers must only fill the @ec_hdr->ec * field. * * This function returns zero in case of success and a negative error code in * case of failure. If %-EIO is returned, the physical eraseblock most probably * went bad.
*/ int ubi_io_write_ec_hdr(struct ubi_device *ubi, int pnum, struct ubi_ec_hdr *ec_hdr) { int err; uint32_t crc; dbg_io("write EC header to PEB %d", pnum); ubi_assert(pnum >= 0 && pnum < ubi->peb_count); ec_hdr->magic = cpu_to_be32(UBI_EC_HDR_MAGIC); ec_hdr->version = UBI_VERSION; ec_hdr->vid_hdr_offset = cpu_to_be32(ubi->vid_hdr_offset); ec_hdr->data_offset = cpu_to_be32(ubi->leb_start); ec_hdr->image_seq = cpu_to_be32(ubi->image_seq); crc = crc32(UBI_CRC32_INIT, ec_hdr, UBI_EC_HDR_SIZE_CRC); ec_hdr->hdr_crc = cpu_to_be32(crc); err = self_check_ec_hdr(ubi, pnum, ec_hdr); if (err) return err; if (ubi_dbg_power_cut(ubi, POWER_CUT_EC_WRITE)) return -EROFS; err = ubi_io_write(ubi, ec_hdr, pnum, 0, ubi->ec_hdr_alsize); return err; } /** * validate_vid_hdr - validate a volume identifier header. * @ubi: UBI device description object * @vid_hdr: the volume identifier header to check * * This function checks the data stored in the volume identifier header * @vid_hdr. Returns zero if the VID header is OK and %1 if not. */ static int validate_vid_hdr(const struct ubi_device *ubi, const struct ubi_vid_hdr *vid_hdr) { int vol_type = vid_hdr->vol_type; int copy_flag = vid_hdr->copy_flag; int vol_id = be32_to_cpu(vid_hdr->vol_id); int lnum = be32_to_cpu(vid_hdr->lnum); int compat = vid_hdr->compat; int data_size = be32_to_cpu(vid_hdr->data_size); int used_ebs = be32_to_cpu(vid_hdr->used_ebs); int data_pad = be32_to_cpu(vid_hdr->data_pad); int data_crc = be32_to_cpu(vid_hdr->data_crc); int usable_leb_size = ubi->leb_size - data_pad; if (copy_flag != 0 && copy_flag != 1) { ubi_err(ubi, "bad copy_flag"); goto bad; } if (vol_id < 0 || lnum < 0 || data_size < 0 || used_ebs < 0 || data_pad < 0) { ubi_err(ubi, "negative values"); goto bad; } if (vol_id >= UBI_MAX_VOLUMES && vol_id < UBI_INTERNAL_VOL_START) { ubi_err(ubi, "bad vol_id"); goto bad; } if (vol_id < UBI_INTERNAL_VOL_START && compat != 0) { ubi_err(ubi, "bad compat"); goto bad; } if (vol_id >= UBI_INTERNAL_VOL_START && compat != UBI_COMPAT_DELETE && compat != UBI_COMPAT_RO && compat != UBI_COMPAT_PRESERVE && compat != UBI_COMPAT_REJECT) { ubi_err(ubi, "bad compat"); goto bad; } if (vol_type != UBI_VID_DYNAMIC && vol_type != UBI_VID_STATIC) { ubi_err(ubi, "bad vol_type"); goto bad; } if (data_pad >= ubi->leb_size / 2) { ubi_err(ubi, "bad data_pad"); goto bad; } if (data_size > ubi->leb_size) { ubi_err(ubi, "bad data_size"); goto bad; } if (vol_type == UBI_VID_STATIC) { /* * Although from a high-level point of view static volumes may * contain zero bytes of data, no VID header can contain * zeroes in these fields, because empty volumes do not have * mapped logical eraseblocks.
*/ if (used_ebs == 0) { ubi_err(ubi, "zero used_ebs"); goto bad; } if (data_size == 0) { ubi_err(ubi, "zero data_size"); goto bad; } if (lnum < used_ebs - 1) { if (data_size != usable_leb_size) { ubi_err(ubi, "bad data_size"); goto bad; } } else if (lnum == used_ebs - 1) { if (data_size == 0) { ubi_err(ubi, "bad data_size at last LEB"); goto bad; } } else { ubi_err(ubi, "too high lnum"); goto bad; } } else { if (copy_flag == 0) { if (data_crc != 0) { ubi_err(ubi, "non-zero data CRC"); goto bad; } if (data_size != 0) { ubi_err(ubi, "non-zero data_size"); goto bad; } } else { if (data_size == 0) { ubi_err(ubi, "zero data_size of copy"); goto bad; } } if (used_ebs != 0) { ubi_err(ubi, "bad used_ebs"); goto bad; } } return 0; bad: ubi_err(ubi, "bad VID header"); ubi_dump_vid_hdr(vid_hdr); dump_stack(); return 1; } /** * ubi_io_read_vid_hdr - read and check a volume identifier header. * @ubi: UBI device description object * @pnum: physical eraseblock number to read from * @vidb: the volume identifier buffer to store data in * @verbose: be verbose if the header is corrupted or wasn't found * * This function reads the volume identifier header from physical eraseblock * @pnum and stores it in @vidb. It also checks CRC checksum of the read * volume identifier header. The error codes are the same as in * 'ubi_io_read_ec_hdr()'. * * Note, the implementation of this function is also very similar to * 'ubi_io_read_ec_hdr()', so refer commentaries in 'ubi_io_read_ec_hdr()'. */ int ubi_io_read_vid_hdr(struct ubi_device *ubi, int pnum, struct ubi_vid_io_buf *vidb, int verbose) { int err, read_err; uint32_t crc, magic, hdr_crc; struct ubi_vid_hdr *vid_hdr = ubi_get_vid_hdr(vidb); void *p = vidb->buffer; dbg_io("read VID header from PEB %d", pnum); ubi_assert(pnum >= 0 && pnum < ubi->peb_count); read_err = ubi_io_read(ubi, p, pnum, ubi->vid_hdr_aloffset, ubi->vid_hdr_shift + UBI_VID_HDR_SIZE); if (read_err && read_err != UBI_IO_BITFLIPS && !mtd_is_eccerr(read_err)) return read_err; magic = be32_to_cpu(vid_hdr->magic); if (magic != UBI_VID_HDR_MAGIC) { if (mtd_is_eccerr(read_err)) return UBI_IO_BAD_HDR_EBADMSG; if (ubi_check_pattern(vid_hdr, 0xFF, UBI_VID_HDR_SIZE)) { if (verbose) ubi_warn(ubi, "no VID header found at PEB %d, only 0xFF bytes", pnum); dbg_bld("no VID header found at PEB %d, only 0xFF bytes", pnum); if (!read_err) return UBI_IO_FF; else return UBI_IO_FF_BITFLIPS; } if (verbose) { ubi_warn(ubi, "bad magic number at PEB %d: %08x instead of %08x", pnum, magic, UBI_VID_HDR_MAGIC); ubi_dump_vid_hdr(vid_hdr); } dbg_bld("bad magic number at PEB %d: %08x instead of %08x", pnum, magic, UBI_VID_HDR_MAGIC); return UBI_IO_BAD_HDR; } crc = crc32(UBI_CRC32_INIT, vid_hdr, UBI_VID_HDR_SIZE_CRC); hdr_crc = be32_to_cpu(vid_hdr->hdr_crc); if (hdr_crc != crc) { if (verbose) { ubi_warn(ubi, "bad CRC at PEB %d, calculated %#08x, read %#08x", pnum, crc, hdr_crc); ubi_dump_vid_hdr(vid_hdr); } dbg_bld("bad CRC at PEB %d, calculated %#08x, read %#08x", pnum, crc, hdr_crc); if (!read_err) return UBI_IO_BAD_HDR; else return UBI_IO_BAD_HDR_EBADMSG; } err = validate_vid_hdr(ubi, vid_hdr); if (err) { ubi_err(ubi, "validation failed for PEB %d", pnum); return -EINVAL; } return read_err ? UBI_IO_BITFLIPS : 0; } /** * ubi_io_write_vid_hdr - write a volume identifier header. * @ubi: UBI device description object * @pnum: the physical eraseblock number to write to * @vidb: the volume identifier buffer to write * * This function writes the volume identifier header described by @vid_hdr to * physical eraseblock @pnum. 
This function automatically fills the * @vidb->hdr->magic and the @vidb->hdr->version fields, as well as calculates * header CRC checksum and stores it at vidb->hdr->hdr_crc. * * This function returns zero in case of success and a negative error code in * case of failure. If %-EIO is returned, the physical eraseblock probably went * bad. */ int ubi_io_write_vid_hdr(struct ubi_device *ubi, int pnum, struct ubi_vid_io_buf *vidb) { struct ubi_vid_hdr *vid_hdr = ubi_get_vid_hdr(vidb); int err; uint32_t crc; void *p = vidb->buffer; dbg_io("write VID header to PEB %d", pnum); ubi_assert(pnum >= 0 && pnum < ubi->peb_count); err = self_check_peb_ec_hdr(ubi, pnum); if (err) return err; vid_hdr->magic = cpu_to_be32(UBI_VID_HDR_MAGIC); vid_hdr->version = UBI_VERSION; crc = crc32(UBI_CRC32_INIT, vid_hdr, UBI_VID_HDR_SIZE_CRC); vid_hdr->hdr_crc = cpu_to_be32(crc); err = self_check_vid_hdr(ubi, pnum, vid_hdr); if (err) return err; if (ubi_dbg_power_cut(ubi, POWER_CUT_VID_WRITE)) return -EROFS; err = ubi_io_write(ubi, p, pnum, ubi->vid_hdr_aloffset, ubi->vid_hdr_alsize); return err; } /** * self_check_not_bad - ensure that a physical eraseblock is not bad. * @ubi: UBI device description object * @pnum: physical eraseblock number to check * * This function returns zero if the physical eraseblock is good, %-EINVAL if * it is bad and a negative error code if an error occurred. */ static int self_check_not_bad(const struct ubi_device *ubi, int pnum) { int err; if (!ubi_dbg_chk_io(ubi)) return 0; err = ubi_io_is_bad(ubi, pnum); if (!err) return err; ubi_err(ubi, "self-check failed for PEB %d", pnum); dump_stack(); return err > 0 ? -EINVAL : err; } /** * self_check_ec_hdr - check if an erase counter header is all right. * @ubi: UBI device description object * @pnum: physical eraseblock number the erase counter header belongs to * @ec_hdr: the erase counter header to check * * This function returns zero if the erase counter header contains valid * values, and %-EINVAL if not. */ static int self_check_ec_hdr(const struct ubi_device *ubi, int pnum, const struct ubi_ec_hdr *ec_hdr) { int err; uint32_t magic; if (!ubi_dbg_chk_io(ubi)) return 0; magic = be32_to_cpu(ec_hdr->magic); if (magic != UBI_EC_HDR_MAGIC) { ubi_err(ubi, "bad magic %#08x, must be %#08x", magic, UBI_EC_HDR_MAGIC); goto fail; } err = validate_ec_hdr(ubi, ec_hdr); if (err) { ubi_err(ubi, "self-check failed for PEB %d", pnum); goto fail; } return 0; fail: ubi_dump_ec_hdr(ec_hdr); dump_stack(); return -EINVAL; } /** * self_check_peb_ec_hdr - check erase counter header. * @ubi: UBI device description object * @pnum: the physical eraseblock number to check * * This function returns zero if the erase counter header is all right and and * a negative error code if not or if an error occurred. 
*/ static int self_check_peb_ec_hdr(const struct ubi_device *ubi, int pnum) { int err; uint32_t crc, hdr_crc; struct ubi_ec_hdr *ec_hdr; if (!ubi_dbg_chk_io(ubi)) return 0; ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS); if (!ec_hdr) return -ENOMEM; err = ubi_io_read(ubi, ec_hdr, pnum, 0, UBI_EC_HDR_SIZE); if (err && err != UBI_IO_BITFLIPS && !mtd_is_eccerr(err)) goto exit; crc = crc32(UBI_CRC32_INIT, ec_hdr, UBI_EC_HDR_SIZE_CRC); hdr_crc = be32_to_cpu(ec_hdr->hdr_crc); if (hdr_crc != crc) { ubi_err(ubi, "bad CRC, calculated %#08x, read %#08x", crc, hdr_crc); ubi_err(ubi, "self-check failed for PEB %d", pnum); ubi_dump_ec_hdr(ec_hdr); dump_stack(); err = -EINVAL; goto exit; } err = self_check_ec_hdr(ubi, pnum, ec_hdr); exit: kfree(ec_hdr); return err; } /** * self_check_vid_hdr - check that a volume identifier header is all right. * @ubi: UBI device description object * @pnum: physical eraseblock number the volume identifier header belongs to * @vid_hdr: the volume identifier header to check * * This function returns zero if the volume identifier header is all right, and * %-EINVAL if not. */ static int self_check_vid_hdr(const struct ubi_device *ubi, int pnum, const struct ubi_vid_hdr *vid_hdr) { int err; uint32_t magic; if (!ubi_dbg_chk_io(ubi)) return 0; magic = be32_to_cpu(vid_hdr->magic); if (magic != UBI_VID_HDR_MAGIC) { ubi_err(ubi, "bad VID header magic %#08x at PEB %d, must be %#08x", magic, pnum, UBI_VID_HDR_MAGIC); goto fail; } err = validate_vid_hdr(ubi, vid_hdr); if (err) { ubi_err(ubi, "self-check failed for PEB %d", pnum); goto fail; } return err; fail: ubi_err(ubi, "self-check failed for PEB %d", pnum); ubi_dump_vid_hdr(vid_hdr); dump_stack(); return -EINVAL; } /** * self_check_peb_vid_hdr - check volume identifier header. * @ubi: UBI device description object * @pnum: the physical eraseblock number to check * * This function returns zero if the volume identifier header is all right, * and a negative error code if not or if an error occurred. */ static int self_check_peb_vid_hdr(const struct ubi_device *ubi, int pnum) { int err; uint32_t crc, hdr_crc; struct ubi_vid_io_buf *vidb; struct ubi_vid_hdr *vid_hdr; void *p; if (!ubi_dbg_chk_io(ubi)) return 0; vidb = ubi_alloc_vid_buf(ubi, GFP_NOFS); if (!vidb) return -ENOMEM; vid_hdr = ubi_get_vid_hdr(vidb); p = vidb->buffer; err = ubi_io_read(ubi, p, pnum, ubi->vid_hdr_aloffset, ubi->vid_hdr_alsize); if (err && err != UBI_IO_BITFLIPS && !mtd_is_eccerr(err)) goto exit; crc = crc32(UBI_CRC32_INIT, vid_hdr, UBI_VID_HDR_SIZE_CRC); hdr_crc = be32_to_cpu(vid_hdr->hdr_crc); if (hdr_crc != crc) { ubi_err(ubi, "bad VID header CRC at PEB %d, calculated %#08x, read %#08x", pnum, crc, hdr_crc); ubi_err(ubi, "self-check failed for PEB %d", pnum); ubi_dump_vid_hdr(vid_hdr); dump_stack(); err = -EINVAL; goto exit; } err = self_check_vid_hdr(ubi, pnum, vid_hdr); exit: ubi_free_vid_buf(vidb); return err; } /** * self_check_write - make sure write succeeded. * @ubi: UBI device description object * @buf: buffer with data which were written * @pnum: physical eraseblock number the data were written to * @offset: offset within the physical eraseblock the data were written to * @len: how many bytes were written * * This functions reads data which were recently written and compares it with * the original data buffer - the data have to match. Returns zero if the data * match and a negative error code if not or in case of failure. 
*/ static int self_check_write(struct ubi_device *ubi, const void *buf, int pnum, int offset, int len) { int err, i; size_t read; void *buf1; loff_t addr = (loff_t)pnum * ubi->peb_size + offset; if (!ubi_dbg_chk_io(ubi)) return 0; buf1 = __vmalloc(len, GFP_NOFS, PAGE_KERNEL); if (!buf1) { ubi_err(ubi, "cannot allocate memory to check writes"); return 0; } err = mtd_read(ubi->mtd, addr, len, &read, buf1); if (err && !mtd_is_bitflip(err)) goto out_free; for (i = 0; i < len; i++) { uint8_t c = ((uint8_t *)buf)[i]; uint8_t c1 = ((uint8_t *)buf1)[i]; int dump_len; if (c == c1) continue; ubi_err(ubi, "self-check failed for PEB %d:%d, len %d", pnum, offset, len); ubi_msg(ubi, "data differ at position %d", i); dump_len = max_t(int, 128, len - i); ubi_msg(ubi, "hex dump of the original buffer from %d to %d", i, i + dump_len); print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, buf + i, dump_len, 1); ubi_msg(ubi, "hex dump of the read buffer from %d to %d", i, i + dump_len); print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, buf1 + i, dump_len, 1); dump_stack(); err = -EINVAL; goto out_free; } vfree(buf1); return 0; out_free: vfree(buf1); return err; } /** * ubi_self_check_all_ff - check that a region of flash is empty. * @ubi: UBI device description object * @pnum: the physical eraseblock number to check * @offset: the starting offset within the physical eraseblock to check * @len: the length of the region to check * * This function returns zero if only 0xFF bytes are present at offset * @offset of the physical eraseblock @pnum, and a negative error code if not * or if an error occurred. */ int ubi_self_check_all_ff(struct ubi_device *ubi, int pnum, int offset, int len) { size_t read; int err; void *buf; loff_t addr = (loff_t)pnum * ubi->peb_size + offset; if (!ubi_dbg_chk_io(ubi)) return 0; buf = __vmalloc(len, GFP_NOFS, PAGE_KERNEL); if (!buf) { ubi_err(ubi, "cannot allocate memory to check for 0xFFs"); return 0; } err = mtd_read(ubi->mtd, addr, len, &read, buf); if (err && !mtd_is_bitflip(err)) { ubi_err(ubi, "err %d while reading %d bytes from PEB %d:%d, read %zd bytes", err, len, pnum, offset, read); goto error; } err = ubi_check_pattern(buf, 0xFF, len); if (err == 0) { ubi_err(ubi, "flash region at PEB %d:%d, length %d does not contain all 0xFF bytes", pnum, offset, len); goto fail; } vfree(buf); return 0; fail: ubi_err(ubi, "self-check failed for PEB %d", pnum); ubi_msg(ubi, "hex dump of the %d-%d region", offset, offset + len); print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, buf, len, 1); err = -EINVAL; error: dump_stack(); vfree(buf); return err; }
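/*
 * A minimal, hypothetical caller of the helpers above: ubi_io_read_vid_hdr()
 * reports the state of a PEB through 0, UBI_IO_BITFLIPS, UBI_IO_FF,
 * UBI_IO_FF_BITFLIPS, UBI_IO_BAD_HDR, UBI_IO_BAD_HDR_EBADMSG or a negative
 * error.  The enum and function name below are illustrative assumptions,
 * not UBI's actual attach logic.
 */
enum peb_state { PEB_IN_USE, PEB_EMPTY, PEB_CORRUPTED, PEB_IO_ERROR };

static enum peb_state classify_peb(struct ubi_device *ubi, int pnum,
				   struct ubi_vid_io_buf *vidb)
{
	int err = ubi_io_read_vid_hdr(ubi, pnum, vidb, 0);

	if (err == 0 || err == UBI_IO_BITFLIPS)
		return PEB_IN_USE;	/* valid header; bitflips mean "scrub me" */
	if (err == UBI_IO_FF || err == UBI_IO_FF_BITFLIPS)
		return PEB_EMPTY;	/* nothing but 0xFF bytes was found */
	if (err == UBI_IO_BAD_HDR || err == UBI_IO_BAD_HDR_EBADMSG)
		return PEB_CORRUPTED;	/* header present, but CRC/ECC checks failed */
	return PEB_IO_ERROR;		/* negative error code from the MTD layer */
}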
/* SPDX-License-Identifier: GPL-2.0 */ /* * Wireless configuration interface internals. * * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> */ #ifndef __NET_WIRELESS_CORE_H #define __NET_WIRELESS_CORE_H #include <linux/list.h> #include <linux/netdevice.h> #include <linux/rbtree.h> #include <linux/debugfs.h> #include <linux/rfkill.h> #include <linux/workqueue.h> #include <linux/rtnetlink.h> #include <net/genetlink.h> #include <net/cfg80211.h> #include "reg.h" #define WIPHY_IDX_INVALID -1 struct cfg80211_registered_device { const struct cfg80211_ops *ops; struct list_head list; /* rfkill support */ struct rfkill_ops rfkill_ops; struct rfkill *rfkill; struct work_struct rfkill_sync; /* ISO / IEC 3166 alpha2 for which this device is receiving * country IEs on, this can help disregard country IEs from APs * on the same alpha2 quickly. The alpha2 may differ from * cfg80211_regdomain's alpha2 when an intersection has occurred. * If the AP is reconfigured this can also be used to tell us if * the country on the country IE changed. */ char country_ie_alpha2[2]; /* * the driver requests the regulatory core to set this regulatory * domain as the wiphy's. Only used for %REGULATORY_WIPHY_SELF_MANAGED * devices using the regulatory_set_wiphy_regd() API */ const struct ieee80211_regdomain *requested_regd; /* If a Country IE has been received this tells us the environment * which it's telling us it's in.
This defaults to ENVIRON_ANY */ enum environment_cap env; /* wiphy index, internal only */ int wiphy_idx; /* protected by RTNL */ int devlist_generation, wdev_id; int opencount; wait_queue_head_t dev_wait; struct list_head beacon_registrations; spinlock_t beacon_registrations_lock; struct list_head mlme_unreg; spinlock_t mlme_unreg_lock; struct work_struct mlme_unreg_wk; /* protected by RTNL only */ int num_running_ifaces; int num_running_monitor_ifaces; /* BSSes/scanning */ spinlock_t bss_lock; struct list_head bss_list; struct rb_root bss_tree; u32 bss_generation; u32 bss_entries; struct cfg80211_scan_request *scan_req; /* protected by RTNL */ struct sk_buff *scan_msg; struct list_head sched_scan_req_list; time64_t suspend_at; struct work_struct scan_done_wk; struct genl_info *cur_cmd_info; struct work_struct conn_work; struct work_struct event_work; struct delayed_work dfs_update_channels_wk; /* netlink port which started critical protocol (0 means not started) */ u32 crit_proto_nlportid; struct cfg80211_coalesce *coalesce; struct work_struct destroy_work; struct work_struct sched_scan_stop_wk; struct work_struct sched_scan_res_wk; struct cfg80211_chan_def radar_chandef; struct work_struct propagate_radar_detect_wk; struct cfg80211_chan_def cac_done_chandef; struct work_struct propagate_cac_done_wk; /* must be last because of the way we do wiphy_priv(), * and it should at least be aligned to NETDEV_ALIGN */ struct wiphy wiphy __aligned(NETDEV_ALIGN); }; static inline struct cfg80211_registered_device *wiphy_to_rdev(struct wiphy *wiphy) { BUG_ON(!wiphy); return container_of(wiphy, struct cfg80211_registered_device, wiphy); } static inline void cfg80211_rdev_free_wowlan(struct cfg80211_registered_device *rdev) { #ifdef CONFIG_PM int i; if (!rdev->wiphy.wowlan_config) return; for (i = 0; i < rdev->wiphy.wowlan_config->n_patterns; i++) kfree(rdev->wiphy.wowlan_config->patterns[i].mask); kfree(rdev->wiphy.wowlan_config->patterns); if (rdev->wiphy.wowlan_config->tcp && rdev->wiphy.wowlan_config->tcp->sock) sock_release(rdev->wiphy.wowlan_config->tcp->sock); kfree(rdev->wiphy.wowlan_config->tcp); kfree(rdev->wiphy.wowlan_config->nd_config); kfree(rdev->wiphy.wowlan_config); #endif } extern struct workqueue_struct *cfg80211_wq; extern struct list_head cfg80211_rdev_list; extern int cfg80211_rdev_list_generation; struct cfg80211_internal_bss { struct list_head list; struct list_head hidden_list; struct rb_node rbn; u64 ts_boottime; unsigned long ts; unsigned long refcount; atomic_t hold; /* time at the start of the reception of the first octet of the * timestamp field of the last beacon/probe received for this BSS. * The time is the TSF of the BSS specified by %parent_bssid. */ u64 parent_tsf; /* the BSS according to which %parent_tsf is set. This is set to * the BSS that the interface that requested the scan was connected to * when the beacon/probe was received. 
*/ u8 parent_bssid[ETH_ALEN] __aligned(2); /* must be last because of priv member */ struct cfg80211_bss pub; }; static inline struct cfg80211_internal_bss *bss_from_pub(struct cfg80211_bss *pub) { return container_of(pub, struct cfg80211_internal_bss, pub); } static inline void cfg80211_hold_bss(struct cfg80211_internal_bss *bss) { atomic_inc(&bss->hold); } static inline void cfg80211_unhold_bss(struct cfg80211_internal_bss *bss) { int r = atomic_dec_return(&bss->hold); WARN_ON(r < 0); } struct cfg80211_registered_device *cfg80211_rdev_by_wiphy_idx(int wiphy_idx); int get_wiphy_idx(struct wiphy *wiphy); struct wiphy *wiphy_idx_to_wiphy(int wiphy_idx); int cfg80211_switch_netns(struct cfg80211_registered_device *rdev, struct net *net); static inline void wdev_lock(struct wireless_dev *wdev) __acquires(wdev) { mutex_lock(&wdev->mtx); __acquire(wdev->mtx); } static inline void wdev_unlock(struct wireless_dev *wdev) __releases(wdev) { __release(wdev->mtx); mutex_unlock(&wdev->mtx); } #define ASSERT_WDEV_LOCK(wdev) lockdep_assert_held(&(wdev)->mtx) static inline bool cfg80211_has_monitors_only(struct cfg80211_registered_device *rdev) { ASSERT_RTNL(); return rdev->num_running_ifaces == rdev->num_running_monitor_ifaces && rdev->num_running_ifaces > 0; } enum cfg80211_event_type { EVENT_CONNECT_RESULT, EVENT_ROAMED, EVENT_DISCONNECTED, EVENT_IBSS_JOINED, EVENT_STOPPED, EVENT_PORT_AUTHORIZED, }; struct cfg80211_event { struct list_head list; enum cfg80211_event_type type; union { struct cfg80211_connect_resp_params cr; struct cfg80211_roam_info rm; struct { const u8 *ie; size_t ie_len; u16 reason; bool locally_generated; } dc; struct { u8 bssid[ETH_ALEN]; struct ieee80211_channel *channel; } ij; struct { u8 bssid[ETH_ALEN]; } pa; }; }; struct cfg80211_cached_keys { struct key_params params[CFG80211_MAX_WEP_KEYS]; u8 data[CFG80211_MAX_WEP_KEYS][WLAN_KEY_LEN_WEP104]; int def; }; enum cfg80211_chan_mode { CHAN_MODE_UNDEFINED, CHAN_MODE_SHARED, CHAN_MODE_EXCLUSIVE, }; struct cfg80211_beacon_registration { struct list_head list; u32 nlportid; }; struct cfg80211_cqm_config { u32 rssi_hyst; s32 last_rssi_event_value; int n_rssi_thresholds; s32 rssi_thresholds[0]; }; void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev); /* free object */ void cfg80211_dev_free(struct cfg80211_registered_device *rdev); int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, char *newname); void ieee80211_set_bitrate_flags(struct wiphy *wiphy); void cfg80211_bss_expire(struct cfg80211_registered_device *rdev); void cfg80211_bss_age(struct cfg80211_registered_device *rdev, unsigned long age_secs); /* IBSS */ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_ibss_params *params, struct cfg80211_cached_keys *connkeys); void cfg80211_clear_ibss(struct net_device *dev, bool nowext); int __cfg80211_leave_ibss(struct cfg80211_registered_device *rdev, struct net_device *dev, bool nowext); int cfg80211_leave_ibss(struct cfg80211_registered_device *rdev, struct net_device *dev, bool nowext); void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, struct ieee80211_channel *channel); int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); /* mesh */ extern const struct mesh_config default_mesh_config; extern const struct mesh_setup default_mesh_setup; int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, struct net_device *dev, struct mesh_setup *setup, const struct mesh_config *conf); int 
__cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, struct net_device *dev); int cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, struct net_device *dev); int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, struct cfg80211_chan_def *chandef); /* OCB */ int __cfg80211_join_ocb(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ocb_setup *setup); int cfg80211_join_ocb(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ocb_setup *setup); int __cfg80211_leave_ocb(struct cfg80211_registered_device *rdev, struct net_device *dev); int cfg80211_leave_ocb(struct cfg80211_registered_device *rdev, struct net_device *dev); /* AP */ int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev, struct net_device *dev, bool notify); int cfg80211_stop_ap(struct cfg80211_registered_device *rdev, struct net_device *dev, bool notify); /* MLME */ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ieee80211_channel *chan, enum nl80211_auth_type auth_type, const u8 *bssid, const u8 *ssid, int ssid_len, const u8 *ie, int ie_len, const u8 *key, int key_len, int key_idx, const u8 *auth_data, int auth_data_len); int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ieee80211_channel *chan, const u8 *bssid, const u8 *ssid, int ssid_len, struct cfg80211_assoc_request *req); int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, struct net_device *dev, const u8 *bssid, const u8 *ie, int ie_len, u16 reason, bool local_state_change); int cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, struct net_device *dev, const u8 *bssid, const u8 *ie, int ie_len, u16 reason, bool local_state_change); void cfg80211_mlme_down(struct cfg80211_registered_device *rdev, struct net_device *dev); int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid, u16 frame_type, const u8 *match_data, int match_len); void cfg80211_mlme_unreg_wk(struct work_struct *wk); void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid); void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev); int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, struct cfg80211_mgmt_tx_params *params, u64 *cookie); void cfg80211_oper_and_ht_capa(struct ieee80211_ht_cap *ht_capa, const struct ieee80211_ht_cap *ht_capa_mask); void cfg80211_oper_and_vht_capa(struct ieee80211_vht_cap *vht_capa, const struct ieee80211_vht_cap *vht_capa_mask); /* SME events */ int cfg80211_connect(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_connect_params *connect, struct cfg80211_cached_keys *connkeys, const u8 *prev_bssid); void __cfg80211_connect_result(struct net_device *dev, struct cfg80211_connect_resp_params *params, bool wextev); void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, size_t ie_len, u16 reason, bool from_ap); int cfg80211_disconnect(struct cfg80211_registered_device *rdev, struct net_device *dev, u16 reason, bool wextev); void __cfg80211_roamed(struct wireless_dev *wdev, struct cfg80211_roam_info *info); void __cfg80211_port_authorized(struct wireless_dev *wdev, const u8 *bssid); int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); void cfg80211_autodisconnect_wk(struct work_struct *work); /* SME implementation */ void cfg80211_conn_work(struct work_struct *work); void 
cfg80211_sme_scan_done(struct net_device *dev); bool cfg80211_sme_rx_assoc_resp(struct wireless_dev *wdev, u16 status); void cfg80211_sme_rx_auth(struct wireless_dev *wdev, const u8 *buf, size_t len); void cfg80211_sme_disassoc(struct wireless_dev *wdev); void cfg80211_sme_deauth(struct wireless_dev *wdev); void cfg80211_sme_auth_timeout(struct wireless_dev *wdev); void cfg80211_sme_assoc_timeout(struct wireless_dev *wdev); void cfg80211_sme_abandon_assoc(struct wireless_dev *wdev); /* internal helpers */ bool cfg80211_supported_cipher_suite(struct wiphy *wiphy, u32 cipher); bool cfg80211_valid_key_idx(struct cfg80211_registered_device *rdev, int key_idx, bool pairwise); int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, struct key_params *params, int key_idx, bool pairwise, const u8 *mac_addr); void __cfg80211_scan_done(struct work_struct *wk); void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool send_message); void cfg80211_add_sched_scan_req(struct cfg80211_registered_device *rdev, struct cfg80211_sched_scan_request *req); int cfg80211_sched_scan_req_possible(struct cfg80211_registered_device *rdev, bool want_multi); void cfg80211_sched_scan_results_wk(struct work_struct *work); int cfg80211_stop_sched_scan_req(struct cfg80211_registered_device *rdev, struct cfg80211_sched_scan_request *req, bool driver_initiated); int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev, u64 reqid, bool driver_initiated); void cfg80211_upload_connect_keys(struct wireless_dev *wdev); int cfg80211_change_iface(struct cfg80211_registered_device *rdev, struct net_device *dev, enum nl80211_iftype ntype, struct vif_params *params); void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev); void cfg80211_process_wdev_events(struct wireless_dev *wdev); bool cfg80211_does_bw_fit_range(const struct ieee80211_freq_range *freq_range, u32 center_freq_khz, u32 bw_khz); extern struct work_struct cfg80211_disconnect_work; /** * cfg80211_chandef_dfs_usable - checks if chandef is DFS usable * @wiphy: the wiphy to validate against * @chandef: the channel definition to check * * Checks if chandef is usable and we can/need start CAC on such channel. 
* * Return: Return true if all channels available and at least * one channel require CAC (NL80211_DFS_USABLE) */ bool cfg80211_chandef_dfs_usable(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef); void cfg80211_set_dfs_state(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef, enum nl80211_dfs_state dfs_state); void cfg80211_dfs_channels_update_work(struct work_struct *work); unsigned int cfg80211_chandef_dfs_cac_time(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef); void cfg80211_sched_dfs_chan_update(struct cfg80211_registered_device *rdev); bool cfg80211_any_wiphy_oper_chan(struct wiphy *wiphy, struct ieee80211_channel *chan); bool cfg80211_beaconing_iface_active(struct wireless_dev *wdev); bool cfg80211_is_sub_chan(struct cfg80211_chan_def *chandef, struct ieee80211_channel *chan); static inline unsigned int elapsed_jiffies_msecs(unsigned long start) { unsigned long end = jiffies; if (end >= start) return jiffies_to_msecs(end - start); return jiffies_to_msecs(end + (ULONG_MAX - start) + 1); } void cfg80211_get_chan_state(struct wireless_dev *wdev, struct ieee80211_channel **chan, enum cfg80211_chan_mode *chanmode, u8 *radar_detect); int cfg80211_set_monitor_channel(struct cfg80211_registered_device *rdev, struct cfg80211_chan_def *chandef); int ieee80211_get_ratemask(struct ieee80211_supported_band *sband, const u8 *rates, unsigned int n_rates, u32 *mask); int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, enum nl80211_iftype iftype, u32 beacon_int); void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev, enum nl80211_iftype iftype, int num); void __cfg80211_leave(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); void cfg80211_leave(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); void cfg80211_stop_nan(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); #ifdef CONFIG_CFG80211_DEVELOPER_WARNINGS #define CFG80211_DEV_WARN_ON(cond) WARN_ON(cond) #else /* * Trick to enable using it as a condition, * and also not give a warning when it's * not used that way. */ #define CFG80211_DEV_WARN_ON(cond) ({bool __r = (cond); __r; }) #endif void cfg80211_cqm_config_free(struct wireless_dev *wdev); #endif /* __NET_WIRELESS_CORE_H */
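/*
 * Minimal usage sketch of the locking helpers above (the function names are
 * hypothetical, not declarations from this header): cfg80211 typically pairs
 * a public wrapper that takes wdev->mtx via wdev_lock()/wdev_unlock() with a
 * "__"-prefixed helper that asserts the lock with ASSERT_WDEV_LOCK().
 */
static void __example_wdev_op(struct wireless_dev *wdev)
{
	ASSERT_WDEV_LOCK(wdev);		/* caller must hold wdev->mtx */
	/* ... touch per-wdev state protected by wdev->mtx ... */
}

static void example_wdev_op(struct wireless_dev *wdev)
{
	wdev_lock(wdev);
	__example_wdev_op(wdev);
	wdev_unlock(wdev);
}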
// SPDX-License-Identifier: GPL-2.0 /* * linux/fs/fcntl.c * * Copyright (C) 1991, 1992 Linus Torvalds */ #include <linux/syscalls.h> #include <linux/init.h> #include <linux/mm.h> #include <linux/sched/task.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/fdtable.h> #include <linux/capability.h> #include <linux/dnotify.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/pipe_fs_i.h> #include <linux/security.h> #include <linux/ptrace.h> #include <linux/signal.h> #include <linux/rcupdate.h> #include <linux/pid_namespace.h> #include <linux/user_namespace.h> #include <linux/memfd.h> #include <linux/compat.h> #include <linux/poll.h> #include <asm/siginfo.h> #include <linux/uaccess.h> #define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME) static int setfl(int fd, struct file * filp, unsigned long arg) { struct inode * inode = file_inode(filp); int error = 0; /* * O_APPEND cannot be cleared if the file is marked as append-only * and the file is open for write. */ if (((arg ^ filp->f_flags) & O_APPEND) && IS_APPEND(inode)) return -EPERM; /* O_NOATIME can only be set by the owner or superuser */ if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME)) if (!inode_owner_or_capable(inode)) return -EPERM; /* required for strict SunOS emulation */ if (O_NONBLOCK != O_NDELAY) if (arg & O_NDELAY) arg |= O_NONBLOCK; /* Pipe packetized mode is controlled by O_DIRECT flag */ if (!S_ISFIFO(inode->i_mode) && (arg & O_DIRECT)) { if (!filp->f_mapping || !filp->f_mapping->a_ops || !filp->f_mapping->a_ops->direct_IO) return -EINVAL; } if (filp->f_op->check_flags) error = filp->f_op->check_flags(arg); if (error) return error; /* * ->fasync() is responsible for setting the FASYNC bit.
*/ if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op->fasync) { error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0); if (error < 0) goto out; if (error > 0) error = 0; } spin_lock(&filp->f_lock); filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK); spin_unlock(&filp->f_lock); out: return error; } static void f_modown(struct file *filp, struct pid *pid, enum pid_type type, int force) { write_lock_irq(&filp->f_owner.lock); if (force || !filp->f_owner.pid) { put_pid(filp->f_owner.pid); filp->f_owner.pid = get_pid(pid); filp->f_owner.pid_type = type; if (pid) { const struct cred *cred = current_cred(); filp->f_owner.uid = cred->uid; filp->f_owner.euid = cred->euid; } } write_unlock_irq(&filp->f_owner.lock); } void __f_setown(struct file *filp, struct pid *pid, enum pid_type type, int force) { security_file_set_fowner(filp); f_modown(filp, pid, type, force); } EXPORT_SYMBOL(__f_setown); int f_setown(struct file *filp, unsigned long arg, int force) { enum pid_type type; struct pid *pid = NULL; int who = arg, ret = 0; type = PIDTYPE_TGID; if (who < 0) { /* avoid overflow below */ if (who == INT_MIN) return -EINVAL; type = PIDTYPE_PGID; who = -who; } rcu_read_lock(); if (who) { pid = find_vpid(who); if (!pid) ret = -ESRCH; } if (!ret) __f_setown(filp, pid, type, force); rcu_read_unlock(); return ret; } EXPORT_SYMBOL(f_setown); void f_delown(struct file *filp) { f_modown(filp, NULL, PIDTYPE_TGID, 1); } pid_t f_getown(struct file *filp) { pid_t pid; read_lock(&filp->f_owner.lock); pid = pid_vnr(filp->f_owner.pid); if (filp->f_owner.pid_type == PIDTYPE_PGID) pid = -pid; read_unlock(&filp->f_owner.lock); return pid; } static int f_setown_ex(struct file *filp, unsigned long arg) { struct f_owner_ex __user *owner_p = (void __user *)arg; struct f_owner_ex owner; struct pid *pid; int type; int ret; ret = copy_from_user(&owner, owner_p, sizeof(owner)); if (ret) return -EFAULT; switch (owner.type) { case F_OWNER_TID: type = PIDTYPE_PID; break; case F_OWNER_PID: type = PIDTYPE_TGID; break; case F_OWNER_PGRP: type = PIDTYPE_PGID; break; default: return -EINVAL; } rcu_read_lock(); pid = find_vpid(owner.pid); if (owner.pid && !pid) ret = -ESRCH; else __f_setown(filp, pid, type, 1); rcu_read_unlock(); return ret; } static int f_getown_ex(struct file *filp, unsigned long arg) { struct f_owner_ex __user *owner_p = (void __user *)arg; struct f_owner_ex owner; int ret = 0; read_lock(&filp->f_owner.lock); owner.pid = pid_vnr(filp->f_owner.pid); switch (filp->f_owner.pid_type) { case PIDTYPE_PID: owner.type = F_OWNER_TID; break; case PIDTYPE_TGID: owner.type = F_OWNER_PID; break; case PIDTYPE_PGID: owner.type = F_OWNER_PGRP; break; default: WARN_ON(1); ret = -EINVAL; break; } read_unlock(&filp->f_owner.lock); if (!ret) { ret = copy_to_user(owner_p, &owner, sizeof(owner)); if (ret) ret = -EFAULT; } return ret; } #ifdef CONFIG_CHECKPOINT_RESTORE static int f_getowner_uids(struct file *filp, unsigned long arg) { struct user_namespace *user_ns = current_user_ns(); uid_t __user *dst = (void __user *)arg; uid_t src[2]; int err; read_lock(&filp->f_owner.lock); src[0] = from_kuid(user_ns, filp->f_owner.uid); src[1] = from_kuid(user_ns, filp->f_owner.euid); read_unlock(&filp->f_owner.lock); err = put_user(src[0], &dst[0]); err |= put_user(src[1], &dst[1]); return err; } #else static int f_getowner_uids(struct file *filp, unsigned long arg) { return -EINVAL; } #endif static bool rw_hint_valid(enum rw_hint hint) { switch (hint) { case RWF_WRITE_LIFE_NOT_SET: case RWH_WRITE_LIFE_NONE: case 
RWH_WRITE_LIFE_SHORT: case RWH_WRITE_LIFE_MEDIUM: case RWH_WRITE_LIFE_LONG: case RWH_WRITE_LIFE_EXTREME: return true; default: return false; } } static long fcntl_rw_hint(struct file *file, unsigned int cmd, unsigned long arg) { struct inode *inode = file_inode(file); u64 *argp = (u64 __user *)arg; enum rw_hint hint; u64 h; switch (cmd) { case F_GET_FILE_RW_HINT: h = file_write_hint(file); if (copy_to_user(argp, &h, sizeof(*argp))) return -EFAULT; return 0; case F_SET_FILE_RW_HINT: if (copy_from_user(&h, argp, sizeof(h))) return -EFAULT; hint = (enum rw_hint) h; if (!rw_hint_valid(hint)) return -EINVAL; spin_lock(&file->f_lock); file->f_write_hint = hint; spin_unlock(&file->f_lock); return 0; case F_GET_RW_HINT: h = inode->i_write_hint; if (copy_to_user(argp, &h, sizeof(*argp))) return -EFAULT; return 0; case F_SET_RW_HINT: if (copy_from_user(&h, argp, sizeof(h))) return -EFAULT; hint = (enum rw_hint) h; if (!rw_hint_valid(hint)) return -EINVAL; inode_lock(inode); inode->i_write_hint = hint; inode_unlock(inode); return 0; default: return -EINVAL; } } static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, struct file *filp) { void __user *argp = (void __user *)arg; struct flock flock; long err = -EINVAL; switch (cmd) { case F_DUPFD: err = f_dupfd(arg, filp, 0); break; case F_DUPFD_CLOEXEC: err = f_dupfd(arg, filp, O_CLOEXEC); break; case F_GETFD: err = get_close_on_exec(fd) ? FD_CLOEXEC : 0; break; case F_SETFD: err = 0; set_close_on_exec(fd, arg & FD_CLOEXEC); break; case F_GETFL: err = filp->f_flags; break; case F_SETFL: err = setfl(fd, filp, arg); break; #if BITS_PER_LONG != 32 /* 32-bit arches must use fcntl64() */ case F_OFD_GETLK: #endif case F_GETLK: if (copy_from_user(&flock, argp, sizeof(flock))) return -EFAULT; err = fcntl_getlk(filp, cmd, &flock); if (!err && copy_to_user(argp, &flock, sizeof(flock))) return -EFAULT; break; #if BITS_PER_LONG != 32 /* 32-bit arches must use fcntl64() */ case F_OFD_SETLK: case F_OFD_SETLKW: #endif /* Fallthrough */ case F_SETLK: case F_SETLKW: if (copy_from_user(&flock, argp, sizeof(flock))) return -EFAULT; err = fcntl_setlk(fd, filp, cmd, &flock); break; case F_GETOWN: /* * XXX If f_owner is a process group, the * negative return value will get converted * into an error. Oops. If we keep the * current syscall conventions, the only way * to fix this will be in libc. */ err = f_getown(filp); force_successful_syscall_return(); break; case F_SETOWN: err = f_setown(filp, arg, 1); break; case F_GETOWN_EX: err = f_getown_ex(filp, arg); break; case F_SETOWN_EX: err = f_setown_ex(filp, arg); break; case F_GETOWNER_UIDS: err = f_getowner_uids(filp, arg); break; case F_GETSIG: err = filp->f_owner.signum; break; case F_SETSIG: /* arg == 0 restores default behaviour. 
*/ if (!valid_signal(arg)) { break; } err = 0; filp->f_owner.signum = arg; break; case F_GETLEASE: err = fcntl_getlease(filp); break; case F_SETLEASE: err = fcntl_setlease(fd, filp, arg); break; case F_NOTIFY: err = fcntl_dirnotify(fd, filp, arg); break; case F_SETPIPE_SZ: case F_GETPIPE_SZ: err = pipe_fcntl(filp, cmd, arg); break; case F_ADD_SEALS: case F_GET_SEALS: err = memfd_fcntl(filp, cmd, arg); break; case F_GET_RW_HINT: case F_SET_RW_HINT: case F_GET_FILE_RW_HINT: case F_SET_FILE_RW_HINT: err = fcntl_rw_hint(filp, cmd, arg); break; default: break; } return err; } static int check_fcntl_cmd(unsigned cmd) { switch (cmd) { case F_DUPFD: case F_DUPFD_CLOEXEC: case F_GETFD: case F_SETFD: case F_GETFL: return 1; } return 0; } SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) { struct fd f = fdget_raw(fd); long err = -EBADF; if (!f.file) goto out; if (unlikely(f.file->f_mode & FMODE_PATH)) { if (!check_fcntl_cmd(cmd)) goto out1; } err = security_file_fcntl(f.file, cmd, arg); if (!err) err = do_fcntl(fd, cmd, arg, f.file); out1: fdput(f); out: return err; } #if BITS_PER_LONG == 32 SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, unsigned long, arg) { void __user *argp = (void __user *)arg; struct fd f = fdget_raw(fd); struct flock64 flock; long err = -EBADF; if (!f.file) goto out; if (unlikely(f.file->f_mode & FMODE_PATH)) { if (!check_fcntl_cmd(cmd)) goto out1; } err = security_file_fcntl(f.file, cmd, arg); if (err) goto out1; switch (cmd) { case F_GETLK64: case F_OFD_GETLK: err = -EFAULT; if (copy_from_user(&flock, argp, sizeof(flock))) break; err = fcntl_getlk64(f.file, cmd, &flock); if (!err && copy_to_user(argp, &flock, sizeof(flock))) err = -EFAULT; break; case F_SETLK64: case F_SETLKW64: case F_OFD_SETLK: case F_OFD_SETLKW: err = -EFAULT; if (copy_from_user(&flock, argp, sizeof(flock))) break; err = fcntl_setlk64(fd, f.file, cmd, &flock); break; default: err = do_fcntl(fd, cmd, arg, f.file); break; } out1: fdput(f); out: return err; } #endif #ifdef CONFIG_COMPAT /* careful - don't use anywhere else */ #define copy_flock_fields(dst, src) \ (dst)->l_type = (src)->l_type; \ (dst)->l_whence = (src)->l_whence; \ (dst)->l_start = (src)->l_start; \ (dst)->l_len = (src)->l_len; \ (dst)->l_pid = (src)->l_pid; static int get_compat_flock(struct flock *kfl, const struct compat_flock __user *ufl) { struct compat_flock fl; if (copy_from_user(&fl, ufl, sizeof(struct compat_flock))) return -EFAULT; copy_flock_fields(kfl, &fl); return 0; } static int get_compat_flock64(struct flock *kfl, const struct compat_flock64 __user *ufl) { struct compat_flock64 fl; if (copy_from_user(&fl, ufl, sizeof(struct compat_flock64))) return -EFAULT; copy_flock_fields(kfl, &fl); return 0; } static int put_compat_flock(const struct flock *kfl, struct compat_flock __user *ufl) { struct compat_flock fl; memset(&fl, 0, sizeof(struct compat_flock)); copy_flock_fields(&fl, kfl); if (copy_to_user(ufl, &fl, sizeof(struct compat_flock))) return -EFAULT; return 0; } static int put_compat_flock64(const struct flock *kfl, struct compat_flock64 __user *ufl) { struct compat_flock64 fl; BUILD_BUG_ON(sizeof(kfl->l_start) > sizeof(ufl->l_start)); BUILD_BUG_ON(sizeof(kfl->l_len) > sizeof(ufl->l_len)); memset(&fl, 0, sizeof(struct compat_flock64)); copy_flock_fields(&fl, kfl); if (copy_to_user(ufl, &fl, sizeof(struct compat_flock64))) return -EFAULT; return 0; } #undef copy_flock_fields static unsigned int convert_fcntl_cmd(unsigned int cmd) { switch (cmd) { case F_GETLK64: return F_GETLK; case 
F_SETLK64: return F_SETLK; case F_SETLKW64: return F_SETLKW; } return cmd; } /* * GETLK was successful and we need to return the data, but it needs to fit in * the compat structure. * l_start shouldn't be too big, unless the original start + end is greater than * COMPAT_OFF_T_MAX, in which case the app was asking for trouble, so we return * -EOVERFLOW in that case. l_len could be too big, in which case we just * truncate it, and only allow the app to see that part of the conflicting lock * that might make sense to it anyway */ static int fixup_compat_flock(struct flock *flock) { if (flock->l_start > COMPAT_OFF_T_MAX) return -EOVERFLOW; if (flock->l_len > COMPAT_OFF_T_MAX) flock->l_len = COMPAT_OFF_T_MAX; return 0; } static long do_compat_fcntl64(unsigned int fd, unsigned int cmd, compat_ulong_t arg) { struct fd f = fdget_raw(fd); struct flock flock; long err = -EBADF; if (!f.file) return err; if (unlikely(f.file->f_mode & FMODE_PATH)) { if (!check_fcntl_cmd(cmd)) goto out_put; } err = security_file_fcntl(f.file, cmd, arg); if (err) goto out_put; switch (cmd) { case F_GETLK: err = get_compat_flock(&flock, compat_ptr(arg)); if (err) break; err = fcntl_getlk(f.file, convert_fcntl_cmd(cmd), &flock); if (err) break; err = fixup_compat_flock(&flock); if (!err) err = put_compat_flock(&flock, compat_ptr(arg)); break; case F_GETLK64: case F_OFD_GETLK: err = get_compat_flock64(&flock, compat_ptr(arg)); if (err) break; err = fcntl_getlk(f.file, convert_fcntl_cmd(cmd), &flock); if (!err) err = put_compat_flock64(&flock, compat_ptr(arg)); break; case F_SETLK: case F_SETLKW: err = get_compat_flock(&flock, compat_ptr(arg)); if (err) break; err = fcntl_setlk(fd, f.file, convert_fcntl_cmd(cmd), &flock); break; case F_SETLK64: case F_SETLKW64: case F_OFD_SETLK: case F_OFD_SETLKW: err = get_compat_flock64(&flock, compat_ptr(arg)); if (err) break; err = fcntl_setlk(fd, f.file, convert_fcntl_cmd(cmd), &flock); break; default: err = do_fcntl(fd, cmd, arg, f.file); break; } out_put: fdput(f); return err; } COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, compat_ulong_t, arg) { return do_compat_fcntl64(fd, cmd, arg); } COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, compat_ulong_t, arg) { switch (cmd) { case F_GETLK64: case F_SETLK64: case F_SETLKW64: case F_OFD_GETLK: case F_OFD_SETLK: case F_OFD_SETLKW: return -EINVAL; } return do_compat_fcntl64(fd, cmd, arg); } #endif /* Table to convert sigio signal codes into poll band bitmaps */ static const __poll_t band_table[NSIGPOLL] = { EPOLLIN | EPOLLRDNORM, /* POLL_IN */ EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND, /* POLL_OUT */ EPOLLIN | EPOLLRDNORM | EPOLLMSG, /* POLL_MSG */ EPOLLERR, /* POLL_ERR */ EPOLLPRI | EPOLLRDBAND, /* POLL_PRI */ EPOLLHUP | EPOLLERR /* POLL_HUP */ }; static inline int sigio_perm(struct task_struct *p, struct fown_struct *fown, int sig) { const struct cred *cred; int ret; rcu_read_lock(); cred = __task_cred(p); ret = ((uid_eq(fown->euid, GLOBAL_ROOT_UID) || uid_eq(fown->euid, cred->suid) || uid_eq(fown->euid, cred->uid) || uid_eq(fown->uid, cred->suid) || uid_eq(fown->uid, cred->uid)) && !security_file_send_sigiotask(p, fown, sig)); rcu_read_unlock(); return ret; } static void send_sigio_to_task(struct task_struct *p, struct fown_struct *fown, int fd, int reason, enum pid_type type) { /* * F_SETSIG can change ->signum lockless in parallel, make * sure we read it once and use the same value throughout. 
*/ int signum = READ_ONCE(fown->signum); if (!sigio_perm(p, fown, signum)) return; switch (signum) { siginfo_t si; default: /* Queue a rt signal with the appropriate fd as its value. We use SI_SIGIO as the source, not SI_KERNEL, since kernel signals always get delivered even if we can't queue. Failure to queue in this case _should_ be reported; we fall back to SIGIO in that case. --sct */ clear_siginfo(&si); si.si_signo = signum; si.si_errno = 0; si.si_code = reason; /* * Posix defines POLL_IN and friends to be signal * specific si_codes for SIG_POLL. Linux extended * these si_codes to other signals in a way that is * ambiguous if other signals also have signal * specific si_codes. In that case use SI_SIGIO instead * to remove the ambiguity. */ if ((signum != SIGPOLL) && sig_specific_sicodes(signum)) si.si_code = SI_SIGIO; /* Make sure we are called with one of the POLL_* reasons, otherwise we could leak kernel stack into userspace. */ BUG_ON((reason < POLL_IN) || ((reason - POLL_IN) >= NSIGPOLL)); if (reason - POLL_IN >= NSIGPOLL) si.si_band = ~0L; else si.si_band = mangle_poll(band_table[reason - POLL_IN]); si.si_fd = fd; if (!do_send_sig_info(signum, &si, p, type)) break; /* fall-through: fall back on the old plain SIGIO signal */ case 0: do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, type); } } void send_sigio(struct fown_struct *fown, int fd, int band) { struct task_struct *p; enum pid_type type; unsigned long flags; struct pid *pid; read_lock_irqsave(&fown->lock, flags); type = fown->pid_type; pid = fown->pid; if (!pid) goto out_unlock_fown; if (type <= PIDTYPE_TGID) { rcu_read_lock(); p = pid_task(pid, PIDTYPE_PID); if (p) send_sigio_to_task(p, fown, fd, band, type); rcu_read_unlock(); } else { read_lock(&tasklist_lock); do_each_pid_task(pid, type, p) { send_sigio_to_task(p, fown, fd, band, type); } while_each_pid_task(pid, type, p); read_unlock(&tasklist_lock); } out_unlock_fown: read_unlock_irqrestore(&fown->lock, flags); } static void send_sigurg_to_task(struct task_struct *p, struct fown_struct *fown, enum pid_type type) { if (sigio_perm(p, fown, SIGURG)) do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, type); } int send_sigurg(struct fown_struct *fown) { struct task_struct *p; enum pid_type type; struct pid *pid; unsigned long flags; int ret = 0; read_lock_irqsave(&fown->lock, flags); type = fown->pid_type; pid = fown->pid; if (!pid) goto out_unlock_fown; ret = 1; if (type <= PIDTYPE_TGID) { rcu_read_lock(); p = pid_task(pid, PIDTYPE_PID); if (p) send_sigurg_to_task(p, fown, type); rcu_read_unlock(); } else { read_lock(&tasklist_lock); do_each_pid_task(pid, type, p) { send_sigurg_to_task(p, fown, type); } while_each_pid_task(pid, type, p); read_unlock(&tasklist_lock); } out_unlock_fown: read_unlock_irqrestore(&fown->lock, flags); return ret; } static DEFINE_SPINLOCK(fasync_lock); static struct kmem_cache *fasync_cache __read_mostly; static void fasync_free_rcu(struct rcu_head *head) { kmem_cache_free(fasync_cache, container_of(head, struct fasync_struct, fa_rcu)); } /* * Remove a fasync entry. If successfully removed, return * positive and clear the FASYNC flag. If no entry exists, * do nothing and return 0. * * NOTE! It is very important that the FASYNC flag always * match the state "is the filp on a fasync list".
* */ int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp) { struct fasync_struct *fa, **fp; int result = 0; spin_lock(&filp->f_lock); spin_lock(&fasync_lock); for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) { if (fa->fa_file != filp) continue; write_lock_irq(&fa->fa_lock); fa->fa_file = NULL; write_unlock_irq(&fa->fa_lock); *fp = fa->fa_next; call_rcu(&fa->fa_rcu, fasync_free_rcu); filp->f_flags &= ~FASYNC; result = 1; break; } spin_unlock(&fasync_lock); spin_unlock(&filp->f_lock); return result; } struct fasync_struct *fasync_alloc(void) { return kmem_cache_alloc(fasync_cache, GFP_KERNEL); } /* * NOTE! This can be used only for unused fasync entries: * entries that actually got inserted on the fasync list * need to be released by rcu - see fasync_remove_entry. */ void fasync_free(struct fasync_struct *new) { kmem_cache_free(fasync_cache, new); } /* * Insert a new entry into the fasync list. Return the pointer to the * old one if we didn't use the new one. * * NOTE! It is very important that the FASYNC flag always * match the state "is the filp on a fasync list". */ struct fasync_struct *fasync_insert_entry(int fd, struct file *filp, struct fasync_struct **fapp, struct fasync_struct *new) { struct fasync_struct *fa, **fp; spin_lock(&filp->f_lock); spin_lock(&fasync_lock); for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) { if (fa->fa_file != filp) continue; write_lock_irq(&fa->fa_lock); fa->fa_fd = fd; write_unlock_irq(&fa->fa_lock); goto out; } rwlock_init(&new->fa_lock); new->magic = FASYNC_MAGIC; new->fa_file = filp; new->fa_fd = fd; new->fa_next = *fapp; rcu_assign_pointer(*fapp, new); filp->f_flags |= FASYNC; out: spin_unlock(&fasync_lock); spin_unlock(&filp->f_lock); return fa; } /* * Add a fasync entry. Return negative on error, positive if * added, and zero if did nothing but change an existing one. */ static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp) { struct fasync_struct *new; new = fasync_alloc(); if (!new) return -ENOMEM; /* * fasync_insert_entry() returns the old (update) entry if * it existed. * * So free the (unused) new entry and return 0 to let the * caller know that we didn't add any new fasync entries. */ if (fasync_insert_entry(fd, filp, fapp, new)) { fasync_free(new); return 0; } return 1; } /* * fasync_helper() is used by almost all character device drivers * to set up the fasync queue, and for regular files by the file * lease code. It returns negative on error, 0 if it did no changes * and positive if it added/deleted the entry. */ int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp) { if (!on) return fasync_remove_entry(filp, fapp); return fasync_add_entry(fd, filp, fapp); } EXPORT_SYMBOL(fasync_helper); /* * rcu_read_lock() is held */ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band) { while (fa) { struct fown_struct *fown; unsigned long flags; if (fa->magic != FASYNC_MAGIC) { printk(KERN_ERR "kill_fasync: bad magic number in " "fasync_struct!\n"); return; } read_lock_irqsave(&fa->fa_lock, flags); if (fa->fa_file) { fown = &fa->fa_file->f_owner; /* Don't send SIGURG to processes which have not set a queued signum: SIGURG has its own default signalling mechanism. 
*/ if (!(sig == SIGURG && fown->signum == 0)) send_sigio(fown, fa->fa_fd, band); } read_unlock_irqrestore(&fa->fa_lock, flags); fa = rcu_dereference(fa->fa_next); } } void kill_fasync(struct fasync_struct **fp, int sig, int band) { /* First a quick test without locking: usually * the list is empty. */ if (*fp) { rcu_read_lock(); kill_fasync_rcu(rcu_dereference(*fp), sig, band); rcu_read_unlock(); } } EXPORT_SYMBOL(kill_fasync); static int __init fcntl_init(void) { /* * Please add new bits here to ensure allocation uniqueness. * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY * is defined as O_NONBLOCK on some platforms and not on others. */ BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( (VALID_OPEN_FLAGS & ~(O_NONBLOCK | O_NDELAY)) | __FMODE_EXEC | __FMODE_NONOTIFY)); fasync_cache = kmem_cache_create("fasync_cache", sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL); return 0; } module_init(fcntl_init)
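/*
 * Userspace sketch of the machinery implemented above: F_SETOWN picks the
 * SIGIO recipient (f_setown()), F_SETSIG can select a different/realtime
 * signal (filp->f_owner.signum), and turning on O_ASYNC through F_SETFL makes
 * setfl() call ->fasync(), which adds the file to a fasync list and sets
 * FASYNC.  Error handling is omitted and the fd is assumed to support O_ASYNC.
 */
#include <fcntl.h>
#include <signal.h>
#include <unistd.h>

static void on_sigio(int sig)
{
	/* the owned fd became readable/writable; poll or read it here */
}

static void enable_async_io(int fd)
{
	signal(SIGIO, on_sigio);			/* default signal when F_SETSIG is unset */
	fcntl(fd, F_SETOWN, getpid());			/* deliver SIGIO to this process */
	fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_ASYNC); /* triggers ->fasync() above */
}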
// SPDX-License-Identifier: GPL-2.0 #include <linux/fs.h> #include <linux/sched.h> #include <linux/slab.h> #include "internal.h" #include "mount.h" static DEFINE_SPINLOCK(pin_lock); void pin_remove(struct fs_pin *pin) { spin_lock(&pin_lock); hlist_del_init(&pin->m_list); hlist_del_init(&pin->s_list); spin_unlock(&pin_lock); spin_lock_irq(&pin->wait.lock); pin->done = 1; wake_up_locked(&pin->wait); spin_unlock_irq(&pin->wait.lock); } void pin_insert_group(struct fs_pin *pin, struct vfsmount *m, struct hlist_head *p) { spin_lock(&pin_lock); if (p) hlist_add_head(&pin->s_list, p); hlist_add_head(&pin->m_list, &real_mount(m)->mnt_pins); spin_unlock(&pin_lock); } void pin_insert(struct fs_pin *pin, struct vfsmount *m) { pin_insert_group(pin, m, &m->mnt_sb->s_pins); } void pin_kill(struct fs_pin *p) { wait_queue_entry_t wait; if (!p) { rcu_read_unlock(); return; } init_wait(&wait); spin_lock_irq(&p->wait.lock); if (likely(!p->done)) { p->done = -1; spin_unlock_irq(&p->wait.lock); rcu_read_unlock(); p->kill(p); return; } if (p->done > 0) { spin_unlock_irq(&p->wait.lock); rcu_read_unlock(); return; } __add_wait_queue(&p->wait, &wait); while (1) { set_current_state(TASK_UNINTERRUPTIBLE); spin_unlock_irq(&p->wait.lock); rcu_read_unlock(); schedule(); rcu_read_lock(); if (likely(list_empty(&wait.entry))) break; /* OK, we know p couldn't have been freed yet */ spin_lock_irq(&p->wait.lock); if (p->done > 0) { spin_unlock_irq(&p->wait.lock); break; } } rcu_read_unlock(); } void mnt_pin_kill(struct mount *m) { while (1) { struct hlist_node *p; rcu_read_lock(); p = READ_ONCE(m->mnt_pins.first); if (!p) { rcu_read_unlock(); break; } pin_kill(hlist_entry(p, struct fs_pin, m_list)); } } void group_pin_kill(struct hlist_head *p) { while (1) { struct hlist_node *q; rcu_read_lock(); q = READ_ONCE(p->first); if (!q) { rcu_read_unlock(); break; } pin_kill(hlist_entry(q, struct fs_pin, s_list)); } }
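/*
 * Hypothetical user of the fs_pin API above (loosely modeled on how BSD
 * process accounting pins the mount its file lives on; every name below is
 * illustrative, and init_fs_pin() is assumed to come from <linux/fs_pin.h>).
 * pin_kill() drops rcu_read_lock() before invoking ->kill(), and the callback
 * is expected to call pin_remove(), which sets ->done and wakes any waiters;
 * the object is freed via RCU because concurrent pin_kill() callers may still
 * inspect it under rcu_read_lock().
 */
#include <linux/fs_pin.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct pinned_thing {
	struct fs_pin pin;
	struct rcu_head rcu;
	/* ... state that must be torn down before the vfsmount goes away ... */
};

static void pinned_thing_kill(struct fs_pin *p)
{
	struct pinned_thing *t = container_of(p, struct pinned_thing, pin);

	/* release whatever this object holds on the mount, then: */
	pin_remove(p);			/* unhook from the mount/sb lists, wake waiters */
	kfree_rcu(t, rcu);		/* defer the free past any RCU readers */
}

static struct pinned_thing *pinned_thing_create(struct vfsmount *mnt)
{
	struct pinned_thing *t = kzalloc(sizeof(*t), GFP_KERNEL);

	if (!t)
		return NULL;
	init_fs_pin(&t->pin, pinned_thing_kill);
	pin_insert(&t->pin, mnt);	/* hang it off the mount and its superblock */
	return t;
}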
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_MMU_CONTEXT_H #define _ASM_X86_MMU_CONTEXT_H #include <asm/desc.h> #include <linux/atomic.h> #include <linux/mm_types.h> #include <linux/pkeys.h> #include <trace/events/tlb.h> #include <asm/pgalloc.h> #include <asm/tlbflush.h> #include <asm/paravirt.h> #include <asm/mpx.h> extern atomic64_t last_mm_ctx_id; #ifndef CONFIG_PARAVIRT static inline void paravirt_activate_mm(struct mm_struct *prev, struct mm_struct *next) { } #endif /* !CONFIG_PARAVIRT */ #ifdef CONFIG_PERF_EVENTS DECLARE_STATIC_KEY_FALSE(rdpmc_always_available_key); static inline void load_mm_cr4(struct mm_struct *mm) { if (static_branch_unlikely(&rdpmc_always_available_key) || atomic_read(&mm->context.perf_rdpmc_allowed)) cr4_set_bits(X86_CR4_PCE); else cr4_clear_bits(X86_CR4_PCE); } #else static inline void load_mm_cr4(struct mm_struct *mm) {} #endif #ifdef CONFIG_MODIFY_LDT_SYSCALL /* * ldt_structs can be allocated, used, and freed, but they are never * modified while live. */ struct ldt_struct { /* * Xen requires page-aligned LDTs with special permissions. This is * needed to prevent us from installing evil descriptors such as * call gates. On native, we could merge the ldt_struct and LDT * allocations, but it's not worth trying to optimize. */ struct desc_struct *entries; unsigned int nr_entries; /* * If PTI is in use, then the entries array is not mapped while we're * in user mode. The whole array will be aliased at the address * given by ldt_slot_va(slot). We use two slots so that we can allocate * and map, and enable a new LDT without invalidating the mapping * of an older, still-in-use LDT. * * slot will be -1 if this LDT doesn't have an alias mapping. */ int slot; }; /* This is a multiple of PAGE_SIZE. */ #define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE) static inline void *ldt_slot_va(int slot) { return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot); } /* * Used for LDT copy/destruction.
*/ static inline void init_new_context_ldt(struct mm_struct *mm) { mm->context.ldt = NULL; init_rwsem(&mm->context.ldt_usr_sem); } int ldt_dup_context(struct mm_struct *oldmm, struct mm_struct *mm); void destroy_context_ldt(struct mm_struct *mm); void ldt_arch_exit_mmap(struct mm_struct *mm); #else /* CONFIG_MODIFY_LDT_SYSCALL */ static inline void init_new_context_ldt(struct mm_struct *mm) { } static inline int ldt_dup_context(struct mm_struct *oldmm, struct mm_struct *mm) { return 0; } static inline void destroy_context_ldt(struct mm_struct *mm) { } static inline void ldt_arch_exit_mmap(struct mm_struct *mm) { } #endif static inline void load_mm_ldt(struct mm_struct *mm) { #ifdef CONFIG_MODIFY_LDT_SYSCALL struct ldt_struct *ldt; /* READ_ONCE synchronizes with smp_store_release */ ldt = READ_ONCE(mm->context.ldt); /* * Any change to mm->context.ldt is followed by an IPI to all * CPUs with the mm active. The LDT will not be freed until * after the IPI is handled by all such CPUs. This means that, * if the ldt_struct changes before we return, the values we see * will be safe, and the new values will be loaded before we run * any user code. * * NB: don't try to convert this to use RCU without extreme care. * We would still need IRQs off, because we don't want to change * the local LDT after an IPI loaded a newer value than the one * that we can see. */ if (unlikely(ldt)) { if (static_cpu_has(X86_FEATURE_PTI)) { if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) { /* * Whoops -- either the new LDT isn't mapped * (if slot == -1) or is mapped into a bogus * slot (if slot > 1). */ clear_LDT(); return; } /* * If page table isolation is enabled, ldt->entries * will not be mapped in the userspace pagetables. * Tell the CPU to access the LDT through the alias * at ldt_slot_va(ldt->slot). */ set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries); } else { set_ldt(ldt->entries, ldt->nr_entries); } } else { clear_LDT(); } #else clear_LDT(); #endif } static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next) { #ifdef CONFIG_MODIFY_LDT_SYSCALL /* * Load the LDT if either the old or new mm had an LDT. * * An mm will never go from having an LDT to not having an LDT. Two * mms never share an LDT, so we don't gain anything by checking to * see whether the LDT changed. There's also no guarantee that * prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL, * then prev->context.ldt will also be non-NULL. * * If we really cared, we could optimize the case where prev == next * and we're exiting lazy mode. Most of the time, if this happens, * we don't actually need to reload LDTR, but modify_ldt() is mostly * used by legacy code and emulators where we don't need this level of * performance. * * This uses | instead of || because it generates better code. */ if (unlikely((unsigned long)prev->context.ldt | (unsigned long)next->context.ldt)) load_mm_ldt(next); #endif DEBUG_LOCKS_WARN_ON(preemptible()); } void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk); /* * Init a new mm. Used on mm copies, like at fork() * and on mm's that are brand-new, like at execve(). 
*/ static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { mutex_init(&mm->context.lock); mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id); atomic64_set(&mm->context.tlb_gen, 0); #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS if (cpu_feature_enabled(X86_FEATURE_OSPKE)) { /* pkey 0 is the default and allocated implicitly */ mm->context.pkey_allocation_map = 0x1; /* -1 means unallocated or invalid */ mm->context.execute_only_pkey = -1; } #endif init_new_context_ldt(mm); return 0; } static inline void destroy_context(struct mm_struct *mm) { destroy_context_ldt(mm); } extern void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk); extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk); #define switch_mm_irqs_off switch_mm_irqs_off #define activate_mm(prev, next) \ do { \ paravirt_activate_mm((prev), (next)); \ switch_mm((prev), (next), NULL); \ } while (0); #ifdef CONFIG_X86_32 #define deactivate_mm(tsk, mm) \ do { \ lazy_load_gs(0); \ } while (0) #else #define deactivate_mm(tsk, mm) \ do { \ load_gs_index(0); \ loadsegment(fs, 0); \ } while (0) #endif static inline void arch_dup_pkeys(struct mm_struct *oldmm, struct mm_struct *mm) { #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) return; /* Duplicate the oldmm pkey state in mm: */ mm->context.pkey_allocation_map = oldmm->context.pkey_allocation_map; mm->context.execute_only_pkey = oldmm->context.execute_only_pkey; #endif } static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) { arch_dup_pkeys(oldmm, mm); paravirt_arch_dup_mmap(oldmm, mm); return ldt_dup_context(oldmm, mm); } static inline void arch_exit_mmap(struct mm_struct *mm) { paravirt_arch_exit_mmap(mm); ldt_arch_exit_mmap(mm); } #ifdef CONFIG_X86_64 static inline bool is_64bit_mm(struct mm_struct *mm) { return !IS_ENABLED(CONFIG_IA32_EMULATION) || !(mm->context.ia32_compat == TIF_IA32); } #else static inline bool is_64bit_mm(struct mm_struct *mm) { return false; } #endif static inline void arch_bprm_mm_init(struct mm_struct *mm, struct vm_area_struct *vma) { mpx_mm_init(mm); } static inline void arch_unmap(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long start, unsigned long end) { /* * mpx_notify_unmap() goes and reads a rarely-hot * cacheline in the mm_struct. That can be expensive * enough to be seen in profiles. * * The mpx_notify_unmap() call and its contents have been * observed to affect munmap() performance on hardware * where MPX is not present. * * The unlikely() optimizes for the fast case: no MPX * in the CPU, or no MPX use in the process. Even if * we get this wrong (in the unlikely event that MPX * is widely enabled on some system) the overhead of * MPX itself (reading bounds tables) is expected to * overwhelm the overhead of getting this unlikely() * consistently wrong. */ if (unlikely(cpu_feature_enabled(X86_FEATURE_MPX))) mpx_notify_unmap(mm, vma, start, end); } /* * We only want to enforce protection keys on the current process * because we effectively have no access to PKRU for other * processes or any way to tell *which * PKRU in a threaded * process we could use. * * So do not enforce things if the VMA is not from the current * mm, or if we are in a kernel thread. */ static inline bool vma_is_foreign(struct vm_area_struct *vma) { if (!current->mm) return true; /* * Should PKRU be enforced on the access to this VMA? 
If * the VMA is from another process, then PKRU has no * relevance and should not be enforced. */ if (current->mm != vma->vm_mm) return true; return false; } static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write, bool execute, bool foreign) { /* pkeys never affect instruction fetches */ if (execute) return true; /* allow access if the VMA is not one from this process */ if (foreign || vma_is_foreign(vma)) return true; return __pkru_allows_pkey(vma_pkey(vma), write); } /* * This can be used from process context to figure out what the value of * CR3 is without needing to do a (slow) __read_cr3(). * * It's intended to be used for code like KVM that sneakily changes CR3 * and needs to restore it. It needs to be used very carefully. */ static inline unsigned long __get_current_cr3_fast(void) { unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd, this_cpu_read(cpu_tlbstate.loaded_mm_asid)); /* For now, be very restrictive about when this can be called. */ VM_WARN_ON(in_nmi() || preemptible()); VM_BUG_ON(cr3 != __read_cr3()); return cr3; } #endif /* _ASM_X86_MMU_CONTEXT_H */
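/*
 * Illustrative user-space model of the access-check ordering implemented by
 * arch_vma_access_permitted() above: instruction fetches are never blocked by
 * pkeys, foreign VMAs are never blocked, and only then is PKRU consulted for
 * the VMA's key.  This is a hedged sketch, not kernel code: pkru_allows_pkey()
 * below is a simplified stand-in for __pkru_allows_pkey() (two bits per key,
 * access-disable then write-disable), and all names are invented for the
 * example.
 */
#include <stdbool.h>
#include <stdio.h>

static bool pkru_allows_pkey(unsigned int pkru, int pkey, bool write)
{
	bool ad = pkru & (1u << (2 * pkey));		/* access-disable */
	bool wd = pkru & (1u << (2 * pkey + 1));	/* write-disable  */

	if (ad)
		return false;
	return write ? !wd : true;
}

static bool model_vma_access_permitted(unsigned int pkru, int vma_pkey,
				       bool write, bool execute, bool foreign)
{
	if (execute)	/* pkeys never affect instruction fetches */
		return true;
	if (foreign)	/* VMA not from this process: not enforced */
		return true;
	return pkru_allows_pkey(pkru, vma_pkey, write);
}

int main(void)
{
	unsigned int pkru = 1u << 3;	/* write-disable protection key 1 */

	printf("read  pkey1: %d\n", model_vma_access_permitted(pkru, 1, false, false, false));
	printf("write pkey1: %d\n", model_vma_access_permitted(pkru, 1, true, false, false));
	printf("exec  pkey1: %d\n", model_vma_access_permitted(pkru, 1, false, true, false));
	return 0;
}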
/*
 * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
 *
 * Copyright (C) 2015 Martin Willi
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */

#include <crypto/algapi.h>
#include <crypto/chacha20.h>
#include <crypto/internal/skcipher.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <asm/fpu/api.h>
#include <asm/simd.h>

#define CHACHA20_STATE_ALIGN 16

asmlinkage void chacha20_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src);
asmlinkage void chacha20_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src);
#ifdef CONFIG_AS_AVX2
asmlinkage void chacha20_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src);
static bool chacha20_use_avx2;
#endif

static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src,
			    unsigned int bytes)
{
	u8 buf[CHACHA20_BLOCK_SIZE];

#ifdef CONFIG_AS_AVX2
	if (chacha20_use_avx2) {
		while (bytes >= CHACHA20_BLOCK_SIZE * 8) {
			chacha20_8block_xor_avx2(state, dst, src);
			bytes -= CHACHA20_BLOCK_SIZE * 8;
			src += CHACHA20_BLOCK_SIZE * 8;
			dst += CHACHA20_BLOCK_SIZE * 8;
			state[12] += 8;
		}
	}
#endif
	while (bytes >= CHACHA20_BLOCK_SIZE * 4) {
		chacha20_4block_xor_ssse3(state, dst, src);
		bytes -= CHACHA20_BLOCK_SIZE * 4;
		src += CHACHA20_BLOCK_SIZE * 4;
		dst += CHACHA20_BLOCK_SIZE * 4;
		state[12] += 4;
	}
	while (bytes >= CHACHA20_BLOCK_SIZE) {
		chacha20_block_xor_ssse3(state, dst, src);
		bytes -= CHACHA20_BLOCK_SIZE;
		src += CHACHA20_BLOCK_SIZE;
		dst += CHACHA20_BLOCK_SIZE;
		state[12]++;
	}
	if (bytes) {
		memcpy(buf, src, bytes);
		chacha20_block_xor_ssse3(state, buf, buf);
		memcpy(dst, buf, bytes);
	}
}

static int chacha20_simd(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
	u32 *state, state_buf[16 + 2] __aligned(8);
	struct skcipher_walk walk;
	int err;

	BUILD_BUG_ON(CHACHA20_STATE_ALIGN != 16);
	state = PTR_ALIGN(state_buf + 0, CHACHA20_STATE_ALIGN);

	if (req->cryptlen <= CHACHA20_BLOCK_SIZE || !may_use_simd())
		return crypto_chacha20_crypt(req);

	err = skcipher_walk_virt(&walk, req, true);

	crypto_chacha20_init(state, ctx, walk.iv);

	kernel_fpu_begin();

	while (walk.nbytes >= CHACHA20_BLOCK_SIZE) {
		chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
				rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE));
		err = skcipher_walk_done(&walk,
					 walk.nbytes % CHACHA20_BLOCK_SIZE);
	}

	if (walk.nbytes) {
		chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
				walk.nbytes);
		err = skcipher_walk_done(&walk, 0);
	}

	kernel_fpu_end();

	return err;
}

static struct skcipher_alg alg = {
	.base.cra_name		= "chacha20",
	.base.cra_driver_name	= "chacha20-simd",
	.base.cra_priority	= 300,
	.base.cra_blocksize	= 1,
	.base.cra_ctxsize	= sizeof(struct chacha20_ctx),
	.base.cra_module	= THIS_MODULE,

	.min_keysize		= CHACHA20_KEY_SIZE,
	.max_keysize		= CHACHA20_KEY_SIZE,
	.ivsize			= CHACHA20_IV_SIZE,
	.chunksize		=
CHACHA20_BLOCK_SIZE, .setkey = crypto_chacha20_setkey, .encrypt = chacha20_simd, .decrypt = chacha20_simd, }; static int __init chacha20_simd_mod_init(void) { if (!boot_cpu_has(X86_FEATURE_SSSE3)) return -ENODEV; #ifdef CONFIG_AS_AVX2 chacha20_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_AVX2) && cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL); #endif return crypto_register_skcipher(&alg); } static void __exit chacha20_simd_mod_fini(void) { crypto_unregister_skcipher(&alg); } module_init(chacha20_simd_mod_init); module_exit(chacha20_simd_mod_fini); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Martin Willi <martin@strongswan.org>"); MODULE_DESCRIPTION("chacha20 cipher algorithm, SIMD accelerated"); MODULE_ALIAS_CRYPTO("chacha20"); MODULE_ALIAS_CRYPTO("chacha20-simd");
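/*
 * Plain arithmetic sketch of how chacha20_dosimd() above carves a request
 * into SIMD batches: 8-block AVX2 chunks first (when available), then
 * 4-block SSSE3 chunks, then single blocks, with any remainder bounced
 * through a stack buffer.  Assumes CHACHA20_BLOCK_SIZE is 64 bytes (RFC 7539);
 * this is a user-space illustration, not part of the driver.
 */
#include <stdio.h>

#define DEMO_BLOCK 64

static void plan(unsigned int bytes, int have_avx2)
{
	unsigned int n8 = 0, n4, n1, tail;

	if (have_avx2) {
		n8 = bytes / (DEMO_BLOCK * 8);
		bytes -= n8 * DEMO_BLOCK * 8;
	}
	n4 = bytes / (DEMO_BLOCK * 4);
	bytes -= n4 * DEMO_BLOCK * 4;
	n1 = bytes / DEMO_BLOCK;
	tail = bytes % DEMO_BLOCK;	/* copied through the bounce buffer */

	printf("8-block: %u, 4-block: %u, 1-block: %u, tail bytes: %u\n",
	       n8, n4, n1, tail);
}

int main(void)
{
	plan(1000, 1);	/* with AVX2: 1x512 + 1x256 + 3x64 + 40-byte tail */
	plan(1000, 0);	/* SSSE3 only: 3x256 + 3x64 + 40-byte tail */
	return 0;
}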
/*
 * Copyright 2008 Red Hat, Inc. All rights reserved.
 * Copyright 2008 Ian Kent <raven@themaw.net>
 *
 * This file is part of the Linux kernel and is made available under
 * the terms of the GNU General Public License, version 2, or at your
 * option, any later version, incorporated herein by reference.
*/ #include <linux/miscdevice.h> #include <linux/compat.h> #include <linux/syscalls.h> #include <linux/magic.h> #include "autofs_i.h" /* * This module implements an interface for routing autofs ioctl control * commands via a miscellaneous device file. * * The alternate interface is needed because we need to be able open * an ioctl file descriptor on an autofs mount that may be covered by * another mount. This situation arises when starting automount(8) * or other user space daemon which uses direct mounts or offset * mounts (used for autofs lazy mount/umount of nested mount trees), * which have been left busy at at service shutdown. */ typedef int (*ioctl_fn)(struct file *, struct autofs_sb_info *, struct autofs_dev_ioctl *); static int check_name(const char *name) { if (!strchr(name, '/')) return -EINVAL; return 0; } /* * Check a string doesn't overrun the chunk of * memory we copied from user land. */ static int invalid_str(char *str, size_t size) { if (memchr(str, 0, size)) return 0; return -EINVAL; } /* * Check that the user compiled against correct version of autofs * misc device code. * * As well as checking the version compatibility this always copies * the kernel interface version out. */ static int check_dev_ioctl_version(int cmd, struct autofs_dev_ioctl *param) { int err = 0; if ((param->ver_major != AUTOFS_DEV_IOCTL_VERSION_MAJOR) || (param->ver_minor > AUTOFS_DEV_IOCTL_VERSION_MINOR)) { pr_warn("ioctl control interface version mismatch: " "kernel(%u.%u), user(%u.%u), cmd(0x%08x)\n", AUTOFS_DEV_IOCTL_VERSION_MAJOR, AUTOFS_DEV_IOCTL_VERSION_MINOR, param->ver_major, param->ver_minor, cmd); err = -EINVAL; } /* Fill in the kernel version. */ param->ver_major = AUTOFS_DEV_IOCTL_VERSION_MAJOR; param->ver_minor = AUTOFS_DEV_IOCTL_VERSION_MINOR; return err; } /* * Copy parameter control struct, including a possible path allocated * at the end of the struct. */ static struct autofs_dev_ioctl * copy_dev_ioctl(struct autofs_dev_ioctl __user *in) { struct autofs_dev_ioctl tmp, *res; if (copy_from_user(&tmp, in, AUTOFS_DEV_IOCTL_SIZE)) return ERR_PTR(-EFAULT); if (tmp.size < AUTOFS_DEV_IOCTL_SIZE) return ERR_PTR(-EINVAL); if (tmp.size > AUTOFS_DEV_IOCTL_SIZE + PATH_MAX) return ERR_PTR(-ENAMETOOLONG); res = memdup_user(in, tmp.size); if (!IS_ERR(res)) res->size = tmp.size; return res; } static inline void free_dev_ioctl(struct autofs_dev_ioctl *param) { kfree(param); } /* * Check sanity of parameter control fields and if a path is present * check that it is terminated and contains at least one "/". */ static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param) { int err; err = check_dev_ioctl_version(cmd, param); if (err) { pr_warn("invalid device control module version " "supplied for cmd(0x%08x)\n", cmd); goto out; } if (param->size > AUTOFS_DEV_IOCTL_SIZE) { err = invalid_str(param->path, param->size - AUTOFS_DEV_IOCTL_SIZE); if (err) { pr_warn( "path string terminator missing for cmd(0x%08x)\n", cmd); goto out; } err = check_name(param->path); if (err) { pr_warn("invalid path supplied for cmd(0x%08x)\n", cmd); goto out; } } else { unsigned int inr = _IOC_NR(cmd); if (inr == AUTOFS_DEV_IOCTL_OPENMOUNT_CMD || inr == AUTOFS_DEV_IOCTL_REQUESTER_CMD || inr == AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD) { err = -EINVAL; goto out; } } err = 0; out: return err; } /* * Get the autofs super block info struct from the file opened on * the autofs mount point. 
*/ static struct autofs_sb_info *autofs_dev_ioctl_sbi(struct file *f) { struct autofs_sb_info *sbi = NULL; struct inode *inode; if (f) { inode = file_inode(f); sbi = autofs_sbi(inode->i_sb); } return sbi; } /* Return autofs dev ioctl version */ static int autofs_dev_ioctl_version(struct file *fp, struct autofs_sb_info *sbi, struct autofs_dev_ioctl *param) { /* This should have already been set. */ param->ver_major = AUTOFS_DEV_IOCTL_VERSION_MAJOR; param->ver_minor = AUTOFS_DEV_IOCTL_VERSION_MINOR; return 0; } /* Return autofs module protocol version */ static int autofs_dev_ioctl_protover(struct file *fp, struct autofs_sb_info *sbi, struct autofs_dev_ioctl *param) { param->protover.version = sbi->version; return 0; } /* Return autofs module protocol sub version */ static int autofs_dev_ioctl_protosubver(struct file *fp, struct autofs_sb_info *sbi, struct autofs_dev_ioctl *param) { param->protosubver.sub_version = sbi->sub_version; return 0; } /* Find the topmost mount satisfying test() */ static int find_autofs_mount(const char *pathname, struct path *res, int test(const struct path *path, void *data), void *data) { struct path path; int err; err = kern_path_mountpoint(AT_FDCWD, pathname, &path, 0); if (err) return err; err = -ENOENT; while (path.dentry == path.mnt->mnt_root) { if (path.dentry->d_sb->s_magic == AUTOFS_SUPER_MAGIC) { if (test(&path, data)) { path_get(&path); *res = path; err = 0; break; } } if (!follow_up(&path)) break; } path_put(&path); return err; } static int test_by_dev(const struct path *path, void *p) { return path->dentry->d_sb->s_dev == *(dev_t *)p; } static int test_by_type(const struct path *path, void *p) { struct autofs_info *ino = autofs_dentry_ino(path->dentry); return ino && ino->sbi->type & *(unsigned *)p; } /* * Open a file descriptor on the autofs mount point corresponding * to the given path and device number (aka. new_encode_dev(sb->s_dev)). */ static int autofs_dev_ioctl_open_mountpoint(const char *name, dev_t devid) { int err, fd; fd = get_unused_fd_flags(O_CLOEXEC); if (likely(fd >= 0)) { struct file *filp; struct path path; err = find_autofs_mount(name, &path, test_by_dev, &devid); if (err) goto out; filp = dentry_open(&path, O_RDONLY, current_cred()); path_put(&path); if (IS_ERR(filp)) { err = PTR_ERR(filp); goto out; } fd_install(fd, filp); } return fd; out: put_unused_fd(fd); return err; } /* Open a file descriptor on an autofs mount point */ static int autofs_dev_ioctl_openmount(struct file *fp, struct autofs_sb_info *sbi, struct autofs_dev_ioctl *param) { const char *path; dev_t devid; int err, fd; /* param->path has been checked in validate_dev_ioctl() */ if (!param->openmount.devid) return -EINVAL; param->ioctlfd = -1; path = param->path; devid = new_decode_dev(param->openmount.devid); err = 0; fd = autofs_dev_ioctl_open_mountpoint(path, devid); if (unlikely(fd < 0)) { err = fd; goto out; } param->ioctlfd = fd; out: return err; } /* Close file descriptor allocated above (user can also use close(2)). */ static int autofs_dev_ioctl_closemount(struct file *fp, struct autofs_sb_info *sbi, struct autofs_dev_ioctl *param) { return ksys_close(param->ioctlfd); } /* * Send "ready" status for an existing wait (either a mount or an expire * request). 
*/ static int autofs_dev_ioctl_ready(struct file *fp, struct autofs_sb_info *sbi, struct autofs_dev_ioctl *param) { autofs_wqt_t token; token = (autofs_wqt_t) param->ready.token; return autofs_wait_release(sbi, token, 0); } /* * Send "fail" status for an existing wait (either a mount or an expire * request). */ static int autofs_dev_ioctl_fail(struct file *fp, struct autofs_sb_info *sbi, struct autofs_dev_ioctl *param) { autofs_wqt_t token; int status; token = (autofs_wqt_t) param->fail.token; status = param->fail.status < 0 ? param->fail.status : -ENOENT; return autofs_wait_release(sbi, token, status); } /* * Set the pipe fd for kernel communication to the daemon. * * Normally this is set at mount using an option but if we * are reconnecting to a busy mount then we need to use this * to tell the autofs mount about the new kernel pipe fd. In * order to protect mounts against incorrectly setting the * pipefd we also require that the autofs mount be catatonic. * * This also sets the process group id used to identify the * controlling process (eg. the owning automount(8) daemon). */ static int autofs_dev_ioctl_setpipefd(struct file *fp, struct autofs_sb_info *sbi, struct autofs_dev_ioctl *param) { int pipefd; int err = 0; struct pid *new_pid = NULL; if (param->setpipefd.pipefd == -1) return -EINVAL; pipefd = param->setpipefd.pipefd; mutex_lock(&sbi->wq_mutex); if (!sbi->catatonic) { mutex_unlock(&sbi->wq_mutex); return -EBUSY; } else { struct file *pipe; new_pid = get_task_pid(current, PIDTYPE_PGID); if (ns_of_pid(new_pid) != ns_of_pid(sbi->oz_pgrp)) { pr_warn("not allowed to change PID namespace\n"); err = -EINVAL; goto out; } pipe = fget(pipefd); if (!pipe) { err = -EBADF; goto out; } if (autofs_prepare_pipe(pipe) < 0) { err = -EPIPE; fput(pipe); goto out; } swap(sbi->oz_pgrp, new_pid); sbi->pipefd = pipefd; sbi->pipe = pipe; sbi->catatonic = 0; } out: put_pid(new_pid); mutex_unlock(&sbi->wq_mutex); return err; } /* * Make the autofs mount point catatonic, no longer responsive to * mount requests. Also closes the kernel pipe file descriptor. */ static int autofs_dev_ioctl_catatonic(struct file *fp, struct autofs_sb_info *sbi, struct autofs_dev_ioctl *param) { autofs_catatonic_mode(sbi); return 0; } /* Set the autofs mount timeout */ static int autofs_dev_ioctl_timeout(struct file *fp, struct autofs_sb_info *sbi, struct autofs_dev_ioctl *param) { unsigned long timeout; timeout = param->timeout.timeout; param->timeout.timeout = sbi->exp_timeout / HZ; sbi->exp_timeout = timeout * HZ; return 0; } /* * Return the uid and gid of the last request for the mount * * When reconstructing an autofs mount tree with active mounts * we need to re-connect to mounts that may have used the original * process uid and gid (or string variations of them) for mount * lookups within the map entry. 
*/ static int autofs_dev_ioctl_requester(struct file *fp, struct autofs_sb_info *sbi, struct autofs_dev_ioctl *param) { struct autofs_info *ino; struct path path; dev_t devid; int err = -ENOENT; /* param->path has been checked in validate_dev_ioctl() */ devid = sbi->sb->s_dev; param->requester.uid = param->requester.gid = -1; err = find_autofs_mount(param->path, &path, test_by_dev, &devid); if (err) goto out; ino = autofs_dentry_ino(path.dentry); if (ino) { err = 0; autofs_expire_wait(&path, 0); spin_lock(&sbi->fs_lock); param->requester.uid = from_kuid_munged(current_user_ns(), ino->uid); param->requester.gid = from_kgid_munged(current_user_ns(), ino->gid); spin_unlock(&sbi->fs_lock); } path_put(&path); out: return err; } /* * Call repeatedly until it returns -EAGAIN, meaning there's nothing * more that can be done. */ static int autofs_dev_ioctl_expire(struct file *fp, struct autofs_sb_info *sbi, struct autofs_dev_ioctl *param) { struct vfsmount *mnt; int how; how = param->expire.how; mnt = fp->f_path.mnt; return autofs_do_expire_multi(sbi->sb, mnt, sbi, how); } /* Check if autofs mount point is in use */ static int autofs_dev_ioctl_askumount(struct file *fp, struct autofs_sb_info *sbi, struct autofs_dev_ioctl *param) { param->askumount.may_umount = 0; if (may_umount(fp->f_path.mnt)) param->askumount.may_umount = 1; return 0; } /* * Check if the given path is a mountpoint. * * If we are supplied with the file descriptor of an autofs * mount we're looking for a specific mount. In this case * the path is considered a mountpoint if it is itself a * mountpoint or contains a mount, such as a multi-mount * without a root mount. In this case we return 1 if the * path is a mount point and the super magic of the covering * mount if there is one or 0 if it isn't a mountpoint. * * If we aren't supplied with a file descriptor then we * lookup the path and check if it is the root of a mount. * If a type is given we are looking for a particular autofs * mount and if we don't find a match we return fail. If the * located path is the root of a mount we return 1 along with * the super magic of the mount or 0 otherwise. * * In both cases the the device number (as returned by * new_encode_dev()) is also returned. */ static int autofs_dev_ioctl_ismountpoint(struct file *fp, struct autofs_sb_info *sbi, struct autofs_dev_ioctl *param) { struct path path; const char *name; unsigned int type; unsigned int devid, magic; int err = -ENOENT; /* param->path has been checked in validate_dev_ioctl() */ name = param->path; type = param->ismountpoint.in.type; param->ismountpoint.out.devid = devid = 0; param->ismountpoint.out.magic = magic = 0; if (!fp || param->ioctlfd == -1) { if (autofs_type_any(type)) err = kern_path_mountpoint(AT_FDCWD, name, &path, LOOKUP_FOLLOW); else err = find_autofs_mount(name, &path, test_by_type, &type); if (err) goto out; devid = new_encode_dev(path.dentry->d_sb->s_dev); err = 0; if (path.mnt->mnt_root == path.dentry) { err = 1; magic = path.dentry->d_sb->s_magic; } } else { dev_t dev = sbi->sb->s_dev; err = find_autofs_mount(name, &path, test_by_dev, &dev); if (err) goto out; devid = new_encode_dev(dev); err = path_has_submounts(&path); if (follow_down_one(&path)) magic = path.dentry->d_sb->s_magic; } param->ismountpoint.out.devid = devid; param->ismountpoint.out.magic = magic; path_put(&path); out: return err; } /* * Our range of ioctl numbers isn't 0 based so we need to shift * the array index by _IOC_NR(AUTOFS_CTL_IOC_FIRST) for the table * lookup. 
*/ #define cmd_idx(cmd) (cmd - _IOC_NR(AUTOFS_DEV_IOCTL_IOC_FIRST)) static ioctl_fn lookup_dev_ioctl(unsigned int cmd) { static ioctl_fn _ioctls[] = { autofs_dev_ioctl_version, autofs_dev_ioctl_protover, autofs_dev_ioctl_protosubver, autofs_dev_ioctl_openmount, autofs_dev_ioctl_closemount, autofs_dev_ioctl_ready, autofs_dev_ioctl_fail, autofs_dev_ioctl_setpipefd, autofs_dev_ioctl_catatonic, autofs_dev_ioctl_timeout, autofs_dev_ioctl_requester, autofs_dev_ioctl_expire, autofs_dev_ioctl_askumount, autofs_dev_ioctl_ismountpoint, }; unsigned int idx = cmd_idx(cmd); return (idx >= ARRAY_SIZE(_ioctls)) ? NULL : _ioctls[idx]; } /* ioctl dispatcher */ static int _autofs_dev_ioctl(unsigned int command, struct autofs_dev_ioctl __user *user) { struct autofs_dev_ioctl *param; struct file *fp; struct autofs_sb_info *sbi; unsigned int cmd_first, cmd; ioctl_fn fn = NULL; int err = 0; cmd_first = _IOC_NR(AUTOFS_DEV_IOCTL_IOC_FIRST); cmd = _IOC_NR(command); if (_IOC_TYPE(command) != _IOC_TYPE(AUTOFS_DEV_IOCTL_IOC_FIRST) || cmd - cmd_first > AUTOFS_DEV_IOCTL_IOC_COUNT) { return -ENOTTY; } /* Only root can use ioctls other than AUTOFS_DEV_IOCTL_VERSION_CMD * and AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD */ if (cmd != AUTOFS_DEV_IOCTL_VERSION_CMD && cmd != AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD && !capable(CAP_SYS_ADMIN)) return -EPERM; /* Copy the parameters into kernel space. */ param = copy_dev_ioctl(user); if (IS_ERR(param)) return PTR_ERR(param); err = validate_dev_ioctl(command, param); if (err) goto out; fn = lookup_dev_ioctl(cmd); if (!fn) { pr_warn("unknown command 0x%08x\n", command); err = -ENOTTY; goto out; } fp = NULL; sbi = NULL; /* * For obvious reasons the openmount can't have a file * descriptor yet. We don't take a reference to the * file during close to allow for immediate release, * and the same for retrieving ioctl version. */ if (cmd != AUTOFS_DEV_IOCTL_VERSION_CMD && cmd != AUTOFS_DEV_IOCTL_OPENMOUNT_CMD && cmd != AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD) { fp = fget(param->ioctlfd); if (!fp) { if (cmd == AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD) goto cont; err = -EBADF; goto out; } sbi = autofs_dev_ioctl_sbi(fp); if (!sbi || sbi->magic != AUTOFS_SBI_MAGIC) { err = -EINVAL; fput(fp); goto out; } /* * Admin needs to be able to set the mount catatonic in * order to be able to perform the re-open. 
*/ if (!autofs_oz_mode(sbi) && cmd != AUTOFS_DEV_IOCTL_CATATONIC_CMD) { err = -EACCES; fput(fp); goto out; } } cont: err = fn(fp, sbi, param); if (fp) fput(fp); if (err >= 0 && copy_to_user(user, param, AUTOFS_DEV_IOCTL_SIZE)) err = -EFAULT; out: free_dev_ioctl(param); return err; } static long autofs_dev_ioctl(struct file *file, unsigned int command, unsigned long u) { int err; err = _autofs_dev_ioctl(command, (struct autofs_dev_ioctl __user *) u); return (long) err; } #ifdef CONFIG_COMPAT static long autofs_dev_ioctl_compat(struct file *file, unsigned int command, unsigned long u) { return autofs_dev_ioctl(file, command, (unsigned long) compat_ptr(u)); } #else #define autofs_dev_ioctl_compat NULL #endif static const struct file_operations _dev_ioctl_fops = { .unlocked_ioctl = autofs_dev_ioctl, .compat_ioctl = autofs_dev_ioctl_compat, .owner = THIS_MODULE, .llseek = noop_llseek, }; static struct miscdevice _autofs_dev_ioctl_misc = { .minor = AUTOFS_MINOR, .name = AUTOFS_DEVICE_NAME, .fops = &_dev_ioctl_fops, .mode = 0644, }; MODULE_ALIAS_MISCDEV(AUTOFS_MINOR); MODULE_ALIAS("devname:autofs"); /* Register/deregister misc character device */ int __init autofs_dev_ioctl_init(void) { int r; r = misc_register(&_autofs_dev_ioctl_misc); if (r) { pr_err("misc_register failed for control device\n"); return r; } return 0; } void autofs_dev_ioctl_exit(void) { misc_deregister(&_autofs_dev_ioctl_misc); }
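/*
 * User-space sketch of the size convention that copy_dev_ioctl() and
 * validate_dev_ioctl() above enforce: the fixed-size control struct may be
 * followed directly by a NUL-terminated path, and param->size must cover
 * both (and stay within header + PATH_MAX).  The struct below is a
 * simplified, hypothetical stand-in for struct autofs_dev_ioctl, so only
 * the size arithmetic is meaningful here.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct demo_dev_ioctl {
	unsigned int ver_major;
	unsigned int ver_minor;
	unsigned int size;	/* header plus appended path, if any */
	char path[];		/* optional NUL-terminated path */
};

static struct demo_dev_ioctl *build_param(const char *path)
{
	size_t hdr = sizeof(struct demo_dev_ioctl);
	size_t len = path ? strlen(path) + 1 : 0;
	struct demo_dev_ioctl *p = calloc(1, hdr + len);

	if (!p)
		return NULL;
	p->size = hdr + len;
	if (path)
		memcpy(p->path, path, len);
	return p;
}

int main(void)
{
	struct demo_dev_ioctl *p = build_param("/mnt/auto/indirect");

	if (!p)
		return 1;
	/* The kernel rejects size < header, size > header + PATH_MAX, and,
	 * when a path is present, a missing NUL or missing '/'. */
	printf("header %zu, total %u, path \"%s\"\n",
	       sizeof(*p), p->size, p->path);
	free(p);
	return 0;
}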
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_TASK_WORK_H
#define _LINUX_TASK_WORK_H

#include <linux/list.h>
#include <linux/sched.h>

typedef void (*task_work_func_t)(struct callback_head *);

static inline void
init_task_work(struct callback_head *twork, task_work_func_t func)
{
	twork->func = func;
}

int task_work_add(struct task_struct *task, struct callback_head *twork, bool);
struct callback_head *task_work_cancel(struct task_struct *, task_work_func_t);
void task_work_run(void);

static inline void exit_task_work(struct task_struct *task)
{
	task_work_run();
}

#endif	/* _LINUX_TASK_WORK_H */
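/*
 * Tiny user-space model of the callback pattern declared in task_work.h
 * above: a callback_head embedded in the caller's object is initialised
 * with a function pointer, queued on a per-task list, and run exactly once
 * later.  The list handling below (a LIFO singly linked list) is a
 * simplification for illustration, not the kernel implementation.
 */
#include <stddef.h>
#include <stdio.h>

struct callback_head {
	struct callback_head *next;
	void (*func)(struct callback_head *);
};

static struct callback_head *pending;	/* per-task in the kernel */

static void demo_task_work_add(struct callback_head *work)
{
	work->next = pending;
	pending = work;
}

static void demo_task_work_run(void)
{
	while (pending) {
		struct callback_head *work = pending;

		pending = work->next;
		work->func(work);
	}
}

struct my_ctx {
	struct callback_head cb;
	int value;
};

static void my_func(struct callback_head *head)
{
	/* container_of() in the kernel; plain offset arithmetic here. */
	struct my_ctx *ctx = (struct my_ctx *)((char *)head - offsetof(struct my_ctx, cb));

	printf("task work ran, value=%d\n", ctx->value);
}

int main(void)
{
	struct my_ctx ctx = { .cb = { .func = my_func }, .value = 42 };

	demo_task_work_add(&ctx.cb);
	demo_task_work_run();	/* exit_task_work() does this at task exit */
	return 0;
}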
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_HWEIGHT_H
#define _ASM_X86_HWEIGHT_H

#include <asm/cpufeatures.h>

#ifdef CONFIG_64BIT
/* popcnt %edi, %eax */
#define POPCNT32 ".byte 0xf3,0x0f,0xb8,0xc7"
/* popcnt %rdi, %rax */
#define POPCNT64 ".byte 0xf3,0x48,0x0f,0xb8,0xc7"
#define REG_IN "D"
#define REG_OUT "a"
#else
/* popcnt %eax, %eax */
#define POPCNT32 ".byte 0xf3,0x0f,0xb8,0xc0"
#define REG_IN "a"
#define REG_OUT "a"
#endif

#define __HAVE_ARCH_SW_HWEIGHT

static __always_inline unsigned int __arch_hweight32(unsigned int w)
{
	unsigned int res;

	asm (ALTERNATIVE("call __sw_hweight32", POPCNT32, X86_FEATURE_POPCNT)
			 : "="REG_OUT (res)
			 : REG_IN (w));

	return res;
}

static inline unsigned int __arch_hweight16(unsigned int w)
{
	return __arch_hweight32(w & 0xffff);
}

static inline unsigned int __arch_hweight8(unsigned int w)
{
	return __arch_hweight32(w & 0xff);
}

#ifdef CONFIG_X86_32
static inline unsigned long __arch_hweight64(__u64 w)
{
	return __arch_hweight32((u32)w) +
	       __arch_hweight32((u32)(w >> 32));
}
#else
static __always_inline unsigned long __arch_hweight64(__u64 w)
{
	unsigned long res;

	asm (ALTERNATIVE("call __sw_hweight64", POPCNT64, X86_FEATURE_POPCNT)
			 : "="REG_OUT (res)
			 : REG_IN (w));

	return res;
}
#endif /* CONFIG_X86_32 */

#endif
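/*
 * User-space sanity check of what the header above computes: the
 * ALTERNATIVE() either calls a software hweight routine or patches in the
 * POPCNT instruction at boot.  The routine below is the classic SWAR
 * population count, shown as a sketch of the software path (not copied
 * from lib/hweight.c) and verified against the compiler builtin.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static unsigned int demo_hweight32(uint32_t w)
{
	w -= (w >> 1) & 0x55555555;
	w  = (w & 0x33333333) + ((w >> 2) & 0x33333333);
	w  = (w + (w >> 4)) & 0x0f0f0f0f;
	return (w * 0x01010101) >> 24;
}

int main(void)
{
	uint32_t samples[] = { 0, 1, 0xff, 0xffff, 0xdeadbeef, 0xffffffff };

	for (unsigned int i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		assert(demo_hweight32(samples[i]) ==
		       (unsigned int)__builtin_popcount(samples[i]));
		printf("hweight32(0x%08x) = %u\n",
		       samples[i], demo_hweight32(samples[i]));
	}
	return 0;
}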
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __LINUX_NODEMASK_H
#define __LINUX_NODEMASK_H

/*
 * Nodemasks provide a bitmap suitable for representing the
 * set of Node's in a system, one bit position per Node number.
 *
 * See detailed comments in the file linux/bitmap.h describing the
 * data type on which these nodemasks are based.
 *
 * For details of nodemask_parse_user(), see bitmap_parse_user() in
 * lib/bitmap.c.  For details of nodelist_parse(), see bitmap_parselist(),
 * also in bitmap.c.  For details of node_remap(), see bitmap_bitremap in
 * lib/bitmap.c.  For details of nodes_remap(), see bitmap_remap in
 * lib/bitmap.c.  For details of nodes_onto(), see bitmap_onto in
 * lib/bitmap.c.  For details of nodes_fold(), see bitmap_fold in
 * lib/bitmap.c.
 *
 * The available nodemask operations are:
 *
 * void node_set(node, mask)		turn on bit 'node' in mask
 * void node_clear(node, mask)		turn off bit 'node' in mask
 * void nodes_setall(mask)		set all bits
 * void nodes_clear(mask)		clear all bits
 * int node_isset(node, mask)		true iff bit 'node' set in mask
 * int node_test_and_set(node, mask)	test and set bit 'node' in mask
 *
 * void nodes_and(dst, src1, src2)	dst = src1 & src2  [intersection]
 * void nodes_or(dst, src1, src2)	dst = src1 | src2  [union]
 * void nodes_xor(dst, src1, src2)	dst = src1 ^ src2
 * void nodes_andnot(dst, src1, src2)	dst = src1 & ~src2
 * void nodes_complement(dst, src)	dst = ~src
 *
 * int nodes_equal(mask1, mask2)	Does mask1 == mask2?
* int nodes_intersects(mask1, mask2) Do mask1 and mask2 intersect? * int nodes_subset(mask1, mask2) Is mask1 a subset of mask2? * int nodes_empty(mask) Is mask empty (no bits sets)? * int nodes_full(mask) Is mask full (all bits sets)? * int nodes_weight(mask) Hamming weight - number of set bits * * void nodes_shift_right(dst, src, n) Shift right * void nodes_shift_left(dst, src, n) Shift left * * unsigned int first_node(mask) Number lowest set bit, or MAX_NUMNODES * unsigend int next_node(node, mask) Next node past 'node', or MAX_NUMNODES * unsigned int next_node_in(node, mask) Next node past 'node', or wrap to first, * or MAX_NUMNODES * unsigned int first_unset_node(mask) First node not set in mask, or * MAX_NUMNODES * * nodemask_t nodemask_of_node(node) Return nodemask with bit 'node' set * NODE_MASK_ALL Initializer - all bits set * NODE_MASK_NONE Initializer - no bits set * unsigned long *nodes_addr(mask) Array of unsigned long's in mask * * int nodemask_parse_user(ubuf, ulen, mask) Parse ascii string as nodemask * int nodelist_parse(buf, map) Parse ascii string as nodelist * int node_remap(oldbit, old, new) newbit = map(old, new)(oldbit) * void nodes_remap(dst, src, old, new) *dst = map(old, new)(src) * void nodes_onto(dst, orig, relmap) *dst = orig relative to relmap * void nodes_fold(dst, orig, sz) dst bits = orig bits mod sz * * for_each_node_mask(node, mask) for-loop node over mask * * int num_online_nodes() Number of online Nodes * int num_possible_nodes() Number of all possible Nodes * * int node_random(mask) Random node with set bit in mask * * int node_online(node) Is some node online? * int node_possible(node) Is some node possible? * * node_set_online(node) set bit 'node' in node_online_map * node_set_offline(node) clear bit 'node' in node_online_map * * for_each_node(node) for-loop node over node_possible_map * for_each_online_node(node) for-loop node over node_online_map * * Subtlety: * 1) The 'type-checked' form of node_isset() causes gcc (3.3.2, anyway) * to generate slightly worse code. So use a simple one-line #define * for node_isset(), instead of wrapping an inline inside a macro, the * way we do the other calls. * * NODEMASK_SCRATCH * When doing above logical AND, OR, XOR, Remap operations the callers tend to * need temporary nodemask_t's on the stack. But if NODES_SHIFT is large, * nodemask_t's consume too much stack space. NODEMASK_SCRATCH is a helper * for such situations. See below and CPUMASK_ALLOC also. */ #include <linux/kernel.h> #include <linux/threads.h> #include <linux/bitmap.h> #include <linux/numa.h> typedef struct { DECLARE_BITMAP(bits, MAX_NUMNODES); } nodemask_t; extern nodemask_t _unused_nodemask_arg_; /** * nodemask_pr_args - printf args to output a nodemask * @maskp: nodemask to be printed * * Can be used to provide arguments for '%*pb[l]' when printing a nodemask. */ #define nodemask_pr_args(maskp) __nodemask_pr_numnodes(maskp), \ __nodemask_pr_bits(maskp) static inline unsigned int __nodemask_pr_numnodes(const nodemask_t *m) { return m ? MAX_NUMNODES : 0; } static inline const unsigned long *__nodemask_pr_bits(const nodemask_t *m) { return m ? m->bits : NULL; } /* * The inline keyword gives the compiler room to decide to inline, or * not inline a function as it sees best. However, as these functions * are called in both __init and non-__init functions, if they are not * inlined we will end up with a section mis-match error (of the type of * freeable items not being freed). So we must use __always_inline here * to fix the problem. 
If other functions in the future also end up in * this situation they will also need to be annotated as __always_inline */ #define node_set(node, dst) __node_set((node), &(dst)) static __always_inline void __node_set(int node, volatile nodemask_t *dstp) { set_bit(node, dstp->bits); } #define node_clear(node, dst) __node_clear((node), &(dst)) static inline void __node_clear(int node, volatile nodemask_t *dstp) { clear_bit(node, dstp->bits); } #define nodes_setall(dst) __nodes_setall(&(dst), MAX_NUMNODES) static inline void __nodes_setall(nodemask_t *dstp, unsigned int nbits) { bitmap_fill(dstp->bits, nbits); } #define nodes_clear(dst) __nodes_clear(&(dst), MAX_NUMNODES) static inline void __nodes_clear(nodemask_t *dstp, unsigned int nbits) { bitmap_zero(dstp->bits, nbits); } /* No static inline type checking - see Subtlety (1) above. */ #define node_isset(node, nodemask) test_bit((node), (nodemask).bits) #define node_test_and_set(node, nodemask) \ __node_test_and_set((node), &(nodemask)) static inline bool __node_test_and_set(int node, nodemask_t *addr) { return test_and_set_bit(node, addr->bits); } #define nodes_and(dst, src1, src2) \ __nodes_and(&(dst), &(src1), &(src2), MAX_NUMNODES) static inline void __nodes_and(nodemask_t *dstp, const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { bitmap_and(dstp->bits, src1p->bits, src2p->bits, nbits); } #define nodes_or(dst, src1, src2) \ __nodes_or(&(dst), &(src1), &(src2), MAX_NUMNODES) static inline void __nodes_or(nodemask_t *dstp, const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { bitmap_or(dstp->bits, src1p->bits, src2p->bits, nbits); } #define nodes_xor(dst, src1, src2) \ __nodes_xor(&(dst), &(src1), &(src2), MAX_NUMNODES) static inline void __nodes_xor(nodemask_t *dstp, const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { bitmap_xor(dstp->bits, src1p->bits, src2p->bits, nbits); } #define nodes_andnot(dst, src1, src2) \ __nodes_andnot(&(dst), &(src1), &(src2), MAX_NUMNODES) static inline void __nodes_andnot(nodemask_t *dstp, const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits); } #define nodes_complement(dst, src) \ __nodes_complement(&(dst), &(src), MAX_NUMNODES) static inline void __nodes_complement(nodemask_t *dstp, const nodemask_t *srcp, unsigned int nbits) { bitmap_complement(dstp->bits, srcp->bits, nbits); } #define nodes_equal(src1, src2) \ __nodes_equal(&(src1), &(src2), MAX_NUMNODES) static inline bool __nodes_equal(const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { return bitmap_equal(src1p->bits, src2p->bits, nbits); } #define nodes_intersects(src1, src2) \ __nodes_intersects(&(src1), &(src2), MAX_NUMNODES) static inline bool __nodes_intersects(const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { return bitmap_intersects(src1p->bits, src2p->bits, nbits); } #define nodes_subset(src1, src2) \ __nodes_subset(&(src1), &(src2), MAX_NUMNODES) static inline bool __nodes_subset(const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { return bitmap_subset(src1p->bits, src2p->bits, nbits); } #define nodes_empty(src) __nodes_empty(&(src), MAX_NUMNODES) static inline bool __nodes_empty(const nodemask_t *srcp, unsigned int nbits) { return bitmap_empty(srcp->bits, nbits); } #define nodes_full(nodemask) __nodes_full(&(nodemask), MAX_NUMNODES) static inline bool __nodes_full(const nodemask_t *srcp, unsigned int nbits) { return bitmap_full(srcp->bits, nbits); 
} #define nodes_weight(nodemask) __nodes_weight(&(nodemask), MAX_NUMNODES) static inline int __nodes_weight(const nodemask_t *srcp, unsigned int nbits) { return bitmap_weight(srcp->bits, nbits); } #define nodes_shift_right(dst, src, n) \ __nodes_shift_right(&(dst), &(src), (n), MAX_NUMNODES) static inline void __nodes_shift_right(nodemask_t *dstp, const nodemask_t *srcp, int n, int nbits) { bitmap_shift_right(dstp->bits, srcp->bits, n, nbits); } #define nodes_shift_left(dst, src, n) \ __nodes_shift_left(&(dst), &(src), (n), MAX_NUMNODES) static inline void __nodes_shift_left(nodemask_t *dstp, const nodemask_t *srcp, int n, int nbits) { bitmap_shift_left(dstp->bits, srcp->bits, n, nbits); } /* FIXME: better would be to fix all architectures to never return > MAX_NUMNODES, then the silly min_ts could be dropped. */ #define first_node(src) __first_node(&(src)) static inline unsigned int __first_node(const nodemask_t *srcp) { return min_t(unsigned int, MAX_NUMNODES, find_first_bit(srcp->bits, MAX_NUMNODES)); } #define next_node(n, src) __next_node((n), &(src)) static inline unsigned int __next_node(int n, const nodemask_t *srcp) { return min_t(unsigned int, MAX_NUMNODES, find_next_bit(srcp->bits, MAX_NUMNODES, n+1)); } /* * Find the next present node in src, starting after node n, wrapping around to * the first node in src if needed. Returns MAX_NUMNODES if src is empty. */ #define next_node_in(n, src) __next_node_in((n), &(src)) unsigned int __next_node_in(int node, const nodemask_t *srcp); static inline void init_nodemask_of_node(nodemask_t *mask, int node) { nodes_clear(*mask); node_set(node, *mask); } #define nodemask_of_node(node) \ ({ \ typeof(_unused_nodemask_arg_) m; \ if (sizeof(m) == sizeof(unsigned long)) { \ m.bits[0] = 1UL << (node); \ } else { \ init_nodemask_of_node(&m, (node)); \ } \ m; \ }) #define first_unset_node(mask) __first_unset_node(&(mask)) static inline unsigned int __first_unset_node(const nodemask_t *maskp) { return min_t(unsigned int, MAX_NUMNODES, find_first_zero_bit(maskp->bits, MAX_NUMNODES)); } #define NODE_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(MAX_NUMNODES) #if MAX_NUMNODES <= BITS_PER_LONG #define NODE_MASK_ALL \ ((nodemask_t) { { \ [BITS_TO_LONGS(MAX_NUMNODES)-1] = NODE_MASK_LAST_WORD \ } }) #else #define NODE_MASK_ALL \ ((nodemask_t) { { \ [0 ... BITS_TO_LONGS(MAX_NUMNODES)-2] = ~0UL, \ [BITS_TO_LONGS(MAX_NUMNODES)-1] = NODE_MASK_LAST_WORD \ } }) #endif #define NODE_MASK_NONE \ ((nodemask_t) { { \ [0 ... 
BITS_TO_LONGS(MAX_NUMNODES)-1] = 0UL \ } }) #define nodes_addr(src) ((src).bits) #define nodemask_parse_user(ubuf, ulen, dst) \ __nodemask_parse_user((ubuf), (ulen), &(dst), MAX_NUMNODES) static inline int __nodemask_parse_user(const char __user *buf, int len, nodemask_t *dstp, int nbits) { return bitmap_parse_user(buf, len, dstp->bits, nbits); } #define nodelist_parse(buf, dst) __nodelist_parse((buf), &(dst), MAX_NUMNODES) static inline int __nodelist_parse(const char *buf, nodemask_t *dstp, int nbits) { return bitmap_parselist(buf, dstp->bits, nbits); } #define node_remap(oldbit, old, new) \ __node_remap((oldbit), &(old), &(new), MAX_NUMNODES) static inline int __node_remap(int oldbit, const nodemask_t *oldp, const nodemask_t *newp, int nbits) { return bitmap_bitremap(oldbit, oldp->bits, newp->bits, nbits); } #define nodes_remap(dst, src, old, new) \ __nodes_remap(&(dst), &(src), &(old), &(new), MAX_NUMNODES) static inline void __nodes_remap(nodemask_t *dstp, const nodemask_t *srcp, const nodemask_t *oldp, const nodemask_t *newp, int nbits) { bitmap_remap(dstp->bits, srcp->bits, oldp->bits, newp->bits, nbits); } #define nodes_onto(dst, orig, relmap) \ __nodes_onto(&(dst), &(orig), &(relmap), MAX_NUMNODES) static inline void __nodes_onto(nodemask_t *dstp, const nodemask_t *origp, const nodemask_t *relmapp, int nbits) { bitmap_onto(dstp->bits, origp->bits, relmapp->bits, nbits); } #define nodes_fold(dst, orig, sz) \ __nodes_fold(&(dst), &(orig), sz, MAX_NUMNODES) static inline void __nodes_fold(nodemask_t *dstp, const nodemask_t *origp, int sz, int nbits) { bitmap_fold(dstp->bits, origp->bits, sz, nbits); } #if MAX_NUMNODES > 1 #define for_each_node_mask(node, mask) \ for ((node) = first_node(mask); \ (node >= 0) && (node) < MAX_NUMNODES; \ (node) = next_node((node), (mask))) #else /* MAX_NUMNODES == 1 */ #define for_each_node_mask(node, mask) \ for ((node) = 0; (node) < 1 && !nodes_empty(mask); (node)++) #endif /* MAX_NUMNODES */ /* * Bitmasks that are kept for all the nodes. */ enum node_states { N_POSSIBLE, /* The node could become online at some point */ N_ONLINE, /* The node is online */ N_NORMAL_MEMORY, /* The node has regular memory */ #ifdef CONFIG_HIGHMEM N_HIGH_MEMORY, /* The node has regular or high memory */ #else N_HIGH_MEMORY = N_NORMAL_MEMORY, #endif N_MEMORY, /* The node has memory(regular, high, movable) */ N_CPU, /* The node has one or more cpus */ NR_NODE_STATES }; /* * The following particular system nodemasks and operations * on them manage all possible and online nodes. 
*/ extern nodemask_t node_states[NR_NODE_STATES]; #if MAX_NUMNODES > 1 static inline int node_state(int node, enum node_states state) { return node_isset(node, node_states[state]); } static inline void node_set_state(int node, enum node_states state) { __node_set(node, &node_states[state]); } static inline void node_clear_state(int node, enum node_states state) { __node_clear(node, &node_states[state]); } static inline int num_node_state(enum node_states state) { return nodes_weight(node_states[state]); } #define for_each_node_state(__node, __state) \ for_each_node_mask((__node), node_states[__state]) #define first_online_node first_node(node_states[N_ONLINE]) #define first_memory_node first_node(node_states[N_MEMORY]) static inline unsigned int next_online_node(int nid) { return next_node(nid, node_states[N_ONLINE]); } static inline unsigned int next_memory_node(int nid) { return next_node(nid, node_states[N_MEMORY]); } extern int nr_node_ids; extern int nr_online_nodes; static inline void node_set_online(int nid) { node_set_state(nid, N_ONLINE); nr_online_nodes = num_node_state(N_ONLINE); } static inline void node_set_offline(int nid) { node_clear_state(nid, N_ONLINE); nr_online_nodes = num_node_state(N_ONLINE); } #else static inline int node_state(int node, enum node_states state) { return node == 0; } static inline void node_set_state(int node, enum node_states state) { } static inline void node_clear_state(int node, enum node_states state) { } static inline int num_node_state(enum node_states state) { return 1; } #define for_each_node_state(node, __state) \ for ( (node) = 0; (node) == 0; (node) = 1) #define first_online_node 0 #define first_memory_node 0 #define next_online_node(nid) (MAX_NUMNODES) #define nr_node_ids 1 #define nr_online_nodes 1 #define node_set_online(node) node_set_state((node), N_ONLINE) #define node_set_offline(node) node_clear_state((node), N_ONLINE) #endif #if defined(CONFIG_NUMA) && (MAX_NUMNODES > 1) extern int node_random(const nodemask_t *maskp); #else static inline int node_random(const nodemask_t *mask) { return 0; } #endif #define node_online_map node_states[N_ONLINE] #define node_possible_map node_states[N_POSSIBLE] #define num_online_nodes() num_node_state(N_ONLINE) #define num_possible_nodes() num_node_state(N_POSSIBLE) #define node_online(node) node_state((node), N_ONLINE) #define node_possible(node) node_state((node), N_POSSIBLE) #define for_each_node(node) for_each_node_state(node, N_POSSIBLE) #define for_each_online_node(node) for_each_node_state(node, N_ONLINE) /* * For nodemask scrach area. * NODEMASK_ALLOC(type, name) allocates an object with a specified type and * name. */ #if NODES_SHIFT > 8 /* nodemask_t > 32 bytes */ #define NODEMASK_ALLOC(type, name, gfp_flags) \ type *name = kmalloc(sizeof(*name), gfp_flags) #define NODEMASK_FREE(m) kfree(m) #else #define NODEMASK_ALLOC(type, name, gfp_flags) type _##name, *name = &_##name #define NODEMASK_FREE(m) do {} while (0) #endif /* A example struture for using NODEMASK_ALLOC, used in mempolicy. */ struct nodemask_scratch { nodemask_t mask1; nodemask_t mask2; }; #define NODEMASK_SCRATCH(x) \ NODEMASK_ALLOC(struct nodemask_scratch, x, \ GFP_KERNEL | __GFP_NORETRY) #define NODEMASK_SCRATCH_FREE(x) NODEMASK_FREE(x) #endif /* __LINUX_NODEMASK_H */
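/*
 * User-space model of the iteration contract documented in nodemask.h above:
 * first_node() and next_node() return MAX_NUMNODES once the mask is
 * exhausted, which is what terminates for_each_node_mask().  A single
 * unsigned long stands in for nodemask_t (i.e. MAX_NUMNODES <= BITS_PER_LONG);
 * this is a sketch of the semantics, not the kernel implementation.
 */
#include <stdio.h>

#define DEMO_MAX_NUMNODES 64

static unsigned int demo_next_node(int n, unsigned long mask)
{
	for (int bit = n + 1; bit < DEMO_MAX_NUMNODES; bit++)
		if (mask & (1UL << bit))
			return bit;
	return DEMO_MAX_NUMNODES;
}

static unsigned int demo_first_node(unsigned long mask)
{
	return demo_next_node(-1, mask);
}

#define demo_for_each_node_mask(node, mask)			\
	for ((node) = demo_first_node(mask);			\
	     (node) < DEMO_MAX_NUMNODES;			\
	     (node) = demo_next_node((node), (mask)))

int main(void)
{
	unsigned long online = (1UL << 0) | (1UL << 2) | (1UL << 5);
	unsigned int node;

	demo_for_each_node_mask(node, online)
		printf("node %u is set\n", node);	/* prints 0, 2, 5 */
	return 0;
}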
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM ext4

#if !defined(_TRACE_EXT4_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_EXT4_H

#include <linux/writeback.h>
#include <linux/tracepoint.h>

struct ext4_allocation_context;
struct ext4_allocation_request;
struct ext4_extent;
struct ext4_prealloc_space;
struct ext4_inode_info;
struct mpage_da_data;
struct ext4_map_blocks;
struct extent_status;
struct ext4_fsmap;

#define EXT4_I(inode) (container_of(inode, struct ext4_inode_info, vfs_inode))

#define show_mballoc_flags(flags) __print_flags(flags, "|",	\
	{ EXT4_MB_HINT_MERGE,		"HINT_MERGE" },		\
	{ EXT4_MB_HINT_RESERVED,	"HINT_RESV" },		\
	{ EXT4_MB_HINT_METADATA,	"HINT_MDATA" },		\
	{ EXT4_MB_HINT_FIRST,		"HINT_FIRST" },		\
	{ EXT4_MB_HINT_BEST,		"HINT_BEST" },		\
	{ EXT4_MB_HINT_DATA,		"HINT_DATA" },		\
	{ EXT4_MB_HINT_NOPREALLOC,	"HINT_NOPREALLOC" },	\
	{ EXT4_MB_HINT_GROUP_ALLOC,	"HINT_GRP_ALLOC" },	\
	{ EXT4_MB_HINT_GOAL_ONLY,	"HINT_GOAL_ONLY" },	\
	{ EXT4_MB_HINT_TRY_GOAL,	"HINT_TRY_GOAL" },	\
	{ EXT4_MB_DELALLOC_RESERVED,	"DELALLOC_RESV" },	\
	{ EXT4_MB_STREAM_ALLOC,		"STREAM_ALLOC" },	\
	{ EXT4_MB_USE_ROOT_BLOCKS,	"USE_ROOT_BLKS" },	\
	{ EXT4_MB_USE_RESERVED,		"USE_RESV" })

#define show_map_flags(flags) __print_flags(flags, "|",			\
	{ EXT4_GET_BLOCKS_CREATE,		"CREATE" },		\
	{ EXT4_GET_BLOCKS_UNWRIT_EXT,		"UNWRIT" },		\
	{ EXT4_GET_BLOCKS_DELALLOC_RESERVE,	"DELALLOC" },		\
	{ EXT4_GET_BLOCKS_PRE_IO,		"PRE_IO" },		\
	{ EXT4_GET_BLOCKS_CONVERT,		"CONVERT" },		\
	{ EXT4_GET_BLOCKS_METADATA_NOFAIL,	"METADATA_NOFAIL" },	\
	{ EXT4_GET_BLOCKS_NO_NORMALIZE,		"NO_NORMALIZE" },	\
	{ EXT4_GET_BLOCKS_KEEP_SIZE,		"KEEP_SIZE" },		\
	{ EXT4_GET_BLOCKS_ZERO,			"ZERO" })

#define
show_mflags(flags) __print_flags(flags, "", \ { EXT4_MAP_NEW, "N" }, \ { EXT4_MAP_MAPPED, "M" }, \ { EXT4_MAP_UNWRITTEN, "U" }, \ { EXT4_MAP_BOUNDARY, "B" }) #define show_free_flags(flags) __print_flags(flags, "|", \ { EXT4_FREE_BLOCKS_METADATA, "METADATA" }, \ { EXT4_FREE_BLOCKS_FORGET, "FORGET" }, \ { EXT4_FREE_BLOCKS_VALIDATED, "VALIDATED" }, \ { EXT4_FREE_BLOCKS_NO_QUOT_UPDATE, "NO_QUOTA" }, \ { EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER,"1ST_CLUSTER" },\ { EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER, "LAST_CLUSTER" }) #define show_extent_status(status) __print_flags(status, "", \ { EXTENT_STATUS_WRITTEN, "W" }, \ { EXTENT_STATUS_UNWRITTEN, "U" }, \ { EXTENT_STATUS_DELAYED, "D" }, \ { EXTENT_STATUS_HOLE, "H" }) #define show_falloc_mode(mode) __print_flags(mode, "|", \ { FALLOC_FL_KEEP_SIZE, "KEEP_SIZE"}, \ { FALLOC_FL_PUNCH_HOLE, "PUNCH_HOLE"}, \ { FALLOC_FL_NO_HIDE_STALE, "NO_HIDE_STALE"}, \ { FALLOC_FL_COLLAPSE_RANGE, "COLLAPSE_RANGE"}, \ { FALLOC_FL_ZERO_RANGE, "ZERO_RANGE"}) TRACE_EVENT(ext4_other_inode_update_time, TP_PROTO(struct inode *inode, ino_t orig_ino), TP_ARGS(inode, orig_ino), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ino_t, orig_ino ) __field( uid_t, uid ) __field( gid_t, gid ) __field( __u16, mode ) ), TP_fast_assign( __entry->orig_ino = orig_ino; __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->uid = i_uid_read(inode); __entry->gid = i_gid_read(inode); __entry->mode = inode->i_mode; ), TP_printk("dev %d,%d orig_ino %lu ino %lu mode 0%o uid %u gid %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->orig_ino, (unsigned long) __entry->ino, __entry->mode, __entry->uid, __entry->gid) ); TRACE_EVENT(ext4_free_inode, TP_PROTO(struct inode *inode), TP_ARGS(inode), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( uid_t, uid ) __field( gid_t, gid ) __field( __u64, blocks ) __field( __u16, mode ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->uid = i_uid_read(inode); __entry->gid = i_gid_read(inode); __entry->blocks = inode->i_blocks; __entry->mode = inode->i_mode; ), TP_printk("dev %d,%d ino %lu mode 0%o uid %u gid %u blocks %llu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->mode, __entry->uid, __entry->gid, __entry->blocks) ); TRACE_EVENT(ext4_request_inode, TP_PROTO(struct inode *dir, int mode), TP_ARGS(dir, mode), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, dir ) __field( __u16, mode ) ), TP_fast_assign( __entry->dev = dir->i_sb->s_dev; __entry->dir = dir->i_ino; __entry->mode = mode; ), TP_printk("dev %d,%d dir %lu mode 0%o", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->dir, __entry->mode) ); TRACE_EVENT(ext4_allocate_inode, TP_PROTO(struct inode *inode, struct inode *dir, int mode), TP_ARGS(inode, dir, mode), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ino_t, dir ) __field( __u16, mode ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->dir = dir->i_ino; __entry->mode = mode; ), TP_printk("dev %d,%d ino %lu dir %lu mode 0%o", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned long) __entry->dir, __entry->mode) ); TRACE_EVENT(ext4_evict_inode, TP_PROTO(struct inode *inode), TP_ARGS(inode), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( int, nlink ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->nlink = 
inode->i_nlink; ), TP_printk("dev %d,%d ino %lu nlink %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->nlink) ); TRACE_EVENT(ext4_drop_inode, TP_PROTO(struct inode *inode, int drop), TP_ARGS(inode, drop), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( int, drop ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->drop = drop; ), TP_printk("dev %d,%d ino %lu drop %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->drop) ); TRACE_EVENT(ext4_nfs_commit_metadata, TP_PROTO(struct inode *inode), TP_ARGS(inode), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; ), TP_printk("dev %d,%d ino %lu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino) ); TRACE_EVENT(ext4_mark_inode_dirty, TP_PROTO(struct inode *inode, unsigned long IP), TP_ARGS(inode, IP), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field(unsigned long, ip ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->ip = IP; ), TP_printk("dev %d,%d ino %lu caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (void *)__entry->ip) ); TRACE_EVENT(ext4_begin_ordered_truncate, TP_PROTO(struct inode *inode, loff_t new_size), TP_ARGS(inode, new_size), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( loff_t, new_size ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->new_size = new_size; ), TP_printk("dev %d,%d ino %lu new_size %lld", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->new_size) ); DECLARE_EVENT_CLASS(ext4__write_begin, TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, unsigned int flags), TP_ARGS(inode, pos, len, flags), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( loff_t, pos ) __field( unsigned int, len ) __field( unsigned int, flags ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->pos = pos; __entry->len = len; __entry->flags = flags; ), TP_printk("dev %d,%d ino %lu pos %lld len %u flags %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->pos, __entry->len, __entry->flags) ); DEFINE_EVENT(ext4__write_begin, ext4_write_begin, TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, unsigned int flags), TP_ARGS(inode, pos, len, flags) ); DEFINE_EVENT(ext4__write_begin, ext4_da_write_begin, TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, unsigned int flags), TP_ARGS(inode, pos, len, flags) ); DECLARE_EVENT_CLASS(ext4__write_end, TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, unsigned int copied), TP_ARGS(inode, pos, len, copied), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( loff_t, pos ) __field( unsigned int, len ) __field( unsigned int, copied ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->pos = pos; __entry->len = len; __entry->copied = copied; ), TP_printk("dev %d,%d ino %lu pos %lld len %u copied %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->pos, __entry->len, __entry->copied) ); DEFINE_EVENT(ext4__write_end, ext4_write_end, TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, unsigned int copied), TP_ARGS(inode, pos, len, copied) ); 
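/*
 * Usage sketch (illustrative): every DEFINE_EVENT()/TRACE_EVENT() in this
 * header expands to a trace_<event>() inline that the ext4 code calls at
 * the matching point, e.g. at the end of a buffered write:
 *
 *	trace_ext4_write_end(inode, pos, len, copied);
 *
 * Nothing is recorded until the event is enabled from tracefs, e.g.:
 *
 *	echo 1 > /sys/kernel/tracing/events/ext4/ext4_write_end/enable
 */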
DEFINE_EVENT(ext4__write_end, ext4_journalled_write_end, TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, unsigned int copied), TP_ARGS(inode, pos, len, copied) ); DEFINE_EVENT(ext4__write_end, ext4_da_write_end, TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, unsigned int copied), TP_ARGS(inode, pos, len, copied) ); TRACE_EVENT(ext4_writepages, TP_PROTO(struct inode *inode, struct writeback_control *wbc), TP_ARGS(inode, wbc), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( long, nr_to_write ) __field( long, pages_skipped ) __field( loff_t, range_start ) __field( loff_t, range_end ) __field( pgoff_t, writeback_index ) __field( int, sync_mode ) __field( char, for_kupdate ) __field( char, range_cyclic ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->nr_to_write = wbc->nr_to_write; __entry->pages_skipped = wbc->pages_skipped; __entry->range_start = wbc->range_start; __entry->range_end = wbc->range_end; __entry->writeback_index = inode->i_mapping->writeback_index; __entry->sync_mode = wbc->sync_mode; __entry->for_kupdate = wbc->for_kupdate; __entry->range_cyclic = wbc->range_cyclic; ), TP_printk("dev %d,%d ino %lu nr_to_write %ld pages_skipped %ld " "range_start %lld range_end %lld sync_mode %d " "for_kupdate %d range_cyclic %d writeback_index %lu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->nr_to_write, __entry->pages_skipped, __entry->range_start, __entry->range_end, __entry->sync_mode, __entry->for_kupdate, __entry->range_cyclic, (unsigned long) __entry->writeback_index) ); TRACE_EVENT(ext4_da_write_pages, TP_PROTO(struct inode *inode, pgoff_t first_page, struct writeback_control *wbc), TP_ARGS(inode, first_page, wbc), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( pgoff_t, first_page ) __field( long, nr_to_write ) __field( int, sync_mode ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->first_page = first_page; __entry->nr_to_write = wbc->nr_to_write; __entry->sync_mode = wbc->sync_mode; ), TP_printk("dev %d,%d ino %lu first_page %lu nr_to_write %ld " "sync_mode %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->first_page, __entry->nr_to_write, __entry->sync_mode) ); TRACE_EVENT(ext4_da_write_pages_extent, TP_PROTO(struct inode *inode, struct ext4_map_blocks *map), TP_ARGS(inode, map), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( __u64, lblk ) __field( __u32, len ) __field( __u32, flags ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->lblk = map->m_lblk; __entry->len = map->m_len; __entry->flags = map->m_flags; ), TP_printk("dev %d,%d ino %lu lblk %llu len %u flags %s", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->lblk, __entry->len, show_mflags(__entry->flags)) ); TRACE_EVENT(ext4_writepages_result, TP_PROTO(struct inode *inode, struct writeback_control *wbc, int ret, int pages_written), TP_ARGS(inode, wbc, ret, pages_written), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( int, ret ) __field( int, pages_written ) __field( long, pages_skipped ) __field( pgoff_t, writeback_index ) __field( int, sync_mode ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->ret = ret; __entry->pages_written = pages_written; __entry->pages_skipped = wbc->pages_skipped; __entry->writeback_index = 
inode->i_mapping->writeback_index; __entry->sync_mode = wbc->sync_mode; ), TP_printk("dev %d,%d ino %lu ret %d pages_written %d pages_skipped %ld " "sync_mode %d writeback_index %lu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->ret, __entry->pages_written, __entry->pages_skipped, __entry->sync_mode, (unsigned long) __entry->writeback_index) ); DECLARE_EVENT_CLASS(ext4__page_op, TP_PROTO(struct page *page), TP_ARGS(page), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( pgoff_t, index ) ), TP_fast_assign( __entry->dev = page->mapping->host->i_sb->s_dev; __entry->ino = page->mapping->host->i_ino; __entry->index = page->index; ), TP_printk("dev %d,%d ino %lu page_index %lu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned long) __entry->index) ); DEFINE_EVENT(ext4__page_op, ext4_writepage, TP_PROTO(struct page *page), TP_ARGS(page) ); DEFINE_EVENT(ext4__page_op, ext4_readpage, TP_PROTO(struct page *page), TP_ARGS(page) ); DEFINE_EVENT(ext4__page_op, ext4_releasepage, TP_PROTO(struct page *page), TP_ARGS(page) ); DECLARE_EVENT_CLASS(ext4_invalidatepage_op, TP_PROTO(struct page *page, unsigned int offset, unsigned int length), TP_ARGS(page, offset, length), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( pgoff_t, index ) __field( unsigned int, offset ) __field( unsigned int, length ) ), TP_fast_assign( __entry->dev = page->mapping->host->i_sb->s_dev; __entry->ino = page->mapping->host->i_ino; __entry->index = page->index; __entry->offset = offset; __entry->length = length; ), TP_printk("dev %d,%d ino %lu page_index %lu offset %u length %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned long) __entry->index, __entry->offset, __entry->length) ); DEFINE_EVENT(ext4_invalidatepage_op, ext4_invalidatepage, TP_PROTO(struct page *page, unsigned int offset, unsigned int length), TP_ARGS(page, offset, length) ); DEFINE_EVENT(ext4_invalidatepage_op, ext4_journalled_invalidatepage, TP_PROTO(struct page *page, unsigned int offset, unsigned int length), TP_ARGS(page, offset, length) ); TRACE_EVENT(ext4_discard_blocks, TP_PROTO(struct super_block *sb, unsigned long long blk, unsigned long long count), TP_ARGS(sb, blk, count), TP_STRUCT__entry( __field( dev_t, dev ) __field( __u64, blk ) __field( __u64, count ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->blk = blk; __entry->count = count; ), TP_printk("dev %d,%d blk %llu count %llu", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->blk, __entry->count) ); DECLARE_EVENT_CLASS(ext4__mb_new_pa, TP_PROTO(struct ext4_allocation_context *ac, struct ext4_prealloc_space *pa), TP_ARGS(ac, pa), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( __u64, pa_pstart ) __field( __u64, pa_lstart ) __field( __u32, pa_len ) ), TP_fast_assign( __entry->dev = ac->ac_sb->s_dev; __entry->ino = ac->ac_inode->i_ino; __entry->pa_pstart = pa->pa_pstart; __entry->pa_lstart = pa->pa_lstart; __entry->pa_len = pa->pa_len; ), TP_printk("dev %d,%d ino %lu pstart %llu len %u lstart %llu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->pa_pstart, __entry->pa_len, __entry->pa_lstart) ); DEFINE_EVENT(ext4__mb_new_pa, ext4_mb_new_inode_pa, TP_PROTO(struct ext4_allocation_context *ac, struct ext4_prealloc_space *pa), TP_ARGS(ac, pa) ); DEFINE_EVENT(ext4__mb_new_pa, ext4_mb_new_group_pa, TP_PROTO(struct ext4_allocation_context *ac, struct ext4_prealloc_space *pa), TP_ARGS(ac, pa) ); 
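/*
 * Note on the DECLARE_EVENT_CLASS()/DEFINE_EVENT() pattern used above:
 * the class describes the record layout and format string once, and each
 * DEFINE_EVENT() only adds a named tracepoint that reuses it.  A plain
 * TRACE_EVENT() is simply the combined form, roughly equivalent to:
 *
 *	DECLARE_EVENT_CLASS(name, PARAMS(proto), PARAMS(args),
 *			    PARAMS(tstruct), PARAMS(assign), PARAMS(print));
 *	DEFINE_EVENT(name, name, PARAMS(proto), PARAMS(args));
 *
 * so grouping related events (ext4__write_begin, ext4__mb_new_pa, ...)
 * into a class keeps the generated code and format data smaller.
 */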
TRACE_EVENT(ext4_mb_release_inode_pa, TP_PROTO(struct ext4_prealloc_space *pa, unsigned long long block, unsigned int count), TP_ARGS(pa, block, count), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( __u64, block ) __field( __u32, count ) ), TP_fast_assign( __entry->dev = pa->pa_inode->i_sb->s_dev; __entry->ino = pa->pa_inode->i_ino; __entry->block = block; __entry->count = count; ), TP_printk("dev %d,%d ino %lu block %llu count %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->block, __entry->count) ); TRACE_EVENT(ext4_mb_release_group_pa, TP_PROTO(struct super_block *sb, struct ext4_prealloc_space *pa), TP_ARGS(sb, pa), TP_STRUCT__entry( __field( dev_t, dev ) __field( __u64, pa_pstart ) __field( __u32, pa_len ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->pa_pstart = pa->pa_pstart; __entry->pa_len = pa->pa_len; ), TP_printk("dev %d,%d pstart %llu len %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->pa_pstart, __entry->pa_len) ); TRACE_EVENT(ext4_discard_preallocations, TP_PROTO(struct inode *inode), TP_ARGS(inode), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; ), TP_printk("dev %d,%d ino %lu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino) ); TRACE_EVENT(ext4_mb_discard_preallocations, TP_PROTO(struct super_block *sb, int needed), TP_ARGS(sb, needed), TP_STRUCT__entry( __field( dev_t, dev ) __field( int, needed ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->needed = needed; ), TP_printk("dev %d,%d needed %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->needed) ); TRACE_EVENT(ext4_request_blocks, TP_PROTO(struct ext4_allocation_request *ar), TP_ARGS(ar), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( unsigned int, len ) __field( __u32, logical ) __field( __u32, lleft ) __field( __u32, lright ) __field( __u64, goal ) __field( __u64, pleft ) __field( __u64, pright ) __field( unsigned int, flags ) ), TP_fast_assign( __entry->dev = ar->inode->i_sb->s_dev; __entry->ino = ar->inode->i_ino; __entry->len = ar->len; __entry->logical = ar->logical; __entry->goal = ar->goal; __entry->lleft = ar->lleft; __entry->lright = ar->lright; __entry->pleft = ar->pleft; __entry->pright = ar->pright; __entry->flags = ar->flags; ), TP_printk("dev %d,%d ino %lu flags %s len %u lblk %u goal %llu " "lleft %u lright %u pleft %llu pright %llu ", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, show_mballoc_flags(__entry->flags), __entry->len, __entry->logical, __entry->goal, __entry->lleft, __entry->lright, __entry->pleft, __entry->pright) ); TRACE_EVENT(ext4_allocate_blocks, TP_PROTO(struct ext4_allocation_request *ar, unsigned long long block), TP_ARGS(ar, block), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( __u64, block ) __field( unsigned int, len ) __field( __u32, logical ) __field( __u32, lleft ) __field( __u32, lright ) __field( __u64, goal ) __field( __u64, pleft ) __field( __u64, pright ) __field( unsigned int, flags ) ), TP_fast_assign( __entry->dev = ar->inode->i_sb->s_dev; __entry->ino = ar->inode->i_ino; __entry->block = block; __entry->len = ar->len; __entry->logical = ar->logical; __entry->goal = ar->goal; __entry->lleft = ar->lleft; __entry->lright = ar->lright; __entry->pleft = ar->pleft; __entry->pright = ar->pright; __entry->flags = ar->flags; ), TP_printk("dev %d,%d ino %lu flags %s len %u block %llu lblk %u " 
"goal %llu lleft %u lright %u pleft %llu pright %llu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, show_mballoc_flags(__entry->flags), __entry->len, __entry->block, __entry->logical, __entry->goal, __entry->lleft, __entry->lright, __entry->pleft, __entry->pright) ); TRACE_EVENT(ext4_free_blocks, TP_PROTO(struct inode *inode, __u64 block, unsigned long count, int flags), TP_ARGS(inode, block, count, flags), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( __u64, block ) __field( unsigned long, count ) __field( int, flags ) __field( __u16, mode ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->block = block; __entry->count = count; __entry->flags = flags; __entry->mode = inode->i_mode; ), TP_printk("dev %d,%d ino %lu mode 0%o block %llu count %lu flags %s", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->mode, __entry->block, __entry->count, show_free_flags(__entry->flags)) ); TRACE_EVENT(ext4_sync_file_enter, TP_PROTO(struct file *file, int datasync), TP_ARGS(file, datasync), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ino_t, parent ) __field( int, datasync ) ), TP_fast_assign( struct dentry *dentry = file->f_path.dentry; __entry->dev = dentry->d_sb->s_dev; __entry->ino = d_inode(dentry)->i_ino; __entry->datasync = datasync; __entry->parent = d_inode(dentry->d_parent)->i_ino; ), TP_printk("dev %d,%d ino %lu parent %lu datasync %d ", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned long) __entry->parent, __entry->datasync) ); TRACE_EVENT(ext4_sync_file_exit, TP_PROTO(struct inode *inode, int ret), TP_ARGS(inode, ret), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( int, ret ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->ret = ret; ), TP_printk("dev %d,%d ino %lu ret %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->ret) ); TRACE_EVENT(ext4_sync_fs, TP_PROTO(struct super_block *sb, int wait), TP_ARGS(sb, wait), TP_STRUCT__entry( __field( dev_t, dev ) __field( int, wait ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->wait = wait; ), TP_printk("dev %d,%d wait %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->wait) ); TRACE_EVENT(ext4_alloc_da_blocks, TP_PROTO(struct inode *inode), TP_ARGS(inode), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( unsigned int, data_blocks ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->data_blocks = EXT4_I(inode)->i_reserved_data_blocks; ), TP_printk("dev %d,%d ino %lu reserved_data_blocks %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->data_blocks) ); TRACE_EVENT(ext4_mballoc_alloc, TP_PROTO(struct ext4_allocation_context *ac), TP_ARGS(ac), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( __u32, orig_logical ) __field( int, orig_start ) __field( __u32, orig_group ) __field( int, orig_len ) __field( __u32, goal_logical ) __field( int, goal_start ) __field( __u32, goal_group ) __field( int, goal_len ) __field( __u32, result_logical ) __field( int, result_start ) __field( __u32, result_group ) __field( int, result_len ) __field( __u16, found ) __field( __u16, groups ) __field( __u16, buddy ) __field( __u16, flags ) __field( __u16, tail ) __field( __u8, cr ) ), TP_fast_assign( __entry->dev = ac->ac_inode->i_sb->s_dev; __entry->ino = 
ac->ac_inode->i_ino; __entry->orig_logical = ac->ac_o_ex.fe_logical; __entry->orig_start = ac->ac_o_ex.fe_start; __entry->orig_group = ac->ac_o_ex.fe_group; __entry->orig_len = ac->ac_o_ex.fe_len; __entry->goal_logical = ac->ac_g_ex.fe_logical; __entry->goal_start = ac->ac_g_ex.fe_start; __entry->goal_group = ac->ac_g_ex.fe_group; __entry->goal_len = ac->ac_g_ex.fe_len; __entry->result_logical = ac->ac_f_ex.fe_logical; __entry->result_start = ac->ac_f_ex.fe_start; __entry->result_group = ac->ac_f_ex.fe_group; __entry->result_len = ac->ac_f_ex.fe_len; __entry->found = ac->ac_found; __entry->flags = ac->ac_flags; __entry->groups = ac->ac_groups_scanned; __entry->buddy = ac->ac_buddy; __entry->tail = ac->ac_tail; __entry->cr = ac->ac_criteria; ), TP_printk("dev %d,%d inode %lu orig %u/%d/%u@%u goal %u/%d/%u@%u " "result %u/%d/%u@%u blks %u grps %u cr %u flags %s " "tail %u broken %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->orig_group, __entry->orig_start, __entry->orig_len, __entry->orig_logical, __entry->goal_group, __entry->goal_start, __entry->goal_len, __entry->goal_logical, __entry->result_group, __entry->result_start, __entry->result_len, __entry->result_logical, __entry->found, __entry->groups, __entry->cr, show_mballoc_flags(__entry->flags), __entry->tail, __entry->buddy ? 1 << __entry->buddy : 0) ); TRACE_EVENT(ext4_mballoc_prealloc, TP_PROTO(struct ext4_allocation_context *ac), TP_ARGS(ac), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( __u32, orig_logical ) __field( int, orig_start ) __field( __u32, orig_group ) __field( int, orig_len ) __field( __u32, result_logical ) __field( int, result_start ) __field( __u32, result_group ) __field( int, result_len ) ), TP_fast_assign( __entry->dev = ac->ac_inode->i_sb->s_dev; __entry->ino = ac->ac_inode->i_ino; __entry->orig_logical = ac->ac_o_ex.fe_logical; __entry->orig_start = ac->ac_o_ex.fe_start; __entry->orig_group = ac->ac_o_ex.fe_group; __entry->orig_len = ac->ac_o_ex.fe_len; __entry->result_logical = ac->ac_b_ex.fe_logical; __entry->result_start = ac->ac_b_ex.fe_start; __entry->result_group = ac->ac_b_ex.fe_group; __entry->result_len = ac->ac_b_ex.fe_len; ), TP_printk("dev %d,%d inode %lu orig %u/%d/%u@%u result %u/%d/%u@%u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->orig_group, __entry->orig_start, __entry->orig_len, __entry->orig_logical, __entry->result_group, __entry->result_start, __entry->result_len, __entry->result_logical) ); DECLARE_EVENT_CLASS(ext4__mballoc, TP_PROTO(struct super_block *sb, struct inode *inode, ext4_group_t group, ext4_grpblk_t start, ext4_grpblk_t len), TP_ARGS(sb, inode, group, start, len), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( int, result_start ) __field( __u32, result_group ) __field( int, result_len ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->ino = inode ? 
inode->i_ino : 0; __entry->result_start = start; __entry->result_group = group; __entry->result_len = len; ), TP_printk("dev %d,%d inode %lu extent %u/%d/%d ", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->result_group, __entry->result_start, __entry->result_len) ); DEFINE_EVENT(ext4__mballoc, ext4_mballoc_discard, TP_PROTO(struct super_block *sb, struct inode *inode, ext4_group_t group, ext4_grpblk_t start, ext4_grpblk_t len), TP_ARGS(sb, inode, group, start, len) ); DEFINE_EVENT(ext4__mballoc, ext4_mballoc_free, TP_PROTO(struct super_block *sb, struct inode *inode, ext4_group_t group, ext4_grpblk_t start, ext4_grpblk_t len), TP_ARGS(sb, inode, group, start, len) ); TRACE_EVENT(ext4_forget, TP_PROTO(struct inode *inode, int is_metadata, __u64 block), TP_ARGS(inode, is_metadata, block), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( __u64, block ) __field( int, is_metadata ) __field( __u16, mode ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->block = block; __entry->is_metadata = is_metadata; __entry->mode = inode->i_mode; ), TP_printk("dev %d,%d ino %lu mode 0%o is_metadata %d block %llu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->mode, __entry->is_metadata, __entry->block) ); TRACE_EVENT(ext4_da_update_reserve_space, TP_PROTO(struct inode *inode, int used_blocks, int quota_claim), TP_ARGS(inode, used_blocks, quota_claim), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( __u64, i_blocks ) __field( int, used_blocks ) __field( int, reserved_data_blocks ) __field( int, quota_claim ) __field( __u16, mode ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->i_blocks = inode->i_blocks; __entry->used_blocks = used_blocks; __entry->reserved_data_blocks = EXT4_I(inode)->i_reserved_data_blocks; __entry->quota_claim = quota_claim; __entry->mode = inode->i_mode; ), TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu used_blocks %d " "reserved_data_blocks %d quota_claim %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->mode, __entry->i_blocks, __entry->used_blocks, __entry->reserved_data_blocks, __entry->quota_claim) ); TRACE_EVENT(ext4_da_reserve_space, TP_PROTO(struct inode *inode), TP_ARGS(inode), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( __u64, i_blocks ) __field( int, reserved_data_blocks ) __field( __u16, mode ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->i_blocks = inode->i_blocks; __entry->reserved_data_blocks = EXT4_I(inode)->i_reserved_data_blocks; __entry->mode = inode->i_mode; ), TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu " "reserved_data_blocks %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->mode, __entry->i_blocks, __entry->reserved_data_blocks) ); TRACE_EVENT(ext4_da_release_space, TP_PROTO(struct inode *inode, int freed_blocks), TP_ARGS(inode, freed_blocks), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( __u64, i_blocks ) __field( int, freed_blocks ) __field( int, reserved_data_blocks ) __field( __u16, mode ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->i_blocks = inode->i_blocks; __entry->freed_blocks = freed_blocks; __entry->reserved_data_blocks = EXT4_I(inode)->i_reserved_data_blocks; __entry->mode = inode->i_mode; ), TP_printk("dev %d,%d ino %lu mode 
0%o i_blocks %llu freed_blocks %d " "reserved_data_blocks %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->mode, __entry->i_blocks, __entry->freed_blocks, __entry->reserved_data_blocks) ); DECLARE_EVENT_CLASS(ext4__bitmap_load, TP_PROTO(struct super_block *sb, unsigned long group), TP_ARGS(sb, group), TP_STRUCT__entry( __field( dev_t, dev ) __field( __u32, group ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->group = group; ), TP_printk("dev %d,%d group %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->group) ); DEFINE_EVENT(ext4__bitmap_load, ext4_mb_bitmap_load, TP_PROTO(struct super_block *sb, unsigned long group), TP_ARGS(sb, group) ); DEFINE_EVENT(ext4__bitmap_load, ext4_mb_buddy_bitmap_load, TP_PROTO(struct super_block *sb, unsigned long group), TP_ARGS(sb, group) ); DEFINE_EVENT(ext4__bitmap_load, ext4_read_block_bitmap_load, TP_PROTO(struct super_block *sb, unsigned long group), TP_ARGS(sb, group) ); DEFINE_EVENT(ext4__bitmap_load, ext4_load_inode_bitmap, TP_PROTO(struct super_block *sb, unsigned long group), TP_ARGS(sb, group) ); TRACE_EVENT(ext4_direct_IO_enter, TP_PROTO(struct inode *inode, loff_t offset, unsigned long len, int rw), TP_ARGS(inode, offset, len, rw), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( loff_t, pos ) __field( unsigned long, len ) __field( int, rw ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->pos = offset; __entry->len = len; __entry->rw = rw; ), TP_printk("dev %d,%d ino %lu pos %lld len %lu rw %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->pos, __entry->len, __entry->rw) ); TRACE_EVENT(ext4_direct_IO_exit, TP_PROTO(struct inode *inode, loff_t offset, unsigned long len, int rw, int ret), TP_ARGS(inode, offset, len, rw, ret), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( loff_t, pos ) __field( unsigned long, len ) __field( int, rw ) __field( int, ret ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->pos = offset; __entry->len = len; __entry->rw = rw; __entry->ret = ret; ), TP_printk("dev %d,%d ino %lu pos %lld len %lu rw %d ret %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->pos, __entry->len, __entry->rw, __entry->ret) ); DECLARE_EVENT_CLASS(ext4__fallocate_mode, TP_PROTO(struct inode *inode, loff_t offset, loff_t len, int mode), TP_ARGS(inode, offset, len, mode), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( loff_t, offset ) __field( loff_t, len ) __field( int, mode ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->offset = offset; __entry->len = len; __entry->mode = mode; ), TP_printk("dev %d,%d ino %lu offset %lld len %lld mode %s", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->offset, __entry->len, show_falloc_mode(__entry->mode)) ); DEFINE_EVENT(ext4__fallocate_mode, ext4_fallocate_enter, TP_PROTO(struct inode *inode, loff_t offset, loff_t len, int mode), TP_ARGS(inode, offset, len, mode) ); DEFINE_EVENT(ext4__fallocate_mode, ext4_punch_hole, TP_PROTO(struct inode *inode, loff_t offset, loff_t len, int mode), TP_ARGS(inode, offset, len, mode) ); DEFINE_EVENT(ext4__fallocate_mode, ext4_zero_range, TP_PROTO(struct inode *inode, loff_t offset, loff_t len, int mode), TP_ARGS(inode, offset, len, mode) ); TRACE_EVENT(ext4_fallocate_exit, TP_PROTO(struct inode *inode, loff_t offset, unsigned 
int max_blocks, int ret), TP_ARGS(inode, offset, max_blocks, ret), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( loff_t, pos ) __field( unsigned int, blocks ) __field( int, ret ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->pos = offset; __entry->blocks = max_blocks; __entry->ret = ret; ), TP_printk("dev %d,%d ino %lu pos %lld blocks %u ret %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->pos, __entry->blocks, __entry->ret) ); TRACE_EVENT(ext4_unlink_enter, TP_PROTO(struct inode *parent, struct dentry *dentry), TP_ARGS(parent, dentry), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ino_t, parent ) __field( loff_t, size ) ), TP_fast_assign( __entry->dev = dentry->d_sb->s_dev; __entry->ino = d_inode(dentry)->i_ino; __entry->parent = parent->i_ino; __entry->size = d_inode(dentry)->i_size; ), TP_printk("dev %d,%d ino %lu size %lld parent %lu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->size, (unsigned long) __entry->parent) ); TRACE_EVENT(ext4_unlink_exit, TP_PROTO(struct dentry *dentry, int ret), TP_ARGS(dentry, ret), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( int, ret ) ), TP_fast_assign( __entry->dev = dentry->d_sb->s_dev; __entry->ino = d_inode(dentry)->i_ino; __entry->ret = ret; ), TP_printk("dev %d,%d ino %lu ret %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->ret) ); DECLARE_EVENT_CLASS(ext4__truncate, TP_PROTO(struct inode *inode), TP_ARGS(inode), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( __u64, blocks ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->blocks = inode->i_blocks; ), TP_printk("dev %d,%d ino %lu blocks %llu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->blocks) ); DEFINE_EVENT(ext4__truncate, ext4_truncate_enter, TP_PROTO(struct inode *inode), TP_ARGS(inode) ); DEFINE_EVENT(ext4__truncate, ext4_truncate_exit, TP_PROTO(struct inode *inode), TP_ARGS(inode) ); /* 'ux' is the unwritten extent. */ TRACE_EVENT(ext4_ext_convert_to_initialized_enter, TP_PROTO(struct inode *inode, struct ext4_map_blocks *map, struct ext4_extent *ux), TP_ARGS(inode, map, ux), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_lblk_t, m_lblk ) __field( unsigned, m_len ) __field( ext4_lblk_t, u_lblk ) __field( unsigned, u_len ) __field( ext4_fsblk_t, u_pblk ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->m_lblk = map->m_lblk; __entry->m_len = map->m_len; __entry->u_lblk = le32_to_cpu(ux->ee_block); __entry->u_len = ext4_ext_get_actual_len(ux); __entry->u_pblk = ext4_ext_pblock(ux); ), TP_printk("dev %d,%d ino %lu m_lblk %u m_len %u u_lblk %u u_len %u " "u_pblk %llu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->m_lblk, __entry->m_len, __entry->u_lblk, __entry->u_len, __entry->u_pblk) ); /* * 'ux' is the unwritten extent. * 'ix' is the initialized extent to which blocks are transferred. 
*/ TRACE_EVENT(ext4_ext_convert_to_initialized_fastpath, TP_PROTO(struct inode *inode, struct ext4_map_blocks *map, struct ext4_extent *ux, struct ext4_extent *ix), TP_ARGS(inode, map, ux, ix), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_lblk_t, m_lblk ) __field( unsigned, m_len ) __field( ext4_lblk_t, u_lblk ) __field( unsigned, u_len ) __field( ext4_fsblk_t, u_pblk ) __field( ext4_lblk_t, i_lblk ) __field( unsigned, i_len ) __field( ext4_fsblk_t, i_pblk ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->m_lblk = map->m_lblk; __entry->m_len = map->m_len; __entry->u_lblk = le32_to_cpu(ux->ee_block); __entry->u_len = ext4_ext_get_actual_len(ux); __entry->u_pblk = ext4_ext_pblock(ux); __entry->i_lblk = le32_to_cpu(ix->ee_block); __entry->i_len = ext4_ext_get_actual_len(ix); __entry->i_pblk = ext4_ext_pblock(ix); ), TP_printk("dev %d,%d ino %lu m_lblk %u m_len %u " "u_lblk %u u_len %u u_pblk %llu " "i_lblk %u i_len %u i_pblk %llu ", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->m_lblk, __entry->m_len, __entry->u_lblk, __entry->u_len, __entry->u_pblk, __entry->i_lblk, __entry->i_len, __entry->i_pblk) ); DECLARE_EVENT_CLASS(ext4__map_blocks_enter, TP_PROTO(struct inode *inode, ext4_lblk_t lblk, unsigned int len, unsigned int flags), TP_ARGS(inode, lblk, len, flags), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_lblk_t, lblk ) __field( unsigned int, len ) __field( unsigned int, flags ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->lblk = lblk; __entry->len = len; __entry->flags = flags; ), TP_printk("dev %d,%d ino %lu lblk %u len %u flags %s", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->lblk, __entry->len, show_map_flags(__entry->flags)) ); DEFINE_EVENT(ext4__map_blocks_enter, ext4_ext_map_blocks_enter, TP_PROTO(struct inode *inode, ext4_lblk_t lblk, unsigned len, unsigned flags), TP_ARGS(inode, lblk, len, flags) ); DEFINE_EVENT(ext4__map_blocks_enter, ext4_ind_map_blocks_enter, TP_PROTO(struct inode *inode, ext4_lblk_t lblk, unsigned len, unsigned flags), TP_ARGS(inode, lblk, len, flags) ); DECLARE_EVENT_CLASS(ext4__map_blocks_exit, TP_PROTO(struct inode *inode, unsigned flags, struct ext4_map_blocks *map, int ret), TP_ARGS(inode, flags, map, ret), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( unsigned int, flags ) __field( ext4_fsblk_t, pblk ) __field( ext4_lblk_t, lblk ) __field( unsigned int, len ) __field( unsigned int, mflags ) __field( int, ret ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->flags = flags; __entry->pblk = map->m_pblk; __entry->lblk = map->m_lblk; __entry->len = map->m_len; __entry->mflags = map->m_flags; __entry->ret = ret; ), TP_printk("dev %d,%d ino %lu flags %s lblk %u pblk %llu len %u " "mflags %s ret %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, show_map_flags(__entry->flags), __entry->lblk, __entry->pblk, __entry->len, show_mflags(__entry->mflags), __entry->ret) ); DEFINE_EVENT(ext4__map_blocks_exit, ext4_ext_map_blocks_exit, TP_PROTO(struct inode *inode, unsigned flags, struct ext4_map_blocks *map, int ret), TP_ARGS(inode, flags, map, ret) ); DEFINE_EVENT(ext4__map_blocks_exit, ext4_ind_map_blocks_exit, TP_PROTO(struct inode *inode, unsigned flags, struct ext4_map_blocks *map, int ret), TP_ARGS(inode, flags, map, ret) ); 
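/*
 * Sketch of how the map_blocks enter/exit pairs above are used (for
 * illustration): the block-mapping paths report the requested range on
 * entry and the resolved mapping plus return value on exit, roughly:
 *
 *	trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len,
 *					flags);
 *	...
 *	trace_ext4_ext_map_blocks_exit(inode, flags, map, ret);
 *
 * where a positive ret is the number of blocks mapped and a negative
 * value is an error, matching the "ret %d" field printed by the exit
 * event.
 */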
TRACE_EVENT(ext4_ext_load_extent, TP_PROTO(struct inode *inode, ext4_lblk_t lblk, ext4_fsblk_t pblk), TP_ARGS(inode, lblk, pblk), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_fsblk_t, pblk ) __field( ext4_lblk_t, lblk ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->pblk = pblk; __entry->lblk = lblk; ), TP_printk("dev %d,%d ino %lu lblk %u pblk %llu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->lblk, __entry->pblk) ); TRACE_EVENT(ext4_load_inode, TP_PROTO(struct inode *inode), TP_ARGS(inode), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; ), TP_printk("dev %d,%d ino %ld", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino) ); TRACE_EVENT(ext4_journal_start, TP_PROTO(struct super_block *sb, int blocks, int rsv_blocks, unsigned long IP), TP_ARGS(sb, blocks, rsv_blocks, IP), TP_STRUCT__entry( __field( dev_t, dev ) __field(unsigned long, ip ) __field( int, blocks ) __field( int, rsv_blocks ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->ip = IP; __entry->blocks = blocks; __entry->rsv_blocks = rsv_blocks; ), TP_printk("dev %d,%d blocks, %d rsv_blocks, %d caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->blocks, __entry->rsv_blocks, (void *)__entry->ip) ); TRACE_EVENT(ext4_journal_start_reserved, TP_PROTO(struct super_block *sb, int blocks, unsigned long IP), TP_ARGS(sb, blocks, IP), TP_STRUCT__entry( __field( dev_t, dev ) __field(unsigned long, ip ) __field( int, blocks ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->ip = IP; __entry->blocks = blocks; ), TP_printk("dev %d,%d blocks, %d caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->blocks, (void *)__entry->ip) ); DECLARE_EVENT_CLASS(ext4__trim, TP_PROTO(struct super_block *sb, ext4_group_t group, ext4_grpblk_t start, ext4_grpblk_t len), TP_ARGS(sb, group, start, len), TP_STRUCT__entry( __field( int, dev_major ) __field( int, dev_minor ) __field( __u32, group ) __field( int, start ) __field( int, len ) ), TP_fast_assign( __entry->dev_major = MAJOR(sb->s_dev); __entry->dev_minor = MINOR(sb->s_dev); __entry->group = group; __entry->start = start; __entry->len = len; ), TP_printk("dev %d,%d group %u, start %d, len %d", __entry->dev_major, __entry->dev_minor, __entry->group, __entry->start, __entry->len) ); DEFINE_EVENT(ext4__trim, ext4_trim_extent, TP_PROTO(struct super_block *sb, ext4_group_t group, ext4_grpblk_t start, ext4_grpblk_t len), TP_ARGS(sb, group, start, len) ); DEFINE_EVENT(ext4__trim, ext4_trim_all_free, TP_PROTO(struct super_block *sb, ext4_group_t group, ext4_grpblk_t start, ext4_grpblk_t len), TP_ARGS(sb, group, start, len) ); TRACE_EVENT(ext4_ext_handle_unwritten_extents, TP_PROTO(struct inode *inode, struct ext4_map_blocks *map, int flags, unsigned int allocated, ext4_fsblk_t newblock), TP_ARGS(inode, map, flags, allocated, newblock), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( int, flags ) __field( ext4_lblk_t, lblk ) __field( ext4_fsblk_t, pblk ) __field( unsigned int, len ) __field( unsigned int, allocated ) __field( ext4_fsblk_t, newblk ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->flags = flags; __entry->lblk = map->m_lblk; __entry->pblk = map->m_pblk; __entry->len = map->m_len; __entry->allocated = allocated; __entry->newblk = newblock; ), TP_printk("dev %d,%d ino %lu m_lblk %u 
m_pblk %llu m_len %u flags %s " "allocated %d newblock %llu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned) __entry->lblk, (unsigned long long) __entry->pblk, __entry->len, show_map_flags(__entry->flags), (unsigned int) __entry->allocated, (unsigned long long) __entry->newblk) ); TRACE_EVENT(ext4_get_implied_cluster_alloc_exit, TP_PROTO(struct super_block *sb, struct ext4_map_blocks *map, int ret), TP_ARGS(sb, map, ret), TP_STRUCT__entry( __field( dev_t, dev ) __field( unsigned int, flags ) __field( ext4_lblk_t, lblk ) __field( ext4_fsblk_t, pblk ) __field( unsigned int, len ) __field( int, ret ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->flags = map->m_flags; __entry->lblk = map->m_lblk; __entry->pblk = map->m_pblk; __entry->len = map->m_len; __entry->ret = ret; ), TP_printk("dev %d,%d m_lblk %u m_pblk %llu m_len %u m_flags %s ret %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->lblk, (unsigned long long) __entry->pblk, __entry->len, show_mflags(__entry->flags), __entry->ret) ); TRACE_EVENT(ext4_ext_put_in_cache, TP_PROTO(struct inode *inode, ext4_lblk_t lblk, unsigned int len, ext4_fsblk_t start), TP_ARGS(inode, lblk, len, start), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_lblk_t, lblk ) __field( unsigned int, len ) __field( ext4_fsblk_t, start ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->lblk = lblk; __entry->len = len; __entry->start = start; ), TP_printk("dev %d,%d ino %lu lblk %u len %u start %llu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned) __entry->lblk, __entry->len, (unsigned long long) __entry->start) ); TRACE_EVENT(ext4_ext_in_cache, TP_PROTO(struct inode *inode, ext4_lblk_t lblk, int ret), TP_ARGS(inode, lblk, ret), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_lblk_t, lblk ) __field( int, ret ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->lblk = lblk; __entry->ret = ret; ), TP_printk("dev %d,%d ino %lu lblk %u ret %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned) __entry->lblk, __entry->ret) ); TRACE_EVENT(ext4_find_delalloc_range, TP_PROTO(struct inode *inode, ext4_lblk_t from, ext4_lblk_t to, int reverse, int found, ext4_lblk_t found_blk), TP_ARGS(inode, from, to, reverse, found, found_blk), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_lblk_t, from ) __field( ext4_lblk_t, to ) __field( int, reverse ) __field( int, found ) __field( ext4_lblk_t, found_blk ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->from = from; __entry->to = to; __entry->reverse = reverse; __entry->found = found; __entry->found_blk = found_blk; ), TP_printk("dev %d,%d ino %lu from %u to %u reverse %d found %d " "(blk = %u)", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned) __entry->from, (unsigned) __entry->to, __entry->reverse, __entry->found, (unsigned) __entry->found_blk) ); TRACE_EVENT(ext4_get_reserved_cluster_alloc, TP_PROTO(struct inode *inode, ext4_lblk_t lblk, unsigned int len), TP_ARGS(inode, lblk, len), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_lblk_t, lblk ) __field( unsigned int, len ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->lblk = lblk; __entry->len = len; ), TP_printk("dev %d,%d ino %lu lblk %u len %u", 
MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned) __entry->lblk, __entry->len) ); TRACE_EVENT(ext4_ext_show_extent, TP_PROTO(struct inode *inode, ext4_lblk_t lblk, ext4_fsblk_t pblk, unsigned short len), TP_ARGS(inode, lblk, pblk, len), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_fsblk_t, pblk ) __field( ext4_lblk_t, lblk ) __field( unsigned short, len ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->pblk = pblk; __entry->lblk = lblk; __entry->len = len; ), TP_printk("dev %d,%d ino %lu lblk %u pblk %llu len %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned) __entry->lblk, (unsigned long long) __entry->pblk, (unsigned short) __entry->len) ); TRACE_EVENT(ext4_remove_blocks, TP_PROTO(struct inode *inode, struct ext4_extent *ex, ext4_lblk_t from, ext4_fsblk_t to, long long partial_cluster), TP_ARGS(inode, ex, from, to, partial_cluster), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_lblk_t, from ) __field( ext4_lblk_t, to ) __field( long long, partial ) __field( ext4_fsblk_t, ee_pblk ) __field( ext4_lblk_t, ee_lblk ) __field( unsigned short, ee_len ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->from = from; __entry->to = to; __entry->partial = partial_cluster; __entry->ee_pblk = ext4_ext_pblock(ex); __entry->ee_lblk = le32_to_cpu(ex->ee_block); __entry->ee_len = ext4_ext_get_actual_len(ex); ), TP_printk("dev %d,%d ino %lu extent [%u(%llu), %u]" "from %u to %u partial_cluster %lld", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned) __entry->ee_lblk, (unsigned long long) __entry->ee_pblk, (unsigned short) __entry->ee_len, (unsigned) __entry->from, (unsigned) __entry->to, (long long) __entry->partial) ); TRACE_EVENT(ext4_ext_rm_leaf, TP_PROTO(struct inode *inode, ext4_lblk_t start, struct ext4_extent *ex, long long partial_cluster), TP_ARGS(inode, start, ex, partial_cluster), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( long long, partial ) __field( ext4_lblk_t, start ) __field( ext4_lblk_t, ee_lblk ) __field( ext4_fsblk_t, ee_pblk ) __field( short, ee_len ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->partial = partial_cluster; __entry->start = start; __entry->ee_lblk = le32_to_cpu(ex->ee_block); __entry->ee_pblk = ext4_ext_pblock(ex); __entry->ee_len = ext4_ext_get_actual_len(ex); ), TP_printk("dev %d,%d ino %lu start_lblk %u last_extent [%u(%llu), %u]" "partial_cluster %lld", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned) __entry->start, (unsigned) __entry->ee_lblk, (unsigned long long) __entry->ee_pblk, (unsigned short) __entry->ee_len, (long long) __entry->partial) ); TRACE_EVENT(ext4_ext_rm_idx, TP_PROTO(struct inode *inode, ext4_fsblk_t pblk), TP_ARGS(inode, pblk), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_fsblk_t, pblk ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->pblk = pblk; ), TP_printk("dev %d,%d ino %lu index_pblk %llu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned long long) __entry->pblk) ); TRACE_EVENT(ext4_ext_remove_space, TP_PROTO(struct inode *inode, ext4_lblk_t start, ext4_lblk_t end, int depth), TP_ARGS(inode, start, end, depth), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( 
ext4_lblk_t, start ) __field( ext4_lblk_t, end ) __field( int, depth ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->start = start; __entry->end = end; __entry->depth = depth; ), TP_printk("dev %d,%d ino %lu since %u end %u depth %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned) __entry->start, (unsigned) __entry->end, __entry->depth) ); TRACE_EVENT(ext4_ext_remove_space_done, TP_PROTO(struct inode *inode, ext4_lblk_t start, ext4_lblk_t end, int depth, long long partial, __le16 eh_entries), TP_ARGS(inode, start, end, depth, partial, eh_entries), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_lblk_t, start ) __field( ext4_lblk_t, end ) __field( int, depth ) __field( long long, partial ) __field( unsigned short, eh_entries ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->start = start; __entry->end = end; __entry->depth = depth; __entry->partial = partial; __entry->eh_entries = le16_to_cpu(eh_entries); ), TP_printk("dev %d,%d ino %lu since %u end %u depth %d partial %lld " "remaining_entries %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned) __entry->start, (unsigned) __entry->end, __entry->depth, (long long) __entry->partial, (unsigned short) __entry->eh_entries) ); DECLARE_EVENT_CLASS(ext4__es_extent, TP_PROTO(struct inode *inode, struct extent_status *es), TP_ARGS(inode, es), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_lblk_t, lblk ) __field( ext4_lblk_t, len ) __field( ext4_fsblk_t, pblk ) __field( char, status ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->lblk = es->es_lblk; __entry->len = es->es_len; __entry->pblk = ext4_es_pblock(es); __entry->status = ext4_es_status(es); ), TP_printk("dev %d,%d ino %lu es [%u/%u) mapped %llu status %s", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->lblk, __entry->len, __entry->pblk, show_extent_status(__entry->status)) ); DEFINE_EVENT(ext4__es_extent, ext4_es_insert_extent, TP_PROTO(struct inode *inode, struct extent_status *es), TP_ARGS(inode, es) ); DEFINE_EVENT(ext4__es_extent, ext4_es_cache_extent, TP_PROTO(struct inode *inode, struct extent_status *es), TP_ARGS(inode, es) ); TRACE_EVENT(ext4_es_remove_extent, TP_PROTO(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len), TP_ARGS(inode, lblk, len), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( loff_t, lblk ) __field( loff_t, len ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->lblk = lblk; __entry->len = len; ), TP_printk("dev %d,%d ino %lu es [%lld/%lld)", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->lblk, __entry->len) ); TRACE_EVENT(ext4_es_find_delayed_extent_range_enter, TP_PROTO(struct inode *inode, ext4_lblk_t lblk), TP_ARGS(inode, lblk), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_lblk_t, lblk ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->lblk = lblk; ), TP_printk("dev %d,%d ino %lu lblk %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->lblk) ); TRACE_EVENT(ext4_es_find_delayed_extent_range_exit, TP_PROTO(struct inode *inode, struct extent_status *es), TP_ARGS(inode, es), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_lblk_t, lblk ) 
__field( ext4_lblk_t, len ) __field( ext4_fsblk_t, pblk ) __field( char, status ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->lblk = es->es_lblk; __entry->len = es->es_len; __entry->pblk = ext4_es_pblock(es); __entry->status = ext4_es_status(es); ), TP_printk("dev %d,%d ino %lu es [%u/%u) mapped %llu status %s", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->lblk, __entry->len, __entry->pblk, show_extent_status(__entry->status)) ); TRACE_EVENT(ext4_es_lookup_extent_enter, TP_PROTO(struct inode *inode, ext4_lblk_t lblk), TP_ARGS(inode, lblk), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_lblk_t, lblk ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->lblk = lblk; ), TP_printk("dev %d,%d ino %lu lblk %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->lblk) ); TRACE_EVENT(ext4_es_lookup_extent_exit, TP_PROTO(struct inode *inode, struct extent_status *es, int found), TP_ARGS(inode, es, found), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_lblk_t, lblk ) __field( ext4_lblk_t, len ) __field( ext4_fsblk_t, pblk ) __field( char, status ) __field( int, found ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->lblk = es->es_lblk; __entry->len = es->es_len; __entry->pblk = ext4_es_pblock(es); __entry->status = ext4_es_status(es); __entry->found = found; ), TP_printk("dev %d,%d ino %lu found %d [%u/%u) %llu %s", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->found, __entry->lblk, __entry->len, __entry->found ? __entry->pblk : 0, show_extent_status(__entry->found ? __entry->status : 0)) ); DECLARE_EVENT_CLASS(ext4__es_shrink_enter, TP_PROTO(struct super_block *sb, int nr_to_scan, int cache_cnt), TP_ARGS(sb, nr_to_scan, cache_cnt), TP_STRUCT__entry( __field( dev_t, dev ) __field( int, nr_to_scan ) __field( int, cache_cnt ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->nr_to_scan = nr_to_scan; __entry->cache_cnt = cache_cnt; ), TP_printk("dev %d,%d nr_to_scan %d cache_cnt %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->nr_to_scan, __entry->cache_cnt) ); DEFINE_EVENT(ext4__es_shrink_enter, ext4_es_shrink_count, TP_PROTO(struct super_block *sb, int nr_to_scan, int cache_cnt), TP_ARGS(sb, nr_to_scan, cache_cnt) ); DEFINE_EVENT(ext4__es_shrink_enter, ext4_es_shrink_scan_enter, TP_PROTO(struct super_block *sb, int nr_to_scan, int cache_cnt), TP_ARGS(sb, nr_to_scan, cache_cnt) ); TRACE_EVENT(ext4_es_shrink_scan_exit, TP_PROTO(struct super_block *sb, int nr_shrunk, int cache_cnt), TP_ARGS(sb, nr_shrunk, cache_cnt), TP_STRUCT__entry( __field( dev_t, dev ) __field( int, nr_shrunk ) __field( int, cache_cnt ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->nr_shrunk = nr_shrunk; __entry->cache_cnt = cache_cnt; ), TP_printk("dev %d,%d nr_shrunk %d cache_cnt %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->nr_shrunk, __entry->cache_cnt) ); TRACE_EVENT(ext4_collapse_range, TP_PROTO(struct inode *inode, loff_t offset, loff_t len), TP_ARGS(inode, offset, len), TP_STRUCT__entry( __field(dev_t, dev) __field(ino_t, ino) __field(loff_t, offset) __field(loff_t, len) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->offset = offset; __entry->len = len; ), TP_printk("dev %d,%d ino %lu offset %lld len %lld", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) 
__entry->ino, __entry->offset, __entry->len) ); TRACE_EVENT(ext4_insert_range, TP_PROTO(struct inode *inode, loff_t offset, loff_t len), TP_ARGS(inode, offset, len), TP_STRUCT__entry( __field(dev_t, dev) __field(ino_t, ino) __field(loff_t, offset) __field(loff_t, len) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->offset = offset; __entry->len = len; ), TP_printk("dev %d,%d ino %lu offset %lld len %lld", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->offset, __entry->len) ); TRACE_EVENT(ext4_es_shrink, TP_PROTO(struct super_block *sb, int nr_shrunk, u64 scan_time, int nr_skipped, int retried), TP_ARGS(sb, nr_shrunk, scan_time, nr_skipped, retried), TP_STRUCT__entry( __field( dev_t, dev ) __field( int, nr_shrunk ) __field( unsigned long long, scan_time ) __field( int, nr_skipped ) __field( int, retried ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->nr_shrunk = nr_shrunk; __entry->scan_time = div_u64(scan_time, 1000); __entry->nr_skipped = nr_skipped; __entry->retried = retried; ), TP_printk("dev %d,%d nr_shrunk %d, scan_time %llu " "nr_skipped %d retried %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->nr_shrunk, __entry->scan_time, __entry->nr_skipped, __entry->retried) ); /* fsmap traces */ DECLARE_EVENT_CLASS(ext4_fsmap_class, TP_PROTO(struct super_block *sb, u32 keydev, u32 agno, u64 bno, u64 len, u64 owner), TP_ARGS(sb, keydev, agno, bno, len, owner), TP_STRUCT__entry( __field(dev_t, dev) __field(dev_t, keydev) __field(u32, agno) __field(u64, bno) __field(u64, len) __field(u64, owner) ), TP_fast_assign( __entry->dev = sb->s_bdev->bd_dev; __entry->keydev = new_decode_dev(keydev); __entry->agno = agno; __entry->bno = bno; __entry->len = len; __entry->owner = owner; ), TP_printk("dev %d:%d keydev %d:%d agno %u bno %llu len %llu owner %lld\n", MAJOR(__entry->dev), MINOR(__entry->dev), MAJOR(__entry->keydev), MINOR(__entry->keydev), __entry->agno, __entry->bno, __entry->len, __entry->owner) ) #define DEFINE_FSMAP_EVENT(name) \ DEFINE_EVENT(ext4_fsmap_class, name, \ TP_PROTO(struct super_block *sb, u32 keydev, u32 agno, u64 bno, u64 len, \ u64 owner), \ TP_ARGS(sb, keydev, agno, bno, len, owner)) DEFINE_FSMAP_EVENT(ext4_fsmap_low_key); DEFINE_FSMAP_EVENT(ext4_fsmap_high_key); DEFINE_FSMAP_EVENT(ext4_fsmap_mapping); DECLARE_EVENT_CLASS(ext4_getfsmap_class, TP_PROTO(struct super_block *sb, struct ext4_fsmap *fsmap), TP_ARGS(sb, fsmap), TP_STRUCT__entry( __field(dev_t, dev) __field(dev_t, keydev) __field(u64, block) __field(u64, len) __field(u64, owner) __field(u64, flags) ), TP_fast_assign( __entry->dev = sb->s_bdev->bd_dev; __entry->keydev = new_decode_dev(fsmap->fmr_device); __entry->block = fsmap->fmr_physical; __entry->len = fsmap->fmr_length; __entry->owner = fsmap->fmr_owner; __entry->flags = fsmap->fmr_flags; ), TP_printk("dev %d:%d keydev %d:%d block %llu len %llu owner %lld flags 0x%llx\n", MAJOR(__entry->dev), MINOR(__entry->dev), MAJOR(__entry->keydev), MINOR(__entry->keydev), __entry->block, __entry->len, __entry->owner, __entry->flags) ) #define DEFINE_GETFSMAP_EVENT(name) \ DEFINE_EVENT(ext4_getfsmap_class, name, \ TP_PROTO(struct super_block *sb, struct ext4_fsmap *fsmap), \ TP_ARGS(sb, fsmap)) DEFINE_GETFSMAP_EVENT(ext4_getfsmap_low_key); DEFINE_GETFSMAP_EVENT(ext4_getfsmap_high_key); DEFINE_GETFSMAP_EVENT(ext4_getfsmap_mapping); TRACE_EVENT(ext4_shutdown, TP_PROTO(struct super_block *sb, unsigned long flags), TP_ARGS(sb, flags), TP_STRUCT__entry( __field( dev_t, dev ) __field( 
unsigned, flags ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->flags = flags; ), TP_printk("dev %d,%d flags %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->flags) ); TRACE_EVENT(ext4_error, TP_PROTO(struct super_block *sb, const char *function, unsigned int line), TP_ARGS(sb, function, line), TP_STRUCT__entry( __field( dev_t, dev ) __field( const char *, function ) __field( unsigned, line ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->function = function; __entry->line = line; ), TP_printk("dev %d,%d function %s line %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->function, __entry->line) ); #endif /* _TRACE_EXT4_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
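/*
 * Illustrative sketch, not part of the trace header above: every
 * TRACE_EVENT() defined in this file expands into a trace_<name>() helper
 * taking the TP_PROTO() arguments. A hypothetical ext4 call site (the real
 * ones live in fs/ext4) would simply invoke the helper at the matching
 * point, e.g. for ext4_ext_rm_idx:
 */
#include <trace/events/ext4.h>	/* this header, as included from fs/ext4 code */

static void example_ext_rm_idx(struct inode *inode, ext4_fsblk_t pblk)
{
	/*
	 * No-op unless the ext4_ext_rm_idx tracepoint is enabled; when it is,
	 * this records "dev %d,%d ino %lu index_pblk %llu" for the inode/block.
	 */
	trace_ext4_ext_rm_idx(inode, pblk);
}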
/* * DCCP over IPv6 * Linux INET6 implementation * * Based on net/dccp6/ipv6.c * * Arnaldo Carvalho de Melo <acme@ghostprotocols.net> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version.
*/ #include <linux/module.h> #include <linux/random.h> #include <linux/slab.h> #include <linux/xfrm.h> #include <linux/string.h> #include <net/addrconf.h> #include <net/inet_common.h> #include <net/inet_hashtables.h> #include <net/inet_sock.h> #include <net/inet6_connection_sock.h> #include <net/inet6_hashtables.h> #include <net/ip6_route.h> #include <net/ipv6.h> #include <net/protocol.h> #include <net/transp_v6.h> #include <net/ip6_checksum.h> #include <net/xfrm.h> #include <net/secure_seq.h> #include <net/sock.h> #include "dccp.h" #include "ipv6.h" #include "feat.h" /* The per-net dccp.v6_ctl_sk is used for sending RSTs and ACKs */ static const struct inet_connection_sock_af_ops dccp_ipv6_mapped; static const struct inet_connection_sock_af_ops dccp_ipv6_af_ops; /* add pseudo-header to DCCP checksum stored in skb->csum */ static inline __sum16 dccp_v6_csum_finish(struct sk_buff *skb, const struct in6_addr *saddr, const struct in6_addr *daddr) { return csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_DCCP, skb->csum); } static inline void dccp_v6_send_check(struct sock *sk, struct sk_buff *skb) { struct ipv6_pinfo *np = inet6_sk(sk); struct dccp_hdr *dh = dccp_hdr(skb); dccp_csum_outgoing(skb); dh->dccph_checksum = dccp_v6_csum_finish(skb, &np->saddr, &sk->sk_v6_daddr); } static inline __u64 dccp_v6_init_sequence(struct sk_buff *skb) { return secure_dccpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32, ipv6_hdr(skb)->saddr.s6_addr32, dccp_hdr(skb)->dccph_dport, dccp_hdr(skb)->dccph_sport ); } static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; const struct dccp_hdr *dh; struct dccp_sock *dp; struct ipv6_pinfo *np; struct sock *sk; int err; __u64 seq; struct net *net = dev_net(skb->dev); /* Only need dccph_dport & dccph_sport which are the first * 4 bytes in dccp header. * Our caller (icmpv6_notify()) already pulled 8 bytes for us. 
*/ BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8); BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8); dh = (struct dccp_hdr *)(skb->data + offset); sk = __inet6_lookup_established(net, &dccp_hashinfo, &hdr->daddr, dh->dccph_dport, &hdr->saddr, ntohs(dh->dccph_sport), inet6_iif(skb), 0); if (!sk) { __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); return; } if (sk->sk_state == DCCP_TIME_WAIT) { inet_twsk_put(inet_twsk(sk)); return; } seq = dccp_hdr_seq(dh); if (sk->sk_state == DCCP_NEW_SYN_RECV) return dccp_req_err(sk, seq); bh_lock_sock(sk); if (sock_owned_by_user(sk)) __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS); if (sk->sk_state == DCCP_CLOSED) goto out; dp = dccp_sk(sk); if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) && !between48(seq, dp->dccps_awl, dp->dccps_awh)) { __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } np = inet6_sk(sk); if (type == NDISC_REDIRECT) { if (!sock_owned_by_user(sk)) { struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); if (dst) dst->ops->redirect(dst, sk, skb); } goto out; } if (type == ICMPV6_PKT_TOOBIG) { struct dst_entry *dst = NULL; if (!ip6_sk_accept_pmtu(sk)) goto out; if (sock_owned_by_user(sk)) goto out; if ((1 << sk->sk_state) & (DCCPF_LISTEN | DCCPF_CLOSED)) goto out; dst = inet6_csk_update_pmtu(sk, ntohl(info)); if (!dst) goto out; if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) dccp_sync_mss(sk, dst_mtu(dst)); goto out; } icmpv6_err_convert(type, code, &err); /* Might be for an request_sock */ switch (sk->sk_state) { case DCCP_REQUESTING: case DCCP_RESPOND: /* Cannot happen. It can, it SYNs are crossed. --ANK */ if (!sock_owned_by_user(sk)) { __DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS); sk->sk_err = err; /* * Wake people up to see the error * (see connect in sock.c) */ sk->sk_error_report(sk); dccp_done(sk); } else sk->sk_err_soft = err; goto out; } if (!sock_owned_by_user(sk) && np->recverr) { sk->sk_err = err; sk->sk_error_report(sk); } else sk->sk_err_soft = err; out: bh_unlock_sock(sk); sock_put(sk); } static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req) { struct inet_request_sock *ireq = inet_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff *skb; struct in6_addr *final_p, final; struct flowi6 fl6; int err = -1; struct dst_entry *dst; memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_DCCP; fl6.daddr = ireq->ir_v6_rmt_addr; fl6.saddr = ireq->ir_v6_loc_addr; fl6.flowlabel = 0; fl6.flowi6_oif = ireq->ir_iif; fl6.fl6_dport = ireq->ir_rmt_port; fl6.fl6_sport = htons(ireq->ir_num); security_req_classify_flow(req, flowi6_to_flowi(&fl6)); rcu_read_lock(); final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final); rcu_read_unlock(); dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p); if (IS_ERR(dst)) { err = PTR_ERR(dst); dst = NULL; goto done; } skb = dccp_make_response(sk, dst, req); if (skb != NULL) { struct dccp_hdr *dh = dccp_hdr(skb); struct ipv6_txoptions *opt; dh->dccph_checksum = dccp_v6_csum_finish(skb, &ireq->ir_v6_loc_addr, &ireq->ir_v6_rmt_addr); fl6.daddr = ireq->ir_v6_rmt_addr; rcu_read_lock(); opt = ireq->ipv6_opt; if (!opt) opt = rcu_dereference(np->opt); err = ip6_xmit(sk, skb, &fl6, sk->sk_mark, opt, np->tclass); rcu_read_unlock(); err = net_xmit_eval(err); } done: dst_release(dst); return err; } static void dccp_v6_reqsk_destructor(struct request_sock *req) { dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg); kfree(inet_rsk(req)->ipv6_opt); kfree_skb(inet_rsk(req)->pktopts); } static void 
dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb) { const struct ipv6hdr *rxip6h; struct sk_buff *skb; struct flowi6 fl6; struct net *net = dev_net(skb_dst(rxskb)->dev); struct sock *ctl_sk = net->dccp.v6_ctl_sk; struct dst_entry *dst; if (dccp_hdr(rxskb)->dccph_type == DCCP_PKT_RESET) return; if (!ipv6_unicast_destination(rxskb)) return; skb = dccp_ctl_make_reset(ctl_sk, rxskb); if (skb == NULL) return; rxip6h = ipv6_hdr(rxskb); dccp_hdr(skb)->dccph_checksum = dccp_v6_csum_finish(skb, &rxip6h->saddr, &rxip6h->daddr); memset(&fl6, 0, sizeof(fl6)); fl6.daddr = rxip6h->saddr; fl6.saddr = rxip6h->daddr; fl6.flowi6_proto = IPPROTO_DCCP; fl6.flowi6_oif = inet6_iif(rxskb); fl6.fl6_dport = dccp_hdr(skb)->dccph_dport; fl6.fl6_sport = dccp_hdr(skb)->dccph_sport; security_skb_classify_flow(rxskb, flowi6_to_flowi(&fl6)); /* sk = NULL, but it is safe for now. RST socket required. */ dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL); if (!IS_ERR(dst)) { skb_dst_set(skb, dst); ip6_xmit(ctl_sk, skb, &fl6, 0, NULL, 0); DCCP_INC_STATS(DCCP_MIB_OUTSEGS); DCCP_INC_STATS(DCCP_MIB_OUTRSTS); return; } kfree_skb(skb); } static struct request_sock_ops dccp6_request_sock_ops = { .family = AF_INET6, .obj_size = sizeof(struct dccp6_request_sock), .rtx_syn_ack = dccp_v6_send_response, .send_ack = dccp_reqsk_send_ack, .destructor = dccp_v6_reqsk_destructor, .send_reset = dccp_v6_ctl_send_reset, .syn_ack_timeout = dccp_syn_ack_timeout, }; static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) { struct request_sock *req; struct dccp_request_sock *dreq; struct inet_request_sock *ireq; struct ipv6_pinfo *np = inet6_sk(sk); const __be32 service = dccp_hdr_request(skb)->dccph_req_service; struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); if (skb->protocol == htons(ETH_P_IP)) return dccp_v4_conn_request(sk, skb); if (!ipv6_unicast_destination(skb)) return 0; /* discard, don't send a reset here */ if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) { __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS); return 0; } if (dccp_bad_service_code(sk, service)) { dcb->dccpd_reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE; goto drop; } /* * There are no SYN attacks on IPv6, yet... */ dcb->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY; if (inet_csk_reqsk_queue_is_full(sk)) goto drop; if (sk_acceptq_is_full(sk)) goto drop; req = inet_reqsk_alloc(&dccp6_request_sock_ops, sk, true); if (req == NULL) goto drop; if (dccp_reqsk_init(req, dccp_sk(sk), skb)) goto drop_and_free; dreq = dccp_rsk(req); if (dccp_parse_options(sk, dreq, skb)) goto drop_and_free; if (security_inet_conn_request(sk, skb, req)) goto drop_and_free; ireq = inet_rsk(req); ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; ireq->ireq_family = AF_INET6; ireq->ir_mark = inet_request_mark(sk, skb); if (ipv6_opt_accepted(sk, skb, IP6CB(skb)) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { refcount_inc(&skb->users); ireq->pktopts = skb; } ireq->ir_iif = sk->sk_bound_dev_if; /* So that link locals have meaning */ if (!sk->sk_bound_dev_if && ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) ireq->ir_iif = inet6_iif(skb); /* * Step 3: Process LISTEN state * * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie * * Setting S.SWL/S.SWH to is deferred to dccp_create_openreq_child(). 
*/ dreq->dreq_isr = dcb->dccpd_seq; dreq->dreq_gsr = dreq->dreq_isr; dreq->dreq_iss = dccp_v6_init_sequence(skb); dreq->dreq_gss = dreq->dreq_iss; dreq->dreq_service = service; if (dccp_v6_send_response(sk, req)) goto drop_and_free; inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); reqsk_put(req); return 0; drop_and_free: reqsk_free(req); drop: __DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS); return -1; } static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst, struct request_sock *req_unhash, bool *own_req) { struct inet_request_sock *ireq = inet_rsk(req); struct ipv6_pinfo *newnp; const struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6_txoptions *opt; struct inet_sock *newinet; struct dccp6_sock *newdp6; struct sock *newsk; if (skb->protocol == htons(ETH_P_IP)) { /* * v6 mapped */ newsk = dccp_v4_request_recv_sock(sk, skb, req, dst, req_unhash, own_req); if (newsk == NULL) return NULL; newdp6 = (struct dccp6_sock *)newsk; newinet = inet_sk(newsk); newinet->pinet6 = &newdp6->inet6; newnp = inet6_sk(newsk); memcpy(newnp, np, sizeof(struct ipv6_pinfo)); newnp->saddr = newsk->sk_v6_rcv_saddr; inet_csk(newsk)->icsk_af_ops = &dccp_ipv6_mapped; newsk->sk_backlog_rcv = dccp_v4_do_rcv; newnp->pktoptions = NULL; newnp->opt = NULL; newnp->ipv6_mc_list = NULL; newnp->ipv6_ac_list = NULL; newnp->ipv6_fl_list = NULL; newnp->mcast_oif = inet_iif(skb); newnp->mcast_hops = ip_hdr(skb)->ttl; /* * No need to charge this sock to the relevant IPv6 refcnt debug socks count * here, dccp_create_openreq_child now does this for us, see the comment in * that function for the gory details. -acme */ /* It is tricky place. Until this moment IPv4 tcp worked with IPv6 icsk.icsk_af_ops. Sync it now. */ dccp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie); return newsk; } if (sk_acceptq_is_full(sk)) goto out_overflow; if (!dst) { struct flowi6 fl6; dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_DCCP); if (!dst) goto out; } newsk = dccp_create_openreq_child(sk, req, skb); if (newsk == NULL) goto out_nonewsk; /* * No need to charge this sock to the relevant IPv6 refcnt debug socks * count here, dccp_create_openreq_child now does this for us, see the * comment in that function for the gory details. -acme */ ip6_dst_store(newsk, dst, NULL, NULL); newsk->sk_route_caps = dst->dev->features & ~(NETIF_F_IP_CSUM | NETIF_F_TSO); newdp6 = (struct dccp6_sock *)newsk; newinet = inet_sk(newsk); newinet->pinet6 = &newdp6->inet6; newnp = inet6_sk(newsk); memcpy(newnp, np, sizeof(struct ipv6_pinfo)); newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr; newnp->saddr = ireq->ir_v6_loc_addr; newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr; newsk->sk_bound_dev_if = ireq->ir_iif; /* Now IPv6 options... First: no IPv4 options. */ newinet->inet_opt = NULL; /* Clone RX bits */ newnp->rxopt.all = np->rxopt.all; newnp->ipv6_mc_list = NULL; newnp->ipv6_ac_list = NULL; newnp->ipv6_fl_list = NULL; newnp->pktoptions = NULL; newnp->opt = NULL; newnp->mcast_oif = inet6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; /* * Clone native IPv6 options from listening socket (if any) * * Yes, keeping reference count would be much more clever, but we make * one more one thing there: reattach optmem to newsk. 
*/ opt = ireq->ipv6_opt; if (!opt) opt = rcu_dereference(np->opt); if (opt) { opt = ipv6_dup_options(newsk, opt); RCU_INIT_POINTER(newnp->opt, opt); } inet_csk(newsk)->icsk_ext_hdr_len = 0; if (opt) inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen + opt->opt_flen; dccp_sync_mss(newsk, dst_mtu(dst)); newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; newinet->inet_rcv_saddr = LOOPBACK4_IPV6; if (__inet_inherit_port(sk, newsk) < 0) { inet_csk_prepare_forced_close(newsk); dccp_done(newsk); goto out; } *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), NULL); /* Clone pktoptions received with SYN, if we own the req */ if (*own_req && ireq->pktopts) { newnp->pktoptions = skb_clone(ireq->pktopts, GFP_ATOMIC); consume_skb(ireq->pktopts); ireq->pktopts = NULL; if (newnp->pktoptions) skb_set_owner_r(newnp->pktoptions, newsk); } return newsk; out_overflow: __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); out_nonewsk: dst_release(dst); out: __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS); return NULL; } /* The socket must have it's spinlock held when we get * here. * * We have a potential double-lock case here, so even when * doing backlog processing we use the BH locking scheme. * This is because we cannot sleep with the original spinlock * held. */ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) { struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff *opt_skb = NULL; /* Imagine: socket is IPv6. IPv4 packet arrives, goes to IPv4 receive handler and backlogged. From backlog it always goes here. Kerboom... Fortunately, dccp_rcv_established and rcv_established handle them correctly, but it is not case with dccp_v6_hnd_req and dccp_v6_ctl_send_reset(). --ANK */ if (skb->protocol == htons(ETH_P_IP)) return dccp_v4_do_rcv(sk, skb); if (sk_filter(sk, skb)) goto discard; /* * socket locking is here for SMP purposes as backlog rcv is currently * called with bh processing disabled. */ /* Do Stevens' IPV6_PKTOPTIONS. Yes, guys, it is the only place in our code, where we may make it not affecting IPv4. The rest of code is protocol independent, and I do not like idea to uglify IPv4. Actually, all the idea behind IPV6_PKTOPTIONS looks not very well thought. For now we latch options, received in the last packet, enqueued by tcp. Feel free to propose better solution. --ANK (980728) */ if (np->rxopt.all) opt_skb = skb_clone(skb, GFP_ATOMIC); if (sk->sk_state == DCCP_OPEN) { /* Fast path */ if (dccp_rcv_established(sk, skb, dccp_hdr(skb), skb->len)) goto reset; if (opt_skb) goto ipv6_pktoptions; return 0; } /* * Step 3: Process LISTEN state * If S.state == LISTEN, * If P.type == Request or P contains a valid Init Cookie option, * (* Must scan the packet's options to check for Init * Cookies. Only Init Cookies are processed here, * however; other options are processed in Step 8. 
This * scan need only be performed if the endpoint uses Init * Cookies *) * (* Generate a new socket and switch to that socket *) * Set S := new socket for this port pair * S.state = RESPOND * Choose S.ISS (initial seqno) or set from Init Cookies * Initialize S.GAR := S.ISS * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies * Continue with S.state == RESPOND * (* A Response packet will be generated in Step 11 *) * Otherwise, * Generate Reset(No Connection) unless P.type == Reset * Drop packet and return * * NOTE: the check for the packet types is done in * dccp_rcv_state_process */ if (dccp_rcv_state_process(sk, skb, dccp_hdr(skb), skb->len)) goto reset; if (opt_skb) goto ipv6_pktoptions; return 0; reset: dccp_v6_ctl_send_reset(sk, skb); discard: if (opt_skb != NULL) __kfree_skb(opt_skb); kfree_skb(skb); return 0; /* Handling IPV6_PKTOPTIONS skb the similar * way it's done for net/ipv6/tcp_ipv6.c */ ipv6_pktoptions: if (!((1 << sk->sk_state) & (DCCPF_CLOSED | DCCPF_LISTEN))) { if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo) np->mcast_oif = inet6_iif(opt_skb); if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit; if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass) np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb)); if (np->repflow) np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); if (ipv6_opt_accepted(sk, opt_skb, &DCCP_SKB_CB(opt_skb)->header.h6)) { skb_set_owner_r(opt_skb, sk); memmove(IP6CB(opt_skb), &DCCP_SKB_CB(opt_skb)->header.h6, sizeof(struct inet6_skb_parm)); opt_skb = xchg(&np->pktoptions, opt_skb); } else { __kfree_skb(opt_skb); opt_skb = xchg(&np->pktoptions, NULL); } } kfree_skb(opt_skb); return 0; } static int dccp_v6_rcv(struct sk_buff *skb) { const struct dccp_hdr *dh; bool refcounted; struct sock *sk; int min_cov; /* Step 1: Check header basics */ if (dccp_invalid_packet(skb)) goto discard_it; /* Step 1: If header checksum is incorrect, drop packet and return. */ if (dccp_v6_csum_finish(skb, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr)) { DCCP_WARN("dropped packet with invalid checksum\n"); goto discard_it; } dh = dccp_hdr(skb); DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(dh); DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type; if (dccp_packet_without_ack(skb)) DCCP_SKB_CB(skb)->dccpd_ack_seq = DCCP_PKT_WITHOUT_ACK_SEQ; else DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb); lookup: sk = __inet6_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh), dh->dccph_sport, dh->dccph_dport, inet6_iif(skb), 0, &refcounted); if (!sk) { dccp_pr_debug("failed to look up flow ID in table and " "get corresponding socket\n"); goto no_dccp_socket; } /* * Step 2: * ... or S.state == TIMEWAIT, * Generate Reset(No Connection) unless P.type == Reset * Drop packet and return */ if (sk->sk_state == DCCP_TIME_WAIT) { dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: do_time_wait\n"); inet_twsk_put(inet_twsk(sk)); goto no_dccp_socket; } if (sk->sk_state == DCCP_NEW_SYN_RECV) { struct request_sock *req = inet_reqsk(sk); struct sock *nsk; sk = req->rsk_listener; if (unlikely(sk->sk_state != DCCP_LISTEN)) { inet_csk_reqsk_queue_drop_and_put(sk, req); goto lookup; } sock_hold(sk); refcounted = true; nsk = dccp_check_req(sk, skb, req); if (!nsk) { reqsk_put(req); goto discard_and_relse; } if (nsk == sk) { reqsk_put(req); } else if (dccp_child_process(sk, nsk, skb)) { dccp_v6_ctl_send_reset(sk, skb); goto discard_and_relse; } else { sock_put(sk); return 0; } } /* * RFC 4340, sec. 
9.2.1: Minimum Checksum Coverage * o if MinCsCov = 0, only packets with CsCov = 0 are accepted * o if MinCsCov > 0, also accept packets with CsCov >= MinCsCov */ min_cov = dccp_sk(sk)->dccps_pcrlen; if (dh->dccph_cscov && (min_cov == 0 || dh->dccph_cscov < min_cov)) { dccp_pr_debug("Packet CsCov %d does not satisfy MinCsCov %d\n", dh->dccph_cscov, min_cov); /* FIXME: send Data Dropped option (see also dccp_v4_rcv) */ goto discard_and_relse; } if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4, refcounted) ? -1 : 0; no_dccp_socket: if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) goto discard_it; /* * Step 2: * If no socket ... * Generate Reset(No Connection) unless P.type == Reset * Drop packet and return */ if (dh->dccph_type != DCCP_PKT_RESET) { DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; dccp_v6_ctl_send_reset(sk, skb); } discard_it: kfree_skb(skb); return 0; discard_and_relse: if (refcounted) sock_put(sk); goto discard_it; } static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct sockaddr_in6 *usin = (struct sockaddr_in6 *)uaddr; struct inet_connection_sock *icsk = inet_csk(sk); struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct dccp_sock *dp = dccp_sk(sk); struct in6_addr *saddr = NULL, *final_p, final; struct ipv6_txoptions *opt; struct flowi6 fl6; struct dst_entry *dst; int addr_type; int err; dp->dccps_role = DCCP_ROLE_CLIENT; if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; if (usin->sin6_family != AF_INET6) return -EAFNOSUPPORT; memset(&fl6, 0, sizeof(fl6)); if (np->sndflow) { fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK; IP6_ECN_flow_init(fl6.flowlabel); if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) { struct ip6_flowlabel *flowlabel; flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; fl6_sock_release(flowlabel); } } /* * connect() to INADDR_ANY means loopback (BSD'ism). */ if (ipv6_addr_any(&usin->sin6_addr)) usin->sin6_addr.s6_addr[15] = 1; addr_type = ipv6_addr_type(&usin->sin6_addr); if (addr_type & IPV6_ADDR_MULTICAST) return -ENETUNREACH; if (addr_type & IPV6_ADDR_LINKLOCAL) { if (addr_len >= sizeof(struct sockaddr_in6) && usin->sin6_scope_id) { /* If interface is set while binding, indices * must coincide. */ if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != usin->sin6_scope_id) return -EINVAL; sk->sk_bound_dev_if = usin->sin6_scope_id; } /* Connect to link-local address requires an interface */ if (!sk->sk_bound_dev_if) return -EINVAL; } sk->sk_v6_daddr = usin->sin6_addr; np->flow_label = fl6.flowlabel; /* * DCCP over IPv4 */ if (addr_type == IPV6_ADDR_MAPPED) { u32 exthdrlen = icsk->icsk_ext_hdr_len; struct sockaddr_in sin; SOCK_DEBUG(sk, "connect: ipv4 mapped\n"); if (__ipv6_only_sock(sk)) return -ENETUNREACH; sin.sin_family = AF_INET; sin.sin_port = usin->sin6_port; sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; icsk->icsk_af_ops = &dccp_ipv6_mapped; sk->sk_backlog_rcv = dccp_v4_do_rcv; err = dccp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin)); if (err) { icsk->icsk_ext_hdr_len = exthdrlen; icsk->icsk_af_ops = &dccp_ipv6_af_ops; sk->sk_backlog_rcv = dccp_v6_do_rcv; goto failure; } np->saddr = sk->sk_v6_rcv_saddr; return err; } if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) saddr = &sk->sk_v6_rcv_saddr; fl6.flowi6_proto = IPPROTO_DCCP; fl6.daddr = sk->sk_v6_daddr; fl6.saddr = saddr ? 
*saddr : np->saddr; fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.fl6_dport = usin->sin6_port; fl6.fl6_sport = inet->inet_sport; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk)); final_p = fl6_update_dst(&fl6, opt, &final); dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p); if (IS_ERR(dst)) { err = PTR_ERR(dst); goto failure; } if (saddr == NULL) { saddr = &fl6.saddr; sk->sk_v6_rcv_saddr = *saddr; } /* set the source address */ np->saddr = *saddr; inet->inet_rcv_saddr = LOOPBACK4_IPV6; ip6_dst_store(sk, dst, NULL, NULL); icsk->icsk_ext_hdr_len = 0; if (opt) icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen; inet->inet_dport = usin->sin6_port; dccp_set_state(sk, DCCP_REQUESTING); err = inet6_hash_connect(&dccp_death_row, sk); if (err) goto late_failure; dp->dccps_iss = secure_dccpv6_sequence_number(np->saddr.s6_addr32, sk->sk_v6_daddr.s6_addr32, inet->inet_sport, inet->inet_dport); err = dccp_connect(sk); if (err) goto late_failure; return 0; late_failure: dccp_set_state(sk, DCCP_CLOSED); __sk_dst_reset(sk); failure: inet->inet_dport = 0; sk->sk_route_caps = 0; return err; } static const struct inet_connection_sock_af_ops dccp_ipv6_af_ops = { .queue_xmit = inet6_csk_xmit, .send_check = dccp_v6_send_check, .rebuild_header = inet6_sk_rebuild_header, .conn_request = dccp_v6_conn_request, .syn_recv_sock = dccp_v6_request_recv_sock, .net_header_len = sizeof(struct ipv6hdr), .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, .addr2sockaddr = inet6_csk_addr2sockaddr, .sockaddr_len = sizeof(struct sockaddr_in6), #ifdef CONFIG_COMPAT .compat_setsockopt = compat_ipv6_setsockopt, .compat_getsockopt = compat_ipv6_getsockopt, #endif }; /* * DCCP over IPv4 via INET6 API */ static const struct inet_connection_sock_af_ops dccp_ipv6_mapped = { .queue_xmit = ip_queue_xmit, .send_check = dccp_v4_send_check, .rebuild_header = inet_sk_rebuild_header, .conn_request = dccp_v6_conn_request, .syn_recv_sock = dccp_v6_request_recv_sock, .net_header_len = sizeof(struct iphdr), .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, .addr2sockaddr = inet6_csk_addr2sockaddr, .sockaddr_len = sizeof(struct sockaddr_in6), #ifdef CONFIG_COMPAT .compat_setsockopt = compat_ipv6_setsockopt, .compat_getsockopt = compat_ipv6_getsockopt, #endif }; /* NOTE: A lot of things set to zero explicitly by call to * sk_alloc() so need not be done here. 
*/ static int dccp_v6_init_sock(struct sock *sk) { static __u8 dccp_v6_ctl_sock_initialized; int err = dccp_init_sock(sk, dccp_v6_ctl_sock_initialized); if (err == 0) { if (unlikely(!dccp_v6_ctl_sock_initialized)) dccp_v6_ctl_sock_initialized = 1; inet_csk(sk)->icsk_af_ops = &dccp_ipv6_af_ops; } return err; } static void dccp_v6_destroy_sock(struct sock *sk) { dccp_destroy_sock(sk); inet6_destroy_sock(sk); } static struct timewait_sock_ops dccp6_timewait_sock_ops = { .twsk_obj_size = sizeof(struct dccp6_timewait_sock), }; static struct proto dccp_v6_prot = { .name = "DCCPv6", .owner = THIS_MODULE, .close = dccp_close, .connect = dccp_v6_connect, .disconnect = dccp_disconnect, .ioctl = dccp_ioctl, .init = dccp_v6_init_sock, .setsockopt = dccp_setsockopt, .getsockopt = dccp_getsockopt, .sendmsg = dccp_sendmsg, .recvmsg = dccp_recvmsg, .backlog_rcv = dccp_v6_do_rcv, .hash = inet6_hash, .unhash = inet_unhash, .accept = inet_csk_accept, .get_port = inet_csk_get_port, .shutdown = dccp_shutdown, .destroy = dccp_v6_destroy_sock, .orphan_count = &dccp_orphan_count, .max_header = MAX_DCCP_HEADER, .obj_size = sizeof(struct dccp6_sock), .slab_flags = SLAB_TYPESAFE_BY_RCU, .rsk_prot = &dccp6_request_sock_ops, .twsk_prot = &dccp6_timewait_sock_ops, .h.hashinfo = &dccp_hashinfo, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_dccp_setsockopt, .compat_getsockopt = compat_dccp_getsockopt, #endif }; static const struct inet6_protocol dccp_v6_protocol = { .handler = dccp_v6_rcv, .err_handler = dccp_v6_err, .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL, }; static const struct proto_ops inet6_dccp_ops = { .family = PF_INET6, .owner = THIS_MODULE, .release = inet6_release, .bind = inet6_bind, .connect = inet_stream_connect, .socketpair = sock_no_socketpair, .accept = inet_accept, .getname = inet6_getname, .poll = dccp_poll, .ioctl = inet6_ioctl, .listen = inet_dccp_listen, .shutdown = inet_shutdown, .setsockopt = sock_common_setsockopt, .getsockopt = sock_common_getsockopt, .sendmsg = inet_sendmsg, .recvmsg = sock_common_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, #endif }; static struct inet_protosw dccp_v6_protosw = { .type = SOCK_DCCP, .protocol = IPPROTO_DCCP, .prot = &dccp_v6_prot, .ops = &inet6_dccp_ops, .flags = INET_PROTOSW_ICSK, }; static int __net_init dccp_v6_init_net(struct net *net) { if (dccp_hashinfo.bhash == NULL) return -ESOCKTNOSUPPORT; return inet_ctl_sock_create(&net->dccp.v6_ctl_sk, PF_INET6, SOCK_DCCP, IPPROTO_DCCP, net); } static void __net_exit dccp_v6_exit_net(struct net *net) { inet_ctl_sock_destroy(net->dccp.v6_ctl_sk); } static void __net_exit dccp_v6_exit_batch(struct list_head *net_exit_list) { inet_twsk_purge(&dccp_hashinfo, AF_INET6); } static struct pernet_operations dccp_v6_ops = { .init = dccp_v6_init_net, .exit = dccp_v6_exit_net, .exit_batch = dccp_v6_exit_batch, }; static int __init dccp_v6_init(void) { int err = proto_register(&dccp_v6_prot, 1); if (err) goto out; inet6_register_protosw(&dccp_v6_protosw); err = register_pernet_subsys(&dccp_v6_ops); if (err) goto out_destroy_ctl_sock; err = inet6_add_protocol(&dccp_v6_protocol, IPPROTO_DCCP); if (err) goto out_unregister_proto; out: return err; out_unregister_proto: unregister_pernet_subsys(&dccp_v6_ops); out_destroy_ctl_sock: inet6_unregister_protosw(&dccp_v6_protosw); proto_unregister(&dccp_v6_prot); goto out; } static void __exit dccp_v6_exit(void) { 
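/* Tear down in the reverse order of dccp_v6_init(): inet6 protocol handler, pernet ops, protosw entry, then the proto itself. */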
inet6_del_protocol(&dccp_v6_protocol, IPPROTO_DCCP); unregister_pernet_subsys(&dccp_v6_ops); inet6_unregister_protosw(&dccp_v6_protosw); proto_unregister(&dccp_v6_prot); } module_init(dccp_v6_init); module_exit(dccp_v6_exit); /* * __stringify doesn't like enums, so use SOCK_DCCP (6) and IPPROTO_DCCP (33) * values directly. Also cover the case where the protocol is not specified, * i.e. net-pf-PF_INET6-proto-0-type-SOCK_DCCP */ MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 33, 6); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 0, 6); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@mandriva.com>"); MODULE_DESCRIPTION("DCCPv6 - Datagram Congestion Controlled Protocol");
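/*
 * Illustrative userspace sketch, not part of net/dccp/ipv6.c above: once
 * dccp_v6_protosw is registered (and the module aliases above make it
 * demand-loadable), a DCCPv6 socket is requested with the usual socket()
 * triple. The numeric fallbacks mirror the SOCK_DCCP (6) / IPPROTO_DCCP (33)
 * values mentioned in the comment above.
 */
#include <sys/socket.h>
#include <netinet/in.h>
#include <unistd.h>
#include <stdio.h>

#ifndef SOCK_DCCP
#define SOCK_DCCP 6		/* matches dccp_v6_protosw.type */
#endif
#ifndef IPPROTO_DCCP
#define IPPROTO_DCCP 33		/* matches dccp_v6_protosw.protocol */
#endif

int main(void)
{
	int fd = socket(AF_INET6, SOCK_DCCP, IPPROTO_DCCP);

	if (fd < 0) {
		perror("socket");	/* e.g. DCCP not built into this kernel */
		return 1;
	}
	close(fd);
	return 0;
}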
/* * fs/f2fs/dir.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ #include <linux/fs.h> #include <linux/f2fs_fs.h> #include <linux/sched/signal.h> #include "f2fs.h" #include "node.h" #include "acl.h" #include "xattr.h" #include <trace/events/f2fs.h> static unsigned long dir_blocks(struct inode *inode) { return ((unsigned long long) (i_size_read(inode) + PAGE_SIZE - 1)) >> PAGE_SHIFT; } static unsigned int dir_buckets(unsigned int level, int dir_level) { if (level + dir_level < MAX_DIR_HASH_DEPTH / 2) return 1 << (level + dir_level); else return MAX_DIR_BUCKETS; } static unsigned int bucket_blocks(unsigned int level) { if (level < MAX_DIR_HASH_DEPTH / 2) return 2; else return 4; } static unsigned char f2fs_filetype_table[F2FS_FT_MAX] = { [F2FS_FT_UNKNOWN] = DT_UNKNOWN, [F2FS_FT_REG_FILE] = DT_REG, [F2FS_FT_DIR] = DT_DIR, [F2FS_FT_CHRDEV] = DT_CHR, [F2FS_FT_BLKDEV] = DT_BLK, [F2FS_FT_FIFO] = DT_FIFO, [F2FS_FT_SOCK] = DT_SOCK, [F2FS_FT_SYMLINK] = DT_LNK, }; static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = { [S_IFREG >> S_SHIFT] = F2FS_FT_REG_FILE, [S_IFDIR >> S_SHIFT] = F2FS_FT_DIR, [S_IFCHR >> S_SHIFT] = F2FS_FT_CHRDEV, [S_IFBLK >> S_SHIFT] = F2FS_FT_BLKDEV, [S_IFIFO >> S_SHIFT] = F2FS_FT_FIFO, [S_IFSOCK >> S_SHIFT] = F2FS_FT_SOCK, [S_IFLNK >> S_SHIFT] = F2FS_FT_SYMLINK, }; static void set_de_type(struct f2fs_dir_entry *de, umode_t mode) { de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT]; } unsigned char f2fs_get_de_type(struct f2fs_dir_entry *de) { if (de->file_type < F2FS_FT_MAX) return f2fs_filetype_table[de->file_type]; return DT_UNKNOWN; } static unsigned long dir_block_index(unsigned int level, int dir_level, unsigned int idx) { unsigned long i; unsigned long bidx = 0; for (i = 0; i < level; i++) bidx += dir_buckets(i, dir_level) * bucket_blocks(i); bidx += idx * bucket_blocks(level); return bidx; } static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, struct fscrypt_name *fname, f2fs_hash_t namehash, int *max_slots, struct page **res_page) { struct f2fs_dentry_block *dentry_blk; struct f2fs_dir_entry *de; struct f2fs_dentry_ptr d; dentry_blk = (struct f2fs_dentry_block *)page_address(dentry_page); make_dentry_ptr_block(NULL, &d, dentry_blk); de = f2fs_find_target_dentry(fname, namehash, max_slots, &d); if (de) *res_page = dentry_page; return de; } struct f2fs_dir_entry *f2fs_find_target_dentry(struct fscrypt_name *fname, f2fs_hash_t namehash, int *max_slots, struct f2fs_dentry_ptr *d) { struct f2fs_dir_entry *de; unsigned long bit_pos = 0; int max_len = 0; if (max_slots) *max_slots = 0; while (bit_pos < d->max) { if (!test_bit_le(bit_pos, d->bitmap)) { bit_pos++; max_len++; continue; } de = &d->dentry[bit_pos]; if (unlikely(!de->name_len)) { bit_pos++; continue; } if (de->hash_code == namehash && fscrypt_match_name(fname, d->filename[bit_pos], le16_to_cpu(de->name_len))) goto found; if (max_slots && max_len > *max_slots) *max_slots = max_len; max_len = 0; bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); } de = NULL; found: if (max_slots && max_len > *max_slots) *max_slots = max_len; return de; } static 
struct f2fs_dir_entry *find_in_level(struct inode *dir, unsigned int level, struct fscrypt_name *fname, struct page **res_page) { struct qstr name = FSTR_TO_QSTR(&fname->disk_name); int s = GET_DENTRY_SLOTS(name.len); unsigned int nbucket, nblock; unsigned int bidx, end_block; struct page *dentry_page; struct f2fs_dir_entry *de = NULL; bool room = false; int max_slots; f2fs_hash_t namehash = f2fs_dentry_hash(&name, fname); nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level); nblock = bucket_blocks(level); bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level, le32_to_cpu(namehash) % nbucket); end_block = bidx + nblock; for (; bidx < end_block; bidx++) { /* no need to allocate new dentry pages to all the indices */ dentry_page = f2fs_find_data_page(dir, bidx); if (IS_ERR(dentry_page)) { if (PTR_ERR(dentry_page) == -ENOENT) { room = true; continue; } else { *res_page = dentry_page; break; } } de = find_in_block(dentry_page, fname, namehash, &max_slots, res_page); if (de) break; if (max_slots >= s) room = true; f2fs_put_page(dentry_page, 0); } if (!de && room && F2FS_I(dir)->chash != namehash) { F2FS_I(dir)->chash = namehash; F2FS_I(dir)->clevel = level; } return de; } struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, struct fscrypt_name *fname, struct page **res_page) { unsigned long npages = dir_blocks(dir); struct f2fs_dir_entry *de = NULL; unsigned int max_depth; unsigned int level; *res_page = NULL; if (f2fs_has_inline_dentry(dir)) { de = f2fs_find_in_inline_dir(dir, fname, res_page); goto out; } if (npages == 0) goto out; max_depth = F2FS_I(dir)->i_current_depth; if (unlikely(max_depth > MAX_DIR_HASH_DEPTH)) { f2fs_msg(F2FS_I_SB(dir)->sb, KERN_WARNING, "Corrupted max_depth of %lu: %u", dir->i_ino, max_depth); max_depth = MAX_DIR_HASH_DEPTH; f2fs_i_depth_write(dir, max_depth); } for (level = 0; level < max_depth; level++) { de = find_in_level(dir, level, fname, res_page); if (de || IS_ERR(*res_page)) break; } out: /* This is to increase the speed of f2fs_create */ if (!de) F2FS_I(dir)->task = current; return de; } /* * Find an entry in the specified directory with the wanted name. * It returns the page where the entry was found (as a parameter - res_page), * and the entry itself. Page is returned mapped and unlocked. * Entry is guaranteed to be valid. */ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, const struct qstr *child, struct page **res_page) { struct f2fs_dir_entry *de = NULL; struct fscrypt_name fname; int err; err = fscrypt_setup_filename(dir, child, 1, &fname); if (err) { if (err == -ENOENT) *res_page = NULL; else *res_page = ERR_PTR(err); return NULL; } de = __f2fs_find_entry(dir, &fname, res_page); fscrypt_free_filename(&fname); return de; } struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p) { struct qstr dotdot = QSTR_INIT("..", 2); return f2fs_find_entry(dir, &dotdot, p); } ino_t f2fs_inode_by_name(struct inode *dir, const struct qstr *qstr, struct page **page) { ino_t res = 0; struct f2fs_dir_entry *de; de = f2fs_find_entry(dir, qstr, page); if (de) { res = le32_to_cpu(de->ino); f2fs_put_page(*page, 0); } return res; } void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, struct page *page, struct inode *inode) { enum page_type type = f2fs_has_inline_dentry(dir) ? 
NODE : DATA; lock_page(page); f2fs_wait_on_page_writeback(page, type, true); de->ino = cpu_to_le32(inode->i_ino); set_de_type(de, inode->i_mode); set_page_dirty(page); dir->i_mtime = dir->i_ctime = current_time(dir); f2fs_mark_inode_dirty_sync(dir, false); f2fs_put_page(page, 1); } static void init_dent_inode(const struct qstr *name, struct page *ipage) { struct f2fs_inode *ri; f2fs_wait_on_page_writeback(ipage, NODE, true); /* copy name info. to this inode page */ ri = F2FS_INODE(ipage); ri->i_namelen = cpu_to_le32(name->len); memcpy(ri->i_name, name->name, name->len); set_page_dirty(ipage); } void f2fs_do_make_empty_dir(struct inode *inode, struct inode *parent, struct f2fs_dentry_ptr *d) { struct qstr dot = QSTR_INIT(".", 1); struct qstr dotdot = QSTR_INIT("..", 2); /* update dirent of "." */ f2fs_update_dentry(inode->i_ino, inode->i_mode, d, &dot, 0, 0); /* update dirent of ".." */ f2fs_update_dentry(parent->i_ino, parent->i_mode, d, &dotdot, 0, 1); } static int make_empty_dir(struct inode *inode, struct inode *parent, struct page *page) { struct page *dentry_page; struct f2fs_dentry_block *dentry_blk; struct f2fs_dentry_ptr d; if (f2fs_has_inline_dentry(inode)) return f2fs_make_empty_inline_dir(inode, parent, page); dentry_page = f2fs_get_new_data_page(inode, page, 0, true); if (IS_ERR(dentry_page)) return PTR_ERR(dentry_page); dentry_blk = page_address(dentry_page); make_dentry_ptr_block(NULL, &d, dentry_blk); f2fs_do_make_empty_dir(inode, parent, &d); set_page_dirty(dentry_page); f2fs_put_page(dentry_page, 1); return 0; } struct page *f2fs_init_inode_metadata(struct inode *inode, struct inode *dir, const struct qstr *new_name, const struct qstr *orig_name, struct page *dpage) { struct page *page; int dummy_encrypt = DUMMY_ENCRYPTION_ENABLED(F2FS_I_SB(dir)); int err; if (is_inode_flag_set(inode, FI_NEW_INODE)) { page = f2fs_new_inode_page(inode); if (IS_ERR(page)) return page; if (S_ISDIR(inode->i_mode)) { /* in order to handle error case */ get_page(page); err = make_empty_dir(inode, dir, page); if (err) { lock_page(page); goto put_error; } put_page(page); } err = f2fs_init_acl(inode, dir, page, dpage); if (err) goto put_error; err = f2fs_init_security(inode, dir, orig_name, page); if (err) goto put_error; if ((f2fs_encrypted_inode(dir) || dummy_encrypt) && f2fs_may_encrypt(inode)) { err = fscrypt_inherit_context(dir, inode, page, false); if (err) goto put_error; } } else { page = f2fs_get_node_page(F2FS_I_SB(dir), inode->i_ino); if (IS_ERR(page)) return page; } if (new_name) { init_dent_inode(new_name, page); if (f2fs_encrypted_inode(dir)) file_set_enc_name(inode); } /* * This file should be checkpointed during fsync. * We lost i_pino from now on. */ if (is_inode_flag_set(inode, FI_INC_LINK)) { if (!S_ISDIR(inode->i_mode)) file_lost_pino(inode); /* * If link the tmpfile to alias through linkat path, * we should remove this inode from orphan list. 
*/ if (inode->i_nlink == 0) f2fs_remove_orphan_inode(F2FS_I_SB(dir), inode->i_ino); f2fs_i_links_write(inode, true); } return page; put_error: clear_nlink(inode); f2fs_update_inode(inode, page); f2fs_put_page(page, 1); return ERR_PTR(err); } void f2fs_update_parent_metadata(struct inode *dir, struct inode *inode, unsigned int current_depth) { if (inode && is_inode_flag_set(inode, FI_NEW_INODE)) { if (S_ISDIR(inode->i_mode)) f2fs_i_links_write(dir, true); clear_inode_flag(inode, FI_NEW_INODE); } dir->i_mtime = dir->i_ctime = current_time(dir); f2fs_mark_inode_dirty_sync(dir, false); if (F2FS_I(dir)->i_current_depth != current_depth) f2fs_i_depth_write(dir, current_depth); if (inode && is_inode_flag_set(inode, FI_INC_LINK)) clear_inode_flag(inode, FI_INC_LINK); } int f2fs_room_for_filename(const void *bitmap, int slots, int max_slots) { int bit_start = 0; int zero_start, zero_end; next: zero_start = find_next_zero_bit_le(bitmap, max_slots, bit_start); if (zero_start >= max_slots) return max_slots; zero_end = find_next_bit_le(bitmap, max_slots, zero_start); if (zero_end - zero_start >= slots) return zero_start; bit_start = zero_end + 1; if (zero_end + 1 >= max_slots) return max_slots; goto next; } void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d, const struct qstr *name, f2fs_hash_t name_hash, unsigned int bit_pos) { struct f2fs_dir_entry *de; int slots = GET_DENTRY_SLOTS(name->len); int i; de = &d->dentry[bit_pos]; de->hash_code = name_hash; de->name_len = cpu_to_le16(name->len); memcpy(d->filename[bit_pos], name->name, name->len); de->ino = cpu_to_le32(ino); set_de_type(de, mode); for (i = 0; i < slots; i++) { __set_bit_le(bit_pos + i, (void *)d->bitmap); /* avoid wrong garbage data for readdir */ if (i) (de + i)->name_len = 0; } } int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name, const struct qstr *orig_name, struct inode *inode, nid_t ino, umode_t mode) { unsigned int bit_pos; unsigned int level; unsigned int current_depth; unsigned long bidx, block; f2fs_hash_t dentry_hash; unsigned int nbucket, nblock; struct page *dentry_page = NULL; struct f2fs_dentry_block *dentry_blk = NULL; struct f2fs_dentry_ptr d; struct page *page = NULL; int slots, err = 0; level = 0; slots = GET_DENTRY_SLOTS(new_name->len); dentry_hash = f2fs_dentry_hash(new_name, NULL); current_depth = F2FS_I(dir)->i_current_depth; if (F2FS_I(dir)->chash == dentry_hash) { level = F2FS_I(dir)->clevel; F2FS_I(dir)->chash = 0; } start: if (time_to_inject(F2FS_I_SB(dir), FAULT_DIR_DEPTH)) { f2fs_show_injection_info(FAULT_DIR_DEPTH); return -ENOSPC; } if (unlikely(current_depth == MAX_DIR_HASH_DEPTH)) return -ENOSPC; /* Increase the depth, if required */ if (level == current_depth) ++current_depth; nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level); nblock = bucket_blocks(level); bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level, (le32_to_cpu(dentry_hash) % nbucket)); for (block = bidx; block <= (bidx + nblock - 1); block++) { dentry_page = f2fs_get_new_data_page(dir, NULL, block, true); if (IS_ERR(dentry_page)) return PTR_ERR(dentry_page); dentry_blk = page_address(dentry_page); bit_pos = f2fs_room_for_filename(&dentry_blk->dentry_bitmap, slots, NR_DENTRY_IN_BLOCK); if (bit_pos < NR_DENTRY_IN_BLOCK) goto add_dentry; f2fs_put_page(dentry_page, 1); } /* Move to next level to find the empty slot for new dentry */ ++level; goto start; add_dentry: f2fs_wait_on_page_writeback(dentry_page, DATA, true); if (inode) { down_write(&F2FS_I(inode)->i_sem); page = 
f2fs_init_inode_metadata(inode, dir, new_name, orig_name, NULL); if (IS_ERR(page)) { err = PTR_ERR(page); goto fail; } } make_dentry_ptr_block(NULL, &d, dentry_blk); f2fs_update_dentry(ino, mode, &d, new_name, dentry_hash, bit_pos); set_page_dirty(dentry_page); if (inode) { f2fs_i_pino_write(inode, dir->i_ino); /* synchronize inode page's data from inode cache */ if (is_inode_flag_set(inode, FI_NEW_INODE)) f2fs_update_inode(inode, page); f2fs_put_page(page, 1); } f2fs_update_parent_metadata(dir, inode, current_depth); fail: if (inode) up_write(&F2FS_I(inode)->i_sem); f2fs_put_page(dentry_page, 1); return err; } int f2fs_add_dentry(struct inode *dir, struct fscrypt_name *fname, struct inode *inode, nid_t ino, umode_t mode) { struct qstr new_name; int err = -EAGAIN; new_name.name = fname_name(fname); new_name.len = fname_len(fname); if (f2fs_has_inline_dentry(dir)) err = f2fs_add_inline_entry(dir, &new_name, fname->usr_fname, inode, ino, mode); if (err == -EAGAIN) err = f2fs_add_regular_entry(dir, &new_name, fname->usr_fname, inode, ino, mode); f2fs_update_time(F2FS_I_SB(dir), REQ_TIME); return err; } /* * Caller should grab and release a rwsem by calling f2fs_lock_op() and * f2fs_unlock_op(). */ int f2fs_do_add_link(struct inode *dir, const struct qstr *name, struct inode *inode, nid_t ino, umode_t mode) { struct fscrypt_name fname; struct page *page = NULL; struct f2fs_dir_entry *de = NULL; int err; err = fscrypt_setup_filename(dir, name, 0, &fname); if (err) return err; /* * An immature stackable filesystem can show a race condition between * lookup and create. If lookup and create are done by the same task, * it is fine, since the VFS serializes them as expected. Otherwise, * verify the on-disk dentry one more time, which better guarantees * filesystem consistency. */ if (current != F2FS_I(dir)->task) { de = __f2fs_find_entry(dir, &fname, &page); F2FS_I(dir)->task = NULL; } if (de) { f2fs_put_page(page, 0); err = -EEXIST; } else if (IS_ERR(page)) { err = PTR_ERR(page); } else { err = f2fs_add_dentry(dir, &fname, inode, ino, mode); } fscrypt_free_filename(&fname); return err; } int f2fs_do_tmpfile(struct inode *inode, struct inode *dir) { struct page *page; int err = 0; down_write(&F2FS_I(inode)->i_sem); page = f2fs_init_inode_metadata(inode, dir, NULL, NULL, NULL); if (IS_ERR(page)) { err = PTR_ERR(page); goto fail; } f2fs_put_page(page, 1); clear_inode_flag(inode, FI_NEW_INODE); fail: up_write(&F2FS_I(inode)->i_sem); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); return err; } void f2fs_drop_nlink(struct inode *dir, struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); down_write(&F2FS_I(inode)->i_sem); if (S_ISDIR(inode->i_mode)) f2fs_i_links_write(dir, false); inode->i_ctime = current_time(inode); f2fs_i_links_write(inode, false); if (S_ISDIR(inode->i_mode)) { f2fs_i_links_write(inode, false); f2fs_i_size_write(inode, 0); } up_write(&F2FS_I(inode)->i_sem); if (inode->i_nlink == 0) f2fs_add_orphan_inode(inode); else f2fs_release_orphan_inode(sbi); } /* * It only removes the dentry from the dentry page; the corresponding name * entry in the name page does not need to be touched during deletion.
*/ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, struct inode *dir, struct inode *inode) { struct f2fs_dentry_block *dentry_blk; unsigned int bit_pos; int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); int i; f2fs_update_time(F2FS_I_SB(dir), REQ_TIME); if (F2FS_OPTION(F2FS_I_SB(dir)).fsync_mode == FSYNC_MODE_STRICT) f2fs_add_ino_entry(F2FS_I_SB(dir), dir->i_ino, TRANS_DIR_INO); if (f2fs_has_inline_dentry(dir)) return f2fs_delete_inline_entry(dentry, page, dir, inode); lock_page(page); f2fs_wait_on_page_writeback(page, DATA, true); dentry_blk = page_address(page); bit_pos = dentry - dentry_blk->dentry; for (i = 0; i < slots; i++) __clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); /* Let's check and deallocate this dentry page */ bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, NR_DENTRY_IN_BLOCK, 0); set_page_dirty(page); dir->i_ctime = dir->i_mtime = current_time(dir); f2fs_mark_inode_dirty_sync(dir, false); if (inode) f2fs_drop_nlink(dir, inode); if (bit_pos == NR_DENTRY_IN_BLOCK && !f2fs_truncate_hole(dir, page->index, page->index + 1)) { f2fs_clear_radix_tree_dirty_tag(page); clear_page_dirty_for_io(page); ClearPagePrivate(page); ClearPageUptodate(page); clear_cold_data(page); inode_dec_dirty_pages(dir); f2fs_remove_dirty_inode(dir); } f2fs_put_page(page, 1); } bool f2fs_empty_dir(struct inode *dir) { unsigned long bidx; struct page *dentry_page; unsigned int bit_pos; struct f2fs_dentry_block *dentry_blk; unsigned long nblock = dir_blocks(dir); if (f2fs_has_inline_dentry(dir)) return f2fs_empty_inline_dir(dir); for (bidx = 0; bidx < nblock; bidx++) { dentry_page = f2fs_get_lock_data_page(dir, bidx, false); if (IS_ERR(dentry_page)) { if (PTR_ERR(dentry_page) == -ENOENT) continue; else return false; } dentry_blk = page_address(dentry_page); if (bidx == 0) bit_pos = 2; else bit_pos = 0; bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, NR_DENTRY_IN_BLOCK, bit_pos); f2fs_put_page(dentry_page, 1); if (bit_pos < NR_DENTRY_IN_BLOCK) return false; } return true; } int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, unsigned int start_pos, struct fscrypt_str *fstr) { unsigned char d_type = DT_UNKNOWN; unsigned int bit_pos; struct f2fs_dir_entry *de = NULL; struct fscrypt_str de_name = FSTR_INIT(NULL, 0); struct f2fs_sb_info *sbi = F2FS_I_SB(d->inode); int err = 0; bit_pos = ((unsigned long)ctx->pos % d->max); while (bit_pos < d->max) { bit_pos = find_next_bit_le(d->bitmap, d->max, bit_pos); if (bit_pos >= d->max) break; de = &d->dentry[bit_pos]; if (de->name_len == 0) { bit_pos++; ctx->pos = start_pos + bit_pos; continue; } d_type = f2fs_get_de_type(de); de_name.name = d->filename[bit_pos]; de_name.len = le16_to_cpu(de->name_len); /* check memory boundary before moving forward */ bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); if (unlikely(bit_pos > d->max || le16_to_cpu(de->name_len) > F2FS_NAME_LEN)) { f2fs_msg(sbi->sb, KERN_WARNING, "%s: corrupted namelen=%d, run fsck to fix.", __func__, le16_to_cpu(de->name_len)); set_sbi_flag(sbi, SBI_NEED_FSCK); err = -EINVAL; goto out; } if (f2fs_encrypted_inode(d->inode)) { int save_len = fstr->len; err = fscrypt_fname_disk_to_usr(d->inode, (u32)de->hash_code, 0, &de_name, fstr); if (err) return err; de_name = *fstr; fstr->len = save_len; } if (!dir_emit(ctx, de_name.name, de_name.len, le32_to_cpu(de->ino), d_type)) return 1; if (sbi->readdir_ra == 1) f2fs_ra_node_page(sbi, le32_to_cpu(de->ino)); ctx->pos = start_pos + bit_pos; } out: return err; } static int 
f2fs_readdir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); unsigned long npages = dir_blocks(inode); struct f2fs_dentry_block *dentry_blk = NULL; struct page *dentry_page = NULL; struct file_ra_state *ra = &file->f_ra; loff_t start_pos = ctx->pos; unsigned int n = ((unsigned long)ctx->pos / NR_DENTRY_IN_BLOCK); struct f2fs_dentry_ptr d; struct fscrypt_str fstr = FSTR_INIT(NULL, 0); int err = 0; if (f2fs_encrypted_inode(inode)) { err = fscrypt_get_encryption_info(inode); if (err && err != -ENOKEY) goto out; err = fscrypt_fname_alloc_buffer(inode, F2FS_NAME_LEN, &fstr); if (err < 0) goto out; } if (f2fs_has_inline_dentry(inode)) { err = f2fs_read_inline_dir(file, ctx, &fstr); goto out_free; } for (; n < npages; n++, ctx->pos = n * NR_DENTRY_IN_BLOCK) { /* allow readdir() to be interrupted */ if (fatal_signal_pending(current)) { err = -ERESTARTSYS; goto out_free; } cond_resched(); /* readahead for multi pages of dir */ if (npages - n > 1 && !ra_has_index(ra, n)) page_cache_sync_readahead(inode->i_mapping, ra, file, n, min(npages - n, (pgoff_t)MAX_DIR_RA_PAGES)); dentry_page = f2fs_get_lock_data_page(inode, n, false); if (IS_ERR(dentry_page)) { err = PTR_ERR(dentry_page); if (err == -ENOENT) { err = 0; continue; } else { goto out_free; } } dentry_blk = page_address(dentry_page); make_dentry_ptr_block(inode, &d, dentry_blk); err = f2fs_fill_dentries(ctx, &d, n * NR_DENTRY_IN_BLOCK, &fstr); if (err) { f2fs_put_page(dentry_page, 1); break; } f2fs_put_page(dentry_page, 1); } out_free: fscrypt_fname_free_buffer(&fstr); out: trace_f2fs_readdir(inode, start_pos, ctx->pos, err); return err < 0 ? err : 0; } static int f2fs_dir_open(struct inode *inode, struct file *filp) { if (f2fs_encrypted_inode(inode)) return fscrypt_get_encryption_info(inode) ? -EACCES : 0; return 0; } const struct file_operations f2fs_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, .iterate_shared = f2fs_readdir, .fsync = f2fs_sync_file, .open = f2fs_dir_open, .unlocked_ioctl = f2fs_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = f2fs_compat_ioctl, #endif };
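/*
 * The dentry bookkeeping above reduces to two small pieces of arithmetic:
 * a name of len bytes occupies GET_DENTRY_SLOTS(len) consecutive bitmap
 * slots (f2fs_room_for_filename() scans for that many clear bits), and
 * f2fs_fill_dentries()/f2fs_readdir() encode a directory position as
 * block_index * NR_DENTRY_IN_BLOCK + bit_pos. The standalone user-space
 * sketch below only illustrates that arithmetic; it assumes the usual
 * constants from the f2fs headers (F2FS_SLOT_LEN == 8,
 * NR_DENTRY_IN_BLOCK == 214), which are not part of this file.
 */
#include <stdio.h>

#define F2FS_SLOT_LEN		8	/* bytes of name stored per slot */
#define NR_DENTRY_IN_BLOCK	214	/* dentries per 4KB dentry block */

/* mirrors GET_DENTRY_SLOTS(): round the name length up to whole slots */
static unsigned int dentry_slots(unsigned int name_len)
{
	return (name_len + F2FS_SLOT_LEN - 1) / F2FS_SLOT_LEN;
}

/* mirrors the ctx->pos encoding used by f2fs_fill_dentries() */
static unsigned long readdir_pos(unsigned long block_index, unsigned int bit_pos)
{
	return block_index * NR_DENTRY_IN_BLOCK + bit_pos;
}

int main(void)
{
	/* a 14-byte name ("archive.tar.gz") needs 2 slots; slot 5 of block 3 is pos 647 */
	printf("slots=%u pos=%lu\n", dentry_slots(14), readdir_pos(3, 5));
	return 0;
}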
/* * HID raw devices, giving access to raw HID events. * * In comparison to hiddev, this device does not process the * hid events at all (no parsing, no lookups). This lets applications * work on raw hid events as they want to, and avoids the need to * use transport-specific userspace libhid/libusb libraries. * * Copyright (c) 2007-2014 Jiri Kosina */ /* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, * version 2, as published by the Free Software Foundation. * * You should have received a copy of the GNU General Public License along with * this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
*/ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/fs.h> #include <linux/module.h> #include <linux/errno.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/cdev.h> #include <linux/poll.h> #include <linux/device.h> #include <linux/major.h> #include <linux/slab.h> #include <linux/hid.h> #include <linux/mutex.h> #include <linux/sched/signal.h> #include <linux/string.h> #include <linux/hidraw.h> static int hidraw_major; static struct cdev hidraw_cdev; static struct class *hidraw_class; static struct hidraw *hidraw_table[HIDRAW_MAX_DEVICES]; static DEFINE_MUTEX(minors_lock); static ssize_t hidraw_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) { struct hidraw_list *list = file->private_data; int ret = 0, len; DECLARE_WAITQUEUE(wait, current); mutex_lock(&list->read_mutex); while (ret == 0) { if (list->head == list->tail) { add_wait_queue(&list->hidraw->wait, &wait); set_current_state(TASK_INTERRUPTIBLE); while (list->head == list->tail) { if (signal_pending(current)) { ret = -ERESTARTSYS; break; } if (!list->hidraw->exist) { ret = -EIO; break; } if (file->f_flags & O_NONBLOCK) { ret = -EAGAIN; break; } /* allow O_NONBLOCK to work well from other threads */ mutex_unlock(&list->read_mutex); schedule(); mutex_lock(&list->read_mutex); set_current_state(TASK_INTERRUPTIBLE); } set_current_state(TASK_RUNNING); remove_wait_queue(&list->hidraw->wait, &wait); } if (ret) goto out; len = list->buffer[list->tail].len > count ? count : list->buffer[list->tail].len; if (list->buffer[list->tail].value) { if (copy_to_user(buffer, list->buffer[list->tail].value, len)) { ret = -EFAULT; goto out; } ret = len; } kfree(list->buffer[list->tail].value); list->buffer[list->tail].value = NULL; list->tail = (list->tail + 1) & (HIDRAW_BUFFER_SIZE - 1); } out: mutex_unlock(&list->read_mutex); return ret; } /* * The first byte of the report buffer is expected to be a report number. * * This function is to be called with the minors_lock mutex held. */ static ssize_t hidraw_send_report(struct file *file, const char __user *buffer, size_t count, unsigned char report_type) { unsigned int minor = iminor(file_inode(file)); struct hid_device *dev; __u8 *buf; int ret = 0; if (!hidraw_table[minor] || !hidraw_table[minor]->exist) { ret = -ENODEV; goto out; } dev = hidraw_table[minor]->hid; if (count > HID_MAX_BUFFER_SIZE) { hid_warn(dev, "pid %d passed too large report\n", task_pid_nr(current)); ret = -EINVAL; goto out; } if (count < 2) { hid_warn(dev, "pid %d passed too short report\n", task_pid_nr(current)); ret = -EINVAL; goto out; } buf = memdup_user(buffer, count); if (IS_ERR(buf)) { ret = PTR_ERR(buf); goto out; } if ((report_type == HID_OUTPUT_REPORT) && !(dev->quirks & HID_QUIRK_NO_OUTPUT_REPORTS_ON_INTR_EP)) { ret = hid_hw_output_report(dev, buf, count); /* * compatibility with old implementation of USB-HID and I2C-HID: * if the device does not support receiving output reports, * on an interrupt endpoint, fallback to SET_REPORT HID command. 
*/ if (ret != -ENOSYS) goto out_free; } ret = hid_hw_raw_request(dev, buf[0], buf, count, report_type, HID_REQ_SET_REPORT); out_free: kfree(buf); out: return ret; } static ssize_t hidraw_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { ssize_t ret; mutex_lock(&minors_lock); ret = hidraw_send_report(file, buffer, count, HID_OUTPUT_REPORT); mutex_unlock(&minors_lock); return ret; } /* * This function performs a Get_Report transfer over the control endpoint * per section 7.2.1 of the HID specification, version 1.1. The first byte * of buffer is the report number to request, or 0x0 if the device does not * use numbered reports. The report_type parameter can be HID_FEATURE_REPORT * or HID_INPUT_REPORT. * * This function is to be called with the minors_lock mutex held. */ static ssize_t hidraw_get_report(struct file *file, char __user *buffer, size_t count, unsigned char report_type) { unsigned int minor = iminor(file_inode(file)); struct hid_device *dev; __u8 *buf; int ret = 0, len; unsigned char report_number; if (!hidraw_table[minor] || !hidraw_table[minor]->exist) { ret = -ENODEV; goto out; } dev = hidraw_table[minor]->hid; if (!dev->ll_driver->raw_request) { ret = -ENODEV; goto out; } if (count > HID_MAX_BUFFER_SIZE) { printk(KERN_WARNING "hidraw: pid %d passed too large report\n", task_pid_nr(current)); ret = -EINVAL; goto out; } if (count < 2) { printk(KERN_WARNING "hidraw: pid %d passed too short report\n", task_pid_nr(current)); ret = -EINVAL; goto out; } buf = kmalloc(count, GFP_KERNEL); if (!buf) { ret = -ENOMEM; goto out; } /* * Read the first byte from the user. This is the report number, * which is passed to hid_hw_raw_request(). */ if (copy_from_user(&report_number, buffer, 1)) { ret = -EFAULT; goto out_free; } ret = hid_hw_raw_request(dev, report_number, buf, count, report_type, HID_REQ_GET_REPORT); if (ret < 0) goto out_free; len = (ret < count) ?
ret : count; if (copy_to_user(buffer, buf, len)) { ret = -EFAULT; goto out_free; } ret = len; out_free: kfree(buf); out: return ret; } static __poll_t hidraw_poll(struct file *file, poll_table *wait) { struct hidraw_list *list = file->private_data; __poll_t mask = EPOLLOUT | EPOLLWRNORM; /* hidraw is always writable */ poll_wait(file, &list->hidraw->wait, wait); if (list->head != list->tail) mask |= EPOLLIN | EPOLLRDNORM; if (!list->hidraw->exist) mask |= EPOLLERR | EPOLLHUP; return mask; } static int hidraw_open(struct inode *inode, struct file *file) { unsigned int minor = iminor(inode); struct hidraw *dev; struct hidraw_list *list; unsigned long flags; int err = 0; if (!(list = kzalloc(sizeof(struct hidraw_list), GFP_KERNEL))) { err = -ENOMEM; goto out; } mutex_lock(&minors_lock); if (!hidraw_table[minor] || !hidraw_table[minor]->exist) { err = -ENODEV; goto out_unlock; } dev = hidraw_table[minor]; if (!dev->open++) { err = hid_hw_power(dev->hid, PM_HINT_FULLON); if (err < 0) { dev->open--; goto out_unlock; } err = hid_hw_open(dev->hid); if (err < 0) { hid_hw_power(dev->hid, PM_HINT_NORMAL); dev->open--; goto out_unlock; } } list->hidraw = hidraw_table[minor]; mutex_init(&list->read_mutex); spin_lock_irqsave(&hidraw_table[minor]->list_lock, flags); list_add_tail(&list->node, &hidraw_table[minor]->list); spin_unlock_irqrestore(&hidraw_table[minor]->list_lock, flags); file->private_data = list; out_unlock: mutex_unlock(&minors_lock); out: if (err < 0) kfree(list); return err; } static int hidraw_fasync(int fd, struct file *file, int on) { struct hidraw_list *list = file->private_data; return fasync_helper(fd, file, on, &list->fasync); } static void drop_ref(struct hidraw *hidraw, int exists_bit) { if (exists_bit) { hidraw->exist = 0; if (hidraw->open) { hid_hw_close(hidraw->hid); wake_up_interruptible(&hidraw->wait); } device_destroy(hidraw_class, MKDEV(hidraw_major, hidraw->minor)); } else { --hidraw->open; } if (!hidraw->open) { if (!hidraw->exist) { hidraw_table[hidraw->minor] = NULL; kfree(hidraw); } else { /* close device for last reader */ hid_hw_close(hidraw->hid); hid_hw_power(hidraw->hid, PM_HINT_NORMAL); } } } static int hidraw_release(struct inode * inode, struct file * file) { unsigned int minor = iminor(inode); struct hidraw_list *list = file->private_data; unsigned long flags; int i; mutex_lock(&minors_lock); spin_lock_irqsave(&hidraw_table[minor]->list_lock, flags); for (i = list->tail; i < list->head; i++) kfree(list->buffer[i].value); list_del(&list->node); spin_unlock_irqrestore(&hidraw_table[minor]->list_lock, flags); kfree(list); drop_ref(hidraw_table[minor], 0); mutex_unlock(&minors_lock); return 0; } static long hidraw_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct inode *inode = file_inode(file); unsigned int minor = iminor(inode); long ret = 0; struct hidraw *dev; void __user *user_arg = (void __user*) arg; mutex_lock(&minors_lock); dev = hidraw_table[minor]; if (!dev || !dev->exist) { ret = -ENODEV; goto out; } switch (cmd) { case HIDIOCGRDESCSIZE: if (put_user(dev->hid->rsize, (int __user *)arg)) ret = -EFAULT; break; case HIDIOCGRDESC: { __u32 len; if (get_user(len, (int __user *)arg)) ret = -EFAULT; else if (len > HID_MAX_DESCRIPTOR_SIZE - 1) ret = -EINVAL; else if (copy_to_user(user_arg + offsetof( struct hidraw_report_descriptor, value[0]), dev->hid->rdesc, min(dev->hid->rsize, len))) ret = -EFAULT; break; } case HIDIOCGRAWINFO: { struct hidraw_devinfo dinfo; dinfo.bustype = dev->hid->bus; dinfo.vendor = dev->hid->vendor; dinfo.product 
= dev->hid->product; if (copy_to_user(user_arg, &dinfo, sizeof(dinfo))) ret = -EFAULT; break; } default: { struct hid_device *hid = dev->hid; if (_IOC_TYPE(cmd) != 'H') { ret = -EINVAL; break; } if (_IOC_NR(cmd) == _IOC_NR(HIDIOCSFEATURE(0))) { int len = _IOC_SIZE(cmd); ret = hidraw_send_report(file, user_arg, len, HID_FEATURE_REPORT); break; } if (_IOC_NR(cmd) == _IOC_NR(HIDIOCGFEATURE(0))) { int len = _IOC_SIZE(cmd); ret = hidraw_get_report(file, user_arg, len, HID_FEATURE_REPORT); break; } /* Begin Read-only ioctls. */ if (_IOC_DIR(cmd) != _IOC_READ) { ret = -EINVAL; break; } if (_IOC_NR(cmd) == _IOC_NR(HIDIOCGRAWNAME(0))) { int len = strlen(hid->name) + 1; if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd); ret = copy_to_user(user_arg, hid->name, len) ? -EFAULT : len; break; } if (_IOC_NR(cmd) == _IOC_NR(HIDIOCGRAWPHYS(0))) { int len = strlen(hid->phys) + 1; if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd); ret = copy_to_user(user_arg, hid->phys, len) ? -EFAULT : len; break; } } ret = -ENOTTY; } out: mutex_unlock(&minors_lock); return ret; } static const struct file_operations hidraw_ops = { .owner = THIS_MODULE, .read = hidraw_read, .write = hidraw_write, .poll = hidraw_poll, .open = hidraw_open, .release = hidraw_release, .unlocked_ioctl = hidraw_ioctl, .fasync = hidraw_fasync, #ifdef CONFIG_COMPAT .compat_ioctl = hidraw_ioctl, #endif .llseek = noop_llseek, }; int hidraw_report_event(struct hid_device *hid, u8 *data, int len) { struct hidraw *dev = hid->hidraw; struct hidraw_list *list; int ret = 0; unsigned long flags; spin_lock_irqsave(&dev->list_lock, flags); list_for_each_entry(list, &dev->list, node) { int new_head = (list->head + 1) & (HIDRAW_BUFFER_SIZE - 1); if (new_head == list->tail) continue; if (!(list->buffer[list->head].value = kmemdup(data, len, GFP_ATOMIC))) { ret = -ENOMEM; break; } list->buffer[list->head].len = len; list->head = new_head; kill_fasync(&list->fasync, SIGIO, POLL_IN); } spin_unlock_irqrestore(&dev->list_lock, flags); wake_up_interruptible(&dev->wait); return ret; } EXPORT_SYMBOL_GPL(hidraw_report_event); int hidraw_connect(struct hid_device *hid) { int minor, result; struct hidraw *dev; /* we accept any HID device, all applications */ dev = kzalloc(sizeof(struct hidraw), GFP_KERNEL); if (!dev) return -ENOMEM; result = -EINVAL; mutex_lock(&minors_lock); for (minor = 0; minor < HIDRAW_MAX_DEVICES; minor++) { if (hidraw_table[minor]) continue; hidraw_table[minor] = dev; result = 0; break; } if (result) { mutex_unlock(&minors_lock); kfree(dev); goto out; } dev->dev = device_create(hidraw_class, &hid->dev, MKDEV(hidraw_major, minor), NULL, "%s%d", "hidraw", minor); if (IS_ERR(dev->dev)) { hidraw_table[minor] = NULL; mutex_unlock(&minors_lock); result = PTR_ERR(dev->dev); kfree(dev); goto out; } init_waitqueue_head(&dev->wait); spin_lock_init(&dev->list_lock); INIT_LIST_HEAD(&dev->list); dev->hid = hid; dev->minor = minor; dev->exist = 1; hid->hidraw = dev; mutex_unlock(&minors_lock); out: return result; } EXPORT_SYMBOL_GPL(hidraw_connect); void hidraw_disconnect(struct hid_device *hid) { struct hidraw *hidraw = hid->hidraw; mutex_lock(&minors_lock); drop_ref(hidraw, 1); mutex_unlock(&minors_lock); } EXPORT_SYMBOL_GPL(hidraw_disconnect); int __init hidraw_init(void) { int result; dev_t dev_id; result = alloc_chrdev_region(&dev_id, HIDRAW_FIRST_MINOR, HIDRAW_MAX_DEVICES, "hidraw"); if (result < 0) { pr_warn("can't get major number\n"); goto out; } hidraw_major = MAJOR(dev_id); hidraw_class = class_create(THIS_MODULE, "hidraw"); if (IS_ERR(hidraw_class)) { result = 
PTR_ERR(hidraw_class); goto error_cdev; } cdev_init(&hidraw_cdev, &hidraw_ops); result = cdev_add(&hidraw_cdev, dev_id, HIDRAW_MAX_DEVICES); if (result < 0) goto error_class; printk(KERN_INFO "hidraw: raw HID events driver (C) Jiri Kosina\n"); out: return result; error_class: class_destroy(hidraw_class); error_cdev: unregister_chrdev_region(dev_id, HIDRAW_MAX_DEVICES); goto out; } void hidraw_exit(void) { dev_t dev_id = MKDEV(hidraw_major, 0); cdev_del(&hidraw_cdev); class_destroy(hidraw_class); unregister_chrdev_region(dev_id, HIDRAW_MAX_DEVICES); }
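/*
 * A minimal user-space sketch of the hidraw API implemented above: reads and
 * writes carry the report number in byte 0 (hidraw_send_report() rejects
 * anything shorter than 2 bytes), and feature reports go through the
 * HIDIOCSFEATURE()/HIDIOCGFEATURE() ioctls. The device path /dev/hidraw0 is
 * an assumption and error handling is abbreviated.
 */
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/hidraw.h>

int main(void)
{
	struct hidraw_devinfo info;
	unsigned char buf[64];
	int fd = open("/dev/hidraw0", O_RDWR);

	if (fd < 0)
		return 1;

	/* HIDIOCGRAWINFO fills in the bus type plus vendor/product IDs */
	if (ioctl(fd, HIDIOCGRAWINFO, &info) == 0)
		printf("bus 0x%x vendor 0x%04hx product 0x%04hx\n",
		       info.bustype, info.vendor, info.product);

	/* get a feature report; buf[0] is the report number (0 if unnumbered) */
	memset(buf, 0, sizeof(buf));
	if (ioctl(fd, HIDIOCGFEATURE(sizeof(buf)), buf) < 0)
		perror("HIDIOCGFEATURE");

	/* send a 2-byte output report; again the report number comes first */
	buf[0] = 0x0;
	buf[1] = 0xff;
	if (write(fd, buf, 2) < 0)
		perror("write");

	close(fd);
	return 0;
}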
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_SCHED_RT_H #define _LINUX_SCHED_RT_H #include <linux/sched.h> struct task_struct; static inline int rt_prio(int prio) { if (unlikely(prio < MAX_RT_PRIO)) return 1; return 0; } static inline int rt_task(struct task_struct *p) { return rt_prio(p->prio); } static inline bool task_is_realtime(struct task_struct *tsk) { int policy = tsk->policy; if (policy == SCHED_FIFO || policy == SCHED_RR) return true; if (policy == SCHED_DEADLINE) return true; return false; } #ifdef CONFIG_RT_MUTEXES /* * Must hold either p->pi_lock or task_rq(p)->lock. */ static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *p) { return p->pi_top_task; } extern void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task); extern void rt_mutex_adjust_pi(struct task_struct *p); static inline bool tsk_is_pi_blocked(struct task_struct *tsk) { return tsk->pi_blocked_on != NULL; } #else static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *task) { return NULL; } # define rt_mutex_adjust_pi(p) do { } while (0) static inline bool tsk_is_pi_blocked(struct task_struct *tsk) { return false; } #endif extern void normalize_rt_tasks(void); /* * default timeslice is 100 msecs (used only for SCHED_RR tasks). * Timeslices get refilled after they expire. */ #define RR_TIMESLICE (100 * HZ / 1000) #endif /* _LINUX_SCHED_RT_H */
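/*
 * A small user-space sketch relating these helpers to the scheduling policies
 * they test: a thread switched to SCHED_RR (or SCHED_FIFO/SCHED_DEADLINE) is
 * exactly what task_is_realtime() reports as true, and sched_rr_get_interval()
 * returns the round-robin quantum, which defaults to RR_TIMESLICE above
 * (100 * HZ / 1000 jiffies, i.e. 100 ms). Running it needs CAP_SYS_NICE or a
 * suitable RLIMIT_RTPRIO; the priority value 10 is an arbitrary choice.
 */
#include <stdio.h>
#include <sched.h>
#include <time.h>

int main(void)
{
	struct sched_param sp = { .sched_priority = 10 };
	struct timespec ts;

	if (sched_setscheduler(0, SCHED_RR, &sp) != 0) {
		perror("sched_setscheduler");
		return 1;
	}

	if (sched_rr_get_interval(0, &ts) == 0)
		printf("SCHED_RR timeslice: %ld.%09ld s\n",
		       (long)ts.tv_sec, ts.tv_nsec);
	return 0;
}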
/* * \author Rickard E. (Rik) Faith <faith@valinux.com> * \author Daryll Strauss <daryll@valinux.com> * \author Gareth Hughes <gareth@valinux.com> */ /* * Created: Mon Jan 4 08:58:31 1999 by faith@valinux.com * * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas.
* Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ #include <linux/poll.h> #include <linux/slab.h> #include <linux/module.h> #include <drm/drm_client.h> #include <drm/drm_file.h> #include <drm/drmP.h> #include "drm_legacy.h" #include "drm_internal.h" #include "drm_crtc_internal.h" /* from BKL pushdown */ DEFINE_MUTEX(drm_global_mutex); /** * DOC: file operations * * Drivers must define the file operations structure that forms the DRM * userspace API entry point, even though most of those operations are * implemented in the DRM core. The resulting &struct file_operations must be * stored in the &drm_driver.fops field. The mandatory functions are drm_open(), * drm_read(), drm_ioctl() and drm_compat_ioctl() if CONFIG_COMPAT is enabled * Note that drm_compat_ioctl will be NULL if CONFIG_COMPAT=n, so there's no * need to sprinkle #ifdef into the code. Drivers which implement private ioctls * that require 32/64 bit compatibility support must provide their own * &file_operations.compat_ioctl handler that processes private ioctls and calls * drm_compat_ioctl() for core ioctls. * * In addition drm_read() and drm_poll() provide support for DRM events. DRM * events are a generic and extensible means to send asynchronous events to * userspace through the file descriptor. They are used to send vblank event and * page flip completions by the KMS API. But drivers can also use it for their * own needs, e.g. to signal completion of rendering. * * For the driver-side event interface see drm_event_reserve_init() and * drm_send_event() as the main starting points. * * The memory mapping implementation will vary depending on how the driver * manages memory. Legacy drivers will use the deprecated drm_legacy_mmap() * function, modern drivers should use one of the provided memory-manager * specific implementations. For GEM-based drivers this is drm_gem_mmap(), and * for drivers which use the CMA GEM helpers it's drm_gem_cma_mmap(). * * No other file operations are supported by the DRM userspace API. 
Overall the * following is an example &file_operations structure:: * * static const example_drm_fops = { * .owner = THIS_MODULE, * .open = drm_open, * .release = drm_release, * .unlocked_ioctl = drm_ioctl, * .compat_ioctl = drm_compat_ioctl, // NULL if CONFIG_COMPAT=n * .poll = drm_poll, * .read = drm_read, * .llseek = no_llseek, * .mmap = drm_gem_mmap, * }; * * For plain GEM based drivers there is the DEFINE_DRM_GEM_FOPS() macro, and for * CMA based drivers there is the DEFINE_DRM_GEM_CMA_FOPS() macro to make this * simpler. * * The driver's &file_operations must be stored in &drm_driver.fops. * * For driver-private IOCTL handling see the more detailed discussion in * :ref:`IOCTL support in the userland interfaces chapter<drm_driver_ioctl>`. */ static int drm_open_helper(struct file *filp, struct drm_minor *minor); /** * drm_file_alloc - allocate file context * @minor: minor to allocate on * * This allocates a new DRM file context. It is not linked into any context and * can be used by the caller freely. Note that the context keeps a pointer to * @minor, so it must be freed before @minor is. * * RETURNS: * Pointer to newly allocated context, ERR_PTR on failure. */ struct drm_file *drm_file_alloc(struct drm_minor *minor) { struct drm_device *dev = minor->dev; struct drm_file *file; int ret; file = kzalloc(sizeof(*file), GFP_KERNEL); if (!file) return ERR_PTR(-ENOMEM); file->pid = get_pid(task_pid(current)); file->minor = minor; /* for compatibility root is always authenticated */ file->authenticated = capable(CAP_SYS_ADMIN); file->lock_count = 0; INIT_LIST_HEAD(&file->lhead); INIT_LIST_HEAD(&file->fbs); mutex_init(&file->fbs_lock); INIT_LIST_HEAD(&file->blobs); INIT_LIST_HEAD(&file->pending_event_list); INIT_LIST_HEAD(&file->event_list); init_waitqueue_head(&file->event_wait); file->event_space = 4096; /* set aside 4k for event buffer */ mutex_init(&file->event_read_lock); if (drm_core_check_feature(dev, DRIVER_GEM)) drm_gem_open(dev, file); if (drm_core_check_feature(dev, DRIVER_SYNCOBJ)) drm_syncobj_open(file); if (drm_core_check_feature(dev, DRIVER_PRIME)) drm_prime_init_file_private(&file->prime); if (dev->driver->open) { ret = dev->driver->open(dev, file); if (ret < 0) goto out_prime_destroy; } return file; out_prime_destroy: if (drm_core_check_feature(dev, DRIVER_PRIME)) drm_prime_destroy_file_private(&file->prime); if (drm_core_check_feature(dev, DRIVER_SYNCOBJ)) drm_syncobj_release(file); if (drm_core_check_feature(dev, DRIVER_GEM)) drm_gem_release(dev, file); put_pid(file->pid); kfree(file); return ERR_PTR(ret); } static void drm_events_release(struct drm_file *file_priv) { struct drm_device *dev = file_priv->minor->dev; struct drm_pending_event *e, *et; unsigned long flags; spin_lock_irqsave(&dev->event_lock, flags); /* Unlink pending events */ list_for_each_entry_safe(e, et, &file_priv->pending_event_list, pending_link) { list_del(&e->pending_link); e->file_priv = NULL; } /* Remove unconsumed events */ list_for_each_entry_safe(e, et, &file_priv->event_list, link) { list_del(&e->link); kfree(e); } spin_unlock_irqrestore(&dev->event_lock, flags); } /** * drm_file_free - free file context * @file: context to free, or NULL * * This destroys and deallocates a DRM file context previously allocated via * drm_file_alloc(). The caller must make sure to unlink it from any contexts * before calling this. * * If NULL is passed, this is a no-op. * * RETURNS: * 0 on success, or error code on failure. 
*/ void drm_file_free(struct drm_file *file) { struct drm_device *dev; if (!file) return; dev = file->minor->dev; DRM_DEBUG("pid = %d, device = 0x%lx, open_count = %d\n", task_pid_nr(current), (long)old_encode_dev(file->minor->kdev->devt), dev->open_count); if (drm_core_check_feature(dev, DRIVER_LEGACY) && dev->driver->preclose) dev->driver->preclose(dev, file); if (drm_core_check_feature(dev, DRIVER_LEGACY)) drm_legacy_lock_release(dev, file->filp); if (drm_core_check_feature(dev, DRIVER_HAVE_DMA)) drm_legacy_reclaim_buffers(dev, file); drm_events_release(file); if (drm_core_check_feature(dev, DRIVER_MODESET)) { drm_fb_release(file); drm_property_destroy_user_blobs(dev, file); } if (drm_core_check_feature(dev, DRIVER_SYNCOBJ)) drm_syncobj_release(file); if (drm_core_check_feature(dev, DRIVER_GEM)) drm_gem_release(dev, file); drm_legacy_ctxbitmap_flush(dev, file); if (drm_is_primary_client(file)) drm_master_release(file); if (dev->driver->postclose) dev->driver->postclose(dev, file); if (drm_core_check_feature(dev, DRIVER_PRIME)) drm_prime_destroy_file_private(&file->prime); WARN_ON(!list_empty(&file->event_list)); put_pid(file->pid); kfree(file); } static int drm_setup(struct drm_device * dev) { int ret; if (dev->driver->firstopen && drm_core_check_feature(dev, DRIVER_LEGACY)) { ret = dev->driver->firstopen(dev); if (ret != 0) return ret; } ret = drm_legacy_dma_setup(dev); if (ret < 0) return ret; DRM_DEBUG("\n"); return 0; } /** * drm_open - open method for DRM file * @inode: device inode * @filp: file pointer. * * This function must be used by drivers as their &file_operations.open method. * It looks up the correct DRM device and instantiates all the per-file * resources for it. It also calls the &drm_driver.open driver callback. * * RETURNS: * * 0 on success or negative errno value on falure. */ int drm_open(struct inode *inode, struct file *filp) { struct drm_device *dev; struct drm_minor *minor; int retcode; int need_setup = 0; minor = drm_minor_acquire(iminor(inode)); if (IS_ERR(minor)) return PTR_ERR(minor); dev = minor->dev; if (!dev->open_count++) need_setup = 1; /* share address_space across all char-devs of a single device */ filp->f_mapping = dev->anon_inode->i_mapping; retcode = drm_open_helper(filp, minor); if (retcode) goto err_undo; if (need_setup) { retcode = drm_setup(dev); if (retcode) goto err_undo; } return 0; err_undo: dev->open_count--; drm_minor_release(minor); return retcode; } EXPORT_SYMBOL(drm_open); /* * Check whether DRI will run on this CPU. * * \return non-zero if the DRI will run on this CPU, or zero otherwise. */ static int drm_cpu_valid(void) { #if defined(__sparc__) && !defined(__sparc_v9__) return 0; /* No cmpxchg before v9 sparc. */ #endif return 1; } /* * Called whenever a process opens /dev/drm. * * \param filp file pointer. * \param minor acquired minor-object. * \return zero on success or a negative number on failure. * * Creates and initializes a drm_file structure for the file private data in \p * filp and add it into the double linked list in \p dev. 
*/ static int drm_open_helper(struct file *filp, struct drm_minor *minor) { struct drm_device *dev = minor->dev; struct drm_file *priv; int ret; if (filp->f_flags & O_EXCL) return -EBUSY; /* No exclusive opens */ if (!drm_cpu_valid()) return -EINVAL; if (dev->switch_power_state != DRM_SWITCH_POWER_ON && dev->switch_power_state != DRM_SWITCH_POWER_DYNAMIC_OFF) return -EINVAL; DRM_DEBUG("pid = %d, minor = %d\n", task_pid_nr(current), minor->index); priv = drm_file_alloc(minor); if (IS_ERR(priv)) return PTR_ERR(priv); if (drm_is_primary_client(priv)) { ret = drm_master_open(priv); if (ret) { drm_file_free(priv); return ret; } } filp->private_data = priv; filp->f_mode |= FMODE_UNSIGNED_OFFSET; priv->filp = filp; mutex_lock(&dev->filelist_mutex); list_add(&priv->lhead, &dev->filelist); mutex_unlock(&dev->filelist_mutex); #ifdef __alpha__ /* * Default the hose */ if (!dev->hose) { struct pci_dev *pci_dev; pci_dev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, NULL); if (pci_dev) { dev->hose = pci_dev->sysdata; pci_dev_put(pci_dev); } if (!dev->hose) { struct pci_bus *b = list_entry(pci_root_buses.next, struct pci_bus, node); if (b) dev->hose = b->sysdata; } } #endif return 0; } static void drm_legacy_dev_reinit(struct drm_device *dev) { if (dev->irq_enabled) drm_irq_uninstall(dev); mutex_lock(&dev->struct_mutex); drm_legacy_agp_clear(dev); drm_legacy_sg_cleanup(dev); drm_legacy_vma_flush(dev); drm_legacy_dma_takedown(dev); mutex_unlock(&dev->struct_mutex); dev->sigdata.lock = NULL; dev->context_flag = 0; dev->last_context = 0; dev->if_version = 0; DRM_DEBUG("lastclose completed\n"); } void drm_lastclose(struct drm_device * dev) { DRM_DEBUG("\n"); if (dev->driver->lastclose) dev->driver->lastclose(dev); DRM_DEBUG("driver lastclose completed\n"); if (drm_core_check_feature(dev, DRIVER_LEGACY)) drm_legacy_dev_reinit(dev); drm_client_dev_restore(dev); } /** * drm_release - release method for DRM file * @inode: device inode * @filp: file pointer. * * This function must be used by drivers as their &file_operations.release * method. It frees any resources associated with the open file, and calls the * &drm_driver.postclose driver callback. If this is the last open file for the * DRM device also proceeds to call the &drm_driver.lastclose driver callback. * * RETURNS: * * Always succeeds and returns 0. */ int drm_release(struct inode *inode, struct file *filp) { struct drm_file *file_priv = filp->private_data; struct drm_minor *minor = file_priv->minor; struct drm_device *dev = minor->dev; mutex_lock(&drm_global_mutex); DRM_DEBUG("open_count = %d\n", dev->open_count); mutex_lock(&dev->filelist_mutex); list_del(&file_priv->lhead); mutex_unlock(&dev->filelist_mutex); drm_file_free(file_priv); if (!--dev->open_count) drm_lastclose(dev); mutex_unlock(&drm_global_mutex); drm_minor_release(minor); return 0; } EXPORT_SYMBOL(drm_release); /** * drm_read - read method for DRM file * @filp: file pointer * @buffer: userspace destination pointer for the read * @count: count in bytes to read * @offset: offset to read * * This function must be used by drivers as their &file_operations.read * method iff they use DRM events for asynchronous signalling to userspace. * Since events are used by the KMS API for vblank and page flip completion this * means all modern display drivers must use it. * * @offset is ignored, DRM events are read like a pipe. Therefore drivers also * must set the &file_operation.llseek to no_llseek(). Polling support is * provided by drm_poll(). * * This function will only ever read a full event. 
Therefore userspace must * supply a big enough buffer to fit any event to ensure forward progress. Since * the maximum event space is currently 4K it's recommended to just use that for * safety. * * RETURNS: * * Number of bytes read (always aligned to full events, and can be 0) or a * negative error code on failure. */ ssize_t drm_read(struct file *filp, char __user *buffer, size_t count, loff_t *offset) { struct drm_file *file_priv = filp->private_data; struct drm_device *dev = file_priv->minor->dev; ssize_t ret; if (!access_ok(VERIFY_WRITE, buffer, count)) return -EFAULT; ret = mutex_lock_interruptible(&file_priv->event_read_lock); if (ret) return ret; for (;;) { struct drm_pending_event *e = NULL; spin_lock_irq(&dev->event_lock); if (!list_empty(&file_priv->event_list)) { e = list_first_entry(&file_priv->event_list, struct drm_pending_event, link); file_priv->event_space += e->event->length; list_del(&e->link); } spin_unlock_irq(&dev->event_lock); if (e == NULL) { if (ret) break; if (filp->f_flags & O_NONBLOCK) { ret = -EAGAIN; break; } mutex_unlock(&file_priv->event_read_lock); ret = wait_event_interruptible(file_priv->event_wait, !list_empty(&file_priv->event_list)); if (ret >= 0) ret = mutex_lock_interruptible(&file_priv->event_read_lock); if (ret) return ret; } else { unsigned length = e->event->length; if (length > count - ret) { put_back_event: spin_lock_irq(&dev->event_lock); file_priv->event_space -= length; list_add(&e->link, &file_priv->event_list); spin_unlock_irq(&dev->event_lock); wake_up_interruptible(&file_priv->event_wait); break; } if (copy_to_user(buffer + ret, e->event, length)) { if (ret == 0) ret = -EFAULT; goto put_back_event; } ret += length; kfree(e); } } mutex_unlock(&file_priv->event_read_lock); return ret; } EXPORT_SYMBOL(drm_read); /** * drm_poll - poll method for DRM file * @filp: file pointer * @wait: poll waiter table * * This function must be used by drivers as their &file_operations.read method * iff they use DRM events for asynchronous signalling to userspace. Since * events are used by the KMS API for vblank and page flip completion this means * all modern display drivers must use it. * * See also drm_read(). * * RETURNS: * * Mask of POLL flags indicating the current status of the file. */ __poll_t drm_poll(struct file *filp, struct poll_table_struct *wait) { struct drm_file *file_priv = filp->private_data; __poll_t mask = 0; poll_wait(filp, &file_priv->event_wait, wait); if (!list_empty(&file_priv->event_list)) mask |= EPOLLIN | EPOLLRDNORM; return mask; } EXPORT_SYMBOL(drm_poll); /** * drm_event_reserve_init_locked - init a DRM event and reserve space for it * @dev: DRM device * @file_priv: DRM file private data * @p: tracking structure for the pending event * @e: actual event data to deliver to userspace * * This function prepares the passed in event for eventual delivery. If the event * doesn't get delivered (because the IOCTL fails later on, before queuing up * anything) then the even must be cancelled and freed using * drm_event_cancel_free(). Successfully initialized events should be sent out * using drm_send_event() or drm_send_event_locked() to signal completion of the * asynchronous event to userspace. * * If callers embedded @p into a larger structure it must be allocated with * kmalloc and @p must be the first member element. * * This is the locked version of drm_event_reserve_init() for callers which * already hold &drm_device.event_lock. * * RETURNS: * * 0 on success or a negative error code on failure. 
*/ int drm_event_reserve_init_locked(struct drm_device *dev, struct drm_file *file_priv, struct drm_pending_event *p, struct drm_event *e) { if (file_priv->event_space < e->length) return -ENOMEM; file_priv->event_space -= e->length; p->event = e; list_add(&p->pending_link, &file_priv->pending_event_list); p->file_priv = file_priv; return 0; } EXPORT_SYMBOL(drm_event_reserve_init_locked); /** * drm_event_reserve_init - init a DRM event and reserve space for it * @dev: DRM device * @file_priv: DRM file private data * @p: tracking structure for the pending event * @e: actual event data to deliver to userspace * * This function prepares the passed in event for eventual delivery. If the event * doesn't get delivered (because the IOCTL fails later on, before queuing up * anything) then the even must be cancelled and freed using * drm_event_cancel_free(). Successfully initialized events should be sent out * using drm_send_event() or drm_send_event_locked() to signal completion of the * asynchronous event to userspace. * * If callers embedded @p into a larger structure it must be allocated with * kmalloc and @p must be the first member element. * * Callers which already hold &drm_device.event_lock should use * drm_event_reserve_init_locked() instead. * * RETURNS: * * 0 on success or a negative error code on failure. */ int drm_event_reserve_init(struct drm_device *dev, struct drm_file *file_priv, struct drm_pending_event *p, struct drm_event *e) { unsigned long flags; int ret; spin_lock_irqsave(&dev->event_lock, flags); ret = drm_event_reserve_init_locked(dev, file_priv, p, e); spin_unlock_irqrestore(&dev->event_lock, flags); return ret; } EXPORT_SYMBOL(drm_event_reserve_init); /** * drm_event_cancel_free - free a DRM event and release it's space * @dev: DRM device * @p: tracking structure for the pending event * * This function frees the event @p initialized with drm_event_reserve_init() * and releases any allocated space. It is used to cancel an event when the * nonblocking operation could not be submitted and needed to be aborted. */ void drm_event_cancel_free(struct drm_device *dev, struct drm_pending_event *p) { unsigned long flags; spin_lock_irqsave(&dev->event_lock, flags); if (p->file_priv) { p->file_priv->event_space += p->event->length; list_del(&p->pending_link); } spin_unlock_irqrestore(&dev->event_lock, flags); if (p->fence) dma_fence_put(p->fence); kfree(p); } EXPORT_SYMBOL(drm_event_cancel_free); /** * drm_send_event_locked - send DRM event to file descriptor * @dev: DRM device * @e: DRM event to deliver * * This function sends the event @e, initialized with drm_event_reserve_init(), * to its associated userspace DRM file. Callers must already hold * &drm_device.event_lock, see drm_send_event() for the unlocked version. * * Note that the core will take care of unlinking and disarming events when the * corresponding DRM file is closed. Drivers need not worry about whether the * DRM file for this event still exists and can call this function upon * completion of the asynchronous work unconditionally. 
*/ void drm_send_event_locked(struct drm_device *dev, struct drm_pending_event *e) { assert_spin_locked(&dev->event_lock); if (e->completion) { complete_all(e->completion); e->completion_release(e->completion); e->completion = NULL; } if (e->fence) { dma_fence_signal(e->fence); dma_fence_put(e->fence); } if (!e->file_priv) { kfree(e); return; } list_del(&e->pending_link); list_add_tail(&e->link, &e->file_priv->event_list); wake_up_interruptible(&e->file_priv->event_wait); } EXPORT_SYMBOL(drm_send_event_locked); /** * drm_send_event - send DRM event to file descriptor * @dev: DRM device * @e: DRM event to deliver * * This function sends the event @e, initialized with drm_event_reserve_init(), * to its associated userspace DRM file. This function acquires * &drm_device.event_lock, see drm_send_event_locked() for callers which already * hold this lock. * * Note that the core will take care of unlinking and disarming events when the * corresponding DRM file is closed. Drivers need not worry about whether the * DRM file for this event still exists and can call this function upon * completion of the asynchronous work unconditionally. */ void drm_send_event(struct drm_device *dev, struct drm_pending_event *e) { unsigned long irqflags; spin_lock_irqsave(&dev->event_lock, irqflags); drm_send_event_locked(dev, e); spin_unlock_irqrestore(&dev->event_lock, irqflags); } EXPORT_SYMBOL(drm_send_event);
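/*
 * A user-space sketch of the event-read contract documented for drm_read()
 * and drm_poll() above: events are read like a pipe, always as whole records,
 * so a 4 KiB buffer (the per-file event space reserved in drm_file_alloc())
 * is always large enough. It assumes an already-requested event (e.g. a
 * vblank or page flip) is pending on /dev/dri/card0, and the include path for
 * the uapi header may differ on a given system (e.g. the libdrm copy).
 */
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <poll.h>
#include <drm/drm.h>

int main(void)
{
	char buf[4096];			/* matches the 4k event space per file */
	int fd = open("/dev/dri/card0", O_RDWR);
	struct pollfd pfd = { .fd = fd, .events = POLLIN };
	ssize_t len, off;

	if (fd < 0)
		return 1;

	/* wait until drm_poll() reports readable events, then drain them */
	if (poll(&pfd, 1, -1) <= 0 || !(pfd.revents & POLLIN))
		return 1;

	len = read(fd, buf, sizeof(buf));	/* always whole events */
	for (off = 0; off + (ssize_t)sizeof(struct drm_event) <= len; ) {
		struct drm_event e;

		memcpy(&e, buf + off, sizeof(e));
		if (e.length < sizeof(e))
			break;			/* malformed, stop parsing */
		if (e.type == DRM_EVENT_VBLANK)
			printf("vblank event, %u bytes\n", e.length);
		else if (e.type == DRM_EVENT_FLIP_COMPLETE)
			printf("page-flip completion, %u bytes\n", e.length);
		off += e.length;		/* length includes the header */
	}

	close(fd);
	return 0;
}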
/* * IPV4 GSO/GRO offload support * Linux INET implementation * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * * UDPv4 GSO support */ #include <linux/skbuff.h> #include <net/udp.h> #include <net/protocol.h> static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, netdev_features_t features, struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb, netdev_features_t features), __be16 new_protocol, bool is_ipv6) { int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); bool remcsum, need_csum, offload_csum, gso_partial; struct sk_buff *segs = ERR_PTR(-EINVAL); struct udphdr *uh = udp_hdr(skb); u16 mac_offset = skb->mac_header; __be16 protocol = skb->protocol; u16 mac_len = skb->mac_len; int udp_offset, outer_hlen; __wsum partial; bool need_ipsec; if (unlikely(!pskb_may_pull(skb, tnl_hlen))) goto out; /* Adjust partial header checksum to negate old length. * We cannot rely on the value contained in uh->len as it is * possible that the actual value exceeds the boundaries of the * 16 bit length field due to the header being added outside of an * IP or IPv6 frame that was already limited to 64K - 1. */ if (skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) partial = (__force __wsum)uh->len; else partial = (__force __wsum)htonl(skb->len); partial = csum_sub(csum_unfold(uh->check), partial); /* setup inner skb.
*/ skb->encapsulation = 0; SKB_GSO_CB(skb)->encap_level = 0; __skb_pull(skb, tnl_hlen); skb_reset_mac_header(skb); skb_set_network_header(skb, skb_inner_network_offset(skb)); skb->mac_len = skb_inner_network_offset(skb); skb->protocol = new_protocol; need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM); skb->encap_hdr_csum = need_csum; remcsum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TUNNEL_REMCSUM); skb->remcsum_offload = remcsum; need_ipsec = skb_dst(skb) && dst_xfrm(skb_dst(skb)); /* Try to offload checksum if possible */ offload_csum = !!(need_csum && !need_ipsec && (skb->dev->features & (is_ipv6 ? (NETIF_F_HW_CSUM | NETIF_F_IPV6_CSUM) : (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM)))); features &= skb->dev->hw_enc_features; /* The only checksum offload we care about from here on out is the * outer one so strip the existing checksum feature flags and * instead set the flag based on our outer checksum offload value. */ if (remcsum) { features &= ~NETIF_F_CSUM_MASK; if (!need_csum || offload_csum) features |= NETIF_F_HW_CSUM; } /* segment inner packet. */ segs = gso_inner_segment(skb, features); if (IS_ERR_OR_NULL(segs)) { skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset, mac_len); goto out; } gso_partial = !!(skb_shinfo(segs)->gso_type & SKB_GSO_PARTIAL); outer_hlen = skb_tnl_header_len(skb); udp_offset = outer_hlen - tnl_hlen; skb = segs; do { unsigned int len; if (remcsum) skb->ip_summed = CHECKSUM_NONE; /* Set up inner headers if we are offloading inner checksum */ if (skb->ip_summed == CHECKSUM_PARTIAL) { skb_reset_inner_headers(skb); skb->encapsulation = 1; } skb->mac_len = mac_len; skb->protocol = protocol; __skb_push(skb, outer_hlen); skb_reset_mac_header(skb); skb_set_network_header(skb, mac_len); skb_set_transport_header(skb, udp_offset); len = skb->len - udp_offset; uh = udp_hdr(skb); /* If we are only performing partial GSO the inner header * will be using a length value equal to only one MSS sized * segment instead of the entire frame. */ if (gso_partial && skb_is_gso(skb)) { uh->len = htons(skb_shinfo(skb)->gso_size + SKB_GSO_CB(skb)->data_offset + skb->head - (unsigned char *)uh); } else { uh->len = htons(len); } if (!need_csum) continue; uh->check = ~csum_fold(csum_add(partial, (__force __wsum)htonl(len))); if (skb->encapsulation || !offload_csum) { uh->check = gso_make_checksum(skb, ~uh->check); if (uh->check == 0) uh->check = CSUM_MANGLED_0; } else { skb->ip_summed = CHECKSUM_PARTIAL; skb->csum_start = skb_transport_header(skb) - skb->head; skb->csum_offset = offsetof(struct udphdr, check); } } while ((skb = skb->next)); out: return segs; } struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, netdev_features_t features, bool is_ipv6) { __be16 protocol = skb->protocol; const struct net_offload **offloads; const struct net_offload *ops; struct sk_buff *segs = ERR_PTR(-EINVAL); struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb, netdev_features_t features); rcu_read_lock(); switch (skb->inner_protocol_type) { case ENCAP_TYPE_ETHER: protocol = skb->inner_protocol; gso_inner_segment = skb_mac_gso_segment; break; case ENCAP_TYPE_IPPROTO: offloads = is_ipv6 ? 
inet6_offloads : inet_offloads; ops = rcu_dereference(offloads[skb->inner_ipproto]); if (!ops || !ops->callbacks.gso_segment) goto out_unlock; gso_inner_segment = ops->callbacks.gso_segment; break; default: goto out_unlock; } segs = __skb_udp_tunnel_segment(skb, features, gso_inner_segment, protocol, is_ipv6); out_unlock: rcu_read_unlock(); return segs; } EXPORT_SYMBOL(skb_udp_tunnel_segment); struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, netdev_features_t features) { struct sock *sk = gso_skb->sk; unsigned int sum_truesize = 0; struct sk_buff *segs, *seg; struct udphdr *uh; unsigned int mss; bool copy_dtor; __sum16 check; __be16 newlen; mss = skb_shinfo(gso_skb)->gso_size; if (gso_skb->len <= sizeof(*uh) + mss) return ERR_PTR(-EINVAL); skb_pull(gso_skb, sizeof(*uh)); /* clear destructor to avoid skb_segment assigning it to tail */ copy_dtor = gso_skb->destructor == sock_wfree; if (copy_dtor) gso_skb->destructor = NULL; segs = skb_segment(gso_skb, features); if (unlikely(IS_ERR_OR_NULL(segs))) { if (copy_dtor) gso_skb->destructor = sock_wfree; return segs; } /* GSO partial and frag_list segmentation only requires splitting * the frame into an MSS multiple and possibly a remainder, both * cases return a GSO skb. So update the mss now. */ if (skb_is_gso(segs)) mss *= skb_shinfo(segs)->gso_segs; seg = segs; uh = udp_hdr(seg); /* preserve TX timestamp flags and TS key for first segment */ skb_shinfo(seg)->tskey = skb_shinfo(gso_skb)->tskey; skb_shinfo(seg)->tx_flags |= (skb_shinfo(gso_skb)->tx_flags & SKBTX_ANY_TSTAMP); /* compute checksum adjustment based on old length versus new */ newlen = htons(sizeof(*uh) + mss); check = csum16_add(csum16_sub(uh->check, uh->len), newlen); for (;;) { if (copy_dtor) { seg->destructor = sock_wfree; seg->sk = sk; sum_truesize += seg->truesize; } if (!seg->next) break; uh->len = newlen; uh->check = check; if (seg->ip_summed == CHECKSUM_PARTIAL) gso_reset_checksum(seg, ~check); else uh->check = gso_make_checksum(seg, ~check) ? : CSUM_MANGLED_0; seg = seg->next; uh = udp_hdr(seg); } /* last packet can be partial gso_size, account for that in checksum */ newlen = htons(skb_tail_pointer(seg) - skb_transport_header(seg) + seg->data_len); check = csum16_add(csum16_sub(uh->check, uh->len), newlen); uh->len = newlen; uh->check = check; if (seg->ip_summed == CHECKSUM_PARTIAL) gso_reset_checksum(seg, ~check); else uh->check = gso_make_checksum(seg, ~check) ? : CSUM_MANGLED_0; /* update refcount for the packet */ if (copy_dtor) { int delta = sum_truesize - gso_skb->truesize; /* In some pathological cases, delta can be negative. * We need to either use refcount_add() or refcount_sub_and_test() */ if (likely(delta >= 0)) refcount_add(delta, &sk->sk_wmem_alloc); else WARN_ON_ONCE(refcount_sub_and_test(-delta, &sk->sk_wmem_alloc)); } return segs; } EXPORT_SYMBOL_GPL(__udp_gso_segment); static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); unsigned int mss; __wsum csum; struct udphdr *uh; struct iphdr *iph; if (skb->encapsulation && (skb_shinfo(skb)->gso_type & (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) { segs = skb_udp_tunnel_segment(skb, features, false); goto out; } if (!(skb_shinfo(skb)->gso_type & (SKB_GSO_UDP | SKB_GSO_UDP_L4))) goto out; if (!pskb_may_pull(skb, sizeof(struct udphdr))) goto out; if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) return __udp_gso_segment(skb, features); mss = skb_shinfo(skb)->gso_size; if (unlikely(skb->len <= mss)) goto out; /* Do software UFO. 
Complete and fill in the UDP checksum as * HW cannot do checksum of UDP packets sent as multiple * IP fragments. */ uh = udp_hdr(skb); iph = ip_hdr(skb); uh->check = 0; csum = skb_checksum(skb, 0, skb->len, 0); uh->check = udp_v4_check(skb->len, iph->saddr, iph->daddr, csum); if (uh->check == 0) uh->check = CSUM_MANGLED_0; skb->ip_summed = CHECKSUM_UNNECESSARY; /* If there is no outer header we can fake a checksum offload * due to the fact that we have already done the checksum in * software prior to segmenting the frame. */ if (!skb->encap_hdr_csum) features |= NETIF_F_HW_CSUM; /* Fragment the skb. IP headers of the fragments are updated in * inet_gso_segment() */ segs = skb_segment(skb, features); out: return segs; } struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, struct udphdr *uh, udp_lookup_t lookup) { struct sk_buff *pp = NULL; struct sk_buff *p; struct udphdr *uh2; unsigned int off = skb_gro_offset(skb); int flush = 1; struct sock *sk; if (NAPI_GRO_CB(skb)->encap_mark || (uh->check && skb->ip_summed != CHECKSUM_PARTIAL && NAPI_GRO_CB(skb)->csum_cnt == 0 && !NAPI_GRO_CB(skb)->csum_valid)) goto out; /* mark that this skb passed once through the tunnel gro layer */ NAPI_GRO_CB(skb)->encap_mark = 1; rcu_read_lock(); sk = (*lookup)(skb, uh->source, uh->dest); if (sk && udp_sk(sk)->gro_receive) goto unflush; goto out_unlock; unflush: flush = 0; list_for_each_entry(p, head, list) { if (!NAPI_GRO_CB(p)->same_flow) continue; uh2 = (struct udphdr *)(p->data + off); /* Match ports and either checksums are either both zero * or nonzero. */ if ((*(u32 *)&uh->source != *(u32 *)&uh2->source) || (!uh->check ^ !uh2->check)) { NAPI_GRO_CB(p)->same_flow = 0; continue; } } skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */ skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr)); pp = call_gro_receive_sk(udp_sk(sk)->gro_receive, sk, head, skb); out_unlock: rcu_read_unlock(); out: skb_gro_flush_final(skb, pp, flush); return pp; } EXPORT_SYMBOL(udp_gro_receive); static struct sk_buff *udp4_gro_receive(struct list_head *head, struct sk_buff *skb) { struct udphdr *uh = udp_gro_udphdr(skb); if (unlikely(!uh)) goto flush; /* Don't bother verifying checksum if we're going to flush anyway. */ if (NAPI_GRO_CB(skb)->flush) goto skip; if (skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check, inet_gro_compute_pseudo)) goto flush; else if (uh->check) skb_gro_checksum_try_convert(skb, IPPROTO_UDP, uh->check, inet_gro_compute_pseudo); skip: NAPI_GRO_CB(skb)->is_ipv6 = 0; return udp_gro_receive(head, skb, uh, udp4_lib_lookup_skb); flush: NAPI_GRO_CB(skb)->flush = 1; return NULL; } int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup) { __be16 newlen = htons(skb->len - nhoff); struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); int err = -ENOSYS; struct sock *sk; uh->len = newlen; /* Set encapsulation before calling into inner gro_complete() functions * to make them set up the inner offsets. 
*/ skb->encapsulation = 1; rcu_read_lock(); sk = (*lookup)(skb, uh->source, uh->dest); if (sk && udp_sk(sk)->gro_complete) err = udp_sk(sk)->gro_complete(sk, skb, nhoff + sizeof(struct udphdr)); rcu_read_unlock(); if (skb->remcsum_offload) skb_shinfo(skb)->gso_type |= SKB_GSO_TUNNEL_REMCSUM; return err; } EXPORT_SYMBOL(udp_gro_complete); static int udp4_gro_complete(struct sk_buff *skb, int nhoff) { const struct iphdr *iph = ip_hdr(skb); struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); if (uh->check) { skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM; uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr, iph->daddr, 0); } else { skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL; } return udp_gro_complete(skb, nhoff, udp4_lib_lookup_skb); } static const struct net_offload udpv4_offload = { .callbacks = { .gso_segment = udp4_ufo_fragment, .gro_receive = udp4_gro_receive, .gro_complete = udp4_gro_complete, }, }; int __init udpv4_offload_init(void) { return inet_add_offload(&udpv4_offload, IPPROTO_UDP); }
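The length patching in __udp_gso_segment() above relies on incremental one's-complement checksum updates via csum16_sub()/csum16_add(): the old 16-bit length is subtracted from the stored checksum and the new one added, instead of recomputing the sum over the whole payload. The following is a minimal user-space sketch of that arithmetic; the helper bodies mirror the kernel's csum16_add()/csum16_sub() semantics, and the numeric values are arbitrary example data, not taken from the source.

#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>	/* htons() */

/* One's-complement 16-bit add with end-around carry (mirrors csum16_add). */
static uint16_t demo_csum16_add(uint16_t csum, uint16_t addend)
{
	uint32_t res = (uint32_t)csum + addend;

	return (uint16_t)(res + (res >> 16));	/* fold the carry back in */
}

/* Subtracting a value is adding its one's complement (mirrors csum16_sub). */
static uint16_t demo_csum16_sub(uint16_t csum, uint16_t addend)
{
	return demo_csum16_add(csum, (uint16_t)~addend);
}

int main(void)
{
	uint16_t check = 0x1c46;		/* hypothetical checksum over the old length */
	uint16_t old_len = htons(1480);
	uint16_t new_len = htons(1240);

	/* Patch the checksum for the new length without touching the payload,
	 * the same update pattern __udp_gso_segment() applies per segment. */
	check = demo_csum16_add(demo_csum16_sub(check, old_len), new_len);
	printf("adjusted check: 0x%04x\n", check);
	return 0;
}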
// SPDX-License-Identifier: GPL-2.0+ /* * mdt.c - meta data file for NILFS * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * * Written by Ryusuke Konishi.
*/ #include <linux/buffer_head.h> #include <linux/mpage.h> #include <linux/mm.h> #include <linux/writeback.h> #include <linux/backing-dev.h> #include <linux/swap.h> #include <linux/slab.h> #include "nilfs.h" #include "btnode.h" #include "segment.h" #include "page.h" #include "mdt.h" #include "alloc.h" /* nilfs_palloc_destroy_cache() */ #include <trace/events/nilfs2.h> #define NILFS_MDT_MAX_RA_BLOCKS (16 - 1) static int nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block, struct buffer_head *bh, void (*init_block)(struct inode *, struct buffer_head *, void *)) { struct nilfs_inode_info *ii = NILFS_I(inode); void *kaddr; int ret; /* Caller exclude read accesses using page lock */ /* set_buffer_new(bh); */ bh->b_blocknr = 0; ret = nilfs_bmap_insert(ii->i_bmap, block, (unsigned long)bh); if (unlikely(ret)) return ret; set_buffer_mapped(bh); kaddr = kmap_atomic(bh->b_page); memset(kaddr + bh_offset(bh), 0, i_blocksize(inode)); if (init_block) init_block(inode, bh, kaddr); flush_dcache_page(bh->b_page); kunmap_atomic(kaddr); set_buffer_uptodate(bh); mark_buffer_dirty(bh); nilfs_mdt_mark_dirty(inode); trace_nilfs2_mdt_insert_new_block(inode, inode->i_ino, block); return 0; } static int nilfs_mdt_create_block(struct inode *inode, unsigned long block, struct buffer_head **out_bh, void (*init_block)(struct inode *, struct buffer_head *, void *)) { struct super_block *sb = inode->i_sb; struct nilfs_transaction_info ti; struct buffer_head *bh; int err; nilfs_transaction_begin(sb, &ti, 0); err = -ENOMEM; bh = nilfs_grab_buffer(inode, inode->i_mapping, block, 0); if (unlikely(!bh)) goto failed_unlock; err = -EEXIST; if (buffer_uptodate(bh)) goto failed_bh; wait_on_buffer(bh); if (buffer_uptodate(bh)) goto failed_bh; bh->b_bdev = sb->s_bdev; err = nilfs_mdt_insert_new_block(inode, block, bh, init_block); if (likely(!err)) { get_bh(bh); *out_bh = bh; } failed_bh: unlock_page(bh->b_page); put_page(bh->b_page); brelse(bh); failed_unlock: if (likely(!err)) err = nilfs_transaction_commit(sb); else nilfs_transaction_abort(sb); return err; } static int nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff, int mode, int mode_flags, struct buffer_head **out_bh) { struct buffer_head *bh; __u64 blknum = 0; int ret = -ENOMEM; bh = nilfs_grab_buffer(inode, inode->i_mapping, blkoff, 0); if (unlikely(!bh)) goto failed; ret = -EEXIST; /* internal code */ if (buffer_uptodate(bh)) goto out; if (mode_flags & REQ_RAHEAD) { if (!trylock_buffer(bh)) { ret = -EBUSY; goto failed_bh; } } else /* mode == READ */ lock_buffer(bh); if (buffer_uptodate(bh)) { unlock_buffer(bh); goto out; } ret = nilfs_bmap_lookup(NILFS_I(inode)->i_bmap, blkoff, &blknum); if (unlikely(ret)) { unlock_buffer(bh); goto failed_bh; } map_bh(bh, inode->i_sb, (sector_t)blknum); bh->b_end_io = end_buffer_read_sync; get_bh(bh); submit_bh(mode, mode_flags, bh); ret = 0; trace_nilfs2_mdt_submit_block(inode, inode->i_ino, blkoff, mode); out: get_bh(bh); *out_bh = bh; failed_bh: unlock_page(bh->b_page); put_page(bh->b_page); brelse(bh); failed: return ret; } static int nilfs_mdt_read_block(struct inode *inode, unsigned long block, int readahead, struct buffer_head **out_bh) { struct buffer_head *first_bh, *bh; unsigned long blkoff; int i, nr_ra_blocks = NILFS_MDT_MAX_RA_BLOCKS; int err; err = nilfs_mdt_submit_block(inode, block, REQ_OP_READ, 0, &first_bh); if (err == -EEXIST) /* internal code */ goto out; if (unlikely(err)) goto failed; if (readahead) { blkoff = block + 1; for (i = 0; i < nr_ra_blocks; i++, blkoff++) { err = 
nilfs_mdt_submit_block(inode, blkoff, REQ_OP_READ, REQ_RAHEAD, &bh); if (likely(!err || err == -EEXIST)) brelse(bh); else if (err != -EBUSY) break; /* abort readahead if bmap lookup failed */ if (!buffer_locked(first_bh)) goto out_no_wait; } } wait_on_buffer(first_bh); out_no_wait: err = -EIO; if (!buffer_uptodate(first_bh)) { nilfs_msg(inode->i_sb, KERN_ERR, "I/O error reading meta-data file (ino=%lu, block-offset=%lu)", inode->i_ino, block); goto failed_bh; } out: *out_bh = first_bh; return 0; failed_bh: brelse(first_bh); failed: return err; } /** * nilfs_mdt_get_block - read or create a buffer on meta data file. * @inode: inode of the meta data file * @blkoff: block offset * @create: create flag * @init_block: initializer used for newly allocated block * @out_bh: output of a pointer to the buffer_head * * nilfs_mdt_get_block() looks up the specified buffer and tries to create * a new buffer if @create is not zero. On success, the returned buffer is * assured to be either existing or formatted using a buffer lock on success. * @out_bh is substituted only when zero is returned. * * Return Value: On success, it returns 0. On error, the following negative * error code is returned. * * %-ENOMEM - Insufficient memory available. * * %-EIO - I/O error * * %-ENOENT - the specified block does not exist (hole block) * * %-EROFS - Read only filesystem (for create mode) */ int nilfs_mdt_get_block(struct inode *inode, unsigned long blkoff, int create, void (*init_block)(struct inode *, struct buffer_head *, void *), struct buffer_head **out_bh) { int ret; /* Should be rewritten with merging nilfs_mdt_read_block() */ retry: ret = nilfs_mdt_read_block(inode, blkoff, !create, out_bh); if (!create || ret != -ENOENT) return ret; ret = nilfs_mdt_create_block(inode, blkoff, out_bh, init_block); if (unlikely(ret == -EEXIST)) { /* create = 0; */ /* limit read-create loop retries */ goto retry; } return ret; } /** * nilfs_mdt_find_block - find and get a buffer on meta data file. * @inode: inode of the meta data file * @start: start block offset (inclusive) * @end: end block offset (inclusive) * @blkoff: block offset * @out_bh: place to store a pointer to buffer_head struct * * nilfs_mdt_find_block() looks up an existing block in range of * [@start, @end] and stores pointer to a buffer head of the block to * @out_bh, and block offset to @blkoff, respectively. @out_bh and * @blkoff are substituted only when zero is returned. * * Return Value: On success, it returns 0. On error, the following negative * error code is returned. * * %-ENOMEM - Insufficient memory available. * * %-EIO - I/O error * * %-ENOENT - no block was found in the range */ int nilfs_mdt_find_block(struct inode *inode, unsigned long start, unsigned long end, unsigned long *blkoff, struct buffer_head **out_bh) { __u64 next; int ret; if (unlikely(start > end)) return -ENOENT; ret = nilfs_mdt_read_block(inode, start, true, out_bh); if (!ret) { *blkoff = start; goto out; } if (unlikely(ret != -ENOENT || start == ULONG_MAX)) goto out; ret = nilfs_bmap_seek_key(NILFS_I(inode)->i_bmap, start + 1, &next); if (!ret) { if (next <= end) { ret = nilfs_mdt_read_block(inode, next, true, out_bh); if (!ret) *blkoff = next; } else { ret = -ENOENT; } } out: return ret; } /** * nilfs_mdt_delete_block - make a hole on the meta data file. * @inode: inode of the meta data file * @block: block offset * * Return Value: On success, zero is returned. * On error, one of the following negative error code is returned. * * %-ENOMEM - Insufficient memory available. 
* * %-EIO - I/O error */ int nilfs_mdt_delete_block(struct inode *inode, unsigned long block) { struct nilfs_inode_info *ii = NILFS_I(inode); int err; err = nilfs_bmap_delete(ii->i_bmap, block); if (!err || err == -ENOENT) { nilfs_mdt_mark_dirty(inode); nilfs_mdt_forget_block(inode, block); } return err; } /** * nilfs_mdt_forget_block - discard dirty state and try to remove the page * @inode: inode of the meta data file * @block: block offset * * nilfs_mdt_forget_block() clears a dirty flag of the specified buffer, and * tries to release the page including the buffer from a page cache. * * Return Value: On success, 0 is returned. On error, one of the following * negative error code is returned. * * %-EBUSY - page has an active buffer. * * %-ENOENT - page cache has no page addressed by the offset. */ int nilfs_mdt_forget_block(struct inode *inode, unsigned long block) { pgoff_t index = (pgoff_t)block >> (PAGE_SHIFT - inode->i_blkbits); struct page *page; unsigned long first_block; int ret = 0; int still_dirty; page = find_lock_page(inode->i_mapping, index); if (!page) return -ENOENT; wait_on_page_writeback(page); first_block = (unsigned long)index << (PAGE_SHIFT - inode->i_blkbits); if (page_has_buffers(page)) { struct buffer_head *bh; bh = nilfs_page_get_nth_block(page, block - first_block); nilfs_forget_buffer(bh); } still_dirty = PageDirty(page); unlock_page(page); put_page(page); if (still_dirty || invalidate_inode_pages2_range(inode->i_mapping, index, index) != 0) ret = -EBUSY; return ret; } int nilfs_mdt_fetch_dirty(struct inode *inode) { struct nilfs_inode_info *ii = NILFS_I(inode); if (nilfs_bmap_test_and_clear_dirty(ii->i_bmap)) { set_bit(NILFS_I_DIRTY, &ii->i_state); return 1; } return test_bit(NILFS_I_DIRTY, &ii->i_state); } static int nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc) { struct inode *inode = page->mapping->host; struct super_block *sb; int err = 0; if (inode && sb_rdonly(inode->i_sb)) { /* * It means that filesystem was remounted in read-only * mode because of error or metadata corruption. But we * have dirty pages that try to be flushed in background. * So, here we simply discard this dirty page. 
*/ nilfs_clear_dirty_page(page, false); unlock_page(page); return -EROFS; } redirty_page_for_writepage(wbc, page); unlock_page(page); if (!inode) return 0; sb = inode->i_sb; if (wbc->sync_mode == WB_SYNC_ALL) err = nilfs_construct_segment(sb); else if (wbc->for_reclaim) nilfs_flush_segment(sb, inode->i_ino); return err; } static const struct address_space_operations def_mdt_aops = { .writepage = nilfs_mdt_write_page, }; static const struct inode_operations def_mdt_iops; static const struct file_operations def_mdt_fops; int nilfs_mdt_init(struct inode *inode, gfp_t gfp_mask, size_t objsz) { struct nilfs_mdt_info *mi; mi = kzalloc(max(sizeof(*mi), objsz), GFP_NOFS); if (!mi) return -ENOMEM; init_rwsem(&mi->mi_sem); inode->i_private = mi; inode->i_mode = S_IFREG; mapping_set_gfp_mask(inode->i_mapping, gfp_mask); inode->i_op = &def_mdt_iops; inode->i_fop = &def_mdt_fops; inode->i_mapping->a_ops = &def_mdt_aops; return 0; } /** * nilfs_mdt_clear - do cleanup for the metadata file * @inode: inode of the metadata file */ void nilfs_mdt_clear(struct inode *inode) { struct nilfs_mdt_info *mdi = NILFS_MDT(inode); struct nilfs_shadow_map *shadow = mdi->mi_shadow; if (mdi->mi_palloc_cache) nilfs_palloc_destroy_cache(inode); if (shadow) { struct inode *s_inode = shadow->inode; shadow->inode = NULL; iput(s_inode); mdi->mi_shadow = NULL; } } /** * nilfs_mdt_destroy - release resources used by the metadata file * @inode: inode of the metadata file */ void nilfs_mdt_destroy(struct inode *inode) { struct nilfs_mdt_info *mdi = NILFS_MDT(inode); kfree(mdi->mi_bgl); /* kfree(NULL) is safe */ kfree(mdi); } void nilfs_mdt_set_entry_size(struct inode *inode, unsigned int entry_size, unsigned int header_size) { struct nilfs_mdt_info *mi = NILFS_MDT(inode); mi->mi_entry_size = entry_size; mi->mi_entries_per_block = i_blocksize(inode) / entry_size; mi->mi_first_entry_offset = DIV_ROUND_UP(header_size, entry_size); } /** * nilfs_mdt_setup_shadow_map - setup shadow map and bind it to metadata file * @inode: inode of the metadata file * @shadow: shadow mapping */ int nilfs_mdt_setup_shadow_map(struct inode *inode, struct nilfs_shadow_map *shadow) { struct nilfs_mdt_info *mi = NILFS_MDT(inode); struct inode *s_inode; INIT_LIST_HEAD(&shadow->frozen_buffers); s_inode = nilfs_iget_for_shadow(inode); if (IS_ERR(s_inode)) return PTR_ERR(s_inode); shadow->inode = s_inode; mi->mi_shadow = shadow; return 0; } /** * nilfs_mdt_save_to_shadow_map - copy bmap and dirty pages to shadow map * @inode: inode of the metadata file */ int nilfs_mdt_save_to_shadow_map(struct inode *inode) { struct nilfs_mdt_info *mi = NILFS_MDT(inode); struct nilfs_inode_info *ii = NILFS_I(inode); struct nilfs_shadow_map *shadow = mi->mi_shadow; struct inode *s_inode = shadow->inode; int ret; ret = nilfs_copy_dirty_pages(s_inode->i_mapping, inode->i_mapping); if (ret) goto out; ret = nilfs_copy_dirty_pages(NILFS_I(s_inode)->i_assoc_inode->i_mapping, ii->i_assoc_inode->i_mapping); if (ret) goto out; nilfs_bmap_save(ii->i_bmap, &shadow->bmap_store); out: return ret; } int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh) { struct nilfs_shadow_map *shadow = NILFS_MDT(inode)->mi_shadow; struct buffer_head *bh_frozen; struct page *page; int blkbits = inode->i_blkbits; page = grab_cache_page(shadow->inode->i_mapping, bh->b_page->index); if (!page) return -ENOMEM; if (!page_has_buffers(page)) create_empty_buffers(page, 1 << blkbits, 0); bh_frozen = nilfs_page_get_nth_block(page, bh_offset(bh) >> blkbits); if (!buffer_uptodate(bh_frozen)) 
nilfs_copy_buffer(bh_frozen, bh); if (list_empty(&bh_frozen->b_assoc_buffers)) { list_add_tail(&bh_frozen->b_assoc_buffers, &shadow->frozen_buffers); set_buffer_nilfs_redirected(bh); } else { brelse(bh_frozen); /* already frozen */ } unlock_page(page); put_page(page); return 0; } struct buffer_head * nilfs_mdt_get_frozen_buffer(struct inode *inode, struct buffer_head *bh) { struct nilfs_shadow_map *shadow = NILFS_MDT(inode)->mi_shadow; struct buffer_head *bh_frozen = NULL; struct page *page; int n; page = find_lock_page(shadow->inode->i_mapping, bh->b_page->index); if (page) { if (page_has_buffers(page)) { n = bh_offset(bh) >> inode->i_blkbits; bh_frozen = nilfs_page_get_nth_block(page, n); } unlock_page(page); put_page(page); } return bh_frozen; } static void nilfs_release_frozen_buffers(struct nilfs_shadow_map *shadow) { struct list_head *head = &shadow->frozen_buffers; struct buffer_head *bh; while (!list_empty(head)) { bh = list_first_entry(head, struct buffer_head, b_assoc_buffers); list_del_init(&bh->b_assoc_buffers); brelse(bh); /* drop ref-count to make it releasable */ } } /** * nilfs_mdt_restore_from_shadow_map - restore dirty pages and bmap state * @inode: inode of the metadata file */ void nilfs_mdt_restore_from_shadow_map(struct inode *inode) { struct nilfs_mdt_info *mi = NILFS_MDT(inode); struct nilfs_inode_info *ii = NILFS_I(inode); struct nilfs_shadow_map *shadow = mi->mi_shadow; down_write(&mi->mi_sem); if (mi->mi_palloc_cache) nilfs_palloc_clear_cache(inode); nilfs_clear_dirty_pages(inode->i_mapping, true); nilfs_copy_back_pages(inode->i_mapping, shadow->inode->i_mapping); nilfs_clear_dirty_pages(ii->i_assoc_inode->i_mapping, true); nilfs_copy_back_pages(ii->i_assoc_inode->i_mapping, NILFS_I(shadow->inode)->i_assoc_inode->i_mapping); nilfs_bmap_restore(ii->i_bmap, &shadow->bmap_store); up_write(&mi->mi_sem); } /** * nilfs_mdt_clear_shadow_map - truncate pages in shadow map caches * @inode: inode of the metadata file */ void nilfs_mdt_clear_shadow_map(struct inode *inode) { struct nilfs_mdt_info *mi = NILFS_MDT(inode); struct nilfs_shadow_map *shadow = mi->mi_shadow; struct inode *shadow_btnc_inode = NILFS_I(shadow->inode)->i_assoc_inode; down_write(&mi->mi_sem); nilfs_release_frozen_buffers(shadow); truncate_inode_pages(shadow->inode->i_mapping, 0); truncate_inode_pages(shadow_btnc_inode->i_mapping, 0); up_write(&mi->mi_sem); }
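nilfs_mdt_forget_block() above maps a block offset in the metadata file to a page-cache index and a buffer slot inside that page with shift arithmetic on (PAGE_SHIFT - i_blkbits). A stand-alone sketch of that mapping follows; it assumes 4 KiB pages and 1 KiB metadata blocks, constants chosen only for illustration.

#include <stdio.h>

/* Illustrative constants; the kernel derives these from PAGE_SHIFT and
 * inode->i_blkbits. Here: 4 KiB pages, 1 KiB blocks, so 4 blocks per page. */
#define DEMO_PAGE_SHIFT	12
#define DEMO_BLKBITS	10

int main(void)
{
	unsigned long block = 4097;	/* hypothetical block offset in the MDT file */
	unsigned long index, first_block, nth;

	/* Same shift arithmetic as nilfs_mdt_forget_block(). */
	index = block >> (DEMO_PAGE_SHIFT - DEMO_BLKBITS);
	first_block = index << (DEMO_PAGE_SHIFT - DEMO_BLKBITS);
	nth = block - first_block;	/* buffer position inside the page */

	printf("block %lu -> page index %lu, buffer %lu of %u\n",
	       block, index, nth, 1U << (DEMO_PAGE_SHIFT - DEMO_BLKBITS));
	return 0;
}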
/* SPDX-License-Identifier: GPL-2.0 */ /* * linux/ipc/util.h * Copyright (C) 1999 Christoph Rohland * * ipc helper functions (c) 1999 Manfred Spraul <manfred@colorfullife.com> * namespaces support. 2006 OpenVZ, SWsoft Inc. * Pavel Emelianov <xemul@openvz.org> */ #ifndef _IPC_UTIL_H #define _IPC_UTIL_H #include <linux/unistd.h> #include <linux/err.h> #include <linux/ipc_namespace.h> #define IPCMNI 32768 /* <= MAX_INT limit for ipc arrays (including sysctl changes) */ #define SEQ_MULTIPLIER (IPCMNI) void sem_init(void); void msg_init(void); void shm_init(void); struct ipc_namespace; struct pid_namespace; #ifdef CONFIG_POSIX_MQUEUE extern void mq_clear_sbinfo(struct ipc_namespace *ns); extern void mq_put_mnt(struct ipc_namespace *ns); #else static inline void mq_clear_sbinfo(struct ipc_namespace *ns) { } static inline void mq_put_mnt(struct ipc_namespace *ns) { } #endif #ifdef CONFIG_SYSVIPC void sem_init_ns(struct ipc_namespace *ns); void msg_init_ns(struct ipc_namespace *ns); void shm_init_ns(struct ipc_namespace *ns); void sem_exit_ns(struct ipc_namespace *ns); void msg_exit_ns(struct ipc_namespace *ns); void shm_exit_ns(struct ipc_namespace *ns); #else static inline void sem_init_ns(struct ipc_namespace *ns) { } static inline void msg_init_ns(struct ipc_namespace *ns) { } static inline void shm_init_ns(struct ipc_namespace *ns) { } static inline void sem_exit_ns(struct ipc_namespace *ns) { } static inline void msg_exit_ns(struct ipc_namespace *ns) { } static inline void shm_exit_ns(struct ipc_namespace *ns) { } #endif /* * Structure that holds the parameters needed by the ipc operations * (see after) */ struct ipc_params { key_t key; int flg; union { size_t size; /* for shared memories */ int nsems; /* for semaphores */ } u; /* holds the getnew() specific param */ }; /* * Structure that holds some ipc operations. This structure is used to unify * the calls to sys_msgget(), sys_semget(), sys_shmget() * . routine to call to create a new ipc object. Can be one of newque, * newary, newseg * . routine to call to check permissions for a new ipc object. * Can be one of security_msg_associate, security_sem_associate, * security_shm_associate * .
routine to call for an extra check if needed */ struct ipc_ops { int (*getnew)(struct ipc_namespace *, struct ipc_params *); int (*associate)(struct kern_ipc_perm *, int); int (*more_checks)(struct kern_ipc_perm *, struct ipc_params *); }; struct seq_file; struct ipc_ids; void ipc_init_ids(struct ipc_ids *ids); #ifdef CONFIG_PROC_FS void __init ipc_init_proc_interface(const char *path, const char *header, int ids, int (*show)(struct seq_file *, void *)); struct pid_namespace *ipc_seq_pid_ns(struct seq_file *); #else #define ipc_init_proc_interface(path, header, ids, show) do {} while (0) #endif #define IPC_SEM_IDS 0 #define IPC_MSG_IDS 1 #define IPC_SHM_IDS 2 #define ipcid_to_idx(id) ((id) % SEQ_MULTIPLIER) #define ipcid_to_seqx(id) ((id) / SEQ_MULTIPLIER) #define IPCID_SEQ_MAX min_t(int, INT_MAX/SEQ_MULTIPLIER, USHRT_MAX) /* must be called with ids->rwsem acquired for writing */ int ipc_addid(struct ipc_ids *, struct kern_ipc_perm *, int); /* must be called with both locks acquired. */ void ipc_rmid(struct ipc_ids *, struct kern_ipc_perm *); /* must be called with both locks acquired. */ void ipc_set_key_private(struct ipc_ids *, struct kern_ipc_perm *); /* must be called with ipcp locked */ int ipcperms(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp, short flg); /** * ipc_get_maxidx - get the highest assigned index * @ids: ipc identifier set * * Called with ipc_ids.rwsem held for reading. */ static inline int ipc_get_maxidx(struct ipc_ids *ids) { if (ids->in_use == 0) return -1; if (ids->in_use == IPCMNI) return IPCMNI - 1; return ids->max_idx; } /* * For allocation that need to be freed by RCU. * Objects are reference counted, they start with reference count 1. * getref increases the refcount, the putref call that reduces the recount * to 0 schedules the rcu destruction. Caller must guarantee locking. * * refcount is initialized by ipc_addid(), before that point call_rcu() * must be used. */ bool ipc_rcu_getref(struct kern_ipc_perm *ptr); void ipc_rcu_putref(struct kern_ipc_perm *ptr, void (*func)(struct rcu_head *head)); struct kern_ipc_perm *ipc_obtain_object_idr(struct ipc_ids *ids, int id); void kernel_to_ipc64_perm(struct kern_ipc_perm *in, struct ipc64_perm *out); void ipc64_perm_to_ipc_perm(struct ipc64_perm *in, struct ipc_perm *out); int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out); struct kern_ipc_perm *ipcctl_obtain_check(struct ipc_namespace *ns, struct ipc_ids *ids, int id, int cmd, struct ipc64_perm *perm, int extra_perm); static inline void ipc_update_pid(struct pid **pos, struct pid *pid) { struct pid *old = *pos; if (old != pid) { *pos = get_pid(pid); put_pid(old); } } #ifndef CONFIG_ARCH_WANT_IPC_PARSE_VERSION /* On IA-64, we always use the "64-bit version" of the IPC structures. 
*/ # define ipc_parse_version(cmd) IPC_64 #else int ipc_parse_version(int *cmd); #endif extern void free_msg(struct msg_msg *msg); extern struct msg_msg *load_msg(const void __user *src, size_t len); extern struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst); extern int store_msg(void __user *dest, struct msg_msg *msg, size_t len); static inline int ipc_checkid(struct kern_ipc_perm *ipcp, int id) { return ipcid_to_seqx(id) != ipcp->seq; } static inline void ipc_lock_object(struct kern_ipc_perm *perm) { spin_lock(&perm->lock); } static inline void ipc_unlock_object(struct kern_ipc_perm *perm) { spin_unlock(&perm->lock); } static inline void ipc_assert_locked_object(struct kern_ipc_perm *perm) { assert_spin_locked(&perm->lock); } static inline void ipc_unlock(struct kern_ipc_perm *perm) { ipc_unlock_object(perm); rcu_read_unlock(); } /* * ipc_valid_object() - helper to sort out IPC_RMID races for codepaths * where the respective ipc_ids.rwsem is not being held down. * Checks whether the ipc object is still around or if it's gone already, as * ipc_rmid() may have already freed the ID while the ipc lock was spinning. * Needs to be called with kern_ipc_perm.lock held -- exception made for one * checkpoint case at sys_semtimedop() as noted in code commentary. */ static inline bool ipc_valid_object(struct kern_ipc_perm *perm) { return !perm->deleted; } struct kern_ipc_perm *ipc_obtain_object_check(struct ipc_ids *ids, int id); int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids, const struct ipc_ops *ops, struct ipc_params *params); void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids, void (*free)(struct ipc_namespace *, struct kern_ipc_perm *)); #ifdef CONFIG_COMPAT #include <linux/compat.h> struct compat_ipc_perm { key_t key; __compat_uid_t uid; __compat_gid_t gid; __compat_uid_t cuid; __compat_gid_t cgid; compat_mode_t mode; unsigned short seq; }; void to_compat_ipc_perm(struct compat_ipc_perm *, struct ipc64_perm *); void to_compat_ipc64_perm(struct compat_ipc64_perm *, struct ipc64_perm *); int get_compat_ipc_perm(struct ipc64_perm *, struct compat_ipc_perm __user *); int get_compat_ipc64_perm(struct ipc64_perm *, struct compat_ipc64_perm __user *); static inline int compat_ipc_parse_version(int *cmd) { #ifdef CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION int version = *cmd & IPC_64; *cmd &= ~IPC_64; return version; #else return IPC_64; #endif } #endif /* for __ARCH_WANT_SYS_IPC */ long ksys_semtimedop(int semid, struct sembuf __user *tsops, unsigned int nsops, const struct __kernel_timespec __user *timeout); long ksys_semget(key_t key, int nsems, int semflg); long ksys_semctl(int semid, int semnum, int cmd, unsigned long arg); long ksys_msgget(key_t key, int msgflg); long ksys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf); long ksys_msgrcv(int msqid, struct msgbuf __user *msgp, size_t msgsz, long msgtyp, int msgflg); long ksys_msgsnd(int msqid, struct msgbuf __user *msgp, size_t msgsz, int msgflg); long ksys_shmget(key_t key, size_t size, int shmflg); long ksys_shmdt(char __user *shmaddr); long ksys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf); /* for CONFIG_ARCH_WANT_OLD_COMPAT_IPC */ long compat_ksys_semtimedop(int semid, struct sembuf __user *tsems, unsigned int nsops, const struct compat_timespec __user *timeout); #ifdef CONFIG_COMPAT long compat_ksys_semctl(int semid, int semnum, int cmd, int arg); long compat_ksys_msgctl(int msqid, int cmd, void __user *uptr); long compat_ksys_msgrcv(int msqid, compat_uptr_t msgp, compat_ssize_t msgsz, 
compat_long_t msgtyp, int msgflg); long compat_ksys_msgsnd(int msqid, compat_uptr_t msgp, compat_ssize_t msgsz, int msgflg); long compat_ksys_shmctl(int shmid, int cmd, void __user *uptr); #endif /* CONFIG_COMPAT */ #endif
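The ipcid_to_idx()/ipcid_to_seqx() macros and ipc_checkid() above split a user-visible SysV IPC id into a slot index and a sequence number, and flag ids whose sequence no longer matches the current slot (i.e. the slot was recycled). A stand-alone sketch of that arithmetic follows; build_ipc_id() and id_is_stale() are illustrative helpers, not functions from this header.

#include <stdio.h>
#include <stdbool.h>

/* Mirrors the constants and macros in ipc/util.h, for illustration only. */
#define IPCMNI		32768
#define SEQ_MULTIPLIER	IPCMNI
#define ipcid_to_idx(id)  ((id) % SEQ_MULTIPLIER)
#define ipcid_to_seqx(id) ((id) / SEQ_MULTIPLIER)

/* A user-visible id combines the slot index with seq * SEQ_MULTIPLIER. */
static int build_ipc_id(int idx, int seq)
{
	return idx + seq * SEQ_MULTIPLIER;
}

/* Same test as ipc_checkid(): the id is stale if its embedded sequence
 * number no longer matches the one stored for the slot. */
static bool id_is_stale(int id, int current_seq)
{
	return ipcid_to_seqx(id) != current_seq;
}

int main(void)
{
	int id = build_ipc_id(5, 3);	/* slot 5, third reuse of that slot */

	printf("id=%d idx=%d seq=%d stale(seq=3)=%d stale(seq=4)=%d\n",
	       id, ipcid_to_idx(id), ipcid_to_seqx(id),
	       id_is_stale(id, 3), id_is_stale(id, 4));
	return 0;
}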
/* * slcan.c - serial line CAN interface driver (using tty line discipline) * * This file is derived from linux/drivers/net/slip/slip.c * * slip.c Authors : Laurence Culhane <loz@holmes.demon.co.uk> * Fred N.
van Kempen <waltje@uwalt.nl.mugnet.org> * slcan.c Author : Oliver Hartkopp <socketcan@hartkopp.net> * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, see http://www.gnu.org/licenses/gpl.html * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * */ #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/uaccess.h> #include <linux/bitops.h> #include <linux/string.h> #include <linux/tty.h> #include <linux/errno.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/rtnetlink.h> #include <linux/if_arp.h> #include <linux/if_ether.h> #include <linux/sched.h> #include <linux/delay.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/workqueue.h> #include <linux/can.h> #include <linux/can/skb.h> MODULE_ALIAS_LDISC(N_SLCAN); MODULE_DESCRIPTION("serial line CAN interface"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Oliver Hartkopp <socketcan@hartkopp.net>"); #define SLCAN_MAGIC 0x53CA static int maxdev = 10; /* MAX number of SLCAN channels; This can be overridden with insmod slcan.ko maxdev=nnn */ module_param(maxdev, int, 0); MODULE_PARM_DESC(maxdev, "Maximum number of slcan interfaces"); /* maximum rx buffer len: extended CAN frame with timestamp */ #define SLC_MTU (sizeof("T1111222281122334455667788EA5F\r")+1) #define SLC_CMD_LEN 1 #define SLC_SFF_ID_LEN 3 #define SLC_EFF_ID_LEN 8 struct slcan { int magic; /* Various fields. */ struct tty_struct *tty; /* ptr to TTY structure */ struct net_device *dev; /* easy for intr handling */ spinlock_t lock; struct work_struct tx_work; /* Flushes transmit buffer */ /* These are pointers to the malloc()ed frame buffers. */ unsigned char rbuff[SLC_MTU]; /* receiver buffer */ int rcount; /* received chars counter */ unsigned char xbuff[SLC_MTU]; /* transmitter buffer */ unsigned char *xhead; /* pointer to next XMIT byte */ int xleft; /* bytes left in XMIT queue */ unsigned long flags; /* Flag values/ mode etc */ #define SLF_INUSE 0 /* Channel in use */ #define SLF_ERROR 1 /* Parity, etc. 
error */ }; static struct net_device **slcan_devs; /************************************************************************ * SLCAN ENCAPSULATION FORMAT * ************************************************************************/ /* * A CAN frame has a can_id (11 bit standard frame format OR 29 bit extended * frame format) a data length code (can_dlc) which can be from 0 to 8 * and up to <can_dlc> data bytes as payload. * Additionally a CAN frame may become a remote transmission frame if the * RTR-bit is set. This causes another ECU to send a CAN frame with the * given can_id. * * The SLCAN ASCII representation of these different frame types is: * <type> <id> <dlc> <data>* * * Extended frames (29 bit) are defined by capital characters in the type. * RTR frames are defined as 'r' types - normal frames have 't' type: * t => 11 bit data frame * r => 11 bit RTR frame * T => 29 bit data frame * R => 29 bit RTR frame * * The <id> is 3 (standard) or 8 (extended) bytes in ASCII Hex (base64). * The <dlc> is a one byte ASCII number ('0' - '8') * The <data> section has at much ASCII Hex bytes as defined by the <dlc> * * Examples: * * t1230 : can_id 0x123, can_dlc 0, no data * t4563112233 : can_id 0x456, can_dlc 3, data 0x11 0x22 0x33 * T12ABCDEF2AA55 : extended can_id 0x12ABCDEF, can_dlc 2, data 0xAA 0x55 * r1230 : can_id 0x123, can_dlc 0, no data, remote transmission request * */ /************************************************************************ * STANDARD SLCAN DECAPSULATION * ************************************************************************/ /* Send one completely decapsulated can_frame to the network layer */ static void slc_bump(struct slcan *sl) { struct sk_buff *skb; struct can_frame cf; int i, tmp; u32 tmpid; char *cmd = sl->rbuff; memset(&cf, 0, sizeof(cf)); switch (*cmd) { case 'r': cf.can_id = CAN_RTR_FLAG; /* fallthrough */ case 't': /* store dlc ASCII value and terminate SFF CAN ID string */ cf.can_dlc = sl->rbuff[SLC_CMD_LEN + SLC_SFF_ID_LEN]; sl->rbuff[SLC_CMD_LEN + SLC_SFF_ID_LEN] = 0; /* point to payload data behind the dlc */ cmd += SLC_CMD_LEN + SLC_SFF_ID_LEN + 1; break; case 'R': cf.can_id = CAN_RTR_FLAG; /* fallthrough */ case 'T': cf.can_id |= CAN_EFF_FLAG; /* store dlc ASCII value and terminate EFF CAN ID string */ cf.can_dlc = sl->rbuff[SLC_CMD_LEN + SLC_EFF_ID_LEN]; sl->rbuff[SLC_CMD_LEN + SLC_EFF_ID_LEN] = 0; /* point to payload data behind the dlc */ cmd += SLC_CMD_LEN + SLC_EFF_ID_LEN + 1; break; default: return; } if (kstrtou32(sl->rbuff + SLC_CMD_LEN, 16, &tmpid)) return; cf.can_id |= tmpid; /* get can_dlc from sanitized ASCII value */ if (cf.can_dlc >= '0' && cf.can_dlc < '9') cf.can_dlc -= '0'; else return; /* RTR frames may have a dlc > 0 but they never have any data bytes */ if (!(cf.can_id & CAN_RTR_FLAG)) { for (i = 0; i < cf.can_dlc; i++) { tmp = hex_to_bin(*cmd++); if (tmp < 0) return; cf.data[i] = (tmp << 4); tmp = hex_to_bin(*cmd++); if (tmp < 0) return; cf.data[i] |= tmp; } } skb = dev_alloc_skb(sizeof(struct can_frame) + sizeof(struct can_skb_priv)); if (!skb) return; skb->dev = sl->dev; skb->protocol = htons(ETH_P_CAN); skb->pkt_type = PACKET_BROADCAST; skb->ip_summed = CHECKSUM_UNNECESSARY; can_skb_reserve(skb); can_skb_prv(skb)->ifindex = sl->dev->ifindex; can_skb_prv(skb)->skbcnt = 0; skb_put_data(skb, &cf, sizeof(struct can_frame)); sl->dev->stats.rx_packets++; sl->dev->stats.rx_bytes += cf.can_dlc; netif_rx_ni(skb); } /* parse tty input stream */ static void slcan_unesc(struct slcan *sl, unsigned char s) { if ((s == '\r') || (s == '\a')) 
{ /* CR or BEL ends the pdu */ if (!test_and_clear_bit(SLF_ERROR, &sl->flags) && (sl->rcount > 4)) { slc_bump(sl); } sl->rcount = 0; } else { if (!test_bit(SLF_ERROR, &sl->flags)) { if (sl->rcount < SLC_MTU) { sl->rbuff[sl->rcount++] = s; return; } else { sl->dev->stats.rx_over_errors++; set_bit(SLF_ERROR, &sl->flags); } } } } /************************************************************************ * STANDARD SLCAN ENCAPSULATION * ************************************************************************/ /* Encapsulate one can_frame and stuff into a TTY queue. */ static void slc_encaps(struct slcan *sl, struct can_frame *cf) { int actual, i; unsigned char *pos; unsigned char *endpos; canid_t id = cf->can_id; pos = sl->xbuff; if (cf->can_id & CAN_RTR_FLAG) *pos = 'R'; /* becomes 'r' in standard frame format (SFF) */ else *pos = 'T'; /* becomes 't' in standard frame format (SSF) */ /* determine number of chars for the CAN-identifier */ if (cf->can_id & CAN_EFF_FLAG) { id &= CAN_EFF_MASK; endpos = pos + SLC_EFF_ID_LEN; } else { *pos |= 0x20; /* convert R/T to lower case for SFF */ id &= CAN_SFF_MASK; endpos = pos + SLC_SFF_ID_LEN; } /* build 3 (SFF) or 8 (EFF) digit CAN identifier */ pos++; while (endpos >= pos) { *endpos-- = hex_asc_upper[id & 0xf]; id >>= 4; } pos += (cf->can_id & CAN_EFF_FLAG) ? SLC_EFF_ID_LEN : SLC_SFF_ID_LEN; *pos++ = cf->can_dlc + '0'; /* RTR frames may have a dlc > 0 but they never have any data bytes */ if (!(cf->can_id & CAN_RTR_FLAG)) { for (i = 0; i < cf->can_dlc; i++) pos = hex_byte_pack_upper(pos, cf->data[i]); } *pos++ = '\r'; /* Order of next two lines is *very* important. * When we are sending a little amount of data, * the transfer may be completed inside the ops->write() * routine, because it's running with interrupts enabled. * In this case we *never* got WRITE_WAKEUP event, * if we did not request it before write operation. * 14 Oct 1994 Dmitry Gorodchanin. */ set_bit(TTY_DO_WRITE_WAKEUP, &sl->tty->flags); actual = sl->tty->ops->write(sl->tty, sl->xbuff, pos - sl->xbuff); sl->xleft = (pos - sl->xbuff) - actual; sl->xhead = sl->xbuff + actual; sl->dev->stats.tx_bytes += cf->can_dlc; } /* Write out any remaining transmit buffer. Scheduled when tty is writable */ static void slcan_transmit(struct work_struct *work) { struct slcan *sl = container_of(work, struct slcan, tx_work); int actual; spin_lock_bh(&sl->lock); /* First make sure we're connected. */ if (!sl->tty || sl->magic != SLCAN_MAGIC || !netif_running(sl->dev)) { spin_unlock_bh(&sl->lock); return; } if (sl->xleft <= 0) { /* Now serial buffer is almost free & we can start * transmission of another packet */ sl->dev->stats.tx_packets++; clear_bit(TTY_DO_WRITE_WAKEUP, &sl->tty->flags); spin_unlock_bh(&sl->lock); netif_wake_queue(sl->dev); return; } actual = sl->tty->ops->write(sl->tty, sl->xhead, sl->xleft); sl->xleft -= actual; sl->xhead += actual; spin_unlock_bh(&sl->lock); } /* * Called by the driver when there's room for more data. * Schedule the transmit. */ static void slcan_write_wakeup(struct tty_struct *tty) { struct slcan *sl; rcu_read_lock(); sl = rcu_dereference(tty->disc_data); if (!sl) goto out; schedule_work(&sl->tx_work); out: rcu_read_unlock(); } /* Send a can_frame to a TTY queue. 
*/ static netdev_tx_t slc_xmit(struct sk_buff *skb, struct net_device *dev) { struct slcan *sl = netdev_priv(dev); if (skb->len != CAN_MTU) goto out; spin_lock(&sl->lock); if (!netif_running(dev)) { spin_unlock(&sl->lock); printk(KERN_WARNING "%s: xmit: iface is down\n", dev->name); goto out; } if (sl->tty == NULL) { spin_unlock(&sl->lock); goto out; } netif_stop_queue(sl->dev); slc_encaps(sl, (struct can_frame *) skb->data); /* encaps & send */ spin_unlock(&sl->lock); out: kfree_skb(skb); return NETDEV_TX_OK; } /****************************************** * Routines looking at netdevice side. ******************************************/ /* Netdevice UP -> DOWN routine */ static int slc_close(struct net_device *dev) { struct slcan *sl = netdev_priv(dev); spin_lock_bh(&sl->lock); if (sl->tty) { /* TTY discipline is running. */ clear_bit(TTY_DO_WRITE_WAKEUP, &sl->tty->flags); } netif_stop_queue(dev); sl->rcount = 0; sl->xleft = 0; spin_unlock_bh(&sl->lock); return 0; } /* Netdevice DOWN -> UP routine */ static int slc_open(struct net_device *dev) { struct slcan *sl = netdev_priv(dev); if (sl->tty == NULL) return -ENODEV; sl->flags &= (1 << SLF_INUSE); netif_start_queue(dev); return 0; } /* Hook the destructor so we can free slcan devs at the right point in time */ static void slc_free_netdev(struct net_device *dev) { int i = dev->base_addr; slcan_devs[i] = NULL; } static int slcan_change_mtu(struct net_device *dev, int new_mtu) { return -EINVAL; } static const struct net_device_ops slc_netdev_ops = { .ndo_open = slc_open, .ndo_stop = slc_close, .ndo_start_xmit = slc_xmit, .ndo_change_mtu = slcan_change_mtu, }; static void slc_setup(struct net_device *dev) { dev->netdev_ops = &slc_netdev_ops; dev->needs_free_netdev = true; dev->priv_destructor = slc_free_netdev; dev->hard_header_len = 0; dev->addr_len = 0; dev->tx_queue_len = 10; dev->mtu = CAN_MTU; dev->type = ARPHRD_CAN; /* New-style flags. */ dev->flags = IFF_NOARP; dev->features = NETIF_F_HW_CSUM; } /****************************************** Routines looking at TTY side. ******************************************/ /* * Handle the 'receiver data ready' interrupt. * This function is called by the 'tty_io' module in the kernel when * a block of SLCAN data has been received, which can now be decapsulated * and sent on to some IP layer for further processing. This will not * be re-entered while running but other ldisc functions may be called * in parallel */ static void slcan_receive_buf(struct tty_struct *tty, const unsigned char *cp, char *fp, int count) { struct slcan *sl = (struct slcan *) tty->disc_data; if (!sl || sl->magic != SLCAN_MAGIC || !netif_running(sl->dev)) return; /* Read the characters out of the buffer */ while (count--) { if (fp && *fp++) { if (!test_and_set_bit(SLF_ERROR, &sl->flags)) sl->dev->stats.rx_errors++; cp++; continue; } slcan_unesc(sl, *cp++); } } /************************************ * slcan_open helper routines. ************************************/ /* Collect hanged up channels */ static void slc_sync(void) { int i; struct net_device *dev; struct slcan *sl; for (i = 0; i < maxdev; i++) { dev = slcan_devs[i]; if (dev == NULL) break; sl = netdev_priv(dev); if (sl->tty) continue; if (dev->flags & IFF_UP) dev_close(dev); } } /* Find a free SLCAN channel, and link in this `tty' line. 
*/ static struct slcan *slc_alloc(void) { int i; char name[IFNAMSIZ]; struct net_device *dev = NULL; struct slcan *sl; for (i = 0; i < maxdev; i++) { dev = slcan_devs[i]; if (dev == NULL) break; } /* Sorry, too many, all slots in use */ if (i >= maxdev) return NULL; sprintf(name, "slcan%d", i); dev = alloc_netdev(sizeof(*sl), name, NET_NAME_UNKNOWN, slc_setup); if (!dev) return NULL; dev->base_addr = i; sl = netdev_priv(dev); /* Initialize channel control data */ sl->magic = SLCAN_MAGIC; sl->dev = dev; spin_lock_init(&sl->lock); INIT_WORK(&sl->tx_work, slcan_transmit); slcan_devs[i] = dev; return sl; } /* * Open the high-level part of the SLCAN channel. * This function is called by the TTY module when the * SLCAN line discipline is called for. Because we are * sure the tty line exists, we only have to link it to * a free SLCAN channel... * * Called in process context serialized from other ldisc calls. */ static int slcan_open(struct tty_struct *tty) { struct slcan *sl; int err; if (!capable(CAP_NET_ADMIN)) return -EPERM; if (tty->ops->write == NULL) return -EOPNOTSUPP; /* RTnetlink lock is misused here to serialize concurrent opens of slcan channels. There are better ways, but it is the simplest one. */ rtnl_lock(); /* Collect hanged up channels. */ slc_sync(); sl = tty->disc_data; err = -EEXIST; /* First make sure we're not already connected. */ if (sl && sl->magic == SLCAN_MAGIC) goto err_exit; /* OK. Find a free SLCAN channel to use. */ err = -ENFILE; sl = slc_alloc(); if (sl == NULL) goto err_exit; sl->tty = tty; tty->disc_data = sl; if (!test_bit(SLF_INUSE, &sl->flags)) { /* Perform the low-level SLCAN initialization. */ sl->rcount = 0; sl->xleft = 0; set_bit(SLF_INUSE, &sl->flags); err = register_netdevice(sl->dev); if (err) goto err_free_chan; } /* Done. We have linked the TTY line to a channel. */ rtnl_unlock(); tty->receive_room = 65536; /* We don't flow control */ /* TTY layer expects 0 on success */ return 0; err_free_chan: sl->tty = NULL; tty->disc_data = NULL; clear_bit(SLF_INUSE, &sl->flags); slc_free_netdev(sl->dev); /* do not call free_netdev before rtnl_unlock */ rtnl_unlock(); free_netdev(sl->dev); return err; err_exit: rtnl_unlock(); /* Count references from TTY module */ return err; } /* * Close down a SLCAN channel. * This means flushing out any pending queues, and then returning. This * call is serialized against other ldisc functions. * * We also use this method for a hangup event. */ static void slcan_close(struct tty_struct *tty) { struct slcan *sl = (struct slcan *) tty->disc_data; /* First make sure we're connected. */ if (!sl || sl->magic != SLCAN_MAGIC || sl->tty != tty) return; spin_lock_bh(&sl->lock); rcu_assign_pointer(tty->disc_data, NULL); sl->tty = NULL; spin_unlock_bh(&sl->lock); synchronize_rcu(); flush_work(&sl->tx_work); /* Flush network side */ unregister_netdev(sl->dev); /* This will complete via sl_free_netdev */ } static int slcan_hangup(struct tty_struct *tty) { slcan_close(tty); return 0; } /* Perform I/O control on an active SLCAN channel. */ static int slcan_ioctl(struct tty_struct *tty, struct file *file, unsigned int cmd, unsigned long arg) { struct slcan *sl = (struct slcan *) tty->disc_data; unsigned int tmp; /* First make sure we're connected. 
*/ if (!sl || sl->magic != SLCAN_MAGIC) return -EINVAL; switch (cmd) { case SIOCGIFNAME: tmp = strlen(sl->dev->name) + 1; if (copy_to_user((void __user *)arg, sl->dev->name, tmp)) return -EFAULT; return 0; case SIOCSIFHWADDR: return -EINVAL; default: return tty_mode_ioctl(tty, file, cmd, arg); } } static struct tty_ldisc_ops slc_ldisc = { .owner = THIS_MODULE, .magic = TTY_LDISC_MAGIC, .name = "slcan", .open = slcan_open, .close = slcan_close, .hangup = slcan_hangup, .ioctl = slcan_ioctl, .receive_buf = slcan_receive_buf, .write_wakeup = slcan_write_wakeup, }; static int __init slcan_init(void) { int status; if (maxdev < 4) maxdev = 4; /* Sanity */ pr_info("slcan: serial line CAN interface driver\n"); pr_info("slcan: %d dynamic interface channels.\n", maxdev); slcan_devs = kcalloc(maxdev, sizeof(struct net_device *), GFP_KERNEL); if (!slcan_devs) return -ENOMEM; /* Fill in our line protocol discipline, and register it */ status = tty_register_ldisc(N_SLCAN, &slc_ldisc); if (status) { printk(KERN_ERR "slcan: can't register line discipline\n"); kfree(slcan_devs); } return status; } static void __exit slcan_exit(void) { int i; struct net_device *dev; struct slcan *sl; unsigned long timeout = jiffies + HZ; int busy = 0; if (slcan_devs == NULL) return; /* First of all: check for active disciplines and hangup them. */ do { if (busy) msleep_interruptible(100); busy = 0; for (i = 0; i < maxdev; i++) { dev = slcan_devs[i]; if (!dev) continue; sl = netdev_priv(dev); spin_lock_bh(&sl->lock); if (sl->tty) { busy++; tty_hangup(sl->tty); } spin_unlock_bh(&sl->lock); } } while (busy && time_before(jiffies, timeout)); /* FIXME: hangup is async so we should wait when doing this second phase */ for (i = 0; i < maxdev; i++) { dev = slcan_devs[i]; if (!dev) continue; slcan_devs[i] = NULL; sl = netdev_priv(dev); if (sl->tty) { printk(KERN_ERR "%s: tty discipline still running\n", dev->name); } unregister_netdev(dev); } kfree(slcan_devs); slcan_devs = NULL; i = tty_unregister_ldisc(N_SLCAN); if (i) printk(KERN_ERR "slcan: can't unregister ldisc (err %d)\n", i); } module_init(slcan_init); module_exit(slcan_exit);
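The encapsulation comment block in the driver above documents the SLCAN ASCII framing: a type character ('t'/'r' for 11-bit ids, 'T'/'R' for 29-bit ids), a 3- or 8-digit hex id, one dlc digit, then the data bytes in hex, terminated by CR. The following user-space sketch encodes a frame in that format; struct demo_frame and slcan_encode() are illustrative stand-ins rather than driver code, and the example reproduces the "t4563112233" frame shown in the comment.

#include <stdio.h>
#include <stdint.h>
#include <string.h>

/* Flag and mask values as in linux/can.h; only the fields needed for the
 * ASCII encoding are modeled here. */
#define CAN_EFF_FLAG 0x80000000U
#define CAN_RTR_FLAG 0x40000000U
#define CAN_SFF_MASK 0x000007ffU
#define CAN_EFF_MASK 0x1fffffffU

struct demo_frame {
	uint32_t can_id;
	uint8_t  can_dlc;
	uint8_t  data[8];
};

/* Encode one frame as <type><id><dlc><data...>'\r', as described in the
 * driver's SLCAN encapsulation comment. */
static int slcan_encode(const struct demo_frame *cf, char *buf, size_t len)
{
	int eff = !!(cf->can_id & CAN_EFF_FLAG);
	int rtr = !!(cf->can_id & CAN_RTR_FLAG);
	char type = eff ? (rtr ? 'R' : 'T') : (rtr ? 'r' : 't');
	uint32_t id = cf->can_id & (eff ? CAN_EFF_MASK : CAN_SFF_MASK);
	int n, i;

	n = snprintf(buf, len, eff ? "%c%08X%u" : "%c%03X%u",
		     type, id, cf->can_dlc);
	/* RTR frames carry a dlc but never data bytes. */
	for (i = 0; !rtr && i < cf->can_dlc && i < 8; i++)
		n += snprintf(buf + n, len - n, "%02X", cf->data[i]);
	n += snprintf(buf + n, len - n, "\r");
	return n;
}

int main(void)
{
	struct demo_frame cf = { .can_id = 0x456, .can_dlc = 3,
				 .data = { 0x11, 0x22, 0x33 } };
	char buf[32];

	slcan_encode(&cf, buf, sizeof(buf));
	printf("%s\n", buf);	/* prints "t4563112233" followed by CR */
	return 0;
}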
4 4 9 4 3 3 2 1 3 3 4 3 2 7 7 7 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 /* * CMAC: Cipher Block Mode for Authentication * * Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> * * Based on work by: * Copyright © 2013 Tom St Denis <tstdenis@elliptictech.com> * Based on crypto/xcbc.c: * Copyright © 2006 USAGI/WIDE Project, * Author: Kazunori Miyazawa <miyazawa@linux-ipv6.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * */ #include <crypto/internal/hash.h> #include <linux/err.h> #include <linux/kernel.h> #include <linux/module.h> /* * +------------------------ * | <parent tfm> * +------------------------ * | cmac_tfm_ctx * +------------------------ * | consts (block size * 2) * +------------------------ */ struct cmac_tfm_ctx { struct crypto_cipher *child; u8 ctx[]; }; /* * +------------------------ * | <shash desc> * +------------------------ * | cmac_desc_ctx * +------------------------ * | odds (block size) * +------------------------ * | prev (block size) * +------------------------ */ struct cmac_desc_ctx { unsigned int len; u8 ctx[]; }; static int crypto_cmac_digest_setkey(struct crypto_shash *parent, const u8 *inkey, unsigned int keylen) { unsigned long alignmask = crypto_shash_alignmask(parent); struct cmac_tfm_ctx *ctx = crypto_shash_ctx(parent); unsigned int bs = crypto_shash_blocksize(parent); __be64 *consts = PTR_ALIGN((void *)ctx->ctx, (alignmask | (__alignof__(__be64) - 1)) + 1); u64 _const[2]; int i, err = 0; u8 msb_mask, gfmask; err = crypto_cipher_setkey(ctx->child, inkey, keylen); if (err) return err; /* encrypt the zero block */ memset(consts, 0, bs); crypto_cipher_encrypt_one(ctx->child, (u8 *)consts, (u8 *)consts); switch (bs) { case 16: gfmask = 0x87; _const[0] = be64_to_cpu(consts[1]); _const[1] = be64_to_cpu(consts[0]); /* gf(2^128) multiply zero-ciphertext with u and u^2 */ for (i = 0; i < 4; i += 2) { msb_mask = ((s64)_const[1] >> 63) & gfmask; _const[1] = (_const[1] << 1) | (_const[0] >> 63); _const[0] = (_const[0] << 1) ^ msb_mask; consts[i + 0] = cpu_to_be64(_const[1]); consts[i + 1] = cpu_to_be64(_const[0]); } break; case 8: gfmask = 0x1B; _const[0] = be64_to_cpu(consts[0]); /* gf(2^64) multiply zero-ciphertext with u and u^2 */ for (i = 
0; i < 2; i++) { msb_mask = ((s64)_const[0] >> 63) & gfmask; _const[0] = (_const[0] << 1) ^ msb_mask; consts[i] = cpu_to_be64(_const[0]); } break; } return 0; } static int crypto_cmac_digest_init(struct shash_desc *pdesc) { unsigned long alignmask = crypto_shash_alignmask(pdesc->tfm); struct cmac_desc_ctx *ctx = shash_desc_ctx(pdesc); int bs = crypto_shash_blocksize(pdesc->tfm); u8 *prev = PTR_ALIGN((void *)ctx->ctx, alignmask + 1) + bs; ctx->len = 0; memset(prev, 0, bs); return 0; } static int crypto_cmac_digest_update(struct shash_desc *pdesc, const u8 *p, unsigned int len) { struct crypto_shash *parent = pdesc->tfm; unsigned long alignmask = crypto_shash_alignmask(parent); struct cmac_tfm_ctx *tctx = crypto_shash_ctx(parent); struct cmac_desc_ctx *ctx = shash_desc_ctx(pdesc); struct crypto_cipher *tfm = tctx->child; int bs = crypto_shash_blocksize(parent); u8 *odds = PTR_ALIGN((void *)ctx->ctx, alignmask + 1); u8 *prev = odds + bs; /* checking the data can fill the block */ if ((ctx->len + len) <= bs) { memcpy(odds + ctx->len, p, len); ctx->len += len; return 0; } /* filling odds with new data and encrypting it */ memcpy(odds + ctx->len, p, bs - ctx->len); len -= bs - ctx->len; p += bs - ctx->len; crypto_xor(prev, odds, bs); crypto_cipher_encrypt_one(tfm, prev, prev); /* clearing the length */ ctx->len = 0; /* encrypting the rest of data */ while (len > bs) { crypto_xor(prev, p, bs); crypto_cipher_encrypt_one(tfm, prev, prev); p += bs; len -= bs; } /* keeping the surplus of blocksize */ if (len) { memcpy(odds, p, len); ctx->len = len; } return 0; } static int crypto_cmac_digest_final(struct shash_desc *pdesc, u8 *out) { struct crypto_shash *parent = pdesc->tfm; unsigned long alignmask = crypto_shash_alignmask(parent); struct cmac_tfm_ctx *tctx = crypto_shash_ctx(parent); struct cmac_desc_ctx *ctx = shash_desc_ctx(pdesc); struct crypto_cipher *tfm = tctx->child; int bs = crypto_shash_blocksize(parent); u8 *consts = PTR_ALIGN((void *)tctx->ctx, (alignmask | (__alignof__(__be64) - 1)) + 1); u8 *odds = PTR_ALIGN((void *)ctx->ctx, alignmask + 1); u8 *prev = odds + bs; unsigned int offset = 0; if (ctx->len != bs) { unsigned int rlen; u8 *p = odds + ctx->len; *p = 0x80; p++; rlen = bs - ctx->len - 1; if (rlen) memset(p, 0, rlen); offset += bs; } crypto_xor(prev, odds, bs); crypto_xor(prev, consts + offset, bs); crypto_cipher_encrypt_one(tfm, out, prev); return 0; } static int cmac_init_tfm(struct crypto_tfm *tfm) { struct crypto_cipher *cipher; struct crypto_instance *inst = (void *)tfm->__crt_alg; struct crypto_spawn *spawn = crypto_instance_ctx(inst); struct cmac_tfm_ctx *ctx = crypto_tfm_ctx(tfm); cipher = crypto_spawn_cipher(spawn); if (IS_ERR(cipher)) return PTR_ERR(cipher); ctx->child = cipher; return 0; }; static void cmac_exit_tfm(struct crypto_tfm *tfm) { struct cmac_tfm_ctx *ctx = crypto_tfm_ctx(tfm); crypto_free_cipher(ctx->child); } static int cmac_create(struct crypto_template *tmpl, struct rtattr **tb) { struct shash_instance *inst; struct crypto_alg *alg; unsigned long alignmask; int err; err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SHASH); if (err) return err; alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER, CRYPTO_ALG_TYPE_MASK); if (IS_ERR(alg)) return PTR_ERR(alg); switch (alg->cra_blocksize) { case 16: case 8: break; default: err = -EINVAL; goto out_put_alg; } inst = shash_alloc_instance("cmac", alg); err = PTR_ERR(inst); if (IS_ERR(inst)) goto out_put_alg; err = crypto_init_spawn(shash_instance_ctx(inst), alg, shash_crypto_instance(inst), CRYPTO_ALG_TYPE_MASK); if 
(err) goto out_free_inst; alignmask = alg->cra_alignmask; inst->alg.base.cra_alignmask = alignmask; inst->alg.base.cra_priority = alg->cra_priority; inst->alg.base.cra_blocksize = alg->cra_blocksize; inst->alg.digestsize = alg->cra_blocksize; inst->alg.descsize = ALIGN(sizeof(struct cmac_desc_ctx), crypto_tfm_ctx_alignment()) + (alignmask & ~(crypto_tfm_ctx_alignment() - 1)) + alg->cra_blocksize * 2; inst->alg.base.cra_ctxsize = ALIGN(sizeof(struct cmac_tfm_ctx), crypto_tfm_ctx_alignment()) + ((alignmask | (__alignof__(__be64) - 1)) & ~(crypto_tfm_ctx_alignment() - 1)) + alg->cra_blocksize * 2; inst->alg.base.cra_init = cmac_init_tfm; inst->alg.base.cra_exit = cmac_exit_tfm; inst->alg.init = crypto_cmac_digest_init; inst->alg.update = crypto_cmac_digest_update; inst->alg.final = crypto_cmac_digest_final; inst->alg.setkey = crypto_cmac_digest_setkey; err = shash_register_instance(tmpl, inst); if (err) { out_free_inst: shash_free_instance(shash_crypto_instance(inst)); } out_put_alg: crypto_mod_put(alg); return err; } static struct crypto_template crypto_cmac_tmpl = { .name = "cmac", .create = cmac_create, .free = shash_free_instance, .module = THIS_MODULE, }; static int __init crypto_cmac_module_init(void) { return crypto_register_template(&crypto_cmac_tmpl); } static void __exit crypto_cmac_module_exit(void) { crypto_unregister_template(&crypto_cmac_tmpl); } module_init(crypto_cmac_module_init); module_exit(crypto_cmac_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("CMAC keyed hash algorithm"); MODULE_ALIAS_CRYPTO("cmac");
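/*
 * Illustrative in-kernel sketch, not part of cmac.c: computing a CMAC-AES tag
 * through the shash API that the template above registers as "cmac(<cipher>)".
 * Function and variable names are placeholders; the caller supplies an AES key
 * and a buffer of at least one cipher block (16 bytes) for the tag.
 */
#include <crypto/hash.h>
#include <linux/err.h>
#include <linux/slab.h>

static int cmac_aes_demo(const u8 *key, unsigned int keylen,
			 const u8 *msg, unsigned int msglen, u8 *mac)
{
	struct crypto_shash *tfm;
	struct shash_desc *desc;
	int err;

	tfm = crypto_alloc_shash("cmac(aes)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_shash_setkey(tfm, key, keylen);
	if (err)
		goto out_free_tfm;

	desc = kzalloc(sizeof(*desc) + crypto_shash_descsize(tfm), GFP_KERNEL);
	if (!desc) {
		err = -ENOMEM;
		goto out_free_tfm;
	}
	desc->tfm = tfm;

	/* init + update + final in one call; @mac receives
	 * crypto_shash_digestsize(tfm) bytes (the cipher block size). */
	err = crypto_shash_digest(desc, msg, msglen, mac);
	kfree(desc);
out_free_tfm:
	crypto_free_shash(tfm);
	return err;
}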
855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds * * Modified by Fred N. van Kempen, 01/29/93, to add line disciplines * which can be dynamically activated and de-activated by the line * discipline handling modules (like SLIP). */ #include <linux/types.h> #include <linux/termios.h> #include <linux/errno.h> #include <linux/sched/signal.h> #include <linux/kernel.h> #include <linux/major.h> #include <linux/tty.h> #include <linux/fcntl.h> #include <linux/string.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/bitops.h> #include <linux/mutex.h> #include <linux/compat.h> #include <asm/io.h> #include <linux/uaccess.h> #undef TTY_DEBUG_WAIT_UNTIL_SENT #ifdef TTY_DEBUG_WAIT_UNTIL_SENT # define tty_debug_wait_until_sent(tty, f, args...) tty_debug(tty, f, ##args) #else # define tty_debug_wait_until_sent(tty, f, args...) do {} while (0) #endif #undef DEBUG /* * Internal flag options for termios setting behavior */ #define TERMIOS_FLUSH 1 #define TERMIOS_WAIT 2 #define TERMIOS_TERMIO 4 #define TERMIOS_OLD 8 /** * tty_chars_in_buffer - characters pending * @tty: terminal * * Return the number of bytes of data in the device private * output queue. If no private method is supplied there is assumed * to be no queue on the device. */ int tty_chars_in_buffer(struct tty_struct *tty) { if (tty->ops->chars_in_buffer) return tty->ops->chars_in_buffer(tty); else return 0; } EXPORT_SYMBOL(tty_chars_in_buffer); /** * tty_write_room - write queue space * @tty: terminal * * Return the number of bytes that can be queued to this device * at the present time. The result should be treated as a guarantee * and the driver cannot offer a value it later shrinks by more than * the number of bytes written. If no method is provided 2K is always * returned and data may be lost as there will be no flow control. */ int tty_write_room(struct tty_struct *tty) { if (tty->ops->write_room) return tty->ops->write_room(tty); return 2048; } EXPORT_SYMBOL(tty_write_room); /** * tty_driver_flush_buffer - discard internal buffer * @tty: terminal * * Discard the internal output buffer for this device. If no method * is provided then either the buffer cannot be hardware flushed or * there is no buffer driver side. */ void tty_driver_flush_buffer(struct tty_struct *tty) { if (tty->ops->flush_buffer) tty->ops->flush_buffer(tty); } EXPORT_SYMBOL(tty_driver_flush_buffer); /** * tty_throttle - flow control * @tty: terminal * * Indicate that a tty should stop transmitting data down the stack. * Takes the termios rwsem to protect against parallel throttle/unthrottle * and also to ensure the driver can consistently reference its own * termios data at this point when implementing software flow control. 
*/ void tty_throttle(struct tty_struct *tty) { down_write(&tty->termios_rwsem); /* check TTY_THROTTLED first so it indicates our state */ if (!test_and_set_bit(TTY_THROTTLED, &tty->flags) && tty->ops->throttle) tty->ops->throttle(tty); tty->flow_change = 0; up_write(&tty->termios_rwsem); } EXPORT_SYMBOL(tty_throttle); /** * tty_unthrottle - flow control * @tty: terminal * * Indicate that a tty may continue transmitting data down the stack. * Takes the termios rwsem to protect against parallel throttle/unthrottle * and also to ensure the driver can consistently reference its own * termios data at this point when implementing software flow control. * * Drivers should however remember that the stack can issue a throttle, * then change flow control method, then unthrottle. */ void tty_unthrottle(struct tty_struct *tty) { down_write(&tty->termios_rwsem); if (test_and_clear_bit(TTY_THROTTLED, &tty->flags) && tty->ops->unthrottle) tty->ops->unthrottle(tty); tty->flow_change = 0; up_write(&tty->termios_rwsem); } EXPORT_SYMBOL(tty_unthrottle); /** * tty_throttle_safe - flow control * @tty: terminal * * Similar to tty_throttle() but will only attempt throttle * if tty->flow_change is TTY_THROTTLE_SAFE. Prevents an accidental * throttle due to race conditions when throttling is conditional * on factors evaluated prior to throttling. * * Returns 0 if tty is throttled (or was already throttled) */ int tty_throttle_safe(struct tty_struct *tty) { int ret = 0; mutex_lock(&tty->throttle_mutex); if (!tty_throttled(tty)) { if (tty->flow_change != TTY_THROTTLE_SAFE) ret = 1; else { set_bit(TTY_THROTTLED, &tty->flags); if (tty->ops->throttle) tty->ops->throttle(tty); } } mutex_unlock(&tty->throttle_mutex); return ret; } /** * tty_unthrottle_safe - flow control * @tty: terminal * * Similar to tty_unthrottle() but will only attempt unthrottle * if tty->flow_change is TTY_UNTHROTTLE_SAFE. Prevents an accidental * unthrottle due to race conditions when unthrottling is conditional * on factors evaluated prior to unthrottling. 
* * Returns 0 if tty is unthrottled (or was already unthrottled) */ int tty_unthrottle_safe(struct tty_struct *tty) { int ret = 0; mutex_lock(&tty->throttle_mutex); if (tty_throttled(tty)) { if (tty->flow_change != TTY_UNTHROTTLE_SAFE) ret = 1; else { clear_bit(TTY_THROTTLED, &tty->flags); if (tty->ops->unthrottle) tty->ops->unthrottle(tty); } } mutex_unlock(&tty->throttle_mutex); return ret; } /** * tty_wait_until_sent - wait for I/O to finish * @tty: tty we are waiting for * @timeout: how long we will wait * * Wait for characters pending in a tty driver to hit the wire, or * for a timeout to occur (eg due to flow control) * * Locking: none */ void tty_wait_until_sent(struct tty_struct *tty, long timeout) { tty_debug_wait_until_sent(tty, "wait until sent, timeout=%ld\n", timeout); if (!timeout) timeout = MAX_SCHEDULE_TIMEOUT; timeout = wait_event_interruptible_timeout(tty->write_wait, !tty_chars_in_buffer(tty), timeout); if (timeout <= 0) return; if (timeout == MAX_SCHEDULE_TIMEOUT) timeout = 0; if (tty->ops->wait_until_sent) tty->ops->wait_until_sent(tty, timeout); } EXPORT_SYMBOL(tty_wait_until_sent); /* * Termios Helper Methods */ static void unset_locked_termios(struct tty_struct *tty, struct ktermios *old) { struct ktermios *termios = &tty->termios; struct ktermios *locked = &tty->termios_locked; int i; #define NOSET_MASK(x, y, z) (x = ((x) & ~(z)) | ((y) & (z))) NOSET_MASK(termios->c_iflag, old->c_iflag, locked->c_iflag); NOSET_MASK(termios->c_oflag, old->c_oflag, locked->c_oflag); NOSET_MASK(termios->c_cflag, old->c_cflag, locked->c_cflag); NOSET_MASK(termios->c_lflag, old->c_lflag, locked->c_lflag); termios->c_line = locked->c_line ? old->c_line : termios->c_line; for (i = 0; i < NCCS; i++) termios->c_cc[i] = locked->c_cc[i] ? old->c_cc[i] : termios->c_cc[i]; /* FIXME: What should we do for i/ospeed */ } /** * tty_termios_copy_hw - copy hardware settings * @new: New termios * @old: Old termios * * Propagate the hardware specific terminal setting bits from * the old termios structure to the new one. This is used in cases * where the hardware does not support reconfiguration or as a helper * in some cases where only minimal reconfiguration is supported */ void tty_termios_copy_hw(struct ktermios *new, struct ktermios *old) { /* The bits a dumb device handles in software. Smart devices need to always provide a set_termios method */ new->c_cflag &= HUPCL | CREAD | CLOCAL; new->c_cflag |= old->c_cflag & ~(HUPCL | CREAD | CLOCAL); new->c_ispeed = old->c_ispeed; new->c_ospeed = old->c_ospeed; } EXPORT_SYMBOL(tty_termios_copy_hw); /** * tty_termios_hw_change - check for setting change * @a: termios * @b: termios to compare * * Check if any of the bits that affect a dumb device have changed * between the two termios structures, or a speed change is needed. */ int tty_termios_hw_change(const struct ktermios *a, const struct ktermios *b) { if (a->c_ispeed != b->c_ispeed || a->c_ospeed != b->c_ospeed) return 1; if ((a->c_cflag ^ b->c_cflag) & ~(HUPCL | CREAD | CLOCAL)) return 1; return 0; } EXPORT_SYMBOL(tty_termios_hw_change); /** * tty_set_termios - update termios values * @tty: tty to update * @new_termios: desired new value * * Perform updates to the termios values set on this terminal. * A master pty's termios should never be set. 
* * Locking: termios_rwsem */ int tty_set_termios(struct tty_struct *tty, struct ktermios *new_termios) { struct ktermios old_termios; struct tty_ldisc *ld; WARN_ON(tty->driver->type == TTY_DRIVER_TYPE_PTY && tty->driver->subtype == PTY_TYPE_MASTER); /* * Perform the actual termios internal changes under lock. */ /* FIXME: we need to decide on some locking/ordering semantics for the set_termios notification eventually */ down_write(&tty->termios_rwsem); old_termios = tty->termios; tty->termios = *new_termios; unset_locked_termios(tty, &old_termios); if (tty->ops->set_termios) tty->ops->set_termios(tty, &old_termios); else tty_termios_copy_hw(&tty->termios, &old_termios); ld = tty_ldisc_ref(tty); if (ld != NULL) { if (ld->ops->set_termios) ld->ops->set_termios(tty, &old_termios); tty_ldisc_deref(ld); } up_write(&tty->termios_rwsem); return 0; } EXPORT_SYMBOL_GPL(tty_set_termios); /** * set_termios - set termios values for a tty * @tty: terminal device * @arg: user data * @opt: option information * * Helper function to prepare termios data and run necessary other * functions before using tty_set_termios to do the actual changes. * * Locking: * Called functions take ldisc and termios_rwsem locks */ static int set_termios(struct tty_struct *tty, void __user *arg, int opt) { struct ktermios tmp_termios; struct tty_ldisc *ld; int retval = tty_check_change(tty); if (retval) return retval; down_read(&tty->termios_rwsem); tmp_termios = tty->termios; up_read(&tty->termios_rwsem); if (opt & TERMIOS_TERMIO) { if (user_termio_to_kernel_termios(&tmp_termios, (struct termio __user *)arg)) return -EFAULT; #ifdef TCGETS2 } else if (opt & TERMIOS_OLD) { if (user_termios_to_kernel_termios_1(&tmp_termios, (struct termios __user *)arg)) return -EFAULT; } else { if (user_termios_to_kernel_termios(&tmp_termios, (struct termios2 __user *)arg)) return -EFAULT; } #else } else if (user_termios_to_kernel_termios(&tmp_termios, (struct termios __user *)arg)) return -EFAULT; #endif /* If old style Bfoo values are used then load c_ispeed/c_ospeed * with the real speed so its unconditionally usable */ tmp_termios.c_ispeed = tty_termios_input_baud_rate(&tmp_termios); tmp_termios.c_ospeed = tty_termios_baud_rate(&tmp_termios); ld = tty_ldisc_ref(tty); if (ld != NULL) { if ((opt & TERMIOS_FLUSH) && ld->ops->flush_buffer) ld->ops->flush_buffer(tty); tty_ldisc_deref(ld); } if (opt & TERMIOS_WAIT) { tty_wait_until_sent(tty, 0); if (signal_pending(current)) return -ERESTARTSYS; } tty_set_termios(tty, &tmp_termios); /* FIXME: Arguably if tmp_termios == tty->termios AND the actual requested termios was not tmp_termios then we may want to return an error as no user requested change has succeeded */ return 0; } static void copy_termios(struct tty_struct *tty, struct ktermios *kterm) { down_read(&tty->termios_rwsem); *kterm = tty->termios; up_read(&tty->termios_rwsem); } static void copy_termios_locked(struct tty_struct *tty, struct ktermios *kterm) { down_read(&tty->termios_rwsem); *kterm = tty->termios_locked; up_read(&tty->termios_rwsem); } static int get_termio(struct tty_struct *tty, struct termio __user *termio) { struct ktermios kterm; copy_termios(tty, &kterm); if (kernel_termios_to_user_termio(termio, &kterm)) return -EFAULT; return 0; } #ifdef TCGETX /** * set_termiox - set termiox fields if possible * @tty: terminal * @arg: termiox structure from user * @opt: option flags for ioctl type * * Implement the device calling points for the SYS5 termiox ioctl * interface in Linux */ static int set_termiox(struct tty_struct *tty, 
void __user *arg, int opt) { struct termiox tnew; struct tty_ldisc *ld; if (tty->termiox == NULL) return -EINVAL; if (copy_from_user(&tnew, arg, sizeof(struct termiox))) return -EFAULT; ld = tty_ldisc_ref(tty); if (ld != NULL) { if ((opt & TERMIOS_FLUSH) && ld->ops->flush_buffer) ld->ops->flush_buffer(tty); tty_ldisc_deref(ld); } if (opt & TERMIOS_WAIT) { tty_wait_until_sent(tty, 0); if (signal_pending(current)) return -ERESTARTSYS; } down_write(&tty->termios_rwsem); if (tty->ops->set_termiox) tty->ops->set_termiox(tty, &tnew); up_write(&tty->termios_rwsem); return 0; } #endif #ifdef TIOCGETP /* * These are deprecated, but there is limited support.. * * The "sg_flags" translation is a joke.. */ static int get_sgflags(struct tty_struct *tty) { int flags = 0; if (!L_ICANON(tty)) { if (L_ISIG(tty)) flags |= 0x02; /* cbreak */ else flags |= 0x20; /* raw */ } if (L_ECHO(tty)) flags |= 0x08; /* echo */ if (O_OPOST(tty)) if (O_ONLCR(tty)) flags |= 0x10; /* crmod */ return flags; } static int get_sgttyb(struct tty_struct *tty, struct sgttyb __user *sgttyb) { struct sgttyb tmp; down_read(&tty->termios_rwsem); tmp.sg_ispeed = tty->termios.c_ispeed; tmp.sg_ospeed = tty->termios.c_ospeed; tmp.sg_erase = tty->termios.c_cc[VERASE]; tmp.sg_kill = tty->termios.c_cc[VKILL]; tmp.sg_flags = get_sgflags(tty); up_read(&tty->termios_rwsem); return copy_to_user(sgttyb, &tmp, sizeof(tmp)) ? -EFAULT : 0; } static void set_sgflags(struct ktermios *termios, int flags) { termios->c_iflag = ICRNL | IXON; termios->c_oflag = 0; termios->c_lflag = ISIG | ICANON; if (flags & 0x02) { /* cbreak */ termios->c_iflag = 0; termios->c_lflag &= ~ICANON; } if (flags & 0x08) { /* echo */ termios->c_lflag |= ECHO | ECHOE | ECHOK | ECHOCTL | ECHOKE | IEXTEN; } if (flags & 0x10) { /* crmod */ termios->c_oflag |= OPOST | ONLCR; } if (flags & 0x20) { /* raw */ termios->c_iflag = 0; termios->c_lflag &= ~(ISIG | ICANON); } if (!(termios->c_lflag & ICANON)) { termios->c_cc[VMIN] = 1; termios->c_cc[VTIME] = 0; } } /** * set_sgttyb - set legacy terminal values * @tty: tty structure * @sgttyb: pointer to old style terminal structure * * Updates a terminal from the legacy BSD style terminal information * structure. * * Locking: termios_rwsem */ static int set_sgttyb(struct tty_struct *tty, struct sgttyb __user *sgttyb) { int retval; struct sgttyb tmp; struct ktermios termios; retval = tty_check_change(tty); if (retval) return retval; if (copy_from_user(&tmp, sgttyb, sizeof(tmp))) return -EFAULT; down_write(&tty->termios_rwsem); termios = tty->termios; termios.c_cc[VERASE] = tmp.sg_erase; termios.c_cc[VKILL] = tmp.sg_kill; set_sgflags(&termios, tmp.sg_flags); /* Try and encode into Bfoo format */ #ifdef BOTHER tty_termios_encode_baud_rate(&termios, termios.c_ispeed, termios.c_ospeed); #endif up_write(&tty->termios_rwsem); tty_set_termios(tty, &termios); return 0; } #endif #ifdef TIOCGETC static int get_tchars(struct tty_struct *tty, struct tchars __user *tchars) { struct tchars tmp; down_read(&tty->termios_rwsem); tmp.t_intrc = tty->termios.c_cc[VINTR]; tmp.t_quitc = tty->termios.c_cc[VQUIT]; tmp.t_startc = tty->termios.c_cc[VSTART]; tmp.t_stopc = tty->termios.c_cc[VSTOP]; tmp.t_eofc = tty->termios.c_cc[VEOF]; tmp.t_brkc = tty->termios.c_cc[VEOL2]; /* what is brkc anyway? */ up_read(&tty->termios_rwsem); return copy_to_user(tchars, &tmp, sizeof(tmp)) ? 
-EFAULT : 0; } static int set_tchars(struct tty_struct *tty, struct tchars __user *tchars) { struct tchars tmp; if (copy_from_user(&tmp, tchars, sizeof(tmp))) return -EFAULT; down_write(&tty->termios_rwsem); tty->termios.c_cc[VINTR] = tmp.t_intrc; tty->termios.c_cc[VQUIT] = tmp.t_quitc; tty->termios.c_cc[VSTART] = tmp.t_startc; tty->termios.c_cc[VSTOP] = tmp.t_stopc; tty->termios.c_cc[VEOF] = tmp.t_eofc; tty->termios.c_cc[VEOL2] = tmp.t_brkc; /* what is brkc anyway? */ up_write(&tty->termios_rwsem); return 0; } #endif #ifdef TIOCGLTC static int get_ltchars(struct tty_struct *tty, struct ltchars __user *ltchars) { struct ltchars tmp; down_read(&tty->termios_rwsem); tmp.t_suspc = tty->termios.c_cc[VSUSP]; /* what is dsuspc anyway? */ tmp.t_dsuspc = tty->termios.c_cc[VSUSP]; tmp.t_rprntc = tty->termios.c_cc[VREPRINT]; /* what is flushc anyway? */ tmp.t_flushc = tty->termios.c_cc[VEOL2]; tmp.t_werasc = tty->termios.c_cc[VWERASE]; tmp.t_lnextc = tty->termios.c_cc[VLNEXT]; up_read(&tty->termios_rwsem); return copy_to_user(ltchars, &tmp, sizeof(tmp)) ? -EFAULT : 0; } static int set_ltchars(struct tty_struct *tty, struct ltchars __user *ltchars) { struct ltchars tmp; if (copy_from_user(&tmp, ltchars, sizeof(tmp))) return -EFAULT; down_write(&tty->termios_rwsem); tty->termios.c_cc[VSUSP] = tmp.t_suspc; /* what is dsuspc anyway? */ tty->termios.c_cc[VEOL2] = tmp.t_dsuspc; tty->termios.c_cc[VREPRINT] = tmp.t_rprntc; /* what is flushc anyway? */ tty->termios.c_cc[VEOL2] = tmp.t_flushc; tty->termios.c_cc[VWERASE] = tmp.t_werasc; tty->termios.c_cc[VLNEXT] = tmp.t_lnextc; up_write(&tty->termios_rwsem); return 0; } #endif /** * tty_change_softcar - carrier change ioctl helper * @tty: tty to update * @arg: enable/disable CLOCAL * * Perform a change to the CLOCAL state and call into the driver * layer to make it visible. All done with the termios rwsem */ static int tty_change_softcar(struct tty_struct *tty, int arg) { int ret = 0; int bit = arg ? CLOCAL : 0; struct ktermios old; down_write(&tty->termios_rwsem); old = tty->termios; tty->termios.c_cflag &= ~CLOCAL; tty->termios.c_cflag |= bit; if (tty->ops->set_termios) tty->ops->set_termios(tty, &old); if (C_CLOCAL(tty) != bit) ret = -EINVAL; up_write(&tty->termios_rwsem); return ret; } /** * tty_mode_ioctl - mode related ioctls * @tty: tty for the ioctl * @file: file pointer for the tty * @cmd: command * @arg: ioctl argument * * Perform non line discipline specific mode control ioctls. This * is designed to be called by line disciplines to ensure they provide * consistent mode setting. 
*/ int tty_mode_ioctl(struct tty_struct *tty, struct file *file, unsigned int cmd, unsigned long arg) { struct tty_struct *real_tty; void __user *p = (void __user *)arg; int ret = 0; struct ktermios kterm; BUG_ON(file == NULL); if (tty->driver->type == TTY_DRIVER_TYPE_PTY && tty->driver->subtype == PTY_TYPE_MASTER) real_tty = tty->link; else real_tty = tty; switch (cmd) { #ifdef TIOCGETP case TIOCGETP: return get_sgttyb(real_tty, (struct sgttyb __user *) arg); case TIOCSETP: case TIOCSETN: return set_sgttyb(real_tty, (struct sgttyb __user *) arg); #endif #ifdef TIOCGETC case TIOCGETC: return get_tchars(real_tty, p); case TIOCSETC: return set_tchars(real_tty, p); #endif #ifdef TIOCGLTC case TIOCGLTC: return get_ltchars(real_tty, p); case TIOCSLTC: return set_ltchars(real_tty, p); #endif case TCSETSF: return set_termios(real_tty, p, TERMIOS_FLUSH | TERMIOS_WAIT | TERMIOS_OLD); case TCSETSW: return set_termios(real_tty, p, TERMIOS_WAIT | TERMIOS_OLD); case TCSETS: return set_termios(real_tty, p, TERMIOS_OLD); #ifndef TCGETS2 case TCGETS: copy_termios(real_tty, &kterm); if (kernel_termios_to_user_termios((struct termios __user *)arg, &kterm)) ret = -EFAULT; return ret; #else case TCGETS: copy_termios(real_tty, &kterm); if (kernel_termios_to_user_termios_1((struct termios __user *)arg, &kterm)) ret = -EFAULT; return ret; case TCGETS2: copy_termios(real_tty, &kterm); if (kernel_termios_to_user_termios((struct termios2 __user *)arg, &kterm)) ret = -EFAULT; return ret; case TCSETSF2: return set_termios(real_tty, p, TERMIOS_FLUSH | TERMIOS_WAIT); case TCSETSW2: return set_termios(real_tty, p, TERMIOS_WAIT); case TCSETS2: return set_termios(real_tty, p, 0); #endif case TCGETA: return get_termio(real_tty, p); case TCSETAF: return set_termios(real_tty, p, TERMIOS_FLUSH | TERMIOS_WAIT | TERMIOS_TERMIO); case TCSETAW: return set_termios(real_tty, p, TERMIOS_WAIT | TERMIOS_TERMIO); case TCSETA: return set_termios(real_tty, p, TERMIOS_TERMIO); #ifndef TCGETS2 case TIOCGLCKTRMIOS: copy_termios_locked(real_tty, &kterm); if (kernel_termios_to_user_termios((struct termios __user *)arg, &kterm)) ret = -EFAULT; return ret; case TIOCSLCKTRMIOS: if (!capable(CAP_SYS_ADMIN)) return -EPERM; copy_termios_locked(real_tty, &kterm); if (user_termios_to_kernel_termios(&kterm, (struct termios __user *) arg)) return -EFAULT; down_write(&real_tty->termios_rwsem); real_tty->termios_locked = kterm; up_write(&real_tty->termios_rwsem); return 0; #else case TIOCGLCKTRMIOS: copy_termios_locked(real_tty, &kterm); if (kernel_termios_to_user_termios_1((struct termios __user *)arg, &kterm)) ret = -EFAULT; return ret; case TIOCSLCKTRMIOS: if (!capable(CAP_SYS_ADMIN)) return -EPERM; copy_termios_locked(real_tty, &kterm); if (user_termios_to_kernel_termios_1(&kterm, (struct termios __user *) arg)) return -EFAULT; down_write(&real_tty->termios_rwsem); real_tty->termios_locked = kterm; up_write(&real_tty->termios_rwsem); return ret; #endif #ifdef TCGETX case TCGETX: { struct termiox ktermx; if (real_tty->termiox == NULL) return -EINVAL; down_read(&real_tty->termios_rwsem); memcpy(&ktermx, real_tty->termiox, sizeof(struct termiox)); up_read(&real_tty->termios_rwsem); if (copy_to_user(p, &ktermx, sizeof(struct termiox))) ret = -EFAULT; return ret; } case TCSETX: return set_termiox(real_tty, p, 0); case TCSETXW: return set_termiox(real_tty, p, TERMIOS_WAIT); case TCSETXF: return set_termiox(real_tty, p, TERMIOS_FLUSH); #endif case TIOCGSOFTCAR: copy_termios(real_tty, &kterm); ret = put_user((kterm.c_cflag & CLOCAL) ? 
1 : 0, (int __user *)arg); return ret; case TIOCSSOFTCAR: if (get_user(arg, (unsigned int __user *) arg)) return -EFAULT; return tty_change_softcar(real_tty, arg); default: return -ENOIOCTLCMD; } } EXPORT_SYMBOL_GPL(tty_mode_ioctl); /* Caller guarantees ldisc reference is held */ static int __tty_perform_flush(struct tty_struct *tty, unsigned long arg) { struct tty_ldisc *ld = tty->ldisc; switch (arg) { case TCIFLUSH: if (ld && ld->ops->flush_buffer) { ld->ops->flush_buffer(tty); tty_unthrottle(tty); } break; case TCIOFLUSH: if (ld && ld->ops->flush_buffer) { ld->ops->flush_buffer(tty); tty_unthrottle(tty); } /* fall through */ case TCOFLUSH: tty_driver_flush_buffer(tty); break; default: return -EINVAL; } return 0; } int tty_perform_flush(struct tty_struct *tty, unsigned long arg) { struct tty_ldisc *ld; int retval = tty_check_change(tty); if (retval) return retval; ld = tty_ldisc_ref_wait(tty); retval = __tty_perform_flush(tty, arg); if (ld) tty_ldisc_deref(ld); return retval; } EXPORT_SYMBOL_GPL(tty_perform_flush); int n_tty_ioctl_helper(struct tty_struct *tty, struct file *file, unsigned int cmd, unsigned long arg) { int retval; switch (cmd) { case TCXONC: retval = tty_check_change(tty); if (retval) return retval; switch (arg) { case TCOOFF: spin_lock_irq(&tty->flow_lock); if (!tty->flow_stopped) { tty->flow_stopped = 1; __stop_tty(tty); } spin_unlock_irq(&tty->flow_lock); break; case TCOON: spin_lock_irq(&tty->flow_lock); if (tty->flow_stopped) { tty->flow_stopped = 0; __start_tty(tty); } spin_unlock_irq(&tty->flow_lock); break; case TCIOFF: if (STOP_CHAR(tty) != __DISABLED_CHAR) retval = tty_send_xchar(tty, STOP_CHAR(tty)); break; case TCION: if (START_CHAR(tty) != __DISABLED_CHAR) retval = tty_send_xchar(tty, START_CHAR(tty)); break; default: return -EINVAL; } return retval; case TCFLSH: retval = tty_check_change(tty); if (retval) return retval; return __tty_perform_flush(tty, arg); default: /* Try the mode commands */ return tty_mode_ioctl(tty, file, cmd, arg); } } EXPORT_SYMBOL(n_tty_ioctl_helper); #ifdef CONFIG_COMPAT long n_tty_compat_ioctl_helper(struct tty_struct *tty, struct file *file, unsigned int cmd, unsigned long arg) { switch (cmd) { case TIOCGLCKTRMIOS: case TIOCSLCKTRMIOS: return tty_mode_ioctl(tty, file, cmd, (unsigned long) compat_ptr(arg)); default: return -ENOIOCTLCMD; } } EXPORT_SYMBOL(n_tty_compat_ioctl_helper); #endif
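/*
 * Illustrative userspace sketch, not part of tty_ioctl.c: tcgetattr() and
 * tcsetattr(TCSADRAIN) are the usual libc entry points into TCGETS and
 * TCSETSW above, i.e. set_termios() with TERMIOS_WAIT so pending output is
 * drained first.  The device path and baud rate are placeholders.
 */
#include <fcntl.h>
#include <stdio.h>
#include <termios.h>
#include <unistd.h>

int main(void)
{
	struct termios tio;
	int fd = open("/dev/ttyS0", O_RDWR | O_NOCTTY);

	if (fd < 0 || tcgetattr(fd, &tio) < 0) {	/* TCGETS */
		perror("tcgetattr");
		return 1;
	}
	cfmakeraw(&tio);			/* raw mode: no echo, no canonical processing */
	cfsetispeed(&tio, B115200);
	cfsetospeed(&tio, B115200);
	if (tcsetattr(fd, TCSADRAIN, &tio) < 0) {	/* TCSETSW */
		perror("tcsetattr");
		return 1;
	}
	close(fd);
	return 0;
}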
2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 /* * IPVS: Round-Robin Scheduling module * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * Peter Kese <peter.kese@ijs.si> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * * Fixes/Changes: * Wensong Zhang : changed the ip_vs_rr_schedule to return dest * Julian Anastasov : fixed the NULL pointer access bug in debugging * Wensong Zhang : changed some comestics things for debugging * Wensong Zhang : changed for the d-linked destination list * Wensong Zhang : added the ip_vs_rr_update_svc * Wensong Zhang : added any dest with weight=0 is quiesced * */ #define KMSG_COMPONENT "IPVS" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <linux/module.h> #include <linux/kernel.h> #include <net/ip_vs.h> static int ip_vs_rr_init_svc(struct ip_vs_service *svc) { svc->sched_data = &svc->destinations; return 0; } static int ip_vs_rr_del_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest) { struct list_head *p; spin_lock_bh(&svc->sched_lock); p = (struct list_head *) svc->sched_data; /* dest is already unlinked, so p->prev is not valid but * p->next is valid, use it to reach previous entry. */ if (p == &dest->n_list) svc->sched_data = p->next->prev; spin_unlock_bh(&svc->sched_lock); return 0; } /* * Round-Robin Scheduling */ static struct ip_vs_dest * ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, struct ip_vs_iphdr *iph) { struct list_head *p; struct ip_vs_dest *dest, *last; int pass = 0; IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); spin_lock_bh(&svc->sched_lock); p = (struct list_head *) svc->sched_data; last = dest = list_entry(p, struct ip_vs_dest, n_list); do { list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) { if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) && atomic_read(&dest->weight) > 0) /* HIT */ goto out; if (dest == last) goto stop; } pass++; /* Previous dest could be unlinked, do not loop forever. * If we stay at head there is no need for 2nd pass. 
*/ } while (pass < 2 && p != &svc->destinations); stop: spin_unlock_bh(&svc->sched_lock); ip_vs_scheduler_err(svc, "no destination available"); return NULL; out: svc->sched_data = &dest->n_list; spin_unlock_bh(&svc->sched_lock); IP_VS_DBG_BUF(6, "RR: server %s:%u " "activeconns %d refcnt %d weight %d\n", IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), atomic_read(&dest->activeconns), refcount_read(&dest->refcnt), atomic_read(&dest->weight)); return dest; } static struct ip_vs_scheduler ip_vs_rr_scheduler = { .name = "rr", /* name */ .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_rr_scheduler.n_list), .init_service = ip_vs_rr_init_svc, .add_dest = NULL, .del_dest = ip_vs_rr_del_dest, .schedule = ip_vs_rr_schedule, }; static int __init ip_vs_rr_init(void) { return register_ip_vs_scheduler(&ip_vs_rr_scheduler); } static void __exit ip_vs_rr_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_rr_scheduler); synchronize_rcu(); } module_init(ip_vs_rr_init); module_exit(ip_vs_rr_cleanup); MODULE_LICENSE("GPL");
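/*
 * Usage note, not part of ip_vs_rr.c: the scheduler above is selected per
 * virtual service from userspace, e.g. "ipvsadm -A -t 192.0.2.1:80 -s rr"
 * (address and port are placeholders) binds round-robin scheduling to a new
 * TCP virtual service.
 */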
/* Copyright (C) 2016 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. * * This file is provided under a dual BSD/GPLv2 license. * * SipHash: a fast short-input PRF * https://131002.net/siphash/ * * This implementation is specifically for SipHash2-4 for a secure PRF * and HalfSipHash1-3/SipHash1-3 for an insecure PRF only suitable for * hashtables. 
*/ #include <linux/siphash.h> #include <asm/unaligned.h> #if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64 #include <linux/dcache.h> #include <asm/word-at-a-time.h> #endif #define SIPROUND SIPHASH_PERMUTATION(v0, v1, v2, v3) #define PREAMBLE(len) \ u64 v0 = SIPHASH_CONST_0; \ u64 v1 = SIPHASH_CONST_1; \ u64 v2 = SIPHASH_CONST_2; \ u64 v3 = SIPHASH_CONST_3; \ u64 b = ((u64)(len)) << 56; \ v3 ^= key->key[1]; \ v2 ^= key->key[0]; \ v1 ^= key->key[1]; \ v0 ^= key->key[0]; #define POSTAMBLE \ v3 ^= b; \ SIPROUND; \ SIPROUND; \ v0 ^= b; \ v2 ^= 0xff; \ SIPROUND; \ SIPROUND; \ SIPROUND; \ SIPROUND; \ return (v0 ^ v1) ^ (v2 ^ v3); #ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key) { const u8 *end = data + len - (len % sizeof(u64)); const u8 left = len & (sizeof(u64) - 1); u64 m; PREAMBLE(len) for (; data != end; data += sizeof(u64)) { m = le64_to_cpup(data); v3 ^= m; SIPROUND; SIPROUND; v0 ^= m; } #if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64 if (left) b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) & bytemask_from_count(left))); #else switch (left) { case 7: b |= ((u64)end[6]) << 48; case 6: b |= ((u64)end[5]) << 40; case 5: b |= ((u64)end[4]) << 32; case 4: b |= le32_to_cpup(data); break; case 3: b |= ((u64)end[2]) << 16; case 2: b |= le16_to_cpup(data); break; case 1: b |= end[0]; } #endif POSTAMBLE } EXPORT_SYMBOL(__siphash_aligned); #endif u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key) { const u8 *end = data + len - (len % sizeof(u64)); const u8 left = len & (sizeof(u64) - 1); u64 m; PREAMBLE(len) for (; data != end; data += sizeof(u64)) { m = get_unaligned_le64(data); v3 ^= m; SIPROUND; SIPROUND; v0 ^= m; } #if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64 if (left) b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) & bytemask_from_count(left))); #else switch (left) { case 7: b |= ((u64)end[6]) << 48; case 6: b |= ((u64)end[5]) << 40; case 5: b |= ((u64)end[4]) << 32; case 4: b |= get_unaligned_le32(end); break; case 3: b |= ((u64)end[2]) << 16; case 2: b |= get_unaligned_le16(end); break; case 1: b |= end[0]; } #endif POSTAMBLE } EXPORT_SYMBOL(__siphash_unaligned); /** * siphash_1u64 - compute 64-bit siphash PRF value of a u64 * @first: first u64 * @key: the siphash key */ u64 siphash_1u64(const u64 first, const siphash_key_t *key) { PREAMBLE(8) v3 ^= first; SIPROUND; SIPROUND; v0 ^= first; POSTAMBLE } EXPORT_SYMBOL(siphash_1u64); /** * siphash_2u64 - compute 64-bit siphash PRF value of 2 u64 * @first: first u64 * @second: second u64 * @key: the siphash key */ u64 siphash_2u64(const u64 first, const u64 second, const siphash_key_t *key) { PREAMBLE(16) v3 ^= first; SIPROUND; SIPROUND; v0 ^= first; v3 ^= second; SIPROUND; SIPROUND; v0 ^= second; POSTAMBLE } EXPORT_SYMBOL(siphash_2u64); /** * siphash_3u64 - compute 64-bit siphash PRF value of 3 u64 * @first: first u64 * @second: second u64 * @third: third u64 * @key: the siphash key */ u64 siphash_3u64(const u64 first, const u64 second, const u64 third, const siphash_key_t *key) { PREAMBLE(24) v3 ^= first; SIPROUND; SIPROUND; v0 ^= first; v3 ^= second; SIPROUND; SIPROUND; v0 ^= second; v3 ^= third; SIPROUND; SIPROUND; v0 ^= third; POSTAMBLE } EXPORT_SYMBOL(siphash_3u64); /** * siphash_4u64 - compute 64-bit siphash PRF value of 4 u64 * @first: first u64 * @second: second u64 * @third: third u64 * @forth: forth u64 * @key: the siphash key */ u64 siphash_4u64(const u64 first, 
const u64 second, const u64 third, const u64 forth, const siphash_key_t *key) { PREAMBLE(32) v3 ^= first; SIPROUND; SIPROUND; v0 ^= first; v3 ^= second; SIPROUND; SIPROUND; v0 ^= second; v3 ^= third; SIPROUND; SIPROUND; v0 ^= third; v3 ^= forth; SIPROUND; SIPROUND; v0 ^= forth; POSTAMBLE } EXPORT_SYMBOL(siphash_4u64); u64 siphash_1u32(const u32 first, const siphash_key_t *key) { PREAMBLE(4) b |= first; POSTAMBLE } EXPORT_SYMBOL(siphash_1u32); u64 siphash_3u32(const u32 first, const u32 second, const u32 third, const siphash_key_t *key) { u64 combined = (u64)second << 32 | first; PREAMBLE(12) v3 ^= combined; SIPROUND; SIPROUND; v0 ^= combined; b |= third; POSTAMBLE } EXPORT_SYMBOL(siphash_3u32); #if BITS_PER_LONG == 64 /* Note that on 64-bit, we make HalfSipHash1-3 actually be SipHash1-3, for * performance reasons. On 32-bit, below, we actually implement HalfSipHash1-3. */ #define HSIPROUND SIPROUND #define HPREAMBLE(len) PREAMBLE(len) #define HPOSTAMBLE \ v3 ^= b; \ HSIPROUND; \ v0 ^= b; \ v2 ^= 0xff; \ HSIPROUND; \ HSIPROUND; \ HSIPROUND; \ return (v0 ^ v1) ^ (v2 ^ v3); #ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key) { const u8 *end = data + len - (len % sizeof(u64)); const u8 left = len & (sizeof(u64) - 1); u64 m; HPREAMBLE(len) for (; data != end; data += sizeof(u64)) { m = le64_to_cpup(data); v3 ^= m; HSIPROUND; v0 ^= m; } #if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64 if (left) b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) & bytemask_from_count(left))); #else switch (left) { case 7: b |= ((u64)end[6]) << 48; case 6: b |= ((u64)end[5]) << 40; case 5: b |= ((u64)end[4]) << 32; case 4: b |= le32_to_cpup(data); break; case 3: b |= ((u64)end[2]) << 16; case 2: b |= le16_to_cpup(data); break; case 1: b |= end[0]; } #endif HPOSTAMBLE } EXPORT_SYMBOL(__hsiphash_aligned); #endif u32 __hsiphash_unaligned(const void *data, size_t len, const hsiphash_key_t *key) { const u8 *end = data + len - (len % sizeof(u64)); const u8 left = len & (sizeof(u64) - 1); u64 m; HPREAMBLE(len) for (; data != end; data += sizeof(u64)) { m = get_unaligned_le64(data); v3 ^= m; HSIPROUND; v0 ^= m; } #if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64 if (left) b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) & bytemask_from_count(left))); #else switch (left) { case 7: b |= ((u64)end[6]) << 48; case 6: b |= ((u64)end[5]) << 40; case 5: b |= ((u64)end[4]) << 32; case 4: b |= get_unaligned_le32(end); break; case 3: b |= ((u64)end[2]) << 16; case 2: b |= get_unaligned_le16(end); break; case 1: b |= end[0]; } #endif HPOSTAMBLE } EXPORT_SYMBOL(__hsiphash_unaligned); /** * hsiphash_1u32 - compute 64-bit hsiphash PRF value of a u32 * @first: first u32 * @key: the hsiphash key */ u32 hsiphash_1u32(const u32 first, const hsiphash_key_t *key) { HPREAMBLE(4) b |= first; HPOSTAMBLE } EXPORT_SYMBOL(hsiphash_1u32); /** * hsiphash_2u32 - compute 32-bit hsiphash PRF value of 2 u32 * @first: first u32 * @second: second u32 * @key: the hsiphash key */ u32 hsiphash_2u32(const u32 first, const u32 second, const hsiphash_key_t *key) { u64 combined = (u64)second << 32 | first; HPREAMBLE(8) v3 ^= combined; HSIPROUND; v0 ^= combined; HPOSTAMBLE } EXPORT_SYMBOL(hsiphash_2u32); /** * hsiphash_3u32 - compute 32-bit hsiphash PRF value of 3 u32 * @first: first u32 * @second: second u32 * @third: third u32 * @key: the hsiphash key */ u32 hsiphash_3u32(const u32 first, const u32 second, const u32 third, const 
hsiphash_key_t *key) { u64 combined = (u64)second << 32 | first; HPREAMBLE(12) v3 ^= combined; HSIPROUND; v0 ^= combined; b |= third; HPOSTAMBLE } EXPORT_SYMBOL(hsiphash_3u32); /** * hsiphash_4u32 - compute 32-bit hsiphash PRF value of 4 u32 * @first: first u32 * @second: second u32 * @third: third u32 * @forth: forth u32 * @key: the hsiphash key */ u32 hsiphash_4u32(const u32 first, const u32 second, const u32 third, const u32 forth, const hsiphash_key_t *key) { u64 combined = (u64)second << 32 | first; HPREAMBLE(16) v3 ^= combined; HSIPROUND; v0 ^= combined; combined = (u64)forth << 32 | third; v3 ^= combined; HSIPROUND; v0 ^= combined; HPOSTAMBLE } EXPORT_SYMBOL(hsiphash_4u32); #else #define HSIPROUND HSIPHASH_PERMUTATION(v0, v1, v2, v3) #define HPREAMBLE(len) \ u32 v0 = HSIPHASH_CONST_0; \ u32 v1 = HSIPHASH_CONST_1; \ u32 v2 = HSIPHASH_CONST_2; \ u32 v3 = HSIPHASH_CONST_3; \ u32 b = ((u32)(len)) << 24; \ v3 ^= key->key[1]; \ v2 ^= key->key[0]; \ v1 ^= key->key[1]; \ v0 ^= key->key[0]; #define HPOSTAMBLE \ v3 ^= b; \ HSIPROUND; \ v0 ^= b; \ v2 ^= 0xff; \ HSIPROUND; \ HSIPROUND; \ HSIPROUND; \ return v1 ^ v3; #ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key) { const u8 *end = data + len - (len % sizeof(u32)); const u8 left = len & (sizeof(u32) - 1); u32 m; HPREAMBLE(len) for (; data != end; data += sizeof(u32)) { m = le32_to_cpup(data); v3 ^= m; HSIPROUND; v0 ^= m; } switch (left) { case 3: b |= ((u32)end[2]) << 16; case 2: b |= le16_to_cpup(data); break; case 1: b |= end[0]; } HPOSTAMBLE } EXPORT_SYMBOL(__hsiphash_aligned); #endif u32 __hsiphash_unaligned(const void *data, size_t len, const hsiphash_key_t *key) { const u8 *end = data + len - (len % sizeof(u32)); const u8 left = len & (sizeof(u32) - 1); u32 m; HPREAMBLE(len) for (; data != end; data += sizeof(u32)) { m = get_unaligned_le32(data); v3 ^= m; HSIPROUND; v0 ^= m; } switch (left) { case 3: b |= ((u32)end[2]) << 16; case 2: b |= get_unaligned_le16(end); break; case 1: b |= end[0]; } HPOSTAMBLE } EXPORT_SYMBOL(__hsiphash_unaligned); /** * hsiphash_1u32 - compute 32-bit hsiphash PRF value of a u32 * @first: first u32 * @key: the hsiphash key */ u32 hsiphash_1u32(const u32 first, const hsiphash_key_t *key) { HPREAMBLE(4) v3 ^= first; HSIPROUND; v0 ^= first; HPOSTAMBLE } EXPORT_SYMBOL(hsiphash_1u32); /** * hsiphash_2u32 - compute 32-bit hsiphash PRF value of 2 u32 * @first: first u32 * @second: second u32 * @key: the hsiphash key */ u32 hsiphash_2u32(const u32 first, const u32 second, const hsiphash_key_t *key) { HPREAMBLE(8) v3 ^= first; HSIPROUND; v0 ^= first; v3 ^= second; HSIPROUND; v0 ^= second; HPOSTAMBLE } EXPORT_SYMBOL(hsiphash_2u32); /** * hsiphash_3u32 - compute 32-bit hsiphash PRF value of 3 u32 * @first: first u32 * @second: second u32 * @third: third u32 * @key: the hsiphash key */ u32 hsiphash_3u32(const u32 first, const u32 second, const u32 third, const hsiphash_key_t *key) { HPREAMBLE(12) v3 ^= first; HSIPROUND; v0 ^= first; v3 ^= second; HSIPROUND; v0 ^= second; v3 ^= third; HSIPROUND; v0 ^= third; HPOSTAMBLE } EXPORT_SYMBOL(hsiphash_3u32); /** * hsiphash_4u32 - compute 32-bit hsiphash PRF value of 4 u32 * @first: first u32 * @second: second u32 * @third: third u32 * @forth: forth u32 * @key: the hsiphash key */ u32 hsiphash_4u32(const u32 first, const u32 second, const u32 third, const u32 forth, const hsiphash_key_t *key) { HPREAMBLE(16) v3 ^= first; HSIPROUND; v0 ^= first; v3 ^= second; HSIPROUND; v0 ^= second; v3 ^= third; HSIPROUND; 
v0 ^= third; v3 ^= forth; HSIPROUND; v0 ^= forth; HPOSTAMBLE } EXPORT_SYMBOL(hsiphash_4u32); #endif
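/*
 * Illustrative sketch, not part of siphash.c: a typical caller of the helpers
 * above, hashing a flow 4-tuple into a hash-table index.  The key must be
 * generated once at boot/module init and kept secret; all names here are
 * placeholders.
 */
#include <linux/random.h>
#include <linux/siphash.h>

static siphash_key_t demo_key;

static void demo_key_init(void)
{
	get_random_bytes(&demo_key, sizeof(demo_key));
}

static u32 demo_flow_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport)
{
	/* Pack the tuple into the fixed-width helper; the 64-bit PRF output
	 * is truncated to 32 bits for use as a bucket index. */
	return (u32)siphash_3u32((__force u32)saddr, (__force u32)daddr,
				 ((__force u32)sport << 16) | (__force u32)dport,
				 &demo_key);
}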
940 274 266 211 183 321 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __SOCK_DIAG_H__ #define __SOCK_DIAG_H__ #include <linux/netlink.h> #include <linux/user_namespace.h> #include <net/net_namespace.h> #include <net/sock.h> #include <uapi/linux/sock_diag.h> struct sk_buff; struct nlmsghdr; struct sock; struct sock_diag_handler { __u8 family; int (*dump)(struct sk_buff *skb, struct nlmsghdr *nlh); int (*get_info)(struct sk_buff *skb, struct sock *sk); int (*destroy)(struct sk_buff *skb, struct nlmsghdr *nlh); }; int sock_diag_register(const struct sock_diag_handler *h); void sock_diag_unregister(const struct sock_diag_handler *h); void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)); void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)); u64 sock_gen_cookie(struct sock *sk); int sock_diag_check_cookie(struct sock *sk, const __u32 *cookie); void sock_diag_save_cookie(struct sock *sk, __u32 *cookie); int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attr); int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk, struct sk_buff *skb, int attrtype); static inline enum sknetlink_groups sock_diag_destroy_group(const struct sock *sk) { switch (sk->sk_family) { case AF_INET: if (sk->sk_type == SOCK_RAW) return SKNLGRP_NONE; switch (sk->sk_protocol) { case IPPROTO_TCP: return SKNLGRP_INET_TCP_DESTROY; case IPPROTO_UDP: return SKNLGRP_INET_UDP_DESTROY; default: return SKNLGRP_NONE; } case AF_INET6: if (sk->sk_type == SOCK_RAW) return SKNLGRP_NONE; switch (sk->sk_protocol) { case IPPROTO_TCP: return SKNLGRP_INET6_TCP_DESTROY; case IPPROTO_UDP: return SKNLGRP_INET6_UDP_DESTROY; default: return SKNLGRP_NONE; } default: return SKNLGRP_NONE; } } static inline bool sock_diag_has_destroy_listeners(const struct sock *sk) { const struct net *n = sock_net(sk); const enum sknetlink_groups group = sock_diag_destroy_group(sk); return group != SKNLGRP_NONE && n->diag_nlsk && netlink_has_listeners(n->diag_nlsk, group); } void sock_diag_broadcast_destroy(struct sock *sk); int sock_diag_destroy(struct sock *sk, int err); #endif
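/*
 * Illustrative sketch, not part of this header: the shape of a sock_diag
 * handler registration as performed by the *_diag modules.  The family, the
 * dump stub and the module names are placeholders, not an existing handler.
 */
#include <linux/module.h>
#include <linux/netlink.h>
#include <linux/sock_diag.h>

static int demo_diag_dump(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	/* A real handler parses the family-specific *_diag_req behind @nlh
	 * and emits one netlink reply per matching socket. */
	return -EOPNOTSUPP;
}

static const struct sock_diag_handler demo_diag_handler = {
	.family	= AF_DECnet,		/* placeholder family for illustration */
	.dump	= demo_diag_dump,
};

static int __init demo_diag_init(void)
{
	return sock_diag_register(&demo_diag_handler);
}

static void __exit demo_diag_exit(void)
{
	sock_diag_unregister(&demo_diag_handler);
}

module_init(demo_diag_init);
module_exit(demo_diag_exit);
MODULE_LICENSE("GPL");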
23 2 22 9 8 22 12 11 21 1 43 44 43 43 1 43 41 39 38 38 54 54 50 54 92 85 76 67 46 24 12 24 7 19 10 12 12 12 92 124 25 125 114 3 26 125 21 20 2 21 25 25 25 5 24 24 24 2 23 22 23 6 23 23 23 23 11 23 25 20 25 45 45 45 45 45 45 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 /* scm.c - Socket level control messages processing. * * Author: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * Alignment and value checking mods by Craig Metz * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ #include <linux/module.h> #include <linux/signal.h> #include <linux/capability.h> #include <linux/errno.h> #include <linux/sched.h> #include <linux/sched/user.h> #include <linux/mm.h> #include <linux/kernel.h> #include <linux/stat.h> #include <linux/socket.h> #include <linux/file.h> #include <linux/fcntl.h> #include <linux/net.h> #include <linux/interrupt.h> #include <linux/netdevice.h> #include <linux/security.h> #include <linux/pid_namespace.h> #include <linux/pid.h> #include <linux/nsproxy.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <net/protocol.h> #include <linux/skbuff.h> #include <net/sock.h> #include <net/compat.h> #include <net/scm.h> #include <net/cls_cgroup.h> /* * Only allow a user to send credentials, that they could set with * setu(g)id. 
*/ static __inline__ int scm_check_creds(struct ucred *creds) { const struct cred *cred = current_cred(); kuid_t uid = make_kuid(cred->user_ns, creds->uid); kgid_t gid = make_kgid(cred->user_ns, creds->gid); if (!uid_valid(uid) || !gid_valid(gid)) return -EINVAL; if ((creds->pid == task_tgid_vnr(current) || ns_capable(task_active_pid_ns(current)->user_ns, CAP_SYS_ADMIN)) && ((uid_eq(uid, cred->uid) || uid_eq(uid, cred->euid) || uid_eq(uid, cred->suid)) || ns_capable(cred->user_ns, CAP_SETUID)) && ((gid_eq(gid, cred->gid) || gid_eq(gid, cred->egid) || gid_eq(gid, cred->sgid)) || ns_capable(cred->user_ns, CAP_SETGID))) { return 0; } return -EPERM; } static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) { int *fdp = (int*)CMSG_DATA(cmsg); struct scm_fp_list *fpl = *fplp; struct file **fpp; int i, num; num = (cmsg->cmsg_len - sizeof(struct cmsghdr))/sizeof(int); if (num <= 0) return 0; if (num > SCM_MAX_FD) return -EINVAL; if (!fpl) { fpl = kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL); if (!fpl) return -ENOMEM; *fplp = fpl; fpl->count = 0; fpl->max = SCM_MAX_FD; fpl->user = NULL; } fpp = &fpl->fp[fpl->count]; if (fpl->count + num > fpl->max) return -EINVAL; /* * Verify the descriptors and increment the usage count. */ for (i=0; i< num; i++) { int fd = fdp[i]; struct file *file; if (fd < 0 || !(file = fget_raw(fd))) return -EBADF; *fpp++ = file; fpl->count++; } if (!fpl->user) fpl->user = get_uid(current_user()); return num; } void __scm_destroy(struct scm_cookie *scm) { struct scm_fp_list *fpl = scm->fp; int i; if (fpl) { scm->fp = NULL; for (i=fpl->count-1; i>=0; i--) fput(fpl->fp[i]); free_uid(fpl->user); kfree(fpl); } } EXPORT_SYMBOL(__scm_destroy); int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) { struct cmsghdr *cmsg; int err; for_each_cmsghdr(cmsg, msg) { err = -EINVAL; /* Verify that cmsg_len is at least sizeof(struct cmsghdr) */ /* The first check was omitted in <= 2.2.5. The reasoning was that parser checks cmsg_len in any case, so that additional check would be work duplication. But if cmsg_level is not SOL_SOCKET, we do not check for too short ancillary data object at all! Oops. OK, let's add it... 
*/ if (!CMSG_OK(msg, cmsg)) goto error; if (cmsg->cmsg_level != SOL_SOCKET) continue; switch (cmsg->cmsg_type) { case SCM_RIGHTS: if (!sock->ops || sock->ops->family != PF_UNIX) goto error; err=scm_fp_copy(cmsg, &p->fp); if (err<0) goto error; break; case SCM_CREDENTIALS: { struct ucred creds; kuid_t uid; kgid_t gid; if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct ucred))) goto error; memcpy(&creds, CMSG_DATA(cmsg), sizeof(struct ucred)); err = scm_check_creds(&creds); if (err) goto error; p->creds.pid = creds.pid; if (!p->pid || pid_vnr(p->pid) != creds.pid) { struct pid *pid; err = -ESRCH; pid = find_get_pid(creds.pid); if (!pid) goto error; put_pid(p->pid); p->pid = pid; } err = -EINVAL; uid = make_kuid(current_user_ns(), creds.uid); gid = make_kgid(current_user_ns(), creds.gid); if (!uid_valid(uid) || !gid_valid(gid)) goto error; p->creds.uid = uid; p->creds.gid = gid; break; } default: goto error; } } if (p->fp && !p->fp->count) { kfree(p->fp); p->fp = NULL; } return 0; error: scm_destroy(p); return err; } EXPORT_SYMBOL(__scm_send); int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data) { struct cmsghdr __user *cm = (__force struct cmsghdr __user *)msg->msg_control; struct cmsghdr cmhdr; int cmlen = CMSG_LEN(len); int err; if (MSG_CMSG_COMPAT & msg->msg_flags) return put_cmsg_compat(msg, level, type, len, data); if (cm==NULL || msg->msg_controllen < sizeof(*cm)) { msg->msg_flags |= MSG_CTRUNC; return 0; /* XXX: return error? check spec. */ } if (msg->msg_controllen < cmlen) { msg->msg_flags |= MSG_CTRUNC; cmlen = msg->msg_controllen; } cmhdr.cmsg_level = level; cmhdr.cmsg_type = type; cmhdr.cmsg_len = cmlen; err = -EFAULT; if (copy_to_user(cm, &cmhdr, sizeof cmhdr)) goto out; if (copy_to_user(CMSG_DATA(cm), data, cmlen - sizeof(struct cmsghdr))) goto out; cmlen = CMSG_SPACE(len); if (msg->msg_controllen < cmlen) cmlen = msg->msg_controllen; msg->msg_control += cmlen; msg->msg_controllen -= cmlen; err = 0; out: return err; } EXPORT_SYMBOL(put_cmsg); void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) { struct cmsghdr __user *cm = (__force struct cmsghdr __user*)msg->msg_control; int fdmax = 0; int fdnum = scm->fp->count; struct file **fp = scm->fp->fp; int __user *cmfptr; int err = 0, i; if (MSG_CMSG_COMPAT & msg->msg_flags) { scm_detach_fds_compat(msg, scm); return; } if (msg->msg_controllen > sizeof(struct cmsghdr)) fdmax = ((msg->msg_controllen - sizeof(struct cmsghdr)) / sizeof(int)); if (fdnum < fdmax) fdmax = fdnum; for (i=0, cmfptr=(__force int __user *)CMSG_DATA(cm); i<fdmax; i++, cmfptr++) { struct socket *sock; int new_fd; err = security_file_receive(fp[i]); if (err) break; err = get_unused_fd_flags(MSG_CMSG_CLOEXEC & msg->msg_flags ? O_CLOEXEC : 0); if (err < 0) break; new_fd = err; err = put_user(new_fd, cmfptr); if (err) { put_unused_fd(new_fd); break; } /* Bump the usage count and install the file. 
*/ sock = sock_from_file(fp[i], &err); if (sock) { sock_update_netprioidx(&sock->sk->sk_cgrp_data); sock_update_classid(&sock->sk->sk_cgrp_data); } fd_install(new_fd, get_file(fp[i])); } if (i > 0) { int cmlen = CMSG_LEN(i*sizeof(int)); err = put_user(SOL_SOCKET, &cm->cmsg_level); if (!err) err = put_user(SCM_RIGHTS, &cm->cmsg_type); if (!err) err = put_user(cmlen, &cm->cmsg_len); if (!err) { cmlen = CMSG_SPACE(i*sizeof(int)); if (msg->msg_controllen < cmlen) cmlen = msg->msg_controllen; msg->msg_control += cmlen; msg->msg_controllen -= cmlen; } } if (i < fdnum || (fdnum && fdmax <= 0)) msg->msg_flags |= MSG_CTRUNC; /* * All of the files that fit in the message have had their * usage counts incremented, so we just free the list. */ __scm_destroy(scm); } EXPORT_SYMBOL(scm_detach_fds); struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl) { struct scm_fp_list *new_fpl; int i; if (!fpl) return NULL; new_fpl = kmemdup(fpl, offsetof(struct scm_fp_list, fp[fpl->count]), GFP_KERNEL); if (new_fpl) { for (i = 0; i < fpl->count; i++) get_file(fpl->fp[i]); new_fpl->max = new_fpl->count; new_fpl->user = get_uid(fpl->user); } return new_fpl; } EXPORT_SYMBOL(scm_fp_dup);
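scm_fp_copy() and scm_detach_fds() above are the kernel half of file-descriptor passing; the userspace half is ordinary sendmsg() ancillary data on an AF_UNIX socket. A minimal sketch (error handling abbreviated; the descriptor being passed is arbitrary):

/* Minimal userspace sketch: pass an open file descriptor over an AF_UNIX
 * socketpair using SCM_RIGHTS, the control message handled by scm_fp_copy()
 * on the sending side and scm_detach_fds() on the receiving side.
 */
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <unistd.h>

static int send_fd(int sock, int fd_to_pass)
{
	char dummy = 'x';
	struct iovec iov = { .iov_base = &dummy, .iov_len = 1 };
	union {
		struct cmsghdr align;
		char buf[CMSG_SPACE(sizeof(int))];
	} u;
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = u.buf, .msg_controllen = sizeof(u.buf),
	};
	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);

	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_RIGHTS;
	cmsg->cmsg_len = CMSG_LEN(sizeof(int));
	memcpy(CMSG_DATA(cmsg), &fd_to_pass, sizeof(int));

	return sendmsg(sock, &msg, 0) == 1 ? 0 : -1;
}

int main(void)
{
	int sv[2];

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) < 0)
		return 1;
	return send_fd(sv[0], STDIN_FILENO) ? 1 : 0;
}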
/* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM net #if !defined(_TRACE_NET_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_NET_H #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/if_vlan.h> #include <linux/ip.h> #include <linux/tracepoint.h> TRACE_EVENT(net_dev_start_xmit, TP_PROTO(const struct sk_buff *skb, const struct net_device *dev), TP_ARGS(skb, dev), TP_STRUCT__entry( __string( name, dev->name ) __field( u16, queue_mapping ) __field( const void *, skbaddr ) __field( bool, vlan_tagged ) __field( u16, vlan_proto ) __field( u16, vlan_tci ) __field( u16, protocol ) __field( u8, ip_summed ) __field( unsigned int, len ) __field( unsigned int, data_len ) __field( int, network_offset ) __field( bool, transport_offset_valid) __field( int, transport_offset) __field( u8, tx_flags ) __field( u16, gso_size ) __field( u16, gso_segs ) __field( u16, gso_type ) ), TP_fast_assign( __assign_str(name, dev->name); __entry->queue_mapping = skb->queue_mapping; __entry->skbaddr = skb; __entry->vlan_tagged = skb_vlan_tag_present(skb); __entry->vlan_proto = ntohs(skb->vlan_proto); __entry->vlan_tci = skb_vlan_tag_get(skb); __entry->protocol = ntohs(skb->protocol); __entry->ip_summed = skb->ip_summed; __entry->len = skb->len; __entry->data_len = skb->data_len; __entry->network_offset = skb_network_offset(skb); __entry->transport_offset_valid = skb_transport_header_was_set(skb); __entry->transport_offset = skb_transport_offset(skb); __entry->tx_flags = skb_shinfo(skb)->tx_flags; __entry->gso_size = skb_shinfo(skb)->gso_size; __entry->gso_segs = skb_shinfo(skb)->gso_segs; __entry->gso_type = skb_shinfo(skb)->gso_type; ), TP_printk("dev=%s queue_mapping=%u skbaddr=%p vlan_tagged=%d vlan_proto=0x%04x vlan_tci=0x%04x protocol=0x%04x ip_summed=%d len=%u data_len=%u network_offset=%d transport_offset_valid=%d transport_offset=%d tx_flags=%d gso_size=%d gso_segs=%d gso_type=%#x", __get_str(name), __entry->queue_mapping, __entry->skbaddr, __entry->vlan_tagged, __entry->vlan_proto, __entry->vlan_tci, __entry->protocol, __entry->ip_summed, __entry->len, __entry->data_len, __entry->network_offset, __entry->transport_offset_valid, __entry->transport_offset, __entry->tx_flags, __entry->gso_size, __entry->gso_segs, __entry->gso_type) ); TRACE_EVENT(net_dev_xmit, TP_PROTO(struct sk_buff *skb, int rc, struct net_device *dev, unsigned int skb_len), TP_ARGS(skb, rc, dev, skb_len), TP_STRUCT__entry( __field( void *, skbaddr ) __field( unsigned int, len ) __field( int, rc ) __string( name, dev->name ) ), TP_fast_assign( __entry->skbaddr = skb;
__entry->len = skb_len; __entry->rc = rc; __assign_str(name, dev->name); ), TP_printk("dev=%s skbaddr=%p len=%u rc=%d", __get_str(name), __entry->skbaddr, __entry->len, __entry->rc) ); DECLARE_EVENT_CLASS(net_dev_template, TP_PROTO(struct sk_buff *skb), TP_ARGS(skb), TP_STRUCT__entry( __field( void *, skbaddr ) __field( unsigned int, len ) __string( name, skb->dev->name ) ), TP_fast_assign( __entry->skbaddr = skb; __entry->len = skb->len; __assign_str(name, skb->dev->name); ), TP_printk("dev=%s skbaddr=%p len=%u", __get_str(name), __entry->skbaddr, __entry->len) ) DEFINE_EVENT(net_dev_template, net_dev_queue, TP_PROTO(struct sk_buff *skb), TP_ARGS(skb) ); DEFINE_EVENT(net_dev_template, netif_receive_skb, TP_PROTO(struct sk_buff *skb), TP_ARGS(skb) ); DEFINE_EVENT(net_dev_template, netif_rx, TP_PROTO(struct sk_buff *skb), TP_ARGS(skb) ); DECLARE_EVENT_CLASS(net_dev_rx_verbose_template, TP_PROTO(const struct sk_buff *skb), TP_ARGS(skb), TP_STRUCT__entry( __string( name, skb->dev->name ) __field( unsigned int, napi_id ) __field( u16, queue_mapping ) __field( const void *, skbaddr ) __field( bool, vlan_tagged ) __field( u16, vlan_proto ) __field( u16, vlan_tci ) __field( u16, protocol ) __field( u8, ip_summed ) __field( u32, hash ) __field( bool, l4_hash ) __field( unsigned int, len ) __field( unsigned int, data_len ) __field( unsigned int, truesize ) __field( bool, mac_header_valid) __field( int, mac_header ) __field( unsigned char, nr_frags ) __field( u16, gso_size ) __field( u16, gso_type ) ), TP_fast_assign( __assign_str(name, skb->dev->name); #ifdef CONFIG_NET_RX_BUSY_POLL __entry->napi_id = skb->napi_id; #else __entry->napi_id = 0; #endif __entry->queue_mapping = skb->queue_mapping; __entry->skbaddr = skb; __entry->vlan_tagged = skb_vlan_tag_present(skb); __entry->vlan_proto = ntohs(skb->vlan_proto); __entry->vlan_tci = skb_vlan_tag_get(skb); __entry->protocol = ntohs(skb->protocol); __entry->ip_summed = skb->ip_summed; __entry->hash = skb->hash; __entry->l4_hash = skb->l4_hash; __entry->len = skb->len; __entry->data_len = skb->data_len; __entry->truesize = skb->truesize; __entry->mac_header_valid = skb_mac_header_was_set(skb); __entry->mac_header = skb_mac_header(skb) - skb->data; __entry->nr_frags = skb_shinfo(skb)->nr_frags; __entry->gso_size = skb_shinfo(skb)->gso_size; __entry->gso_type = skb_shinfo(skb)->gso_type; ), TP_printk("dev=%s napi_id=%#x queue_mapping=%u skbaddr=%p vlan_tagged=%d vlan_proto=0x%04x vlan_tci=0x%04x protocol=0x%04x ip_summed=%d hash=0x%08x l4_hash=%d len=%u data_len=%u truesize=%u mac_header_valid=%d mac_header=%d nr_frags=%d gso_size=%d gso_type=%#x", __get_str(name), __entry->napi_id, __entry->queue_mapping, __entry->skbaddr, __entry->vlan_tagged, __entry->vlan_proto, __entry->vlan_tci, __entry->protocol, __entry->ip_summed, __entry->hash, __entry->l4_hash, __entry->len, __entry->data_len, __entry->truesize, __entry->mac_header_valid, __entry->mac_header, __entry->nr_frags, __entry->gso_size, __entry->gso_type) ); DEFINE_EVENT(net_dev_rx_verbose_template, napi_gro_frags_entry, TP_PROTO(const struct sk_buff *skb), TP_ARGS(skb) ); DEFINE_EVENT(net_dev_rx_verbose_template, napi_gro_receive_entry, TP_PROTO(const struct sk_buff *skb), TP_ARGS(skb) ); DEFINE_EVENT(net_dev_rx_verbose_template, netif_receive_skb_entry, TP_PROTO(const struct sk_buff *skb), TP_ARGS(skb) ); DEFINE_EVENT(net_dev_rx_verbose_template, netif_receive_skb_list_entry, TP_PROTO(const struct sk_buff *skb), TP_ARGS(skb) ); DEFINE_EVENT(net_dev_rx_verbose_template, netif_rx_entry, TP_PROTO(const 
struct sk_buff *skb), TP_ARGS(skb) ); DEFINE_EVENT(net_dev_rx_verbose_template, netif_rx_ni_entry, TP_PROTO(const struct sk_buff *skb), TP_ARGS(skb) ); #endif /* _TRACE_NET_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
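These events are consumed at runtime through tracefs, where each event's TP_printk string becomes one line of trace output. A small sketch of enabling net_dev_xmit and reading a few rendered lines; the /sys/kernel/debug/tracing mount point is an assumption (on some systems tracefs lives at /sys/kernel/tracing) and appropriate privileges are required:

/* Sketch: enable the net_dev_xmit tracepoint and print a few trace lines.
 * Paths assume tracefs is mounted under /sys/kernel/debug/tracing.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	const char *enable =
		"/sys/kernel/debug/tracing/events/net/net_dev_xmit/enable";
	char line[512];
	FILE *trace;
	int fd, i;

	fd = open(enable, O_WRONLY);
	if (fd < 0 || write(fd, "1", 1) != 1)
		return 1;
	close(fd);

	trace = fopen("/sys/kernel/debug/tracing/trace_pipe", "r");
	if (!trace)
		return 1;
	for (i = 0; i < 5 && fgets(line, sizeof(line), trace); i++)
		fputs(line, stdout);	/* dev=... skbaddr=... len=... rc=... */
	fclose(trace);
	return 0;
}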
/* SCTP kernel implementation * Copyright (c) 1999-2000 Cisco, Inc. * Copyright (c) 1999-2001 Motorola, Inc. * Copyright (c) 2001-2002 International Business Machines, Corp. * Copyright (c) 2001 Intel Corp. * Copyright (c) 2001 Nokia, Inc. * Copyright (c) 2001 La Monte H.P. Yarroll * * This file is part of the SCTP kernel implementation * * This abstraction represents an SCTP endpoint. * * The SCTP implementation is free software; * you can redistribute it and/or modify it under the terms of * the GNU General Public License as published by * the Free Software Foundation; either version 2, or (at your option) * any later version. * * The SCTP implementation is distributed in the hope that it * will be useful, but WITHOUT ANY WARRANTY; without even the implied * ************************ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with GNU CC; see the file COPYING. If not, see * <http://www.gnu.org/licenses/>. * * Please send any bug reports or fixes you make to the * email address(es): * lksctp developers <linux-sctp@vger.kernel.org> * * Written or modified by: * La Monte H.P.
Yarroll <piggy@acm.org> * Karl Knutson <karl@athena.chicago.il.us> * Jon Grimm <jgrimm@austin.ibm.com> * Daisy Chang <daisyc@us.ibm.com> * Dajiang Zhang <dajiang.zhang@nokia.com> */ #include <linux/types.h> #include <linux/slab.h> #include <linux/in.h> #include <linux/random.h> /* get_random_bytes() */ #include <net/sock.h> #include <net/ipv6.h> #include <net/sctp/sctp.h> #include <net/sctp/sm.h> /* Forward declarations for internal helpers. */ static void sctp_endpoint_bh_rcv(struct work_struct *work); /* * Initialize the base fields of the endpoint structure. */ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, struct sock *sk, gfp_t gfp) { struct net *net = sock_net(sk); struct sctp_hmac_algo_param *auth_hmacs = NULL; struct sctp_chunks_param *auth_chunks = NULL; struct sctp_shared_key *null_key; int err; ep->digest = kzalloc(SCTP_SIGNATURE_SIZE, gfp); if (!ep->digest) return NULL; ep->auth_enable = net->sctp.auth_enable; if (ep->auth_enable) { /* Allocate space for HMACS and CHUNKS authentication * variables. There are arrays that we encode directly * into parameters to make the rest of the operations easier. */ auth_hmacs = kzalloc(struct_size(auth_hmacs, hmac_ids, SCTP_AUTH_NUM_HMACS), gfp); if (!auth_hmacs) goto nomem; auth_chunks = kzalloc(sizeof(*auth_chunks) + SCTP_NUM_CHUNK_TYPES, gfp); if (!auth_chunks) goto nomem; /* Initialize the HMACS parameter. * SCTP-AUTH: Section 3.3 * Every endpoint supporting SCTP chunk authentication MUST * support the HMAC based on the SHA-1 algorithm. */ auth_hmacs->param_hdr.type = SCTP_PARAM_HMAC_ALGO; auth_hmacs->param_hdr.length = htons(sizeof(struct sctp_paramhdr) + 2); auth_hmacs->hmac_ids[0] = htons(SCTP_AUTH_HMAC_ID_SHA1); /* Initialize the CHUNKS parameter */ auth_chunks->param_hdr.type = SCTP_PARAM_CHUNKS; auth_chunks->param_hdr.length = htons(sizeof(struct sctp_paramhdr)); /* If the Add-IP functionality is enabled, we must * authenticate, ASCONF and ASCONF-ACK chunks */ if (net->sctp.addip_enable) { auth_chunks->chunks[0] = SCTP_CID_ASCONF; auth_chunks->chunks[1] = SCTP_CID_ASCONF_ACK; auth_chunks->param_hdr.length = htons(sizeof(struct sctp_paramhdr) + 2); } } /* Initialize the base structure. */ /* What type of endpoint are we? */ ep->base.type = SCTP_EP_TYPE_SOCKET; /* Initialize the basic object fields. */ refcount_set(&ep->base.refcnt, 1); ep->base.dead = false; /* Create an input queue. */ sctp_inq_init(&ep->base.inqueue); /* Set its top-half handler */ sctp_inq_set_th_handler(&ep->base.inqueue, sctp_endpoint_bh_rcv); /* Initialize the bind addr area */ sctp_bind_addr_init(&ep->base.bind_addr, 0); /* Create the lists of associations. */ INIT_LIST_HEAD(&ep->asocs); /* Use SCTP specific send buffer space queues. */ ep->sndbuf_policy = net->sctp.sndbuf_policy; sk->sk_data_ready = sctp_data_ready; sk->sk_write_space = sctp_write_space; sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); /* Get the receive buffer policy for this endpoint */ ep->rcvbuf_policy = net->sctp.rcvbuf_policy; /* Initialize the secret key used with cookie. */ get_random_bytes(ep->secret_key, sizeof(ep->secret_key)); /* SCTP-AUTH extensions*/ INIT_LIST_HEAD(&ep->endpoint_shared_keys); null_key = sctp_auth_shkey_create(0, gfp); if (!null_key) goto nomem; list_add(&null_key->key_list, &ep->endpoint_shared_keys); /* Allocate and initialize transorms arrays for supported HMACs. */ err = sctp_auth_init_hmacs(ep, gfp); if (err) goto nomem_hmacs; /* Add the null key to the endpoint shared keys list and * set the hmcas and chunks pointers. 
*/ ep->auth_hmacs_list = auth_hmacs; ep->auth_chunk_list = auth_chunks; ep->prsctp_enable = net->sctp.prsctp_enable; ep->reconf_enable = net->sctp.reconf_enable; /* Remember who we are attached to. */ ep->base.sk = sk; ep->base.net = sock_net(sk); sock_hold(ep->base.sk); return ep; nomem_hmacs: sctp_auth_destroy_keys(&ep->endpoint_shared_keys); nomem: /* Free all allocations */ kfree(auth_hmacs); kfree(auth_chunks); kfree(ep->digest); return NULL; } /* Create a sctp_endpoint with all that boring stuff initialized. * Returns NULL if there isn't enough memory. */ struct sctp_endpoint *sctp_endpoint_new(struct sock *sk, gfp_t gfp) { struct sctp_endpoint *ep; /* Build a local endpoint. */ ep = kzalloc(sizeof(*ep), gfp); if (!ep) goto fail; if (!sctp_endpoint_init(ep, sk, gfp)) goto fail_init; SCTP_DBG_OBJCNT_INC(ep); return ep; fail_init: kfree(ep); fail: return NULL; } /* Add an association to an endpoint. */ void sctp_endpoint_add_asoc(struct sctp_endpoint *ep, struct sctp_association *asoc) { struct sock *sk = ep->base.sk; /* If this is a temporary association, don't bother * since we'll be removing it shortly and don't * want anyone to find it anyway. */ if (asoc->temp) return; /* Now just add it to our list of asocs */ list_add_tail(&asoc->asocs, &ep->asocs); /* Increment the backlog value for a TCP-style listening socket. */ if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING)) sk->sk_ack_backlog++; } /* Free the endpoint structure. Delay cleanup until * all users have released their reference count on this structure. */ void sctp_endpoint_free(struct sctp_endpoint *ep) { ep->base.dead = true; inet_sk_set_state(ep->base.sk, SCTP_SS_CLOSED); /* Unlink this endpoint, so we can't find it again! */ sctp_unhash_endpoint(ep); sctp_endpoint_put(ep); } /* Final destructor for endpoint. */ static void sctp_endpoint_destroy_rcu(struct rcu_head *head) { struct sctp_endpoint *ep = container_of(head, struct sctp_endpoint, rcu); struct sock *sk = ep->base.sk; sctp_sk(sk)->ep = NULL; sock_put(sk); kfree(ep); SCTP_DBG_OBJCNT_DEC(ep); } static void sctp_endpoint_destroy(struct sctp_endpoint *ep) { struct sock *sk; if (unlikely(!ep->base.dead)) { WARN(1, "Attempt to destroy undead endpoint %p!\n", ep); return; } /* Free the digest buffer */ kfree(ep->digest); /* SCTP-AUTH: Free up AUTH releated data such as shared keys * chunks and hmacs arrays that were allocated */ sctp_auth_destroy_keys(&ep->endpoint_shared_keys); kfree(ep->auth_hmacs_list); kfree(ep->auth_chunk_list); /* AUTH - Free any allocated HMAC transform containers */ sctp_auth_destroy_hmacs(ep->auth_hmacs); /* Cleanup. */ sctp_inq_free(&ep->base.inqueue); sctp_bind_addr_free(&ep->base.bind_addr); memset(ep->secret_key, 0, sizeof(ep->secret_key)); sk = ep->base.sk; /* Remove and free the port */ if (sctp_sk(sk)->bind_hash) sctp_put_port(sk); call_rcu(&ep->rcu, sctp_endpoint_destroy_rcu); } /* Hold a reference to an endpoint. */ int sctp_endpoint_hold(struct sctp_endpoint *ep) { return refcount_inc_not_zero(&ep->base.refcnt); } /* Release a reference to an endpoint and clean up if there are * no more references. */ void sctp_endpoint_put(struct sctp_endpoint *ep) { if (refcount_dec_and_test(&ep->base.refcnt)) sctp_endpoint_destroy(ep); } /* Is this the endpoint we are looking for? 
*/ struct sctp_endpoint *sctp_endpoint_is_match(struct sctp_endpoint *ep, struct net *net, const union sctp_addr *laddr) { struct sctp_endpoint *retval = NULL; if ((htons(ep->base.bind_addr.port) == laddr->v4.sin_port) && net_eq(sock_net(ep->base.sk), net)) { if (sctp_bind_addr_match(&ep->base.bind_addr, laddr, sctp_sk(ep->base.sk))) retval = ep; } return retval; } /* Find the association that goes with this chunk. * We lookup the transport from hashtable at first, then get association * through t->assoc. */ struct sctp_association *sctp_endpoint_lookup_assoc( const struct sctp_endpoint *ep, const union sctp_addr *paddr, struct sctp_transport **transport) { struct sctp_association *asoc = NULL; struct sctp_transport *t; *transport = NULL; /* If the local port is not set, there can't be any associations * on this endpoint. */ if (!ep->base.bind_addr.port) return NULL; rcu_read_lock(); t = sctp_epaddr_lookup_transport(ep, paddr); if (!t) goto out; *transport = t; asoc = t->asoc; out: rcu_read_unlock(); return asoc; } /* Look for any peeled off association from the endpoint that matches the * given peer address. */ bool sctp_endpoint_is_peeled_off(struct sctp_endpoint *ep, const union sctp_addr *paddr) { struct sctp_sockaddr_entry *addr; struct sctp_bind_addr *bp; struct net *net = sock_net(ep->base.sk); bp = &ep->base.bind_addr; /* This function is called with the socket lock held, * so the address_list can not change. */ list_for_each_entry(addr, &bp->address_list, list) { if (sctp_has_association(net, &addr->a, paddr)) return true; } return false; } /* Do delayed input processing. This is scheduled by sctp_rcv(). * This may be called on BH or task time. */ static void sctp_endpoint_bh_rcv(struct work_struct *work) { struct sctp_endpoint *ep = container_of(work, struct sctp_endpoint, base.inqueue.immediate); struct sctp_association *asoc; struct sock *sk; struct net *net; struct sctp_transport *transport; struct sctp_chunk *chunk; struct sctp_inq *inqueue; union sctp_subtype subtype; enum sctp_state state; int error = 0; int first_time = 1; /* is this the first time through the loop */ if (ep->base.dead) return; asoc = NULL; inqueue = &ep->base.inqueue; sk = ep->base.sk; net = sock_net(sk); while (NULL != (chunk = sctp_inq_pop(inqueue))) { subtype = SCTP_ST_CHUNK(chunk->chunk_hdr->type); /* If the first chunk in the packet is AUTH, do special * processing specified in Section 6.3 of SCTP-AUTH spec */ if (first_time && (subtype.chunk == SCTP_CID_AUTH)) { struct sctp_chunkhdr *next_hdr; next_hdr = sctp_inq_peek(inqueue); if (!next_hdr) goto normal; /* If the next chunk is COOKIE-ECHO, skip the AUTH * chunk while saving a pointer to it so we can do * Authentication later (during cookie-echo * processing). */ if (next_hdr->type == SCTP_CID_COOKIE_ECHO) { chunk->auth_chunk = skb_clone(chunk->skb, GFP_ATOMIC); chunk->auth = 1; continue; } } normal: /* We might have grown an association since last we * looked, so try again. * * This happens when we've just processed our * COOKIE-ECHO chunk. */ if (NULL == chunk->asoc) { asoc = sctp_endpoint_lookup_assoc(ep, sctp_source(chunk), &transport); chunk->asoc = asoc; chunk->transport = transport; } state = asoc ? asoc->state : SCTP_STATE_CLOSED; if (sctp_auth_recv_cid(subtype.chunk, asoc) && !chunk->auth) continue; /* Remember where the last DATA chunk came from so we * know where to send the SACK. 
*/ if (asoc && sctp_chunk_is_data(chunk)) asoc->peer.last_data_from = chunk->transport; else { SCTP_INC_STATS(sock_net(ep->base.sk), SCTP_MIB_INCTRLCHUNKS); if (asoc) asoc->stats.ictrlchunks++; } if (chunk->transport) chunk->transport->last_time_heard = ktime_get(); error = sctp_do_sm(net, SCTP_EVENT_T_CHUNK, subtype, state, ep, asoc, chunk, GFP_ATOMIC); if (error && chunk) chunk->pdiscard = 1; /* Check to see if the endpoint is freed in response to * the incoming chunk. If so, get out of the while loop. */ if (!sctp_sk(sk)->ep) break; if (first_time) first_time = 0; } }
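The endpoint managed above is created and destroyed by ordinary socket calls from userspace: socket() ends up allocating it via sctp_endpoint_new(), bind() populates its bind address list, and close() releases it through sctp_endpoint_free(). A hedged sketch (requires SCTP support in the running kernel; the loopback address and port are arbitrary):

/* Sketch: drive the SCTP endpoint lifecycle from userspace.  Returns 1 if
 * SCTP is not available or the bind/listen fails.
 */
#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct sockaddr_in addr;
	int fd;

	fd = socket(AF_INET, SOCK_STREAM, IPPROTO_SCTP);
	if (fd < 0)
		return 1;	/* SCTP not built into this kernel */

	memset(&addr, 0, sizeof(addr));
	addr.sin_family = AF_INET;
	addr.sin_port = htons(9899);
	addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);

	/* bind() fills the endpoint's bind address list; listen() makes it
	 * findable by sctp_endpoint_is_match()/sctp_endpoint_lookup_assoc(). */
	if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0 ||
	    listen(fd, 1) < 0) {
		close(fd);
		return 1;
	}

	close(fd);	/* tears the endpoint down again */
	return 0;
}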
// SPDX-License-Identifier: GPL-2.0 /* * super.c * * Copyright (c) 1999 Al Smith * * Portions derived from work (c) 1995,1996 Christian Vogelgsang. */ #include <linux/init.h> #include <linux/module.h> #include <linux/exportfs.h> #include <linux/slab.h> #include <linux/buffer_head.h> #include <linux/vfs.h> #include "efs.h" #include <linux/efs_vh.h> #include <linux/efs_fs_sb.h> static int efs_statfs(struct dentry *dentry, struct kstatfs *buf); static int efs_fill_super(struct super_block *s, void *d, int silent); static struct dentry *efs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { return mount_bdev(fs_type, flags, dev_name, data, efs_fill_super); } static void efs_kill_sb(struct super_block *s) { struct efs_sb_info *sbi = SUPER_INFO(s); kill_block_super(s); kfree(sbi); } static struct file_system_type efs_fs_type = { .owner = THIS_MODULE, .name = "efs", .mount = efs_mount, .kill_sb = efs_kill_sb, .fs_flags = FS_REQUIRES_DEV, }; MODULE_ALIAS_FS("efs"); static struct pt_types sgi_pt_types[] = { {0x00, "SGI vh"}, {0x01, "SGI trkrepl"}, {0x02, "SGI secrepl"}, {0x03, "SGI raw"}, {0x04, "SGI bsd"}, {SGI_SYSV, "SGI sysv"}, {0x06, "SGI vol"}, {SGI_EFS, "SGI efs"}, {0x08, "SGI lv"}, {0x09, "SGI rlv"}, {0x0A, "SGI xfs"}, {0x0B, "SGI xfslog"}, {0x0C, "SGI xlv"}, {0x82, "Linux swap"}, {0x83, "Linux native"}, {0, NULL} }; static struct kmem_cache * efs_inode_cachep; static struct inode *efs_alloc_inode(struct super_block *sb) { struct efs_inode_info *ei; ei = kmem_cache_alloc(efs_inode_cachep, GFP_KERNEL); if (!ei) return NULL; return &ei->vfs_inode; } static void efs_i_callback(struct rcu_head *head) { struct inode *inode = container_of(head, struct inode, i_rcu); kmem_cache_free(efs_inode_cachep, INODE_INFO(inode)); } static void efs_destroy_inode(struct inode *inode) { call_rcu(&inode->i_rcu, efs_i_callback); } static void init_once(void *foo) { struct efs_inode_info *ei = (struct efs_inode_info *) foo; inode_init_once(&ei->vfs_inode); } static int __init init_inodecache(void) { efs_inode_cachep = kmem_cache_create("efs_inode_cache", sizeof(struct efs_inode_info), 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD| SLAB_ACCOUNT, init_once); if (efs_inode_cachep == NULL)
return -ENOMEM; return 0; } static void destroy_inodecache(void) { /* * Make sure all delayed rcu free inodes are flushed before we * destroy cache. */ rcu_barrier(); kmem_cache_destroy(efs_inode_cachep); } static int efs_remount(struct super_block *sb, int *flags, char *data) { sync_filesystem(sb); *flags |= SB_RDONLY; return 0; } static const struct super_operations efs_superblock_operations = { .alloc_inode = efs_alloc_inode, .destroy_inode = efs_destroy_inode, .statfs = efs_statfs, .remount_fs = efs_remount, }; static const struct export_operations efs_export_ops = { .fh_to_dentry = efs_fh_to_dentry, .fh_to_parent = efs_fh_to_parent, .get_parent = efs_get_parent, }; static int __init init_efs_fs(void) { int err; pr_info(EFS_VERSION" - http://aeschi.ch.eu.org/efs/\n"); err = init_inodecache(); if (err) goto out1; err = register_filesystem(&efs_fs_type); if (err) goto out; return 0; out: destroy_inodecache(); out1: return err; } static void __exit exit_efs_fs(void) { unregister_filesystem(&efs_fs_type); destroy_inodecache(); } module_init(init_efs_fs) module_exit(exit_efs_fs) static efs_block_t efs_validate_vh(struct volume_header *vh) { int i; __be32 cs, *ui; int csum; efs_block_t sblock = 0; /* shuts up gcc */ struct pt_types *pt_entry; int pt_type, slice = -1; if (be32_to_cpu(vh->vh_magic) != VHMAGIC) { /* * assume that we're dealing with a partition and allow * read_super() to try and detect a valid superblock * on the next block. */ return 0; } ui = ((__be32 *) (vh + 1)) - 1; for(csum = 0; ui >= ((__be32 *) vh);) { cs = *ui--; csum += be32_to_cpu(cs); } if (csum) { pr_warn("SGI disklabel: checksum bad, label corrupted\n"); return 0; } #ifdef DEBUG pr_debug("bf: \"%16s\"\n", vh->vh_bootfile); for(i = 0; i < NVDIR; i++) { int j; char name[VDNAMESIZE+1]; for(j = 0; j < VDNAMESIZE; j++) { name[j] = vh->vh_vd[i].vd_name[j]; } name[j] = (char) 0; if (name[0]) { pr_debug("vh: %8s block: 0x%08x size: 0x%08x\n", name, (int) be32_to_cpu(vh->vh_vd[i].vd_lbn), (int) be32_to_cpu(vh->vh_vd[i].vd_nbytes)); } } #endif for(i = 0; i < NPARTAB; i++) { pt_type = (int) be32_to_cpu(vh->vh_pt[i].pt_type); for(pt_entry = sgi_pt_types; pt_entry->pt_name; pt_entry++) { if (pt_type == pt_entry->pt_type) break; } #ifdef DEBUG if (be32_to_cpu(vh->vh_pt[i].pt_nblks)) { pr_debug("pt %2d: start: %08d size: %08d type: 0x%02x (%s)\n", i, (int)be32_to_cpu(vh->vh_pt[i].pt_firstlbn), (int)be32_to_cpu(vh->vh_pt[i].pt_nblks), pt_type, (pt_entry->pt_name) ? pt_entry->pt_name : "unknown"); } #endif if (IS_EFS(pt_type)) { sblock = be32_to_cpu(vh->vh_pt[i].pt_firstlbn); slice = i; } } if (slice == -1) { pr_notice("partition table contained no EFS partitions\n"); #ifdef DEBUG } else { pr_info("using slice %d (type %s, offset 0x%x)\n", slice, (pt_entry->pt_name) ? 
pt_entry->pt_name : "unknown", sblock); #endif } return sblock; } static int efs_validate_super(struct efs_sb_info *sb, struct efs_super *super) { if (!IS_EFS_MAGIC(be32_to_cpu(super->fs_magic))) return -1; sb->fs_magic = be32_to_cpu(super->fs_magic); sb->total_blocks = be32_to_cpu(super->fs_size); sb->first_block = be32_to_cpu(super->fs_firstcg); sb->group_size = be32_to_cpu(super->fs_cgfsize); sb->data_free = be32_to_cpu(super->fs_tfree); sb->inode_free = be32_to_cpu(super->fs_tinode); sb->inode_blocks = be16_to_cpu(super->fs_cgisize); sb->total_groups = be16_to_cpu(super->fs_ncg); return 0; } static int efs_fill_super(struct super_block *s, void *d, int silent) { struct efs_sb_info *sb; struct buffer_head *bh; struct inode *root; sb = kzalloc(sizeof(struct efs_sb_info), GFP_KERNEL); if (!sb) return -ENOMEM; s->s_fs_info = sb; s->s_magic = EFS_SUPER_MAGIC; if (!sb_set_blocksize(s, EFS_BLOCKSIZE)) { pr_err("device does not support %d byte blocks\n", EFS_BLOCKSIZE); return -EINVAL; } /* read the vh (volume header) block */ bh = sb_bread(s, 0); if (!bh) { pr_err("cannot read volume header\n"); return -EIO; } /* * if this returns zero then we didn't find any partition table. * this isn't (yet) an error - just assume for the moment that * the device is valid and go on to search for a superblock. */ sb->fs_start = efs_validate_vh((struct volume_header *) bh->b_data); brelse(bh); if (sb->fs_start == -1) { return -EINVAL; } bh = sb_bread(s, sb->fs_start + EFS_SUPER); if (!bh) { pr_err("cannot read superblock\n"); return -EIO; } if (efs_validate_super(sb, (struct efs_super *) bh->b_data)) { #ifdef DEBUG pr_warn("invalid superblock at block %u\n", sb->fs_start + EFS_SUPER); #endif brelse(bh); return -EINVAL; } brelse(bh); if (!sb_rdonly(s)) { #ifdef DEBUG pr_info("forcing read-only mode\n"); #endif s->s_flags |= SB_RDONLY; } s->s_op = &efs_superblock_operations; s->s_export_op = &efs_export_ops; root = efs_iget(s, EFS_ROOTINODE); if (IS_ERR(root)) { pr_err("get root inode failed\n"); return PTR_ERR(root); } s->s_root = d_make_root(root); if (!(s->s_root)) { pr_err("get root dentry failed\n"); return -ENOMEM; } return 0; } static int efs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; struct efs_sb_info *sbi = SUPER_INFO(sb); u64 id = huge_encode_dev(sb->s_bdev->bd_dev); buf->f_type = EFS_SUPER_MAGIC; /* efs magic number */ buf->f_bsize = EFS_BLOCKSIZE; /* blocksize */ buf->f_blocks = sbi->total_groups * /* total data blocks */ (sbi->group_size - sbi->inode_blocks); buf->f_bfree = sbi->data_free; /* free data blocks */ buf->f_bavail = sbi->data_free; /* free blocks for non-root */ buf->f_files = sbi->total_groups * /* total inodes */ sbi->inode_blocks * (EFS_BLOCKSIZE / sizeof(struct efs_dinode)); buf->f_ffree = sbi->inode_free; /* free inodes */ buf->f_fsid.val[0] = (u32)id; buf->f_fsid.val[1] = (u32)(id >> 32); buf->f_namelen = EFS_MAXNAMELEN; /* max filename length */ return 0; }
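Because efs_fill_super() forces SB_RDONLY and the driver registers the "efs" filesystem type, mounting from userspace is a plain read-only mount(2) call. A sketch with placeholder device and mount-point paths (any writable mount attempt is simply forced back to read-only by the driver):

/* Sketch: mount an EFS volume read-only.  "/dev/sr0" and "/mnt/efs" are
 * placeholders for a real device node and an existing mount point.
 */
#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	if (mount("/dev/sr0", "/mnt/efs", "efs", MS_RDONLY, NULL) < 0) {
		perror("mount");
		return 1;
	}
	return 0;
}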
/* * Core registration and callback routines for MTD * drivers and users. * * Copyright © 1999-2010 David Woodhouse <dwmw2@infradead.org> * Copyright © 2006 Red Hat UK Limited * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ #include <linux/module.h> #include <linux/kernel.h> #include <linux/ptrace.h> #include <linux/seq_file.h> #include <linux/string.h> #include <linux/timer.h> #include <linux/major.h> #include <linux/fs.h> #include <linux/err.h> #include <linux/ioctl.h> #include <linux/init.h> #include <linux/of.h> #include <linux/proc_fs.h> #include <linux/idr.h> #include <linux/backing-dev.h> #include <linux/gfp.h> #include <linux/slab.h> #include <linux/reboot.h> #include <linux/leds.h> #include <linux/debugfs.h> #include <linux/mtd/mtd.h> #include <linux/mtd/partitions.h> #include "mtdcore.h" struct backing_dev_info *mtd_bdi; #ifdef CONFIG_PM_SLEEP static int mtd_cls_suspend(struct device *dev) { struct mtd_info *mtd = dev_get_drvdata(dev); return mtd ?
mtd_suspend(mtd) : 0; } static int mtd_cls_resume(struct device *dev) { struct mtd_info *mtd = dev_get_drvdata(dev); if (mtd) mtd_resume(mtd); return 0; } static SIMPLE_DEV_PM_OPS(mtd_cls_pm_ops, mtd_cls_suspend, mtd_cls_resume); #define MTD_CLS_PM_OPS (&mtd_cls_pm_ops) #else #define MTD_CLS_PM_OPS NULL #endif static struct class mtd_class = { .name = "mtd", .owner = THIS_MODULE, .pm = MTD_CLS_PM_OPS, }; static DEFINE_IDR(mtd_idr); /* These are exported solely for the purpose of mtd_blkdevs.c. You should not use them for _anything_ else */ DEFINE_MUTEX(mtd_table_mutex); EXPORT_SYMBOL_GPL(mtd_table_mutex); struct mtd_info *__mtd_next_device(int i) { return idr_get_next(&mtd_idr, &i); } EXPORT_SYMBOL_GPL(__mtd_next_device); static LIST_HEAD(mtd_notifiers); #define MTD_DEVT(index) MKDEV(MTD_CHAR_MAJOR, (index)*2) /* REVISIT once MTD uses the driver model better, whoever allocates * the mtd_info will probably want to use the release() hook... */ static void mtd_release(struct device *dev) { struct mtd_info *mtd = dev_get_drvdata(dev); dev_t index = MTD_DEVT(mtd->index); /* remove /dev/mtdXro node */ device_destroy(&mtd_class, index + 1); } static ssize_t mtd_type_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); char *type; switch (mtd->type) { case MTD_ABSENT: type = "absent"; break; case MTD_RAM: type = "ram"; break; case MTD_ROM: type = "rom"; break; case MTD_NORFLASH: type = "nor"; break; case MTD_NANDFLASH: type = "nand"; break; case MTD_DATAFLASH: type = "dataflash"; break; case MTD_UBIVOLUME: type = "ubi"; break; case MTD_MLCNANDFLASH: type = "mlc-nand"; break; default: type = "unknown"; } return snprintf(buf, PAGE_SIZE, "%s\n", type); } static DEVICE_ATTR(type, S_IRUGO, mtd_type_show, NULL); static ssize_t mtd_flags_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); return snprintf(buf, PAGE_SIZE, "0x%lx\n", (unsigned long)mtd->flags); } static DEVICE_ATTR(flags, S_IRUGO, mtd_flags_show, NULL); static ssize_t mtd_size_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); return snprintf(buf, PAGE_SIZE, "%llu\n", (unsigned long long)mtd->size); } static DEVICE_ATTR(size, S_IRUGO, mtd_size_show, NULL); static ssize_t mtd_erasesize_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); return snprintf(buf, PAGE_SIZE, "%lu\n", (unsigned long)mtd->erasesize); } static DEVICE_ATTR(erasesize, S_IRUGO, mtd_erasesize_show, NULL); static ssize_t mtd_writesize_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); return snprintf(buf, PAGE_SIZE, "%lu\n", (unsigned long)mtd->writesize); } static DEVICE_ATTR(writesize, S_IRUGO, mtd_writesize_show, NULL); static ssize_t mtd_subpagesize_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); unsigned int subpagesize = mtd->writesize >> mtd->subpage_sft; return snprintf(buf, PAGE_SIZE, "%u\n", subpagesize); } static DEVICE_ATTR(subpagesize, S_IRUGO, mtd_subpagesize_show, NULL); static ssize_t mtd_oobsize_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); return snprintf(buf, PAGE_SIZE, "%lu\n", (unsigned long)mtd->oobsize); } static DEVICE_ATTR(oobsize, S_IRUGO, mtd_oobsize_show, NULL); static ssize_t mtd_oobavail_show(struct device 
*dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); return snprintf(buf, PAGE_SIZE, "%u\n", mtd->oobavail); } static DEVICE_ATTR(oobavail, S_IRUGO, mtd_oobavail_show, NULL); static ssize_t mtd_numeraseregions_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); return snprintf(buf, PAGE_SIZE, "%u\n", mtd->numeraseregions); } static DEVICE_ATTR(numeraseregions, S_IRUGO, mtd_numeraseregions_show, NULL); static ssize_t mtd_name_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); return snprintf(buf, PAGE_SIZE, "%s\n", mtd->name); } static DEVICE_ATTR(name, S_IRUGO, mtd_name_show, NULL); static ssize_t mtd_ecc_strength_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); return snprintf(buf, PAGE_SIZE, "%u\n", mtd->ecc_strength); } static DEVICE_ATTR(ecc_strength, S_IRUGO, mtd_ecc_strength_show, NULL); static ssize_t mtd_bitflip_threshold_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); return snprintf(buf, PAGE_SIZE, "%u\n", mtd->bitflip_threshold); } static ssize_t mtd_bitflip_threshold_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct mtd_info *mtd = dev_get_drvdata(dev); unsigned int bitflip_threshold; int retval; retval = kstrtouint(buf, 0, &bitflip_threshold); if (retval) return retval; mtd->bitflip_threshold = bitflip_threshold; return count; } static DEVICE_ATTR(bitflip_threshold, S_IRUGO | S_IWUSR, mtd_bitflip_threshold_show, mtd_bitflip_threshold_store); static ssize_t mtd_ecc_step_size_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); return snprintf(buf, PAGE_SIZE, "%u\n", mtd->ecc_step_size); } static DEVICE_ATTR(ecc_step_size, S_IRUGO, mtd_ecc_step_size_show, NULL); static ssize_t mtd_ecc_stats_corrected_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); struct mtd_ecc_stats *ecc_stats = &mtd->ecc_stats; return snprintf(buf, PAGE_SIZE, "%u\n", ecc_stats->corrected); } static DEVICE_ATTR(corrected_bits, S_IRUGO, mtd_ecc_stats_corrected_show, NULL); static ssize_t mtd_ecc_stats_errors_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); struct mtd_ecc_stats *ecc_stats = &mtd->ecc_stats; return snprintf(buf, PAGE_SIZE, "%u\n", ecc_stats->failed); } static DEVICE_ATTR(ecc_failures, S_IRUGO, mtd_ecc_stats_errors_show, NULL); static ssize_t mtd_badblocks_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); struct mtd_ecc_stats *ecc_stats = &mtd->ecc_stats; return snprintf(buf, PAGE_SIZE, "%u\n", ecc_stats->badblocks); } static DEVICE_ATTR(bad_blocks, S_IRUGO, mtd_badblocks_show, NULL); static ssize_t mtd_bbtblocks_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); struct mtd_ecc_stats *ecc_stats = &mtd->ecc_stats; return snprintf(buf, PAGE_SIZE, "%u\n", ecc_stats->bbtblocks); } static DEVICE_ATTR(bbt_blocks, S_IRUGO, mtd_bbtblocks_show, NULL); static struct attribute *mtd_attrs[] = { &dev_attr_type.attr, &dev_attr_flags.attr, &dev_attr_size.attr, &dev_attr_erasesize.attr, &dev_attr_writesize.attr, &dev_attr_subpagesize.attr, 
&dev_attr_oobsize.attr, &dev_attr_oobavail.attr, &dev_attr_numeraseregions.attr, &dev_attr_name.attr, &dev_attr_ecc_strength.attr, &dev_attr_ecc_step_size.attr, &dev_attr_corrected_bits.attr, &dev_attr_ecc_failures.attr, &dev_attr_bad_blocks.attr, &dev_attr_bbt_blocks.attr, &dev_attr_bitflip_threshold.attr, NULL, }; ATTRIBUTE_GROUPS(mtd); static const struct device_type mtd_devtype = { .name = "mtd", .groups = mtd_groups, .release = mtd_release, }; #ifndef CONFIG_MMU unsigned mtd_mmap_capabilities(struct mtd_info *mtd) { switch (mtd->type) { case MTD_RAM: return NOMMU_MAP_COPY | NOMMU_MAP_DIRECT | NOMMU_MAP_EXEC | NOMMU_MAP_READ | NOMMU_MAP_WRITE; case MTD_ROM: return NOMMU_MAP_COPY | NOMMU_MAP_DIRECT | NOMMU_MAP_EXEC | NOMMU_MAP_READ; default: return NOMMU_MAP_COPY; } } EXPORT_SYMBOL_GPL(mtd_mmap_capabilities); #endif static int mtd_reboot_notifier(struct notifier_block *n, unsigned long state, void *cmd) { struct mtd_info *mtd; mtd = container_of(n, struct mtd_info, reboot_notifier); mtd->_reboot(mtd); return NOTIFY_DONE; } /** * mtd_wunit_to_pairing_info - get pairing information of a wunit * @mtd: pointer to new MTD device info structure * @wunit: write unit we are interested in * @info: returned pairing information * * Retrieve pairing information associated to the wunit. * This is mainly useful when dealing with MLC/TLC NANDs where pages can be * paired together, and where programming a page may influence the page it is * paired with. * The notion of page is replaced by the term wunit (write-unit) to stay * consistent with the ->writesize field. * * The @wunit argument can be extracted from an absolute offset using * mtd_offset_to_wunit(). @info is filled with the pairing information attached * to @wunit. * * From the pairing info the MTD user can find all the wunits paired with * @wunit using the following loop: * * for (i = 0; i < mtd_pairing_groups(mtd); i++) { * info.pair = i; * mtd_pairing_info_to_wunit(mtd, &info); * ... * } */ int mtd_wunit_to_pairing_info(struct mtd_info *mtd, int wunit, struct mtd_pairing_info *info) { int npairs = mtd_wunit_per_eb(mtd) / mtd_pairing_groups(mtd); if (wunit < 0 || wunit >= npairs) return -EINVAL; if (mtd->pairing && mtd->pairing->get_info) return mtd->pairing->get_info(mtd, wunit, info); info->group = 0; info->pair = wunit; return 0; } EXPORT_SYMBOL_GPL(mtd_wunit_to_pairing_info); /** * mtd_pairing_info_to_wunit - get wunit from pairing information * @mtd: pointer to new MTD device info structure * @info: pairing information struct * * Returns a positive number representing the wunit associated to the info * struct, or a negative error code. * * This is the reverse of mtd_wunit_to_pairing_info(), and can help one to * iterate over all wunits of a given pair (see mtd_wunit_to_pairing_info() * doc). * * It can also be used to only program the first page of each pair (i.e. 
* page attached to group 0), which allows one to use an MLC NAND in * software-emulated SLC mode: * * info.group = 0; * npairs = mtd_wunit_per_eb(mtd) / mtd_pairing_groups(mtd); * for (info.pair = 0; info.pair < npairs; info.pair++) { * wunit = mtd_pairing_info_to_wunit(mtd, &info); * mtd_write(mtd, mtd_wunit_to_offset(mtd, blkoffs, wunit), * mtd->writesize, &retlen, buf + (i * mtd->writesize)); * } */ int mtd_pairing_info_to_wunit(struct mtd_info *mtd, const struct mtd_pairing_info *info) { int ngroups = mtd_pairing_groups(mtd); int npairs = mtd_wunit_per_eb(mtd) / ngroups; if (!info || info->pair < 0 || info->pair >= npairs || info->group < 0 || info->group >= ngroups) return -EINVAL; if (mtd->pairing && mtd->pairing->get_wunit) return mtd->pairing->get_wunit(mtd, info); return info->pair; } EXPORT_SYMBOL_GPL(mtd_pairing_info_to_wunit); /** * mtd_pairing_groups - get the number of pairing groups * @mtd: pointer to new MTD device info structure * * Returns the number of pairing groups. * * This number is usually equal to the number of bits exposed by a single * cell, and can be used in conjunction with mtd_pairing_info_to_wunit() * to iterate over all pages of a given pair. */ int mtd_pairing_groups(struct mtd_info *mtd) { if (!mtd->pairing || !mtd->pairing->ngroups) return 1; return mtd->pairing->ngroups; } EXPORT_SYMBOL_GPL(mtd_pairing_groups); static struct dentry *dfs_dir_mtd; /** * add_mtd_device - register an MTD device * @mtd: pointer to new MTD device info structure * * Add a device to the list of MTD devices present in the system, and * notify each currently active MTD 'user' of its arrival. Returns * zero on success or non-zero on failure. */ int add_mtd_device(struct mtd_info *mtd) { struct mtd_notifier *not; int i, error; /* * May occur, for instance, on buggy drivers which call * mtd_device_parse_register() multiple times on the same master MTD, * especially with CONFIG_MTD_PARTITIONED_MASTER=y. */ if (WARN_ONCE(mtd->dev.type, "MTD already registered\n")) return -EEXIST; BUG_ON(mtd->writesize == 0); if (WARN_ON((!mtd->erasesize || !mtd->_erase) && !(mtd->flags & MTD_NO_ERASE))) return -EINVAL; mutex_lock(&mtd_table_mutex); i = idr_alloc(&mtd_idr, mtd, 0, 0, GFP_KERNEL); if (i < 0) { error = i; goto fail_locked; } mtd->index = i; mtd->usecount = 0; /* default value if not set by driver */ if (mtd->bitflip_threshold == 0) mtd->bitflip_threshold = mtd->ecc_strength; if (is_power_of_2(mtd->erasesize)) mtd->erasesize_shift = ffs(mtd->erasesize) - 1; else mtd->erasesize_shift = 0; if (is_power_of_2(mtd->writesize)) mtd->writesize_shift = ffs(mtd->writesize) - 1; else mtd->writesize_shift = 0; mtd->erasesize_mask = (1 << mtd->erasesize_shift) - 1; mtd->writesize_mask = (1 << mtd->writesize_shift) - 1; /* Some chips always power up locked. Unlock them now */ if ((mtd->flags & MTD_WRITEABLE) && (mtd->flags & MTD_POWERUP_LOCK)) { error = mtd_unlock(mtd, 0, mtd->size); if (error && error != -EOPNOTSUPP) printk(KERN_WARNING "%s: unlock failed, writes may not work\n", mtd->name); /* Ignore unlock failures? */ error = 0; } /* Caller should have set dev.parent to match the * physical device, if appropriate. 
*/ mtd->dev.type = &mtd_devtype; mtd->dev.class = &mtd_class; mtd->dev.devt = MTD_DEVT(i); dev_set_name(&mtd->dev, "mtd%d", i); dev_set_drvdata(&mtd->dev, mtd); of_node_get(mtd_get_of_node(mtd)); error = device_register(&mtd->dev); if (error) goto fail_added; if (!IS_ERR_OR_NULL(dfs_dir_mtd)) { mtd->dbg.dfs_dir = debugfs_create_dir(dev_name(&mtd->dev), dfs_dir_mtd); if (IS_ERR_OR_NULL(mtd->dbg.dfs_dir)) { pr_debug("mtd device %s won't show data in debugfs\n", dev_name(&mtd->dev)); } } device_create(&mtd_class, mtd->dev.parent, MTD_DEVT(i) + 1, NULL, "mtd%dro", i); pr_debug("mtd: Giving out device %d to %s\n", i, mtd->name); /* No need to get a refcount on the module containing the notifier, since we hold the mtd_table_mutex */ list_for_each_entry(not, &mtd_notifiers, list) not->add(mtd); mutex_unlock(&mtd_table_mutex); /* We _know_ we aren't being removed, because our caller is still holding us here. So none of this try_ nonsense, and no bitching about it either. :) */ __module_get(THIS_MODULE); return 0; fail_added: of_node_put(mtd_get_of_node(mtd)); idr_remove(&mtd_idr, i); fail_locked: mutex_unlock(&mtd_table_mutex); return error; } /** * del_mtd_device - unregister an MTD device * @mtd: pointer to MTD device info structure * * Remove a device from the list of MTD devices present in the system, * and notify each currently active MTD 'user' of its departure. * Returns zero on success or 1 on failure, which currently will happen * if the requested device does not appear to be present in the list. */ int del_mtd_device(struct mtd_info *mtd) { int ret; struct mtd_notifier *not; mutex_lock(&mtd_table_mutex); debugfs_remove_recursive(mtd->dbg.dfs_dir); if (idr_find(&mtd_idr, mtd->index) != mtd) { ret = -ENODEV; goto out_error; } /* No need to get a refcount on the module containing the notifier, since we hold the mtd_table_mutex */ list_for_each_entry(not, &mtd_notifiers, list) not->remove(mtd); if (mtd->usecount) { printk(KERN_NOTICE "Removing MTD device #%d (%s) with use count %d\n", mtd->index, mtd->name, mtd->usecount); ret = -EBUSY; } else { device_unregister(&mtd->dev); idr_remove(&mtd_idr, mtd->index); of_node_put(mtd_get_of_node(mtd)); module_put(THIS_MODULE); ret = 0; } out_error: mutex_unlock(&mtd_table_mutex); return ret; } /* * Set a few defaults based on the parent devices, if not provided by the * driver */ static void mtd_set_dev_defaults(struct mtd_info *mtd) { if (mtd->dev.parent) { if (!mtd->owner && mtd->dev.parent->driver) mtd->owner = mtd->dev.parent->driver->owner; if (!mtd->name) mtd->name = dev_name(mtd->dev.parent); } else { pr_debug("mtd device won't show a device symlink in sysfs\n"); } } /** * mtd_device_parse_register - parse partitions and register an MTD device. * * @mtd: the MTD device to register * @types: the list of MTD partition probes to try, see * 'parse_mtd_partitions()' for more information * @parser_data: MTD partition parser-specific data * @parts: fallback partition information to register, if parsing fails; * only valid if %nr_parts > %0 * @nr_parts: the number of partitions in parts, if zero then the full * MTD device is registered if no partition info is found * * This function aggregates MTD partitions parsing (done by * 'parse_mtd_partitions()') and MTD device and partitions registering. It * basically follows the most common pattern found in many MTD drivers: * * * If the MTD_PARTITIONED_MASTER option is set, then the device as a whole is * registered first. 
* * Then It tries to probe partitions on MTD device @mtd using parsers * specified in @types (if @types is %NULL, then the default list of parsers * is used, see 'parse_mtd_partitions()' for more information). If none are * found this functions tries to fallback to information specified in * @parts/@nr_parts. * * If no partitions were found this function just registers the MTD device * @mtd and exits. * * Returns zero in case of success and a negative error code in case of failure. */ int mtd_device_parse_register(struct mtd_info *mtd, const char * const *types, struct mtd_part_parser_data *parser_data, const struct mtd_partition *parts, int nr_parts) { int ret; mtd_set_dev_defaults(mtd); if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER)) { ret = add_mtd_device(mtd); if (ret) return ret; } /* Prefer parsed partitions over driver-provided fallback */ ret = parse_mtd_partitions(mtd, types, parser_data); if (ret == -EPROBE_DEFER) goto out; if (ret > 0) ret = 0; else if (nr_parts) ret = add_mtd_partitions(mtd, parts, nr_parts); else if (!device_is_registered(&mtd->dev)) ret = add_mtd_device(mtd); else ret = 0; if (ret) goto out; /* * FIXME: some drivers unfortunately call this function more than once. * So we have to check if we've already assigned the reboot notifier. * * Generally, we can make multiple calls work for most cases, but it * does cause problems with parse_mtd_partitions() above (e.g., * cmdlineparts will register partitions more than once). */ WARN_ONCE(mtd->_reboot && mtd->reboot_notifier.notifier_call, "MTD already registered\n"); if (mtd->_reboot && !mtd->reboot_notifier.notifier_call) { mtd->reboot_notifier.notifier_call = mtd_reboot_notifier; register_reboot_notifier(&mtd->reboot_notifier); } out: if (ret && device_is_registered(&mtd->dev)) del_mtd_device(mtd); return ret; } EXPORT_SYMBOL_GPL(mtd_device_parse_register); /** * mtd_device_unregister - unregister an existing MTD device. * * @master: the MTD device to unregister. This will unregister both the master * and any partitions if registered. */ int mtd_device_unregister(struct mtd_info *master) { int err; if (master->_reboot) unregister_reboot_notifier(&master->reboot_notifier); err = del_mtd_partitions(master); if (err) return err; if (!device_is_registered(&master->dev)) return 0; return del_mtd_device(master); } EXPORT_SYMBOL_GPL(mtd_device_unregister); /** * register_mtd_user - register a 'user' of MTD devices. * @new: pointer to notifier info structure * * Registers a pair of callbacks function to be called upon addition * or removal of MTD devices. Causes the 'add' callback to be immediately * invoked for each MTD device currently present in the system. */ void register_mtd_user (struct mtd_notifier *new) { struct mtd_info *mtd; mutex_lock(&mtd_table_mutex); list_add(&new->list, &mtd_notifiers); __module_get(THIS_MODULE); mtd_for_each_device(mtd) new->add(mtd); mutex_unlock(&mtd_table_mutex); } EXPORT_SYMBOL_GPL(register_mtd_user); /** * unregister_mtd_user - unregister a 'user' of MTD devices. * @old: pointer to notifier info structure * * Removes a callback function pair from the list of 'users' to be * notified upon addition or removal of MTD devices. Causes the * 'remove' callback to be immediately invoked for each MTD device * currently present in the system. 
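 *
 * Illustrative sketch (not from the original source) of how the
 * register_mtd_user()/unregister_mtd_user() pair is typically used with a
 * struct mtd_notifier; the callback names here are hypothetical:
 *
 *	static void example_add_mtd(struct mtd_info *mtd)
 *	{
 *		pr_info("saw new mtd%d (%s)\n", mtd->index, mtd->name);
 *	}
 *
 *	static void example_remove_mtd(struct mtd_info *mtd)
 *	{
 *		pr_info("mtd%d is going away\n", mtd->index);
 *	}
 *
 *	static struct mtd_notifier example_notifier = {
 *		.add	= example_add_mtd,
 *		.remove	= example_remove_mtd,
 *	};
 *
 *	register_mtd_user(&example_notifier);
 *	...
 *	unregister_mtd_user(&example_notifier);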
*/ int unregister_mtd_user (struct mtd_notifier *old) { struct mtd_info *mtd; mutex_lock(&mtd_table_mutex); module_put(THIS_MODULE); mtd_for_each_device(mtd) old->remove(mtd); list_del(&old->list); mutex_unlock(&mtd_table_mutex); return 0; } EXPORT_SYMBOL_GPL(unregister_mtd_user); /** * get_mtd_device - obtain a validated handle for an MTD device * @mtd: last known address of the required MTD device * @num: internal device number of the required MTD device * * Given a number and NULL address, return the num'th entry in the device * table, if any. Given an address and num == -1, search the device table * for a device with that address and return if it's still present. Given * both, return the num'th driver only if its address matches. Return * error code if not. */ struct mtd_info *get_mtd_device(struct mtd_info *mtd, int num) { struct mtd_info *ret = NULL, *other; int err = -ENODEV; mutex_lock(&mtd_table_mutex); if (num == -1) { mtd_for_each_device(other) { if (other == mtd) { ret = mtd; break; } } } else if (num >= 0) { ret = idr_find(&mtd_idr, num); if (mtd && mtd != ret) ret = NULL; } if (!ret) { ret = ERR_PTR(err); goto out; } err = __get_mtd_device(ret); if (err) ret = ERR_PTR(err); out: mutex_unlock(&mtd_table_mutex); return ret; } EXPORT_SYMBOL_GPL(get_mtd_device); int __get_mtd_device(struct mtd_info *mtd) { int err; if (!try_module_get(mtd->owner)) return -ENODEV; if (mtd->_get_device) { err = mtd->_get_device(mtd); if (err) { module_put(mtd->owner); return err; } } mtd->usecount++; return 0; } EXPORT_SYMBOL_GPL(__get_mtd_device); /** * get_mtd_device_nm - obtain a validated handle for an MTD device by * device name * @name: MTD device name to open * * This function returns MTD device description structure in case of * success and an error code in case of failure. */ struct mtd_info *get_mtd_device_nm(const char *name) { int err = -ENODEV; struct mtd_info *mtd = NULL, *other; mutex_lock(&mtd_table_mutex); mtd_for_each_device(other) { if (!strcmp(name, other->name)) { mtd = other; break; } } if (!mtd) goto out_unlock; err = __get_mtd_device(mtd); if (err) goto out_unlock; mutex_unlock(&mtd_table_mutex); return mtd; out_unlock: mutex_unlock(&mtd_table_mutex); return ERR_PTR(err); } EXPORT_SYMBOL_GPL(get_mtd_device_nm); void put_mtd_device(struct mtd_info *mtd) { mutex_lock(&mtd_table_mutex); __put_mtd_device(mtd); mutex_unlock(&mtd_table_mutex); } EXPORT_SYMBOL_GPL(put_mtd_device); void __put_mtd_device(struct mtd_info *mtd) { --mtd->usecount; BUG_ON(mtd->usecount < 0); if (mtd->_put_device) mtd->_put_device(mtd); module_put(mtd->owner); } EXPORT_SYMBOL_GPL(__put_mtd_device); /* * Erase is an synchronous operation. Device drivers are epected to return a * negative error code if the operation failed and update instr->fail_addr * to point the portion that was not properly erased. */ int mtd_erase(struct mtd_info *mtd, struct erase_info *instr) { instr->fail_addr = MTD_FAIL_ADDR_UNKNOWN; if (!mtd->erasesize || !mtd->_erase) return -ENOTSUPP; if (instr->addr >= mtd->size || instr->len > mtd->size - instr->addr) return -EINVAL; if (!(mtd->flags & MTD_WRITEABLE)) return -EROFS; if (!instr->len) return 0; ledtrig_mtd_activity(); return mtd->_erase(mtd, instr); } EXPORT_SYMBOL_GPL(mtd_erase); /* * This stuff for eXecute-In-Place. phys is optional and may be set to NULL. 
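 *
 * Illustrative sketch (not from the original source) of the usual
 * point/unpoint pattern a caller is expected to follow; 'ofs' and 'len'
 * are hypothetical:
 *
 *	size_t retlen;
 *	void *virt;
 *
 *	if (!mtd_point(mtd, ofs, len, &retlen, &virt, NULL)) {
 *		... access up to retlen bytes directly through virt ...
 *		mtd_unpoint(mtd, ofs, retlen);
 *	}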
*/ int mtd_point(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, void **virt, resource_size_t *phys) { *retlen = 0; *virt = NULL; if (phys) *phys = 0; if (!mtd->_point) return -EOPNOTSUPP; if (from < 0 || from >= mtd->size || len > mtd->size - from) return -EINVAL; if (!len) return 0; return mtd->_point(mtd, from, len, retlen, virt, phys); } EXPORT_SYMBOL_GPL(mtd_point); /* We probably shouldn't allow XIP if the unpoint isn't a NULL */ int mtd_unpoint(struct mtd_info *mtd, loff_t from, size_t len) { if (!mtd->_unpoint) return -EOPNOTSUPP; if (from < 0 || from >= mtd->size || len > mtd->size - from) return -EINVAL; if (!len) return 0; return mtd->_unpoint(mtd, from, len); } EXPORT_SYMBOL_GPL(mtd_unpoint); /* * Allow NOMMU mmap() to directly map the device (if not NULL) * - return the address to which the offset maps * - return -ENOSYS to indicate refusal to do the mapping */ unsigned long mtd_get_unmapped_area(struct mtd_info *mtd, unsigned long len, unsigned long offset, unsigned long flags) { size_t retlen; void *virt; int ret; ret = mtd_point(mtd, offset, len, &retlen, &virt, NULL); if (ret) return ret; if (retlen != len) { mtd_unpoint(mtd, offset, retlen); return -ENOSYS; } return (unsigned long)virt; } EXPORT_SYMBOL_GPL(mtd_get_unmapped_area); int mtd_read(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf) { int ret_code; *retlen = 0; if (from < 0 || from >= mtd->size || len > mtd->size - from) return -EINVAL; if (!len) return 0; ledtrig_mtd_activity(); /* * In the absence of an error, drivers return a non-negative integer * representing the maximum number of bitflips that were corrected on * any one ecc region (if applicable; zero otherwise). */ if (mtd->_read) { ret_code = mtd->_read(mtd, from, len, retlen, buf); } else if (mtd->_read_oob) { struct mtd_oob_ops ops = { .len = len, .datbuf = buf, }; ret_code = mtd->_read_oob(mtd, from, &ops); *retlen = ops.retlen; } else { return -ENOTSUPP; } if (unlikely(ret_code < 0)) return ret_code; if (mtd->ecc_strength == 0) return 0; /* device lacks ecc */ return ret_code >= mtd->bitflip_threshold ? -EUCLEAN : 0; } EXPORT_SYMBOL_GPL(mtd_read); int mtd_write(struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, const u_char *buf) { *retlen = 0; if (to < 0 || to >= mtd->size || len > mtd->size - to) return -EINVAL; if ((!mtd->_write && !mtd->_write_oob) || !(mtd->flags & MTD_WRITEABLE)) return -EROFS; if (!len) return 0; ledtrig_mtd_activity(); if (!mtd->_write) { struct mtd_oob_ops ops = { .len = len, .datbuf = (u8 *)buf, }; int ret; ret = mtd->_write_oob(mtd, to, &ops); *retlen = ops.retlen; return ret; } return mtd->_write(mtd, to, len, retlen, buf); } EXPORT_SYMBOL_GPL(mtd_write); /* * In blackbox flight recorder like scenarios we want to make successful writes * in interrupt context. panic_write() is only intended to be called when its * known the kernel is about to panic and we need the write to succeed. Since * the kernel is not going to be running for much longer, this function can * break locks and delay to ensure the write succeeds (but not sleep). 
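 *
 * Illustrative sketch (not from the original source): a crash-dump style
 * caller would typically do something like
 *
 *	size_t retlen;
 *
 *	ret = mtd_panic_write(mtd, ofs, len, &retlen, logbuf);
 *
 * where 'ofs', 'len' and 'logbuf' are hypothetical and must respect the
 * device's write-alignment requirements.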
*/ int mtd_panic_write(struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, const u_char *buf) { *retlen = 0; if (!mtd->_panic_write) return -EOPNOTSUPP; if (to < 0 || to >= mtd->size || len > mtd->size - to) return -EINVAL; if (!(mtd->flags & MTD_WRITEABLE)) return -EROFS; if (!len) return 0; return mtd->_panic_write(mtd, to, len, retlen, buf); } EXPORT_SYMBOL_GPL(mtd_panic_write); static int mtd_check_oob_ops(struct mtd_info *mtd, loff_t offs, struct mtd_oob_ops *ops) { /* * Some users are setting ->datbuf or ->oobbuf to NULL, but are leaving * ->len or ->ooblen uninitialized. Force ->len and ->ooblen to 0 in * this case. */ if (!ops->datbuf) ops->len = 0; if (!ops->oobbuf) ops->ooblen = 0; if (offs < 0 || offs + ops->len > mtd->size) return -EINVAL; if (ops->ooblen) { u64 maxooblen; if (ops->ooboffs >= mtd_oobavail(mtd, ops)) return -EINVAL; maxooblen = ((mtd_div_by_ws(mtd->size, mtd) - mtd_div_by_ws(offs, mtd)) * mtd_oobavail(mtd, ops)) - ops->ooboffs; if (ops->ooblen > maxooblen) return -EINVAL; } return 0; } int mtd_read_oob(struct mtd_info *mtd, loff_t from, struct mtd_oob_ops *ops) { int ret_code; ops->retlen = ops->oobretlen = 0; ret_code = mtd_check_oob_ops(mtd, from, ops); if (ret_code) return ret_code; ledtrig_mtd_activity(); /* Check the validity of a potential fallback on mtd->_read */ if (!mtd->_read_oob && (!mtd->_read || ops->oobbuf)) return -EOPNOTSUPP; if (mtd->_read_oob) ret_code = mtd->_read_oob(mtd, from, ops); else ret_code = mtd->_read(mtd, from, ops->len, &ops->retlen, ops->datbuf); /* * In cases where ops->datbuf != NULL, mtd->_read_oob() has semantics * similar to mtd->_read(), returning a non-negative integer * representing max bitflips. In other cases, mtd->_read_oob() may * return -EUCLEAN. In all cases, perform similar logic to mtd_read(). */ if (unlikely(ret_code < 0)) return ret_code; if (mtd->ecc_strength == 0) return 0; /* device lacks ecc */ return ret_code >= mtd->bitflip_threshold ? -EUCLEAN : 0; } EXPORT_SYMBOL_GPL(mtd_read_oob); int mtd_write_oob(struct mtd_info *mtd, loff_t to, struct mtd_oob_ops *ops) { int ret; ops->retlen = ops->oobretlen = 0; if (!(mtd->flags & MTD_WRITEABLE)) return -EROFS; ret = mtd_check_oob_ops(mtd, to, ops); if (ret) return ret; ledtrig_mtd_activity(); /* Check the validity of a potential fallback on mtd->_write */ if (!mtd->_write_oob && (!mtd->_write || ops->oobbuf)) return -EOPNOTSUPP; if (mtd->_write_oob) return mtd->_write_oob(mtd, to, ops); else return mtd->_write(mtd, to, ops->len, &ops->retlen, ops->datbuf); } EXPORT_SYMBOL_GPL(mtd_write_oob); /** * mtd_ooblayout_ecc - Get the OOB region definition of a specific ECC section * @mtd: MTD device structure * @section: ECC section. Depending on the layout you may have all the ECC * bytes stored in a single contiguous section, or one section * per ECC chunk (and sometime several sections for a single ECC * ECC chunk) * @oobecc: OOB region struct filled with the appropriate ECC position * information * * This function returns ECC section information in the OOB area. If you want * to get all the ECC bytes information, then you should call * mtd_ooblayout_ecc(mtd, section++, oobecc) until it returns -ERANGE. * * Returns zero on success, a negative error code otherwise. 
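 *
 * Illustrative sketch (not from the original source) of walking every ECC
 * section with this helper:
 *
 *	struct mtd_oob_region oobecc;
 *	int section = 0, err;
 *
 *	while (!(err = mtd_ooblayout_ecc(mtd, section++, &oobecc)))
 *		pr_info("ECC bytes: offset %u, length %u\n",
 *			oobecc.offset, oobecc.length);
 *	if (err != -ERANGE)
 *		return err;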
*/ int mtd_ooblayout_ecc(struct mtd_info *mtd, int section, struct mtd_oob_region *oobecc) { memset(oobecc, 0, sizeof(*oobecc)); if (!mtd || section < 0) return -EINVAL; if (!mtd->ooblayout || !mtd->ooblayout->ecc) return -ENOTSUPP; return mtd->ooblayout->ecc(mtd, section, oobecc); } EXPORT_SYMBOL_GPL(mtd_ooblayout_ecc); /** * mtd_ooblayout_free - Get the OOB region definition of a specific free * section * @mtd: MTD device structure * @section: Free section you are interested in. Depending on the layout * you may have all the free bytes stored in a single contiguous * section, or one section per ECC chunk plus an extra section * for the remaining bytes (or other funky layout). * @oobfree: OOB region struct filled with the appropriate free position * information * * This function returns free bytes position in the OOB area. If you want * to get all the free bytes information, then you should call * mtd_ooblayout_free(mtd, section++, oobfree) until it returns -ERANGE. * * Returns zero on success, a negative error code otherwise. */ int mtd_ooblayout_free(struct mtd_info *mtd, int section, struct mtd_oob_region *oobfree) { memset(oobfree, 0, sizeof(*oobfree)); if (!mtd || section < 0) return -EINVAL; if (!mtd->ooblayout || !mtd->ooblayout->free) return -ENOTSUPP; return mtd->ooblayout->free(mtd, section, oobfree); } EXPORT_SYMBOL_GPL(mtd_ooblayout_free); /** * mtd_ooblayout_find_region - Find the region attached to a specific byte * @mtd: mtd info structure * @byte: the byte we are searching for * @sectionp: pointer where the section id will be stored * @oobregion: used to retrieve the ECC position * @iter: iterator function. Should be either mtd_ooblayout_free or * mtd_ooblayout_ecc depending on the region type you're searching for * * This function returns the section id and oobregion information of a * specific byte. For example, say you want to know where the 4th ECC byte is * stored, you'll use: * * mtd_ooblayout_find_region(mtd, 3, &section, &oobregion, mtd_ooblayout_ecc); * * Returns zero on success, a negative error code otherwise. */ static int mtd_ooblayout_find_region(struct mtd_info *mtd, int byte, int *sectionp, struct mtd_oob_region *oobregion, int (*iter)(struct mtd_info *, int section, struct mtd_oob_region *oobregion)) { int pos = 0, ret, section = 0; memset(oobregion, 0, sizeof(*oobregion)); while (1) { ret = iter(mtd, section, oobregion); if (ret) return ret; if (pos + oobregion->length > byte) break; pos += oobregion->length; section++; } /* * Adjust region info to make it start at the beginning at the * 'start' ECC byte. */ oobregion->offset += byte - pos; oobregion->length -= byte - pos; *sectionp = section; return 0; } /** * mtd_ooblayout_find_eccregion - Find the ECC region attached to a specific * ECC byte * @mtd: mtd info structure * @eccbyte: the byte we are searching for * @sectionp: pointer where the section id will be stored * @oobregion: OOB region information * * Works like mtd_ooblayout_find_region() except it searches for a specific ECC * byte. * * Returns zero on success, a negative error code otherwise. 
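 *
 * Illustrative sketch (not from the original source): locating the 4th ECC
 * byte, mirroring the mtd_ooblayout_find_region() example above:
 *
 *	int section;
 *	struct mtd_oob_region oobregion;
 *
 *	ret = mtd_ooblayout_find_eccregion(mtd, 3, &section, &oobregion);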
*/ int mtd_ooblayout_find_eccregion(struct mtd_info *mtd, int eccbyte, int *section, struct mtd_oob_region *oobregion) { return mtd_ooblayout_find_region(mtd, eccbyte, section, oobregion, mtd_ooblayout_ecc); } EXPORT_SYMBOL_GPL(mtd_ooblayout_find_eccregion); /** * mtd_ooblayout_get_bytes - Extract OOB bytes from the oob buffer * @mtd: mtd info structure * @buf: destination buffer to store OOB bytes * @oobbuf: OOB buffer * @start: first byte to retrieve * @nbytes: number of bytes to retrieve * @iter: section iterator * * Extract bytes attached to a specific category (ECC or free) * from the OOB buffer and copy them into buf. * * Returns zero on success, a negative error code otherwise. */ static int mtd_ooblayout_get_bytes(struct mtd_info *mtd, u8 *buf, const u8 *oobbuf, int start, int nbytes, int (*iter)(struct mtd_info *, int section, struct mtd_oob_region *oobregion)) { struct mtd_oob_region oobregion; int section, ret; ret = mtd_ooblayout_find_region(mtd, start, &section, &oobregion, iter); while (!ret) { int cnt; cnt = min_t(int, nbytes, oobregion.length); memcpy(buf, oobbuf + oobregion.offset, cnt); buf += cnt; nbytes -= cnt; if (!nbytes) break; ret = iter(mtd, ++section, &oobregion); } return ret; } /** * mtd_ooblayout_set_bytes - put OOB bytes into the oob buffer * @mtd: mtd info structure * @buf: source buffer to get OOB bytes from * @oobbuf: OOB buffer * @start: first OOB byte to set * @nbytes: number of OOB bytes to set * @iter: section iterator * * Fill the OOB buffer with data provided in buf. The category (ECC or free) * is selected by passing the appropriate iterator. * * Returns zero on success, a negative error code otherwise. */ static int mtd_ooblayout_set_bytes(struct mtd_info *mtd, const u8 *buf, u8 *oobbuf, int start, int nbytes, int (*iter)(struct mtd_info *, int section, struct mtd_oob_region *oobregion)) { struct mtd_oob_region oobregion; int section, ret; ret = mtd_ooblayout_find_region(mtd, start, &section, &oobregion, iter); while (!ret) { int cnt; cnt = min_t(int, nbytes, oobregion.length); memcpy(oobbuf + oobregion.offset, buf, cnt); buf += cnt; nbytes -= cnt; if (!nbytes) break; ret = iter(mtd, ++section, &oobregion); } return ret; } /** * mtd_ooblayout_count_bytes - count the number of bytes in a OOB category * @mtd: mtd info structure * @iter: category iterator * * Count the number of bytes in a given category. * * Returns a positive value on success, a negative error code otherwise. */ static int mtd_ooblayout_count_bytes(struct mtd_info *mtd, int (*iter)(struct mtd_info *, int section, struct mtd_oob_region *oobregion)) { struct mtd_oob_region oobregion; int section = 0, ret, nbytes = 0; while (1) { ret = iter(mtd, section++, &oobregion); if (ret) { if (ret == -ERANGE) ret = nbytes; break; } nbytes += oobregion.length; } return ret; } /** * mtd_ooblayout_get_eccbytes - extract ECC bytes from the oob buffer * @mtd: mtd info structure * @eccbuf: destination buffer to store ECC bytes * @oobbuf: OOB buffer * @start: first ECC byte to retrieve * @nbytes: number of ECC bytes to retrieve * * Works like mtd_ooblayout_get_bytes(), except it acts on ECC bytes. * * Returns zero on success, a negative error code otherwise. 
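 *
 * Illustrative sketch (not from the original source): pulling the first 8 ECC
 * bytes out of a raw OOB dump held in a hypothetical 'oobbuf' buffer:
 *
 *	u8 ecc[8];
 *
 *	ret = mtd_ooblayout_get_eccbytes(mtd, ecc, oobbuf, 0, sizeof(ecc));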
*/ int mtd_ooblayout_get_eccbytes(struct mtd_info *mtd, u8 *eccbuf, const u8 *oobbuf, int start, int nbytes) { return mtd_ooblayout_get_bytes(mtd, eccbuf, oobbuf, start, nbytes, mtd_ooblayout_ecc); } EXPORT_SYMBOL_GPL(mtd_ooblayout_get_eccbytes); /** * mtd_ooblayout_set_eccbytes - set ECC bytes into the oob buffer * @mtd: mtd info structure * @eccbuf: source buffer to get ECC bytes from * @oobbuf: OOB buffer * @start: first ECC byte to set * @nbytes: number of ECC bytes to set * * Works like mtd_ooblayout_set_bytes(), except it acts on ECC bytes. * * Returns zero on success, a negative error code otherwise. */ int mtd_ooblayout_set_eccbytes(struct mtd_info *mtd, const u8 *eccbuf, u8 *oobbuf, int start, int nbytes) { return mtd_ooblayout_set_bytes(mtd, eccbuf, oobbuf, start, nbytes, mtd_ooblayout_ecc); } EXPORT_SYMBOL_GPL(mtd_ooblayout_set_eccbytes); /** * mtd_ooblayout_get_databytes - extract data bytes from the oob buffer * @mtd: mtd info structure * @databuf: destination buffer to store data bytes * @oobbuf: OOB buffer * @start: first data byte to retrieve * @nbytes: number of data bytes to retrieve * * Works like mtd_ooblayout_get_bytes(), except it acts on free bytes. * * Returns zero on success, a negative error code otherwise. */ int mtd_ooblayout_get_databytes(struct mtd_info *mtd, u8 *databuf, const u8 *oobbuf, int start, int nbytes) { return mtd_ooblayout_get_bytes(mtd, databuf, oobbuf, start, nbytes, mtd_ooblayout_free); } EXPORT_SYMBOL_GPL(mtd_ooblayout_get_databytes); /** * mtd_ooblayout_set_databytes - set data bytes into the oob buffer * @mtd: mtd info structure * @databuf: source buffer to get data bytes from * @oobbuf: OOB buffer * @start: first data byte to set * @nbytes: number of data bytes to set * * Works like mtd_ooblayout_set_bytes(), except it acts on free bytes. * * Returns zero on success, a negative error code otherwise. */ int mtd_ooblayout_set_databytes(struct mtd_info *mtd, const u8 *databuf, u8 *oobbuf, int start, int nbytes) { return mtd_ooblayout_set_bytes(mtd, databuf, oobbuf, start, nbytes, mtd_ooblayout_free); } EXPORT_SYMBOL_GPL(mtd_ooblayout_set_databytes); /** * mtd_ooblayout_count_freebytes - count the number of free bytes in OOB * @mtd: mtd info structure * * Works like mtd_ooblayout_count_bytes(), except it counts free bytes. * * Returns the number of free bytes on success, a negative error code otherwise. */ int mtd_ooblayout_count_freebytes(struct mtd_info *mtd) { return mtd_ooblayout_count_bytes(mtd, mtd_ooblayout_free); } EXPORT_SYMBOL_GPL(mtd_ooblayout_count_freebytes); /** * mtd_ooblayout_count_eccbytes - count the number of ECC bytes in OOB * @mtd: mtd info structure * * Works like mtd_ooblayout_count_bytes(), except it counts ECC bytes. * * Returns the number of ECC bytes on success, a negative error code otherwise. */ int mtd_ooblayout_count_eccbytes(struct mtd_info *mtd) { return mtd_ooblayout_count_bytes(mtd, mtd_ooblayout_ecc); } EXPORT_SYMBOL_GPL(mtd_ooblayout_count_eccbytes); /* * Method to access the protection register area, present in some flash * devices. The user data is one-time programmable but the factory data is read * only.
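 *
 * Illustrative sketch (not from the original source): reading the first
 * bytes of the factory OTP area into a hypothetical 'serial' buffer:
 *
 *	size_t retlen;
 *	u8 serial[16];
 *
 *	ret = mtd_read_fact_prot_reg(mtd, 0, sizeof(serial), &retlen, serial);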
*/ int mtd_get_fact_prot_info(struct mtd_info *mtd, size_t len, size_t *retlen, struct otp_info *buf) { if (!mtd->_get_fact_prot_info) return -EOPNOTSUPP; if (!len) return 0; return mtd->_get_fact_prot_info(mtd, len, retlen, buf); } EXPORT_SYMBOL_GPL(mtd_get_fact_prot_info); int mtd_read_fact_prot_reg(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf) { *retlen = 0; if (!mtd->_read_fact_prot_reg) return -EOPNOTSUPP; if (!len) return 0; return mtd->_read_fact_prot_reg(mtd, from, len, retlen, buf); } EXPORT_SYMBOL_GPL(mtd_read_fact_prot_reg); int mtd_get_user_prot_info(struct mtd_info *mtd, size_t len, size_t *retlen, struct otp_info *buf) { if (!mtd->_get_user_prot_info) return -EOPNOTSUPP; if (!len) return 0; return mtd->_get_user_prot_info(mtd, len, retlen, buf); } EXPORT_SYMBOL_GPL(mtd_get_user_prot_info); int mtd_read_user_prot_reg(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf) { *retlen = 0; if (!mtd->_read_user_prot_reg) return -EOPNOTSUPP; if (!len) return 0; return mtd->_read_user_prot_reg(mtd, from, len, retlen, buf); } EXPORT_SYMBOL_GPL(mtd_read_user_prot_reg); int mtd_write_user_prot_reg(struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, u_char *buf) { int ret; *retlen = 0; if (!mtd->_write_user_prot_reg) return -EOPNOTSUPP; if (!len) return 0; ret = mtd->_write_user_prot_reg(mtd, to, len, retlen, buf); if (ret) return ret; /* * If no data could be written at all, we are out of memory and * must return -ENOSPC. */ return (*retlen) ? 0 : -ENOSPC; } EXPORT_SYMBOL_GPL(mtd_write_user_prot_reg); int mtd_lock_user_prot_reg(struct mtd_info *mtd, loff_t from, size_t len) { if (!mtd->_lock_user_prot_reg) return -EOPNOTSUPP; if (!len) return 0; return mtd->_lock_user_prot_reg(mtd, from, len); } EXPORT_SYMBOL_GPL(mtd_lock_user_prot_reg); /* Chip-supported device locking */ int mtd_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len) { if (!mtd->_lock) return -EOPNOTSUPP; if (ofs < 0 || ofs >= mtd->size || len > mtd->size - ofs) return -EINVAL; if (!len) return 0; return mtd->_lock(mtd, ofs, len); } EXPORT_SYMBOL_GPL(mtd_lock); int mtd_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len) { if (!mtd->_unlock) return -EOPNOTSUPP; if (ofs < 0 || ofs >= mtd->size || len > mtd->size - ofs) return -EINVAL; if (!len) return 0; return mtd->_unlock(mtd, ofs, len); } EXPORT_SYMBOL_GPL(mtd_unlock); int mtd_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len) { if (!mtd->_is_locked) return -EOPNOTSUPP; if (ofs < 0 || ofs >= mtd->size || len > mtd->size - ofs) return -EINVAL; if (!len) return 0; return mtd->_is_locked(mtd, ofs, len); } EXPORT_SYMBOL_GPL(mtd_is_locked); int mtd_block_isreserved(struct mtd_info *mtd, loff_t ofs) { if (ofs < 0 || ofs >= mtd->size) return -EINVAL; if (!mtd->_block_isreserved) return 0; return mtd->_block_isreserved(mtd, ofs); } EXPORT_SYMBOL_GPL(mtd_block_isreserved); int mtd_block_isbad(struct mtd_info *mtd, loff_t ofs) { if (ofs < 0 || ofs >= mtd->size) return -EINVAL; if (!mtd->_block_isbad) return 0; return mtd->_block_isbad(mtd, ofs); } EXPORT_SYMBOL_GPL(mtd_block_isbad); int mtd_block_markbad(struct mtd_info *mtd, loff_t ofs) { if (!mtd->_block_markbad) return -EOPNOTSUPP; if (ofs < 0 || ofs >= mtd->size) return -EINVAL; if (!(mtd->flags & MTD_WRITEABLE)) return -EROFS; return mtd->_block_markbad(mtd, ofs); } EXPORT_SYMBOL_GPL(mtd_block_markbad); /* * default_mtd_writev - the default writev method * @mtd: mtd device description object pointer * @vecs: the vectors to write * @count: count of vectors 
in @vecs * @to: the MTD device offset to write to * @retlen: on exit contains the count of bytes written to the MTD device. * * This function returns zero in case of success and a negative error code in * case of failure. */ static int default_mtd_writev(struct mtd_info *mtd, const struct kvec *vecs, unsigned long count, loff_t to, size_t *retlen) { unsigned long i; size_t totlen = 0, thislen; int ret = 0; for (i = 0; i < count; i++) { if (!vecs[i].iov_len) continue; ret = mtd_write(mtd, to, vecs[i].iov_len, &thislen, vecs[i].iov_base); totlen += thislen; if (ret || thislen != vecs[i].iov_len) break; to += vecs[i].iov_len; } *retlen = totlen; return ret; } /* * mtd_writev - the vector-based MTD write method * @mtd: mtd device description object pointer * @vecs: the vectors to write * @count: count of vectors in @vecs * @to: the MTD device offset to write to * @retlen: on exit contains the count of bytes written to the MTD device. * * This function returns zero in case of success and a negative error code in * case of failure. */ int mtd_writev(struct mtd_info *mtd, const struct kvec *vecs, unsigned long count, loff_t to, size_t *retlen) { *retlen = 0; if (!(mtd->flags & MTD_WRITEABLE)) return -EROFS; if (!mtd->_writev) return default_mtd_writev(mtd, vecs, count, to, retlen); return mtd->_writev(mtd, vecs, count, to, retlen); } EXPORT_SYMBOL_GPL(mtd_writev); /** * mtd_kmalloc_up_to - allocate a contiguous buffer up to the specified size * @mtd: mtd device description object pointer * @size: a pointer to the ideal or maximum size of the allocation, points * to the actual allocation size on success. * * This routine attempts to allocate a contiguous kernel buffer up to * the specified size, backing off the size of the request exponentially * until the request succeeds or until the allocation size falls below * the system page size. This attempts to make sure it does not adversely * impact system performance, so when allocating more than one page, we * ask the memory allocator to avoid re-trying, swapping, writing back * or performing I/O. * * Note, this function also makes sure that the allocated buffer is aligned to * the MTD device's min. I/O unit, i.e. the "mtd->writesize" value. * * This is called, for example by mtd_{read,write} and jffs2_scan_medium, * to handle smaller (i.e. degraded) buffer allocations under low- or * fragmented-memory situations where such reduced allocations, from a * requested ideal, are allowed. * * Returns a pointer to the allocated buffer on success; otherwise, NULL. */ void *mtd_kmalloc_up_to(const struct mtd_info *mtd, size_t *size) { gfp_t flags = __GFP_NOWARN | __GFP_DIRECT_RECLAIM | __GFP_NORETRY; size_t min_alloc = max_t(size_t, mtd->writesize, PAGE_SIZE); void *kbuf; *size = min_t(size_t, *size, KMALLOC_MAX_SIZE); while (*size > min_alloc) { kbuf = kmalloc(*size, flags); if (kbuf) return kbuf; *size >>= 1; *size = ALIGN(*size, mtd->writesize); } /* * For the last resort allocation allow 'kmalloc()' to do all sorts of * things (write-back, dropping caches, etc) by using GFP_KERNEL. 
*/ return kmalloc(*size, GFP_KERNEL); } EXPORT_SYMBOL_GPL(mtd_kmalloc_up_to); #ifdef CONFIG_PROC_FS /*====================================================================*/ /* Support for /proc/mtd */ static int mtd_proc_show(struct seq_file *m, void *v) { struct mtd_info *mtd; seq_puts(m, "dev: size erasesize name\n"); mutex_lock(&mtd_table_mutex); mtd_for_each_device(mtd) { seq_printf(m, "mtd%d: %8.8llx %8.8x \"%s\"\n", mtd->index, (unsigned long long)mtd->size, mtd->erasesize, mtd->name); } mutex_unlock(&mtd_table_mutex); return 0; } #endif /* CONFIG_PROC_FS */ /*====================================================================*/ /* Init code */ static struct backing_dev_info * __init mtd_bdi_init(char *name) { struct backing_dev_info *bdi; int ret; bdi = bdi_alloc(GFP_KERNEL); if (!bdi) return ERR_PTR(-ENOMEM); bdi->name = name; /* * We put '-0' suffix to the name to get the same name format as we * used to get. Since this is called only once, we get a unique name. */ ret = bdi_register(bdi, "%.28s-0", name); if (ret) bdi_put(bdi); return ret ? ERR_PTR(ret) : bdi; } static struct proc_dir_entry *proc_mtd; static int __init init_mtd(void) { int ret; ret = class_register(&mtd_class); if (ret) goto err_reg; mtd_bdi = mtd_bdi_init("mtd"); if (IS_ERR(mtd_bdi)) { ret = PTR_ERR(mtd_bdi); goto err_bdi; } proc_mtd = proc_create_single("mtd", 0, NULL, mtd_proc_show); ret = init_mtdchar(); if (ret) goto out_procfs; dfs_dir_mtd = debugfs_create_dir("mtd", NULL); return 0; out_procfs: if (proc_mtd) remove_proc_entry("mtd", NULL); bdi_put(mtd_bdi); err_bdi: class_unregister(&mtd_class); err_reg: pr_err("Error registering mtd class or bdi: %d\n", ret); return ret; } static void __exit cleanup_mtd(void) { debugfs_remove_recursive(dfs_dir_mtd); cleanup_mtdchar(); if (proc_mtd) remove_proc_entry("mtd", NULL); class_unregister(&mtd_class); bdi_put(mtd_bdi); idr_destroy(&mtd_idr); } module_init(init_mtd); module_exit(cleanup_mtd); MODULE_LICENSE("GPL"); MODULE_AUTHOR("David Woodhouse <dwmw2@infradead.org>"); MODULE_DESCRIPTION("Core MTD registration and access routines");
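/*
 * Illustrative sketch (not part of mtdcore.c): a minimal MTD consumer built
 * on the accessor API above. Device number 0 and the buffer handling are
 * hypothetical, and error handling is abbreviated.
 */
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/mtd/mtd.h>

static int __init mtd_example_init(void)
{
	struct mtd_info *mtd;
	size_t retlen;
	u8 *buf;
	int ret;

	mtd = get_mtd_device(NULL, 0);	/* grab mtd0, bumps its usecount */
	if (IS_ERR(mtd))
		return PTR_ERR(mtd);

	buf = kmalloc(mtd->writesize, GFP_KERNEL);
	if (!buf) {
		put_mtd_device(mtd);
		return -ENOMEM;
	}

	ret = mtd_read(mtd, 0, mtd->writesize, &retlen, buf);
	if (ret && !mtd_is_bitflip(ret))
		pr_err("mtd example: read failed: %d\n", ret);

	kfree(buf);
	put_mtd_device(mtd);
	return 0;
}
module_init(mtd_example_init);
MODULE_LICENSE("GPL");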
/* Copyright (C) 2016 Tomasz Chilinski <tomasz.chilinski@chilan.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ /* Kernel module implementing an IP set type: the hash:ip,mac type */ #include <linux/jhash.h> #include <linux/module.h> #include <linux/ip.h> #include <linux/etherdevice.h> #include <linux/skbuff.h> #include <linux/errno.h> #include <linux/random.h> #include <linux/if_ether.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/netlink.h> #include <net/tcp.h> #include <linux/netfilter.h> #include <linux/netfilter/ipset/pfxlen.h> #include <linux/netfilter/ipset/ip_set.h> #include <linux/netfilter/ipset/ip_set_hash.h> #define IPSET_TYPE_REV_MIN 0 #define IPSET_TYPE_REV_MAX 0 MODULE_LICENSE("GPL"); MODULE_AUTHOR("Tomasz Chilinski <tomasz.chilinski@chilan.com>"); IP_SET_MODULE_DESC("hash:ip,mac", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:ip,mac"); /* Type specific function prefix */ #define HTYPE hash_ipmac /* IPv4 variant */ /* Member elements */ struct hash_ipmac4_elem { /* Zero valued IP addresses cannot be stored */ __be32 ip; union { unsigned char ether[ETH_ALEN]; __be32 foo[2]; }; }; /* Common functions */ static inline bool hash_ipmac4_data_equal(const struct hash_ipmac4_elem *e1, const struct hash_ipmac4_elem *e2, u32 *multi) { return e1->ip == e2->ip && ether_addr_equal(e1->ether, e2->ether); } static bool hash_ipmac4_data_list(struct sk_buff *skb, const struct hash_ipmac4_elem *e) { if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, e->ip) || nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, e->ether)) goto nla_put_failure; return false; nla_put_failure: return true; } static inline void hash_ipmac4_data_next(struct hash_ipmac4_elem *next, const struct hash_ipmac4_elem *e) { next->ip = e->ip; } #define MTYPE hash_ipmac4 #define PF 4 #define HOST_MASK 32 #define HKEY_DATALEN sizeof(struct hash_ipmac4_elem) #include "ip_set_hash_gen.h" static int hash_ipmac4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipmac4_elem e = { .ip = 0, { .foo[0] = 0, .foo[1] = 0 } }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (skb_mac_header(skb) < skb->head ||
(skb_mac_header(skb) + ETH_HLEN) > skb->data) return -EINVAL; if (opt->flags & IPSET_DIM_TWO_SRC) ether_addr_copy(e.ether, eth_hdr(skb)->h_source); else ether_addr_copy(e.ether, eth_hdr(skb)->h_dest); if (is_zero_ether_addr(e.ether)) return -EINVAL; ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip); return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } static int hash_ipmac4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipmac4_elem e = { .ip = 0, { .foo[0] = 0, .foo[1] = 0 } }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); int ret; if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_ETHER] || nla_len(tb[IPSET_ATTR_ETHER]) != ETH_ALEN || !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip) || ip_set_get_extensions(set, tb, &ext); if (ret) return ret; memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN); if (is_zero_ether_addr(e.ether)) return -IPSET_ERR_HASH_ELEM; return adtfn(set, &e, &ext, &ext, flags); } /* IPv6 variant */ /* Member elements */ struct hash_ipmac6_elem { /* Zero valued IP addresses cannot be stored */ union nf_inet_addr ip; union { unsigned char ether[ETH_ALEN]; __be32 foo[2]; }; }; /* Common functions */ static inline bool hash_ipmac6_data_equal(const struct hash_ipmac6_elem *e1, const struct hash_ipmac6_elem *e2, u32 *multi) { return ipv6_addr_equal(&e1->ip.in6, &e2->ip.in6) && ether_addr_equal(e1->ether, e2->ether); } static bool hash_ipmac6_data_list(struct sk_buff *skb, const struct hash_ipmac6_elem *e) { if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6) || nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, e->ether)) goto nla_put_failure; return false; nla_put_failure: return true; } static inline void hash_ipmac6_data_next(struct hash_ipmac6_elem *next, const struct hash_ipmac6_elem *e) { } #undef MTYPE #undef PF #undef HOST_MASK #undef HKEY_DATALEN #define MTYPE hash_ipmac6 #define PF 6 #define HOST_MASK 128 #define HKEY_DATALEN sizeof(struct hash_ipmac6_elem) #define IP_SET_EMIT_CREATE #include "ip_set_hash_gen.h" static int hash_ipmac6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipmac6_elem e = { { .all = { 0 } }, { .foo[0] = 0, .foo[1] = 0 } }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (skb_mac_header(skb) < skb->head || (skb_mac_header(skb) + ETH_HLEN) > skb->data) return -EINVAL; if (opt->flags & IPSET_DIM_TWO_SRC) ether_addr_copy(e.ether, eth_hdr(skb)->h_source); else ether_addr_copy(e.ether, eth_hdr(skb)->h_dest); if (is_zero_ether_addr(e.ether)) return -EINVAL; ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6); return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } static int hash_ipmac6_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipmac6_elem e = { { .all = { 0 } }, { .foo[0] = 0, .foo[1] = 0 } }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); int 
ret; if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_ETHER] || nla_len(tb[IPSET_ATTR_ETHER]) != ETH_ALEN || !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) || ip_set_get_extensions(set, tb, &ext); if (ret) return ret; memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN); if (is_zero_ether_addr(e.ether)) return -IPSET_ERR_HASH_ELEM; return adtfn(set, &e, &ext, &ext, flags); } static struct ip_set_type hash_ipmac_type __read_mostly = { .name = "hash:ip,mac", .protocol = IPSET_PROTOCOL, .features = IPSET_TYPE_IP | IPSET_TYPE_MAC, .dimension = IPSET_DIM_TWO, .family = NFPROTO_UNSPEC, .revision_min = IPSET_TYPE_REV_MIN, .revision_max = IPSET_TYPE_REV_MAX, .create = hash_ipmac_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, }, .adt_policy = { [IPSET_ATTR_IP] = { .type = NLA_NESTED }, [IPSET_ATTR_ETHER] = { .type = NLA_BINARY, .len = ETH_ALEN }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, }, .me = THIS_MODULE, }; static int __init hash_ipmac_init(void) { return ip_set_type_register(&hash_ipmac_type); } static void __exit hash_ipmac_fini(void) { ip_set_type_unregister(&hash_ipmac_type); } module_init(hash_ipmac_init); module_exit(hash_ipmac_fini);
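/*
 * Illustrative usage sketch (not part of this file): the set type registered
 * above is driven from userspace with the ipset(8) tool, e.g.
 *
 *	ipset create trusted hash:ip,mac
 *	ipset add trusted 192.168.1.10,00:11:22:33:44:55
 *	iptables -A INPUT -m set --match-set trusted src,src -j ACCEPT
 *
 * Exact option spelling depends on the installed ipset/iptables versions.
 */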
/* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM pagemap #if !defined(_TRACE_PAGEMAP_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_PAGEMAP_H #include <linux/tracepoint.h> #include <linux/mm.h> #define PAGEMAP_MAPPED 0x0001u #define PAGEMAP_ANONYMOUS 0x0002u #define PAGEMAP_FILE 0x0004u #define PAGEMAP_SWAPCACHE 0x0008u #define PAGEMAP_SWAPBACKED 0x0010u #define PAGEMAP_MAPPEDDISK 0x0020u #define PAGEMAP_BUFFERS 0x0040u #define trace_pagemap_flags(page) ( \ (PageAnon(page) ? PAGEMAP_ANONYMOUS : PAGEMAP_FILE) | \ (page_mapped(page) ? PAGEMAP_MAPPED : 0) | \ (PageSwapCache(page) ? PAGEMAP_SWAPCACHE : 0) | \ (PageSwapBacked(page) ? PAGEMAP_SWAPBACKED : 0) | \ (PageMappedToDisk(page) ? PAGEMAP_MAPPEDDISK : 0) | \ (page_has_private(page) ? PAGEMAP_BUFFERS : 0) \ ) TRACE_EVENT(mm_lru_insertion, TP_PROTO( struct page *page, int lru ), TP_ARGS(page, lru), TP_STRUCT__entry( __field(struct page *, page ) __field(unsigned long, pfn ) __field(int, lru ) __field(unsigned long, flags ) ), TP_fast_assign( __entry->page = page; __entry->pfn = page_to_pfn(page); __entry->lru = lru; __entry->flags = trace_pagemap_flags(page); ), /* Flag format is based on page-types.c formatting for pagemap */ TP_printk("page=%p pfn=%lu lru=%d flags=%s%s%s%s%s%s", __entry->page, __entry->pfn, __entry->lru, __entry->flags & PAGEMAP_MAPPED ? "M" : " ", __entry->flags & PAGEMAP_ANONYMOUS ? "a" : "f", __entry->flags & PAGEMAP_SWAPCACHE ? "s" : " ", __entry->flags & PAGEMAP_SWAPBACKED ? "b" : " ", __entry->flags & PAGEMAP_MAPPEDDISK ? "d" : " ", __entry->flags & PAGEMAP_BUFFERS ? "B" : " ") ); TRACE_EVENT(mm_lru_activate, TP_PROTO(struct page *page), TP_ARGS(page), TP_STRUCT__entry( __field(struct page *, page ) __field(unsigned long, pfn ) ), TP_fast_assign( __entry->page = page; __entry->pfn = page_to_pfn(page); ), /* Flag format is based on page-types.c formatting for pagemap */ TP_printk("page=%p pfn=%lu", __entry->page, __entry->pfn) ); #endif /* _TRACE_PAGEMAP_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
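/*
 * Illustrative usage sketch (not part of this header): TRACE_EVENT() above
 * generates trace_mm_lru_insertion()/trace_mm_lru_activate(), which the mm
 * code calls at the corresponding LRU operations, e.g. (hypothetical call
 * site):
 *
 *	trace_mm_lru_insertion(page, lru);
 *
 * The resulting events can then be enabled from userspace via tracefs:
 *
 *	echo 1 > /sys/kernel/tracing/events/pagemap/mm_lru_insertion/enable
 *	cat /sys/kernel/tracing/trace_pipe
 */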
#define KMSG_COMPONENT "IPVS" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <linux/module.h> #include <linux/spinlock.h> #include <linux/interrupt.h> #include <asm/string.h> #include <linux/kmod.h> #include <linux/sysctl.h> #include <net/ip_vs.h> /* IPVS pe list */ static LIST_HEAD(ip_vs_pe); /* semaphore for IPVS PEs. */ static DEFINE_MUTEX(ip_vs_pe_mutex); /* Get pe in the pe list by name */ struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name) { struct ip_vs_pe *pe; IP_VS_DBG(10, "%s(): pe_name \"%s\"\n", __func__, pe_name); rcu_read_lock(); list_for_each_entry_rcu(pe, &ip_vs_pe, n_list) { /* Test and get the modules atomically */ if (pe->module && !try_module_get(pe->module)) { /* This pe is just deleted */ continue; } if (strcmp(pe_name, pe->name)==0) { /* HIT */ rcu_read_unlock(); return pe; } module_put(pe->module); } rcu_read_unlock(); return NULL; } /* Lookup pe and try to load it if it doesn't exist */ struct ip_vs_pe *ip_vs_pe_getbyname(const char *name) { struct ip_vs_pe *pe; /* Search for the pe by name */ pe = __ip_vs_pe_getbyname(name); /* If pe not found, load the module and search again */ if (!pe) { request_module("ip_vs_pe_%s", name); pe = __ip_vs_pe_getbyname(name); } return pe; } /* Register a pe in the pe list */ int register_ip_vs_pe(struct ip_vs_pe *pe) { struct ip_vs_pe *tmp; /* increase the module use count */ if (!ip_vs_use_count_inc()) return -ENOENT; mutex_lock(&ip_vs_pe_mutex); /* Make sure that the pe with this name doesn't exist * in the pe list. */ list_for_each_entry(tmp, &ip_vs_pe, n_list) { if (strcmp(tmp->name, pe->name) == 0) { mutex_unlock(&ip_vs_pe_mutex); ip_vs_use_count_dec(); pr_err("%s(): [%s] pe already existed " "in the system\n", __func__, pe->name); return -EINVAL; } } /* Add it into the d-linked pe list */ list_add_rcu(&pe->n_list, &ip_vs_pe); mutex_unlock(&ip_vs_pe_mutex); pr_info("[%s] pe registered.\n", pe->name); return 0; } EXPORT_SYMBOL_GPL(register_ip_vs_pe); /* Unregister a pe from the pe list */ int unregister_ip_vs_pe(struct ip_vs_pe *pe) { mutex_lock(&ip_vs_pe_mutex); /* Remove it from the d-linked pe list */ list_del_rcu(&pe->n_list); mutex_unlock(&ip_vs_pe_mutex); /* decrease the module use count */ ip_vs_use_count_dec(); pr_info("[%s] pe unregistered.\n", pe->name); return 0; } EXPORT_SYMBOL_GPL(unregister_ip_vs_pe);
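/*
 * Illustrative sketch (not part of ip_vs_pe.c): a minimal persistence-engine
 * module built on the register/unregister helpers above. Only the .name and
 * .module fields are assumed here; a real pe also fills in its parameter and
 * connection callbacks.
 */
static struct ip_vs_pe example_pe __read_mostly = {
	.name	= "example",
	.module	= THIS_MODULE,
};

static int __init example_pe_init(void)
{
	return register_ip_vs_pe(&example_pe);
}

static void __exit example_pe_cleanup(void)
{
	unregister_ip_vs_pe(&example_pe);
}

module_init(example_pe_init);
module_exit(example_pe_cleanup);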
/* * Copyright (c) 2007, 2017 Oracle and/or its affiliates. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ #include <linux/slab.h> #include <linux/types.h> #include <linux/rbtree.h> #include <linux/bitops.h> #include <linux/export.h> #include "rds.h" /* * This file implements the receive side of the unconventional congestion * management in RDS. * * Messages waiting in the receive queue on the receiving socket are accounted * against the sockets SO_RCVBUF option value. Only the payload bytes in the * message are accounted for.
If the number of bytes queued equals or exceeds * rcvbuf then the socket is congested. All sends attempted to this socket's * address should return block or return -EWOULDBLOCK. * * Applications are expected to be reasonably tuned such that this situation * very rarely occurs. An application encountering this "back-pressure" is * considered a bug. * * This is implemented by having each node maintain bitmaps which indicate * which ports on bound addresses are congested. As the bitmap changes it is * sent through all the connections which terminate in the local address of the * bitmap which changed. * * The bitmaps are allocated as connections are brought up. This avoids * allocation in the interrupt handling path which queues messages on sockets. * The dense bitmaps let transports send the entire bitmap on any bitmap change * reasonably efficiently. This is much easier to implement than some * finer-grained communication of per-port congestion. The sender does a very * inexpensive bit test to test if the port it's about to send to is congested * or not. */ /* * Interaction with poll is a tad tricky. We want all processes stuck in * poll to wake up and check whether a congested destination became uncongested. * The really sad thing is we have no idea which destinations the application * wants to send to - we don't even know which rds_connections are involved. * So until we implement a more flexible rds poll interface, we have to make * do with this: * We maintain a global counter that is incremented each time a congestion map * update is received. Each rds socket tracks this value, and if rds_poll * finds that the saved generation number is smaller than the global generation * number, it wakes up the process. */ static atomic_t rds_cong_generation = ATOMIC_INIT(0); /* * Congestion monitoring */ static LIST_HEAD(rds_cong_monitor); static DEFINE_RWLOCK(rds_cong_monitor_lock); /* * Yes, a global lock. It's used so infrequently that it's worth keeping it * global to simplify the locking. It's only used in the following * circumstances: * * - on connection buildup to associate a conn with its maps * - on map changes to inform conns of a new map to send * * It's sadly ordered under the socket callback lock and the connection lock. * Receive paths can mark ports congested from interrupt context so the * lock masks interrupts. */ static DEFINE_SPINLOCK(rds_cong_lock); static struct rb_root rds_cong_tree = RB_ROOT; static struct rds_cong_map *rds_cong_tree_walk(const struct in6_addr *addr, struct rds_cong_map *insert) { struct rb_node **p = &rds_cong_tree.rb_node; struct rb_node *parent = NULL; struct rds_cong_map *map; while (*p) { int diff; parent = *p; map = rb_entry(parent, struct rds_cong_map, m_rb_node); diff = rds_addr_cmp(addr, &map->m_addr); if (diff < 0) p = &(*p)->rb_left; else if (diff > 0) p = &(*p)->rb_right; else return map; } if (insert) { rb_link_node(&insert->m_rb_node, parent, p); rb_insert_color(&insert->m_rb_node, &rds_cong_tree); } return NULL; } /* * There is only ever one bitmap for any address. Connections try and allocate * these bitmaps in the process getting pointers to them. The bitmaps are only * ever freed as the module is removed after all connections have been freed. 
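 *
 * (Rough scale, assuming the usual one-bit-per-port encoding: the 16-bit
 * port space works out to an 8 KiB bitmap per address, i.e. the
 * RDS_CONG_MAP_PAGES pages allocated below.)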
*/ static struct rds_cong_map *rds_cong_from_addr(const struct in6_addr *addr) { struct rds_cong_map *map; struct rds_cong_map *ret = NULL; unsigned long zp; unsigned long i; unsigned long flags; map = kzalloc(sizeof(struct rds_cong_map), GFP_KERNEL); if (!map) return NULL; map->m_addr = *addr; init_waitqueue_head(&map->m_waitq); INIT_LIST_HEAD(&map->m_conn_list); for (i = 0; i < RDS_CONG_MAP_PAGES; i++) { zp = get_zeroed_page(GFP_KERNEL); if (zp == 0) goto out; map->m_page_addrs[i] = zp; } spin_lock_irqsave(&rds_cong_lock, flags); ret = rds_cong_tree_walk(addr, map); spin_unlock_irqrestore(&rds_cong_lock, flags); if (!ret) { ret = map; map = NULL; } out: if (map) { for (i = 0; i < RDS_CONG_MAP_PAGES && map->m_page_addrs[i]; i++) free_page(map->m_page_addrs[i]); kfree(map); } rdsdebug("map %p for addr %pI6c\n", ret, addr); return ret; } /* * Put the conn on its local map's list. This is called when the conn is * really added to the hash. It's nested under the rds_conn_lock, sadly. */ void rds_cong_add_conn(struct rds_connection *conn) { unsigned long flags; rdsdebug("conn %p now on map %p\n", conn, conn->c_lcong); spin_lock_irqsave(&rds_cong_lock, flags); list_add_tail(&conn->c_map_item, &conn->c_lcong->m_conn_list); spin_unlock_irqrestore(&rds_cong_lock, flags); } void rds_cong_remove_conn(struct rds_connection *conn) { unsigned long flags; rdsdebug("removing conn %p from map %p\n", conn, conn->c_lcong); spin_lock_irqsave(&rds_cong_lock, flags); list_del_init(&conn->c_map_item); spin_unlock_irqrestore(&rds_cong_lock, flags); } int rds_cong_get_maps(struct rds_connection *conn) { conn->c_lcong = rds_cong_from_addr(&conn->c_laddr); conn->c_fcong = rds_cong_from_addr(&conn->c_faddr); if (!(conn->c_lcong && conn->c_fcong)) return -ENOMEM; return 0; } void rds_cong_queue_updates(struct rds_cong_map *map) { struct rds_connection *conn; unsigned long flags; spin_lock_irqsave(&rds_cong_lock, flags); list_for_each_entry(conn, &map->m_conn_list, c_map_item) { struct rds_conn_path *cp = &conn->c_path[0]; rcu_read_lock(); if (!test_and_set_bit(0, &conn->c_map_queued) && !rds_destroy_pending(cp->cp_conn)) { rds_stats_inc(s_cong_update_queued); /* We cannot inline the call to rds_send_xmit() here * for two reasons (both pertaining to a TCP transport): * 1. When we get here from the receive path, we * are already holding the sock_lock (held by * tcp_v4_rcv()). So inlining calls to * tcp_setsockopt and/or tcp_sendmsg will deadlock * when it tries to get the sock_lock(). * 2. Interrupts are masked so that we can mark the * port congested from both send and recv paths. * (See comment around declaration of rds_cong_lock). * An attempt to get the sock_lock() here will * therefore trigger warnings. * Defer the xmit to rds_send_worker() instead.
*/ queue_delayed_work(rds_wq, &cp->cp_send_w, 0); } rcu_read_unlock(); } spin_unlock_irqrestore(&rds_cong_lock, flags); } void rds_cong_map_updated(struct rds_cong_map *map, uint64_t portmask) { rdsdebug("waking map %p for %pI6c\n", map, &map->m_addr); rds_stats_inc(s_cong_update_received); atomic_inc(&rds_cong_generation); if (waitqueue_active(&map->m_waitq)) wake_up(&map->m_waitq); if (waitqueue_active(&rds_poll_waitq)) wake_up_all(&rds_poll_waitq); if (portmask && !list_empty(&rds_cong_monitor)) { unsigned long flags; struct rds_sock *rs; read_lock_irqsave(&rds_cong_monitor_lock, flags); list_for_each_entry(rs, &rds_cong_monitor, rs_cong_list) { spin_lock(&rs->rs_lock); rs->rs_cong_notify |= (rs->rs_cong_mask & portmask); rs->rs_cong_mask &= ~portmask; spin_unlock(&rs->rs_lock); if (rs->rs_cong_notify) rds_wake_sk_sleep(rs); } read_unlock_irqrestore(&rds_cong_monitor_lock, flags); } } EXPORT_SYMBOL_GPL(rds_cong_map_updated); int rds_cong_updated_since(unsigned long *recent) { unsigned long gen = atomic_read(&rds_cong_generation); if (likely(*recent == gen)) return 0; *recent = gen; return 1; } /* * We're called under the locking that protects the socket's receive buffer * consumption. This makes it a lot easier for the caller to only call us * when it knows that an existing set bit needs to be cleared, and vice versa. * We can't block and we need to deal with concurrent sockets working against * the same per-address map. */ void rds_cong_set_bit(struct rds_cong_map *map, __be16 port) { unsigned long i; unsigned long off; rdsdebug("setting congestion for %pI6c:%u in map %p\n", &map->m_addr, ntohs(port), map); i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS; off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS; set_bit_le(off, (void *)map->m_page_addrs[i]); } void rds_cong_clear_bit(struct rds_cong_map *map, __be16 port) { unsigned long i; unsigned long off; rdsdebug("clearing congestion for %pI6c:%u in map %p\n", &map->m_addr, ntohs(port), map); i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS; off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS; clear_bit_le(off, (void *)map->m_page_addrs[i]); } static int rds_cong_test_bit(struct rds_cong_map *map, __be16 port) { unsigned long i; unsigned long off; i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS; off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS; return test_bit_le(off, (void *)map->m_page_addrs[i]); } void rds_cong_add_socket(struct rds_sock *rs) { unsigned long flags; write_lock_irqsave(&rds_cong_monitor_lock, flags); if (list_empty(&rs->rs_cong_list)) list_add(&rs->rs_cong_list, &rds_cong_monitor); write_unlock_irqrestore(&rds_cong_monitor_lock, flags); } void rds_cong_remove_socket(struct rds_sock *rs) { unsigned long flags; struct rds_cong_map *map; write_lock_irqsave(&rds_cong_monitor_lock, flags); list_del_init(&rs->rs_cong_list); write_unlock_irqrestore(&rds_cong_monitor_lock, flags); /* update congestion map for now-closed port */ spin_lock_irqsave(&rds_cong_lock, flags); map = rds_cong_tree_walk(&rs->rs_bound_addr, NULL); spin_unlock_irqrestore(&rds_cong_lock, flags); if (map && rds_cong_test_bit(map, rs->rs_bound_port)) { rds_cong_clear_bit(map, rs->rs_bound_port); rds_cong_queue_updates(map); } } int rds_cong_wait(struct rds_cong_map *map, __be16 port, int nonblock, struct rds_sock *rs) { if (!rds_cong_test_bit(map, port)) return 0; if (nonblock) { if (rs && rs->rs_cong_monitor) { unsigned long flags; /* It would have been nice to have an atomic set_bit on * a uint64_t.
*/ spin_lock_irqsave(&rs->rs_lock, flags); rs->rs_cong_mask |= RDS_CONG_MONITOR_MASK(ntohs(port)); spin_unlock_irqrestore(&rs->rs_lock, flags); /* Test again - a congestion update may have arrived in * the meantime. */ if (!rds_cong_test_bit(map, port)) return 0; } rds_stats_inc(s_cong_send_error); return -ENOBUFS; } rds_stats_inc(s_cong_send_blocked); rdsdebug("waiting on map %p for port %u\n", map, be16_to_cpu(port)); return wait_event_interruptible(map->m_waitq, !rds_cong_test_bit(map, port)); } void rds_cong_exit(void) { struct rb_node *node; struct rds_cong_map *map; unsigned long i; while ((node = rb_first(&rds_cong_tree))) { map = rb_entry(node, struct rds_cong_map, m_rb_node); rdsdebug("freeing map %p\n", map); rb_erase(&map->m_rb_node, &rds_cong_tree); for (i = 0; i < RDS_CONG_MAP_PAGES && map->m_page_addrs[i]; i++) free_page(map->m_page_addrs[i]); kfree(map); } } /* * Allocate a RDS message containing a congestion update. */ struct rds_message *rds_cong_update_alloc(struct rds_connection *conn) { struct rds_cong_map *map = conn->c_lcong; struct rds_message *rm; rm = rds_message_map_pages(map->m_page_addrs, RDS_CONG_MAP_BYTES); if (!IS_ERR(rm)) rm->m_inc.i_hdr.h_flags = RDS_FLAG_CONG_BITMAP; return rm; }
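/*
 * Illustrative sketch, not part of RDS: a userspace analogue of the
 * port-to-bitmap indexing used by rds_cong_set_bit()/rds_cong_test_bit()
 * above.  The constants below are assumptions standing in for PAGE_SIZE,
 * RDS_CONG_MAP_PAGE_BITS and RDS_CONG_MAP_PAGES (4 KiB pages, one bit per
 * possible 16-bit port, so two pages of bitmap); only the page/bit
 * arithmetic is the point, and the byte-wise bit access stands in for
 * set_bit_le()/test_bit_le().
 */
#include <stdint.h>
#include <stdio.h>

#define EX_PAGE_SIZE		4096UL			/* assumed page size */
#define EX_MAP_PAGE_BITS	(EX_PAGE_SIZE * 8)	/* bits per bitmap page */
#define EX_MAP_PAGES		((65536 + EX_MAP_PAGE_BITS - 1) / EX_MAP_PAGE_BITS)

static uint8_t ex_map[EX_MAP_PAGES][EX_PAGE_SIZE];	/* stands in for m_page_addrs[] */

static void ex_set_port_congested(uint16_t port)
{
	unsigned long i   = port / EX_MAP_PAGE_BITS;	/* which page holds this port */
	unsigned long off = port % EX_MAP_PAGE_BITS;	/* which bit within that page */

	ex_map[i][off / 8] |= 1u << (off % 8);
}

static int ex_port_is_congested(uint16_t port)
{
	unsigned long i   = port / EX_MAP_PAGE_BITS;
	unsigned long off = port % EX_MAP_PAGE_BITS;

	return (ex_map[i][off / 8] >> (off % 8)) & 1;
}

int main(void)
{
	/* the sender-side check in rds_cong_wait() reduces to this cheap test */
	ex_set_port_congested(47);
	printf("port 47 congested: %d\n", ex_port_is_congested(47));
	printf("port 48 congested: %d\n", ex_port_is_congested(48));
	return 0;
}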
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_HIGHMEM_H #define _LINUX_HIGHMEM_H #include <linux/fs.h> #include <linux/kernel.h> #include <linux/bug.h> #include <linux/mm.h> #include <linux/uaccess.h> #include <linux/hardirq.h> #include <asm/cacheflush.h> #ifndef ARCH_HAS_FLUSH_ANON_PAGE static inline void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr) { } #endif #ifndef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE static inline void flush_kernel_dcache_page(struct page *page) { } static inline void flush_kernel_vmap_range(void *vaddr, int size) { } static inline void invalidate_kernel_vmap_range(void *vaddr, int size) { } #endif #include <asm/kmap_types.h> #ifdef CONFIG_HIGHMEM #include <asm/highmem.h> /* declarations for linux/mm/highmem.c */ unsigned int nr_free_highpages(void); extern unsigned long totalhigh_pages; void kmap_flush_unused(void); struct page *kmap_to_page(void *addr); #else /* CONFIG_HIGHMEM */ static inline unsigned int nr_free_highpages(void) { return 0; } static inline struct page *kmap_to_page(void *addr) { return virt_to_page(addr); } #define totalhigh_pages 0UL #ifndef ARCH_HAS_KMAP static inline void *kmap(struct page *page) { might_sleep(); return page_address(page); } static inline void kunmap(struct page *page) { } static inline void *kmap_atomic(struct page *page) { preempt_disable(); pagefault_disable(); return page_address(page); } #define kmap_atomic_prot(page, prot) kmap_atomic(page) static inline void __kunmap_atomic(void *addr) { pagefault_enable(); preempt_enable(); } #define kmap_atomic_pfn(pfn) kmap_atomic(pfn_to_page(pfn)) #define kmap_flush_unused() do {} while(0) #endif #endif /* CONFIG_HIGHMEM */ #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32) DECLARE_PER_CPU(int, __kmap_atomic_idx); static inline int kmap_atomic_idx_push(void) { int idx = __this_cpu_inc_return(__kmap_atomic_idx) - 1; #ifdef CONFIG_DEBUG_HIGHMEM WARN_ON_ONCE(in_irq() && !irqs_disabled()); BUG_ON(idx >= KM_TYPE_NR); #endif return idx; } static inline int kmap_atomic_idx(void) { return __this_cpu_read(__kmap_atomic_idx) - 1; } static inline void kmap_atomic_idx_pop(void) { #ifdef CONFIG_DEBUG_HIGHMEM int idx = __this_cpu_dec_return(__kmap_atomic_idx); BUG_ON(idx < 0); #else __this_cpu_dec(__kmap_atomic_idx); #endif } #endif /* * Prevent people trying to call kunmap_atomic() as if it were kunmap() * kunmap_atomic() should get the return value of kmap_atomic, not the page.
*/ #define kunmap_atomic(addr) \ do { \ BUILD_BUG_ON(__same_type((addr), struct page *)); \ __kunmap_atomic(addr); \ } while (0) /* when CONFIG_HIGHMEM is not set these will be plain clear/copy_page */ #ifndef clear_user_highpage static inline void clear_user_highpage(struct page *page, unsigned long vaddr) { void *addr = kmap_atomic(page); clear_user_page(addr, vaddr, page); kunmap_atomic(addr); } #endif #ifndef __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE /** * __alloc_zeroed_user_highpage - Allocate a zeroed HIGHMEM page for a VMA with caller-specified movable GFP flags * @movableflags: The GFP flags related to the pages future ability to move like __GFP_MOVABLE * @vma: The VMA the page is to be allocated for * @vaddr: The virtual address the page will be inserted into * * This function will allocate a page for a VMA but the caller is expected * to specify via movableflags whether the page will be movable in the * future or not * * An architecture may override this function by defining * __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE and providing their own * implementation. */ static inline struct page * __alloc_zeroed_user_highpage(gfp_t movableflags, struct vm_area_struct *vma, unsigned long vaddr) { struct page *page = alloc_page_vma(GFP_HIGHUSER | movableflags, vma, vaddr); if (page) clear_user_highpage(page, vaddr); return page; } #endif /** * alloc_zeroed_user_highpage_movable - Allocate a zeroed HIGHMEM page for a VMA that the caller knows can move * @vma: The VMA the page is to be allocated for * @vaddr: The virtual address the page will be inserted into * * This function will allocate a page for a VMA that the caller knows will * be able to migrate in the future using move_pages() or reclaimed */ static inline struct page * alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma, unsigned long vaddr) { return __alloc_zeroed_user_highpage(__GFP_MOVABLE, vma, vaddr); } static inline void clear_highpage(struct page *page) { void *kaddr = kmap_atomic(page); clear_page(kaddr); kunmap_atomic(kaddr); } static inline void zero_user_segments(struct page *page, unsigned start1, unsigned end1, unsigned start2, unsigned end2) { void *kaddr = kmap_atomic(page); BUG_ON(end1 > PAGE_SIZE || end2 > PAGE_SIZE); if (end1 > start1) memset(kaddr + start1, 0, end1 - start1); if (end2 > start2) memset(kaddr + start2, 0, end2 - start2); kunmap_atomic(kaddr); flush_dcache_page(page); } static inline void zero_user_segment(struct page *page, unsigned start, unsigned end) { zero_user_segments(page, start, end, 0, 0); } static inline void zero_user(struct page *page, unsigned start, unsigned size) { zero_user_segments(page, start, start + size, 0, 0); } #ifndef __HAVE_ARCH_COPY_USER_HIGHPAGE static inline void copy_user_highpage(struct page *to, struct page *from, unsigned long vaddr, struct vm_area_struct *vma) { char *vfrom, *vto; vfrom = kmap_atomic(from); vto = kmap_atomic(to); copy_user_page(vto, vfrom, vaddr, to); kunmap_atomic(vto); kunmap_atomic(vfrom); } #endif #ifndef __HAVE_ARCH_COPY_HIGHPAGE static inline void copy_highpage(struct page *to, struct page *from) { char *vfrom, *vto; vfrom = kmap_atomic(from); vto = kmap_atomic(to); copy_page(vto, vfrom); kunmap_atomic(vto); kunmap_atomic(vfrom); } #endif #endif /* _LINUX_HIGHMEM_H */
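/*
 * Illustrative sketch, not part of this header: the calling convention the
 * comment above describes.  kunmap_atomic() takes the address returned by
 * kmap_atomic(), never the struct page, and nested atomic mappings are
 * released in reverse order, as copy_highpage() does.  The helper name and
 * its XOR payload are assumptions made up for this example; it would have
 * to live in kernel code to actually build.
 */
#include <linux/highmem.h>

static inline void example_xor_highpage(struct page *dst, struct page *src)
{
	char *vsrc = kmap_atomic(src);	/* temporary, non-sleeping mapping */
	char *vdst = kmap_atomic(dst);
	size_t i;

	for (i = 0; i < PAGE_SIZE; i++)
		vdst[i] ^= vsrc[i];

	kunmap_atomic(vdst);	/* reverse order of the kmap_atomic() calls */
	kunmap_atomic(vsrc);	/* pass the returned address, not the page */
}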
/* * net/core/netprio_cgroup.c Priority Control Group * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * * Authors: Neil Horman <nhorman@tuxdriver.com> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/slab.h> #include <linux/types.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/skbuff.h> #include <linux/cgroup.h> #include <linux/rcupdate.h> #include <linux/atomic.h> #include <linux/sched/task.h> #include <net/rtnetlink.h> #include <net/pkt_cls.h> #include <net/sock.h> #include <net/netprio_cgroup.h> #include <linux/fdtable.h> /* * netprio allocates a per-net_device priomap array which is indexed by * css->id. Limiting css ID to 16 bits doesn't lose anything. */ #define NETPRIO_ID_MAX USHRT_MAX #define PRIOMAP_MIN_SZ 128 /* * Extend @dev->priomap so that it's large enough to accommodate * @target_idx. @dev->priomap.priomap_len > @target_idx after successful * return. Must be called under rtnl lock. */ static int extend_netdev_table(struct net_device *dev, u32 target_idx) { struct netprio_map *old, *new; size_t new_sz, new_len; /* is the existing priomap large enough? */ old = rtnl_dereference(dev->priomap); if (old && old->priomap_len > target_idx) return 0; /* * Determine the new size. Let's keep it power-of-two. We start * from PRIOMAP_MIN_SZ and double it until it's large enough to * accommodate @target_idx. */ new_sz = PRIOMAP_MIN_SZ; while (true) { new_len = (new_sz - offsetof(struct netprio_map, priomap)) / sizeof(new->priomap[0]); if (new_len > target_idx) break; new_sz *= 2; /* overflowed? */ if (WARN_ON(new_sz < PRIOMAP_MIN_SZ)) return -ENOSPC; } /* allocate & copy */ new = kzalloc(new_sz, GFP_KERNEL); if (!new) return -ENOMEM; if (old) memcpy(new->priomap, old->priomap, old->priomap_len * sizeof(old->priomap[0])); new->priomap_len = new_len; /* install the new priomap */ rcu_assign_pointer(dev->priomap, new); if (old) kfree_rcu(old, rcu); return 0; } /** * netprio_prio - return the effective netprio of a cgroup-net_device pair * @css: css part of the target pair * @dev: net_device part of the target pair * * Should be called under RCU read or rtnl lock.
*/ static u32 netprio_prio(struct cgroup_subsys_state *css, struct net_device *dev) { struct netprio_map *map = rcu_dereference_rtnl(dev->priomap); int id = css->cgroup->id; if (map && id < map->priomap_len) return map->priomap[id]; return 0; } /** * netprio_set_prio - set netprio on a cgroup-net_device pair * @css: css part of the target pair * @dev: net_device part of the target pair * @prio: prio to set * * Set netprio to @prio on @css-@dev pair. Should be called under rtnl * lock and may fail under memory pressure for non-zero @prio. */ static int netprio_set_prio(struct cgroup_subsys_state *css, struct net_device *dev, u32 prio) { struct netprio_map *map; int id = css->cgroup->id; int ret; /* avoid extending priomap for zero writes */ map = rtnl_dereference(dev->priomap); if (!prio && (!map || map->priomap_len <= id)) return 0; ret = extend_netdev_table(dev, id); if (ret) return ret; map = rtnl_dereference(dev->priomap); map->priomap[id] = prio; return 0; } static struct cgroup_subsys_state * cgrp_css_alloc(struct cgroup_subsys_state *parent_css) { struct cgroup_subsys_state *css; css = kzalloc(sizeof(*css), GFP_KERNEL); if (!css) return ERR_PTR(-ENOMEM); return css; } static int cgrp_css_online(struct cgroup_subsys_state *css) { struct cgroup_subsys_state *parent_css = css->parent; struct net_device *dev; int ret = 0; if (css->id > NETPRIO_ID_MAX) return -ENOSPC; if (!parent_css) return 0; rtnl_lock(); /* * Inherit prios from the parent. As all prios are set during * onlining, there is no need to clear them on offline. */ for_each_netdev(&init_net, dev) { u32 prio = netprio_prio(parent_css, dev); ret = netprio_set_prio(css, dev, prio); if (ret) break; } rtnl_unlock(); return ret; } static void cgrp_css_free(struct cgroup_subsys_state *css) { kfree(css); } static u64 read_prioidx(struct cgroup_subsys_state *css, struct cftype *cft) { return css->cgroup->id; } static int read_priomap(struct seq_file *sf, void *v) { struct net_device *dev; rcu_read_lock(); for_each_netdev_rcu(&init_net, dev) seq_printf(sf, "%s %u\n", dev->name, netprio_prio(seq_css(sf), dev)); rcu_read_unlock(); return 0; } static ssize_t write_priomap(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { char devname[IFNAMSIZ + 1]; struct net_device *dev; u32 prio; int ret; if (sscanf(buf, "%"__stringify(IFNAMSIZ)"s %u", devname, &prio) != 2) return -EINVAL; dev = dev_get_by_name(&init_net, devname); if (!dev) return -ENODEV; cgroup_sk_alloc_disable(); rtnl_lock(); ret = netprio_set_prio(of_css(of), dev, prio); rtnl_unlock(); dev_put(dev); return ret ?: nbytes; } static int update_netprio(const void *v, struct file *file, unsigned n) { int err; struct socket *sock = sock_from_file(file, &err); if (sock) { spin_lock(&cgroup_sk_update_lock); sock_cgroup_set_prioidx(&sock->sk->sk_cgrp_data, (unsigned long)v); spin_unlock(&cgroup_sk_update_lock); } return 0; } static void net_prio_attach(struct cgroup_taskset *tset) { struct task_struct *p; struct cgroup_subsys_state *css; cgroup_sk_alloc_disable(); cgroup_taskset_for_each(p, css, tset) { void *v = (void *)(unsigned long)css->cgroup->id; task_lock(p); iterate_fd(p->files, 0, update_netprio, v); task_unlock(p); } } static struct cftype ss_files[] = { { .name = "prioidx", .read_u64 = read_prioidx, }, { .name = "ifpriomap", .seq_show = read_priomap, .write = write_priomap, }, { } /* terminate */ }; struct cgroup_subsys net_prio_cgrp_subsys = { .css_alloc = cgrp_css_alloc, .css_online = cgrp_css_online, .css_free = cgrp_css_free, .attach = net_prio_attach, 
.legacy_cftypes = ss_files, }; static int netprio_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct netprio_map *old; /* * Note this is called with rtnl_lock held so we have update side * protection on our rcu assignments */ switch (event) { case NETDEV_UNREGISTER: old = rtnl_dereference(dev->priomap); RCU_INIT_POINTER(dev->priomap, NULL); if (old) kfree_rcu(old, rcu); break; } return NOTIFY_DONE; } static struct notifier_block netprio_device_notifier = { .notifier_call = netprio_device_event }; static int __init init_cgroup_netprio(void) { register_netdevice_notifier(&netprio_device_notifier); return 0; } subsys_initcall(init_cgroup_netprio); MODULE_LICENSE("GPL v2");
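/*
 * Illustrative sketch, not kernel code: the power-of-two growth rule that
 * extend_netdev_table() applies above, reduced to its arithmetic.  The
 * header size and minimum allocation below are assumptions standing in for
 * offsetof(struct netprio_map, priomap) and PRIOMAP_MIN_SZ; the doubling
 * loop and the "entries must cover target_idx" condition are the point.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define EX_HDR_SZ	16u	/* assumed bytes before the u32 priomap[] array */
#define EX_MIN_SZ	128u	/* assumed minimum allocation, like PRIOMAP_MIN_SZ */

static size_t ex_priomap_alloc_size(uint32_t target_idx, size_t *entries)
{
	size_t sz = EX_MIN_SZ;
	size_t len;

	for (;;) {
		len = (sz - EX_HDR_SZ) / sizeof(uint32_t);
		if (len > target_idx)
			break;
		sz *= 2;	/* keep the allocation power-of-two, as the kernel loop does */
	}

	*entries = len;
	return sz;
}

int main(void)
{
	size_t entries;
	size_t sz = ex_priomap_alloc_size(100, &entries);

	/* with these assumed constants, css id 100 needs a 512-byte table of 124 entries */
	printf("%zu bytes, %zu entries, to cover css id 100\n", sz, entries);
	return 0;
}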
/* * linux/mm/memory.c * * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds */ /* * demand-loading started 01.12.91 - seems it is high on the list of * things wanted, and it should be easy to implement. - Linus */ /* * Ok, demand-loading was easy, shared pages a little bit trickier. Shared * pages started 02.12.91, seems to work. - Linus. * * Tested sharing by executing about 30 /bin/sh: under the old kernel it * would have taken more than the 6M I have free, but it worked well as * far as I could see. * * Also corrected some "invalidate()"s - I wasn't doing enough of them. */ /* * Real VM (paging to/from disk) started 18.12.91. Much more work and * thought has to go into this. Oh, well.. * 19.12.91 - works, somewhat. Sometimes I get faults, don't know why. * Found it. Everything seems to work now. * 20.12.91 - Ok, making the swap-device changeable like the root. */ /* * 05.04.94 - Multi-page memory management added for v1.1. * Idea by Alex Bligh (alex@cconcepts.co.uk) * * 16.07.99 - Support of BIGMEM added by Gerhard Wichert, Siemens AG * (Gerhard.Wichert@pdb.siemens.de) * * Aug/Sep 2004 Changed to four level page tables (Andi Kleen) */ #include <linux/kernel_stat.h> #include <linux/mm.h> #include <linux/sched/mm.h> #include <linux/sched/coredump.h> #include <linux/sched/numa_balancing.h> #include <linux/sched/task.h> #include <linux/hugetlb.h> #include <linux/mman.h> #include <linux/swap.h> #include <linux/highmem.h> #include <linux/pagemap.h> #include <linux/memremap.h> #include <linux/ksm.h> #include <linux/rmap.h> #include <linux/export.h> #include <linux/delayacct.h> #include <linux/init.h> #include <linux/pfn_t.h> #include <linux/writeback.h> #include <linux/memcontrol.h> #include <linux/mmu_notifier.h> #include <linux/swapops.h> #include <linux/elf.h> #include <linux/gfp.h> #include <linux/migrate.h> #include <linux/string.h> #include <linux/dma-debug.h> #include <linux/debugfs.h> #include <linux/userfaultfd_k.h> #include <linux/dax.h> #include <linux/oom.h> #include <asm/io.h> #include <asm/mmu_context.h> #include <asm/pgalloc.h> #include <linux/uaccess.h> #include <asm/tlb.h> #include <asm/tlbflush.h> #include <asm/pgtable.h> #include "internal.h" #if defined(LAST_CPUPID_NOT_IN_PAGE_FLAGS) && !defined(CONFIG_COMPILE_TEST) #warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_cpupid. #endif #ifndef CONFIG_NEED_MULTIPLE_NODES /* use the per-pgdat data instead for discontigmem - mbligh */ unsigned long max_mapnr; EXPORT_SYMBOL(max_mapnr); struct page *mem_map; EXPORT_SYMBOL(mem_map); #endif /* * A number of key systems in x86 including ioremap() rely on the assumption * that high_memory defines the upper bound on direct map memory, the end * of ZONE_NORMAL.
Under CONFIG_DISCONTIG this means that max_low_pfn and * highstart_pfn must be the same; there must be no gap between ZONE_NORMAL * and ZONE_HIGHMEM. */ void *high_memory; EXPORT_SYMBOL(high_memory); /* * Randomize the address space (stacks, mmaps, brk, etc.). * * ( When CONFIG_COMPAT_BRK=y we exclude brk from randomization, * as ancient (libc5 based) binaries can segfault. ) */ int randomize_va_space __read_mostly = #ifdef CONFIG_COMPAT_BRK 1; #else 2; #endif #ifndef arch_faults_on_old_pte static inline bool arch_faults_on_old_pte(void) { /* * Those arches which don't have hw access flag feature need to * implement their own helper. By default, "true" means pagefault * will be hit on old pte. */ return true; } #endif static int __init disable_randmaps(char *s) { randomize_va_space = 0; return 1; } __setup("norandmaps", disable_randmaps); unsigned long zero_pfn __read_mostly; EXPORT_SYMBOL(zero_pfn); unsigned long highest_memmap_pfn __read_mostly; /* * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init() */ static int __init init_zero_pfn(void) { zero_pfn = page_to_pfn(ZERO_PAGE(0)); return 0; } early_initcall(init_zero_pfn); #if defined(SPLIT_RSS_COUNTING) void sync_mm_rss(struct mm_struct *mm) { int i; for (i = 0; i < NR_MM_COUNTERS; i++) { if (current->rss_stat.count[i]) { add_mm_counter(mm, i, current->rss_stat.count[i]); current->rss_stat.count[i] = 0; } } current->rss_stat.events = 0; } static void add_mm_counter_fast(struct mm_struct *mm, int member, int val) { struct task_struct *task = current; if (likely(task->mm == mm)) task->rss_stat.count[member] += val; else add_mm_counter(mm, member, val); } #define inc_mm_counter_fast(mm, member) add_mm_counter_fast(mm, member, 1) #define dec_mm_counter_fast(mm, member) add_mm_counter_fast(mm, member, -1) /* sync counter once per 64 page faults */ #define TASK_RSS_EVENTS_THRESH (64) static void check_sync_rss_stat(struct task_struct *task) { if (unlikely(task != current)) return; if (unlikely(task->rss_stat.events++ > TASK_RSS_EVENTS_THRESH)) sync_mm_rss(task->mm); } #else /* SPLIT_RSS_COUNTING */ #define inc_mm_counter_fast(mm, member) inc_mm_counter(mm, member) #define dec_mm_counter_fast(mm, member) dec_mm_counter(mm, member) static void check_sync_rss_stat(struct task_struct *task) { } #endif /* SPLIT_RSS_COUNTING */ #ifdef HAVE_GENERIC_MMU_GATHER static bool tlb_next_batch(struct mmu_gather *tlb) { struct mmu_gather_batch *batch; batch = tlb->active; if (batch->next) { tlb->active = batch->next; return true; } if (tlb->batch_count == MAX_GATHER_BATCH_COUNT) return false; batch = (void *)__get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0); if (!batch) return false; tlb->batch_count++; batch->next = NULL; batch->nr = 0; batch->max = MAX_GATHER_BATCH; tlb->active->next = batch; tlb->active = batch; return true; } void arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) { tlb->mm = mm; /* Is it from 0 to ~0? 
*/ tlb->fullmm = !(start | (end+1)); tlb->need_flush_all = 0; tlb->local.next = NULL; tlb->local.nr = 0; tlb->local.max = ARRAY_SIZE(tlb->__pages); tlb->active = &tlb->local; tlb->batch_count = 0; #ifdef CONFIG_HAVE_RCU_TABLE_FREE tlb->batch = NULL; #endif tlb->page_size = 0; __tlb_reset_range(tlb); } static void tlb_flush_mmu_free(struct mmu_gather *tlb) { struct mmu_gather_batch *batch; #ifdef CONFIG_HAVE_RCU_TABLE_FREE tlb_table_flush(tlb); #endif for (batch = &tlb->local; batch && batch->nr; batch = batch->next) { free_pages_and_swap_cache(batch->pages, batch->nr); batch->nr = 0; } tlb->active = &tlb->local; } void tlb_flush_mmu(struct mmu_gather *tlb) { tlb_flush_mmu_tlbonly(tlb); tlb_flush_mmu_free(tlb); } /* tlb_finish_mmu * Called at the end of the shootdown operation to free up any resources * that were required. */ void arch_tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end, bool force) { struct mmu_gather_batch *batch, *next; if (force) __tlb_adjust_range(tlb, start, end - start); tlb_flush_mmu(tlb); /* keep the page table cache within bounds */ check_pgt_cache(); for (batch = tlb->local.next; batch; batch = next) { next = batch->next; free_pages((unsigned long)batch, 0); } tlb->local.next = NULL; } /* __tlb_remove_page * Must perform the equivalent to __free_pte(pte_get_and_clear(ptep)), while * handling the additional races in SMP caused by other CPUs caching valid * mappings in their TLBs. Returns the number of free page slots left. * When out of page slots we must call tlb_flush_mmu(). *returns true if the caller should flush. */ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size) { struct mmu_gather_batch *batch; VM_BUG_ON(!tlb->end); VM_WARN_ON(tlb->page_size != page_size); batch = tlb->active; /* * Add the page and check if we are full. If so * force a flush. */ batch->pages[batch->nr++] = page; if (batch->nr == batch->max) { if (!tlb_next_batch(tlb)) return true; batch = tlb->active; } VM_BUG_ON_PAGE(batch->nr > batch->max, page); return false; } void tlb_flush_pmd_range(struct mmu_gather *tlb, unsigned long address, unsigned long size) { if (tlb->page_size != 0 && tlb->page_size != PMD_SIZE) tlb_flush_mmu(tlb); tlb->page_size = PMD_SIZE; tlb->start = min(tlb->start, address); tlb->end = max(tlb->end, address + size); } #endif /* HAVE_GENERIC_MMU_GATHER */ #ifdef CONFIG_HAVE_RCU_TABLE_FREE /* * See the comment near struct mmu_table_batch. */ /* * If we want tlb_remove_table() to imply TLB invalidates. */ static inline void tlb_table_invalidate(struct mmu_gather *tlb) { #ifdef CONFIG_HAVE_RCU_TABLE_INVALIDATE /* * Invalidate page-table caches used by hardware walkers. Then we still * need to RCU-sched wait while freeing the pages because software * walkers can still be in-flight. */ tlb_flush_mmu_tlbonly(tlb); #endif } static void tlb_remove_table_smp_sync(void *arg) { /* Simply deliver the interrupt */ } static void tlb_remove_table_one(void *table) { /* * This isn't an RCU grace period and hence the page-tables cannot be * assumed to be actually RCU-freed. * * It is however sufficient for software page-table walkers that rely on * IRQ disabling. See the comment near struct mmu_table_batch. 
*/ smp_call_function(tlb_remove_table_smp_sync, NULL, 1); __tlb_remove_table(table); } static void tlb_remove_table_rcu(struct rcu_head *head) { struct mmu_table_batch *batch; int i; batch = container_of(head, struct mmu_table_batch, rcu); for (i = 0; i < batch->nr; i++) __tlb_remove_table(batch->tables[i]); free_page((unsigned long)batch); } void tlb_table_flush(struct mmu_gather *tlb) { struct mmu_table_batch **batch = &tlb->batch; if (*batch) { tlb_table_invalidate(tlb); call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu); *batch = NULL; } } void tlb_remove_table(struct mmu_gather *tlb, void *table) { struct mmu_table_batch **batch = &tlb->batch; if (*batch == NULL) { *batch = (struct mmu_table_batch *)__get_free_page(GFP_NOWAIT | __GFP_NOWARN); if (*batch == NULL) { tlb_table_invalidate(tlb); tlb_remove_table_one(table); return; } (*batch)->nr = 0; } (*batch)->tables[(*batch)->nr++] = table; if ((*batch)->nr == MAX_TABLE_BATCH) tlb_table_flush(tlb); } #endif /* CONFIG_HAVE_RCU_TABLE_FREE */ /** * tlb_gather_mmu - initialize an mmu_gather structure for page-table tear-down * @tlb: the mmu_gather structure to initialize * @mm: the mm_struct of the target address space * @start: start of the region that will be removed from the page-table * @end: end of the region that will be removed from the page-table * * Called to initialize an (on-stack) mmu_gather structure for page-table * tear-down from @mm. The @start and @end are set to 0 and -1 * respectively when @mm is without users and we're going to destroy * the full address space (exit/execve). */ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) { arch_tlb_gather_mmu(tlb, mm, start, end); inc_tlb_flush_pending(tlb->mm); } void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) { /* * If parallel threads are doing PTE changes on the same range * under a non-exclusive lock (e.g., mmap_sem read-side) but defer the TLB * flush by batching, a thread with a stable TLB entry can fail to flush * the TLB by observing pte_none or !pte_dirty, for example, so flush the TLB * forcefully if we detect parallel PTE batching threads. */ bool force = mm_tlb_flush_nested(tlb->mm); arch_tlb_finish_mmu(tlb, start, end, force); dec_tlb_flush_pending(tlb->mm); } /* * Note: this doesn't free the actual pages themselves. That * has been handled earlier when unmapping all the memory regions.
*/ static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd, unsigned long addr) { pgtable_t token = pmd_pgtable(*pmd); pmd_clear(pmd); pte_free_tlb(tlb, token, addr); mm_dec_nr_ptes(tlb->mm); } static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling) { pmd_t *pmd; unsigned long next; unsigned long start; start = addr; pmd = pmd_offset(pud, addr); do { next = pmd_addr_end(addr, end); if (pmd_none_or_clear_bad(pmd)) continue; free_pte_range(tlb, pmd, addr); } while (pmd++, addr = next, addr != end); start &= PUD_MASK; if (start < floor) return; if (ceiling) { ceiling &= PUD_MASK; if (!ceiling) return; } if (end - 1 > ceiling - 1) return; pmd = pmd_offset(pud, start); pud_clear(pud); pmd_free_tlb(tlb, pmd, start); mm_dec_nr_pmds(tlb->mm); } static inline void free_pud_range(struct mmu_gather *tlb, p4d_t *p4d, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling) { pud_t *pud; unsigned long next; unsigned long start; start = addr; pud = pud_offset(p4d, addr); do { next = pud_addr_end(addr, end); if (pud_none_or_clear_bad(pud)) continue; free_pmd_range(tlb, pud, addr, next, floor, ceiling); } while (pud++, addr = next, addr != end); start &= P4D_MASK; if (start < floor) return; if (ceiling) { ceiling &= P4D_MASK; if (!ceiling) return; } if (end - 1 > ceiling - 1) return; pud = pud_offset(p4d, start); p4d_clear(p4d); pud_free_tlb(tlb, pud, start); mm_dec_nr_puds(tlb->mm); } static inline void free_p4d_range(struct mmu_gather *tlb, pgd_t *pgd, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling) { p4d_t *p4d; unsigned long next; unsigned long start; start = addr; p4d = p4d_offset(pgd, addr); do { next = p4d_addr_end(addr, end); if (p4d_none_or_clear_bad(p4d)) continue; free_pud_range(tlb, p4d, addr, next, floor, ceiling); } while (p4d++, addr = next, addr != end); start &= PGDIR_MASK; if (start < floor) return; if (ceiling) { ceiling &= PGDIR_MASK; if (!ceiling) return; } if (end - 1 > ceiling - 1) return; p4d = p4d_offset(pgd, start); pgd_clear(pgd); p4d_free_tlb(tlb, p4d, start); } /* * This function frees user-level page tables of a process. */ void free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling) { pgd_t *pgd; unsigned long next; /* * The next few lines have given us lots of grief... * * Why are we testing PMD* at this top level? Because often * there will be no work to do at all, and we'd prefer not to * go all the way down to the bottom just to discover that. * * Why all these "- 1"s? Because 0 represents both the bottom * of the address space and the top of it (using -1 for the * top wouldn't help much: the masks would do the wrong thing). * The rule is that addr 0 and floor 0 refer to the bottom of * the address space, but end 0 and ceiling 0 refer to the top * Comparisons need to use "end - 1" and "ceiling - 1" (though * that end 0 case should be mythical). * * Wherever addr is brought up or ceiling brought down, we must * be careful to reject "the opposite 0" before it confuses the * subsequent tests. But what about where end is brought down * by PMD_SIZE below? no, end can't go down to 0 there. * * Whereas we round start (addr) and ceiling down, by different * masks at different levels, in order to test whether a table * now has no other vmas using it, so can be freed, we don't * bother to round floor or end up - the tests don't need that. 
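 *
 * Put differently: an upper-level page-table page is only torn down here
 * when the whole span it maps lies inside the caller's [floor, ceiling)
 * window, i.e. when no neighbouring vma can still be using it.  The
 * rounding and "- 1" games above exist only to make that test safe at
 * the very bottom and very top of the address space.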
*/ addr &= PMD_MASK; if (addr < floor) { addr += PMD_SIZE; if (!addr) return; } if (ceiling) { ceiling &= PMD_MASK; if (!ceiling) return; } if (end - 1 > ceiling - 1) end -= PMD_SIZE; if (addr > end - 1) return; /* * We add page table cache pages with PAGE_SIZE, * (see pte_free_tlb()), flush the tlb if we need */ tlb_remove_check_page_size_change(tlb, PAGE_SIZE); pgd = pgd_offset(tlb->mm, addr); do { next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; free_p4d_range(tlb, pgd, addr, next, floor, ceiling); } while (pgd++, addr = next, addr != end); } void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long floor, unsigned long ceiling) { while (vma) { struct vm_area_struct *next = vma->vm_next; unsigned long addr = vma->vm_start; /* * Hide vma from rmap and truncate_pagecache before freeing * pgtables */ unlink_anon_vmas(vma); unlink_file_vma(vma); if (is_vm_hugetlb_page(vma)) { hugetlb_free_pgd_range(tlb, addr, vma->vm_end, floor, next ? next->vm_start : ceiling); } else { /* * Optimization: gather nearby vmas into one call down */ while (next && next->vm_start <= vma->vm_end + PMD_SIZE && !is_vm_hugetlb_page(next)) { vma = next; next = vma->vm_next; unlink_anon_vmas(vma); unlink_file_vma(vma); } free_pgd_range(tlb, addr, vma->vm_end, floor, next ? next->vm_start : ceiling); } vma = next; } } int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address) { spinlock_t *ptl; pgtable_t new = pte_alloc_one(mm, address); if (!new) return -ENOMEM; /* * Ensure all pte setup (eg. pte page lock and page clearing) are * visible before the pte is made visible to other CPUs by being * put into page tables. * * The other side of the story is the pointer chasing in the page * table walking code (when walking the page table without locking; * ie. most of the time). Fortunately, these data accesses consist * of a chain of data-dependent loads, meaning most CPUs (alpha * being the notable exception) will already guarantee loads are * seen in-order. See the alpha page table accessors for the * smp_read_barrier_depends() barriers in page table walking code. */ smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */ ptl = pmd_lock(mm, pmd); if (likely(pmd_none(*pmd))) { /* Has another populated it ? */ mm_inc_nr_ptes(mm); pmd_populate(mm, pmd, new); new = NULL; } spin_unlock(ptl); if (new) pte_free(mm, new); return 0; } int __pte_alloc_kernel(pmd_t *pmd, unsigned long address) { pte_t *new = pte_alloc_one_kernel(&init_mm, address); if (!new) return -ENOMEM; smp_wmb(); /* See comment in __pte_alloc */ spin_lock(&init_mm.page_table_lock); if (likely(pmd_none(*pmd))) { /* Has another populated it ? */ pmd_populate_kernel(&init_mm, pmd, new); new = NULL; } spin_unlock(&init_mm.page_table_lock); if (new) pte_free_kernel(&init_mm, new); return 0; } static inline void init_rss_vec(int *rss) { memset(rss, 0, sizeof(int) * NR_MM_COUNTERS); } static inline void add_mm_rss_vec(struct mm_struct *mm, int *rss) { int i; if (current->mm == mm) sync_mm_rss(mm); for (i = 0; i < NR_MM_COUNTERS; i++) if (rss[i]) add_mm_counter(mm, i, rss[i]); } /* * This function is called to print an error when a bad pte * is found. For example, we might have a PFN-mapped pte in * a region that doesn't allow it. * * The calling function must still handle the error. 
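 *
 * The report below is rate-limited (a burst of up to 60 messages per
 * minute), dumps the offending page (when known) and a stack trace, and
 * taints the kernel with TAINT_BAD_PAGE so the condition remains visible
 * in later oops reports.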
*/ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr, pte_t pte, struct page *page) { pgd_t *pgd = pgd_offset(vma->vm_mm, addr); p4d_t *p4d = p4d_offset(pgd, addr); pud_t *pud = pud_offset(p4d, addr); pmd_t *pmd = pmd_offset(pud, addr); struct address_space *mapping; pgoff_t index; static unsigned long resume; static unsigned long nr_shown; static unsigned long nr_unshown; /* * Allow a burst of 60 reports, then keep quiet for that minute; * or allow a steady drip of one report per second. */ if (nr_shown == 60) { if (time_before(jiffies, resume)) { nr_unshown++; return; } if (nr_unshown) { pr_alert("BUG: Bad page map: %lu messages suppressed\n", nr_unshown); nr_unshown = 0; } nr_shown = 0; } if (nr_shown++ == 0) resume = jiffies + 60 * HZ; mapping = vma->vm_file ? vma->vm_file->f_mapping : NULL; index = linear_page_index(vma, addr); pr_alert("BUG: Bad page map in process %s pte:%08llx pmd:%08llx\n", current->comm, (long long)pte_val(pte), (long long)pmd_val(*pmd)); if (page) dump_page(page, "bad pte"); pr_alert("addr:%p vm_flags:%08lx anon_vma:%p mapping:%p index:%lx\n", (void *)addr, vma->vm_flags, vma->anon_vma, mapping, index); pr_alert("file:%pD fault:%pf mmap:%pf readpage:%pf\n", vma->vm_file, vma->vm_ops ? vma->vm_ops->fault : NULL, vma->vm_file ? vma->vm_file->f_op->mmap : NULL, mapping ? mapping->a_ops->readpage : NULL); dump_stack(); add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); } /* * vm_normal_page -- This function gets the "struct page" associated with a pte. * * "Special" mappings do not wish to be associated with a "struct page" (either * it doesn't exist, or it exists but they don't want to touch it). In this * case, NULL is returned here. "Normal" mappings do have a struct page. * * There are 2 broad cases. Firstly, an architecture may define a pte_special() * pte bit, in which case this function is trivial. Secondly, an architecture * may not have a spare pte bit, which requires a more complicated scheme, * described below. * * A raw VM_PFNMAP mapping (ie. one that is not COWed) is always considered a * special mapping (even if there are underlying and valid "struct pages"). * COWed pages of a VM_PFNMAP are always normal. * * The way we recognize COWed pages within VM_PFNMAP mappings is through the * rules set up by "remap_pfn_range()": the vma will have the VM_PFNMAP bit * set, and the vm_pgoff will point to the first PFN mapped: thus every special * mapping will always honor the rule * * pfn_of_page == vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT) * * And for normal mappings this is false. * * This restricts such mappings to be a linear translation from virtual address * to pfn. To get around this restriction, we allow arbitrary mappings so long * as the vma is not a COW mapping; in that case, we know that all ptes are * special (because none can have been COWed). * * * In order to support COW of arbitrary special mappings, we have VM_MIXEDMAP. * * VM_MIXEDMAP mappings can likewise contain memory with or without "struct * page" backing, however the difference is that _all_ pages with a struct * page (that is, those where pfn_valid is true) are refcounted and considered * normal pages by the VM. The disadvantage is that pages are refcounted * (which can be slower and simply not an option for some PFNMAP users). The * advantage is that we don't have to follow the strict linearity rule of * PFNMAP mappings in order to support COWable mappings. 
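 *
 * Condensed, the logic below treats a pte as "special" (no struct page
 * returned) roughly when:
 *
 *	if (CONFIG_ARCH_HAS_PTE_SPECIAL)
 *		special = pte_special(pte);
 *	else if (vma->vm_flags & VM_MIXEDMAP)
 *		special = !pfn_valid(pfn);
 *	else if (vma->vm_flags & VM_PFNMAP)
 *		special = pfn == vma->vm_pgoff +
 *				((addr - vma->vm_start) >> PAGE_SHIFT) ||
 *			  !is_cow_mapping(vma->vm_flags);
 *
 * with the zero page and out-of-range pfns also returning NULL.  This is
 * only a summary of the code that follows, not an exact restatement.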
* */ struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_t pte, bool with_public_device) { unsigned long pfn = pte_pfn(pte); if (IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL)) { if (likely(!pte_special(pte))) goto check_pfn; if (vma->vm_ops && vma->vm_ops->find_special_page) return vma->vm_ops->find_special_page(vma, addr); if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)) return NULL; if (is_zero_pfn(pfn)) return NULL; /* * Device public pages are special pages (they are ZONE_DEVICE * pages but different from persistent memory). They behave * allmost like normal pages. The difference is that they are * not on the lru and thus should never be involve with any- * thing that involve lru manipulation (mlock, numa balancing, * ...). * * This is why we still want to return NULL for such page from * vm_normal_page() so that we do not have to special case all * call site of vm_normal_page(). */ if (likely(pfn <= highest_memmap_pfn)) { struct page *page = pfn_to_page(pfn); if (is_device_public_page(page)) { if (with_public_device) return page; return NULL; } } if (pte_devmap(pte)) return NULL; print_bad_pte(vma, addr, pte, NULL); return NULL; } /* !CONFIG_ARCH_HAS_PTE_SPECIAL case follows: */ if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) { if (vma->vm_flags & VM_MIXEDMAP) { if (!pfn_valid(pfn)) return NULL; goto out; } else { unsigned long off; off = (addr - vma->vm_start) >> PAGE_SHIFT; if (pfn == vma->vm_pgoff + off) return NULL; if (!is_cow_mapping(vma->vm_flags)) return NULL; } } if (is_zero_pfn(pfn)) return NULL; check_pfn: if (unlikely(pfn > highest_memmap_pfn)) { print_bad_pte(vma, addr, pte, NULL); return NULL; } /* * NOTE! We still have PageReserved() pages in the page tables. * eg. VDSO mappings can cause them to exist. */ out: return pfn_to_page(pfn); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t pmd) { unsigned long pfn = pmd_pfn(pmd); /* * There is no pmd_special() but there may be special pmds, e.g. * in a direct-access (dax) mapping, so let's just replicate the * !CONFIG_ARCH_HAS_PTE_SPECIAL case from vm_normal_page() here. */ if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) { if (vma->vm_flags & VM_MIXEDMAP) { if (!pfn_valid(pfn)) return NULL; goto out; } else { unsigned long off; off = (addr - vma->vm_start) >> PAGE_SHIFT; if (pfn == vma->vm_pgoff + off) return NULL; if (!is_cow_mapping(vma->vm_flags)) return NULL; } } if (pmd_devmap(pmd)) return NULL; if (is_zero_pfn(pfn)) return NULL; if (unlikely(pfn > highest_memmap_pfn)) return NULL; /* * NOTE! We still have PageReserved() pages in the page tables. * eg. VDSO mappings can cause them to exist. */ out: return pfn_to_page(pfn); } #endif /* * copy one vm_area from one task to the other. Assumes the page tables * already present in the new task to be cleared in the whole range * covered by this vma. */ static inline unsigned long copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma, unsigned long addr, int *rss) { unsigned long vm_flags = vma->vm_flags; pte_t pte = *src_pte; struct page *page; /* pte contains position in swap or file, so copy. */ if (unlikely(!pte_present(pte))) { swp_entry_t entry = pte_to_swp_entry(pte); if (likely(!non_swap_entry(entry))) { if (swap_duplicate(entry) < 0) return entry.val; /* make sure dst_mm is on swapoff's mmlist. 
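 *
 * swapoff walks the global mmlist to find every mm that may hold swap
 * entries, so the child mm must be visible there before its copied swap
 * pte is.  The unlocked list_empty() check below, re-done under
 * mmlist_lock, avoids taking the global lock on every fork once the mm
 * is already on the list.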
 */
			if (unlikely(list_empty(&dst_mm->mmlist))) {
				spin_lock(&mmlist_lock);
				if (list_empty(&dst_mm->mmlist))
					list_add(&dst_mm->mmlist,
							&src_mm->mmlist);
				spin_unlock(&mmlist_lock);
			}
			rss[MM_SWAPENTS]++;
		} else if (is_migration_entry(entry)) {
			page = migration_entry_to_page(entry);

			rss[mm_counter(page)]++;

			if (is_write_migration_entry(entry) &&
					is_cow_mapping(vm_flags)) {
				/*
				 * COW mappings require pages in both
				 * parent and child to be set to read.
				 */
				make_migration_entry_read(&entry);
				pte = swp_entry_to_pte(entry);
				if (pte_swp_soft_dirty(*src_pte))
					pte = pte_swp_mksoft_dirty(pte);
				set_pte_at(src_mm, addr, src_pte, pte);
			}
		} else if (is_device_private_entry(entry)) {
			page = device_private_entry_to_page(entry);

			/*
			 * Update the rss count even for unaddressable pages,
			 * as they should be treated just like normal pages in
			 * this respect.
			 *
			 * We will likely want to have some new rss counters
			 * for unaddressable pages, at some point. But for now
			 * keep things as they are.
			 */
			get_page(page);
			rss[mm_counter(page)]++;
			page_dup_rmap(page, false);

			/*
			 * We do not preserve soft-dirty information, because so
			 * far, checkpoint/restore is the only feature that
			 * requires that. And checkpoint/restore does not work
			 * when a device driver is involved (you cannot easily
			 * save and restore device driver state).
			 */
			if (is_write_device_private_entry(entry) &&
			    is_cow_mapping(vm_flags)) {
				make_device_private_entry_read(&entry);
				pte = swp_entry_to_pte(entry);
				set_pte_at(src_mm, addr, src_pte, pte);
			}
		}
		goto out_set_pte;
	}

	/*
	 * If it's a COW mapping, write protect it both
	 * in the parent and the child
	 */
	if (is_cow_mapping(vm_flags) && pte_write(pte)) {
		ptep_set_wrprotect(src_mm, addr, src_pte);
		pte = pte_wrprotect(pte);
	}

	/*
	 * If it's a shared mapping, mark it clean in
	 * the child
	 */
	if (vm_flags & VM_SHARED)
		pte = pte_mkclean(pte);
	pte = pte_mkold(pte);

	page = vm_normal_page(vma, addr, pte);
	if (page) {
		get_page(page);
		page_dup_rmap(page, false);
		rss[mm_counter(page)]++;
	} else if (pte_devmap(pte)) {
		page = pte_page(pte);

		/*
		 * Cache coherent device memory behaves like a regular page and
		 * not like a persistent memory page. For more information see
		 * MEMORY_DEVICE_CACHE_COHERENT in memory_hotplug.h
		 */
		if (is_device_public_page(page)) {
			get_page(page);
			page_dup_rmap(page, false);
			rss[mm_counter(page)]++;
		}
	}

out_set_pte:
	set_pte_at(dst_mm, addr, dst_pte, pte);
	return 0;
}

static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
		   pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma,
		   unsigned long addr, unsigned long end)
{
	pte_t *orig_src_pte, *orig_dst_pte;
	pte_t *src_pte, *dst_pte;
	spinlock_t *src_ptl, *dst_ptl;
	int progress = 0;
	int rss[NR_MM_COUNTERS];
	swp_entry_t entry = (swp_entry_t){0};

again:
	init_rss_vec(rss);

	dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl);
	if (!dst_pte)
		return -ENOMEM;
	src_pte = pte_offset_map(src_pmd, addr);
	src_ptl = pte_lockptr(src_mm, src_pmd);
	spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
	orig_src_pte = src_pte;
	orig_dst_pte = dst_pte;
	arch_enter_lazy_mmu_mode();

	do {
		/*
		 * We are holding two locks at this point - either of them
		 * could generate latencies in another task on another CPU.
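 *
 * The progress counter below is the latency valve: copied ptes count as
 * 8 units and empty ones as 1, and after roughly 32 units the loop
 * checks need_resched() and spin_needbreak() on both ptls.  If anyone
 * is waiting we bail out, drop both locks, cond_resched(), and restart
 * at the "again:" label for the remainder of the range.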
*/ if (progress >= 32) { progress = 0; if (need_resched() || spin_needbreak(src_ptl) || spin_needbreak(dst_ptl)) break; } if (pte_none(*src_pte)) { progress++; continue; } entry.val = copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vma, addr, rss); if (entry.val) break; progress += 8; } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end); arch_leave_lazy_mmu_mode(); spin_unlock(src_ptl); pte_unmap(orig_src_pte); add_mm_rss_vec(dst_mm, rss); pte_unmap_unlock(orig_dst_pte, dst_ptl); cond_resched(); if (entry.val) { if (add_swap_count_continuation(entry, GFP_KERNEL) < 0) return -ENOMEM; progress = 0; } if (addr != end) goto again; return 0; } static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, pud_t *dst_pud, pud_t *src_pud, struct vm_area_struct *vma, unsigned long addr, unsigned long end) { pmd_t *src_pmd, *dst_pmd; unsigned long next; dst_pmd = pmd_alloc(dst_mm, dst_pud, addr); if (!dst_pmd) return -ENOMEM; src_pmd = pmd_offset(src_pud, addr); do { next = pmd_addr_end(addr, end); if (is_swap_pmd(*src_pmd) || pmd_trans_huge(*src_pmd) || pmd_devmap(*src_pmd)) { int err; VM_BUG_ON_VMA(next-addr != HPAGE_PMD_SIZE, vma); err = copy_huge_pmd(dst_mm, src_mm, dst_pmd, src_pmd, addr, vma); if (err == -ENOMEM) return -ENOMEM; if (!err) continue; /* fall through */ } if (pmd_none_or_clear_bad(src_pmd)) continue; if (copy_pte_range(dst_mm, src_mm, dst_pmd, src_pmd, vma, addr, next)) return -ENOMEM; } while (dst_pmd++, src_pmd++, addr = next, addr != end); return 0; } static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, p4d_t *dst_p4d, p4d_t *src_p4d, struct vm_area_struct *vma, unsigned long addr, unsigned long end) { pud_t *src_pud, *dst_pud; unsigned long next; dst_pud = pud_alloc(dst_mm, dst_p4d, addr); if (!dst_pud) return -ENOMEM; src_pud = pud_offset(src_p4d, addr); do { next = pud_addr_end(addr, end); if (pud_trans_huge(*src_pud) || pud_devmap(*src_pud)) { int err; VM_BUG_ON_VMA(next-addr != HPAGE_PUD_SIZE, vma); err = copy_huge_pud(dst_mm, src_mm, dst_pud, src_pud, addr, vma); if (err == -ENOMEM) return -ENOMEM; if (!err) continue; /* fall through */ } if (pud_none_or_clear_bad(src_pud)) continue; if (copy_pmd_range(dst_mm, src_mm, dst_pud, src_pud, vma, addr, next)) return -ENOMEM; } while (dst_pud++, src_pud++, addr = next, addr != end); return 0; } static inline int copy_p4d_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, pgd_t *dst_pgd, pgd_t *src_pgd, struct vm_area_struct *vma, unsigned long addr, unsigned long end) { p4d_t *src_p4d, *dst_p4d; unsigned long next; dst_p4d = p4d_alloc(dst_mm, dst_pgd, addr); if (!dst_p4d) return -ENOMEM; src_p4d = p4d_offset(src_pgd, addr); do { next = p4d_addr_end(addr, end); if (p4d_none_or_clear_bad(src_p4d)) continue; if (copy_pud_range(dst_mm, src_mm, dst_p4d, src_p4d, vma, addr, next)) return -ENOMEM; } while (dst_p4d++, src_p4d++, addr = next, addr != end); return 0; } int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, struct vm_area_struct *vma) { pgd_t *src_pgd, *dst_pgd; unsigned long next; unsigned long addr = vma->vm_start; unsigned long end = vma->vm_end; unsigned long mmun_start; /* For mmu_notifiers */ unsigned long mmun_end; /* For mmu_notifiers */ bool is_cow; int ret; /* * Don't copy ptes where a page fault will fill them correctly. * Fork becomes much lighter when there are big shared or private * readonly mappings. The tradeoff is that copy_page_range is more * efficient than faulting. 
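 *
 * Concretely, the test below skips the copy entirely when the vma has
 * no anonymous pages (!vma->anon_vma) and is none of VM_HUGETLB,
 * VM_PFNMAP or VM_MIXEDMAP: the child can then populate its page
 * tables lazily from the backing object on first touch.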
*/ if (!(vma->vm_flags & (VM_HUGETLB | VM_PFNMAP | VM_MIXEDMAP)) && !vma->anon_vma) return 0; if (is_vm_hugetlb_page(vma)) return copy_hugetlb_page_range(dst_mm, src_mm, vma); if (unlikely(vma->vm_flags & VM_PFNMAP)) { /* * We do not free on error cases below as remove_vma * gets called on error from higher level routine */ ret = track_pfn_copy(vma); if (ret) return ret; } /* * We need to invalidate the secondary MMU mappings only when * there could be a permission downgrade on the ptes of the * parent mm. And a permission downgrade will only happen if * is_cow_mapping() returns true. */ is_cow = is_cow_mapping(vma->vm_flags); mmun_start = addr; mmun_end = end; if (is_cow) mmu_notifier_invalidate_range_start(src_mm, mmun_start, mmun_end); ret = 0; dst_pgd = pgd_offset(dst_mm, addr); src_pgd = pgd_offset(src_mm, addr); do { next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(src_pgd)) continue; if (unlikely(copy_p4d_range(dst_mm, src_mm, dst_pgd, src_pgd, vma, addr, next))) { ret = -ENOMEM; break; } } while (dst_pgd++, src_pgd++, addr = next, addr != end); if (is_cow) mmu_notifier_invalidate_range_end(src_mm, mmun_start, mmun_end); return ret; } /* Whether we should zap all COWed (private) pages too */ static inline bool should_zap_cows(struct zap_details *details) { /* By default, zap all pages */ if (!details) return true; /* Or, we zap COWed pages only if the caller wants to */ return !details->check_mapping; } static unsigned long zap_pte_range(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, unsigned long end, struct zap_details *details) { struct mm_struct *mm = tlb->mm; int force_flush = 0; int rss[NR_MM_COUNTERS]; spinlock_t *ptl; pte_t *start_pte; pte_t *pte; swp_entry_t entry; tlb_remove_check_page_size_change(tlb, PAGE_SIZE); again: init_rss_vec(rss); start_pte = pte_offset_map_lock(mm, pmd, addr, &ptl); pte = start_pte; flush_tlb_batched_pending(mm); arch_enter_lazy_mmu_mode(); do { pte_t ptent = *pte; if (pte_none(ptent)) continue; if (pte_present(ptent)) { struct page *page; page = _vm_normal_page(vma, addr, ptent, true); if (unlikely(details) && page) { /* * unmap_shared_mapping_pages() wants to * invalidate cache without truncating: * unmap shared but keep private pages. */ if (details->check_mapping && details->check_mapping != page_rmapping(page)) continue; } ptent = ptep_get_and_clear_full(mm, addr, pte, tlb->fullmm); tlb_remove_tlb_entry(tlb, pte, addr); if (unlikely(!page)) continue; if (!PageAnon(page)) { if (pte_dirty(ptent)) { force_flush = 1; set_page_dirty(page); } if (pte_young(ptent) && likely(!(vma->vm_flags & VM_SEQ_READ))) mark_page_accessed(page); } rss[mm_counter(page)]--; page_remove_rmap(page, false); if (unlikely(page_mapcount(page) < 0)) print_bad_pte(vma, addr, ptent, page); if (unlikely(__tlb_remove_page(tlb, page))) { force_flush = 1; addr += PAGE_SIZE; break; } continue; } entry = pte_to_swp_entry(ptent); if (non_swap_entry(entry) && is_device_private_entry(entry)) { struct page *page = device_private_entry_to_page(entry); if (unlikely(details && details->check_mapping)) { /* * unmap_shared_mapping_pages() wants to * invalidate cache without truncating: * unmap shared but keep private pages. 
*/ if (details->check_mapping != page_rmapping(page)) continue; } pte_clear_not_present_full(mm, addr, pte, tlb->fullmm); rss[mm_counter(page)]--; page_remove_rmap(page, false); put_page(page); continue; } entry = pte_to_swp_entry(ptent); if (!non_swap_entry(entry)) { /* Genuine swap entry, hence a private anon page */ if (!should_zap_cows(details)) continue; rss[MM_SWAPENTS]--; } else if (is_migration_entry(entry)) { struct page *page; page = migration_entry_to_page(entry); if (details && details->check_mapping && details->check_mapping != page_rmapping(page)) continue; rss[mm_counter(page)]--; } if (unlikely(!free_swap_and_cache(entry))) print_bad_pte(vma, addr, ptent, NULL); pte_clear_not_present_full(mm, addr, pte, tlb->fullmm); } while (pte++, addr += PAGE_SIZE, addr != end); add_mm_rss_vec(mm, rss); arch_leave_lazy_mmu_mode(); /* Do the actual TLB flush before dropping ptl */ if (force_flush) tlb_flush_mmu_tlbonly(tlb); pte_unmap_unlock(start_pte, ptl); /* * If we forced a TLB flush (either due to running out of * batch buffers or because we needed to flush dirty TLB * entries before releasing the ptl), free the batched * memory too. Restart if we didn't do everything. */ if (force_flush) { force_flush = 0; tlb_flush_mmu_free(tlb); if (addr != end) goto again; } return addr; } static inline unsigned long zap_pmd_range(struct mmu_gather *tlb, struct vm_area_struct *vma, pud_t *pud, unsigned long addr, unsigned long end, struct zap_details *details) { pmd_t *pmd; unsigned long next; pmd = pmd_offset(pud, addr); do { next = pmd_addr_end(addr, end); if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) { if (next - addr != HPAGE_PMD_SIZE) __split_huge_pmd(vma, pmd, addr, false, NULL); else if (zap_huge_pmd(tlb, vma, pmd, addr)) goto next; /* fall through */ } else if (details && details->single_page && PageTransCompound(details->single_page) && next - addr == HPAGE_PMD_SIZE && pmd_none(*pmd)) { spinlock_t *ptl = pmd_lock(tlb->mm, pmd); /* * Take and drop THP pmd lock so that we cannot return * prematurely, while zap_huge_pmd() has cleared *pmd, * but not yet decremented compound_mapcount(). */ spin_unlock(ptl); } /* * Here there can be other concurrent MADV_DONTNEED or * trans huge page faults running, and if the pmd is * none or trans huge it can change under us. This is * because MADV_DONTNEED holds the mmap_sem in read * mode. 
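 *
 * That is why the pmd is re-tested with
 * pmd_none_or_trans_huge_or_clear_bad() right below instead of relying
 * on the earlier is_swap_pmd()/pmd_trans_huge() checks: if it became
 * none or turned into a huge entry under us we simply skip it rather
 * than descend into a pte page that may no longer exist.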
*/ if (pmd_none_or_trans_huge_or_clear_bad(pmd)) goto next; next = zap_pte_range(tlb, vma, pmd, addr, next, details); next: cond_resched(); } while (pmd++, addr = next, addr != end); return addr; } static inline unsigned long zap_pud_range(struct mmu_gather *tlb, struct vm_area_struct *vma, p4d_t *p4d, unsigned long addr, unsigned long end, struct zap_details *details) { pud_t *pud; unsigned long next; pud = pud_offset(p4d, addr); do { next = pud_addr_end(addr, end); if (pud_trans_huge(*pud) || pud_devmap(*pud)) { if (next - addr != HPAGE_PUD_SIZE) { VM_BUG_ON_VMA(!rwsem_is_locked(&tlb->mm->mmap_sem), vma); split_huge_pud(vma, pud, addr); } else if (zap_huge_pud(tlb, vma, pud, addr)) goto next; /* fall through */ } if (pud_none_or_clear_bad(pud)) continue; next = zap_pmd_range(tlb, vma, pud, addr, next, details); next: cond_resched(); } while (pud++, addr = next, addr != end); return addr; } static inline unsigned long zap_p4d_range(struct mmu_gather *tlb, struct vm_area_struct *vma, pgd_t *pgd, unsigned long addr, unsigned long end, struct zap_details *details) { p4d_t *p4d; unsigned long next; p4d = p4d_offset(pgd, addr); do { next = p4d_addr_end(addr, end); if (p4d_none_or_clear_bad(p4d)) continue; next = zap_pud_range(tlb, vma, p4d, addr, next, details); } while (p4d++, addr = next, addr != end); return addr; } void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long addr, unsigned long end, struct zap_details *details) { pgd_t *pgd; unsigned long next; BUG_ON(addr >= end); tlb_start_vma(tlb, vma); pgd = pgd_offset(vma->vm_mm, addr); do { next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; next = zap_p4d_range(tlb, vma, pgd, addr, next, details); } while (pgd++, addr = next, addr != end); tlb_end_vma(tlb, vma); } static void unmap_single_vma(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start_addr, unsigned long end_addr, struct zap_details *details) { unsigned long start = max(vma->vm_start, start_addr); unsigned long end; if (start >= vma->vm_end) return; end = min(vma->vm_end, end_addr); if (end <= vma->vm_start) return; if (vma->vm_file) uprobe_munmap(vma, start, end); if (unlikely(vma->vm_flags & VM_PFNMAP)) untrack_pfn(vma, 0, 0); if (start != end) { if (unlikely(is_vm_hugetlb_page(vma))) { /* * It is undesirable to test vma->vm_file as it * should be non-null for valid hugetlb area. * However, vm_file will be NULL in the error * cleanup path of mmap_region. When * hugetlbfs ->mmap method fails, * mmap_region() nullifies vma->vm_file * before calling this function to clean up. * Since no pte has actually been setup, it is * safe to do nothing in this case. */ if (vma->vm_file) { i_mmap_lock_write(vma->vm_file->f_mapping); __unmap_hugepage_range_final(tlb, vma, start, end, NULL); i_mmap_unlock_write(vma->vm_file->f_mapping); } } else unmap_page_range(tlb, vma, start, end, details); } } /** * unmap_vmas - unmap a range of memory covered by a list of vma's * @tlb: address of the caller's struct mmu_gather * @vma: the starting vma * @start_addr: virtual address at which to start unmapping * @end_addr: virtual address at which to end unmapping * * Unmap all pages in the vma list. * * Only addresses between `start' and `end' will be unmapped. * * The VMA list must be sorted in ascending virtual address order. * * unmap_vmas() assumes that the caller will flush the whole unmapped address * range after unmap_vmas() returns. 
So the only responsibility here is to * ensure that any thus-far unmapped pages are flushed before unmap_vmas() * drops the lock and schedules. */ void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start_addr, unsigned long end_addr) { struct mm_struct *mm = vma->vm_mm; mmu_notifier_invalidate_range_start(mm, start_addr, end_addr); for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) unmap_single_vma(tlb, vma, start_addr, end_addr, NULL); mmu_notifier_invalidate_range_end(mm, start_addr, end_addr); } /** * zap_page_range - remove user pages in a given range * @vma: vm_area_struct holding the applicable pages * @start: starting address of pages to zap * @size: number of bytes to zap * * Caller must protect the VMA list */ void zap_page_range(struct vm_area_struct *vma, unsigned long start, unsigned long size) { struct mm_struct *mm = vma->vm_mm; struct mmu_gather tlb; unsigned long end = start + size; lru_add_drain(); tlb_gather_mmu(&tlb, mm, start, end); update_hiwater_rss(mm); mmu_notifier_invalidate_range_start(mm, start, end); for ( ; vma && vma->vm_start < end; vma = vma->vm_next) unmap_single_vma(&tlb, vma, start, end, NULL); mmu_notifier_invalidate_range_end(mm, start, end); tlb_finish_mmu(&tlb, start, end); } /** * zap_page_range_single - remove user pages in a given range * @vma: vm_area_struct holding the applicable pages * @address: starting address of pages to zap * @size: number of bytes to zap * @details: details of shared cache invalidation * * The range must fit into one VMA. */ static void zap_page_range_single(struct vm_area_struct *vma, unsigned long address, unsigned long size, struct zap_details *details) { struct mm_struct *mm = vma->vm_mm; struct mmu_gather tlb; unsigned long end = address + size; lru_add_drain(); tlb_gather_mmu(&tlb, mm, address, end); update_hiwater_rss(mm); mmu_notifier_invalidate_range_start(mm, address, end); unmap_single_vma(&tlb, vma, address, end, details); mmu_notifier_invalidate_range_end(mm, address, end); tlb_finish_mmu(&tlb, address, end); } /** * zap_vma_ptes - remove ptes mapping the vma * @vma: vm_area_struct holding ptes to be zapped * @address: starting address of pages to zap * @size: number of bytes to zap * * This function only unmaps ptes assigned to VM_PFNMAP vmas. * * The entire address range must be fully contained within the vma. * */ void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, unsigned long size) { if (address < vma->vm_start || address + size > vma->vm_end || !(vma->vm_flags & VM_PFNMAP)) return; zap_page_range_single(vma, address, size, NULL); } EXPORT_SYMBOL_GPL(zap_vma_ptes); pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl) { pgd_t *pgd; p4d_t *p4d; pud_t *pud; pmd_t *pmd; pgd = pgd_offset(mm, addr); p4d = p4d_alloc(mm, pgd, addr); if (!p4d) return NULL; pud = pud_alloc(mm, p4d, addr); if (!pud) return NULL; pmd = pmd_alloc(mm, pud, addr); if (!pmd) return NULL; VM_BUG_ON(pmd_trans_huge(*pmd)); return pte_alloc_map_lock(mm, pmd, addr, ptl); } /* * This is the old fallback for page remapping. * * For historical reasons, it only allows reserved pages. Only * old drivers should use this, and they needed to mark their * pages reserved for the old functions anyway. 
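 *
 * insert_page() below is the backend for vm_insert_page() and for the
 * pfn_valid() side of vm_insert_mixed(): it refuses anonymous pages,
 * takes a reference on the page and adds it to the file rmap before
 * installing the pte.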
*/ static int insert_page(struct vm_area_struct *vma, unsigned long addr, struct page *page, pgprot_t prot) { struct mm_struct *mm = vma->vm_mm; int retval; pte_t *pte; spinlock_t *ptl; retval = -EINVAL; if (PageAnon(page)) goto out; retval = -ENOMEM; flush_dcache_page(page); pte = get_locked_pte(mm, addr, &ptl); if (!pte) goto out; retval = -EBUSY; if (!pte_none(*pte)) goto out_unlock; /* Ok, finally just insert the thing.. */ get_page(page); inc_mm_counter_fast(mm, mm_counter_file(page)); page_add_file_rmap(page, false); set_pte_at(mm, addr, pte, mk_pte(page, prot)); retval = 0; pte_unmap_unlock(pte, ptl); return retval; out_unlock: pte_unmap_unlock(pte, ptl); out: return retval; } /** * vm_insert_page - insert single page into user vma * @vma: user vma to map to * @addr: target user address of this page * @page: source kernel page * * This allows drivers to insert individual pages they've allocated * into a user vma. * * The page has to be a nice clean _individual_ kernel allocation. * If you allocate a compound page, you need to have marked it as * such (__GFP_COMP), or manually just split the page up yourself * (see split_page()). * * NOTE! Traditionally this was done with "remap_pfn_range()" which * took an arbitrary page protection parameter. This doesn't allow * that. Your vma protection will have to be set up correctly, which * means that if you want a shared writable mapping, you'd better * ask for a shared writable mapping! * * The page does not need to be reserved. * * Usually this function is called from f_op->mmap() handler * under mm->mmap_sem write-lock, so it can change vma->vm_flags. * Caller must set VM_MIXEDMAP on vma if it wants to call this * function from other places, for example from page-fault handler. */ int vm_insert_page(struct vm_area_struct *vma, unsigned long addr, struct page *page) { if (addr < vma->vm_start || addr >= vma->vm_end) return -EFAULT; if (!page_count(page)) return -EINVAL; if (!(vma->vm_flags & VM_MIXEDMAP)) { BUG_ON(down_read_trylock(&vma->vm_mm->mmap_sem)); BUG_ON(vma->vm_flags & VM_PFNMAP); vma->vm_flags |= VM_MIXEDMAP; } return insert_page(vma, addr, page, vma->vm_page_prot); } EXPORT_SYMBOL(vm_insert_page); static int insert_pfn(struct vm_area_struct *vma, unsigned long addr, pfn_t pfn, pgprot_t prot, bool mkwrite) { struct mm_struct *mm = vma->vm_mm; int retval; pte_t *pte, entry; spinlock_t *ptl; retval = -ENOMEM; pte = get_locked_pte(mm, addr, &ptl); if (!pte) goto out; retval = -EBUSY; if (!pte_none(*pte)) { if (mkwrite) { /* * For read faults on private mappings the PFN passed * in may not match the PFN we have mapped if the * mapped PFN is a writeable COW page. In the mkwrite * case we are creating a writable PTE for a shared * mapping and we expect the PFNs to match. If they * don't match, we are likely racing with block * allocation and mapping invalidation so just skip the * update. */ if (pte_pfn(*pte) != pfn_t_to_pfn(pfn)) { WARN_ON_ONCE(!is_zero_pfn(pte_pfn(*pte))); goto out_unlock; } entry = pte_mkyoung(*pte); entry = maybe_mkwrite(pte_mkdirty(entry), vma); if (ptep_set_access_flags(vma, addr, pte, entry, 1)) update_mmu_cache(vma, addr, pte); } goto out_unlock; } /* Ok, finally just insert the thing.. */ if (pfn_t_devmap(pfn)) entry = pte_mkdevmap(pfn_t_pte(pfn, prot)); else entry = pte_mkspecial(pfn_t_pte(pfn, prot)); if (mkwrite) { entry = pte_mkyoung(entry); entry = maybe_mkwrite(pte_mkdirty(entry), vma); } set_pte_at(mm, addr, pte, entry); update_mmu_cache(vma, addr, pte); /* XXX: why not for insert_page? 
*/ retval = 0; out_unlock: pte_unmap_unlock(pte, ptl); out: return retval; } /** * vm_insert_pfn - insert single pfn into user vma * @vma: user vma to map to * @addr: target user address of this page * @pfn: source kernel pfn * * Similar to vm_insert_page, this allows drivers to insert individual pages * they've allocated into a user vma. Same comments apply. * * This function should only be called from a vm_ops->fault handler, and * in that case the handler should return NULL. * * vma cannot be a COW mapping. * * As this is called only for pages that do not currently exist, we * do not need to flush old virtual caches or the TLB. */ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn) { return vm_insert_pfn_prot(vma, addr, pfn, vma->vm_page_prot); } EXPORT_SYMBOL(vm_insert_pfn); /** * vm_insert_pfn_prot - insert single pfn into user vma with specified pgprot * @vma: user vma to map to * @addr: target user address of this page * @pfn: source kernel pfn * @pgprot: pgprot flags for the inserted page * * This is exactly like vm_insert_pfn, except that it allows drivers to * to override pgprot on a per-page basis. * * This only makes sense for IO mappings, and it makes no sense for * cow mappings. In general, using multiple vmas is preferable; * vm_insert_pfn_prot should only be used if using multiple VMAs is * impractical. */ int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, pgprot_t pgprot) { int ret; /* * Technically, architectures with pte_special can avoid all these * restrictions (same for remap_pfn_range). However we would like * consistency in testing and feature parity among all, so we should * try to keep these invariants in place for everybody. */ BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))); BUG_ON((vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) == (VM_PFNMAP|VM_MIXEDMAP)); BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags)); BUG_ON((vma->vm_flags & VM_MIXEDMAP) && pfn_valid(pfn)); if (addr < vma->vm_start || addr >= vma->vm_end) return -EFAULT; if (!pfn_modify_allowed(pfn, pgprot)) return -EACCES; track_pfn_insert(vma, &pgprot, __pfn_to_pfn_t(pfn, PFN_DEV)); ret = insert_pfn(vma, addr, __pfn_to_pfn_t(pfn, PFN_DEV), pgprot, false); return ret; } EXPORT_SYMBOL(vm_insert_pfn_prot); static bool vm_mixed_ok(struct vm_area_struct *vma, pfn_t pfn) { /* these checks mirror the abort conditions in vm_normal_page */ if (vma->vm_flags & VM_MIXEDMAP) return true; if (pfn_t_devmap(pfn)) return true; if (pfn_t_special(pfn)) return true; if (is_zero_pfn(pfn_t_to_pfn(pfn))) return true; return false; } static int __vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, pfn_t pfn, bool mkwrite) { pgprot_t pgprot = vma->vm_page_prot; BUG_ON(!vm_mixed_ok(vma, pfn)); if (addr < vma->vm_start || addr >= vma->vm_end) return -EFAULT; track_pfn_insert(vma, &pgprot, pfn); if (!pfn_modify_allowed(pfn_t_to_pfn(pfn), pgprot)) return -EACCES; /* * If we don't have pte special, then we have to use the pfn_valid() * based VM_MIXEDMAP scheme (see vm_normal_page), and thus we *must* * refcount the page if pfn_valid is true (hence insert_page rather * than insert_pfn). If a zero_pfn were inserted into a VM_MIXEDMAP * without pte special, it would there be refcounted as a normal page. 
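 *
 * A typical caller of vm_insert_mixed() is a driver ->fault handler
 * that mixes struct-page backed memory with raw pfns.  As an
 * illustrative sketch only (no such handler exists in this file;
 * my_phys_addr() is a made-up helper standing in for however the
 * driver locates its memory), with the error mapping mirroring
 * vmf_insert_mixed_mkwrite() below:
 *
 *	static vm_fault_t my_fault(struct vm_fault *vmf)
 *	{
 *		pfn_t pfn = phys_to_pfn_t(my_phys_addr(vmf->pgoff), PFN_DEV);
 *		int err = vm_insert_mixed(vmf->vma, vmf->address, pfn);
 *
 *		if (err == -ENOMEM)
 *			return VM_FAULT_OOM;
 *		if (err < 0 && err != -EBUSY)
 *			return VM_FAULT_SIGBUS;
 *		return VM_FAULT_NOPAGE;
 *	}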
*/ if (!IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL) && !pfn_t_devmap(pfn) && pfn_t_valid(pfn)) { struct page *page; /* * At this point we are committed to insert_page() * regardless of whether the caller specified flags that * result in pfn_t_has_page() == false. */ page = pfn_to_page(pfn_t_to_pfn(pfn)); return insert_page(vma, addr, page, pgprot); } return insert_pfn(vma, addr, pfn, pgprot, mkwrite); } int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, pfn_t pfn) { return __vm_insert_mixed(vma, addr, pfn, false); } EXPORT_SYMBOL(vm_insert_mixed); /* * If the insertion of PTE failed because someone else already added a * different entry in the mean time, we treat that as success as we assume * the same entry was actually inserted. */ vm_fault_t vmf_insert_mixed_mkwrite(struct vm_area_struct *vma, unsigned long addr, pfn_t pfn) { int err; err = __vm_insert_mixed(vma, addr, pfn, true); if (err == -ENOMEM) return VM_FAULT_OOM; if (err < 0 && err != -EBUSY) return VM_FAULT_SIGBUS; return VM_FAULT_NOPAGE; } EXPORT_SYMBOL(vmf_insert_mixed_mkwrite); /* * maps a range of physical memory into the requested pages. the old * mappings are removed. any references to nonexistent pages results * in null mappings (currently treated as "copy-on-access") */ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr, unsigned long end, unsigned long pfn, pgprot_t prot) { pte_t *pte, *mapped_pte; spinlock_t *ptl; int err = 0; mapped_pte = pte = pte_alloc_map_lock(mm, pmd, addr, &ptl); if (!pte) return -ENOMEM; arch_enter_lazy_mmu_mode(); do { BUG_ON(!pte_none(*pte)); if (!pfn_modify_allowed(pfn, prot)) { err = -EACCES; break; } set_pte_at(mm, addr, pte, pte_mkspecial(pfn_pte(pfn, prot))); pfn++; } while (pte++, addr += PAGE_SIZE, addr != end); arch_leave_lazy_mmu_mode(); pte_unmap_unlock(mapped_pte, ptl); return err; } static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud, unsigned long addr, unsigned long end, unsigned long pfn, pgprot_t prot) { pmd_t *pmd; unsigned long next; int err; pfn -= addr >> PAGE_SHIFT; pmd = pmd_alloc(mm, pud, addr); if (!pmd) return -ENOMEM; VM_BUG_ON(pmd_trans_huge(*pmd)); do { next = pmd_addr_end(addr, end); err = remap_pte_range(mm, pmd, addr, next, pfn + (addr >> PAGE_SHIFT), prot); if (err) return err; } while (pmd++, addr = next, addr != end); return 0; } static inline int remap_pud_range(struct mm_struct *mm, p4d_t *p4d, unsigned long addr, unsigned long end, unsigned long pfn, pgprot_t prot) { pud_t *pud; unsigned long next; int err; pfn -= addr >> PAGE_SHIFT; pud = pud_alloc(mm, p4d, addr); if (!pud) return -ENOMEM; do { next = pud_addr_end(addr, end); err = remap_pmd_range(mm, pud, addr, next, pfn + (addr >> PAGE_SHIFT), prot); if (err) return err; } while (pud++, addr = next, addr != end); return 0; } static inline int remap_p4d_range(struct mm_struct *mm, pgd_t *pgd, unsigned long addr, unsigned long end, unsigned long pfn, pgprot_t prot) { p4d_t *p4d; unsigned long next; int err; pfn -= addr >> PAGE_SHIFT; p4d = p4d_alloc(mm, pgd, addr); if (!p4d) return -ENOMEM; do { next = p4d_addr_end(addr, end); err = remap_pud_range(mm, p4d, addr, next, pfn + (addr >> PAGE_SHIFT), prot); if (err) return err; } while (p4d++, addr = next, addr != end); return 0; } /** * remap_pfn_range - remap kernel memory to userspace * @vma: user vma to map to * @addr: target user address to start at * @pfn: physical address of kernel memory * @size: size of map area * @prot: page protection flags for this mapping * * Note: this is only safe if the 
mm semaphore is held when called. */ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, unsigned long size, pgprot_t prot) { pgd_t *pgd; unsigned long next; unsigned long end = addr + PAGE_ALIGN(size); struct mm_struct *mm = vma->vm_mm; unsigned long remap_pfn = pfn; int err; /* * Physically remapped pages are special. Tell the * rest of the world about it: * VM_IO tells people not to look at these pages * (accesses can have side effects). * VM_PFNMAP tells the core MM that the base pages are just * raw PFN mappings, and do not have a "struct page" associated * with them. * VM_DONTEXPAND * Disable vma merging and expanding with mremap(). * VM_DONTDUMP * Omit vma from core dump, even when VM_IO turned off. * * There's a horrible special case to handle copy-on-write * behaviour that some programs depend on. We mark the "original" * un-COW'ed pages by matching them up with "vma->vm_pgoff". * See vm_normal_page() for details. */ if (is_cow_mapping(vma->vm_flags)) { if (addr != vma->vm_start || end != vma->vm_end) return -EINVAL; vma->vm_pgoff = pfn; } err = track_pfn_remap(vma, &prot, remap_pfn, addr, PAGE_ALIGN(size)); if (err) return -EINVAL; vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; BUG_ON(addr >= end); pfn -= addr >> PAGE_SHIFT; pgd = pgd_offset(mm, addr); flush_cache_range(vma, addr, end); do { next = pgd_addr_end(addr, end); err = remap_p4d_range(mm, pgd, addr, next, pfn + (addr >> PAGE_SHIFT), prot); if (err) break; } while (pgd++, addr = next, addr != end); if (err) untrack_pfn(vma, remap_pfn, PAGE_ALIGN(size)); return err; } EXPORT_SYMBOL(remap_pfn_range); /** * vm_iomap_memory - remap memory to userspace * @vma: user vma to map to * @start: start of area * @len: size of area * * This is a simplified io_remap_pfn_range() for common driver use. The * driver just needs to give us the physical memory range to be mapped, * we'll figure out the rest from the vma information. * * NOTE! Some drivers might want to tweak vma->vm_page_prot first to get * whatever write-combining details or similar. */ int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len) { unsigned long vm_len, pfn, pages; /* Check that the physical memory area passed in looks valid */ if (start + len < start) return -EINVAL; /* * You *really* shouldn't map things that aren't page-aligned, * but we've historically allowed it because IO memory might * just have smaller alignment. */ len += start & ~PAGE_MASK; pfn = start >> PAGE_SHIFT; pages = (len + ~PAGE_MASK) >> PAGE_SHIFT; if (pfn + pages < pfn) return -EINVAL; /* We start the mapping 'vm_pgoff' pages into the area */ if (vma->vm_pgoff > pages) return -EINVAL; pfn += vma->vm_pgoff; pages -= vma->vm_pgoff; /* Can we fit all of the mapping? */ vm_len = vma->vm_end - vma->vm_start; if (vm_len >> PAGE_SHIFT > pages) return -EINVAL; /* Ok, let it rip */ return io_remap_pfn_range(vma, vma->vm_start, pfn, vm_len, vma->vm_page_prot); } EXPORT_SYMBOL(vm_iomap_memory); static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr, unsigned long end, pte_fn_t fn, void *data) { pte_t *pte; int err; pgtable_t token; spinlock_t *uninitialized_var(ptl); pte = (mm == &init_mm) ? 
pte_alloc_kernel(pmd, addr) : pte_alloc_map_lock(mm, pmd, addr, &ptl); if (!pte) return -ENOMEM; BUG_ON(pmd_huge(*pmd)); arch_enter_lazy_mmu_mode(); token = pmd_pgtable(*pmd); do { err = fn(pte++, token, addr, data); if (err) break; } while (addr += PAGE_SIZE, addr != end); arch_leave_lazy_mmu_mode(); if (mm != &init_mm) pte_unmap_unlock(pte-1, ptl); return err; } static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud, unsigned long addr, unsigned long end, pte_fn_t fn, void *data) { pmd_t *pmd; unsigned long next; int err; BUG_ON(pud_huge(*pud)); pmd = pmd_alloc(mm, pud, addr); if (!pmd) return -ENOMEM; do { next = pmd_addr_end(addr, end); err = apply_to_pte_range(mm, pmd, addr, next, fn, data); if (err) break; } while (pmd++, addr = next, addr != end); return err; } static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d, unsigned long addr, unsigned long end, pte_fn_t fn, void *data) { pud_t *pud; unsigned long next; int err; pud = pud_alloc(mm, p4d, addr); if (!pud) return -ENOMEM; do { next = pud_addr_end(addr, end); err = apply_to_pmd_range(mm, pud, addr, next, fn, data); if (err) break; } while (pud++, addr = next, addr != end); return err; } static int apply_to_p4d_range(struct mm_struct *mm, pgd_t *pgd, unsigned long addr, unsigned long end, pte_fn_t fn, void *data) { p4d_t *p4d; unsigned long next; int err; p4d = p4d_alloc(mm, pgd, addr); if (!p4d) return -ENOMEM; do { next = p4d_addr_end(addr, end); err = apply_to_pud_range(mm, p4d, addr, next, fn, data); if (err) break; } while (p4d++, addr = next, addr != end); return err; } /* * Scan a region of virtual memory, filling in page tables as necessary * and calling a provided function on each leaf page table. */ int apply_to_page_range(struct mm_struct *mm, unsigned long addr, unsigned long size, pte_fn_t fn, void *data) { pgd_t *pgd; unsigned long next; unsigned long end = addr + size; int err; if (WARN_ON(addr >= end)) return -EINVAL; pgd = pgd_offset(mm, addr); do { next = pgd_addr_end(addr, end); err = apply_to_p4d_range(mm, pgd, addr, next, fn, data); if (err) break; } while (pgd++, addr = next, addr != end); return err; } EXPORT_SYMBOL_GPL(apply_to_page_range); /* * handle_pte_fault chooses page fault handler according to an entry which was * read non-atomically. Before making any commitment, on those architectures * or configurations (e.g. i386 with PAE) which might give a mix of unmatched * parts, do_swap_page must check under lock before unmapping the pte and * proceeding (but do_wp_page is only called after already making such a check; * and do_anonymous_page can safely check later on). */ static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd, pte_t *page_table, pte_t orig_pte) { int same = 1; #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT) if (sizeof(pte_t) > sizeof(unsigned long)) { spinlock_t *ptl = pte_lockptr(mm, pmd); spin_lock(ptl); same = pte_same(*page_table, orig_pte); spin_unlock(ptl); } #endif pte_unmap(page_table); return same; } static inline bool cow_user_page(struct page *dst, struct page *src, struct vm_fault *vmf) { bool ret; void *kaddr; void __user *uaddr; bool locked = false; struct vm_area_struct *vma = vmf->vma; struct mm_struct *mm = vma->vm_mm; unsigned long addr = vmf->address; debug_dma_assert_idle(src); if (likely(src)) { copy_user_highpage(dst, src, addr, vma); return true; } /* * If the source page was a PFN mapping, we don't have * a "struct page" for it. We do a best-effort copy by * just copying from the original user address. 
If that * fails, we just zero-fill it. Live with it. */ kaddr = kmap_atomic(dst); uaddr = (void __user *)(addr & PAGE_MASK); /* * On architectures with software "accessed" bits, we would * take a double page fault, so mark it accessed here. */ if (arch_faults_on_old_pte() && !pte_young(vmf->orig_pte)) { pte_t entry; vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl); locked = true; if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) { /* * Other thread has already handled the fault * and we don't need to do anything. If it's * not the case, the fault will be triggered * again on the same address. */ ret = false; goto pte_unlock; } entry = pte_mkyoung(vmf->orig_pte); if (ptep_set_access_flags(vma, addr, vmf->pte, entry, 0)) update_mmu_cache(vma, addr, vmf->pte); } /* * This really shouldn't fail, because the page is there * in the page tables. But it might just be unreadable, * in which case we just give up and fill the result with * zeroes. */ if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) { if (locked) goto warn; /* Re-validate under PTL if the page is still mapped */ vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl); locked = true; if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) { /* The PTE changed under us. Retry page fault. */ ret = false; goto pte_unlock; } /* * The same page can be mapped back since last copy attampt. * Try to copy again under PTL. */ if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) { /* * Give a warn in case there can be some obscure * use-case */ warn: WARN_ON_ONCE(1); clear_page(kaddr); } } ret = true; pte_unlock: if (locked) pte_unmap_unlock(vmf->pte, vmf->ptl); kunmap_atomic(kaddr); flush_dcache_page(dst); return ret; } static gfp_t __get_fault_gfp_mask(struct vm_area_struct *vma) { struct file *vm_file = vma->vm_file; if (vm_file) return mapping_gfp_mask(vm_file->f_mapping) | __GFP_FS | __GFP_IO; /* * Special mappings (e.g. VDSO) do not have any file so fake * a default GFP_KERNEL for them. */ return GFP_KERNEL; } /* * Notify the address space that the page is about to become writable so that * it can prohibit this or wait for the page to get into an appropriate state. * * We do this without the lock held, so that it can sleep if it needs to. */ static vm_fault_t do_page_mkwrite(struct vm_fault *vmf) { vm_fault_t ret; struct page *page = vmf->page; unsigned int old_flags = vmf->flags; vmf->flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE; ret = vmf->vma->vm_ops->page_mkwrite(vmf); /* Restore original flags so that caller is not surprised */ vmf->flags = old_flags; if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) return ret; if (unlikely(!(ret & VM_FAULT_LOCKED))) { lock_page(page); if (!page->mapping) { unlock_page(page); return 0; /* retry */ } ret |= VM_FAULT_LOCKED; } else VM_BUG_ON_PAGE(!PageLocked(page), page); return ret; } /* * Handle dirtying of a page in shared file mapping on a write fault. * * The function expects the page to be locked and unlocks it. */ static void fault_dirty_shared_page(struct vm_area_struct *vma, struct page *page) { struct address_space *mapping; bool dirtied; bool page_mkwrite = vma->vm_ops && vma->vm_ops->page_mkwrite; dirtied = set_page_dirty(page); VM_BUG_ON_PAGE(PageAnon(page), page); /* * Take a local copy of the address_space - page.mapping may be zeroed * by truncate after unlock_page(). The address_space itself remains * pinned by vma->vm_file's reference. We rely on unlock_page()'s * release semantics to prevent the compiler from undoing this copying. 
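 *
 * The balance_dirty_pages_ratelimited() call below may sleep to
 * throttle the writer, so it must run after unlock_page(); the snapshot
 * taken here is what it is handed, while the address_space itself stays
 * pinned by vma->vm_file as noted above.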
*/ mapping = page_rmapping(page); unlock_page(page); if ((dirtied || page_mkwrite) && mapping) { /* * Some device drivers do not set page.mapping * but still dirty their pages */ balance_dirty_pages_ratelimited(mapping); } if (!page_mkwrite) file_update_time(vma->vm_file); } /* * Handle write page faults for pages that can be reused in the current vma * * This can happen either due to the mapping being with the VM_SHARED flag, * or due to us being the last reference standing to the page. In either * case, all we need to do here is to mark the page as writable and update * any related book-keeping. */ static inline void wp_page_reuse(struct vm_fault *vmf) __releases(vmf->ptl) { struct vm_area_struct *vma = vmf->vma; struct page *page = vmf->page; pte_t entry; /* * Clear the pages cpupid information as the existing * information potentially belongs to a now completely * unrelated process. */ if (page) page_cpupid_xchg_last(page, (1 << LAST_CPUPID_SHIFT) - 1); flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte)); entry = pte_mkyoung(vmf->orig_pte); entry = maybe_mkwrite(pte_mkdirty(entry), vma); if (ptep_set_access_flags(vma, vmf->address, vmf->pte, entry, 1)) update_mmu_cache(vma, vmf->address, vmf->pte); pte_unmap_unlock(vmf->pte, vmf->ptl); } /* * Handle the case of a page which we actually need to copy to a new page. * * Called with mmap_sem locked and the old page referenced, but * without the ptl held. * * High level logic flow: * * - Allocate a page, copy the content of the old page to the new one. * - Handle book keeping and accounting - cgroups, mmu-notifiers, etc. * - Take the PTL. If the pte changed, bail out and release the allocated page * - If the pte is still the way we remember it, update the page table and all * relevant references. This includes dropping the reference the page-table * held to the old page, as well as updating the rmap. * - In any case, unlock the PTL and drop the reference we took to the old page. */ static vm_fault_t wp_page_copy(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct mm_struct *mm = vma->vm_mm; struct page *old_page = vmf->page; struct page *new_page = NULL; pte_t entry; int page_copied = 0; const unsigned long mmun_start = vmf->address & PAGE_MASK; const unsigned long mmun_end = mmun_start + PAGE_SIZE; struct mem_cgroup *memcg; if (unlikely(anon_vma_prepare(vma))) goto oom; if (is_zero_pfn(pte_pfn(vmf->orig_pte))) { new_page = alloc_zeroed_user_highpage_movable(vma, vmf->address); if (!new_page) goto oom; } else { new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vmf->address); if (!new_page) goto oom; if (!cow_user_page(new_page, old_page, vmf)) { /* * COW failed, if the fault was solved by other, * it's fine. If not, userspace would re-fault on * the same address and we will handle the fault * from the second attempt. 
*/ put_page(new_page); if (old_page) put_page(old_page); return 0; } } if (mem_cgroup_try_charge_delay(new_page, mm, GFP_KERNEL, &memcg, false)) goto oom_free_new; __SetPageUptodate(new_page); mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); /* * Re-check the pte - we dropped the lock */ vmf->pte = pte_offset_map_lock(mm, vmf->pmd, vmf->address, &vmf->ptl); if (likely(pte_same(*vmf->pte, vmf->orig_pte))) { if (old_page) { if (!PageAnon(old_page)) { dec_mm_counter_fast(mm, mm_counter_file(old_page)); inc_mm_counter_fast(mm, MM_ANONPAGES); } } else { inc_mm_counter_fast(mm, MM_ANONPAGES); } flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte)); entry = mk_pte(new_page, vma->vm_page_prot); entry = maybe_mkwrite(pte_mkdirty(entry), vma); /* * Clear the pte entry and flush it first, before updating the * pte with the new entry. This will avoid a race condition * seen in the presence of one thread doing SMC and another * thread doing COW. */ ptep_clear_flush_notify(vma, vmf->address, vmf->pte); page_add_new_anon_rmap(new_page, vma, vmf->address, false); mem_cgroup_commit_charge(new_page, memcg, false, false); lru_cache_add_active_or_unevictable(new_page, vma); /* * We call the notify macro here because, when using secondary * mmu page tables (such as kvm shadow page tables), we want the * new page to be mapped directly into the secondary page table. */ set_pte_at_notify(mm, vmf->address, vmf->pte, entry); update_mmu_cache(vma, vmf->address, vmf->pte); if (old_page) { /* * Only after switching the pte to the new page may * we remove the mapcount here. Otherwise another * process may come and find the rmap count decremented * before the pte is switched to the new page, and * "reuse" the old page writing into it while our pte * here still points into it and can be read by other * threads. * * The critical issue is to order this * page_remove_rmap with the ptp_clear_flush above. * Those stores are ordered by (if nothing else,) * the barrier present in the atomic_add_negative * in page_remove_rmap. * * Then the TLB flush in ptep_clear_flush ensures that * no process can access the old page before the * decremented mapcount is visible. And the old page * cannot be reused until after the decremented * mapcount is visible. So transitively, TLBs to * old page will be flushed before it can be reused. */ page_remove_rmap(old_page, false); } /* Free the old page.. */ new_page = old_page; page_copied = 1; } else { mem_cgroup_cancel_charge(new_page, memcg, false); } if (new_page) put_page(new_page); pte_unmap_unlock(vmf->pte, vmf->ptl); /* * No need to double call mmu_notifier->invalidate_range() callback as * the above ptep_clear_flush_notify() did already call it. */ mmu_notifier_invalidate_range_only_end(mm, mmun_start, mmun_end); if (old_page) { /* * Don't let another task, with possibly unlocked vma, * keep the mlocked page. */ if (page_copied && (vma->vm_flags & VM_LOCKED)) { lock_page(old_page); /* LRU manipulation */ if (PageMlocked(old_page)) munlock_vma_page(old_page); unlock_page(old_page); } put_page(old_page); } return page_copied ? VM_FAULT_WRITE : 0; oom_free_new: put_page(new_page); oom: if (old_page) put_page(old_page); return VM_FAULT_OOM; } /** * finish_mkwrite_fault - finish page fault for a shared mapping, making PTE * writeable once the page is prepared * * @vmf: structure describing the fault * * This function handles all that is needed to finish a write page fault in a * shared mapping due to PTE being read-only once the mapped page is prepared. 
* It handles locking of PTE and modifying it. The function returns * VM_FAULT_WRITE on success, 0 when PTE got changed before we acquired PTE * lock. * * The function expects the page to be locked or other protection against * concurrent faults / writeback (such as DAX radix tree locks). */ vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf) { WARN_ON_ONCE(!(vmf->vma->vm_flags & VM_SHARED)); vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); /* * We might have raced with another page fault while we released the * pte_offset_map_lock. */ if (!pte_same(*vmf->pte, vmf->orig_pte)) { pte_unmap_unlock(vmf->pte, vmf->ptl); return VM_FAULT_NOPAGE; } wp_page_reuse(vmf); return 0; } /* * Handle write page faults for VM_MIXEDMAP or VM_PFNMAP for a VM_SHARED * mapping */ static vm_fault_t wp_pfn_shared(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; if (vma->vm_ops && vma->vm_ops->pfn_mkwrite) { vm_fault_t ret; pte_unmap_unlock(vmf->pte, vmf->ptl); vmf->flags |= FAULT_FLAG_MKWRITE; ret = vma->vm_ops->pfn_mkwrite(vmf); if (ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)) return ret; return finish_mkwrite_fault(vmf); } wp_page_reuse(vmf); return VM_FAULT_WRITE; } static vm_fault_t wp_page_shared(struct vm_fault *vmf) __releases(vmf->ptl) { struct vm_area_struct *vma = vmf->vma; get_page(vmf->page); if (vma->vm_ops && vma->vm_ops->page_mkwrite) { vm_fault_t tmp; pte_unmap_unlock(vmf->pte, vmf->ptl); tmp = do_page_mkwrite(vmf); if (unlikely(!tmp || (tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) { put_page(vmf->page); return tmp; } tmp = finish_mkwrite_fault(vmf); if (unlikely(tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) { unlock_page(vmf->page); put_page(vmf->page); return tmp; } } else { wp_page_reuse(vmf); lock_page(vmf->page); } fault_dirty_shared_page(vma, vmf->page); put_page(vmf->page); return VM_FAULT_WRITE; } /* * This routine handles present pages, when users try to write * to a shared page. It is done by copying the page to a new address * and decrementing the shared-page counter for the old page. * * Note that this routine assumes that the protection checks have been * done by the caller (the low-level page fault routine in most cases). * Thus we can safely just mark it writable once we've done any necessary * COW. * * We also mark the page dirty at this point even though the page will * change only once the write actually happens. This avoids a few races, * and potentially makes it more efficient. * * We enter with non-exclusive mmap_sem (to exclude vma changes, * but allow concurrent faults), with pte both mapped and locked. * We return with mmap_sem still held, but pte unmapped and unlocked. */ static vm_fault_t do_wp_page(struct vm_fault *vmf) __releases(vmf->ptl) { struct vm_area_struct *vma = vmf->vma; vmf->page = vm_normal_page(vma, vmf->address, vmf->orig_pte); if (!vmf->page) { /* * VM_MIXEDMAP !pfn_valid() case, or VM_SOFTDIRTY clear on a * VM_PFNMAP VMA. * * We should not cow pages in a shared writeable mapping. * Just mark the pages writable and/or call ops->pfn_mkwrite. */ if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) == (VM_WRITE|VM_SHARED)) return wp_pfn_shared(vmf); pte_unmap_unlock(vmf->pte, vmf->ptl); return wp_page_copy(vmf); } /* * Take out anonymous pages first, anonymous shared vmas are * not dirty accountable. 
*/ if (PageAnon(vmf->page) && !PageKsm(vmf->page)) { int total_map_swapcount; if (!trylock_page(vmf->page)) { get_page(vmf->page); pte_unmap_unlock(vmf->pte, vmf->ptl); lock_page(vmf->page); vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); if (!pte_same(*vmf->pte, vmf->orig_pte)) { unlock_page(vmf->page); pte_unmap_unlock(vmf->pte, vmf->ptl); put_page(vmf->page); return 0; } put_page(vmf->page); } if (reuse_swap_page(vmf->page, &total_map_swapcount)) { if (total_map_swapcount == 1) { /* * The page is all ours. Move it to * our anon_vma so the rmap code will * not search our parent or siblings. * Protected against the rmap code by * the page lock. */ page_move_anon_rmap(vmf->page, vma); } unlock_page(vmf->page); wp_page_reuse(vmf); return VM_FAULT_WRITE; } unlock_page(vmf->page); } else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) == (VM_WRITE|VM_SHARED))) { return wp_page_shared(vmf); } /* * Ok, we need to copy. Oh, well.. */ get_page(vmf->page); pte_unmap_unlock(vmf->pte, vmf->ptl); return wp_page_copy(vmf); } static void unmap_mapping_range_vma(struct vm_area_struct *vma, unsigned long start_addr, unsigned long end_addr, struct zap_details *details) { zap_page_range_single(vma, start_addr, end_addr - start_addr, details); } static inline void unmap_mapping_range_tree(struct rb_root_cached *root, struct zap_details *details) { struct vm_area_struct *vma; pgoff_t vba, vea, zba, zea; vma_interval_tree_foreach(vma, root, details->first_index, details->last_index) { vba = vma->vm_pgoff; vea = vba + vma_pages(vma) - 1; zba = details->first_index; if (zba < vba) zba = vba; zea = details->last_index; if (zea > vea) zea = vea; unmap_mapping_range_vma(vma, ((zba - vba) << PAGE_SHIFT) + vma->vm_start, ((zea - vba + 1) << PAGE_SHIFT) + vma->vm_start, details); } } /** * unmap_mapping_page() - Unmap single page from processes. * @page: The locked page to be unmapped. * * Unmap this page from any userspace process which still has it mmaped. * Typically, for efficiency, the range of nearby pages has already been * unmapped by unmap_mapping_pages() or unmap_mapping_range(). But once * truncation or invalidation holds the lock on a page, it may find that * the page has been remapped again: and then uses unmap_mapping_page() * to unmap it finally. */ void unmap_mapping_page(struct page *page) { struct address_space *mapping = page->mapping; struct zap_details details = { }; VM_BUG_ON(!PageLocked(page)); VM_BUG_ON(PageTail(page)); details.check_mapping = mapping; details.first_index = page->index; details.last_index = page->index + hpage_nr_pages(page) - 1; details.single_page = page; i_mmap_lock_write(mapping); if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))) unmap_mapping_range_tree(&mapping->i_mmap, &details); i_mmap_unlock_write(mapping); } /** * unmap_mapping_pages() - Unmap pages from processes. * @mapping: The address space containing pages to be unmapped. * @start: Index of first page to be unmapped. * @nr: Number of pages to be unmapped. 0 to unmap to end of file. * @even_cows: Whether to unmap even private COWed pages. * * Unmap the pages in this address space from any userspace process which * has them mmaped. Generally, you want to remove COWed pages as well when * a file is being truncated, but not when invalidating pages from the page * cache. */ void unmap_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t nr, bool even_cows) { struct zap_details details = { }; details.check_mapping = even_cows ? 
NULL : mapping; details.first_index = start; details.last_index = start + nr - 1; if (details.last_index < details.first_index) details.last_index = ULONG_MAX; i_mmap_lock_write(mapping); if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))) unmap_mapping_range_tree(&mapping->i_mmap, &details); i_mmap_unlock_write(mapping); } /** * unmap_mapping_range - unmap the portion of all mmaps in the specified * address_space corresponding to the specified byte range in the underlying * file. * * @mapping: the address space containing mmaps to be unmapped. * @holebegin: byte in first page to unmap, relative to the start of * the underlying file. This will be rounded down to a PAGE_SIZE * boundary. Note that this is different from truncate_pagecache(), which * must keep the partial page. In contrast, we must get rid of * partial pages. * @holelen: size of prospective hole in bytes. This will be rounded * up to a PAGE_SIZE boundary. A holelen of zero truncates to the * end of the file. * @even_cows: 1 when truncating a file, unmap even private COWed pages; * but 0 when invalidating pagecache, don't throw away private data. */ void unmap_mapping_range(struct address_space *mapping, loff_t const holebegin, loff_t const holelen, int even_cows) { pgoff_t hba = holebegin >> PAGE_SHIFT; pgoff_t hlen = (holelen + PAGE_SIZE - 1) >> PAGE_SHIFT; /* Check for overflow. */ if (sizeof(holelen) > sizeof(hlen)) { long long holeend = (holebegin + holelen + PAGE_SIZE - 1) >> PAGE_SHIFT; if (holeend & ~(long long)ULONG_MAX) hlen = ULONG_MAX - hba + 1; } unmap_mapping_pages(mapping, hba, hlen, even_cows); } EXPORT_SYMBOL(unmap_mapping_range); /* * We enter with non-exclusive mmap_sem (to exclude vma changes, * but allow concurrent faults), and pte mapped but not yet locked. * We return with pte unmapped and unlocked. * * We return with the mmap_sem locked or unlocked in the same cases * as does filemap_fault(). */ vm_fault_t do_swap_page(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct page *page = NULL, *swapcache; struct mem_cgroup *memcg; swp_entry_t entry; pte_t pte; int locked; int exclusive = 0; vm_fault_t ret = 0; if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte)) goto out; entry = pte_to_swp_entry(vmf->orig_pte); if (unlikely(non_swap_entry(entry))) { if (is_migration_entry(entry)) { migration_entry_wait(vma->vm_mm, vmf->pmd, vmf->address); } else if (is_device_private_entry(entry)) { /* * For un-addressable device memory we call the pgmap * fault handler callback. The callback must migrate * the page back to some CPU accessible page. */ ret = device_private_entry_fault(vma, vmf->address, entry, vmf->flags, vmf->pmd); } else if (is_hwpoison_entry(entry)) { ret = VM_FAULT_HWPOISON; } else { print_bad_pte(vma, vmf->address, vmf->orig_pte, NULL); ret = VM_FAULT_SIGBUS; } goto out; } delayacct_set_flag(DELAYACCT_PF_SWAPIN); page = lookup_swap_cache(entry, vma, vmf->address); swapcache = page; if (!page) { struct swap_info_struct *si = swp_swap_info(entry); if (si->flags & SWP_SYNCHRONOUS_IO && __swap_count(si, entry) == 1) { /* skip swapcache */ page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vmf->address); if (page) { __SetPageLocked(page); __SetPageSwapBacked(page); set_page_private(page, entry.val); lru_cache_add_anon(page); swap_readpage(page, true); } } else { page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE, vmf); swapcache = page; } if (!page) { /* * Back out if somebody else faulted in this pte * while we released the pte lock. 
*/ vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); if (likely(pte_same(*vmf->pte, vmf->orig_pte))) ret = VM_FAULT_OOM; delayacct_clear_flag(DELAYACCT_PF_SWAPIN); goto unlock; } /* Had to read the page from swap area: Major fault */ ret = VM_FAULT_MAJOR; count_vm_event(PGMAJFAULT); count_memcg_event_mm(vma->vm_mm, PGMAJFAULT); } else if (PageHWPoison(page)) { /* * hwpoisoned dirty swapcache pages are kept for killing * owner processes (which may be unknown at hwpoison time) */ ret = VM_FAULT_HWPOISON; delayacct_clear_flag(DELAYACCT_PF_SWAPIN); goto out_release; } locked = lock_page_or_retry(page, vma->vm_mm, vmf->flags); delayacct_clear_flag(DELAYACCT_PF_SWAPIN); if (!locked) { ret |= VM_FAULT_RETRY; goto out_release; } /* * Make sure try_to_free_swap or reuse_swap_page or swapoff did not * release the swapcache from under us. The page pin, and pte_same * test below, are not enough to exclude that. Even if it is still * swapcache, we need to check that the page's swap has not changed. */ if (unlikely((!PageSwapCache(page) || page_private(page) != entry.val)) && swapcache) goto out_page; page = ksm_might_need_to_copy(page, vma, vmf->address); if (unlikely(!page)) { ret = VM_FAULT_OOM; page = swapcache; goto out_page; } if (mem_cgroup_try_charge_delay(page, vma->vm_mm, GFP_KERNEL, &memcg, false)) { ret = VM_FAULT_OOM; goto out_page; } /* * Back out if somebody else already faulted in this pte. */ vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); if (unlikely(!pte_same(*vmf->pte, vmf->orig_pte))) goto out_nomap; if (unlikely(!PageUptodate(page))) { ret = VM_FAULT_SIGBUS; goto out_nomap; } /* * The page isn't present yet, go ahead with the fault. * * Be careful about the sequence of operations here. * To get its accounting right, reuse_swap_page() must be called * while the page is counted on swap but not yet in mapcount i.e. * before page_add_anon_rmap() and swap_free(); try_to_free_swap() * must be called after the swap_free(), or it will never succeed. */ inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); dec_mm_counter_fast(vma->vm_mm, MM_SWAPENTS); pte = mk_pte(page, vma->vm_page_prot); if ((vmf->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) { pte = maybe_mkwrite(pte_mkdirty(pte), vma); vmf->flags &= ~FAULT_FLAG_WRITE; ret |= VM_FAULT_WRITE; exclusive = RMAP_EXCLUSIVE; } flush_icache_page(vma, page); if (pte_swp_soft_dirty(vmf->orig_pte)) pte = pte_mksoft_dirty(pte); set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte); arch_do_swap_page(vma->vm_mm, vma, vmf->address, pte, vmf->orig_pte); vmf->orig_pte = pte; /* ksm created a completely new copy */ if (unlikely(page != swapcache && swapcache)) { page_add_new_anon_rmap(page, vma, vmf->address, false); mem_cgroup_commit_charge(page, memcg, false, false); lru_cache_add_active_or_unevictable(page, vma); } else { do_page_add_anon_rmap(page, vma, vmf->address, exclusive); mem_cgroup_commit_charge(page, memcg, true, false); activate_page(page); } swap_free(entry); if (mem_cgroup_swap_full(page) || (vma->vm_flags & VM_LOCKED) || PageMlocked(page)) try_to_free_swap(page); unlock_page(page); if (page != swapcache && swapcache) { /* * Hold the lock to avoid the swap entry to be reused * until we take the PT lock for the pte_same() check * (to avoid false positives from pte_same). For * further safety release the lock after the swap_free * so that the swap count won't change under a * parallel locked swapcache. 
*/ unlock_page(swapcache); put_page(swapcache); } if (vmf->flags & FAULT_FLAG_WRITE) { ret |= do_wp_page(vmf); if (ret & VM_FAULT_ERROR) ret &= VM_FAULT_ERROR; goto out; } /* No need to invalidate - it was non-present before */ update_mmu_cache(vma, vmf->address, vmf->pte); unlock: pte_unmap_unlock(vmf->pte, vmf->ptl); out: return ret; out_nomap: mem_cgroup_cancel_charge(page, memcg, false); pte_unmap_unlock(vmf->pte, vmf->ptl); out_page: unlock_page(page); out_release: put_page(page); if (page != swapcache && swapcache) { unlock_page(swapcache); put_page(swapcache); } return ret; } /* * We enter with non-exclusive mmap_sem (to exclude vma changes, * but allow concurrent faults), and pte mapped but not yet locked. * We return with mmap_sem still held, but pte unmapped and unlocked. */ static vm_fault_t do_anonymous_page(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct mem_cgroup *memcg; struct page *page; vm_fault_t ret = 0; pte_t entry; /* File mapping without ->vm_ops ? */ if (vma->vm_flags & VM_SHARED) return VM_FAULT_SIGBUS; /* * Use pte_alloc() instead of pte_alloc_map(). We can't run * pte_offset_map() on pmds where a huge pmd might be created * from a different thread. * * pte_alloc_map() is safe to use under down_write(mmap_sem) or when * parallel threads are excluded by other means. * * Here we only have down_read(mmap_sem). */ if (pte_alloc(vma->vm_mm, vmf->pmd, vmf->address)) return VM_FAULT_OOM; /* See the comment in pte_alloc_one_map() */ if (unlikely(pmd_trans_unstable(vmf->pmd))) return 0; /* Use the zero-page for reads */ if (!(vmf->flags & FAULT_FLAG_WRITE) && !mm_forbids_zeropage(vma->vm_mm)) { entry = pte_mkspecial(pfn_pte(my_zero_pfn(vmf->address), vma->vm_page_prot)); vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); if (!pte_none(*vmf->pte)) goto unlock; ret = check_stable_address_space(vma->vm_mm); if (ret) goto unlock; /* Deliver the page fault to userland, check inside PT lock */ if (userfaultfd_missing(vma)) { pte_unmap_unlock(vmf->pte, vmf->ptl); return handle_userfault(vmf, VM_UFFD_MISSING); } goto setpte; } /* Allocate our own private page. */ if (unlikely(anon_vma_prepare(vma))) goto oom; page = alloc_zeroed_user_highpage_movable(vma, vmf->address); if (!page) goto oom; if (mem_cgroup_try_charge_delay(page, vma->vm_mm, GFP_KERNEL, &memcg, false)) goto oom_free_page; /* * The memory barrier inside __SetPageUptodate makes sure that * preceding stores to the page contents become visible before * the set_pte_at() write.
*/ __SetPageUptodate(page); entry = mk_pte(page, vma->vm_page_prot); if (vma->vm_flags & VM_WRITE) entry = pte_mkwrite(pte_mkdirty(entry)); vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); if (!pte_none(*vmf->pte)) goto release; ret = check_stable_address_space(vma->vm_mm); if (ret) goto release; /* Deliver the page fault to userland, check inside PT lock */ if (userfaultfd_missing(vma)) { pte_unmap_unlock(vmf->pte, vmf->ptl); mem_cgroup_cancel_charge(page, memcg, false); put_page(page); return handle_userfault(vmf, VM_UFFD_MISSING); } inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); page_add_new_anon_rmap(page, vma, vmf->address, false); mem_cgroup_commit_charge(page, memcg, false, false); lru_cache_add_active_or_unevictable(page, vma); setpte: set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry); /* No need to invalidate - it was non-present before */ update_mmu_cache(vma, vmf->address, vmf->pte); unlock: pte_unmap_unlock(vmf->pte, vmf->ptl); return ret; release: mem_cgroup_cancel_charge(page, memcg, false); put_page(page); goto unlock; oom_free_page: put_page(page); oom: return VM_FAULT_OOM; } /* * The mmap_sem must have been held on entry, and may have been * released depending on flags and vma->vm_ops->fault() return value. * See filemap_fault() and __lock_page_retry(). */ static vm_fault_t __do_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; vm_fault_t ret; /* * Preallocate pte before we take page_lock because this might lead to * deadlocks for memcg reclaim which waits for pages under writeback: * lock_page(A) * SetPageWriteback(A) * unlock_page(A) * lock_page(B) * lock_page(B) * pte_alloc_pne * shrink_page_list * wait_on_page_writeback(A) * SetPageWriteback(B) * unlock_page(B) * # flush A, B to clear the writeback */ if (pmd_none(*vmf->pmd) && !vmf->prealloc_pte) { vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm, vmf->address); if (!vmf->prealloc_pte) return VM_FAULT_OOM; smp_wmb(); /* See comment in __pte_alloc() */ } ret = vma->vm_ops->fault(vmf); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY | VM_FAULT_DONE_COW))) return ret; if (unlikely(PageHWPoison(vmf->page))) { struct page *page = vmf->page; vm_fault_t poisonret = VM_FAULT_HWPOISON; if (ret & VM_FAULT_LOCKED) { if (page_mapped(page)) unmap_mapping_pages(page_mapping(page), page->index, 1, false); /* Retry if a clean page was removed from the cache. */ if (invalidate_inode_page(page)) poisonret = VM_FAULT_NOPAGE; unlock_page(page); } put_page(page); vmf->page = NULL; return poisonret; } if (unlikely(!(ret & VM_FAULT_LOCKED))) lock_page(vmf->page); else VM_BUG_ON_PAGE(!PageLocked(vmf->page), vmf->page); return ret; } /* * The ordering of these checks is important for pmds with _PAGE_DEVMAP set. * If we check pmd_trans_unstable() first we will trip the bad_pmd() check * inside of pmd_none_or_trans_huge_or_clear_bad(). This will end up correctly * returning 1 but not before it spams dmesg with the pmd_clear_bad() output. 
*/ static int pmd_devmap_trans_unstable(pmd_t *pmd) { return pmd_devmap(*pmd) || pmd_trans_unstable(pmd); } static vm_fault_t pte_alloc_one_map(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; if (!pmd_none(*vmf->pmd)) goto map_pte; if (vmf->prealloc_pte) { vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); if (unlikely(!pmd_none(*vmf->pmd))) { spin_unlock(vmf->ptl); goto map_pte; } mm_inc_nr_ptes(vma->vm_mm); pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte); spin_unlock(vmf->ptl); vmf->prealloc_pte = NULL; } else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd, vmf->address))) { return VM_FAULT_OOM; } map_pte: /* * If a huge pmd materialized under us just retry later. Use * pmd_trans_unstable() via pmd_devmap_trans_unstable() instead of * pmd_trans_huge() to ensure the pmd didn't become pmd_trans_huge * under us and then back to pmd_none, as a result of MADV_DONTNEED * running immediately after a huge pmd fault in a different thread of * this mm, in turn leading to a misleading pmd_trans_huge() retval. * All we have to ensure is that it is a regular pmd that we can walk * with pte_offset_map() and we can do that through an atomic read in * C, which is what pmd_trans_unstable() provides. */ if (pmd_devmap_trans_unstable(vmf->pmd)) return VM_FAULT_NOPAGE; /* * At this point we know that our vmf->pmd points to a page of ptes * and it cannot become pmd_none(), pmd_devmap() or pmd_trans_huge() * for the duration of the fault. If a racing MADV_DONTNEED runs and * we zap the ptes pointed to by our vmf->pmd, the vmf->ptl will still * be valid and we will re-check to make sure the vmf->pte isn't * pte_none() under vmf->ptl protection when we return to * alloc_set_pte(). */ vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); return 0; } #ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE #define HPAGE_CACHE_INDEX_MASK (HPAGE_PMD_NR - 1) static inline bool transhuge_vma_suitable(struct vm_area_struct *vma, unsigned long haddr) { if (((vma->vm_start >> PAGE_SHIFT) & HPAGE_CACHE_INDEX_MASK) != (vma->vm_pgoff & HPAGE_CACHE_INDEX_MASK)) return false; if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end) return false; return true; } static void deposit_prealloc_pte(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, vmf->prealloc_pte); /* * We are going to consume the prealloc table, * count that as nr_ptes. */ mm_inc_nr_ptes(vma->vm_mm); vmf->prealloc_pte = NULL; } static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page) { struct vm_area_struct *vma = vmf->vma; bool write = vmf->flags & FAULT_FLAG_WRITE; unsigned long haddr = vmf->address & HPAGE_PMD_MASK; pmd_t entry; int i; vm_fault_t ret; if (!transhuge_vma_suitable(vma, haddr)) return VM_FAULT_FALLBACK; ret = VM_FAULT_FALLBACK; page = compound_head(page); /* * Archs like ppc64 need additional space to store information * related to pte entry. Use the preallocated table for that.
*/ if (arch_needs_pgtable_deposit() && !vmf->prealloc_pte) { vmf->prealloc_pte = pte_alloc_one(vma->vm_mm, vmf->address); if (!vmf->prealloc_pte) return VM_FAULT_OOM; smp_wmb(); /* See comment in __pte_alloc() */ } vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); if (unlikely(!pmd_none(*vmf->pmd))) goto out; for (i = 0; i < HPAGE_PMD_NR; i++) flush_icache_page(vma, page + i); entry = mk_huge_pmd(page, vma->vm_page_prot); if (write) entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); add_mm_counter(vma->vm_mm, mm_counter_file(page), HPAGE_PMD_NR); page_add_file_rmap(page, true); /* * deposit and withdraw with pmd lock held */ if (arch_needs_pgtable_deposit()) deposit_prealloc_pte(vmf); set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry); update_mmu_cache_pmd(vma, haddr, vmf->pmd); /* fault is handled */ ret = 0; count_vm_event(THP_FILE_MAPPED); out: spin_unlock(vmf->ptl); return ret; } #else static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page) { BUILD_BUG(); return 0; } #endif /** * alloc_set_pte - setup new PTE entry for given page and add reverse page * mapping. If needed, the function allocates a page table or uses the * pre-allocated one. * * @vmf: fault environment * @memcg: memcg to charge page (only for private mappings) * @page: page to map * * Caller must take care of unlocking vmf->ptl, if vmf->pte is non-NULL on * return. * * Target users are page handler itself and implementations of * vm_ops->map_pages. */ vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg, struct page *page) { struct vm_area_struct *vma = vmf->vma; bool write = vmf->flags & FAULT_FLAG_WRITE; pte_t entry; vm_fault_t ret; if (pmd_none(*vmf->pmd) && PageTransCompound(page) && IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE)) { /* THP on COW? */ VM_BUG_ON_PAGE(memcg, page); ret = do_set_pmd(vmf, page); if (ret != VM_FAULT_FALLBACK) return ret; } if (!vmf->pte) { ret = pte_alloc_one_map(vmf); if (ret) return ret; } /* Re-check under ptl */ if (unlikely(!pte_none(*vmf->pte))) return VM_FAULT_NOPAGE; flush_icache_page(vma, page); entry = mk_pte(page, vma->vm_page_prot); if (write) entry = maybe_mkwrite(pte_mkdirty(entry), vma); /* copy-on-write page */ if (write && !(vma->vm_flags & VM_SHARED)) { inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); page_add_new_anon_rmap(page, vma, vmf->address, false); mem_cgroup_commit_charge(page, memcg, false, false); lru_cache_add_active_or_unevictable(page, vma); } else { inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page)); page_add_file_rmap(page, false); } set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry); /* no need to invalidate: a not-present page won't be cached */ update_mmu_cache(vma, vmf->address, vmf->pte); return 0; } /** * finish_fault - finish page fault once we have prepared the page to fault * * @vmf: structure describing the fault * * This function handles all that is needed to finish a page fault once the * page to fault in is prepared. It handles locking of PTEs, inserts PTE for * given page, adds reverse page mapping, handles memcg charges and LRU * addition. The function returns 0 on success, VM_FAULT_ code in case of * error. * * The function expects the page to be locked and on success it consumes a * reference of a page being mapped (for the PTE which maps it). */ vm_fault_t finish_fault(struct vm_fault *vmf) { struct page *page; vm_fault_t ret = 0; /* Did we COW the page?
*/ if ((vmf->flags & FAULT_FLAG_WRITE) && !(vmf->vma->vm_flags & VM_SHARED)) page = vmf->cow_page; else page = vmf->page; /* * check even for read faults because we might have lost our CoWed * page */ if (!(vmf->vma->vm_flags & VM_SHARED)) ret = check_stable_address_space(vmf->vma->vm_mm); if (!ret) ret = alloc_set_pte(vmf, vmf->memcg, page); if (vmf->pte) pte_unmap_unlock(vmf->pte, vmf->ptl); return ret; } static unsigned long fault_around_bytes __read_mostly = rounddown_pow_of_two(65536); #ifdef CONFIG_DEBUG_FS static int fault_around_bytes_get(void *data, u64 *val) { *val = fault_around_bytes; return 0; } /* * fault_around_bytes must be rounded down to the nearest page order as it's * what do_fault_around() expects to see. */ static int fault_around_bytes_set(void *data, u64 val) { if (val / PAGE_SIZE > PTRS_PER_PTE) return -EINVAL; if (val > PAGE_SIZE) fault_around_bytes = rounddown_pow_of_two(val); else fault_around_bytes = PAGE_SIZE; /* rounddown_pow_of_two(0) is undefined */ return 0; } DEFINE_DEBUGFS_ATTRIBUTE(fault_around_bytes_fops, fault_around_bytes_get, fault_around_bytes_set, "%llu\n"); static int __init fault_around_debugfs(void) { void *ret; ret = debugfs_create_file_unsafe("fault_around_bytes", 0644, NULL, NULL, &fault_around_bytes_fops); if (!ret) pr_warn("Failed to create fault_around_bytes in debugfs"); return 0; } late_initcall(fault_around_debugfs); #endif /* * do_fault_around() tries to map a few pages around the fault address. The * hope is that the pages will be needed soon and this will lower the number * of faults to handle. * * It uses vm_ops->map_pages() to map the pages, which skips the page if it's * not ready to be mapped: not up-to-date, locked, etc. * * This function is called with the page table lock taken. In the split ptlock * case the page table lock protects only those entries which belong to * the page table corresponding to the fault address. * * This function doesn't cross the VMA boundaries, in order to call map_pages() * only once. * * fault_around_bytes defines how many bytes we'll try to map. * do_fault_around() expects it to be set to a power of two less than or equal * to PTRS_PER_PTE. * * The virtual address of the area that we map is naturally aligned to * fault_around_bytes rounded down to the machine page size * (and therefore to page order). This way it's easier to guarantee * that we don't cross page table boundaries. */ static vm_fault_t do_fault_around(struct vm_fault *vmf) { unsigned long address = vmf->address, nr_pages, mask; pgoff_t start_pgoff = vmf->pgoff; pgoff_t end_pgoff; int off; vm_fault_t ret = 0; nr_pages = READ_ONCE(fault_around_bytes) >> PAGE_SHIFT; mask = ~(nr_pages * PAGE_SIZE - 1) & PAGE_MASK; vmf->address = max(address & mask, vmf->vma->vm_start); off = ((address - vmf->address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); start_pgoff -= off; /* * end_pgoff is either the end of the page table, the end of * the vma or nr_pages from start_pgoff, depending on what is nearest. */ end_pgoff = start_pgoff - ((vmf->address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) + PTRS_PER_PTE - 1; end_pgoff = min3(end_pgoff, vma_pages(vmf->vma) + vmf->vma->vm_pgoff - 1, start_pgoff + nr_pages - 1); if (pmd_none(*vmf->pmd)) { vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm, vmf->address); if (!vmf->prealloc_pte) goto out; smp_wmb(); /* See comment in __pte_alloc() */ } vmf->vma->vm_ops->map_pages(vmf, start_pgoff, end_pgoff); /* Huge page is mapped?
Page fault is solved */ if (pmd_trans_huge(*vmf->pmd)) { ret = VM_FAULT_NOPAGE; goto out; } /* ->map_pages() hasn't done anything useful. Cold page cache? */ if (!vmf->pte) goto out; /* check if the page fault is solved */ vmf->pte -= (vmf->address >> PAGE_SHIFT) - (address >> PAGE_SHIFT); if (!pte_none(*vmf->pte)) ret = VM_FAULT_NOPAGE; pte_unmap_unlock(vmf->pte, vmf->ptl); out: vmf->address = address; vmf->pte = NULL; return ret; } static vm_fault_t do_read_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; vm_fault_t ret = 0; /* * Let's call ->map_pages() first and use ->fault() as fallback * if the page at the offset is not ready to be mapped (cold cache or * something). */ if (vma->vm_ops->map_pages && fault_around_bytes >> PAGE_SHIFT > 1) { ret = do_fault_around(vmf); if (ret) return ret; } ret = __do_fault(vmf); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) return ret; ret |= finish_fault(vmf); unlock_page(vmf->page); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) put_page(vmf->page); return ret; } static vm_fault_t do_cow_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; vm_fault_t ret; if (unlikely(anon_vma_prepare(vma))) return VM_FAULT_OOM; vmf->cow_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vmf->address); if (!vmf->cow_page) return VM_FAULT_OOM; if (mem_cgroup_try_charge_delay(vmf->cow_page, vma->vm_mm, GFP_KERNEL, &vmf->memcg, false)) { put_page(vmf->cow_page); return VM_FAULT_OOM; } ret = __do_fault(vmf); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) goto uncharge_out; if (ret & VM_FAULT_DONE_COW) return ret; copy_user_highpage(vmf->cow_page, vmf->page, vmf->address, vma); __SetPageUptodate(vmf->cow_page); ret |= finish_fault(vmf); unlock_page(vmf->page); put_page(vmf->page); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) goto uncharge_out; return ret; uncharge_out: mem_cgroup_cancel_charge(vmf->cow_page, vmf->memcg, false); put_page(vmf->cow_page); return ret; } static vm_fault_t do_shared_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; vm_fault_t ret, tmp; ret = __do_fault(vmf); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) return ret; /* * Check if the backing address space wants to know that the page is * about to become writable */ if (vma->vm_ops->page_mkwrite) { unlock_page(vmf->page); tmp = do_page_mkwrite(vmf); if (unlikely(!tmp || (tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) { put_page(vmf->page); return tmp; } } ret |= finish_fault(vmf); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) { unlock_page(vmf->page); put_page(vmf->page); return ret; } fault_dirty_shared_page(vma, vmf->page); return ret; } /* * We enter with non-exclusive mmap_sem (to exclude vma changes, * but allow concurrent faults). * The mmap_sem may have been released depending on flags and our * return value. See filemap_fault() and __lock_page_or_retry(). * If mmap_sem is released, vma may become invalid (for example * by another thread calling munmap()).
*/ static vm_fault_t do_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct mm_struct *vm_mm = vma->vm_mm; vm_fault_t ret; /* * The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */ if (!vma->vm_ops->fault) { /* * If we find a migration pmd entry or a none pmd entry, which * should never happen, return SIGBUS */ if (unlikely(!pmd_present(*vmf->pmd))) ret = VM_FAULT_SIGBUS; else { vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); /* * Make sure this is not a temporary clearing of pte * by holding ptl and checking again. A R/M/W update * of the pte involves: take the ptl, clear the pte so * that we don't have concurrent modification by * hardware, followed by an update. */ if (unlikely(pte_none(*vmf->pte))) ret = VM_FAULT_SIGBUS; else ret = VM_FAULT_NOPAGE; pte_unmap_unlock(vmf->pte, vmf->ptl); } } else if (!(vmf->flags & FAULT_FLAG_WRITE)) ret = do_read_fault(vmf); else if (!(vma->vm_flags & VM_SHARED)) ret = do_cow_fault(vmf); else ret = do_shared_fault(vmf); /* preallocated pagetable is unused: free it */ if (vmf->prealloc_pte) { pte_free(vm_mm, vmf->prealloc_pte); vmf->prealloc_pte = NULL; } return ret; } static int numa_migrate_prep(struct page *page, struct vm_area_struct *vma, unsigned long addr, int page_nid, int *flags) { get_page(page); count_vm_numa_event(NUMA_HINT_FAULTS); if (page_nid == numa_node_id()) { count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL); *flags |= TNF_FAULT_LOCAL; } return mpol_misplaced(page, vma, addr); } static vm_fault_t do_numa_page(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct page *page = NULL; int page_nid = -1; int last_cpupid; int target_nid; bool migrated = false; pte_t pte; bool was_writable = pte_savedwrite(vmf->orig_pte); int flags = 0; /* * The "pte" at this point cannot be used safely without * validation through pte_unmap_same(). It's of NUMA type but * the pfn may be screwed if the read is non atomic. */ vmf->ptl = pte_lockptr(vma->vm_mm, vmf->pmd); spin_lock(vmf->ptl); if (unlikely(!pte_same(*vmf->pte, vmf->orig_pte))) { pte_unmap_unlock(vmf->pte, vmf->ptl); goto out; } /* * Make it present again. Depending on how the arch implements * non-accessible ptes, some can allow access by kernel mode. */ pte = ptep_modify_prot_start(vma->vm_mm, vmf->address, vmf->pte); pte = pte_modify(pte, vma->vm_page_prot); pte = pte_mkyoung(pte); if (was_writable) pte = pte_mkwrite(pte); ptep_modify_prot_commit(vma->vm_mm, vmf->address, vmf->pte, pte); update_mmu_cache(vma, vmf->address, vmf->pte); page = vm_normal_page(vma, vmf->address, pte); if (!page) { pte_unmap_unlock(vmf->pte, vmf->ptl); return 0; } /* TODO: handle PTE-mapped THP */ if (PageCompound(page)) { pte_unmap_unlock(vmf->pte, vmf->ptl); return 0; } /* * Avoid grouping on RO pages in general. RO pages shouldn't hurt as * much anyway since they can be in shared cache state. This misses * the case where a mapping is writable but the process never writes * to it but pte_write gets cleared during protection updates and * pte_dirty has unpredictable behaviour between PTE scan updates, * background writeback, dirty balancing and application behaviour. */ if (!pte_write(pte)) flags |= TNF_NO_GROUP; /* * Flag if the page is shared between multiple address spaces.
This * is later used when determining whether to group tasks together */ if (page_mapcount(page) > 1 && (vma->vm_flags & VM_SHARED)) flags |= TNF_SHARED; last_cpupid = page_cpupid_last(page); page_nid = page_to_nid(page); target_nid = numa_migrate_prep(page, vma, vmf->address, page_nid, &flags); pte_unmap_unlock(vmf->pte, vmf->ptl); if (target_nid == -1) { put_page(page); goto out; } /* Migrate to the requested node */ migrated = migrate_misplaced_page(page, vma, target_nid); if (migrated) { page_nid = target_nid; flags |= TNF_MIGRATED; } else flags |= TNF_MIGRATE_FAIL; out: if (page_nid != -1) task_numa_fault(last_cpupid, page_nid, 1, flags); return 0; } static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf) { if (vma_is_anonymous(vmf->vma)) return do_huge_pmd_anonymous_page(vmf); if (vmf->vma->vm_ops->huge_fault) return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PMD); return VM_FAULT_FALLBACK; } /* `inline' is required to avoid gcc 4.1.2 build error */ static inline vm_fault_t wp_huge_pmd(struct vm_fault *vmf, pmd_t orig_pmd) { if (vma_is_anonymous(vmf->vma)) return do_huge_pmd_wp_page(vmf, orig_pmd); if (vmf->vma->vm_ops->huge_fault) return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PMD); /* COW handled on pte level: split pmd */ VM_BUG_ON_VMA(vmf->vma->vm_flags & VM_SHARED, vmf->vma); __split_huge_pmd(vmf->vma, vmf->pmd, vmf->address, false, NULL); return VM_FAULT_FALLBACK; } static inline bool vma_is_accessible(struct vm_area_struct *vma) { return vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE); } static vm_fault_t create_huge_pud(struct vm_fault *vmf) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* No support for anonymous transparent PUD pages yet */ if (vma_is_anonymous(vmf->vma)) return VM_FAULT_FALLBACK; if (vmf->vma->vm_ops->huge_fault) return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ return VM_FAULT_FALLBACK; } static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* No support for anonymous transparent PUD pages yet */ if (vma_is_anonymous(vmf->vma)) return VM_FAULT_FALLBACK; if (vmf->vma->vm_ops->huge_fault) return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ return VM_FAULT_FALLBACK; } /* * These routines also need to handle stuff like marking pages dirty * and/or accessed for architectures that don't do it in hardware (most * RISC architectures). The early dirtying is also good on the i386. * * There is also a hook called "update_mmu_cache()" that architectures * with external mmu caches can use to update those (ie the Sparc or * PowerPC hashed page tables that act as extended TLBs). * * We enter with non-exclusive mmap_sem (to exclude vma changes, but allow * concurrent faults). * * The mmap_sem may have been released depending on flags and our return value. * See filemap_fault() and __lock_page_or_retry(). */ static vm_fault_t handle_pte_fault(struct vm_fault *vmf) { pte_t entry; if (unlikely(pmd_none(*vmf->pmd))) { /* * Leave __pte_alloc() until later: because vm_ops->fault may * want to allocate huge page, and if we expose page table * for an instant, it will be difficult to retract from * concurrent faults and from rmap lookups. */ vmf->pte = NULL; } else { /* See comment in pte_alloc_one_map() */ if (pmd_devmap_trans_unstable(vmf->pmd)) return 0; /* * A regular pmd is established and it can't morph into a huge * pmd from under us anymore at this point because we hold the * mmap_sem read mode and khugepaged takes it in write mode. 
* So now it's safe to run pte_offset_map(). */ vmf->pte = pte_offset_map(vmf->pmd, vmf->address); vmf->orig_pte = *vmf->pte; /* * some architectures can have larger ptes than wordsize, * e.g.ppc44x-defconfig has CONFIG_PTE_64BIT=y and * CONFIG_32BIT=y, so READ_ONCE cannot guarantee atomic * accesses. The code below just needs a consistent view * for the ifs and we later double check anyway with the * ptl lock held. So here a barrier will do. */ barrier(); if (pte_none(vmf->orig_pte)) { pte_unmap(vmf->pte); vmf->pte = NULL; } } if (!vmf->pte) { if (vma_is_anonymous(vmf->vma)) return do_anonymous_page(vmf); else return do_fault(vmf); } if (!pte_present(vmf->orig_pte)) return do_swap_page(vmf); if (pte_protnone(vmf->orig_pte) && vma_is_accessible(vmf->vma)) return do_numa_page(vmf); vmf->ptl = pte_lockptr(vmf->vma->vm_mm, vmf->pmd); spin_lock(vmf->ptl); entry = vmf->orig_pte; if (unlikely(!pte_same(*vmf->pte, entry))) goto unlock; if (vmf->flags & FAULT_FLAG_WRITE) { if (!pte_write(entry)) return do_wp_page(vmf); entry = pte_mkdirty(entry); } entry = pte_mkyoung(entry); if (ptep_set_access_flags(vmf->vma, vmf->address, vmf->pte, entry, vmf->flags & FAULT_FLAG_WRITE)) { update_mmu_cache(vmf->vma, vmf->address, vmf->pte); } else { /* * This is needed only for protection faults but the arch code * is not yet telling us if this is a protection fault or not. * This still avoids useless tlb flushes for .text page faults * with threads. */ if (vmf->flags & FAULT_FLAG_WRITE) flush_tlb_fix_spurious_fault(vmf->vma, vmf->address); } unlock: pte_unmap_unlock(vmf->pte, vmf->ptl); return 0; } /* * By the time we get here, we already hold the mm semaphore * * The mmap_sem may have been released depending on flags and our * return value. See filemap_fault() and __lock_page_or_retry(). 
*/ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma, unsigned long address, unsigned int flags) { struct vm_fault vmf = { .vma = vma, .address = address & PAGE_MASK, .flags = flags, .pgoff = linear_page_index(vma, address), .gfp_mask = __get_fault_gfp_mask(vma), }; unsigned int dirty = flags & FAULT_FLAG_WRITE; struct mm_struct *mm = vma->vm_mm; pgd_t *pgd; p4d_t *p4d; vm_fault_t ret; pgd = pgd_offset(mm, address); p4d = p4d_alloc(mm, pgd, address); if (!p4d) return VM_FAULT_OOM; vmf.pud = pud_alloc(mm, p4d, address); if (!vmf.pud) return VM_FAULT_OOM; if (pud_none(*vmf.pud) && __transparent_hugepage_enabled(vma)) { ret = create_huge_pud(&vmf); if (!(ret & VM_FAULT_FALLBACK)) return ret; } else { pud_t orig_pud = *vmf.pud; barrier(); if (pud_trans_huge(orig_pud) || pud_devmap(orig_pud)) { /* NUMA case for anonymous PUDs would go here */ if (dirty && !pud_write(orig_pud)) { ret = wp_huge_pud(&vmf, orig_pud); if (!(ret & VM_FAULT_FALLBACK)) return ret; } else { huge_pud_set_accessed(&vmf, orig_pud); return 0; } } } vmf.pmd = pmd_alloc(mm, vmf.pud, address); if (!vmf.pmd) return VM_FAULT_OOM; if (pmd_none(*vmf.pmd) && __transparent_hugepage_enabled(vma)) { ret = create_huge_pmd(&vmf); if (!(ret & VM_FAULT_FALLBACK)) return ret; } else { pmd_t orig_pmd = *vmf.pmd; barrier(); if (unlikely(is_swap_pmd(orig_pmd))) { VM_BUG_ON(thp_migration_supported() && !is_pmd_migration_entry(orig_pmd)); if (is_pmd_migration_entry(orig_pmd)) pmd_migration_entry_wait(mm, vmf.pmd); return 0; } if (pmd_trans_huge(orig_pmd) || pmd_devmap(orig_pmd)) { if (pmd_protnone(orig_pmd) && vma_is_accessible(vma)) return do_huge_pmd_numa_page(&vmf, orig_pmd); if (dirty && !pmd_write(orig_pmd)) { ret = wp_huge_pmd(&vmf, orig_pmd); if (!(ret & VM_FAULT_FALLBACK)) return ret; } else { huge_pmd_set_accessed(&vmf, orig_pmd); return 0; } } } return handle_pte_fault(&vmf); } /* * By the time we get here, we already hold the mm semaphore * * The mmap_sem may have been released depending on flags and our * return value. See filemap_fault() and __lock_page_or_retry(). */ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address, unsigned int flags) { vm_fault_t ret; __set_current_state(TASK_RUNNING); count_vm_event(PGFAULT); count_memcg_event_mm(vma->vm_mm, PGFAULT); /* do counter updates before entering really critical section. */ check_sync_rss_stat(current); if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE, flags & FAULT_FLAG_INSTRUCTION, flags & FAULT_FLAG_REMOTE)) return VM_FAULT_SIGSEGV; /* * Enable the memcg OOM handling for faults triggered in user * space. Kernel faults are handled more gracefully. */ if (flags & FAULT_FLAG_USER) mem_cgroup_enter_user_fault(); if (unlikely(is_vm_hugetlb_page(vma))) ret = hugetlb_fault(vma->vm_mm, vma, address, flags); else ret = __handle_mm_fault(vma, address, flags); if (flags & FAULT_FLAG_USER) { mem_cgroup_exit_user_fault(); /* * The task may have entered a memcg OOM situation but * if the allocation error was handled gracefully (no * VM_FAULT_OOM), there is no need to kill anything. * Just clean up the OOM state peacefully. */ if (task_in_memcg_oom(current) && !(ret & VM_FAULT_OOM)) mem_cgroup_oom_synchronize(false); } return ret; } EXPORT_SYMBOL_GPL(handle_mm_fault); #ifndef __PAGETABLE_P4D_FOLDED /* * Allocate p4d page table. * We've already handled the fast-path in-line. 
*/ int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) { p4d_t *new = p4d_alloc_one(mm, address); if (!new) return -ENOMEM; smp_wmb(); /* See comment in __pte_alloc */ spin_lock(&mm->page_table_lock); if (pgd_present(*pgd)) /* Another has populated it */ p4d_free(mm, new); else pgd_populate(mm, pgd, new); spin_unlock(&mm->page_table_lock); return 0; } #endif /* __PAGETABLE_P4D_FOLDED */ #ifndef __PAGETABLE_PUD_FOLDED /* * Allocate page upper directory. * We've already handled the fast-path in-line. */ int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address) { pud_t *new = pud_alloc_one(mm, address); if (!new) return -ENOMEM; smp_wmb(); /* See comment in __pte_alloc */ spin_lock(&mm->page_table_lock); #ifndef __ARCH_HAS_5LEVEL_HACK if (!p4d_present(*p4d)) { mm_inc_nr_puds(mm); p4d_populate(mm, p4d, new); } else /* Another has populated it */ pud_free(mm, new); #else if (!pgd_present(*p4d)) { mm_inc_nr_puds(mm); pgd_populate(mm, p4d, new); } else /* Another has populated it */ pud_free(mm, new); #endif /* __ARCH_HAS_5LEVEL_HACK */ spin_unlock(&mm->page_table_lock); return 0; } #endif /* __PAGETABLE_PUD_FOLDED */ #ifndef __PAGETABLE_PMD_FOLDED /* * Allocate page middle directory. * We've already handled the fast-path in-line. */ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) { spinlock_t *ptl; pmd_t *new = pmd_alloc_one(mm, address); if (!new) return -ENOMEM; smp_wmb(); /* See comment in __pte_alloc */ ptl = pud_lock(mm, pud); #ifndef __ARCH_HAS_4LEVEL_HACK if (!pud_present(*pud)) { mm_inc_nr_pmds(mm); pud_populate(mm, pud, new); } else /* Another has populated it */ pmd_free(mm, new); #else if (!pgd_present(*pud)) { mm_inc_nr_pmds(mm); pgd_populate(mm, pud, new); } else /* Another has populated it */ pmd_free(mm, new); #endif /* __ARCH_HAS_4LEVEL_HACK */ spin_unlock(ptl); return 0; } #endif /* __PAGETABLE_PMD_FOLDED */ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address, unsigned long *start, unsigned long *end, pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp) { pgd_t *pgd; p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *ptep; pgd = pgd_offset(mm, address); if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) goto out; p4d = p4d_offset(pgd, address); if (p4d_none(*p4d) || unlikely(p4d_bad(*p4d))) goto out; pud = pud_offset(p4d, address); if (pud_none(*pud) || unlikely(pud_bad(*pud))) goto out; pmd = pmd_offset(pud, address); VM_BUG_ON(pmd_trans_huge(*pmd)); if (pmd_huge(*pmd)) { if (!pmdpp) goto out; if (start && end) { *start = address & PMD_MASK; *end = *start + PMD_SIZE; mmu_notifier_invalidate_range_start(mm, *start, *end); } *ptlp = pmd_lock(mm, pmd); if (pmd_huge(*pmd)) { *pmdpp = pmd; return 0; } spin_unlock(*ptlp); if (start && end) mmu_notifier_invalidate_range_end(mm, *start, *end); } if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) goto out; if (start && end) { *start = address & PAGE_MASK; *end = *start + PAGE_SIZE; mmu_notifier_invalidate_range_start(mm, *start, *end); } ptep = pte_offset_map_lock(mm, pmd, address, ptlp); if (!pte_present(*ptep)) goto unlock; *ptepp = ptep; return 0; unlock: pte_unmap_unlock(ptep, *ptlp); if (start && end) mmu_notifier_invalidate_range_end(mm, *start, *end); out: return -EINVAL; } static inline int follow_pte(struct mm_struct *mm, unsigned long address, pte_t **ptepp, spinlock_t **ptlp) { int res; /* (void) is needed to make gcc happy */ (void) __cond_lock(*ptlp, !(res = __follow_pte_pmd(mm, address, NULL, NULL, ptepp, NULL, ptlp))); return res; } int follow_pte_pmd(struct 
mm_struct *mm, unsigned long address, unsigned long *start, unsigned long *end, pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp) { int res; /* (void) is needed to make gcc happy */ (void) __cond_lock(*ptlp, !(res = __follow_pte_pmd(mm, address, start, end, ptepp, pmdpp, ptlp))); return res; } EXPORT_SYMBOL(follow_pte_pmd); /** * follow_pfn - look up PFN at a user virtual address * @vma: memory mapping * @address: user virtual address * @pfn: location to store found PFN * * Only IO mappings and raw PFN mappings are allowed. * * Returns zero and the pfn at @pfn on success, -ve otherwise. */ int follow_pfn(struct vm_area_struct *vma, unsigned long address, unsigned long *pfn) { int ret = -EINVAL; spinlock_t *ptl; pte_t *ptep; if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) return ret; ret = follow_pte(vma->vm_mm, address, &ptep, &ptl); if (ret) return ret; *pfn = pte_pfn(*ptep); pte_unmap_unlock(ptep, ptl); return 0; } EXPORT_SYMBOL(follow_pfn); #ifdef CONFIG_HAVE_IOREMAP_PROT int follow_phys(struct vm_area_struct *vma, unsigned long address, unsigned int flags, unsigned long *prot, resource_size_t *phys) { int ret = -EINVAL; pte_t *ptep, pte; spinlock_t *ptl; if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) goto out; if (follow_pte(vma->vm_mm, address, &ptep, &ptl)) goto out; pte = *ptep; if ((flags & FOLL_WRITE) && !pte_write(pte)) goto unlock; *prot = pgprot_val(pte_pgprot(pte)); *phys = (resource_size_t)pte_pfn(pte) << PAGE_SHIFT; ret = 0; unlock: pte_unmap_unlock(ptep, ptl); out: return ret; } int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, void *buf, int len, int write) { resource_size_t phys_addr; unsigned long prot = 0; void __iomem *maddr; int offset = addr & (PAGE_SIZE-1); if (follow_phys(vma, addr, write, &prot, &phys_addr)) return -EINVAL; maddr = ioremap_prot(phys_addr, PAGE_ALIGN(len + offset), prot); if (!maddr) return -ENOMEM; if (write) memcpy_toio(maddr + offset, buf, len); else memcpy_fromio(buf, maddr + offset, len); iounmap(maddr); return len; } EXPORT_SYMBOL_GPL(generic_access_phys); #endif /* * Access another process' address space as given in mm. If non-NULL, use the * given task for page fault accounting. */ int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, unsigned long addr, void *buf, int len, unsigned int gup_flags) { struct vm_area_struct *vma; void *old_buf = buf; int write = gup_flags & FOLL_WRITE; if (down_read_killable(&mm->mmap_sem)) return 0; /* ignore errors, just check how much was successfully transferred */ while (len) { int bytes, ret, offset; void *maddr; struct page *page = NULL; ret = get_user_pages_remote(tsk, mm, addr, 1, gup_flags, &page, &vma, NULL); if (ret <= 0) { #ifndef CONFIG_HAVE_IOREMAP_PROT break; #else /* * Check if this is a VM_IO | VM_PFNMAP VMA, which * we can access using slightly different code. 
*/ vma = find_vma(mm, addr); if (!vma || vma->vm_start > addr) break; if (vma->vm_ops && vma->vm_ops->access) ret = vma->vm_ops->access(vma, addr, buf, len, write); if (ret <= 0) break; bytes = ret; #endif } else { bytes = len; offset = addr & (PAGE_SIZE-1); if (bytes > PAGE_SIZE-offset) bytes = PAGE_SIZE-offset; maddr = kmap(page); if (write) { copy_to_user_page(vma, page, addr, maddr + offset, buf, bytes); set_page_dirty_lock(page); } else { copy_from_user_page(vma, page, addr, buf, maddr + offset, bytes); } kunmap(page); put_page(page); } len -= bytes; buf += bytes; addr += bytes; } up_read(&mm->mmap_sem); return buf - old_buf; } /** * access_remote_vm - access another process' address space * @mm: the mm_struct of the target address space * @addr: start address to access * @buf: source or destination buffer * @len: number of bytes to transfer * @gup_flags: flags modifying lookup behaviour * * The caller must hold a reference on @mm. */ int access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf, int len, unsigned int gup_flags) { return __access_remote_vm(NULL, mm, addr, buf, len, gup_flags); } /* * Access another process' address space. * Source/target buffer must be kernel space, * Do not walk the page table directly, use get_user_pages */ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, unsigned int gup_flags) { struct mm_struct *mm; int ret; mm = get_task_mm(tsk); if (!mm) return 0; ret = __access_remote_vm(tsk, mm, addr, buf, len, gup_flags); mmput(mm); return ret; } EXPORT_SYMBOL_GPL(access_process_vm); /* * Print the name of a VMA. */ void print_vma_addr(char *prefix, unsigned long ip) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; /* * we might be running from an atomic context so we cannot sleep */ if (!down_read_trylock(&mm->mmap_sem)) return; vma = find_vma(mm, ip); if (vma && vma->vm_file) { struct file *f = vma->vm_file; char *buf = (char *)__get_free_page(GFP_NOWAIT); if (buf) { char *p; p = file_path(f, buf, PAGE_SIZE); if (IS_ERR(p)) p = "?"; printk("%s%s[%lx+%lx]", prefix, kbasename(p), vma->vm_start, vma->vm_end - vma->vm_start); free_page((unsigned long)buf); } } up_read(&mm->mmap_sem); } #if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_DEBUG_ATOMIC_SLEEP) void __might_fault(const char *file, int line) { /* * Some code (nfs/sunrpc) uses socket ops on kernel memory while * holding the mmap_sem, this is safe because kernel memory doesn't * get paged out, therefore we'll never actually fault, and the * below annotations will generate false positives. */ if (uaccess_kernel()) return; if (pagefault_disabled()) return; __might_sleep(file, line, 0); #if defined(CONFIG_DEBUG_ATOMIC_SLEEP) if (current->mm) might_lock_read(&current->mm->mmap_sem); #endif } EXPORT_SYMBOL(__might_fault); #endif #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS) /* * Process all subpages of the specified huge page with the specified * operation. The target subpage will be processed last to keep its * cache lines hot. 
*/ static inline void process_huge_page( unsigned long addr_hint, unsigned int pages_per_huge_page, void (*process_subpage)(unsigned long addr, int idx, void *arg), void *arg) { int i, n, base, l; unsigned long addr = addr_hint & ~(((unsigned long)pages_per_huge_page << PAGE_SHIFT) - 1); /* Process target subpage last to keep its cache lines hot */ might_sleep(); n = (addr_hint - addr) / PAGE_SIZE; if (2 * n <= pages_per_huge_page) { /* If target subpage in first half of huge page */ base = 0; l = n; /* Process subpages at the end of huge page */ for (i = pages_per_huge_page - 1; i >= 2 * n; i--) { cond_resched(); process_subpage(addr + i * PAGE_SIZE, i, arg); } } else { /* If target subpage in second half of huge page */ base = pages_per_huge_page - 2 * (pages_per_huge_page - n); l = pages_per_huge_page - n; /* Process subpages at the begin of huge page */ for (i = 0; i < base; i++) { cond_resched(); process_subpage(addr + i * PAGE_SIZE, i, arg); } } /* * Process remaining subpages in left-right-left-right pattern * towards the target subpage */ for (i = 0; i < l; i++) { int left_idx = base + i; int right_idx = base + 2 * l - 1 - i; cond_resched(); process_subpage(addr + left_idx * PAGE_SIZE, left_idx, arg); cond_resched(); process_subpage(addr + right_idx * PAGE_SIZE, right_idx, arg); } } static void clear_gigantic_page(struct page *page, unsigned long addr, unsigned int pages_per_huge_page) { int i; struct page *p = page; might_sleep(); for (i = 0; i < pages_per_huge_page; i++, p = mem_map_next(p, page, i)) { cond_resched(); clear_user_highpage(p, addr + i * PAGE_SIZE); } } static void clear_subpage(unsigned long addr, int idx, void *arg) { struct page *page = arg; clear_user_highpage(page + idx, addr); } void clear_huge_page(struct page *page, unsigned long addr_hint, unsigned int pages_per_huge_page) { unsigned long addr = addr_hint & ~(((unsigned long)pages_per_huge_page << PAGE_SHIFT) - 1); if (unlikely(pages_per_huge_page > MAX_ORDER_NR_PAGES)) { clear_gigantic_page(page, addr, pages_per_huge_page); return; } process_huge_page(addr_hint, pages_per_huge_page, clear_subpage, page); } static void copy_user_gigantic_page(struct page *dst, struct page *src, unsigned long addr, struct vm_area_struct *vma, unsigned int pages_per_huge_page) { int i; struct page *dst_base = dst; struct page *src_base = src; for (i = 0; i < pages_per_huge_page; ) { cond_resched(); copy_user_highpage(dst, src, addr + i*PAGE_SIZE, vma); i++; dst = mem_map_next(dst, dst_base, i); src = mem_map_next(src, src_base, i); } } struct copy_subpage_arg { struct page *dst; struct page *src; struct vm_area_struct *vma; }; static void copy_subpage(unsigned long addr, int idx, void *arg) { struct copy_subpage_arg *copy_arg = arg; copy_user_highpage(copy_arg->dst + idx, copy_arg->src + idx, addr, copy_arg->vma); } void copy_user_huge_page(struct page *dst, struct page *src, unsigned long addr_hint, struct vm_area_struct *vma, unsigned int pages_per_huge_page) { unsigned long addr = addr_hint & ~(((unsigned long)pages_per_huge_page << PAGE_SHIFT) - 1); struct copy_subpage_arg arg = { .dst = dst, .src = src, .vma = vma, }; if (unlikely(pages_per_huge_page > MAX_ORDER_NR_PAGES)) { copy_user_gigantic_page(dst, src, addr, vma, pages_per_huge_page); return; } process_huge_page(addr_hint, pages_per_huge_page, copy_subpage, &arg); } long copy_huge_page_from_user(struct page *dst_page, const void __user *usr_src, unsigned int pages_per_huge_page, bool allow_pagefault) { void *src = (void *)usr_src; void *page_kaddr; unsigned long i, 
rc = 0; unsigned long ret_val = pages_per_huge_page * PAGE_SIZE; struct page *subpage = dst_page; for (i = 0; i < pages_per_huge_page; i++, subpage = mem_map_next(subpage, dst_page, i)) { if (allow_pagefault) page_kaddr = kmap(subpage); else page_kaddr = kmap_atomic(subpage); rc = copy_from_user(page_kaddr, (const void __user *)(src + i * PAGE_SIZE), PAGE_SIZE); if (allow_pagefault) kunmap(subpage); else kunmap_atomic(page_kaddr); ret_val -= (PAGE_SIZE - rc); if (rc) break; flush_dcache_page(subpage); cond_resched(); } return ret_val; } #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */ #if USE_SPLIT_PTE_PTLOCKS && ALLOC_SPLIT_PTLOCKS static struct kmem_cache *page_ptl_cachep; void __init ptlock_cache_init(void) { page_ptl_cachep = kmem_cache_create("page->ptl", sizeof(spinlock_t), 0, SLAB_PANIC, NULL); } bool ptlock_alloc(struct page *page) { spinlock_t *ptl; ptl = kmem_cache_alloc(page_ptl_cachep, GFP_KERNEL); if (!ptl) return false; page->ptl = ptl; return true; } void ptlock_free(struct page *page) { kmem_cache_free(page_ptl_cachep, page->ptl); } #endif
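The ordering logic in process_huge_page() above is easiest to see with concrete indices. Below is a minimal userspace sketch (not kernel code; show_order() and the sample sizes are made up for illustration) that reproduces the same index arithmetic and prints the order in which subpages would be touched, with the faulting subpage always processed last so its cache lines stay hot.

#include <stdio.h>

/* mirror of the index arithmetic in process_huge_page(); n is the target subpage */
static void show_order(int pages_per_huge_page, int n)
{
        int i, base, l;

        printf("pages=%d target=%d:", pages_per_huge_page, n);
        if (2 * n <= pages_per_huge_page) {
                /* target in first half: handle the far (second) half first */
                base = 0;
                l = n;
                for (i = pages_per_huge_page - 1; i >= 2 * n; i--)
                        printf(" %d", i);
        } else {
                /* target in second half: handle the near (first) half first */
                base = pages_per_huge_page - 2 * (pages_per_huge_page - n);
                l = pages_per_huge_page - n;
                for (i = 0; i < base; i++)
                        printf(" %d", i);
        }
        /* remaining subpages alternate left/right, converging on the target */
        for (i = 0; i < l; i++)
                printf(" %d %d", base + i, base + 2 * l - 1 - i);
        printf("\n");
}

int main(void)
{
        show_order(8, 2);       /* order: 7 6 5 4 0 3 1 2 */
        show_order(8, 6);       /* order: 0 1 2 3 4 7 5 6 */
        return 0;
}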
/* * linux/fs/9p/trans_fd.c * * Fd transport layer. Includes deprecated socket layer. * * Copyright (C) 2006 by Russ Cox <rsc@swtch.com> * Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net> * Copyright (C) 2004-2008 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details.
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to: * Free Software Foundation * 51 Franklin Street, Fifth Floor * Boston, MA 02111-1301 USA * */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/in.h> #include <linux/module.h> #include <linux/net.h> #include <linux/ipv6.h> #include <linux/kthread.h> #include <linux/errno.h> #include <linux/kernel.h> #include <linux/un.h> #include <linux/uaccess.h> #include <linux/inet.h> #include <linux/idr.h> #include <linux/file.h> #include <linux/parser.h> #include <linux/slab.h> #include <linux/seq_file.h> #include <net/9p/9p.h> #include <net/9p/client.h> #include <net/9p/transport.h> #include <linux/syscalls.h> /* killme */ #define P9_PORT 564 #define MAX_SOCK_BUF (64*1024) #define MAXPOLLWADDR 2 static struct p9_trans_module p9_tcp_trans; static struct p9_trans_module p9_fd_trans; /** * struct p9_fd_opts - per-transport options * @rfd: file descriptor for reading (trans=fd) * @wfd: file descriptor for writing (trans=fd) * @port: port to connect to (trans=tcp) * */ struct p9_fd_opts { int rfd; int wfd; u16 port; bool privport; }; /* * Option Parsing (code inspired by NFS code) * - a little lazy - parse all fd-transport options */ enum { /* Options that take integer arguments */ Opt_port, Opt_rfdno, Opt_wfdno, Opt_err, /* Options that take no arguments */ Opt_privport, }; static const match_table_t tokens = { {Opt_port, "port=%u"}, {Opt_rfdno, "rfdno=%u"}, {Opt_wfdno, "wfdno=%u"}, {Opt_privport, "privport"}, {Opt_err, NULL}, }; enum { Rworksched = 1, /* read work scheduled or running */ Rpending = 2, /* can read */ Wworksched = 4, /* write work scheduled or running */ Wpending = 8, /* can write */ }; struct p9_poll_wait { struct p9_conn *conn; wait_queue_entry_t wait; wait_queue_head_t *wait_addr; }; /** * struct p9_conn - fd mux connection state information * @mux_list: list link for mux to manage multiple connections (?) * @client: reference to client instance for this connection * @err: error state * @req_list: accounting for requests which have been sent * @unsent_req_list: accounting for requests that haven't been sent * @req: current request being processed (if any) * @tmp_buf: temporary buffer to read in header * @rc: temporary fcall for reading current frame * @wpos: write position for current frame * @wsize: amount of data to write for current frame * @wbuf: current write buffer * @poll_pending_link: pending links to be polled per conn * @poll_wait: array of wait_q's for various worker threads * @pt: poll state * @rq: current read work * @wq: current write work * @wsched: ???? 
* */ struct p9_conn { struct list_head mux_list; struct p9_client *client; int err; struct list_head req_list; struct list_head unsent_req_list; struct p9_req_t *rreq; struct p9_req_t *wreq; char tmp_buf[7]; struct p9_fcall rc; int wpos; int wsize; char *wbuf; struct list_head poll_pending_link; struct p9_poll_wait poll_wait[MAXPOLLWADDR]; poll_table pt; struct work_struct rq; struct work_struct wq; unsigned long wsched; }; /** * struct p9_trans_fd - transport state * @rd: reference to file to read from * @wr: reference of file to write to * @conn: connection state reference * */ struct p9_trans_fd { struct file *rd; struct file *wr; struct p9_conn conn; }; static void p9_poll_workfn(struct work_struct *work); static DEFINE_SPINLOCK(p9_poll_lock); static LIST_HEAD(p9_poll_pending_list); static DECLARE_WORK(p9_poll_work, p9_poll_workfn); static unsigned int p9_ipport_resv_min = P9_DEF_MIN_RESVPORT; static unsigned int p9_ipport_resv_max = P9_DEF_MAX_RESVPORT; static void p9_mux_poll_stop(struct p9_conn *m) { unsigned long flags; int i; for (i = 0; i < ARRAY_SIZE(m->poll_wait); i++) { struct p9_poll_wait *pwait = &m->poll_wait[i]; if (pwait->wait_addr) { remove_wait_queue(pwait->wait_addr, &pwait->wait); pwait->wait_addr = NULL; } } spin_lock_irqsave(&p9_poll_lock, flags); list_del_init(&m->poll_pending_link); spin_unlock_irqrestore(&p9_poll_lock, flags); flush_work(&p9_poll_work); } /** * p9_conn_cancel - cancel all pending requests with error * @m: mux data * @err: error code * */ static void p9_conn_cancel(struct p9_conn *m, int err) { struct p9_req_t *req, *rtmp; LIST_HEAD(cancel_list); p9_debug(P9_DEBUG_ERROR, "mux %p err %d\n", m, err); spin_lock(&m->client->lock); if (m->err) { spin_unlock(&m->client->lock); return; } m->err = err; list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) { list_move(&req->req_list, &cancel_list); } list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) { list_move(&req->req_list, &cancel_list); } list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) { p9_debug(P9_DEBUG_ERROR, "call back req %p\n", req); list_del(&req->req_list); if (!req->t_err) req->t_err = err; p9_client_cb(m->client, req, REQ_STATUS_ERROR); } spin_unlock(&m->client->lock); } static __poll_t p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt, int *err) { __poll_t ret; struct p9_trans_fd *ts = NULL; if (client && client->status == Connected) ts = client->trans; if (!ts) { if (err) *err = -EREMOTEIO; return EPOLLERR; } ret = vfs_poll(ts->rd, pt); if (ts->rd != ts->wr) ret = (ret & ~EPOLLOUT) | (vfs_poll(ts->wr, pt) & ~EPOLLIN); return ret; } /** * p9_fd_read- read from a fd * @client: client instance * @v: buffer to receive data into * @len: size of receive buffer * */ static int p9_fd_read(struct p9_client *client, void *v, int len) { int ret; struct p9_trans_fd *ts = NULL; loff_t pos; if (client && client->status != Disconnected) ts = client->trans; if (!ts) return -EREMOTEIO; if (!(ts->rd->f_flags & O_NONBLOCK)) p9_debug(P9_DEBUG_ERROR, "blocking read ...\n"); pos = ts->rd->f_pos; ret = kernel_read(ts->rd, v, len, &pos); if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN) client->status = Disconnected; return ret; } /** * p9_read_work - called when there is some data to be read from a transport * @work: container of work to be done * */ static void p9_read_work(struct work_struct *work) { __poll_t n; int err; struct p9_conn *m; m = container_of(work, struct p9_conn, rq); if (m->err < 0) return; p9_debug(P9_DEBUG_TRANS, "start mux %p pos 
%zd\n", m, m->rc.offset); if (!m->rc.sdata) { m->rc.sdata = m->tmp_buf; m->rc.offset = 0; m->rc.capacity = 7; /* start by reading header */ } clear_bit(Rpending, &m->wsched); p9_debug(P9_DEBUG_TRANS, "read mux %p pos %zd size: %zd = %zd\n", m, m->rc.offset, m->rc.capacity, m->rc.capacity - m->rc.offset); err = p9_fd_read(m->client, m->rc.sdata + m->rc.offset, m->rc.capacity - m->rc.offset); p9_debug(P9_DEBUG_TRANS, "mux %p got %d bytes\n", m, err); if (err == -EAGAIN) goto end_clear; if (err <= 0) goto error; m->rc.offset += err; /* header read in */ if ((!m->rreq) && (m->rc.offset == m->rc.capacity)) { p9_debug(P9_DEBUG_TRANS, "got new header\n"); /* Header size */ m->rc.size = 7; err = p9_parse_header(&m->rc, &m->rc.size, NULL, NULL, 0); if (err) { p9_debug(P9_DEBUG_ERROR, "error parsing header: %d\n", err); goto error; } if (m->rc.size >= m->client->msize) { p9_debug(P9_DEBUG_ERROR, "requested packet size too big: %d\n", m->rc.size); err = -EIO; goto error; } p9_debug(P9_DEBUG_TRANS, "mux %p pkt: size: %d bytes tag: %d\n", m, m->rc.size, m->rc.tag); m->rreq = p9_tag_lookup(m->client, m->rc.tag); if (!m->rreq || (m->rreq->status != REQ_STATUS_SENT)) { p9_debug(P9_DEBUG_ERROR, "Unexpected packet tag %d\n", m->rc.tag); err = -EIO; goto error; } if (!m->rreq->rc.sdata) { p9_debug(P9_DEBUG_ERROR, "No recv fcall for tag %d (req %p), disconnecting!\n", m->rc.tag, m->rreq); m->rreq = NULL; err = -EIO; goto error; } m->rc.sdata = m->rreq->rc.sdata; memcpy(m->rc.sdata, m->tmp_buf, m->rc.capacity); m->rc.capacity = m->rc.size; } /* packet is read in * not an else because some packets (like clunk) have no payload */ if ((m->rreq) && (m->rc.offset == m->rc.capacity)) { p9_debug(P9_DEBUG_TRANS, "got new packet\n"); m->rreq->rc.size = m->rc.offset; spin_lock(&m->client->lock); if (m->rreq->status == REQ_STATUS_SENT) { list_del(&m->rreq->req_list); p9_client_cb(m->client, m->rreq, REQ_STATUS_RCVD); } else if (m->rreq->status == REQ_STATUS_FLSHD) { /* Ignore replies associated with a cancelled request. 
*/ p9_debug(P9_DEBUG_TRANS, "Ignore replies associated with a cancelled request\n"); } else { spin_unlock(&m->client->lock); p9_debug(P9_DEBUG_ERROR, "Request tag %d errored out while we were reading the reply\n", m->rc.tag); err = -EIO; goto error; } spin_unlock(&m->client->lock); m->rc.sdata = NULL; m->rc.offset = 0; m->rc.capacity = 0; p9_req_put(m->rreq); m->rreq = NULL; } end_clear: clear_bit(Rworksched, &m->wsched); if (!list_empty(&m->req_list)) { if (test_and_clear_bit(Rpending, &m->wsched)) n = EPOLLIN; else n = p9_fd_poll(m->client, NULL, NULL); if ((n & EPOLLIN) && !test_and_set_bit(Rworksched, &m->wsched)) { p9_debug(P9_DEBUG_TRANS, "sched read work %p\n", m); schedule_work(&m->rq); } } return; error: p9_conn_cancel(m, err); clear_bit(Rworksched, &m->wsched); } /** * p9_fd_write - write to a socket * @client: client instance * @v: buffer to send data from * @len: size of send buffer * */ static int p9_fd_write(struct p9_client *client, void *v, int len) { ssize_t ret; struct p9_trans_fd *ts = NULL; if (client && client->status != Disconnected) ts = client->trans; if (!ts) return -EREMOTEIO; if (!(ts->wr->f_flags & O_NONBLOCK)) p9_debug(P9_DEBUG_ERROR, "blocking write ...\n"); ret = kernel_write(ts->wr, v, len, &ts->wr->f_pos); if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN) client->status = Disconnected; return ret; } /** * p9_write_work - called when a transport can send some data * @work: container for work to be done * */ static void p9_write_work(struct work_struct *work) { __poll_t n; int err; struct p9_conn *m; struct p9_req_t *req; m = container_of(work, struct p9_conn, wq); if (m->err < 0) { clear_bit(Wworksched, &m->wsched); return; } if (!m->wsize) { spin_lock(&m->client->lock); if (list_empty(&m->unsent_req_list)) { clear_bit(Wworksched, &m->wsched); spin_unlock(&m->client->lock); return; } req = list_entry(m->unsent_req_list.next, struct p9_req_t, req_list); req->status = REQ_STATUS_SENT; p9_debug(P9_DEBUG_TRANS, "move req %p\n", req); list_move_tail(&req->req_list, &m->req_list); m->wbuf = req->tc.sdata; m->wsize = req->tc.size; m->wpos = 0; p9_req_get(req); m->wreq = req; spin_unlock(&m->client->lock); } p9_debug(P9_DEBUG_TRANS, "mux %p pos %d size %d\n", m, m->wpos, m->wsize); clear_bit(Wpending, &m->wsched); err = p9_fd_write(m->client, m->wbuf + m->wpos, m->wsize - m->wpos); p9_debug(P9_DEBUG_TRANS, "mux %p sent %d bytes\n", m, err); if (err == -EAGAIN) goto end_clear; if (err < 0) goto error; else if (err == 0) { err = -EREMOTEIO; goto error; } m->wpos += err; if (m->wpos == m->wsize) { m->wpos = m->wsize = 0; p9_req_put(m->wreq); m->wreq = NULL; } end_clear: clear_bit(Wworksched, &m->wsched); if (m->wsize || !list_empty(&m->unsent_req_list)) { if (test_and_clear_bit(Wpending, &m->wsched)) n = EPOLLOUT; else n = p9_fd_poll(m->client, NULL, NULL); if ((n & EPOLLOUT) && !test_and_set_bit(Wworksched, &m->wsched)) { p9_debug(P9_DEBUG_TRANS, "sched write work %p\n", m); schedule_work(&m->wq); } } return; error: p9_conn_cancel(m, err); clear_bit(Wworksched, &m->wsched); } static int p9_pollwake(wait_queue_entry_t *wait, unsigned int mode, int sync, void *key) { struct p9_poll_wait *pwait = container_of(wait, struct p9_poll_wait, wait); struct p9_conn *m = pwait->conn; unsigned long flags; spin_lock_irqsave(&p9_poll_lock, flags); if (list_empty(&m->poll_pending_link)) list_add_tail(&m->poll_pending_link, &p9_poll_pending_list); spin_unlock_irqrestore(&p9_poll_lock, flags); schedule_work(&p9_poll_work); return 1; } /** * p9_pollwait - add poll task to the wait 
queue * @filp: file pointer being polled * @wait_address: wait_q to block on * @p: poll state * * called by files poll operation to add v9fs-poll task to files wait queue */ static void p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p) { struct p9_conn *m = container_of(p, struct p9_conn, pt); struct p9_poll_wait *pwait = NULL; int i; for (i = 0; i < ARRAY_SIZE(m->poll_wait); i++) { if (m->poll_wait[i].wait_addr == NULL) { pwait = &m->poll_wait[i]; break; } } if (!pwait) { p9_debug(P9_DEBUG_ERROR, "not enough wait_address slots\n"); return; } pwait->conn = m; pwait->wait_addr = wait_address; init_waitqueue_func_entry(&pwait->wait, p9_pollwake); add_wait_queue(wait_address, &pwait->wait); } /** * p9_conn_create - initialize the per-session mux data * @client: client instance * * Note: Creates the polling task if this is the first session. */ static void p9_conn_create(struct p9_client *client) { __poll_t n; struct p9_trans_fd *ts = client->trans; struct p9_conn *m = &ts->conn; p9_debug(P9_DEBUG_TRANS, "client %p msize %d\n", client, client->msize); INIT_LIST_HEAD(&m->mux_list); m->client = client; INIT_LIST_HEAD(&m->req_list); INIT_LIST_HEAD(&m->unsent_req_list); INIT_WORK(&m->rq, p9_read_work); INIT_WORK(&m->wq, p9_write_work); INIT_LIST_HEAD(&m->poll_pending_link); init_poll_funcptr(&m->pt, p9_pollwait); n = p9_fd_poll(client, &m->pt, NULL); if (n & EPOLLIN) { p9_debug(P9_DEBUG_TRANS, "mux %p can read\n", m); set_bit(Rpending, &m->wsched); } if (n & EPOLLOUT) { p9_debug(P9_DEBUG_TRANS, "mux %p can write\n", m); set_bit(Wpending, &m->wsched); } } /** * p9_poll_mux - polls a mux and schedules read or write works if necessary * @m: connection to poll * */ static void p9_poll_mux(struct p9_conn *m) { __poll_t n; int err = -ECONNRESET; if (m->err < 0) return; n = p9_fd_poll(m->client, NULL, &err); if (n & (EPOLLERR | EPOLLHUP | EPOLLNVAL)) { p9_debug(P9_DEBUG_TRANS, "error mux %p err %d\n", m, n); p9_conn_cancel(m, err); } if (n & EPOLLIN) { set_bit(Rpending, &m->wsched); p9_debug(P9_DEBUG_TRANS, "mux %p can read\n", m); if (!test_and_set_bit(Rworksched, &m->wsched)) { p9_debug(P9_DEBUG_TRANS, "sched read work %p\n", m); schedule_work(&m->rq); } } if (n & EPOLLOUT) { set_bit(Wpending, &m->wsched); p9_debug(P9_DEBUG_TRANS, "mux %p can write\n", m); if ((m->wsize || !list_empty(&m->unsent_req_list)) && !test_and_set_bit(Wworksched, &m->wsched)) { p9_debug(P9_DEBUG_TRANS, "sched write work %p\n", m); schedule_work(&m->wq); } } } /** * p9_fd_request - send 9P request * The function can sleep until the request is scheduled for sending. * The function can be interrupted. Return from the function is not * a guarantee that the request is sent successfully. 
* * @client: client instance * @req: request to be sent * */ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req) { __poll_t n; struct p9_trans_fd *ts = client->trans; struct p9_conn *m = &ts->conn; p9_debug(P9_DEBUG_TRANS, "mux %p task %p tcall %p id %d\n", m, current, &req->tc, req->tc.id); if (m->err < 0) return m->err; spin_lock(&client->lock); req->status = REQ_STATUS_UNSENT; list_add_tail(&req->req_list, &m->unsent_req_list); spin_unlock(&client->lock); if (test_and_clear_bit(Wpending, &m->wsched)) n = EPOLLOUT; else n = p9_fd_poll(m->client, NULL, NULL); if (n & EPOLLOUT && !test_and_set_bit(Wworksched, &m->wsched)) schedule_work(&m->wq); return 0; } static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req) { int ret = 1; p9_debug(P9_DEBUG_TRANS, "client %p req %p\n", client, req); spin_lock(&client->lock); if (req->status == REQ_STATUS_UNSENT) { list_del(&req->req_list); req->status = REQ_STATUS_FLSHD; p9_req_put(req); ret = 0; } spin_unlock(&client->lock); return ret; } static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req) { p9_debug(P9_DEBUG_TRANS, "client %p req %p\n", client, req); spin_lock(&client->lock); /* Ignore cancelled request if message has been received * before lock. */ if (req->status == REQ_STATUS_RCVD) { spin_unlock(&client->lock); return 0; } /* we haven't received a response for oldreq, * remove it from the list. */ list_del(&req->req_list); req->status = REQ_STATUS_FLSHD; spin_unlock(&client->lock); p9_req_put(req); return 0; } static int p9_fd_show_options(struct seq_file *m, struct p9_client *clnt) { if (clnt->trans_mod == &p9_tcp_trans) { if (clnt->trans_opts.tcp.port != P9_PORT) seq_printf(m, ",port=%u", clnt->trans_opts.tcp.port); } else if (clnt->trans_mod == &p9_fd_trans) { if (clnt->trans_opts.fd.rfd != ~0) seq_printf(m, ",rfd=%u", clnt->trans_opts.fd.rfd); if (clnt->trans_opts.fd.wfd != ~0) seq_printf(m, ",wfd=%u", clnt->trans_opts.fd.wfd); } return 0; } /** * parse_opts - parse mount options into p9_fd_opts structure * @params: options string passed from mount * @opts: fd transport-specific structure to parse options into * * Returns 0 upon success, -ERRNO upon failure */ static int parse_opts(char *params, struct p9_fd_opts *opts) { char *p; substring_t args[MAX_OPT_ARGS]; int option; char *options, *tmp_options; opts->port = P9_PORT; opts->rfd = ~0; opts->wfd = ~0; opts->privport = false; if (!params) return 0; tmp_options = kstrdup(params, GFP_KERNEL); if (!tmp_options) { p9_debug(P9_DEBUG_ERROR, "failed to allocate copy of option string\n"); return -ENOMEM; } options = tmp_options; while ((p = strsep(&options, ",")) != NULL) { int token; int r; if (!*p) continue; token = match_token(p, tokens, args); if ((token != Opt_err) && (token != Opt_privport)) { r = match_int(&args[0], &option); if (r < 0) { p9_debug(P9_DEBUG_ERROR, "integer field, but no integer?\n"); continue; } } switch (token) { case Opt_port: opts->port = option; break; case Opt_rfdno: opts->rfd = option; break; case Opt_wfdno: opts->wfd = option; break; case Opt_privport: opts->privport = true; break; default: continue; } } kfree(tmp_options); return 0; } static int p9_fd_open(struct p9_client *client, int rfd, int wfd) { struct p9_trans_fd *ts = kzalloc(sizeof(struct p9_trans_fd), GFP_KERNEL); if (!ts) return -ENOMEM; ts->rd = fget(rfd); if (!ts->rd) goto out_free_ts; if (!(ts->rd->f_mode & FMODE_READ)) goto out_put_rd; ts->wr = fget(wfd); if (!ts->wr) goto out_put_rd; if (!(ts->wr->f_mode & FMODE_WRITE)) goto out_put_wr; 
client->trans = ts; client->status = Connected; return 0; out_put_wr: fput(ts->wr); out_put_rd: fput(ts->rd); out_free_ts: kfree(ts); return -EIO; } static int p9_socket_open(struct p9_client *client, struct socket *csocket) { struct p9_trans_fd *p; struct file *file; p = kzalloc(sizeof(struct p9_trans_fd), GFP_KERNEL); if (!p) return -ENOMEM; csocket->sk->sk_allocation = GFP_NOIO; file = sock_alloc_file(csocket, 0, NULL); if (IS_ERR(file)) { pr_err("%s (%d): failed to map fd\n", __func__, task_pid_nr(current)); kfree(p); return PTR_ERR(file); } get_file(file); p->wr = p->rd = file; client->trans = p; client->status = Connected; p->rd->f_flags |= O_NONBLOCK; p9_conn_create(client); return 0; } /** * p9_mux_destroy - cancels all pending requests of mux * @m: mux to destroy * */ static void p9_conn_destroy(struct p9_conn *m) { p9_debug(P9_DEBUG_TRANS, "mux %p prev %p next %p\n", m, m->mux_list.prev, m->mux_list.next); p9_mux_poll_stop(m); cancel_work_sync(&m->rq); if (m->rreq) { p9_req_put(m->rreq); m->rreq = NULL; } cancel_work_sync(&m->wq); if (m->wreq) { p9_req_put(m->wreq); m->wreq = NULL; } p9_conn_cancel(m, -ECONNRESET); m->client = NULL; } /** * p9_fd_close - shutdown file descriptor transport * @client: client instance * */ static void p9_fd_close(struct p9_client *client) { struct p9_trans_fd *ts; if (!client) return; ts = client->trans; if (!ts) return; client->status = Disconnected; p9_conn_destroy(&ts->conn); if (ts->rd) fput(ts->rd); if (ts->wr) fput(ts->wr); kfree(ts); } /* * stolen from NFS - maybe should be made a generic function? */ static inline int valid_ipaddr4(const char *buf) { int rc, count, in[4]; rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]); if (rc != 4) return -EINVAL; for (count = 0; count < 4; count++) { if (in[count] > 255) return -EINVAL; } return 0; } static int p9_bind_privport(struct socket *sock) { struct sockaddr_in cl; int port, err = -EINVAL; memset(&cl, 0, sizeof(cl)); cl.sin_family = AF_INET; cl.sin_addr.s_addr = INADDR_ANY; for (port = p9_ipport_resv_max; port >= p9_ipport_resv_min; port--) { cl.sin_port = htons((ushort)port); err = kernel_bind(sock, (struct sockaddr *)&cl, sizeof(cl)); if (err != -EADDRINUSE) break; } return err; } static int p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args) { int err; struct socket *csocket; struct sockaddr_in sin_server; struct p9_fd_opts opts; err = parse_opts(args, &opts); if (err < 0) return err; if (addr == NULL || valid_ipaddr4(addr) < 0) return -EINVAL; csocket = NULL; client->trans_opts.tcp.port = opts.port; client->trans_opts.tcp.privport = opts.privport; sin_server.sin_family = AF_INET; sin_server.sin_addr.s_addr = in_aton(addr); sin_server.sin_port = htons(opts.port); err = __sock_create(current->nsproxy->net_ns, PF_INET, SOCK_STREAM, IPPROTO_TCP, &csocket, 1); if (err) { pr_err("%s (%d): problem creating socket\n", __func__, task_pid_nr(current)); return err; } if (opts.privport) { err = p9_bind_privport(csocket); if (err < 0) { pr_err("%s (%d): problem binding to privport\n", __func__, task_pid_nr(current)); sock_release(csocket); return err; } } err = csocket->ops->connect(csocket, (struct sockaddr *)&sin_server, sizeof(struct sockaddr_in), 0); if (err < 0) { pr_err("%s (%d): problem connecting socket to %s\n", __func__, task_pid_nr(current), addr); sock_release(csocket); return err; } return p9_socket_open(client, csocket); } static int p9_fd_create_unix(struct p9_client *client, const char *addr, char *args) { int err; struct socket *csocket; struct 
sockaddr_un sun_server; csocket = NULL; if (!addr || !strlen(addr)) return -EINVAL; if (strlen(addr) >= UNIX_PATH_MAX) { pr_err("%s (%d): address too long: %s\n", __func__, task_pid_nr(current), addr); return -ENAMETOOLONG; } sun_server.sun_family = PF_UNIX; strcpy(sun_server.sun_path, addr); err = __sock_create(current->nsproxy->net_ns, PF_UNIX, SOCK_STREAM, 0, &csocket, 1); if (err < 0) { pr_err("%s (%d): problem creating socket\n", __func__, task_pid_nr(current)); return err; } err = csocket->ops->connect(csocket, (struct sockaddr *)&sun_server, sizeof(struct sockaddr_un) - 1, 0); if (err < 0) { pr_err("%s (%d): problem connecting socket: %s: %d\n", __func__, task_pid_nr(current), addr, err); sock_release(csocket); return err; } return p9_socket_open(client, csocket); } static int p9_fd_create(struct p9_client *client, const char *addr, char *args) { int err; struct p9_fd_opts opts; parse_opts(args, &opts); client->trans_opts.fd.rfd = opts.rfd; client->trans_opts.fd.wfd = opts.wfd; if (opts.rfd == ~0 || opts.wfd == ~0) { pr_err("Insufficient options for proto=fd\n"); return -ENOPROTOOPT; } err = p9_fd_open(client, opts.rfd, opts.wfd); if (err < 0) return err; p9_conn_create(client); return 0; } static struct p9_trans_module p9_tcp_trans = { .name = "tcp", .maxsize = MAX_SOCK_BUF, .def = 0, .create = p9_fd_create_tcp, .close = p9_fd_close, .request = p9_fd_request, .cancel = p9_fd_cancel, .cancelled = p9_fd_cancelled, .show_options = p9_fd_show_options, .owner = THIS_MODULE, }; static struct p9_trans_module p9_unix_trans = { .name = "unix", .maxsize = MAX_SOCK_BUF, .def = 0, .create = p9_fd_create_unix, .close = p9_fd_close, .request = p9_fd_request, .cancel = p9_fd_cancel, .cancelled = p9_fd_cancelled, .show_options = p9_fd_show_options, .owner = THIS_MODULE, }; static struct p9_trans_module p9_fd_trans = { .name = "fd", .maxsize = MAX_SOCK_BUF, .def = 0, .create = p9_fd_create, .close = p9_fd_close, .request = p9_fd_request, .cancel = p9_fd_cancel, .cancelled = p9_fd_cancelled, .show_options = p9_fd_show_options, .owner = THIS_MODULE, }; /** * p9_poll_workfn - poll worker thread * @work: work queue * * polls all v9fs transports for new events and queues the appropriate * work to the work queue * */ static void p9_poll_workfn(struct work_struct *work) { unsigned long flags; p9_debug(P9_DEBUG_TRANS, "start %p\n", current); spin_lock_irqsave(&p9_poll_lock, flags); while (!list_empty(&p9_poll_pending_list)) { struct p9_conn *conn = list_first_entry(&p9_poll_pending_list, struct p9_conn, poll_pending_link); list_del_init(&conn->poll_pending_link); spin_unlock_irqrestore(&p9_poll_lock, flags); p9_poll_mux(conn); spin_lock_irqsave(&p9_poll_lock, flags); } spin_unlock_irqrestore(&p9_poll_lock, flags); p9_debug(P9_DEBUG_TRANS, "finish\n"); } int p9_trans_fd_init(void) { v9fs_register_trans(&p9_tcp_trans); v9fs_register_trans(&p9_unix_trans); v9fs_register_trans(&p9_fd_trans); return 0; } void p9_trans_fd_exit(void) { flush_work(&p9_poll_work); v9fs_unregister_trans(&p9_tcp_trans); v9fs_unregister_trans(&p9_unix_trans); v9fs_unregister_trans(&p9_fd_trans); }
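The mux above reads exactly 7 bytes into tmp_buf before calling p9_parse_header() because every 9P message starts with a fixed little-endian header: size[4] type[1] tag[2]. A small userspace sketch of that layout follows (struct p9_hdr, decode_hdr() and the sample bytes are illustrative names and values, not part of the kernel code above).

#include <stdint.h>
#include <stdio.h>

struct p9_hdr {
        uint32_t size;  /* total message length, including this header */
        uint8_t  type;  /* message type, e.g. 100 = Tversion */
        uint16_t tag;   /* matches a reply to its request */
};

/* decode the 7 little-endian header bytes the mux reads into tmp_buf */
static struct p9_hdr decode_hdr(const unsigned char b[7])
{
        struct p9_hdr h;

        h.size = (uint32_t)b[0] | (uint32_t)b[1] << 8 |
                 (uint32_t)b[2] << 16 | (uint32_t)b[3] << 24;
        h.type = b[4];
        h.tag  = (uint16_t)(b[5] | b[6] << 8);
        return h;
}

int main(void)
{
        /* 19-byte Tversion request: size=19, type=100, tag=0xffff (NOTAG) */
        const unsigned char hdr[7] = { 19, 0, 0, 0, 100, 0xff, 0xff };
        struct p9_hdr h = decode_hdr(hdr);

        printf("size=%u type=%u tag=0x%x\n", h.size, h.type, h.tag);
        return 0;
}

From userspace this transport is normally selected with 9p mount options along the lines of trans=tcp,port=564 or trans=fd,rfdno=N,wfdno=N; the port/rfdno/wfdno/privport parts are the tokens parse_opts() above consumes.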
/* Signature verification with an asymmetric key * * See Documentation/crypto/asymmetric-keys.txt * * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public Licence * as published by the Free Software Foundation; either version * 2 of the Licence, or (at your option) any later version. */ #define pr_fmt(fmt) "SIG: "fmt #include <keys/asymmetric-subtype.h> #include <linux/export.h> #include <linux/err.h> #include <linux/slab.h> #include <crypto/public_key.h> #include "asymmetric_keys.h" /* * Destroy a public key signature. */ void public_key_signature_free(struct public_key_signature *sig) { int i; if (sig) { for (i = 0; i < ARRAY_SIZE(sig->auth_ids); i++) kfree(sig->auth_ids[i]); kfree(sig->s); kfree(sig->digest); kfree(sig); } } EXPORT_SYMBOL_GPL(public_key_signature_free); /** * verify_signature - Initiate the use of an asymmetric key to verify a signature * @key: The asymmetric key to verify against * @sig: The signature to check * * Returns 0 if successful or else an error. */ int verify_signature(const struct key *key, const struct public_key_signature *sig) { const struct asymmetric_key_subtype *subtype; int ret; pr_devel("==>%s()\n", __func__); if (key->type != &key_type_asymmetric) return -EINVAL; subtype = asymmetric_key_subtype(key); if (!subtype || !key->payload.data[0]) return -EINVAL; if (!subtype->verify_signature) return -ENOTSUPP; ret = subtype->verify_signature(key, sig); pr_devel("<==%s() = %d\n", __func__, ret); return ret; } EXPORT_SYMBOL_GPL(verify_signature);
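verify_signature() above only checks the key type and dispatches to the subtype hook, so a caller is expected to fill in a struct public_key_signature itself. A hypothetical sketch of such a caller follows (example_verify_blob() and the algorithm strings are illustrative; the field names are the ones public_key_signature_free() above releases, plus pkey_algo/hash_algo, which I believe come from include/crypto/public_key.h).

#include <linux/key.h>
#include <crypto/public_key.h>

static int example_verify_blob(const struct key *key,
                               u8 *digest, u32 digest_size,
                               u8 *sigdata, u32 sig_size)
{
        struct public_key_signature sig = {
                .s              = sigdata,
                .s_size         = sig_size,
                .digest         = digest,
                .digest_size    = digest_size,
                .pkey_algo      = "rsa",        /* illustrative choice */
                .hash_algo      = "sha256",     /* illustrative choice */
        };

        /*
         * Dispatches to the key subtype's ->verify_signature() as shown
         * above.  A stack-allocated sig with borrowed buffers like this one
         * must not be passed to public_key_signature_free(), which kfree()s
         * its fields.
         */
        return verify_signature(key, &sig);
}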
/* * X.25 Packet Layer release 002 * * This is ALPHA test software. This code may break your machine, * randomly fail to work with new releases, misbehave and/or generally * screw up. It might even work. * * This code REQUIRES 2.1.15 or higher * * This module: * This module is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * * History * X.25 001 Jonathan Naylor Started coding. * X.25 002 Jonathan Naylor Centralised disconnection processing. * mar/20/00 Daniela Squassoni Disabling/enabling of facilities * negotiation. * jun/24/01 Arnaldo C. Melo use skb_queue_purge, cleanups * apr/04/15 Shaun Pereira Fast select with no * restriction on response. */ #define pr_fmt(fmt) "X25: " fmt #include <linux/slab.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/skbuff.h> #include <net/sock.h> #include <net/tcp_states.h> #include <net/x25.h> /* * This routine purges all of the queues of frames. */ void x25_clear_queues(struct sock *sk) { struct x25_sock *x25 = x25_sk(sk); skb_queue_purge(&sk->sk_write_queue); skb_queue_purge(&x25->ack_queue); skb_queue_purge(&x25->interrupt_in_queue); skb_queue_purge(&x25->interrupt_out_queue); skb_queue_purge(&x25->fragment_queue); } /* * This routine purges the input queue of those frames that have been * acknowledged. This replaces the boxes labelled "V(a) <- N(r)" on the * SDL diagram. */ void x25_frames_acked(struct sock *sk, unsigned short nr) { struct sk_buff *skb; struct x25_sock *x25 = x25_sk(sk); int modulus = x25->neighbour->extended ? X25_EMODULUS : X25_SMODULUS; /* * Remove all the ack-ed frames from the ack queue.
*/ if (x25->va != nr) while (skb_peek(&x25->ack_queue) && x25->va != nr) { skb = skb_dequeue(&x25->ack_queue); kfree_skb(skb); x25->va = (x25->va + 1) % modulus; } } void x25_requeue_frames(struct sock *sk) { struct sk_buff *skb, *skb_prev = NULL; /* * Requeue all the un-ack-ed frames on the output queue to be picked * up by x25_kick. This arrangement handles the possibility of an empty * output queue. */ while ((skb = skb_dequeue(&x25_sk(sk)->ack_queue)) != NULL) { if (!skb_prev) skb_queue_head(&sk->sk_write_queue, skb); else skb_append(skb_prev, skb, &sk->sk_write_queue); skb_prev = skb; } } /* * Validate that the value of nr is between va and vs. Return true or * false for testing. */ int x25_validate_nr(struct sock *sk, unsigned short nr) { struct x25_sock *x25 = x25_sk(sk); unsigned short vc = x25->va; int modulus = x25->neighbour->extended ? X25_EMODULUS : X25_SMODULUS; while (vc != x25->vs) { if (nr == vc) return 1; vc = (vc + 1) % modulus; } return nr == x25->vs ? 1 : 0; } /* * This routine is called when the packet layer internally generates a * control frame. */ void x25_write_internal(struct sock *sk, int frametype) { struct x25_sock *x25 = x25_sk(sk); struct sk_buff *skb; unsigned char *dptr; unsigned char facilities[X25_MAX_FAC_LEN]; unsigned char addresses[1 + X25_ADDR_LEN]; unsigned char lci1, lci2; /* * Default safe frame size. */ int len = X25_MAX_L2_LEN + X25_EXT_MIN_LEN; /* * Adjust frame size. */ switch (frametype) { case X25_CALL_REQUEST: len += 1 + X25_ADDR_LEN + X25_MAX_FAC_LEN + X25_MAX_CUD_LEN; break; case X25_CALL_ACCEPTED: /* fast sel with no restr on resp */ if (x25->facilities.reverse & 0x80) { len += 1 + X25_MAX_FAC_LEN + X25_MAX_CUD_LEN; } else { len += 1 + X25_MAX_FAC_LEN; } break; case X25_CLEAR_REQUEST: case X25_RESET_REQUEST: len += 2; break; case X25_RR: case X25_RNR: case X25_REJ: case X25_CLEAR_CONFIRMATION: case X25_INTERRUPT_CONFIRMATION: case X25_RESET_CONFIRMATION: break; default: pr_err("invalid frame type %02X\n", frametype); return; } if ((skb = alloc_skb(len, GFP_ATOMIC)) == NULL) return; /* * Space for Ethernet and 802.2 LLC headers. */ skb_reserve(skb, X25_MAX_L2_LEN); /* * Make space for the GFI and LCI, and fill them in. */ dptr = skb_put(skb, 2); lci1 = (x25->lci >> 8) & 0x0F; lci2 = (x25->lci >> 0) & 0xFF; if (x25->neighbour->extended) { *dptr++ = lci1 | X25_GFI_EXTSEQ; *dptr++ = lci2; } else { *dptr++ = lci1 | X25_GFI_STDSEQ; *dptr++ = lci2; } /* * Now fill in the frame type specific information. */ switch (frametype) { case X25_CALL_REQUEST: dptr = skb_put(skb, 1); *dptr++ = X25_CALL_REQUEST; len = x25_addr_aton(addresses, &x25->dest_addr, &x25->source_addr); skb_put_data(skb, addresses, len); len = x25_create_facilities(facilities, &x25->facilities, &x25->dte_facilities, x25->neighbour->global_facil_mask); skb_put_data(skb, facilities, len); skb_put_data(skb, x25->calluserdata.cuddata, x25->calluserdata.cudlength); x25->calluserdata.cudlength = 0; break; case X25_CALL_ACCEPTED: dptr = skb_put(skb, 2); *dptr++ = X25_CALL_ACCEPTED; *dptr++ = 0x00; /* Address lengths */ len = x25_create_facilities(facilities, &x25->facilities, &x25->dte_facilities, x25->vc_facil_mask); skb_put_data(skb, facilities, len); /* fast select with no restriction on response allows call user data. 
Userland must ensure it is ours and not theirs */ if(x25->facilities.reverse & 0x80) { skb_put_data(skb, x25->calluserdata.cuddata, x25->calluserdata.cudlength); } x25->calluserdata.cudlength = 0; break; case X25_CLEAR_REQUEST: dptr = skb_put(skb, 3); *dptr++ = frametype; *dptr++ = x25->causediag.cause; *dptr++ = x25->causediag.diagnostic; break; case X25_RESET_REQUEST: dptr = skb_put(skb, 3); *dptr++ = frametype; *dptr++ = 0x00; /* XXX */ *dptr++ = 0x00; /* XXX */ break; case X25_RR: case X25_RNR: case X25_REJ: if (x25->neighbour->extended) { dptr = skb_put(skb, 2); *dptr++ = frametype; *dptr++ = (x25->vr << 1) & 0xFE; } else { dptr = skb_put(skb, 1); *dptr = frametype; *dptr++ |= (x25->vr << 5) & 0xE0; } break; case X25_CLEAR_CONFIRMATION: case X25_INTERRUPT_CONFIRMATION: case X25_RESET_CONFIRMATION: dptr = skb_put(skb, 1); *dptr = frametype; break; } x25_transmit_link(skb, x25->neighbour); } /* * Unpick the contents of the passed X.25 Packet Layer frame. */ int x25_decode(struct sock *sk, struct sk_buff *skb, int *ns, int *nr, int *q, int *d, int *m) { struct x25_sock *x25 = x25_sk(sk); unsigned char *frame; if (!pskb_may_pull(skb, X25_STD_MIN_LEN)) return X25_ILLEGAL; frame = skb->data; *ns = *nr = *q = *d = *m = 0; switch (frame[2]) { case X25_CALL_REQUEST: case X25_CALL_ACCEPTED: case X25_CLEAR_REQUEST: case X25_CLEAR_CONFIRMATION: case X25_INTERRUPT: case X25_INTERRUPT_CONFIRMATION: case X25_RESET_REQUEST: case X25_RESET_CONFIRMATION: case X25_RESTART_REQUEST: case X25_RESTART_CONFIRMATION: case X25_REGISTRATION_REQUEST: case X25_REGISTRATION_CONFIRMATION: case X25_DIAGNOSTIC: return frame[2]; } if (x25->neighbour->extended) { if (frame[2] == X25_RR || frame[2] == X25_RNR || frame[2] == X25_REJ) { if (!pskb_may_pull(skb, X25_EXT_MIN_LEN)) return X25_ILLEGAL; frame = skb->data; *nr = (frame[3] >> 1) & 0x7F; return frame[2]; } } else { if ((frame[2] & 0x1F) == X25_RR || (frame[2] & 0x1F) == X25_RNR || (frame[2] & 0x1F) == X25_REJ) { *nr = (frame[2] >> 5) & 0x07; return frame[2] & 0x1F; } } if (x25->neighbour->extended) { if ((frame[2] & 0x01) == X25_DATA) { if (!pskb_may_pull(skb, X25_EXT_MIN_LEN)) return X25_ILLEGAL; frame = skb->data; *q = (frame[0] & X25_Q_BIT) == X25_Q_BIT; *d = (frame[0] & X25_D_BIT) == X25_D_BIT; *m = (frame[3] & X25_EXT_M_BIT) == X25_EXT_M_BIT; *nr = (frame[3] >> 1) & 0x7F; *ns = (frame[2] >> 1) & 0x7F; return X25_DATA; } } else { if ((frame[2] & 0x01) == X25_DATA) { *q = (frame[0] & X25_Q_BIT) == X25_Q_BIT; *d = (frame[0] & X25_D_BIT) == X25_D_BIT; *m = (frame[2] & X25_STD_M_BIT) == X25_STD_M_BIT; *nr = (frame[2] >> 5) & 0x07; *ns = (frame[2] >> 1) & 0x07; return X25_DATA; } } pr_debug("invalid PLP frame %3ph\n", frame); return X25_ILLEGAL; } void x25_disconnect(struct sock *sk, int reason, unsigned char cause, unsigned char diagnostic) { struct x25_sock *x25 = x25_sk(sk); x25_clear_queues(sk); x25_stop_timer(sk); x25->lci = 0; x25->state = X25_STATE_0; x25->causediag.cause = cause; x25->causediag.diagnostic = diagnostic; sk->sk_state = TCP_CLOSE; sk->sk_err = reason; sk->sk_shutdown |= SEND_SHUTDOWN; if (!sock_flag(sk, SOCK_DEAD)) { sk->sk_state_change(sk); sock_set_flag(sk, SOCK_DEAD); } if (x25->neighbour) { read_lock_bh(&x25_list_lock); x25_neigh_put(x25->neighbour); x25->neighbour = NULL; read_unlock_bh(&x25_list_lock); } } /* * Clear an own-rx-busy condition and tell the peer about this, provided * that there is a significant amount of free receive buffer space available. 
*/ void x25_check_rbuf(struct sock *sk) { struct x25_sock *x25 = x25_sk(sk); if (atomic_read(&sk->sk_rmem_alloc) < (sk->sk_rcvbuf >> 1) && (x25->condition & X25_COND_OWN_RX_BUSY)) { x25->condition &= ~X25_COND_OWN_RX_BUSY; x25->condition &= ~X25_COND_ACK_PENDING; x25->vl = x25->vr; x25_write_internal(sk, X25_RR); x25_stop_timer(sk); } }
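x25_validate_nr() above walks the sequence-number space from V(A) to V(S) modulo 8 (standard) or 128 (extended operation). A standalone sketch of the same window test follows (nr_in_window() is an illustrative name), showing how the check behaves when the window wraps past the modulus.

#include <stdio.h>

/* N(R) is valid iff it lies on the path from V(A) to V(S) under the modulus */
static int nr_in_window(unsigned short va, unsigned short vs,
                        unsigned short nr, int modulus)
{
        unsigned short vc = va;

        while (vc != vs) {
                if (nr == vc)
                        return 1;
                vc = (vc + 1) % modulus;
        }
        return nr == vs;
}

int main(void)
{
        /* with V(A)=6 and V(S)=2 the window wraps: 6, 7, 0, 1, 2 are valid */
        printf("%d %d %d\n",
               nr_in_window(6, 2, 7, 8),        /* 1 */
               nr_in_window(6, 2, 0, 8),        /* 1 */
               nr_in_window(6, 2, 3, 8));       /* 0 */
        return 0;
}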
// SPDX-License-Identifier: GPL-2.0 /* net/atm/pvc.c - ATM PVC sockets */ /* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */ #include <linux/net.h> /* struct socket, struct proto_ops */ #include <linux/atm.h> /* ATM stuff */ #include <linux/atmdev.h> /* ATM devices */ #include <linux/errno.h> /* error codes */ #include <linux/kernel.h> /* printk */ #include <linux/init.h> #include <linux/skbuff.h> #include <linux/bitops.h> #include <linux/export.h> #include <net/sock.h> /* for sock_no_* */ #include "resources.h" /* devs and vccs */ #include "common.h" /* common for PVCs and SVCs */ static int pvc_shutdown(struct socket *sock, int how) { return 0; } static int pvc_bind(struct socket *sock, struct sockaddr *sockaddr, int sockaddr_len) { struct sock *sk = sock->sk; struct sockaddr_atmpvc *addr; struct atm_vcc *vcc; int error; if (sockaddr_len != sizeof(struct sockaddr_atmpvc)) return -EINVAL; addr = (struct sockaddr_atmpvc *)sockaddr; if (addr->sap_family != AF_ATMPVC) return -EAFNOSUPPORT; lock_sock(sk); vcc = ATM_SD(sock); if (!test_bit(ATM_VF_HASQOS, &vcc->flags)) { error = -EBADFD; goto out; } if (test_bit(ATM_VF_PARTIAL, &vcc->flags)) { if (vcc->vpi != ATM_VPI_UNSPEC) addr->sap_addr.vpi = vcc->vpi; if (vcc->vci != ATM_VCI_UNSPEC) addr->sap_addr.vci = vcc->vci; } error = vcc_connect(sock, addr->sap_addr.itf, addr->sap_addr.vpi, addr->sap_addr.vci); out: release_sock(sk); return error; } static int pvc_connect(struct socket *sock, struct sockaddr *sockaddr, int sockaddr_len, int flags) { return pvc_bind(sock, sockaddr, sockaddr_len); } static int pvc_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen) { struct sock *sk = sock->sk; int error; lock_sock(sk); error = vcc_setsockopt(sock, level, optname, optval, optlen); release_sock(sk); return error; } static int pvc_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { struct sock *sk = sock->sk; int error; lock_sock(sk); error = vcc_getsockopt(sock, level, optname, optval, optlen); release_sock(sk); return error; } static int pvc_getname(struct socket *sock, struct sockaddr *sockaddr, int peer) { struct sockaddr_atmpvc *addr; struct atm_vcc *vcc = ATM_SD(sock); if (!vcc->dev || !test_bit(ATM_VF_ADDR, &vcc->flags)) return -ENOTCONN; addr = (struct sockaddr_atmpvc *)sockaddr; memset(addr, 0, sizeof(*addr)); addr->sap_family = AF_ATMPVC; addr->sap_addr.itf = vcc->dev->number; addr->sap_addr.vpi = vcc->vpi; addr->sap_addr.vci = vcc->vci; return sizeof(struct sockaddr_atmpvc); } static const struct proto_ops pvc_proto_ops = { .family = PF_ATMPVC, .owner = THIS_MODULE, .release = vcc_release, .bind = pvc_bind, .connect = pvc_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = pvc_getname, .poll = vcc_poll, .ioctl = vcc_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = vcc_compat_ioctl, #endif .listen = sock_no_listen, .shutdown = pvc_shutdown, .setsockopt = pvc_setsockopt, .getsockopt = 
pvc_getsockopt, .sendmsg = vcc_sendmsg, .recvmsg = vcc_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, }; static int pvc_create(struct net *net, struct socket *sock, int protocol, int kern) { if (net != &init_net) return -EAFNOSUPPORT; sock->ops = &pvc_proto_ops; return vcc_create(net, sock, protocol, PF_ATMPVC, kern); } static const struct net_proto_family pvc_family_ops = { .family = PF_ATMPVC, .create = pvc_create, .owner = THIS_MODULE, }; /* * Initialize the ATM PVC protocol family */ int __init atmpvc_init(void) { return sock_register(&pvc_family_ops); } void atmpvc_exit(void) { sock_unregister(PF_ATMPVC); }
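From userspace, the family registered by atmpvc_init() above is reached with socket(PF_ATMPVC, ...); pvc_bind()/pvc_connect() refuse the address with -EBADFD until a QoS has been set on the VCC. A hypothetical sketch of that sequence, assuming the usual linux-atm conventions (the interface/VPI/VCI numbers and traffic parameters are illustrative, and SOL_ATM is defined locally in case libc headers do not provide it):

#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <linux/atm.h>

#ifndef SOL_ATM
#define SOL_ATM 264     /* value from the kernel's linux/socket.h */
#endif

int main(void)
{
        struct sockaddr_atmpvc addr;
        struct atm_qos qos;
        int s = socket(PF_ATMPVC, SOCK_DGRAM, 0);

        if (s < 0)
                return 1;

        /* set a QoS first, otherwise pvc_bind() above rejects the address */
        memset(&qos, 0, sizeof(qos));
        qos.aal = ATM_AAL5;
        qos.txtp.traffic_class = ATM_UBR;
        qos.txtp.max_sdu = 1524;
        qos.rxtp = qos.txtp;
        if (setsockopt(s, SOL_ATM, SO_ATMQOS, &qos, sizeof(qos)) < 0)
                return 1;

        /* itf.vpi.vci identify the permanent circuit; values are made up */
        memset(&addr, 0, sizeof(addr));
        addr.sap_family = AF_ATMPVC;
        addr.sap_addr.itf = 0;
        addr.sap_addr.vpi = 0;
        addr.sap_addr.vci = 42;
        if (connect(s, (struct sockaddr *)&addr, sizeof(addr)) < 0)
                return 1;

        printf("PVC 0.0.42 open\n");
        return 0;
}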
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_UDF_SB_H #define __LINUX_UDF_SB_H #include <linux/mutex.h> #include <linux/bitops.h> #include <linux/magic.h> #define UDF_MAX_READ_VERSION 0x0250 #define UDF_MAX_WRITE_VERSION 0x0201 #define UDF_FLAG_USE_EXTENDED_FE 0 #define UDF_VERS_USE_EXTENDED_FE 0x0200 #define UDF_FLAG_USE_STREAMS 1 #define UDF_VERS_USE_STREAMS 0x0200 #define UDF_FLAG_USE_SHORT_AD 2 #define UDF_FLAG_USE_AD_IN_ICB 3 #define UDF_FLAG_USE_FILE_CTIME_EA 4 #define UDF_FLAG_STRICT 5 #define UDF_FLAG_UNDELETE 6 #define UDF_FLAG_UNHIDE 7 #define UDF_FLAG_VARCONV 8 #define UDF_FLAG_NLS_MAP 9 #define UDF_FLAG_UTF8 10 #define UDF_FLAG_UID_FORGET 11 /* save -1 for uid to disk */ #define UDF_FLAG_GID_FORGET 12 #define UDF_FLAG_UID_SET 13 #define UDF_FLAG_GID_SET 14 #define UDF_FLAG_SESSION_SET 15 #define UDF_FLAG_LASTBLOCK_SET 16 #define UDF_FLAG_BLOCKSIZE_SET 17 #define UDF_FLAG_INCONSISTENT 18 #define UDF_FLAG_RW_INCOMPAT 19 /* Set when we find RW incompatible * feature */ #define UDF_PART_FLAG_UNALLOC_BITMAP 0x0001 #define UDF_PART_FLAG_UNALLOC_TABLE 0x0002 #define UDF_PART_FLAG_FREED_BITMAP 0x0004 #define UDF_PART_FLAG_FREED_TABLE 0x0008 #define UDF_PART_FLAG_READ_ONLY 0x0010 #define UDF_PART_FLAG_WRITE_ONCE 0x0020 #define UDF_PART_FLAG_REWRITABLE 0x0040 #define UDF_PART_FLAG_OVERWRITABLE 0x0080 #define UDF_MAX_BLOCK_LOADED 8 #define UDF_TYPE1_MAP15 0x1511U #define UDF_VIRTUAL_MAP15 0x1512U #define UDF_VIRTUAL_MAP20 0x2012U #define UDF_SPARABLE_MAP15 0x1522U #define UDF_METADATA_MAP25 0x2511U #define UDF_INVALID_MODE ((umode_t)-1) #pragma pack(1) /* XXX(hch): Why?
This file just defines in-core structures */ #define MF_DUPLICATE_MD 0x01 #define MF_MIRROR_FE_LOADED 0x02 struct udf_meta_data { __u32 s_meta_file_loc; __u32 s_mirror_file_loc; __u32 s_bitmap_file_loc; __u32 s_alloc_unit_size; __u16 s_align_unit_size; /* * Partition Reference Number of the associated physical / sparable * partition */ __u16 s_phys_partition_ref; int s_flags; struct inode *s_metadata_fe; struct inode *s_mirror_fe; struct inode *s_bitmap_fe; }; struct udf_sparing_data { __u16 s_packet_len; struct buffer_head *s_spar_map[4]; }; struct udf_virtual_data { __u32 s_num_entries; __u16 s_start_offset; }; struct udf_bitmap { __u32 s_extPosition; int s_nr_groups; struct buffer_head *s_block_bitmap[0]; }; struct udf_part_map { union { struct udf_bitmap *s_bitmap; struct inode *s_table; } s_uspace; union { struct udf_bitmap *s_bitmap; struct inode *s_table; } s_fspace; __u32 s_partition_root; __u32 s_partition_len; __u16 s_partition_type; __u16 s_partition_num; union { struct udf_sparing_data s_sparing; struct udf_virtual_data s_virtual; struct udf_meta_data s_metadata; } s_type_specific; __u32 (*s_partition_func)(struct super_block *, __u32, __u16, __u32); __u16 s_volumeseqnum; __u16 s_partition_flags; }; #pragma pack() struct udf_sb_info { struct udf_part_map *s_partmaps; __u8 s_volume_ident[32]; /* Overall info */ __u16 s_partitions; __u16 s_partition; /* Sector headers */ __s32 s_session; __u32 s_anchor; __u32 s_last_block; struct buffer_head *s_lvid_bh; /* Default permissions */ umode_t s_umask; kgid_t s_gid; kuid_t s_uid; umode_t s_fmode; umode_t s_dmode; /* Lock protecting consistency of above permission settings */ rwlock_t s_cred_lock; /* Root Info */ struct timespec64 s_record_time; /* Fileset Info */ __u16 s_serial_number; /* highest UDF revision we have recorded to this media */ __u16 s_udfrev; /* Miscellaneous flags */ unsigned long s_flags; /* Encoding info */ struct nls_table *s_nls_map; /* VAT inode */ struct inode *s_vat_inode; struct mutex s_alloc_mutex; /* Protected by s_alloc_mutex */ unsigned int s_lvid_dirty; }; static inline struct udf_sb_info *UDF_SB(struct super_block *sb) { return sb->s_fs_info; } struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct super_block *sb); int udf_compute_nr_groups(struct super_block *sb, u32 partition); static inline int UDF_QUERY_FLAG(struct super_block *sb, int flag) { return test_bit(flag, &UDF_SB(sb)->s_flags); } static inline void UDF_SET_FLAG(struct super_block *sb, int flag) { set_bit(flag, &UDF_SB(sb)->s_flags); } static inline void UDF_CLEAR_FLAG(struct super_block *sb, int flag) { clear_bit(flag, &UDF_SB(sb)->s_flags); } #endif /* __LINUX_UDF_SB_H */
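The UDF_QUERY_FLAG/UDF_SET_FLAG/UDF_CLEAR_FLAG helpers above are thin wrappers around the kernel bitops on udf_sb_info.s_flags, indexed by the UDF_FLAG_* numbers. A userspace model of the same bookkeeping follows (plain, non-atomic bit masks; purely illustrative, whereas the real helpers use test_bit/set_bit/clear_bit):

#include <stdio.h>

#define UDF_FLAG_STRICT         5
#define UDF_FLAG_RW_INCOMPAT    19

static unsigned long s_flags;   /* stands in for udf_sb_info.s_flags */

#define QUERY_FLAG(f)   (!!(s_flags & (1UL << (f))))
#define SET_FLAG(f)     (s_flags |= 1UL << (f))
#define CLEAR_FLAG(f)   (s_flags &= ~(1UL << (f)))

int main(void)
{
        SET_FLAG(UDF_FLAG_RW_INCOMPAT); /* e.g. a feature we cannot write */
        printf("strict=%d rw_incompat=%d\n",
               QUERY_FLAG(UDF_FLAG_STRICT), QUERY_FLAG(UDF_FLAG_RW_INCOMPAT));
        CLEAR_FLAG(UDF_FLAG_RW_INCOMPAT);
        return 0;
}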
/* * Copyright (c) 2011 Florian Westphal <fw@strlen.de> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. * * based on fib_frontend.c; Author: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/ip.h> #include <net/ip.h> #include <net/ip_fib.h> #include <net/route.h> #include <linux/netfilter/xt_rpfilter.h> #include <linux/netfilter/x_tables.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Florian Westphal <fw@strlen.de>"); MODULE_DESCRIPTION("iptables: ipv4 reverse path filter match"); /* don't try to find route from mcast/bcast/zeronet */ static __be32 rpfilter_get_saddr(__be32 addr) { if (ipv4_is_multicast(addr) || ipv4_is_lbcast(addr) || ipv4_is_zeronet(addr)) return 0; return addr; } static bool rpfilter_lookup_reverse(struct net *net, struct flowi4 *fl4, const struct net_device *dev, u8 flags) { struct fib_result res; bool dev_match; int ret __maybe_unused; if (fib_lookup(net, fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE)) return false; if (res.type != RTN_UNICAST) { if (res.type != RTN_LOCAL || !(flags & XT_RPFILTER_ACCEPT_LOCAL)) return false; } dev_match = false; #ifdef CONFIG_IP_ROUTE_MULTIPATH for (ret = 0; ret < res.fi->fib_nhs; ret++) { struct fib_nh *nh = &res.fi->fib_nh[ret]; if (nh->nh_dev == dev) { dev_match = true; break; } } #else if (FIB_RES_DEV(res) == dev) dev_match = true; #endif return dev_match || flags & XT_RPFILTER_LOOSE; } static bool rpfilter_is_loopback(const struct sk_buff *skb, const struct net_device *in) { return skb->pkt_type == PACKET_LOOPBACK || in->flags & IFF_LOOPBACK; } static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_rpfilter_info *info; const struct iphdr *iph; struct flowi4 flow; bool invert; info = par->matchinfo; invert = info->flags & XT_RPFILTER_INVERT; if (rpfilter_is_loopback(skb, xt_in(par))) return true ^ invert; iph = ip_hdr(skb); if (ipv4_is_zeronet(iph->saddr)) { if (ipv4_is_lbcast(iph->daddr) || ipv4_is_local_multicast(iph->daddr)) return true ^ invert; } memset(&flow, 0, sizeof(flow)); flow.flowi4_iif = LOOPBACK_IFINDEX; flow.daddr = iph->saddr; flow.saddr = rpfilter_get_saddr(iph->daddr); flow.flowi4_mark = info->flags & XT_RPFILTER_VALID_MARK ? 
skb->mark : 0; flow.flowi4_tos = iph->tos & IPTOS_RT_MASK; flow.flowi4_scope = RT_SCOPE_UNIVERSE; flow.flowi4_oif = l3mdev_master_ifindex_rcu(xt_in(par)); return rpfilter_lookup_reverse(xt_net(par), &flow, xt_in(par), info->flags) ^ invert; } static int rpfilter_check(const struct xt_mtchk_param *par) { const struct xt_rpfilter_info *info = par->matchinfo; unsigned int options = ~XT_RPFILTER_OPTION_MASK; if (info->flags & options) { pr_info_ratelimited("unknown options\n"); return -EINVAL; } if (strcmp(par->table, "mangle") != 0 && strcmp(par->table, "raw") != 0) { pr_info_ratelimited("only valid in \'raw\' or \'mangle\' table, not \'%s\'\n", par->table); return -EINVAL; } return 0; } static struct xt_match rpfilter_mt_reg __read_mostly = { .name = "rpfilter", .family = NFPROTO_IPV4, .checkentry = rpfilter_check, .match = rpfilter_mt, .matchsize = sizeof(struct xt_rpfilter_info), .hooks = (1 << NF_INET_PRE_ROUTING), .me = THIS_MODULE }; static int __init rpfilter_mt_init(void) { return xt_register_match(&rpfilter_mt_reg); } static void __exit rpfilter_mt_exit(void) { xt_unregister_match(&rpfilter_mt_reg); } module_init(rpfilter_mt_init); module_exit(rpfilter_mt_exit);
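rpfilter_mt() above accepts a packet only if a route back to its source would leave through the device the packet arrived on (or through any device when XT_RPFILTER_LOOSE is set). A minimal userspace model of that reverse-path test follows, with a two-entry table standing in for the kernel FIB (route_back_dev()/rpf_ok() and the addresses are illustrative):

#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <arpa/inet.h>

struct route { const char *prefix; int plen; const char *dev; };

/* stand-in for the FIB, longest prefix first */
static const struct route fib[] = {
        { "10.0.0.0", 8, "eth0" },
        { "0.0.0.0",  0, "eth1" },      /* default route */
};

static const char *route_back_dev(const char *saddr)
{
        struct in_addr a, p;
        unsigned int i;

        inet_pton(AF_INET, saddr, &a);
        for (i = 0; i < sizeof(fib) / sizeof(fib[0]); i++) {
                uint32_t mask = fib[i].plen ?
                        htonl(0xffffffffu << (32 - fib[i].plen)) : 0;

                inet_pton(AF_INET, fib[i].prefix, &p);
                if ((a.s_addr & mask) == (p.s_addr & mask))
                        return fib[i].dev;
        }
        return NULL;
}

static int rpf_ok(const char *saddr, const char *indev, int loose)
{
        const char *dev = route_back_dev(saddr);

        return loose ? dev != NULL : (dev && strcmp(dev, indev) == 0);
}

int main(void)
{
        printf("%d\n", rpf_ok("10.1.2.3", "eth0", 0));  /* 1: reverse route matches */
        printf("%d\n", rpf_ok("10.1.2.3", "eth1", 0));  /* 0: treated as spoofed */
        printf("%d\n", rpf_ok("10.1.2.3", "eth1", 1));  /* 1: loose mode accepts */
        return 0;
}

In practice the match is attached from the raw or mangle table, as rpfilter_check() above enforces, with a rule along the lines of iptables -t raw -A PREROUTING -m rpfilter --invert -j DROP.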
254 254 274 49 47 275 71 70 1349 1352 1348 622 622 623 623 622 615 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 /* * IPv6 library code, needed by static components when full IPv6 support is * not configured or static. These functions are needed by GSO/GRO implementation. */ #include <linux/export.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/ip6_fib.h> #include <net/addrconf.h> #include <net/secure_seq.h> #include <linux/netfilter.h> static u32 __ipv6_select_ident(struct net *net, const struct in6_addr *dst, const struct in6_addr *src) { u32 id; do { id = prandom_u32(); } while (!id); return id; } /* This function exists only for tap drivers that must support broken * clients requesting UFO without specifying an IPv6 fragment ID. * * This is similar to ipv6_select_ident() but we use an independent hash * seed to limit information leakage. * * The network header must be set before calling this. */ __be32 ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb) { struct in6_addr buf[2]; struct in6_addr *addrs; u32 id; addrs = skb_header_pointer(skb, skb_network_offset(skb) + offsetof(struct ipv6hdr, saddr), sizeof(buf), buf); if (!addrs) return 0; id = __ipv6_select_ident(net, &addrs[1], &addrs[0]); return htonl(id); } EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident); __be32 ipv6_select_ident(struct net *net, const struct in6_addr *daddr, const struct in6_addr *saddr) { u32 id; id = __ipv6_select_ident(net, daddr, saddr); return htonl(id); } EXPORT_SYMBOL(ipv6_select_ident); int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) { unsigned int offset = sizeof(struct ipv6hdr); unsigned int packet_len = skb_tail_pointer(skb) - skb_network_header(skb); int found_rhdr = 0; *nexthdr = &ipv6_hdr(skb)->nexthdr; while (offset <= packet_len) { struct ipv6_opt_hdr *exthdr; switch (**nexthdr) { case NEXTHDR_HOP: break; case NEXTHDR_ROUTING: found_rhdr = 1; break; case NEXTHDR_DEST: #if IS_ENABLED(CONFIG_IPV6_MIP6) if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) break; #endif if (found_rhdr) return offset; break; default: return offset; } if (offset + sizeof(struct ipv6_opt_hdr) > packet_len) return -EINVAL; exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) + offset); offset += ipv6_optlen(exthdr); if (offset > IPV6_MAXPLEN) return -EINVAL; *nexthdr = &exthdr->nexthdr; } return -EINVAL; } EXPORT_SYMBOL(ip6_find_1stfragopt); #if IS_ENABLED(CONFIG_IPV6) int ip6_dst_hoplimit(struct dst_entry *dst) { int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT); if (hoplimit == 0) { struct net_device *dev = dst->dev; struct inet6_dev *idev; rcu_read_lock(); idev = __in6_dev_get(dev); if (idev) hoplimit = idev->cnf.hop_limit; else hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit; rcu_read_unlock(); } return hoplimit; } EXPORT_SYMBOL(ip6_dst_hoplimit); #endif int __ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) { int len; len = skb->len - sizeof(struct ipv6hdr); if (len > IPV6_MAXPLEN) len = 0; ipv6_hdr(skb)->payload_len = htons(len); IP6CB(skb)->nhoff = offsetof(struct 
ipv6hdr, nexthdr); /* if egress device is enslaved to an L3 master device pass the * skb to its handler for processing */ skb = l3mdev_ip6_out(sk, skb); if (unlikely(!skb)) return 0; skb->protocol = htons(ETH_P_IPV6); return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, skb_dst(skb)->dev, dst_output); } EXPORT_SYMBOL_GPL(__ip6_local_out); int ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) { int err; err = __ip6_local_out(net, sk, skb); if (likely(err == 1)) err = dst_output(net, sk, skb); return err; } EXPORT_SYMBOL_GPL(ip6_local_out);
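/*
 * Illustrative userspace sketch of the fragment-ID policy implemented by
 * __ipv6_select_ident() above: keep drawing random 32-bit identifiers until a
 * non-zero one comes up, then return it in network byte order. rand() is a
 * stand-in for prandom_u32(); this is not the kernel implementation.
 */
#include <arpa/inet.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

static uint32_t select_ident(void)
{
	uint32_t id;

	do {
		id = (uint32_t)rand();	/* stand-in for prandom_u32() */
	} while (!id);			/* an identifier of zero is never used */

	return htonl(id);
}

int main(void)
{
	srand((unsigned int)time(NULL));
	printf("fragment id: 0x%08" PRIx32 "\n", select_ident());
	return 0;
}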
2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 /* * Copyright (C)2006 USAGI/WIDE Project * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. * * Author: * Masahide NAKAMURA @USAGI <masahide.nakamura.cz@hitachi.com> * * Based on net/netfilter/xt_tcpudp.c * */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/types.h> #include <linux/module.h> #include <net/ip.h> #include <linux/ipv6.h> #include <net/ipv6.h> #include <net/mip6.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv6/ip6t_mh.h> MODULE_DESCRIPTION("Xtables: IPv6 Mobility Header match"); MODULE_LICENSE("GPL"); /* Returns 1 if the type is matched by the range, 0 otherwise */ static inline bool type_match(u_int8_t min, u_int8_t max, u_int8_t type, bool invert) { return (type >= min && type <= max) ^ invert; } static bool mh_mt6(const struct sk_buff *skb, struct xt_action_param *par) { struct ip6_mh _mh; const struct ip6_mh *mh; const struct ip6t_mh *mhinfo = par->matchinfo; /* Must not be a fragment. */ if (par->fragoff != 0) return false; mh = skb_header_pointer(skb, par->thoff, sizeof(_mh), &_mh); if (mh == NULL) { /* We've been asked to examine this packet, and we can't. Hence, no choice but to drop. */ pr_debug("Dropping evil MH tinygram.\n"); par->hotdrop = true; return false; } if (mh->ip6mh_proto != IPPROTO_NONE) { pr_debug("Dropping invalid MH Payload Proto: %u\n", mh->ip6mh_proto); par->hotdrop = true; return false; } return type_match(mhinfo->types[0], mhinfo->types[1], mh->ip6mh_type, !!(mhinfo->invflags & IP6T_MH_INV_TYPE)); } static int mh_mt6_check(const struct xt_mtchk_param *par) { const struct ip6t_mh *mhinfo = par->matchinfo; /* Must specify no unknown invflags */ return (mhinfo->invflags & ~IP6T_MH_INV_MASK) ? -EINVAL : 0; } static struct xt_match mh_mt6_reg __read_mostly = { .name = "mh", .family = NFPROTO_IPV6, .checkentry = mh_mt6_check, .match = mh_mt6, .matchsize = sizeof(struct ip6t_mh), .proto = IPPROTO_MH, .me = THIS_MODULE, }; static int __init mh_mt6_init(void) { return xt_register_match(&mh_mt6_reg); } static void __exit mh_mt6_exit(void) { xt_unregister_match(&mh_mt6_reg); } module_init(mh_mt6_init); module_exit(mh_mt6_exit);
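/*
 * Illustrative sketch of the type_match() rule used by the mh match above:
 * a Mobility Header type matches when it lies inside [min, max], and type
 * inversion simply flips that result. Plain userspace C, independent of the
 * xtables framework.
 */
#include <stdbool.h>
#include <stdio.h>

static bool type_match(unsigned char min, unsigned char max,
		       unsigned char type, bool invert)
{
	return (type >= min && type <= max) ^ invert;
}

int main(void)
{
	/* e.g. a range of types 5:7 tested against a Binding Update (type 5) */
	printf("%d\n", type_match(5, 7, 5, false));	/* 1: inside range */
	printf("%d\n", type_match(5, 7, 8, false));	/* 0: outside range */
	printf("%d\n", type_match(5, 7, 5, true));	/* 0: inverted */
	return 0;
}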
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_MM_TYPES_H
#define _LINUX_MM_TYPES_H

#include <linux/mm_types_task.h>

#include <linux/auxvec.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/rbtree.h>
#include <linux/rwsem.h>
#include <linux/completion.h>
#include <linux/cpumask.h>
#include <linux/uprobes.h>
#include <linux/page-flags-layout.h>
#include <linux/workqueue.h>

#include <asm/mmu.h>

#ifndef AT_VECTOR_SIZE_ARCH
#define AT_VECTOR_SIZE_ARCH 0
#endif
#define AT_VECTOR_SIZE (2*(AT_VECTOR_SIZE_ARCH + AT_VECTOR_SIZE_BASE + 1))

typedef int vm_fault_t;

struct address_space;
struct mem_cgroup;
struct hmm;

/*
 * Each physical page in the system has a struct page associated with
 * it to keep track of whatever it is we are using the page for at the
 * moment. Note that we have no way to track which tasks are using
 * a page, though if it is a pagecache page, rmap structures can tell us
 * who is mapping it.
* * If you allocate the page using alloc_pages(), you can use some of the * space in struct page for your own purposes. The five words in the main * union are available, except for bit 0 of the first word which must be * kept clear. Many users use this word to store a pointer to an object * which is guaranteed to be aligned. If you use the same storage as * page->mapping, you must restore it to NULL before freeing the page. * * If your page will not be mapped to userspace, you can also use the four * bytes in the mapcount union, but you must call page_mapcount_reset() * before freeing it. * * If you want to use the refcount field, it must be used in such a way * that other CPUs temporarily incrementing and then decrementing the * refcount does not cause problems. On receiving the page from * alloc_pages(), the refcount will be positive. * * If you allocate pages of order > 0, you can use some of the fields * in each subpage, but you may need to restore some of their values * afterwards. * * SLUB uses cmpxchg_double() to atomically update its freelist and * counters. That requires that freelist & counters be adjacent and * double-word aligned. We align all struct pages to double-word * boundaries, and ensure that 'freelist' is aligned within the * struct. */ #ifdef CONFIG_HAVE_ALIGNED_STRUCT_PAGE #define _struct_page_alignment __aligned(2 * sizeof(unsigned long)) #else #define _struct_page_alignment #endif struct page { unsigned long flags; /* Atomic flags, some possibly * updated asynchronously */ /* * Five words (20/40 bytes) are available in this union. * WARNING: bit 0 of the first word is used for PageTail(). That * means the other users of this union MUST NOT use the bit to * avoid collision and false-positive PageTail(). */ union { struct { /* Page cache and anonymous pages */ /** * @lru: Pageout list, eg. active_list protected by * zone_lru_lock. Sometimes used as a generic list * by the page owner. */ struct list_head lru; /* See page-flags.h for PAGE_MAPPING_FLAGS */ struct address_space *mapping; pgoff_t index; /* Our offset within mapping. */ /** * @private: Mapping-private opaque data. * Usually used for buffer_heads if PagePrivate. * Used for swp_entry_t if PageSwapCache. * Indicates order in the buddy system if PageBuddy. 
*/ unsigned long private; }; struct { /* slab, slob and slub */ union { struct list_head slab_list; /* uses lru */ struct { /* Partial pages */ struct page *next; #ifdef CONFIG_64BIT int pages; /* Nr of pages left */ int pobjects; /* Approximate count */ #else short int pages; short int pobjects; #endif }; }; struct kmem_cache *slab_cache; /* not slob */ /* Double-word boundary */ void *freelist; /* first free object */ union { void *s_mem; /* slab: first object */ unsigned long counters; /* SLUB */ struct { /* SLUB */ unsigned inuse:16; unsigned objects:15; unsigned frozen:1; }; }; }; struct { /* Tail pages of compound page */ unsigned long compound_head; /* Bit zero is set */ /* First tail page only */ unsigned char compound_dtor; unsigned char compound_order; atomic_t compound_mapcount; }; struct { /* Second tail page of compound page */ unsigned long _compound_pad_1; /* compound_head */ unsigned long _compound_pad_2; struct list_head deferred_list; }; struct { /* Page table pages */ unsigned long _pt_pad_1; /* compound_head */ pgtable_t pmd_huge_pte; /* protected by page->ptl */ unsigned long _pt_pad_2; /* mapping */ union { struct mm_struct *pt_mm; /* x86 pgds only */ atomic_t pt_frag_refcount; /* powerpc */ }; #if ALLOC_SPLIT_PTLOCKS spinlock_t *ptl; #else spinlock_t ptl; #endif }; struct { /* ZONE_DEVICE pages */ /** @pgmap: Points to the hosting device page map. */ struct dev_pagemap *pgmap; unsigned long hmm_data; unsigned long _zd_pad_1; /* uses mapping */ }; /** @rcu_head: You can use this to free a page by RCU. */ struct rcu_head rcu_head; }; union { /* This union is 4 bytes in size. */ /* * If the page can be mapped to userspace, encodes the number * of times this page is referenced by a page table. */ atomic_t _mapcount; /* * If the page is neither PageSlab nor mappable to userspace, * the value stored here may help determine what this page * is used for. See page-flags.h for a list of page types * which are currently stored here. */ unsigned int page_type; unsigned int active; /* SLAB */ int units; /* SLOB */ }; /* Usage count. *DO NOT USE DIRECTLY*. See page_ref.h */ atomic_t _refcount; #ifdef CONFIG_MEMCG struct mem_cgroup *mem_cgroup; #endif /* * On machines where all RAM is mapped into kernel address space, * we can simply calculate the virtual address. On machines with * highmem some memory is mapped into kernel virtual memory * dynamically, so we need a place to store that address. * Note that this field could be 16 bits on x86 ... ;) * * Architectures with slow multiplication can define * WANT_PAGE_VIRTUAL in asm/page.h */ #if defined(WANT_PAGE_VIRTUAL) void *virtual; /* Kernel virtual address (NULL if not kmapped, ie. highmem) */ #endif /* WANT_PAGE_VIRTUAL */ #ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS int _last_cpupid; #endif } _struct_page_alignment; #define PAGE_FRAG_CACHE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK) #define PAGE_FRAG_CACHE_MAX_ORDER get_order(PAGE_FRAG_CACHE_MAX_SIZE) struct page_frag_cache { void * va; #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) __u16 offset; __u16 size; #else __u32 offset; #endif /* we maintain a pagecount bias, so that we dont dirty cache line * containing page->_refcount every time we allocate a fragment. */ unsigned int pagecnt_bias; bool pfmemalloc; }; typedef unsigned long vm_flags_t; static inline atomic_t *compound_mapcount_ptr(struct page *page) { return &page[1].compound_mapcount; } /* * A region containing a mapping of a non-memory backed file under NOMMU * conditions. 
These are held in a global tree and are pinned by the VMAs that * map parts of them. */ struct vm_region { struct rb_node vm_rb; /* link in global region tree */ vm_flags_t vm_flags; /* VMA vm_flags */ unsigned long vm_start; /* start address of region */ unsigned long vm_end; /* region initialised to here */ unsigned long vm_top; /* region allocated to here */ unsigned long vm_pgoff; /* the offset in vm_file corresponding to vm_start */ struct file *vm_file; /* the backing file or NULL */ int vm_usage; /* region usage count (access under nommu_region_sem) */ bool vm_icache_flushed : 1; /* true if the icache has been flushed for * this region */ }; #ifdef CONFIG_USERFAULTFD #define NULL_VM_UFFD_CTX ((struct vm_userfaultfd_ctx) { NULL, }) struct vm_userfaultfd_ctx { struct userfaultfd_ctx *ctx; }; #else /* CONFIG_USERFAULTFD */ #define NULL_VM_UFFD_CTX ((struct vm_userfaultfd_ctx) {}) struct vm_userfaultfd_ctx {}; #endif /* CONFIG_USERFAULTFD */ /* * This struct defines a memory VMM memory area. There is one of these * per VM-area/task. A VM area is any part of the process virtual memory * space that has a special rule for the page-fault handlers (ie a shared * library, the executable area etc). */ struct vm_area_struct { /* The first cache line has the info for VMA tree walking. */ unsigned long vm_start; /* Our start address within vm_mm. */ unsigned long vm_end; /* The first byte after our end address within vm_mm. */ /* linked list of VM areas per task, sorted by address */ struct vm_area_struct *vm_next, *vm_prev; struct rb_node vm_rb; /* * Largest free memory gap in bytes to the left of this VMA. * Either between this VMA and vma->vm_prev, or between one of the * VMAs below us in the VMA rbtree and its ->vm_prev. This helps * get_unmapped_area find a free area of the right size. */ unsigned long rb_subtree_gap; /* Second cache line starts here. */ struct mm_struct *vm_mm; /* The address space we belong to. */ pgprot_t vm_page_prot; /* Access permissions of this VMA. */ unsigned long vm_flags; /* Flags, see mm.h. */ /* * For areas with an address space and backing store, * linkage into the address_space->i_mmap interval tree. */ struct { struct rb_node rb; unsigned long rb_subtree_last; } shared; /* * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma * list, after a COW of one of the file pages. A MAP_SHARED vma * can only be in the i_mmap tree. An anonymous MAP_PRIVATE, stack * or brk vma (with NULL file) can only be in an anon_vma list. */ struct list_head anon_vma_chain; /* Serialized by mmap_sem & * page_table_lock */ struct anon_vma *anon_vma; /* Serialized by page_table_lock */ /* Function pointers to deal with this struct. */ const struct vm_operations_struct *vm_ops; /* Information about our backing store: */ unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE units */ struct file * vm_file; /* File we map to (can be NULL). 
*/ void * vm_private_data; /* was vm_pte (shared mem) */ atomic_long_t swap_readahead_info; #ifndef CONFIG_MMU struct vm_region *vm_region; /* NOMMU mapping region */ #endif #ifdef CONFIG_NUMA struct mempolicy *vm_policy; /* NUMA policy for the VMA */ #endif struct vm_userfaultfd_ctx vm_userfaultfd_ctx; } __randomize_layout; struct core_thread { struct task_struct *task; struct core_thread *next; }; struct core_state { atomic_t nr_threads; struct core_thread dumper; struct completion startup; }; struct kioctx_table; struct mm_struct { struct { struct vm_area_struct *mmap; /* list of VMAs */ struct rb_root mm_rb; u64 vmacache_seqnum; /* per-thread vmacache */ #ifdef CONFIG_MMU unsigned long (*get_unmapped_area) (struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); #endif unsigned long mmap_base; /* base of mmap area */ unsigned long mmap_legacy_base; /* base of mmap area in bottom-up allocations */ #ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES /* Base adresses for compatible mmap() */ unsigned long mmap_compat_base; unsigned long mmap_compat_legacy_base; #endif unsigned long task_size; /* size of task vm space */ unsigned long highest_vm_end; /* highest vma end address */ pgd_t * pgd; /** * @mm_users: The number of users including userspace. * * Use mmget()/mmget_not_zero()/mmput() to modify. When this * drops to 0 (i.e. when the task exits and there are no other * temporary reference holders), we also release a reference on * @mm_count (which may then free the &struct mm_struct if * @mm_count also drops to 0). */ atomic_t mm_users; /** * @mm_count: The number of references to &struct mm_struct * (@mm_users count as 1). * * Use mmgrab()/mmdrop() to modify. When this drops to 0, the * &struct mm_struct is freed. */ atomic_t mm_count; #ifdef CONFIG_MMU atomic_long_t pgtables_bytes; /* PTE page table pages */ #endif int map_count; /* number of VMAs */ spinlock_t page_table_lock; /* Protects page tables and some * counters */ struct rw_semaphore mmap_sem; struct list_head mmlist; /* List of maybe swapped mm's. These * are globally strung together off * init_mm.mmlist, and are protected * by mmlist_lock */ unsigned long hiwater_rss; /* High-watermark of RSS usage */ unsigned long hiwater_vm; /* High-water virtual memory usage */ unsigned long total_vm; /* Total pages mapped */ unsigned long locked_vm; /* Pages that have PG_mlocked set */ unsigned long pinned_vm; /* Refcount permanently increased */ unsigned long data_vm; /* VM_WRITE & ~VM_SHARED & ~VM_STACK */ unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE & ~VM_STACK */ unsigned long stack_vm; /* VM_STACK */ unsigned long def_flags; spinlock_t arg_lock; /* protect the below fields */ unsigned long start_code, end_code, start_data, end_data; unsigned long start_brk, brk, start_stack; unsigned long arg_start, arg_end, env_start, env_end; unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */ /* * Special counters, in some configurations protected by the * page_table_lock, in other configurations by being atomic. 
*/ struct mm_rss_stat rss_stat; struct linux_binfmt *binfmt; /* Architecture-specific MM context */ mm_context_t context; unsigned long flags; /* Must use atomic bitops to access */ struct core_state *core_state; /* coredumping support */ #ifdef CONFIG_MEMBARRIER atomic_t membarrier_state; #endif #ifdef CONFIG_AIO spinlock_t ioctx_lock; struct kioctx_table __rcu *ioctx_table; #endif #ifdef CONFIG_MEMCG /* * "owner" points to a task that is regarded as the canonical * user/owner of this mm. All of the following must be true in * order for it to be changed: * * current == mm->owner * current->mm != mm * new_owner->mm == mm * new_owner->alloc_lock is held */ struct task_struct __rcu *owner; #endif struct user_namespace *user_ns; /* store ref to file /proc/<pid>/exe symlink points to */ struct file __rcu *exe_file; #ifdef CONFIG_MMU_NOTIFIER struct mmu_notifier_mm *mmu_notifier_mm; #endif #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS pgtable_t pmd_huge_pte; /* protected by page_table_lock */ #endif #ifdef CONFIG_NUMA_BALANCING /* * numa_next_scan is the next time that the PTEs will be marked * pte_numa. NUMA hinting faults will gather statistics and * migrate pages to new nodes if necessary. */ unsigned long numa_next_scan; /* Restart point for scanning and setting pte_numa */ unsigned long numa_scan_offset; /* numa_scan_seq prevents two threads setting pte_numa */ int numa_scan_seq; #endif /* * An operation with batched TLB flushing is going on. Anything * that can move process memory needs to flush the TLB when * moving a PROT_NONE or PROT_NUMA mapped page. */ atomic_t tlb_flush_pending; #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH /* See flush_tlb_batched_pending() */ bool tlb_flush_batched; #endif struct uprobes_state uprobes_state; #ifdef CONFIG_HUGETLB_PAGE atomic_long_t hugetlb_usage; #endif struct work_struct async_put_work; #if IS_ENABLED(CONFIG_HMM) /* HMM needs to track a few things per mm */ struct hmm *hmm; #endif } __randomize_layout; /* * The mm_cpumask needs to be at the end of mm_struct, because it * is dynamically sized based on nr_cpu_ids. */ unsigned long cpu_bitmap[]; }; extern struct mm_struct init_mm; /* Pointer magic because the dynamic array size confuses some compilers. */ static inline void mm_init_cpumask(struct mm_struct *mm) { unsigned long cpu_bitmap = (unsigned long)mm; cpu_bitmap += offsetof(struct mm_struct, cpu_bitmap); cpumask_clear((struct cpumask *)cpu_bitmap); } /* Future-safe accessor for struct mm_struct's cpu_vm_mask. */ static inline cpumask_t *mm_cpumask(struct mm_struct *mm) { return (struct cpumask *)&mm->cpu_bitmap; } struct mmu_gather; extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end); extern void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end); static inline void init_tlb_flush_pending(struct mm_struct *mm) { atomic_set(&mm->tlb_flush_pending, 0); } static inline void inc_tlb_flush_pending(struct mm_struct *mm) { atomic_inc(&mm->tlb_flush_pending); /* * The only time this value is relevant is when there are indeed pages * to flush. And we'll only flush pages after changing them, which * requires the PTL. * * So the ordering here is: * * atomic_inc(&mm->tlb_flush_pending); * spin_lock(&ptl); * ... * set_pte_at(); * spin_unlock(&ptl); * * spin_lock(&ptl) * mm_tlb_flush_pending(); * .... 
* spin_unlock(&ptl); * * flush_tlb_range(); * atomic_dec(&mm->tlb_flush_pending); * * Where the increment if constrained by the PTL unlock, it thus * ensures that the increment is visible if the PTE modification is * visible. After all, if there is no PTE modification, nobody cares * about TLB flushes either. * * This very much relies on users (mm_tlb_flush_pending() and * mm_tlb_flush_nested()) only caring about _specific_ PTEs (and * therefore specific PTLs), because with SPLIT_PTE_PTLOCKS and RCpc * locks (PPC) the unlock of one doesn't order against the lock of * another PTL. * * The decrement is ordered by the flush_tlb_range(), such that * mm_tlb_flush_pending() will not return false unless all flushes have * completed. */ } static inline void dec_tlb_flush_pending(struct mm_struct *mm) { /* * See inc_tlb_flush_pending(). * * This cannot be smp_mb__before_atomic() because smp_mb() simply does * not order against TLB invalidate completion, which is what we need. * * Therefore we must rely on tlb_flush_*() to guarantee order. */ atomic_dec(&mm->tlb_flush_pending); } static inline bool mm_tlb_flush_pending(struct mm_struct *mm) { /* * Must be called after having acquired the PTL; orders against that * PTLs release and therefore ensures that if we observe the modified * PTE we must also observe the increment from inc_tlb_flush_pending(). * * That is, it only guarantees to return true if there is a flush * pending for _this_ PTL. */ return atomic_read(&mm->tlb_flush_pending); } static inline bool mm_tlb_flush_nested(struct mm_struct *mm) { /* * Similar to mm_tlb_flush_pending(), we must have acquired the PTL * for which there is a TLB flush pending in order to guarantee * we've seen both that PTE modification and the increment. * * (no requirement on actually still holding the PTL, that is irrelevant) */ return atomic_read(&mm->tlb_flush_pending) > 1; } struct vm_fault; struct vm_special_mapping { const char *name; /* The name, e.g. "[vdso]". */ /* * If .fault is not provided, this points to a * NULL-terminated array of pages that back the special mapping. * * This must not be NULL unless .fault is provided. */ struct page **pages; /* * If non-NULL, then this is called to resolve page faults * on the special mapping. If used, .pages is not checked. */ vm_fault_t (*fault)(const struct vm_special_mapping *sm, struct vm_area_struct *vma, struct vm_fault *vmf); int (*mremap)(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma); }; enum tlb_flush_reason { TLB_FLUSH_ON_TASK_SWITCH, TLB_REMOTE_SHOOTDOWN, TLB_LOCAL_SHOOTDOWN, TLB_LOCAL_MM_SHOOTDOWN, TLB_REMOTE_SEND_IPI, NR_TLB_FLUSH_REASONS, }; /* * A swap entry has to fit into a "unsigned long", as the entry is hidden * in the "index" field of the swapper address space. */ typedef struct { unsigned long val; } swp_entry_t; #endif /* _LINUX_MM_TYPES_H */
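/*
 * Illustrative userspace analogue (C11 atomics) of the tlb_flush_pending
 * protocol documented above: a writer increments the counter before changing
 * PTEs and decrements it after the TLB flush, while readers treat any
 * non-zero value as "a flush is pending". The PTL-based ordering guarantees
 * of the real kernel code are deliberately not modelled here.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int tlb_flush_pending;

static void inc_tlb_flush_pending(void)
{
	atomic_fetch_add(&tlb_flush_pending, 1);
}

static void dec_tlb_flush_pending(void)
{
	atomic_fetch_sub(&tlb_flush_pending, 1);
}

static bool mm_tlb_flush_pending(void)
{
	return atomic_load(&tlb_flush_pending) != 0;
}

int main(void)
{
	inc_tlb_flush_pending();
	printf("pending while PTEs change: %d\n", mm_tlb_flush_pending());	/* 1 */
	dec_tlb_flush_pending();
	printf("pending after the flush:   %d\n", mm_tlb_flush_pending());	/* 0 */
	return 0;
}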
2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 /* * fs/dcache.c * * Complete reimplementation * (C) 1997 Thomas Schoebel-Theuer, * with heavy changes by Linus Torvalds */ /* * Notes on the allocation strategy: * * The dcache is a master of the icache - whenever a dcache entry * exists, the inode will always exist. "iput()" is done either when * the dcache entry is deleted or garbage collected. */ #include <linux/ratelimit.h> #include <linux/string.h> #include <linux/mm.h> #include <linux/fs.h> #include <linux/fsnotify.h> #include <linux/slab.h> #include <linux/init.h> #include <linux/hash.h> #include <linux/cache.h> #include <linux/export.h> #include <linux/security.h> #include <linux/seqlock.h> #include <linux/bootmem.h> #include <linux/bit_spinlock.h> #include <linux/rculist_bl.h> #include <linux/list_lru.h> #include "internal.h" #include "mount.h" /* * Usage: * dcache->d_inode->i_lock protects: * - i_dentry, d_u.d_alias, d_inode of aliases * dcache_hash_bucket lock protects: * - the dcache hash table * s_roots bl list spinlock protects: * - the s_roots list (see __d_drop) * dentry->d_sb->s_dentry_lru_lock protects: * - the dcache lru lists and counters * d_lock protects: * - d_flags * - d_name * - d_lru * - d_count * - d_unhashed() * - d_parent and d_subdirs * - childrens' d_child and d_parent * - d_u.d_alias, d_inode * * Ordering: * dentry->d_inode->i_lock * dentry->d_lock * dentry->d_sb->s_dentry_lru_lock * dcache_hash_bucket lock * s_roots lock * * If there is an ancestor relationship: * dentry->d_parent->...->d_parent->d_lock * ... 
* dentry->d_parent->d_lock * dentry->d_lock * * If no ancestor relationship: * arbitrary, since it's serialized on rename_lock */ int sysctl_vfs_cache_pressure __read_mostly = 100; EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); EXPORT_SYMBOL(rename_lock); static struct kmem_cache *dentry_cache __read_mostly; const struct qstr empty_name = QSTR_INIT("", 0); EXPORT_SYMBOL(empty_name); const struct qstr slash_name = QSTR_INIT("/", 1); EXPORT_SYMBOL(slash_name); /* * This is the single most critical data structure when it comes * to the dcache: the hashtable for lookups. Somebody should try * to make this good - I've just made it work. * * This hash-function tries to avoid losing too many bits of hash * information, yet avoid using a prime hash-size or similar. */ static unsigned int d_hash_shift __read_mostly; static struct hlist_bl_head *dentry_hashtable __read_mostly; static inline struct hlist_bl_head *d_hash(unsigned int hash) { return dentry_hashtable + (hash >> d_hash_shift); } #define IN_LOOKUP_SHIFT 10 static struct hlist_bl_head in_lookup_hashtable[1 << IN_LOOKUP_SHIFT]; static inline struct hlist_bl_head *in_lookup_hash(const struct dentry *parent, unsigned int hash) { hash += (unsigned long) parent / L1_CACHE_BYTES; return in_lookup_hashtable + hash_32(hash, IN_LOOKUP_SHIFT); } /* Statistics gathering. */ struct dentry_stat_t dentry_stat = { .age_limit = 45, }; static DEFINE_PER_CPU(long, nr_dentry); static DEFINE_PER_CPU(long, nr_dentry_unused); #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) /* * Here we resort to our own counters instead of using generic per-cpu counters * for consistency with what the vfs inode code does. We are expected to harvest * better code and performance by having our own specialized counters. * * Please note that the loop is done over all possible CPUs, not over all online * CPUs. The reason for this is that we don't want to play games with CPUs going * on and off. If one of them goes off, we will just keep their counters. * * glommer: See cffbc8a for details, and if you ever intend to change this, * please update all vfs counters to match. */ static long get_nr_dentry(void) { int i; long sum = 0; for_each_possible_cpu(i) sum += per_cpu(nr_dentry, i); return sum < 0 ? 0 : sum; } static long get_nr_dentry_unused(void) { int i; long sum = 0; for_each_possible_cpu(i) sum += per_cpu(nr_dentry_unused, i); return sum < 0 ? 0 : sum; } int proc_nr_dentry(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { dentry_stat.nr_dentry = get_nr_dentry(); dentry_stat.nr_unused = get_nr_dentry_unused(); return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); } #endif /* * Compare 2 name strings, return 0 if they match, otherwise non-zero. * The strings are both count bytes long, and count is non-zero. */ #ifdef CONFIG_DCACHE_WORD_ACCESS #include <asm/word-at-a-time.h> /* * NOTE! 'cs' and 'scount' come from a dentry, so it has a * aligned allocation for this particular component. We don't * strictly need the load_unaligned_zeropad() safety, but it * doesn't hurt either. * * In contrast, 'ct' and 'tcount' can be from a pathname, and do * need the careful unaligned handling. 
*/ static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount) { unsigned long a,b,mask; for (;;) { a = read_word_at_a_time(cs); b = load_unaligned_zeropad(ct); if (tcount < sizeof(unsigned long)) break; if (unlikely(a != b)) return 1; cs += sizeof(unsigned long); ct += sizeof(unsigned long); tcount -= sizeof(unsigned long); if (!tcount) return 0; } mask = bytemask_from_count(tcount); return unlikely(!!((a ^ b) & mask)); } #else static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount) { do { if (*cs != *ct) return 1; cs++; ct++; tcount--; } while (tcount); return 0; } #endif static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *ct, unsigned tcount) { /* * Be careful about RCU walk racing with rename: * use 'READ_ONCE' to fetch the name pointer. * * NOTE! Even if a rename will mean that the length * was not loaded atomically, we don't care. The * RCU walk will check the sequence count eventually, * and catch it. And we won't overrun the buffer, * because we're reading the name pointer atomically, * and a dentry name is guaranteed to be properly * terminated with a NUL byte. * * End result: even if 'len' is wrong, we'll exit * early because the data cannot match (there can * be no NUL in the ct/tcount data) */ const unsigned char *cs = READ_ONCE(dentry->d_name.name); return dentry_string_cmp(cs, ct, tcount); } struct external_name { union { atomic_t count; struct rcu_head head; } u; unsigned char name[]; }; static inline struct external_name *external_name(struct dentry *dentry) { return container_of(dentry->d_name.name, struct external_name, name[0]); } static void __d_free(struct rcu_head *head) { struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu); kmem_cache_free(dentry_cache, dentry); } static void __d_free_external_name(struct rcu_head *head) { struct external_name *name = container_of(head, struct external_name, u.head); mod_node_page_state(page_pgdat(virt_to_page(name)), NR_INDIRECTLY_RECLAIMABLE_BYTES, -ksize(name)); kfree(name); } static void __d_free_external(struct rcu_head *head) { struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu); __d_free_external_name(&external_name(dentry)->u.head); kmem_cache_free(dentry_cache, dentry); } static inline int dname_external(const struct dentry *dentry) { return dentry->d_name.name != dentry->d_iname; } void take_dentry_name_snapshot(struct name_snapshot *name, struct dentry *dentry) { spin_lock(&dentry->d_lock); if (unlikely(dname_external(dentry))) { struct external_name *p = external_name(dentry); atomic_inc(&p->u.count); spin_unlock(&dentry->d_lock); name->name = p->name; } else { memcpy(name->inline_name, dentry->d_iname, dentry->d_name.len + 1); spin_unlock(&dentry->d_lock); name->name = name->inline_name; } } EXPORT_SYMBOL(take_dentry_name_snapshot); void release_dentry_name_snapshot(struct name_snapshot *name) { if (unlikely(name->name != name->inline_name)) { struct external_name *p; p = container_of(name->name, struct external_name, name[0]); if (unlikely(atomic_dec_and_test(&p->u.count))) call_rcu(&p->u.head, __d_free_external_name); } } EXPORT_SYMBOL(release_dentry_name_snapshot); static inline void __d_set_inode_and_type(struct dentry *dentry, struct inode *inode, unsigned type_flags) { unsigned flags; dentry->d_inode = inode; flags = READ_ONCE(dentry->d_flags); flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU); flags |= type_flags; WRITE_ONCE(dentry->d_flags, flags); } static inline 
void __d_clear_type_and_inode(struct dentry *dentry) { unsigned flags = READ_ONCE(dentry->d_flags); flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU); WRITE_ONCE(dentry->d_flags, flags); dentry->d_inode = NULL; } static void dentry_free(struct dentry *dentry) { WARN_ON(!hlist_unhashed(&dentry->d_u.d_alias)); if (unlikely(dname_external(dentry))) { struct external_name *p = external_name(dentry); if (likely(atomic_dec_and_test(&p->u.count))) { call_rcu(&dentry->d_u.d_rcu, __d_free_external); return; } } /* if dentry was never visible to RCU, immediate free is OK */ if (dentry->d_flags & DCACHE_NORCU) __d_free(&dentry->d_u.d_rcu); else call_rcu(&dentry->d_u.d_rcu, __d_free); } /* * Release the dentry's inode, using the filesystem * d_iput() operation if defined. */ static void dentry_unlink_inode(struct dentry * dentry) __releases(dentry->d_lock) __releases(dentry->d_inode->i_lock) { struct inode *inode = dentry->d_inode; raw_write_seqcount_begin(&dentry->d_seq); __d_clear_type_and_inode(dentry); hlist_del_init(&dentry->d_u.d_alias); raw_write_seqcount_end(&dentry->d_seq); spin_unlock(&dentry->d_lock); spin_unlock(&inode->i_lock); if (!inode->i_nlink) fsnotify_inoderemove(inode); if (dentry->d_op && dentry->d_op->d_iput) dentry->d_op->d_iput(dentry, inode); else iput(inode); } /* * The DCACHE_LRU_LIST bit is set whenever the 'd_lru' entry * is in use - which includes both the "real" per-superblock * LRU list _and_ the DCACHE_SHRINK_LIST use. * * The DCACHE_SHRINK_LIST bit is set whenever the dentry is * on the shrink list (ie not on the superblock LRU list). * * The per-cpu "nr_dentry_unused" counters are updated with * the DCACHE_LRU_LIST bit. * * These helper functions make sure we always follow the * rules. d_lock must be held by the caller. */ #define D_FLAG_VERIFY(dentry,x) WARN_ON_ONCE(((dentry)->d_flags & (DCACHE_LRU_LIST | DCACHE_SHRINK_LIST)) != (x)) static void d_lru_add(struct dentry *dentry) { D_FLAG_VERIFY(dentry, 0); dentry->d_flags |= DCACHE_LRU_LIST; this_cpu_inc(nr_dentry_unused); WARN_ON_ONCE(!list_lru_add(&dentry->d_sb->s_dentry_lru, &dentry->d_lru)); } static void d_lru_del(struct dentry *dentry) { D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST); dentry->d_flags &= ~DCACHE_LRU_LIST; this_cpu_dec(nr_dentry_unused); WARN_ON_ONCE(!list_lru_del(&dentry->d_sb->s_dentry_lru, &dentry->d_lru)); } static void d_shrink_del(struct dentry *dentry) { D_FLAG_VERIFY(dentry, DCACHE_SHRINK_LIST | DCACHE_LRU_LIST); list_del_init(&dentry->d_lru); dentry->d_flags &= ~(DCACHE_SHRINK_LIST | DCACHE_LRU_LIST); this_cpu_dec(nr_dentry_unused); } static void d_shrink_add(struct dentry *dentry, struct list_head *list) { D_FLAG_VERIFY(dentry, 0); list_add(&dentry->d_lru, list); dentry->d_flags |= DCACHE_SHRINK_LIST | DCACHE_LRU_LIST; this_cpu_inc(nr_dentry_unused); } /* * These can only be called under the global LRU lock, ie during the * callback for freeing the LRU list. "isolate" removes it from the * LRU lists entirely, while shrink_move moves it to the indicated * private list. 
*/ static void d_lru_isolate(struct list_lru_one *lru, struct dentry *dentry) { D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST); dentry->d_flags &= ~DCACHE_LRU_LIST; this_cpu_dec(nr_dentry_unused); list_lru_isolate(lru, &dentry->d_lru); } static void d_lru_shrink_move(struct list_lru_one *lru, struct dentry *dentry, struct list_head *list) { D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST); dentry->d_flags |= DCACHE_SHRINK_LIST; list_lru_isolate_move(lru, &dentry->d_lru, list); } /** * d_drop - drop a dentry * @dentry: dentry to drop * * d_drop() unhashes the entry from the parent dentry hashes, so that it won't * be found through a VFS lookup any more. Note that this is different from * deleting the dentry - d_delete will try to mark the dentry negative if * possible, giving a successful _negative_ lookup, while d_drop will * just make the cache lookup fail. * * d_drop() is used mainly for stuff that wants to invalidate a dentry for some * reason (NFS timeouts or autofs deletes). * * __d_drop requires dentry->d_lock * ___d_drop doesn't mark dentry as "unhashed" * (dentry->d_hash.pprev will be LIST_POISON2, not NULL). */ static void ___d_drop(struct dentry *dentry) { struct hlist_bl_head *b; /* * Hashed dentries are normally on the dentry hashtable, * with the exception of those newly allocated by * d_obtain_root, which are always IS_ROOT: */ if (unlikely(IS_ROOT(dentry))) b = &dentry->d_sb->s_roots; else b = d_hash(dentry->d_name.hash); hlist_bl_lock(b); __hlist_bl_del(&dentry->d_hash); hlist_bl_unlock(b); } void __d_drop(struct dentry *dentry) { if (!d_unhashed(dentry)) { ___d_drop(dentry); dentry->d_hash.pprev = NULL; write_seqcount_invalidate(&dentry->d_seq); } } EXPORT_SYMBOL(__d_drop); void d_drop(struct dentry *dentry) { spin_lock(&dentry->d_lock); __d_drop(dentry); spin_unlock(&dentry->d_lock); } EXPORT_SYMBOL(d_drop); static inline void dentry_unlist(struct dentry *dentry, struct dentry *parent) { struct dentry *next; /* * Inform d_walk() and shrink_dentry_list() that we are no longer * attached to the dentry tree */ dentry->d_flags |= DCACHE_DENTRY_KILLED; if (unlikely(list_empty(&dentry->d_child))) return; __list_del_entry(&dentry->d_child); /* * Cursors can move around the list of children. While we'd been * a normal list member, it didn't matter - ->d_child.next would've * been updated. However, from now on it won't be and for the * things like d_walk() it might end up with a nasty surprise. * Normally d_walk() doesn't care about cursors moving around - * ->d_lock on parent prevents that and since a cursor has no children * of its own, we get through it without ever unlocking the parent. * There is one exception, though - if we ascend from a child that * gets killed as soon as we unlock it, the next sibling is found * using the value left in its ->d_child.next. And if _that_ * pointed to a cursor, and cursor got moved (e.g. by lseek()) * before d_walk() regains parent->d_lock, we'll end up skipping * everything the cursor had been moved past. * * Solution: make sure that the pointer left behind in ->d_child.next * points to something that won't be moving around. I.e. skip the * cursors. 
*/ while (dentry->d_child.next != &parent->d_subdirs) { next = list_entry(dentry->d_child.next, struct dentry, d_child); if (likely(!(next->d_flags & DCACHE_DENTRY_CURSOR))) break; dentry->d_child.next = next->d_child.next; } } static void __dentry_kill(struct dentry *dentry) { struct dentry *parent = NULL; bool can_free = true; if (!IS_ROOT(dentry)) parent = dentry->d_parent; /* * The dentry is now unrecoverably dead to the world. */ lockref_mark_dead(&dentry->d_lockref); /* * inform the fs via d_prune that this dentry is about to be * unhashed and destroyed. */ if (dentry->d_flags & DCACHE_OP_PRUNE) dentry->d_op->d_prune(dentry); if (dentry->d_flags & DCACHE_LRU_LIST) { if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) d_lru_del(dentry); } /* if it was on the hash then remove it */ __d_drop(dentry); dentry_unlist(dentry, parent); if (parent) spin_unlock(&parent->d_lock); if (dentry->d_inode) dentry_unlink_inode(dentry); else spin_unlock(&dentry->d_lock); this_cpu_dec(nr_dentry); if (dentry->d_op && dentry->d_op->d_release) dentry->d_op->d_release(dentry); spin_lock(&dentry->d_lock); if (dentry->d_flags & DCACHE_SHRINK_LIST) { dentry->d_flags |= DCACHE_MAY_FREE; can_free = false; } spin_unlock(&dentry->d_lock); if (likely(can_free)) dentry_free(dentry); cond_resched(); } static struct dentry *__lock_parent(struct dentry *dentry) { struct dentry *parent; rcu_read_lock(); spin_unlock(&dentry->d_lock); again: parent = READ_ONCE(dentry->d_parent); spin_lock(&parent->d_lock); /* * We can't blindly lock dentry until we are sure * that we won't violate the locking order. * Any changes of dentry->d_parent must have * been done with parent->d_lock held, so * spin_lock() above is enough of a barrier * for checking if it's still our child. */ if (unlikely(parent != dentry->d_parent)) { spin_unlock(&parent->d_lock); goto again; } rcu_read_unlock(); if (parent != dentry) spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); else parent = NULL; return parent; } static inline struct dentry *lock_parent(struct dentry *dentry) { struct dentry *parent = dentry->d_parent; if (IS_ROOT(dentry)) return NULL; if (likely(spin_trylock(&parent->d_lock))) return parent; return __lock_parent(dentry); } static inline bool retain_dentry(struct dentry *dentry) { WARN_ON(d_in_lookup(dentry)); /* Unreachable? Get rid of it */ if (unlikely(d_unhashed(dentry))) return false; if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) return false; if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) { if (dentry->d_op->d_delete(dentry)) return false; } /* retain; LRU fodder */ dentry->d_lockref.count--; if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST))) d_lru_add(dentry); else if (unlikely(!(dentry->d_flags & DCACHE_REFERENCED))) dentry->d_flags |= DCACHE_REFERENCED; return true; } /* * Finish off a dentry we've decided to kill. * dentry->d_lock must be held, returns with it unlocked. * Returns dentry requiring refcount drop, or NULL if we're done. 
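 */

/*
 * Illustrative sketch (editorial addition): what a dput()-style caller does
 * once it holds ->d_lock and the refcount is about to hit zero.
 * retain_dentry() above either keeps the dentry (parking it on the LRU and
 * dropping the count itself) or reports that it must be torn down, in which
 * case the real code goes on to dentry_kill() below.
 */
static __maybe_unused bool example_retain_or_kill(struct dentry *dentry)
{
	/* assumption: ->d_lock is held and the last reference is being dropped */
	if (retain_dentry(dentry)) {
		spin_unlock(&dentry->d_lock);
		return false;		/* kept around as LRU fodder */
	}
	return true;			/* caller proceeds to dentry_kill() */
}

/*
 * dentry_kill(), described above: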
*/ static struct dentry *dentry_kill(struct dentry *dentry) __releases(dentry->d_lock) { struct inode *inode = dentry->d_inode; struct dentry *parent = NULL; if (inode && unlikely(!spin_trylock(&inode->i_lock))) goto slow_positive; if (!IS_ROOT(dentry)) { parent = dentry->d_parent; if (unlikely(!spin_trylock(&parent->d_lock))) { parent = __lock_parent(dentry); if (likely(inode || !dentry->d_inode)) goto got_locks; /* negative that became positive */ if (parent) spin_unlock(&parent->d_lock); inode = dentry->d_inode; goto slow_positive; } } __dentry_kill(dentry); return parent; slow_positive: spin_unlock(&dentry->d_lock); spin_lock(&inode->i_lock); spin_lock(&dentry->d_lock); parent = lock_parent(dentry); got_locks: if (unlikely(dentry->d_lockref.count != 1)) { dentry->d_lockref.count--; } else if (likely(!retain_dentry(dentry))) { __dentry_kill(dentry); return parent; } /* we are keeping it, after all */ if (inode) spin_unlock(&inode->i_lock); if (parent) spin_unlock(&parent->d_lock); spin_unlock(&dentry->d_lock); return NULL; } /* * Try to do a lockless dput(), and return whether that was successful. * * If unsuccessful, we return false, having already taken the dentry lock. * * The caller needs to hold the RCU read lock, so that the dentry is * guaranteed to stay around even if the refcount goes down to zero! */ static inline bool fast_dput(struct dentry *dentry) { int ret; unsigned int d_flags; /* * If we have a d_op->d_delete() operation, we should not * let the dentry count go to zero, so use "put_or_lock". */ if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) return lockref_put_or_lock(&dentry->d_lockref); /* * .. otherwise, we can try to just decrement the * lockref optimistically. */ ret = lockref_put_return(&dentry->d_lockref); /* * If the lockref_put_return() failed due to the lock being held * by somebody else, the fast path has failed. We will need to * get the lock, and then check the count again. */ if (unlikely(ret < 0)) { spin_lock(&dentry->d_lock); if (dentry->d_lockref.count > 1) { dentry->d_lockref.count--; spin_unlock(&dentry->d_lock); return true; } return false; } /* * If we weren't the last ref, we're done. */ if (ret) return true; /* * Careful, careful. The reference count went down * to zero, but we don't hold the dentry lock, so * somebody else could get it again, and do another * dput(), and we need to not race with that. * * However, there is a very special and common case * where we don't care, because there is nothing to * do: the dentry is still hashed, it does not have * a 'delete' op, and it's referenced and already on * the LRU list. * * NOTE! Since we aren't locked, these values are * not "stable". However, it is sufficient that at * some point after we dropped the reference the * dentry was hashed and the flags had the proper * value. Other dentry users may have re-gotten * a reference to the dentry and change that, but * our work is done - we can leave the dentry * around with a zero refcount. */ smp_rmb(); d_flags = READ_ONCE(dentry->d_flags); d_flags &= DCACHE_REFERENCED | DCACHE_LRU_LIST | DCACHE_DISCONNECTED; /* Nothing to do? Dropping the reference was all we needed? */ if (d_flags == (DCACHE_REFERENCED | DCACHE_LRU_LIST) && !d_unhashed(dentry)) return true; /* * Not the fast normal case? Get the lock. We've already decremented * the refcount, but we'll need to re-check the situation after * getting the lock. */ spin_lock(&dentry->d_lock); /* * Did somebody else grab a reference to it in the meantime, and * we're no longer the last user after all?
Alternatively, somebody * else could have killed it and marked it dead. Either way, we * don't need to do anything else. */ if (dentry->d_lockref.count) { spin_unlock(&dentry->d_lock); return true; } /* * Re-get the reference we optimistically dropped. We hold the * lock, and we just tested that it was zero, so we can just * set it to 1. */ dentry->d_lockref.count = 1; return false; } /* * This is dput * * This is complicated by the fact that we do not want to put * dentries that are no longer on any hash chain on the unused * list: we'd much rather just get rid of them immediately. * * However, that implies that we have to traverse the dentry * tree upwards to the parents which might _also_ now be * scheduled for deletion (it may have been only waiting for * its last child to go away). * * This tail recursion is done by hand as we don't want to depend * on the compiler to always get this right (gcc generally doesn't). * Real recursion would eat up our stack space. */ /* * dput - release a dentry * @dentry: dentry to release * * Release a dentry. This will drop the usage count and if appropriate * call the dentry unlink method as well as removing it from the queues and * releasing its resources. If the parent dentries were scheduled for release * they too may now get deleted. */ void dput(struct dentry *dentry) { while (dentry) { might_sleep(); rcu_read_lock(); if (likely(fast_dput(dentry))) { rcu_read_unlock(); return; } /* Slow case: now with the dentry lock held */ rcu_read_unlock(); if (likely(retain_dentry(dentry))) { spin_unlock(&dentry->d_lock); return; } dentry = dentry_kill(dentry); } } EXPORT_SYMBOL(dput); /* This must be called with d_lock held */ static inline void __dget_dlock(struct dentry *dentry) { dentry->d_lockref.count++; } static inline void __dget(struct dentry *dentry) { lockref_get(&dentry->d_lockref); } struct dentry *dget_parent(struct dentry *dentry) { int gotref; struct dentry *ret; unsigned seq; /* * Do optimistic parent lookup without any * locking. */ rcu_read_lock(); seq = raw_seqcount_begin(&dentry->d_seq); ret = READ_ONCE(dentry->d_parent); gotref = lockref_get_not_zero(&ret->d_lockref); rcu_read_unlock(); if (likely(gotref)) { if (!read_seqcount_retry(&dentry->d_seq, seq)) return ret; dput(ret); } repeat: /* * Don't need rcu_dereference because we re-check it was correct under * the lock. */ rcu_read_lock(); ret = dentry->d_parent; spin_lock(&ret->d_lock); if (unlikely(ret != dentry->d_parent)) { spin_unlock(&ret->d_lock); rcu_read_unlock(); goto repeat; } rcu_read_unlock(); BUG_ON(!ret->d_lockref.count); ret->d_lockref.count++; spin_unlock(&ret->d_lock); return ret; } EXPORT_SYMBOL(dget_parent); static struct dentry * __d_find_any_alias(struct inode *inode) { struct dentry *alias; if (hlist_empty(&inode->i_dentry)) return NULL; alias = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias); __dget(alias); return alias; } /** * d_find_any_alias - find any alias for a given inode * @inode: inode to find an alias for * * If any aliases exist for the given inode, take and return a * reference for one of them. If no aliases exist, return %NULL. */ struct dentry *d_find_any_alias(struct inode *inode) { struct dentry *de; spin_lock(&inode->i_lock); de = __d_find_any_alias(inode); spin_unlock(&inode->i_lock); return de; } EXPORT_SYMBOL(d_find_any_alias); /** * d_find_alias - grab a hashed alias of inode * @inode: inode in question * * If inode has a hashed alias, or is a directory and has any alias, * acquire the reference to alias and return it. 
Otherwise return NULL. * Notice that if inode is a directory there can be only one alias and * it can be unhashed only if it has no children, or if it is the root * of a filesystem, or if the directory was renamed and d_revalidate * was the first vfs operation to notice. * * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer * any other hashed alias over that one. */ static struct dentry *__d_find_alias(struct inode *inode) { struct dentry *alias; if (S_ISDIR(inode->i_mode)) return __d_find_any_alias(inode); hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { spin_lock(&alias->d_lock); if (!d_unhashed(alias)) { __dget_dlock(alias); spin_unlock(&alias->d_lock); return alias; } spin_unlock(&alias->d_lock); } return NULL; } struct dentry *d_find_alias(struct inode *inode) { struct dentry *de = NULL; if (!hlist_empty(&inode->i_dentry)) { spin_lock(&inode->i_lock); de = __d_find_alias(inode); spin_unlock(&inode->i_lock); } return de; } EXPORT_SYMBOL(d_find_alias); /* * Try to kill dentries associated with this inode. * WARNING: you must own a reference to inode. */ void d_prune_aliases(struct inode *inode) { struct dentry *dentry; restart: spin_lock(&inode->i_lock); hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) { spin_lock(&dentry->d_lock); if (!dentry->d_lockref.count) { struct dentry *parent = lock_parent(dentry); if (likely(!dentry->d_lockref.count)) { __dentry_kill(dentry); dput(parent); goto restart; } if (parent) spin_unlock(&parent->d_lock); } spin_unlock(&dentry->d_lock); } spin_unlock(&inode->i_lock); } EXPORT_SYMBOL(d_prune_aliases); /* * Lock a dentry from shrink list. * Called under rcu_read_lock() and dentry->d_lock; the former * guarantees that nothing we access will be freed under us. * Note that dentry is *not* protected from concurrent dentry_kill(), * d_delete(), etc. * * Return false if dentry has been disrupted or grabbed, leaving * the caller to kick it off-list. Otherwise, return true and have * that dentry's inode and parent both locked. 
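 */

/*
 * Illustrative sketch (editorial addition): the calling convention described
 * above, seen from a hypothetical shrink-list walker.  The real consumer is
 * shrink_dentry_list() below; the forward declaration is only needed because
 * this sketch sits ahead of the definition.
 */
static bool shrink_lock_dentry(struct dentry *dentry);

static __maybe_unused void example_evict_from_shrink_list(struct dentry *dentry)
{
	spin_lock(&dentry->d_lock);
	rcu_read_lock();
	if (!shrink_lock_dentry(dentry)) {
		/* grabbed or disrupted: leave it for whoever holds it */
		rcu_read_unlock();
		spin_unlock(&dentry->d_lock);
		return;
	}
	rcu_read_unlock();
	/* d_lock, parent->d_lock and (if positive) inode->i_lock are now held */
	d_shrink_del(dentry);
	__dentry_kill(dentry);
}

/*
 * shrink_lock_dentry() itself: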
*/ static bool shrink_lock_dentry(struct dentry *dentry) { struct inode *inode; struct dentry *parent; if (dentry->d_lockref.count) return false; inode = dentry->d_inode; if (inode && unlikely(!spin_trylock(&inode->i_lock))) { spin_unlock(&dentry->d_lock); spin_lock(&inode->i_lock); spin_lock(&dentry->d_lock); if (unlikely(dentry->d_lockref.count)) goto out; /* changed inode means that somebody had grabbed it */ if (unlikely(inode != dentry->d_inode)) goto out; } parent = dentry->d_parent; if (IS_ROOT(dentry) || likely(spin_trylock(&parent->d_lock))) return true; spin_unlock(&dentry->d_lock); spin_lock(&parent->d_lock); if (unlikely(parent != dentry->d_parent)) { spin_unlock(&parent->d_lock); spin_lock(&dentry->d_lock); goto out; } spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); if (likely(!dentry->d_lockref.count)) return true; spin_unlock(&parent->d_lock); out: if (inode) spin_unlock(&inode->i_lock); return false; } static void shrink_dentry_list(struct list_head *list) { while (!list_empty(list)) { struct dentry *dentry, *parent; dentry = list_entry(list->prev, struct dentry, d_lru); spin_lock(&dentry->d_lock); rcu_read_lock(); if (!shrink_lock_dentry(dentry)) { bool can_free = false; rcu_read_unlock(); d_shrink_del(dentry); if (dentry->d_lockref.count < 0) can_free = dentry->d_flags & DCACHE_MAY_FREE; spin_unlock(&dentry->d_lock); if (can_free) dentry_free(dentry); continue; } rcu_read_unlock(); d_shrink_del(dentry); parent = dentry->d_parent; __dentry_kill(dentry); if (parent == dentry) continue; /* * We need to prune ancestors too. This is necessary to prevent * quadratic behavior of shrink_dcache_parent(), but is also * expected to be beneficial in reducing dentry cache * fragmentation. */ dentry = parent; while (dentry && !lockref_put_or_lock(&dentry->d_lockref)) dentry = dentry_kill(dentry); } } static enum lru_status dentry_lru_isolate(struct list_head *item, struct list_lru_one *lru, spinlock_t *lru_lock, void *arg) { struct list_head *freeable = arg; struct dentry *dentry = container_of(item, struct dentry, d_lru); /* * we are inverting the lru lock/dentry->d_lock here, * so use a trylock. If we fail to get the lock, just skip * it */ if (!spin_trylock(&dentry->d_lock)) return LRU_SKIP; /* * Referenced dentries are still in use. If they have active * counts, just remove them from the LRU. Otherwise give them * another pass through the LRU. */ if (dentry->d_lockref.count) { d_lru_isolate(lru, dentry); spin_unlock(&dentry->d_lock); return LRU_REMOVED; } if (dentry->d_flags & DCACHE_REFERENCED) { dentry->d_flags &= ~DCACHE_REFERENCED; spin_unlock(&dentry->d_lock); /* * The list move itself will be made by the common LRU code. At * this point, we've dropped the dentry->d_lock but keep the * lru lock. This is safe to do, since every list movement is * protected by the lru lock even if both locks are held. * * This is guaranteed by the fact that all LRU management * functions are intermediated by the LRU API calls like * list_lru_add and list_lru_del. List movement in this file * only ever occur through this functions or through callbacks * like this one, that are called from the LRU API. * * The only exceptions to this are functions like * shrink_dentry_list, and code that first checks for the * DCACHE_SHRINK_LIST flag. Those are guaranteed to be * operating only with stack provided lists after they are * properly isolated from the main list. It is thus, always a * local access. 
*/ return LRU_ROTATE; } d_lru_shrink_move(lru, dentry, freeable); spin_unlock(&dentry->d_lock); return LRU_REMOVED; } /** * prune_dcache_sb - shrink the dcache * @sb: superblock * @sc: shrink control, passed to list_lru_shrink_walk() * * Attempt to shrink the superblock dcache LRU by @sc->nr_to_scan entries. This * is done when we need more memory and called from the superblock shrinker * function. * * This function may fail to free any resources if all the dentries are in * use. */ long prune_dcache_sb(struct super_block *sb, struct shrink_control *sc) { LIST_HEAD(dispose); long freed; freed = list_lru_shrink_walk(&sb->s_dentry_lru, sc, dentry_lru_isolate, &dispose); shrink_dentry_list(&dispose); return freed; } static enum lru_status dentry_lru_isolate_shrink(struct list_head *item, struct list_lru_one *lru, spinlock_t *lru_lock, void *arg) { struct list_head *freeable = arg; struct dentry *dentry = container_of(item, struct dentry, d_lru); /* * we are inverting the lru lock/dentry->d_lock here, * so use a trylock. If we fail to get the lock, just skip * it */ if (!spin_trylock(&dentry->d_lock)) return LRU_SKIP; d_lru_shrink_move(lru, dentry, freeable); spin_unlock(&dentry->d_lock); return LRU_REMOVED; } /** * shrink_dcache_sb - shrink dcache for a superblock * @sb: superblock * * Shrink the dcache for the specified super block. This is used to free * the dcache before unmounting a file system. */ void shrink_dcache_sb(struct super_block *sb) { do { LIST_HEAD(dispose); list_lru_walk(&sb->s_dentry_lru, dentry_lru_isolate_shrink, &dispose, 1024); shrink_dentry_list(&dispose); } while (list_lru_count(&sb->s_dentry_lru) > 0); } EXPORT_SYMBOL(shrink_dcache_sb); /** * enum d_walk_ret - action to take during tree walk * @D_WALK_CONTINUE: continue walk * @D_WALK_QUIT: quit walk * @D_WALK_NORETRY: quit when retry is needed * @D_WALK_SKIP: skip this dentry and its children */ enum d_walk_ret { D_WALK_CONTINUE, D_WALK_QUIT, D_WALK_NORETRY, D_WALK_SKIP, }; /** * d_walk - walk the dentry tree * @parent: start of walk * @data: data passed to @enter() * @enter: callback when first entering the dentry * * The @enter() callbacks are called with d_lock held. */ static void d_walk(struct dentry *parent, void *data, enum d_walk_ret (*enter)(void *, struct dentry *)) { struct dentry *this_parent; struct list_head *next; unsigned seq = 0; enum d_walk_ret ret; bool retry = true; again: read_seqbegin_or_lock(&rename_lock, &seq); this_parent = parent; spin_lock(&this_parent->d_lock); ret = enter(data, this_parent); switch (ret) { case D_WALK_CONTINUE: break; case D_WALK_QUIT: case D_WALK_SKIP: goto out_unlock; case D_WALK_NORETRY: retry = false; break; } repeat: next = this_parent->d_subdirs.next; resume: while (next != &this_parent->d_subdirs) { struct list_head *tmp = next; struct dentry *dentry = list_entry(tmp, struct dentry, d_child); next = tmp->next; if (unlikely(dentry->d_flags & DCACHE_DENTRY_CURSOR)) continue; spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); ret = enter(data, dentry); switch (ret) { case D_WALK_CONTINUE: break; case D_WALK_QUIT: spin_unlock(&dentry->d_lock); goto out_unlock; case D_WALK_NORETRY: retry = false; break; case D_WALK_SKIP: spin_unlock(&dentry->d_lock); continue; } if (!list_empty(&dentry->d_subdirs)) { spin_unlock(&this_parent->d_lock); spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_); this_parent = dentry; spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_); goto repeat; } spin_unlock(&dentry->d_lock); } /* * All done at this level ...
ascend and resume the search. */ rcu_read_lock(); ascend: if (this_parent != parent) { struct dentry *child = this_parent; this_parent = child->d_parent; spin_unlock(&child->d_lock); spin_lock(&this_parent->d_lock); /* might go back up the wrong parent if we have had a rename. */ if (need_seqretry(&rename_lock, seq)) goto rename_retry; /* go into the first sibling still alive */ do { next = child->d_child.next; if (next == &this_parent->d_subdirs) goto ascend; child = list_entry(next, struct dentry, d_child); } while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED)); rcu_read_unlock(); goto resume; } if (need_seqretry(&rename_lock, seq)) goto rename_retry; rcu_read_unlock(); out_unlock: spin_unlock(&this_parent->d_lock); done_seqretry(&rename_lock, seq); return; rename_retry: spin_unlock(&this_parent->d_lock); rcu_read_unlock(); BUG_ON(seq & 1); if (!retry) return; seq = 1; goto again; } struct check_mount { struct vfsmount *mnt; unsigned int mounted; }; static enum d_walk_ret path_check_mount(void *data, struct dentry *dentry) { struct check_mount *info = data; struct path path = { .mnt = info->mnt, .dentry = dentry }; if (likely(!d_mountpoint(dentry))) return D_WALK_CONTINUE; if (__path_is_mountpoint(&path)) { info->mounted = 1; return D_WALK_QUIT; } return D_WALK_CONTINUE; } /** * path_has_submounts - check for mounts over a dentry in the * current namespace. * @parent: path to check. * * Return true if the parent or its subdirectories contain * a mount point in the current namespace. */ int path_has_submounts(const struct path *parent) { struct check_mount data = { .mnt = parent->mnt, .mounted = 0 }; read_seqlock_excl(&mount_lock); d_walk(parent->dentry, &data, path_check_mount); read_sequnlock_excl(&mount_lock); return data.mounted; } EXPORT_SYMBOL(path_has_submounts); /* * Called by mount code to set a mountpoint and check if the mountpoint is * reachable (e.g. NFS can unhash a directory dentry and then the complete * subtree can become unreachable). * * Only one of d_invalidate() and d_set_mounted() must succeed. For * this reason take rename_lock and d_lock on dentry and ancestors. */ int d_set_mounted(struct dentry *dentry) { struct dentry *p; int ret = -ENOENT; write_seqlock(&rename_lock); for (p = dentry->d_parent; !IS_ROOT(p); p = p->d_parent) { /* Need exclusion wrt. d_invalidate() */ spin_lock(&p->d_lock); if (unlikely(d_unhashed(p))) { spin_unlock(&p->d_lock); goto out; } spin_unlock(&p->d_lock); } spin_lock(&dentry->d_lock); if (!d_unlinked(dentry)) { ret = -EBUSY; if (!d_mountpoint(dentry)) { dentry->d_flags |= DCACHE_MOUNTED; ret = 0; } } spin_unlock(&dentry->d_lock); out: write_sequnlock(&rename_lock); return ret; } /* * Search the dentry child list of the specified parent, * and move any unused dentries to the end of the unused * list for prune_dcache(). We descend to the next level * whenever the d_subdirs list is non-empty and continue * searching. * * It returns zero iff there are no unused children, * otherwise it returns the number of children moved to * the end of the unused list. This may not be the total * number of unused children, because select_parent can * drop the lock and return early due to latency * constraints. 
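 */

/*
 * Illustrative sketch (editorial addition): the usual caller-side pattern,
 * e.g. in a filesystem that has decided an entire directory tree is stale.
 * shrink_dcache_parent() prunes unused descendants as described above and
 * d_drop() then unhashes the root of the stale subtree; both are exported
 * dcache interfaces.
 */
static __maybe_unused void example_prune_stale_tree(struct dentry *dentry)
{
	shrink_dcache_parent(dentry);
	d_drop(dentry);
}

/*
 * The select_collect()/shrink_dcache_parent() machinery follows.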
*/ struct select_data { struct dentry *start; struct list_head dispose; int found; }; static enum d_walk_ret select_collect(void *_data, struct dentry *dentry) { struct select_data *data = _data; enum d_walk_ret ret = D_WALK_CONTINUE; if (data->start == dentry) goto out; if (dentry->d_flags & DCACHE_SHRINK_LIST) { data->found++; } else { if (dentry->d_flags & DCACHE_LRU_LIST) d_lru_del(dentry); if (!dentry->d_lockref.count) { d_shrink_add(dentry, &data->dispose); data->found++; } } /* * We can return to the caller if we have found some (this * ensures forward progress). We'll be coming back to find * the rest. */ if (!list_empty(&data->dispose)) ret = need_resched() ? D_WALK_QUIT : D_WALK_NORETRY; out: return ret; } /** * shrink_dcache_parent - prune dcache * @parent: parent of entries to prune * * Prune the dcache to remove unused children of the parent dentry. */ void shrink_dcache_parent(struct dentry *parent) { for (;;) { struct select_data data; INIT_LIST_HEAD(&data.dispose); data.start = parent; data.found = 0; d_walk(parent, &data, select_collect); if (!list_empty(&data.dispose)) { shrink_dentry_list(&data.dispose); continue; } cond_resched(); if (!data.found) break; } } EXPORT_SYMBOL(shrink_dcache_parent); static enum d_walk_ret umount_check(void *_data, struct dentry *dentry) { /* it has busy descendents; complain about those instead */ if (!list_empty(&dentry->d_subdirs)) return D_WALK_CONTINUE; /* root with refcount 1 is fine */ if (dentry == _data && dentry->d_lockref.count == 1) return D_WALK_CONTINUE; printk(KERN_ERR "BUG: Dentry %p{i=%lx,n=%pd} " " still in use (%d) [unmount of %s %s]\n", dentry, dentry->d_inode ? dentry->d_inode->i_ino : 0UL, dentry, dentry->d_lockref.count, dentry->d_sb->s_type->name, dentry->d_sb->s_id); WARN_ON(1); return D_WALK_CONTINUE; } static void do_one_tree(struct dentry *dentry) { shrink_dcache_parent(dentry); d_walk(dentry, dentry, umount_check); d_drop(dentry); dput(dentry); } /* * destroy the dentries attached to a superblock on unmounting */ void shrink_dcache_for_umount(struct super_block *sb) { struct dentry *dentry; WARN(down_read_trylock(&sb->s_umount), "s_umount should've been locked"); dentry = sb->s_root; sb->s_root = NULL; do_one_tree(dentry); while (!hlist_bl_empty(&sb->s_roots)) { dentry = dget(hlist_bl_entry(hlist_bl_first(&sb->s_roots), struct dentry, d_hash)); do_one_tree(dentry); } } static enum d_walk_ret find_submount(void *_data, struct dentry *dentry) { struct dentry **victim = _data; if (d_mountpoint(dentry)) { __dget_dlock(dentry); *victim = dentry; return D_WALK_QUIT; } return D_WALK_CONTINUE; } /** * d_invalidate - detach submounts, prune dcache, and drop * @dentry: dentry to invalidate (aka detach, prune and drop) */ void d_invalidate(struct dentry *dentry) { bool had_submounts = false; spin_lock(&dentry->d_lock); if (d_unhashed(dentry)) { spin_unlock(&dentry->d_lock); return; } __d_drop(dentry); spin_unlock(&dentry->d_lock); /* Negative dentries can be dropped without further checks */ if (!dentry->d_inode) return; shrink_dcache_parent(dentry); for (;;) { struct dentry *victim = NULL; d_walk(dentry, &victim, find_submount); if (!victim) { if (had_submounts) shrink_dcache_parent(dentry); return; } had_submounts = true; detach_mounts(victim); dput(victim); } } EXPORT_SYMBOL(d_invalidate); /** * __d_alloc - allocate a dcache entry * @sb: filesystem it will belong to * @name: qstr of the name * * Allocates a dentry. It returns %NULL if there is insufficient memory * available. On a success the dentry is returned. 
The name passed in is * copied and the copy passed in may be reused after this call. */ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) { struct external_name *ext = NULL; struct dentry *dentry; char *dname; int err; dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL); if (!dentry) return NULL; /* * We guarantee that the inline name is always NUL-terminated. * This way the memcpy() done by the name switching in rename * will still always have a NUL at the end, even if we might * be overwriting an internal NUL character */ dentry->d_iname[DNAME_INLINE_LEN-1] = 0; if (unlikely(!name)) { name = &slash_name; dname = dentry->d_iname; } else if (name->len > DNAME_INLINE_LEN-1) { size_t size = offsetof(struct external_name, name[1]); ext = kmalloc(size + name->len, GFP_KERNEL_ACCOUNT); if (!ext) { kmem_cache_free(dentry_cache, dentry); return NULL; } atomic_set(&ext->u.count, 1); dname = ext->name; } else { dname = dentry->d_iname; } dentry->d_name.len = name->len; dentry->d_name.hash = name->hash; memcpy(dname, name->name, name->len); dname[name->len] = 0; /* Make sure we always see the terminating NUL character */ smp_store_release(&dentry->d_name.name, dname); /* ^^^ */ dentry->d_lockref.count = 1; dentry->d_flags = 0; spin_lock_init(&dentry->d_lock); seqcount_init(&dentry->d_seq); dentry->d_inode = NULL; dentry->d_parent = dentry; dentry->d_sb = sb; dentry->d_op = NULL; dentry->d_fsdata = NULL; INIT_HLIST_BL_NODE(&dentry->d_hash); INIT_LIST_HEAD(&dentry->d_lru); INIT_LIST_HEAD(&dentry->d_subdirs); INIT_HLIST_NODE(&dentry->d_u.d_alias); INIT_LIST_HEAD(&dentry->d_child); d_set_d_op(dentry, dentry->d_sb->s_d_op); if (dentry->d_op && dentry->d_op->d_init) { err = dentry->d_op->d_init(dentry); if (err) { if (dname_external(dentry)) kfree(external_name(dentry)); kmem_cache_free(dentry_cache, dentry); return NULL; } } if (unlikely(ext)) { pg_data_t *pgdat = page_pgdat(virt_to_page(ext)); mod_node_page_state(pgdat, NR_INDIRECTLY_RECLAIMABLE_BYTES, ksize(ext)); } this_cpu_inc(nr_dentry); return dentry; } /** * d_alloc - allocate a dcache entry * @parent: parent of entry to allocate * @name: qstr of the name * * Allocates a dentry. It returns %NULL if there is insufficient memory * available. On a success the dentry is returned. The name passed in is * copied and the copy passed in may be reused after this call. */ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) { struct dentry *dentry = __d_alloc(parent->d_sb, name); if (!dentry) return NULL; spin_lock(&parent->d_lock); /* * don't need child lock because it is not subject * to concurrency here */ __dget_dlock(parent); dentry->d_parent = parent; list_add(&dentry->d_child, &parent->d_subdirs); spin_unlock(&parent->d_lock); return dentry; } EXPORT_SYMBOL(d_alloc); struct dentry *d_alloc_anon(struct super_block *sb) { return __d_alloc(sb, NULL); } EXPORT_SYMBOL(d_alloc_anon); struct dentry *d_alloc_cursor(struct dentry * parent) { struct dentry *dentry = d_alloc_anon(parent->d_sb); if (dentry) { dentry->d_flags |= DCACHE_DENTRY_CURSOR; dentry->d_parent = dget(parent); } return dentry; } /** * d_alloc_pseudo - allocate a dentry (for lookup-less filesystems) * @sb: the superblock * @name: qstr of the name * * For a filesystem that just pins its dentries in memory and never * performs lookups at all, return an unhashed IS_ROOT dentry. * This is used for pipes, sockets et.al. - the stuff that should * never be anyone's children or parents. 
Unlike all other * dentries, these will not have RCU delay between dropping the * last reference and freeing them. */ struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name) { struct dentry *dentry = __d_alloc(sb, name); if (likely(dentry)) dentry->d_flags |= DCACHE_NORCU; return dentry; } EXPORT_SYMBOL(d_alloc_pseudo); struct dentry *d_alloc_name(struct dentry *parent, const char *name) { struct qstr q; q.name = name; q.hash_len = hashlen_string(parent, name); return d_alloc(parent, &q); } EXPORT_SYMBOL(d_alloc_name); void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op) { WARN_ON_ONCE(dentry->d_op); WARN_ON_ONCE(dentry->d_flags & (DCACHE_OP_HASH | DCACHE_OP_COMPARE | DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE | DCACHE_OP_DELETE | DCACHE_OP_REAL)); dentry->d_op = op; if (!op) return; if (op->d_hash) dentry->d_flags |= DCACHE_OP_HASH; if (op->d_compare) dentry->d_flags |= DCACHE_OP_COMPARE; if (op->d_revalidate) dentry->d_flags |= DCACHE_OP_REVALIDATE; if (op->d_weak_revalidate) dentry->d_flags |= DCACHE_OP_WEAK_REVALIDATE; if (op->d_delete) dentry->d_flags |= DCACHE_OP_DELETE; if (op->d_prune) dentry->d_flags |= DCACHE_OP_PRUNE; if (op->d_real) dentry->d_flags |= DCACHE_OP_REAL; } EXPORT_SYMBOL(d_set_d_op); /* * d_set_fallthru - Mark a dentry as falling through to a lower layer * @dentry - The dentry to mark * * Mark a dentry as falling through to the lower layer (as set with * d_pin_lower()). This flag may be recorded on the medium. */ void d_set_fallthru(struct dentry *dentry) { spin_lock(&dentry->d_lock); dentry->d_flags |= DCACHE_FALLTHRU; spin_unlock(&dentry->d_lock); } EXPORT_SYMBOL(d_set_fallthru); static unsigned d_flags_for_inode(struct inode *inode) { unsigned add_flags = DCACHE_REGULAR_TYPE; if (!inode) return DCACHE_MISS_TYPE; if (S_ISDIR(inode->i_mode)) { add_flags = DCACHE_DIRECTORY_TYPE; if (unlikely(!(inode->i_opflags & IOP_LOOKUP))) { if (unlikely(!inode->i_op->lookup)) add_flags = DCACHE_AUTODIR_TYPE; else inode->i_opflags |= IOP_LOOKUP; } goto type_determined; } if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) { if (unlikely(inode->i_op->get_link)) { add_flags = DCACHE_SYMLINK_TYPE; goto type_determined; } inode->i_opflags |= IOP_NOFOLLOW; } if (unlikely(!S_ISREG(inode->i_mode))) add_flags = DCACHE_SPECIAL_TYPE; type_determined: if (unlikely(IS_AUTOMOUNT(inode))) add_flags |= DCACHE_NEED_AUTOMOUNT; return add_flags; } static void __d_instantiate(struct dentry *dentry, struct inode *inode) { unsigned add_flags = d_flags_for_inode(inode); WARN_ON(d_in_lookup(dentry)); spin_lock(&dentry->d_lock); hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); raw_write_seqcount_begin(&dentry->d_seq); __d_set_inode_and_type(dentry, inode, add_flags); raw_write_seqcount_end(&dentry->d_seq); fsnotify_update_flags(dentry); spin_unlock(&dentry->d_lock); } /** * d_instantiate - fill in inode information for a dentry * @entry: dentry to complete * @inode: inode to attach to this dentry * * Fill in inode information in the entry. * * This turns negative dentries into productive full members * of society. * * NOTE! This assumes that the inode count has been incremented * (or otherwise set) by the caller to indicate that it is now * in use by the dcache. 
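 */

/*
 * Illustrative sketch (editorial addition): the shape of a minimal ->create()
 * in a simple in-memory filesystem.  The "example_" name is hypothetical; the
 * point is that the reference returned by new_inode() is handed to the dcache
 * via d_instantiate(), exactly as the note above requires.
 */
static __maybe_unused int example_create(struct inode *dir,
					 struct dentry *dentry, umode_t mode)
{
	struct inode *inode = new_inode(dir->i_sb);

	if (!inode)
		return -ENOSPC;
	inode->i_ino = get_next_ino();
	inode->i_mode = mode;
	/* the new_inode() reference now belongs to the dcache */
	d_instantiate(dentry, inode);
	return 0;
}

/*
 * d_instantiate(), documented above: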
*/ void d_instantiate(struct dentry *entry, struct inode * inode) { BUG_ON(!hlist_unhashed(&entry->d_u.d_alias)); if (inode) { security_d_instantiate(entry, inode); spin_lock(&inode->i_lock); __d_instantiate(entry, inode); spin_unlock(&inode->i_lock); } } EXPORT_SYMBOL(d_instantiate); /* * This should be equivalent to d_instantiate() + unlock_new_inode(), * with lockdep-related part of unlock_new_inode() done before * anything else. Use that instead of open-coding d_instantiate()/ * unlock_new_inode() combinations. */ void d_instantiate_new(struct dentry *entry, struct inode *inode) { BUG_ON(!hlist_unhashed(&entry->d_u.d_alias)); BUG_ON(!inode); lockdep_annotate_inode_mutex_key(inode); security_d_instantiate(entry, inode); spin_lock(&inode->i_lock); __d_instantiate(entry, inode); WARN_ON(!(inode->i_state & I_NEW)); inode->i_state &= ~I_NEW & ~I_CREATING; smp_mb(); wake_up_bit(&inode->i_state, __I_NEW); spin_unlock(&inode->i_lock); } EXPORT_SYMBOL(d_instantiate_new); struct dentry *d_make_root(struct inode *root_inode) { struct dentry *res = NULL; if (root_inode) { res = d_alloc_anon(root_inode->i_sb); if (res) d_instantiate(res, root_inode); else iput(root_inode); } return res; } EXPORT_SYMBOL(d_make_root); static struct dentry *__d_instantiate_anon(struct dentry *dentry, struct inode *inode, bool disconnected) { struct dentry *res; unsigned add_flags; security_d_instantiate(dentry, inode); spin_lock(&inode->i_lock); res = __d_find_any_alias(inode); if (res) { spin_unlock(&inode->i_lock); dput(dentry); goto out_iput; } /* attach a disconnected dentry */ add_flags = d_flags_for_inode(inode); if (disconnected) add_flags |= DCACHE_DISCONNECTED; spin_lock(&dentry->d_lock); __d_set_inode_and_type(dentry, inode, add_flags); hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); if (!disconnected) { hlist_bl_lock(&dentry->d_sb->s_roots); hlist_bl_add_head(&dentry->d_hash, &dentry->d_sb->s_roots); hlist_bl_unlock(&dentry->d_sb->s_roots); } spin_unlock(&dentry->d_lock); spin_unlock(&inode->i_lock); return dentry; out_iput: iput(inode); return res; } struct dentry *d_instantiate_anon(struct dentry *dentry, struct inode *inode) { return __d_instantiate_anon(dentry, inode, true); } EXPORT_SYMBOL(d_instantiate_anon); static struct dentry *__d_obtain_alias(struct inode *inode, bool disconnected) { struct dentry *tmp; struct dentry *res; if (!inode) return ERR_PTR(-ESTALE); if (IS_ERR(inode)) return ERR_CAST(inode); res = d_find_any_alias(inode); if (res) goto out_iput; tmp = d_alloc_anon(inode->i_sb); if (!tmp) { res = ERR_PTR(-ENOMEM); goto out_iput; } return __d_instantiate_anon(tmp, inode, disconnected); out_iput: iput(inode); return res; } /** * d_obtain_alias - find or allocate a DISCONNECTED dentry for a given inode * @inode: inode to allocate the dentry for * * Obtain a dentry for an inode resulting from NFS filehandle conversion or * similar open by handle operations. The returned dentry may be anonymous, * or may have a full name (if the inode was already in the cache). * * When called on a directory inode, we must ensure that the inode only ever * has one dentry. If a dentry is found, that is returned instead of * allocating a new one. * * On successful return, the reference to the inode has been transferred * to the dentry. In case of an error the reference on the inode is released. * To make it easier to use in export operations a %NULL or IS_ERR inode may * be passed in and the error will be propagated to the return value, * with a %NULL @inode replaced by ERR_PTR(-ESTALE). 
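 */

/*
 * Illustrative sketch (editorial addition): how an exportable filesystem's
 * ->fh_to_dentry() typically ends.  ilookup() here is only a stand-in for the
 * filesystem's own inode lookup; the reference it returns is transferred to
 * d_obtain_alias(), which also maps a NULL inode to ERR_PTR(-ESTALE) for us.
 */
static __maybe_unused struct dentry *example_fh_to_dentry(struct super_block *sb,
							   unsigned long ino)
{
	struct inode *inode = ilookup(sb, ino);

	return d_obtain_alias(inode);
}

/*
 * d_obtain_alias(), documented above: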
*/ struct dentry *d_obtain_alias(struct inode *inode) { return __d_obtain_alias(inode, true); } EXPORT_SYMBOL(d_obtain_alias); /** * d_obtain_root - find or allocate a dentry for a given inode * @inode: inode to allocate the dentry for * * Obtain an IS_ROOT dentry for the root of a filesystem. * * We must ensure that directory inodes only ever have one dentry. If a * dentry is found, that is returned instead of allocating a new one. * * On successful return, the reference to the inode has been transferred * to the dentry. In case of an error the reference on the inode is * released. A %NULL or IS_ERR inode may be passed in and the * error will be propagated to the return value, with a %NULL @inode * replaced by ERR_PTR(-ESTALE). */ struct dentry *d_obtain_root(struct inode *inode) { return __d_obtain_alias(inode, false); } EXPORT_SYMBOL(d_obtain_root); /** * d_add_ci - lookup or allocate new dentry with case-exact name * @inode: the inode case-insensitive lookup has found * @dentry: the negative dentry that was passed to the parent's lookup func * @name: the case-exact name to be associated with the returned dentry * * This is to avoid filling the dcache with case-insensitive names to the * same inode, only the actual correct case is stored in the dcache for * case-insensitive filesystems. * * For a case-insensitive lookup match, if the case-exact dentry * already exists in the dcache, use it and return it. * * If no entry exists with the exact case name, allocate new dentry with * the exact case, and return the spliced entry. */ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, struct qstr *name) { struct dentry *found, *res; /* * First check if a dentry matching the name already exists, * if not go ahead and create it now. */ found = d_hash_and_lookup(dentry->d_parent, name); if (found) { iput(inode); return found; } if (d_in_lookup(dentry)) { found = d_alloc_parallel(dentry->d_parent, name, dentry->d_wait); if (IS_ERR(found) || !d_in_lookup(found)) { iput(inode); return found; } } else { found = d_alloc(dentry->d_parent, name); if (!found) { iput(inode); return ERR_PTR(-ENOMEM); } } res = d_splice_alias(inode, found); if (res) { dput(found); return res; } return found; } EXPORT_SYMBOL(d_add_ci); static inline bool d_same_name(const struct dentry *dentry, const struct dentry *parent, const struct qstr *name) { if (likely(!(parent->d_flags & DCACHE_OP_COMPARE))) { if (dentry->d_name.len != name->len) return false; return dentry_cmp(dentry, name->name, name->len) == 0; } return parent->d_op->d_compare(dentry, dentry->d_name.len, dentry->d_name.name, name) == 0; } /** * __d_lookup_rcu - search for a dentry (racy, store-free) * @parent: parent dentry * @name: qstr of name we wish to find * @seqp: returns d_seq value at the point where the dentry was found * Returns: dentry, or NULL * * __d_lookup_rcu is the dcache lookup function for rcu-walk name * resolution (store-free path walking) design described in * Documentation/filesystems/path-lookup.txt. * * This is not to be used outside core vfs. * * __d_lookup_rcu must only be used in rcu-walk mode, ie. with vfsmount lock * held, and rcu_read_lock held. The returned dentry must not be stored into * without taking d_lock and checking d_seq sequence count against @seq * returned here. * * A refcount may be taken on the found dentry with the d_rcu_to_refcount * function.
* * Alternatively, __d_lookup_rcu may be called again to look up the child of * the returned dentry, so long as its parent's seqlock is checked after the * child is looked up. Thus, an interlocking stepping of sequence lock checks * is formed, giving integrity down the path walk. * * NOTE! The caller *has* to check the resulting dentry against the sequence * number we've returned before using any of the resulting dentry state! */ struct dentry *__d_lookup_rcu(const struct dentry *parent, const struct qstr *name, unsigned *seqp) { u64 hashlen = name->hash_len; const unsigned char *str = name->name; struct hlist_bl_head *b = d_hash(hashlen_hash(hashlen)); struct hlist_bl_node *node; struct dentry *dentry; /* * Note: There is significant duplication with __d_lookup_rcu which is * required to prevent single threaded performance regressions * especially on architectures where smp_rmb (in seqcounts) are costly. * Keep the two functions in sync. */ /* * The hash list is protected using RCU. * * Carefully use d_seq when comparing a candidate dentry, to avoid * races with d_move(). * * It is possible that concurrent renames can mess up our list * walk here and result in missing our dentry, resulting in the * false-negative result. d_lookup() protects against concurrent * renames using rename_lock seqlock. * * See Documentation/filesystems/path-lookup.txt for more details. */ hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) { unsigned seq; seqretry: /* * The dentry sequence count protects us from concurrent * renames, and thus protects parent and name fields. * * The caller must perform a seqcount check in order * to do anything useful with the returned dentry. * * NOTE! We do a "raw" seqcount_begin here. That means that * we don't wait for the sequence count to stabilize if it * is in the middle of a sequence change. If we do the slow * dentry compare, we will do seqretries until it is stable, * and if we end up with a successful lookup, we actually * want to exit RCU lookup anyway. * * Note that raw_seqcount_begin still *does* smp_rmb(), so * we are still guaranteed NUL-termination of ->d_name.name. */ seq = raw_seqcount_begin(&dentry->d_seq); if (dentry->d_parent != parent) continue; if (d_unhashed(dentry)) continue; if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) { int tlen; const char *tname; if (dentry->d_name.hash != hashlen_hash(hashlen)) continue; tlen = dentry->d_name.len; tname = dentry->d_name.name; /* we want a consistent (name,len) pair */ if (read_seqcount_retry(&dentry->d_seq, seq)) { cpu_relax(); goto seqretry; } if (parent->d_op->d_compare(dentry, tlen, tname, name) != 0) continue; } else { if (dentry->d_name.hash_len != hashlen) continue; if (dentry_cmp(dentry, str, hashlen_len(hashlen)) != 0) continue; } *seqp = seq; return dentry; } return NULL; } /** * d_lookup - search for a dentry * @parent: parent dentry * @name: qstr of name we wish to find * Returns: dentry, or NULL * * d_lookup searches the children of the parent dentry for the name in * question. If the dentry is found its reference count is incremented and the * dentry is returned. The caller must use dput to free the entry when it has * finished using it. %NULL is returned if the dentry does not exist. 
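 */

/*
 * Illustrative sketch (editorial addition): looking up a child by name the
 * reference-taking way.  The qstr must carry the right hash, which is what
 * full_name_hash() (or d_hash_and_lookup() further down) provides; the
 * reference obtained here must eventually be balanced with dput().
 */
static __maybe_unused struct dentry *example_lookup_child(struct dentry *parent,
							   const char *name)
{
	struct qstr q = QSTR_INIT(name, strlen(name));

	q.hash = full_name_hash(parent, q.name, q.len);
	return d_lookup(parent, &q);	/* caller must dput() the result */
}

/*
 * d_lookup(), documented above: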
*/ struct dentry *d_lookup(const struct dentry *parent, const struct qstr *name) { struct dentry *dentry; unsigned seq; do { seq = read_seqbegin(&rename_lock); dentry = __d_lookup(parent, name); if (dentry) break; } while (read_seqretry(&rename_lock, seq)); return dentry; } EXPORT_SYMBOL(d_lookup); /** * __d_lookup - search for a dentry (racy) * @parent: parent dentry * @name: qstr of name we wish to find * Returns: dentry, or NULL * * __d_lookup is like d_lookup, however it may (rarely) return a * false-negative result due to unrelated rename activity. * * __d_lookup is slightly faster by avoiding rename_lock read seqlock, * however it must be used carefully, eg. with a following d_lookup in * the case of failure. * * __d_lookup callers must be commented. */ struct dentry *__d_lookup(const struct dentry *parent, const struct qstr *name) { unsigned int hash = name->hash; struct hlist_bl_head *b = d_hash(hash); struct hlist_bl_node *node; struct dentry *found = NULL; struct dentry *dentry; /* * Note: There is significant duplication with __d_lookup_rcu which is * required to prevent single threaded performance regressions * especially on architectures where smp_rmb (in seqcounts) are costly. * Keep the two functions in sync. */ /* * The hash list is protected using RCU. * * Take d_lock when comparing a candidate dentry, to avoid races * with d_move(). * * It is possible that concurrent renames can mess up our list * walk here and result in missing our dentry, resulting in the * false-negative result. d_lookup() protects against concurrent * renames using rename_lock seqlock. * * See Documentation/filesystems/path-lookup.txt for more details. */ rcu_read_lock(); hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) { if (dentry->d_name.hash != hash) continue; spin_lock(&dentry->d_lock); if (dentry->d_parent != parent) goto next; if (d_unhashed(dentry)) goto next; if (!d_same_name(dentry, parent, name)) goto next; dentry->d_lockref.count++; found = dentry; spin_unlock(&dentry->d_lock); break; next: spin_unlock(&dentry->d_lock); } rcu_read_unlock(); return found; } /** * d_hash_and_lookup - hash the qstr then search for a dentry * @dir: Directory to search in * @name: qstr of name we wish to find * * On lookup failure NULL is returned; on bad name - ERR_PTR(-error) */ struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name) { /* * Check for a fs-specific hash function. Note that we must * calculate the standard hash first, as the d_op->d_hash() * routine may choose to leave the hash value unchanged. */ name->hash = full_name_hash(dir, name->name, name->len); if (dir->d_flags & DCACHE_OP_HASH) { int err = dir->d_op->d_hash(dir, name); if (unlikely(err < 0)) return ERR_PTR(err); } return d_lookup(dir, name); } EXPORT_SYMBOL(d_hash_and_lookup); /* * When a file is deleted, we have two options: * - turn this dentry into a negative dentry * - unhash this dentry and free it. 
* * Usually, we want to just turn this into * a negative dentry, but if anybody else is * currently using the dentry or the inode * we can't do that and we fall back on removing * it from the hash queues and waiting for * it to be deleted later when it has no users */ /** * d_delete - delete a dentry * @dentry: The dentry to delete * * Turn the dentry into a negative dentry if possible, otherwise * remove it from the hash queues so it can be deleted later */ void d_delete(struct dentry * dentry) { struct inode *inode = dentry->d_inode; int isdir = d_is_dir(dentry); spin_lock(&inode->i_lock); spin_lock(&dentry->d_lock); /* * Are we the only user? */ if (dentry->d_lockref.count == 1) { dentry->d_flags &= ~DCACHE_CANT_MOUNT; dentry_unlink_inode(dentry); } else { __d_drop(dentry); spin_unlock(&dentry->d_lock); spin_unlock(&inode->i_lock); } fsnotify_nameremove(dentry, isdir); } EXPORT_SYMBOL(d_delete); static void __d_rehash(struct dentry *entry) { struct hlist_bl_head *b = d_hash(entry->d_name.hash); hlist_bl_lock(b); hlist_bl_add_head_rcu(&entry->d_hash, b); hlist_bl_unlock(b); } /** * d_rehash - add an entry back to the hash * @entry: dentry to add to the hash * * Adds a dentry to the hash according to its name. */ void d_rehash(struct dentry * entry) { spin_lock(&entry->d_lock); __d_rehash(entry); spin_unlock(&entry->d_lock); } EXPORT_SYMBOL(d_rehash); static inline unsigned start_dir_add(struct inode *dir) { for (;;) { unsigned n = dir->i_dir_seq; if (!(n & 1) && cmpxchg(&dir->i_dir_seq, n, n + 1) == n) return n; cpu_relax(); } } static inline void end_dir_add(struct inode *dir, unsigned n) { smp_store_release(&dir->i_dir_seq, n + 2); } static void d_wait_lookup(struct dentry *dentry) { if (d_in_lookup(dentry)) { DECLARE_WAITQUEUE(wait, current); add_wait_queue(dentry->d_wait, &wait); do { set_current_state(TASK_UNINTERRUPTIBLE); spin_unlock(&dentry->d_lock); schedule(); spin_lock(&dentry->d_lock); } while (d_in_lookup(dentry)); } } struct dentry *d_alloc_parallel(struct dentry *parent, const struct qstr *name, wait_queue_head_t *wq) { unsigned int hash = name->hash; struct hlist_bl_head *b = in_lookup_hash(parent, hash); struct hlist_bl_node *node; struct dentry *new = d_alloc(parent, name); struct dentry *dentry; unsigned seq, r_seq, d_seq; if (unlikely(!new)) return ERR_PTR(-ENOMEM); retry: rcu_read_lock(); seq = smp_load_acquire(&parent->d_inode->i_dir_seq); r_seq = read_seqbegin(&rename_lock); dentry = __d_lookup_rcu(parent, name, &d_seq); if (unlikely(dentry)) { if (!lockref_get_not_dead(&dentry->d_lockref)) { rcu_read_unlock(); goto retry; } if (read_seqcount_retry(&dentry->d_seq, d_seq)) { rcu_read_unlock(); dput(dentry); goto retry; } rcu_read_unlock(); dput(new); return dentry; } if (unlikely(read_seqretry(&rename_lock, r_seq))) { rcu_read_unlock(); goto retry; } if (unlikely(seq & 1)) { rcu_read_unlock(); goto retry; } hlist_bl_lock(b); if (unlikely(READ_ONCE(parent->d_inode->i_dir_seq) != seq)) { hlist_bl_unlock(b); rcu_read_unlock(); goto retry; } /* * No changes for the parent since the beginning of d_lookup(). * Since all removals from the chain happen with hlist_bl_lock(), * any potential in-lookup matches are going to stay here until * we unlock the chain. All fields are stable in everything * we encounter. 
*/ hlist_bl_for_each_entry(dentry, node, b, d_u.d_in_lookup_hash) { if (dentry->d_name.hash != hash) continue; if (dentry->d_parent != parent) continue; if (!d_same_name(dentry, parent, name)) continue; hlist_bl_unlock(b); /* now we can try to grab a reference */ if (!lockref_get_not_dead(&dentry->d_lockref)) { rcu_read_unlock(); goto retry; } rcu_read_unlock(); /* * somebody is likely to be still doing lookup for it; * wait for them to finish */ spin_lock(&dentry->d_lock); d_wait_lookup(dentry); /* * it's not in-lookup anymore; in principle we should repeat * everything from dcache lookup, but it's likely to be what * d_lookup() would've found anyway. If it is, just return it; * otherwise we really have to repeat the whole thing. */ if (unlikely(dentry->d_name.hash != hash)) goto mismatch; if (unlikely(dentry->d_parent != parent)) goto mismatch; if (unlikely(d_unhashed(dentry))) goto mismatch; if (unlikely(!d_same_name(dentry, parent, name))) goto mismatch; /* OK, it *is* a hashed match; return it */ spin_unlock(&dentry->d_lock); dput(new); return dentry; } rcu_read_unlock(); /* we can't take ->d_lock here; it's OK, though. */ new->d_flags |= DCACHE_PAR_LOOKUP; new->d_wait = wq; hlist_bl_add_head_rcu(&new->d_u.d_in_lookup_hash, b); hlist_bl_unlock(b); return new; mismatch: spin_unlock(&dentry->d_lock); dput(dentry); goto retry; } EXPORT_SYMBOL(d_alloc_parallel); void __d_lookup_done(struct dentry *dentry) { struct hlist_bl_head *b = in_lookup_hash(dentry->d_parent, dentry->d_name.hash); hlist_bl_lock(b); dentry->d_flags &= ~DCACHE_PAR_LOOKUP; __hlist_bl_del(&dentry->d_u.d_in_lookup_hash); wake_up_all(dentry->d_wait); dentry->d_wait = NULL; hlist_bl_unlock(b); INIT_HLIST_NODE(&dentry->d_u.d_alias); INIT_LIST_HEAD(&dentry->d_lru); } EXPORT_SYMBOL(__d_lookup_done); /* inode->i_lock held if inode is non-NULL */ static inline void __d_add(struct dentry *dentry, struct inode *inode) { struct inode *dir = NULL; unsigned n; spin_lock(&dentry->d_lock); if (unlikely(d_in_lookup(dentry))) { dir = dentry->d_parent->d_inode; n = start_dir_add(dir); __d_lookup_done(dentry); } if (inode) { unsigned add_flags = d_flags_for_inode(inode); hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); raw_write_seqcount_begin(&dentry->d_seq); __d_set_inode_and_type(dentry, inode, add_flags); raw_write_seqcount_end(&dentry->d_seq); fsnotify_update_flags(dentry); } __d_rehash(dentry); if (dir) end_dir_add(dir, n); spin_unlock(&dentry->d_lock); if (inode) spin_unlock(&inode->i_lock); } /** * d_add - add dentry to hash queues * @entry: dentry to add * @inode: The inode to attach to this dentry * * This adds the entry to the hash queues and initializes @inode. * The entry was actually filled in earlier during d_alloc(). */ void d_add(struct dentry *entry, struct inode *inode) { if (inode) { security_d_instantiate(entry, inode); spin_lock(&inode->i_lock); } __d_add(entry, inode); } EXPORT_SYMBOL(d_add); /** * d_exact_alias - find and hash an exact unhashed alias * @entry: dentry to add * @inode: The inode to go with this dentry * * If an unhashed dentry with the same name/parent and desired * inode already exists, hash and return it. Otherwise, return * NULL. * * Parent directory should be locked. 
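 */

/*
 * Illustrative sketch (editorial addition): the tail of a simple,
 * non-exporting filesystem's ->lookup().  The inode (possibly NULL on a
 * miss) comes from the filesystem's own directory search; d_add() hashes
 * the dentry either way, so misses are cached as negative dentries.
 */
static __maybe_unused struct dentry *example_lookup_tail(struct dentry *dentry,
							  struct inode *inode)
{
	d_add(dentry, inode);
	return NULL;
}

/*
 * d_exact_alias(), documented above: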
*/ struct dentry *d_exact_alias(struct dentry *entry, struct inode *inode) { struct dentry *alias; unsigned int hash = entry->d_name.hash; spin_lock(&inode->i_lock); hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { /* * Don't need alias->d_lock here, because aliases with * d_parent == entry->d_parent are not subject to name or * parent changes, because the parent inode i_mutex is held. */ if (alias->d_name.hash != hash) continue; if (alias->d_parent != entry->d_parent) continue; if (!d_same_name(alias, entry->d_parent, &entry->d_name)) continue; spin_lock(&alias->d_lock); if (!d_unhashed(alias)) { spin_unlock(&alias->d_lock); alias = NULL; } else { __dget_dlock(alias); __d_rehash(alias); spin_unlock(&alias->d_lock); } spin_unlock(&inode->i_lock); return alias; } spin_unlock(&inode->i_lock); return NULL; } EXPORT_SYMBOL(d_exact_alias); static void swap_names(struct dentry *dentry, struct dentry *target) { if (unlikely(dname_external(target))) { if (unlikely(dname_external(dentry))) { /* * Both external: swap the pointers */ swap(target->d_name.name, dentry->d_name.name); } else { /* * dentry:internal, target:external. Steal target's * storage and make target internal. */ memcpy(target->d_iname, dentry->d_name.name, dentry->d_name.len + 1); dentry->d_name.name = target->d_name.name; target->d_name.name = target->d_iname; } } else { if (unlikely(dname_external(dentry))) { /* * dentry:external, target:internal. Give dentry's * storage to target and make dentry internal */ memcpy(dentry->d_iname, target->d_name.name, target->d_name.len + 1); target->d_name.name = dentry->d_name.name; dentry->d_name.name = dentry->d_iname; } else { /* * Both are internal. */ unsigned int i; BUILD_BUG_ON(!IS_ALIGNED(DNAME_INLINE_LEN, sizeof(long))); for (i = 0; i < DNAME_INLINE_LEN / sizeof(long); i++) { swap(((long *) &dentry->d_iname)[i], ((long *) &target->d_iname)[i]); } } } swap(dentry->d_name.hash_len, target->d_name.hash_len); } static void copy_name(struct dentry *dentry, struct dentry *target) { struct external_name *old_name = NULL; if (unlikely(dname_external(dentry))) old_name = external_name(dentry); if (unlikely(dname_external(target))) { atomic_inc(&external_name(target)->u.count); dentry->d_name = target->d_name; } else { memcpy(dentry->d_iname, target->d_name.name, target->d_name.len + 1); dentry->d_name.name = dentry->d_iname; dentry->d_name.hash_len = target->d_name.hash_len; } if (old_name && likely(atomic_dec_and_test(&old_name->u.count))) call_rcu(&old_name->u.head, __d_free_external_name); } /* * When d_splice_alias() moves a directory's encrypted alias to its decrypted * alias as a result of the encryption key being added, DCACHE_ENCRYPTED_NAME * must be cleared. Note that we don't have to support arbitrary moves of this * flag because fscrypt doesn't allow encrypted aliases to be the source or * target of a rename(). */ static inline void fscrypt_handle_d_move(struct dentry *dentry) { #if IS_ENABLED(CONFIG_FS_ENCRYPTION) dentry->d_flags &= ~DCACHE_ENCRYPTED_NAME; #endif } /* * __d_move - move a dentry * @dentry: entry to move * @target: new dentry * @exchange: exchange the two dentries * * Update the dcache to reflect the move of a file name. Negative * dcache entries should not be moved in this way. Caller must hold * rename_lock, the i_mutex of the source and target directories, * and the sb->s_vfs_rename_mutex if they differ. See lock_rename(). 
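 */

/*
 * Illustrative sketch (editorial addition): d_move() is normally called by
 * the VFS at the end of a successful vfs_rename(); a filesystem that sets
 * FS_RENAME_DOES_D_MOVE instead calls it from its own ->rename() once the
 * backing store has been updated.
 */
static __maybe_unused void example_finish_rename(struct dentry *old_dentry,
						 struct dentry *new_dentry)
{
	/* rename_lock is taken by d_move() itself */
	d_move(old_dentry, new_dentry);
}

/*
 * __d_move(), described above, is the locked workhorse behind d_move():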
*/ static void __d_move(struct dentry *dentry, struct dentry *target, bool exchange) { struct dentry *old_parent, *p; struct inode *dir = NULL; unsigned n; WARN_ON(!dentry->d_inode); if (WARN_ON(dentry == target)) return; BUG_ON(d_ancestor(target, dentry)); old_parent = dentry->d_parent; p = d_ancestor(old_parent, target); if (IS_ROOT(dentry)) { BUG_ON(p); spin_lock(&target->d_parent->d_lock); } else if (!p) { /* target is not a descendent of dentry->d_parent */ spin_lock(&target->d_parent->d_lock); spin_lock_nested(&old_parent->d_lock, DENTRY_D_LOCK_NESTED); } else { BUG_ON(p == dentry); spin_lock(&old_parent->d_lock); if (p != target) spin_lock_nested(&target->d_parent->d_lock, DENTRY_D_LOCK_NESTED); } spin_lock_nested(&dentry->d_lock, 2); spin_lock_nested(&target->d_lock, 3); if (unlikely(d_in_lookup(target))) { dir = target->d_parent->d_inode; n = start_dir_add(dir); __d_lookup_done(target); } write_seqcount_begin(&dentry->d_seq); write_seqcount_begin_nested(&target->d_seq, DENTRY_D_LOCK_NESTED); /* unhash both */ if (!d_unhashed(dentry)) ___d_drop(dentry); if (!d_unhashed(target)) ___d_drop(target); /* ... and switch them in the tree */ dentry->d_parent = target->d_parent; if (!exchange) { copy_name(dentry, target); target->d_hash.pprev = NULL; dentry->d_parent->d_lockref.count++; if (dentry != old_parent) /* wasn't IS_ROOT */ WARN_ON(!--old_parent->d_lockref.count); } else { target->d_parent = old_parent; swap_names(dentry, target); list_move(&target->d_child, &target->d_parent->d_subdirs); __d_rehash(target); fsnotify_update_flags(target); } list_move(&dentry->d_child, &dentry->d_parent->d_subdirs); __d_rehash(dentry); fsnotify_update_flags(dentry); fscrypt_handle_d_move(dentry); write_seqcount_end(&target->d_seq); write_seqcount_end(&dentry->d_seq); if (dir) end_dir_add(dir, n); if (dentry->d_parent != old_parent) spin_unlock(&dentry->d_parent->d_lock); if (dentry != old_parent) spin_unlock(&old_parent->d_lock); spin_unlock(&target->d_lock); spin_unlock(&dentry->d_lock); } /* * d_move - move a dentry * @dentry: entry to move * @target: new dentry * * Update the dcache to reflect the move of a file name. Negative * dcache entries should not be moved in this way. See the locking * requirements for __d_move. */ void d_move(struct dentry *dentry, struct dentry *target) { write_seqlock(&rename_lock); __d_move(dentry, target, false); write_sequnlock(&rename_lock); } EXPORT_SYMBOL(d_move); /* * d_exchange - exchange two dentries * @dentry1: first dentry * @dentry2: second dentry */ void d_exchange(struct dentry *dentry1, struct dentry *dentry2) { write_seqlock(&rename_lock); WARN_ON(!dentry1->d_inode); WARN_ON(!dentry2->d_inode); WARN_ON(IS_ROOT(dentry1)); WARN_ON(IS_ROOT(dentry2)); __d_move(dentry1, dentry2, true); write_sequnlock(&rename_lock); } /** * d_ancestor - search for an ancestor * @p1: ancestor dentry * @p2: child dentry * * Returns the ancestor dentry of p2 which is a child of p1, if p1 is * an ancestor of p2, else NULL. */ struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2) { struct dentry *p; for (p = p2; !IS_ROOT(p); p = p->d_parent) { if (p->d_parent == p1) return p; } return NULL; } /* * This helper attempts to cope with remotely renamed directories * * It assumes that the caller is already holding * dentry->d_parent->d_inode->i_mutex, and rename_lock * * Note: If ever the locking in lock_rename() changes, then please * remember to update this too... 
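 */

/*
 * Illustrative sketch (editorial addition): the canonical ->lookup() tail of
 * an exportable filesystem, the main path by which the helper below is
 * reached (via d_splice_alias()).  The inode, possibly NULL or an ERR_PTR,
 * comes from the filesystem's own lookup; d_splice_alias() handles all three
 * cases and may hand back a preexisting alias instead of NULL.
 */
static __maybe_unused struct dentry *example_exported_lookup_tail(
		struct dentry *dentry, struct inode *inode)
{
	return d_splice_alias(inode, dentry);
}

/*
 * __d_unalias(), the remote-rename helper described above: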
*/ static int __d_unalias(struct inode *inode, struct dentry *dentry, struct dentry *alias) { struct mutex *m1 = NULL; struct rw_semaphore *m2 = NULL; int ret = -ESTALE; /* If alias and dentry share a parent, then no extra locks required */ if (alias->d_parent == dentry->d_parent) goto out_unalias; /* See lock_rename() */ if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex)) goto out_err; m1 = &dentry->d_sb->s_vfs_rename_mutex; if (!inode_trylock_shared(alias->d_parent->d_inode)) goto out_err; m2 = &alias->d_parent->d_inode->i_rwsem; out_unalias: __d_move(alias, dentry, false); ret = 0; out_err: if (m2) up_read(m2); if (m1) mutex_unlock(m1); return ret; } /** * d_splice_alias - splice a disconnected dentry into the tree if one exists * @inode: the inode which may have a disconnected dentry * @dentry: a negative dentry which we want to point to the inode. * * If inode is a directory and has an IS_ROOT alias, then d_move that in * place of the given dentry and return it, else simply d_add the inode * to the dentry and return NULL. * * If a non-IS_ROOT directory is found, the filesystem is corrupt, and * we should error out: directories can't have multiple aliases. * * This is needed in the lookup routine of any filesystem that is exportable * (via knfsd) so that we can build dcache paths to directories effectively. * * If a dentry was found and moved, then it is returned. Otherwise NULL * is returned. This matches the expected return value of ->lookup. * * Cluster filesystems may call this function with a negative, hashed dentry. * In that case, we know that the inode will be a regular file, and also this * will only occur during atomic_open. So we need to check for the dentry * being already hashed only in the final case. */ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) { if (IS_ERR(inode)) return ERR_CAST(inode); BUG_ON(!d_unhashed(dentry)); if (!inode) goto out; security_d_instantiate(dentry, inode); spin_lock(&inode->i_lock); if (S_ISDIR(inode->i_mode)) { struct dentry *new = __d_find_any_alias(inode); if (unlikely(new)) { /* The reference to new ensures it remains an alias */ spin_unlock(&inode->i_lock); write_seqlock(&rename_lock); if (unlikely(d_ancestor(new, dentry))) { write_sequnlock(&rename_lock); dput(new); new = ERR_PTR(-ELOOP); pr_warn_ratelimited( "VFS: Lookup of '%s' in %s %s" " would have caused loop\n", dentry->d_name.name, inode->i_sb->s_type->name, inode->i_sb->s_id); } else if (!IS_ROOT(new)) { struct dentry *old_parent = dget(new->d_parent); int err = __d_unalias(inode, dentry, new); write_sequnlock(&rename_lock); if (err) { dput(new); new = ERR_PTR(err); } dput(old_parent); } else { __d_move(new, dentry, false); write_sequnlock(&rename_lock); } iput(inode); return new; } } out: __d_add(dentry, inode); return NULL; } EXPORT_SYMBOL(d_splice_alias); /* * Test whether new_dentry is a subdirectory of old_dentry. * * Trivially implemented using the dcache structure */ /** * is_subdir - is new dentry a subdirectory of old_dentry * @new_dentry: new dentry * @old_dentry: old dentry * * Returns true if new_dentry is a subdirectory of the parent (at any depth). * Returns false otherwise. 
* Caller must ensure that "new_dentry" is pinned before calling is_subdir() */ bool is_subdir(struct dentry *new_dentry, struct dentry *old_dentry) { bool result; unsigned seq; if (new_dentry == old_dentry) return true; do { /* for restarting inner loop in case of seq retry */ seq = read_seqbegin(&rename_lock); /* * Need rcu_readlock to protect against the d_parent trashing * due to d_move */ rcu_read_lock(); if (d_ancestor(old_dentry, new_dentry)) result = true; else result = false; rcu_read_unlock(); } while (read_seqretry(&rename_lock, seq)); return result; } EXPORT_SYMBOL(is_subdir); static enum d_walk_ret d_genocide_kill(void *data, struct dentry *dentry) { struct dentry *root = data; if (dentry != root) { if (d_unhashed(dentry) || !dentry->d_inode) return D_WALK_SKIP; if (!(dentry->d_flags & DCACHE_GENOCIDE)) { dentry->d_flags |= DCACHE_GENOCIDE; dentry->d_lockref.count--; } } return D_WALK_CONTINUE; } void d_genocide(struct dentry *parent) { d_walk(parent, parent, d_genocide_kill); } EXPORT_SYMBOL(d_genocide); void d_tmpfile(struct dentry *dentry, struct inode *inode) { inode_dec_link_count(inode); BUG_ON(dentry->d_name.name != dentry->d_iname || !hlist_unhashed(&dentry->d_u.d_alias) || !d_unlinked(dentry)); spin_lock(&dentry->d_parent->d_lock); spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); dentry->d_name.len = sprintf(dentry->d_iname, "#%llu", (unsigned long long)inode->i_ino); spin_unlock(&dentry->d_lock); spin_unlock(&dentry->d_parent->d_lock); d_instantiate(dentry, inode); } EXPORT_SYMBOL(d_tmpfile); static __initdata unsigned long dhash_entries; static int __init set_dhash_entries(char *str) { if (!str) return 0; dhash_entries = simple_strtoul(str, &str, 0); return 1; } __setup("dhash_entries=", set_dhash_entries); static void __init dcache_init_early(void) { /* If hashes are distributed across NUMA nodes, defer * hash allocation until vmalloc space is available. */ if (hashdist) return; dentry_hashtable = alloc_large_system_hash("Dentry cache", sizeof(struct hlist_bl_head), dhash_entries, 13, HASH_EARLY | HASH_ZERO, &d_hash_shift, NULL, 0, 0); d_hash_shift = 32 - d_hash_shift; } static void __init dcache_init(void) { /* * A constructor could be added for stable state like the lists, * but it is probably not worth it because of the cache nature * of the dcache. */ dentry_cache = KMEM_CACHE_USERCOPY(dentry, SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD|SLAB_ACCOUNT, d_iname); /* Hash may have been set up in dcache_init_early */ if (!hashdist) return; dentry_hashtable = alloc_large_system_hash("Dentry cache", sizeof(struct hlist_bl_head), dhash_entries, 13, HASH_ZERO, &d_hash_shift, NULL, 0, 0); d_hash_shift = 32 - d_hash_shift; } /* SLAB cache for __getname() consumers */ struct kmem_cache *names_cachep __read_mostly; EXPORT_SYMBOL(names_cachep); void __init vfs_caches_init_early(void) { int i; for (i = 0; i < ARRAY_SIZE(in_lookup_hashtable); i++) INIT_HLIST_BL_HEAD(&in_lookup_hashtable[i]); dcache_init_early(); inode_init_early(); } void __init vfs_caches_init(void) { names_cachep = kmem_cache_create_usercopy("names_cache", PATH_MAX, 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, 0, PATH_MAX, NULL); dcache_init(); inode_init(); files_init(); files_maxfiles_init(); mnt_init(); bdev_cache_init(); chrdev_init(); }
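As a rough illustration of the d_splice_alias() contract documented above, a hypothetical exportable filesystem's ->lookup method might use it as sketched below; examplefs_find_entry() and examplefs_iget() are invented placeholders for the filesystem's own directory search and inode lookup, and only the d_splice_alias() calling convention is taken from the code above.

/*
 * Illustrative sketch only (not part of fs/dcache.c): a hypothetical
 * exportable filesystem's ->lookup.  examplefs_find_entry() and
 * examplefs_iget() do not exist; they stand in for the filesystem's
 * own directory search and inode lookup.
 */
static struct dentry *examplefs_lookup(struct inode *dir,
				       struct dentry *dentry,
				       unsigned int flags)
{
	struct inode *inode = NULL;
	u64 ino;

	/* Hypothetical on-disk search; 0 means the name was found. */
	if (examplefs_find_entry(dir, &dentry->d_name, &ino) == 0)
		inode = examplefs_iget(dir->i_sb, ino);	/* may be ERR_PTR() */

	/*
	 * d_splice_alias() copes with a NULL or ERR_PTR() inode, reuses a
	 * disconnected IS_ROOT directory alias when one exists, and returns
	 * exactly what ->lookup is expected to return.
	 */
	return d_splice_alias(inode, dentry);
}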
/* * Devices PM
QoS constraints management * * Copyright (C) 2011 Texas Instruments, Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. * * * This module exposes the interface to kernel space for specifying * per-device PM QoS dependencies. It provides infrastructure for registration * of: * * Dependents on a QoS value : register requests * Watchers of QoS value : get notified when target QoS value changes * * This QoS design is best effort based. Dependents register their QoS needs. * Watchers register to keep track of the current QoS needs of the system. * Watchers can register a per-device notification callback using the * dev_pm_qos_*_notifier API. The notification chain data is stored in the * per-device constraint data struct. * * Note about the per-device constraint data struct allocation: * . The per-device constraints data struct ptr is tored into the device * dev_pm_info. * . To minimize the data usage by the per-device constraints, the data struct * is only allocated at the first call to dev_pm_qos_add_request. * . The data is later free'd when the device is removed from the system. * . A global mutex protects the constraints users from the data being * allocated and free'd. */ #include <linux/pm_qos.h> #include <linux/spinlock.h> #include <linux/slab.h> #include <linux/device.h> #include <linux/mutex.h> #include <linux/export.h> #include <linux/pm_runtime.h> #include <linux/err.h> #include <trace/events/power.h> #include "power.h" static DEFINE_MUTEX(dev_pm_qos_mtx); static DEFINE_MUTEX(dev_pm_qos_sysfs_mtx); /** * __dev_pm_qos_flags - Check PM QoS flags for a given device. * @dev: Device to check the PM QoS flags for. * @mask: Flags to check against. * * This routine must be called with dev->power.lock held. */ enum pm_qos_flags_status __dev_pm_qos_flags(struct device *dev, s32 mask) { struct dev_pm_qos *qos = dev->power.qos; struct pm_qos_flags *pqf; s32 val; lockdep_assert_held(&dev->power.lock); if (IS_ERR_OR_NULL(qos)) return PM_QOS_FLAGS_UNDEFINED; pqf = &qos->flags; if (list_empty(&pqf->list)) return PM_QOS_FLAGS_UNDEFINED; val = pqf->effective_flags & mask; if (val) return (val == mask) ? PM_QOS_FLAGS_ALL : PM_QOS_FLAGS_SOME; return PM_QOS_FLAGS_NONE; } /** * dev_pm_qos_flags - Check PM QoS flags for a given device (locked). * @dev: Device to check the PM QoS flags for. * @mask: Flags to check against. */ enum pm_qos_flags_status dev_pm_qos_flags(struct device *dev, s32 mask) { unsigned long irqflags; enum pm_qos_flags_status ret; spin_lock_irqsave(&dev->power.lock, irqflags); ret = __dev_pm_qos_flags(dev, mask); spin_unlock_irqrestore(&dev->power.lock, irqflags); return ret; } EXPORT_SYMBOL_GPL(dev_pm_qos_flags); /** * __dev_pm_qos_read_value - Get PM QoS constraint for a given device. * @dev: Device to get the PM QoS constraint value for. * * This routine must be called with dev->power.lock held. */ s32 __dev_pm_qos_read_value(struct device *dev) { lockdep_assert_held(&dev->power.lock); return dev_pm_qos_raw_read_value(dev); } /** * dev_pm_qos_read_value - Get PM QoS constraint for a given device (locked). * @dev: Device to get the PM QoS constraint value for. */ s32 dev_pm_qos_read_value(struct device *dev) { unsigned long flags; s32 ret; spin_lock_irqsave(&dev->power.lock, flags); ret = __dev_pm_qos_read_value(dev); spin_unlock_irqrestore(&dev->power.lock, flags); return ret; } /** * apply_constraint - Add/modify/remove device PM QoS request. 
* @req: Constraint request to apply * @action: Action to perform (add/update/remove). * @value: Value to assign to the QoS request. * * Internal function to update the constraints list using the PM QoS core * code and if needed call the per-device callbacks. */ static int apply_constraint(struct dev_pm_qos_request *req, enum pm_qos_req_action action, s32 value) { struct dev_pm_qos *qos = req->dev->power.qos; int ret; switch(req->type) { case DEV_PM_QOS_RESUME_LATENCY: if (WARN_ON(action != PM_QOS_REMOVE_REQ && value < 0)) value = 0; ret = pm_qos_update_target(&qos->resume_latency, &req->data.pnode, action, value); break; case DEV_PM_QOS_LATENCY_TOLERANCE: ret = pm_qos_update_target(&qos->latency_tolerance, &req->data.pnode, action, value); if (ret) { value = pm_qos_read_value(&qos->latency_tolerance); req->dev->power.set_latency_tolerance(req->dev, value); } break; case DEV_PM_QOS_FLAGS: ret = pm_qos_update_flags(&qos->flags, &req->data.flr, action, value); break; default: ret = -EINVAL; } return ret; } /* * dev_pm_qos_constraints_allocate * @dev: device to allocate data for * * Called at the first call to add_request, for constraint data allocation * Must be called with the dev_pm_qos_mtx mutex held */ static int dev_pm_qos_constraints_allocate(struct device *dev) { struct dev_pm_qos *qos; struct pm_qos_constraints *c; struct blocking_notifier_head *n; qos = kzalloc(sizeof(*qos), GFP_KERNEL); if (!qos) return -ENOMEM; n = kzalloc(sizeof(*n), GFP_KERNEL); if (!n) { kfree(qos); return -ENOMEM; } BLOCKING_INIT_NOTIFIER_HEAD(n); c = &qos->resume_latency; plist_head_init(&c->list); c->target_value = PM_QOS_RESUME_LATENCY_DEFAULT_VALUE; c->default_value = PM_QOS_RESUME_LATENCY_DEFAULT_VALUE; c->no_constraint_value = PM_QOS_RESUME_LATENCY_NO_CONSTRAINT; c->type = PM_QOS_MIN; c->notifiers = n; c = &qos->latency_tolerance; plist_head_init(&c->list); c->target_value = PM_QOS_LATENCY_TOLERANCE_DEFAULT_VALUE; c->default_value = PM_QOS_LATENCY_TOLERANCE_DEFAULT_VALUE; c->no_constraint_value = PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT; c->type = PM_QOS_MIN; INIT_LIST_HEAD(&qos->flags.list); spin_lock_irq(&dev->power.lock); dev->power.qos = qos; spin_unlock_irq(&dev->power.lock); return 0; } static void __dev_pm_qos_hide_latency_limit(struct device *dev); static void __dev_pm_qos_hide_flags(struct device *dev); /** * dev_pm_qos_constraints_destroy * @dev: target device * * Called from the device PM subsystem on device removal under device_pm_lock(). */ void dev_pm_qos_constraints_destroy(struct device *dev) { struct dev_pm_qos *qos; struct dev_pm_qos_request *req, *tmp; struct pm_qos_constraints *c; struct pm_qos_flags *f; mutex_lock(&dev_pm_qos_sysfs_mtx); /* * If the device's PM QoS resume latency limit or PM QoS flags have been * exposed to user space, they have to be hidden at this point. */ pm_qos_sysfs_remove_resume_latency(dev); pm_qos_sysfs_remove_flags(dev); mutex_lock(&dev_pm_qos_mtx); __dev_pm_qos_hide_latency_limit(dev); __dev_pm_qos_hide_flags(dev); qos = dev->power.qos; if (!qos) goto out; /* Flush the constraints lists for the device. 
*/ c = &qos->resume_latency; plist_for_each_entry_safe(req, tmp, &c->list, data.pnode) { /* * Update constraints list and call the notification * callbacks if needed */ apply_constraint(req, PM_QOS_REMOVE_REQ, PM_QOS_DEFAULT_VALUE); memset(req, 0, sizeof(*req)); } c = &qos->latency_tolerance; plist_for_each_entry_safe(req, tmp, &c->list, data.pnode) { apply_constraint(req, PM_QOS_REMOVE_REQ, PM_QOS_DEFAULT_VALUE); memset(req, 0, sizeof(*req)); } f = &qos->flags; list_for_each_entry_safe(req, tmp, &f->list, data.flr.node) { apply_constraint(req, PM_QOS_REMOVE_REQ, PM_QOS_DEFAULT_VALUE); memset(req, 0, sizeof(*req)); } spin_lock_irq(&dev->power.lock); dev->power.qos = ERR_PTR(-ENODEV); spin_unlock_irq(&dev->power.lock); kfree(qos->resume_latency.notifiers); kfree(qos); out: mutex_unlock(&dev_pm_qos_mtx); mutex_unlock(&dev_pm_qos_sysfs_mtx); } static bool dev_pm_qos_invalid_req_type(struct device *dev, enum dev_pm_qos_req_type type) { return type == DEV_PM_QOS_LATENCY_TOLERANCE && !dev->power.set_latency_tolerance; } static int __dev_pm_qos_add_request(struct device *dev, struct dev_pm_qos_request *req, enum dev_pm_qos_req_type type, s32 value) { int ret = 0; if (!dev || !req || dev_pm_qos_invalid_req_type(dev, type)) return -EINVAL; if (WARN(dev_pm_qos_request_active(req), "%s() called for already added request\n", __func__)) return -EINVAL; if (IS_ERR(dev->power.qos)) ret = -ENODEV; else if (!dev->power.qos) ret = dev_pm_qos_constraints_allocate(dev); trace_dev_pm_qos_add_request(dev_name(dev), type, value); if (!ret) { req->dev = dev; req->type = type; ret = apply_constraint(req, PM_QOS_ADD_REQ, value); } return ret; } /** * dev_pm_qos_add_request - inserts new qos request into the list * @dev: target device for the constraint * @req: pointer to a preallocated handle * @type: type of the request * @value: defines the qos request * * This function inserts a new entry in the device constraints list of * requested qos performance characteristics. It recomputes the aggregate * QoS expectations of parameters and initializes the dev_pm_qos_request * handle. Caller needs to save this handle for later use in updates and * removal. * * Returns 1 if the aggregated constraint value has changed, * 0 if the aggregated constraint value has not changed, * -EINVAL in case of wrong parameters, -ENOMEM if there's not enough memory * to allocate for data structures, -ENODEV if the device has just been removed * from the system. * * Callers should ensure that the target device is not RPM_SUSPENDED before * using this function for requests of type DEV_PM_QOS_FLAGS. */ int dev_pm_qos_add_request(struct device *dev, struct dev_pm_qos_request *req, enum dev_pm_qos_req_type type, s32 value) { int ret; mutex_lock(&dev_pm_qos_mtx); ret = __dev_pm_qos_add_request(dev, req, type, value); mutex_unlock(&dev_pm_qos_mtx); return ret; } EXPORT_SYMBOL_GPL(dev_pm_qos_add_request); /** * __dev_pm_qos_update_request - Modify an existing device PM QoS request. * @req : PM QoS request to modify. * @new_value: New value to request. 
*/ static int __dev_pm_qos_update_request(struct dev_pm_qos_request *req, s32 new_value) { s32 curr_value; int ret = 0; if (!req) /*guard against callers passing in null */ return -EINVAL; if (WARN(!dev_pm_qos_request_active(req), "%s() called for unknown object\n", __func__)) return -EINVAL; if (IS_ERR_OR_NULL(req->dev->power.qos)) return -ENODEV; switch(req->type) { case DEV_PM_QOS_RESUME_LATENCY: case DEV_PM_QOS_LATENCY_TOLERANCE: curr_value = req->data.pnode.prio; break; case DEV_PM_QOS_FLAGS: curr_value = req->data.flr.flags; break; default: return -EINVAL; } trace_dev_pm_qos_update_request(dev_name(req->dev), req->type, new_value); if (curr_value != new_value) ret = apply_constraint(req, PM_QOS_UPDATE_REQ, new_value); return ret; } /** * dev_pm_qos_update_request - modifies an existing qos request * @req : handle to list element holding a dev_pm_qos request to use * @new_value: defines the qos request * * Updates an existing dev PM qos request along with updating the * target value. * * Attempts are made to make this code callable on hot code paths. * * Returns 1 if the aggregated constraint value has changed, * 0 if the aggregated constraint value has not changed, * -EINVAL in case of wrong parameters, -ENODEV if the device has been * removed from the system * * Callers should ensure that the target device is not RPM_SUSPENDED before * using this function for requests of type DEV_PM_QOS_FLAGS. */ int dev_pm_qos_update_request(struct dev_pm_qos_request *req, s32 new_value) { int ret; mutex_lock(&dev_pm_qos_mtx); ret = __dev_pm_qos_update_request(req, new_value); mutex_unlock(&dev_pm_qos_mtx); return ret; } EXPORT_SYMBOL_GPL(dev_pm_qos_update_request); static int __dev_pm_qos_remove_request(struct dev_pm_qos_request *req) { int ret; if (!req) /*guard against callers passing in null */ return -EINVAL; if (WARN(!dev_pm_qos_request_active(req), "%s() called for unknown object\n", __func__)) return -EINVAL; if (IS_ERR_OR_NULL(req->dev->power.qos)) return -ENODEV; trace_dev_pm_qos_remove_request(dev_name(req->dev), req->type, PM_QOS_DEFAULT_VALUE); ret = apply_constraint(req, PM_QOS_REMOVE_REQ, PM_QOS_DEFAULT_VALUE); memset(req, 0, sizeof(*req)); return ret; } /** * dev_pm_qos_remove_request - modifies an existing qos request * @req: handle to request list element * * Will remove pm qos request from the list of constraints and * recompute the current target value. Call this on slow code paths. * * Returns 1 if the aggregated constraint value has changed, * 0 if the aggregated constraint value has not changed, * -EINVAL in case of wrong parameters, -ENODEV if the device has been * removed from the system * * Callers should ensure that the target device is not RPM_SUSPENDED before * using this function for requests of type DEV_PM_QOS_FLAGS. */ int dev_pm_qos_remove_request(struct dev_pm_qos_request *req) { int ret; mutex_lock(&dev_pm_qos_mtx); ret = __dev_pm_qos_remove_request(req); mutex_unlock(&dev_pm_qos_mtx); return ret; } EXPORT_SYMBOL_GPL(dev_pm_qos_remove_request); /** * dev_pm_qos_add_notifier - sets notification entry for changes to target value * of per-device PM QoS constraints * * @dev: target device for the constraint * @notifier: notifier block managed by caller. * * Will register the notifier into a notification chain that gets called * upon changes to the target value for the device. * * If the device's constraints object doesn't exist when this routine is called, * it will be created (or error code will be returned if that fails). 
*/ int dev_pm_qos_add_notifier(struct device *dev, struct notifier_block *notifier) { int ret = 0; mutex_lock(&dev_pm_qos_mtx); if (IS_ERR(dev->power.qos)) ret = -ENODEV; else if (!dev->power.qos) ret = dev_pm_qos_constraints_allocate(dev); if (!ret) ret = blocking_notifier_chain_register(dev->power.qos->resume_latency.notifiers, notifier); mutex_unlock(&dev_pm_qos_mtx); return ret; } EXPORT_SYMBOL_GPL(dev_pm_qos_add_notifier); /** * dev_pm_qos_remove_notifier - deletes notification for changes to target value * of per-device PM QoS constraints * * @dev: target device for the constraint * @notifier: notifier block to be removed. * * Will remove the notifier from the notification chain that gets called * upon changes to the target value. */ int dev_pm_qos_remove_notifier(struct device *dev, struct notifier_block *notifier) { int retval = 0; mutex_lock(&dev_pm_qos_mtx); /* Silently return if the constraints object is not present. */ if (!IS_ERR_OR_NULL(dev->power.qos)) retval = blocking_notifier_chain_unregister(dev->power.qos->resume_latency.notifiers, notifier); mutex_unlock(&dev_pm_qos_mtx); return retval; } EXPORT_SYMBOL_GPL(dev_pm_qos_remove_notifier); /** * dev_pm_qos_add_ancestor_request - Add PM QoS request for device's ancestor. * @dev: Device whose ancestor to add the request for. * @req: Pointer to the preallocated handle. * @type: Type of the request. * @value: Constraint latency value. */ int dev_pm_qos_add_ancestor_request(struct device *dev, struct dev_pm_qos_request *req, enum dev_pm_qos_req_type type, s32 value) { struct device *ancestor = dev->parent; int ret = -ENODEV; switch (type) { case DEV_PM_QOS_RESUME_LATENCY: while (ancestor && !ancestor->power.ignore_children) ancestor = ancestor->parent; break; case DEV_PM_QOS_LATENCY_TOLERANCE: while (ancestor && !ancestor->power.set_latency_tolerance) ancestor = ancestor->parent; break; default: ancestor = NULL; } if (ancestor) ret = dev_pm_qos_add_request(ancestor, req, type, value); if (ret < 0) req->dev = NULL; return ret; } EXPORT_SYMBOL_GPL(dev_pm_qos_add_ancestor_request); static void __dev_pm_qos_drop_user_request(struct device *dev, enum dev_pm_qos_req_type type) { struct dev_pm_qos_request *req = NULL; switch(type) { case DEV_PM_QOS_RESUME_LATENCY: req = dev->power.qos->resume_latency_req; dev->power.qos->resume_latency_req = NULL; break; case DEV_PM_QOS_LATENCY_TOLERANCE: req = dev->power.qos->latency_tolerance_req; dev->power.qos->latency_tolerance_req = NULL; break; case DEV_PM_QOS_FLAGS: req = dev->power.qos->flags_req; dev->power.qos->flags_req = NULL; break; } __dev_pm_qos_remove_request(req); kfree(req); } static void dev_pm_qos_drop_user_request(struct device *dev, enum dev_pm_qos_req_type type) { mutex_lock(&dev_pm_qos_mtx); __dev_pm_qos_drop_user_request(dev, type); mutex_unlock(&dev_pm_qos_mtx); } /** * dev_pm_qos_expose_latency_limit - Expose PM QoS latency limit to user space. * @dev: Device whose PM QoS latency limit is to be exposed to user space. * @value: Initial value of the latency limit. 
*/ int dev_pm_qos_expose_latency_limit(struct device *dev, s32 value) { struct dev_pm_qos_request *req; int ret; if (!device_is_registered(dev) || value < 0) return -EINVAL; req = kzalloc(sizeof(*req), GFP_KERNEL); if (!req) return -ENOMEM; ret = dev_pm_qos_add_request(dev, req, DEV_PM_QOS_RESUME_LATENCY, value); if (ret < 0) { kfree(req); return ret; } mutex_lock(&dev_pm_qos_sysfs_mtx); mutex_lock(&dev_pm_qos_mtx); if (IS_ERR_OR_NULL(dev->power.qos)) ret = -ENODEV; else if (dev->power.qos->resume_latency_req) ret = -EEXIST; if (ret < 0) { __dev_pm_qos_remove_request(req); kfree(req); mutex_unlock(&dev_pm_qos_mtx); goto out; } dev->power.qos->resume_latency_req = req; mutex_unlock(&dev_pm_qos_mtx); ret = pm_qos_sysfs_add_resume_latency(dev); if (ret) dev_pm_qos_drop_user_request(dev, DEV_PM_QOS_RESUME_LATENCY); out: mutex_unlock(&dev_pm_qos_sysfs_mtx); return ret; } EXPORT_SYMBOL_GPL(dev_pm_qos_expose_latency_limit); static void __dev_pm_qos_hide_latency_limit(struct device *dev) { if (!IS_ERR_OR_NULL(dev->power.qos) && dev->power.qos->resume_latency_req) __dev_pm_qos_drop_user_request(dev, DEV_PM_QOS_RESUME_LATENCY); } /** * dev_pm_qos_hide_latency_limit - Hide PM QoS latency limit from user space. * @dev: Device whose PM QoS latency limit is to be hidden from user space. */ void dev_pm_qos_hide_latency_limit(struct device *dev) { mutex_lock(&dev_pm_qos_sysfs_mtx); pm_qos_sysfs_remove_resume_latency(dev); mutex_lock(&dev_pm_qos_mtx); __dev_pm_qos_hide_latency_limit(dev); mutex_unlock(&dev_pm_qos_mtx); mutex_unlock(&dev_pm_qos_sysfs_mtx); } EXPORT_SYMBOL_GPL(dev_pm_qos_hide_latency_limit); /** * dev_pm_qos_expose_flags - Expose PM QoS flags of a device to user space. * @dev: Device whose PM QoS flags are to be exposed to user space. * @val: Initial values of the flags. */ int dev_pm_qos_expose_flags(struct device *dev, s32 val) { struct dev_pm_qos_request *req; int ret; if (!device_is_registered(dev)) return -EINVAL; req = kzalloc(sizeof(*req), GFP_KERNEL); if (!req) return -ENOMEM; ret = dev_pm_qos_add_request(dev, req, DEV_PM_QOS_FLAGS, val); if (ret < 0) { kfree(req); return ret; } pm_runtime_get_sync(dev); mutex_lock(&dev_pm_qos_sysfs_mtx); mutex_lock(&dev_pm_qos_mtx); if (IS_ERR_OR_NULL(dev->power.qos)) ret = -ENODEV; else if (dev->power.qos->flags_req) ret = -EEXIST; if (ret < 0) { __dev_pm_qos_remove_request(req); kfree(req); mutex_unlock(&dev_pm_qos_mtx); goto out; } dev->power.qos->flags_req = req; mutex_unlock(&dev_pm_qos_mtx); ret = pm_qos_sysfs_add_flags(dev); if (ret) dev_pm_qos_drop_user_request(dev, DEV_PM_QOS_FLAGS); out: mutex_unlock(&dev_pm_qos_sysfs_mtx); pm_runtime_put(dev); return ret; } EXPORT_SYMBOL_GPL(dev_pm_qos_expose_flags); static void __dev_pm_qos_hide_flags(struct device *dev) { if (!IS_ERR_OR_NULL(dev->power.qos) && dev->power.qos->flags_req) __dev_pm_qos_drop_user_request(dev, DEV_PM_QOS_FLAGS); } /** * dev_pm_qos_hide_flags - Hide PM QoS flags of a device from user space. * @dev: Device whose PM QoS flags are to be hidden from user space. */ void dev_pm_qos_hide_flags(struct device *dev) { pm_runtime_get_sync(dev); mutex_lock(&dev_pm_qos_sysfs_mtx); pm_qos_sysfs_remove_flags(dev); mutex_lock(&dev_pm_qos_mtx); __dev_pm_qos_hide_flags(dev); mutex_unlock(&dev_pm_qos_mtx); mutex_unlock(&dev_pm_qos_sysfs_mtx); pm_runtime_put(dev); } EXPORT_SYMBOL_GPL(dev_pm_qos_hide_flags); /** * dev_pm_qos_update_flags - Update PM QoS flags request owned by user space. * @dev: Device to update the PM QoS flags request for. * @mask: Flags to set/clear. 
* @set: Whether to set or clear the flags (true means set). */ int dev_pm_qos_update_flags(struct device *dev, s32 mask, bool set) { s32 value; int ret; pm_runtime_get_sync(dev); mutex_lock(&dev_pm_qos_mtx); if (IS_ERR_OR_NULL(dev->power.qos) || !dev->power.qos->flags_req) { ret = -EINVAL; goto out; } value = dev_pm_qos_requested_flags(dev); if (set) value |= mask; else value &= ~mask; ret = __dev_pm_qos_update_request(dev->power.qos->flags_req, value); out: mutex_unlock(&dev_pm_qos_mtx); pm_runtime_put(dev); return ret; } /** * dev_pm_qos_get_user_latency_tolerance - Get user space latency tolerance. * @dev: Device to obtain the user space latency tolerance for. */ s32 dev_pm_qos_get_user_latency_tolerance(struct device *dev) { s32 ret; mutex_lock(&dev_pm_qos_mtx); ret = IS_ERR_OR_NULL(dev->power.qos) || !dev->power.qos->latency_tolerance_req ? PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT : dev->power.qos->latency_tolerance_req->data.pnode.prio; mutex_unlock(&dev_pm_qos_mtx); return ret; } /** * dev_pm_qos_update_user_latency_tolerance - Update user space latency tolerance. * @dev: Device to update the user space latency tolerance for. * @val: New user space latency tolerance for @dev (negative values disable). */ int dev_pm_qos_update_user_latency_tolerance(struct device *dev, s32 val) { int ret; mutex_lock(&dev_pm_qos_mtx); if (IS_ERR_OR_NULL(dev->power.qos) || !dev->power.qos->latency_tolerance_req) { struct dev_pm_qos_request *req; if (val < 0) { if (val == PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT) ret = 0; else ret = -EINVAL; goto out; } req = kzalloc(sizeof(*req), GFP_KERNEL); if (!req) { ret = -ENOMEM; goto out; } ret = __dev_pm_qos_add_request(dev, req, DEV_PM_QOS_LATENCY_TOLERANCE, val); if (ret < 0) { kfree(req); goto out; } dev->power.qos->latency_tolerance_req = req; } else { if (val < 0) { __dev_pm_qos_drop_user_request(dev, DEV_PM_QOS_LATENCY_TOLERANCE); ret = 0; } else { ret = __dev_pm_qos_update_request(dev->power.qos->latency_tolerance_req, val); } } out: mutex_unlock(&dev_pm_qos_mtx); return ret; } EXPORT_SYMBOL_GPL(dev_pm_qos_update_user_latency_tolerance); /** * dev_pm_qos_expose_latency_tolerance - Expose latency tolerance to userspace * @dev: Device whose latency tolerance to expose */ int dev_pm_qos_expose_latency_tolerance(struct device *dev) { int ret; if (!dev->power.set_latency_tolerance) return -EINVAL; mutex_lock(&dev_pm_qos_sysfs_mtx); ret = pm_qos_sysfs_add_latency_tolerance(dev); mutex_unlock(&dev_pm_qos_sysfs_mtx); return ret; } EXPORT_SYMBOL_GPL(dev_pm_qos_expose_latency_tolerance); /** * dev_pm_qos_hide_latency_tolerance - Hide latency tolerance from userspace * @dev: Device whose latency tolerance to hide */ void dev_pm_qos_hide_latency_tolerance(struct device *dev) { mutex_lock(&dev_pm_qos_sysfs_mtx); pm_qos_sysfs_remove_latency_tolerance(dev); mutex_unlock(&dev_pm_qos_sysfs_mtx); /* Remove the request from user space now */ pm_runtime_get_sync(dev); dev_pm_qos_update_user_latency_tolerance(dev, PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT); pm_runtime_put(dev); } EXPORT_SYMBOL_GPL(dev_pm_qos_hide_latency_tolerance);
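For context, the request API implemented above is typically consumed by a driver along the following lines; the helper names and the latency figures are invented for illustration, while dev_pm_qos_add_request(), dev_pm_qos_update_request(), dev_pm_qos_remove_request() and DEV_PM_QOS_RESUME_LATENCY are the interfaces defined in this file.

/*
 * Illustrative sketch only (not part of this file): typical driver-side
 * use of the per-device PM QoS request API.  The function names and the
 * 200/100 microsecond figures are hypothetical.
 */
#include <linux/pm_qos.h>

static struct dev_pm_qos_request my_latency_req;

static int my_driver_setup_qos(struct device *dev)
{
	/* Request a resume latency of at most 200 us for this device. */
	int ret = dev_pm_qos_add_request(dev, &my_latency_req,
					 DEV_PM_QOS_RESUME_LATENCY, 200);
	if (ret < 0)
		return ret;

	/* The constraint can later be tightened (or relaxed) in place. */
	dev_pm_qos_update_request(&my_latency_req, 100);
	return 0;
}

static void my_driver_teardown_qos(void)
{
	/* Drop the request so the aggregate constraint is recomputed. */
	dev_pm_qos_remove_request(&my_latency_req);
}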
/* * Copyright (c) 2011, 2012 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/ipv6.h> #include <net/ipv6.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> #include <linux/netfilter_ipv6/ip6t_NPT.h> #include <linux/netfilter/x_tables.h> static int ip6t_npt_checkentry(const struct xt_tgchk_param *par) { struct ip6t_npt_tginfo *npt = par->targinfo; struct in6_addr pfx; __wsum src_sum, dst_sum; if (npt->src_pfx_len > 64 || npt->dst_pfx_len > 64) return -EINVAL; /* Ensure that LSB of prefix is zero */ ipv6_addr_prefix(&pfx, &npt->src_pfx.in6, npt->src_pfx_len); if (!ipv6_addr_equal(&pfx, &npt->src_pfx.in6)) return -EINVAL; ipv6_addr_prefix(&pfx, &npt->dst_pfx.in6, npt->dst_pfx_len); if (!ipv6_addr_equal(&pfx, &npt->dst_pfx.in6)) return -EINVAL; src_sum = csum_partial(&npt->src_pfx.in6, sizeof(npt->src_pfx.in6), 0); dst_sum = csum_partial(&npt->dst_pfx.in6, sizeof(npt->dst_pfx.in6), 0); npt->adjustment = ~csum_fold(csum_sub(src_sum, dst_sum)); return 0; } static bool ip6t_npt_map_pfx(const struct ip6t_npt_tginfo *npt, struct in6_addr *addr) { unsigned int pfx_len; unsigned int i, idx; __be32 mask; __sum16 sum; pfx_len = max(npt->src_pfx_len, npt->dst_pfx_len); for (i = 0; i < pfx_len; i += 32) { if (pfx_len - i >= 32) mask = 0; else mask = htonl((1 << (i - pfx_len + 32)) - 1); idx = i / 32; addr->s6_addr32[idx] &= mask; addr->s6_addr32[idx] |= ~mask & npt->dst_pfx.in6.s6_addr32[idx]; } if (pfx_len <= 48) idx = 3; else { for (idx = 4; idx < ARRAY_SIZE(addr->s6_addr16); idx++) { if ((__force __sum16)addr->s6_addr16[idx] != CSUM_MANGLED_0) break; } if (idx == ARRAY_SIZE(addr->s6_addr16)) return false; } sum = ~csum_fold(csum_add(csum_unfold((__force __sum16)addr->s6_addr16[idx]), csum_unfold(npt->adjustment))); if (sum == CSUM_MANGLED_0) sum = 0; *(__force __sum16 *)&addr->s6_addr16[idx] = sum; return true; } static unsigned int ip6t_snpt_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ip6t_npt_tginfo *npt = par->targinfo; if (!ip6t_npt_map_pfx(npt, &ipv6_hdr(skb)->saddr)) { icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_HDR_FIELD, offsetof(struct ipv6hdr, saddr)); return NF_DROP; } return XT_CONTINUE; } static unsigned int ip6t_dnpt_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ip6t_npt_tginfo *npt = par->targinfo; if (!ip6t_npt_map_pfx(npt, &ipv6_hdr(skb)->daddr)) { icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_HDR_FIELD, offsetof(struct ipv6hdr, daddr)); return NF_DROP; } return XT_CONTINUE; } static struct xt_target ip6t_npt_target_reg[] __read_mostly = { { .name = "SNPT", .table = "mangle", .target = ip6t_snpt_tg, .targetsize = sizeof(struct ip6t_npt_tginfo), .usersize = offsetof(struct ip6t_npt_tginfo, adjustment), .checkentry = ip6t_npt_checkentry, .family = NFPROTO_IPV6, .hooks
= (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_POST_ROUTING), .me = THIS_MODULE, }, { .name = "DNPT", .table = "mangle", .target = ip6t_dnpt_tg, .targetsize = sizeof(struct ip6t_npt_tginfo), .usersize = offsetof(struct ip6t_npt_tginfo, adjustment), .checkentry = ip6t_npt_checkentry, .family = NFPROTO_IPV6, .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT), .me = THIS_MODULE, }, }; static int __init ip6t_npt_init(void) { return xt_register_targets(ip6t_npt_target_reg, ARRAY_SIZE(ip6t_npt_target_reg)); } static void __exit ip6t_npt_exit(void) { xt_unregister_targets(ip6t_npt_target_reg, ARRAY_SIZE(ip6t_npt_target_reg)); } module_init(ip6t_npt_init); module_exit(ip6t_npt_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("IPv6-to-IPv6 Network Prefix Translation (RFC 6296)"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_ALIAS("ip6t_SNPT"); MODULE_ALIAS("ip6t_DNPT");
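The SNPT and DNPT targets registered above are normally installed as a pair from userspace so that the source prefix of outbound traffic and the destination prefix of inbound traffic are rewritten in a checksum-neutral way (RFC 6296). As an illustration only (the interface name and prefixes are examples, and the option syntax is that of the userspace ip6tables SNPT/DNPT extensions), a typical configuration might look roughly like:

ip6tables -t mangle -A POSTROUTING -o eth0 -j SNPT --src-pfx fd00:aaaa::/64 --dst-pfx 2001:db8::/64   # illustrative prefixes
ip6tables -t mangle -A PREROUTING  -i eth0 -j DNPT --src-pfx 2001:db8::/64 --dst-pfx fd00:aaaa::/64   # illustrative prefixes

The POSTROUTING/PREROUTING placement matches the hooks declared in the target registrations above.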
/* SCTP kernel implementation * (C) Copyright Red Hat Inc. 2017 * * This file is part of the SCTP kernel implementation * * These functions implement sctp diag support. * * This SCTP implementation is free software; * you can redistribute it and/or modify it under the terms of * the GNU General Public License as published by * the Free Software Foundation; either version 2, or (at your option) * any later version. * * This SCTP implementation is distributed in the hope that it * will be useful, but WITHOUT ANY WARRANTY; without even the implied * ************************ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with GNU CC; see the file COPYING. If not, see * <http://www.gnu.org/licenses/>.
* * Please send any bug reports or fixes you make to the * email addresched(es): * lksctp developers <linux-sctp@vger.kernel.org> * * Written or modified by: * Xin Long <lucien.xin@gmail.com> */ #include <linux/module.h> #include <linux/inet_diag.h> #include <linux/sock_diag.h> #include <net/sctp/sctp.h> static void sctp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, void *info); /* define some functions to make asoc/ep fill look clean */ static void inet_diag_msg_sctpasoc_fill(struct inet_diag_msg *r, struct sock *sk, struct sctp_association *asoc) { union sctp_addr laddr, paddr; struct dst_entry *dst; struct timer_list *t3_rtx = &asoc->peer.primary_path->T3_rtx_timer; laddr = list_entry(asoc->base.bind_addr.address_list.next, struct sctp_sockaddr_entry, list)->a; paddr = asoc->peer.primary_path->ipaddr; dst = asoc->peer.primary_path->dst; r->idiag_family = sk->sk_family; r->id.idiag_sport = htons(asoc->base.bind_addr.port); r->id.idiag_dport = htons(asoc->peer.port); r->id.idiag_if = dst ? dst->dev->ifindex : 0; sock_diag_save_cookie(sk, r->id.idiag_cookie); #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == AF_INET6) { *(struct in6_addr *)r->id.idiag_src = laddr.v6.sin6_addr; *(struct in6_addr *)r->id.idiag_dst = paddr.v6.sin6_addr; } else #endif { memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src)); memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst)); r->id.idiag_src[0] = laddr.v4.sin_addr.s_addr; r->id.idiag_dst[0] = paddr.v4.sin_addr.s_addr; } r->idiag_state = asoc->state; if (timer_pending(t3_rtx)) { r->idiag_timer = SCTP_EVENT_TIMEOUT_T3_RTX; r->idiag_retrans = asoc->rtx_data_chunks; r->idiag_expires = jiffies_to_msecs(t3_rtx->expires - jiffies); } } static int inet_diag_msg_sctpladdrs_fill(struct sk_buff *skb, struct list_head *address_list) { struct sctp_sockaddr_entry *laddr; int addrlen = sizeof(struct sockaddr_storage); int addrcnt = 0; struct nlattr *attr; void *info = NULL; list_for_each_entry_rcu(laddr, address_list, list) addrcnt++; attr = nla_reserve(skb, INET_DIAG_LOCALS, addrlen * addrcnt); if (!attr) return -EMSGSIZE; info = nla_data(attr); list_for_each_entry_rcu(laddr, address_list, list) { memcpy(info, &laddr->a, sizeof(laddr->a)); memset(info + sizeof(laddr->a), 0, addrlen - sizeof(laddr->a)); info += addrlen; } return 0; } static int inet_diag_msg_sctpaddrs_fill(struct sk_buff *skb, struct sctp_association *asoc) { int addrlen = sizeof(struct sockaddr_storage); struct sctp_transport *from; struct nlattr *attr; void *info = NULL; attr = nla_reserve(skb, INET_DIAG_PEERS, addrlen * asoc->peer.transport_count); if (!attr) return -EMSGSIZE; info = nla_data(attr); list_for_each_entry(from, &asoc->peer.transport_addr_list, transports) { memcpy(info, &from->ipaddr, sizeof(from->ipaddr)); memset(info + sizeof(from->ipaddr), 0, addrlen - sizeof(from->ipaddr)); info += addrlen; } return 0; } /* sctp asoc/ep fill*/ static int inet_sctp_diag_fill(struct sock *sk, struct sctp_association *asoc, struct sk_buff *skb, const struct inet_diag_req_v2 *req, struct user_namespace *user_ns, int portid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh, bool net_admin) { struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct list_head *addr_list; struct inet_diag_msg *r; struct nlmsghdr *nlh; int ext = req->idiag_ext; struct sctp_infox infox; void *info = NULL; nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r), nlmsg_flags); if (!nlh) return -EMSGSIZE; r = nlmsg_data(nlh); BUG_ON(!sk_fullsock(sk)); r->idiag_timer = 0; r->idiag_retrans = 0; r->idiag_expires = 0; if 
(asoc) { inet_diag_msg_sctpasoc_fill(r, sk, asoc); } else { inet_diag_msg_common_fill(r, sk); r->idiag_state = sk->sk_state; } if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns, net_admin)) goto errout; if (ext & (1 << (INET_DIAG_SKMEMINFO - 1))) { u32 mem[SK_MEMINFO_VARS]; int amt; if (asoc && asoc->ep->sndbuf_policy) amt = asoc->sndbuf_used; else amt = sk_wmem_alloc_get(sk); mem[SK_MEMINFO_WMEM_ALLOC] = amt; if (asoc && asoc->ep->rcvbuf_policy) amt = atomic_read(&asoc->rmem_alloc); else amt = sk_rmem_alloc_get(sk); mem[SK_MEMINFO_RMEM_ALLOC] = amt; mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf; mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf; mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc; mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued; mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc); mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len; mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops); if (nla_put(skb, INET_DIAG_SKMEMINFO, sizeof(mem), &mem) < 0) goto errout; } if (ext & (1 << (INET_DIAG_INFO - 1))) { struct nlattr *attr; attr = nla_reserve_64bit(skb, INET_DIAG_INFO, sizeof(struct sctp_info), INET_DIAG_PAD); if (!attr) goto errout; info = nla_data(attr); } infox.sctpinfo = (struct sctp_info *)info; infox.asoc = asoc; sctp_diag_get_info(sk, r, &infox); addr_list = asoc ? &asoc->base.bind_addr.address_list : &ep->base.bind_addr.address_list; if (inet_diag_msg_sctpladdrs_fill(skb, addr_list)) goto errout; if (asoc && (ext & (1 << (INET_DIAG_CONG - 1)))) if (nla_put_string(skb, INET_DIAG_CONG, "reno") < 0) goto errout; if (asoc && inet_diag_msg_sctpaddrs_fill(skb, asoc)) goto errout; nlmsg_end(skb, nlh); return 0; errout: nlmsg_cancel(skb, nlh); return -EMSGSIZE; } /* callback and param */ struct sctp_comm_param { struct sk_buff *skb; struct netlink_callback *cb; const struct inet_diag_req_v2 *r; const struct nlmsghdr *nlh; bool net_admin; }; static size_t inet_assoc_attr_size(struct sctp_association *asoc) { int addrlen = sizeof(struct sockaddr_storage); int addrcnt = 0; struct sctp_sockaddr_entry *laddr; list_for_each_entry_rcu(laddr, &asoc->base.bind_addr.address_list, list) addrcnt++; return nla_total_size(sizeof(struct sctp_info)) + nla_total_size(addrlen * asoc->peer.transport_count) + nla_total_size(addrlen * addrcnt) + nla_total_size(sizeof(struct inet_diag_msg)) + inet_diag_msg_attrs_size() + nla_total_size(sizeof(struct inet_diag_meminfo)) + 64; } static int sctp_tsp_dump_one(struct sctp_transport *tsp, void *p) { struct sctp_association *assoc = tsp->asoc; struct sock *sk = tsp->asoc->base.sk; struct sctp_comm_param *commp = p; struct sk_buff *in_skb = commp->skb; const struct inet_diag_req_v2 *req = commp->r; const struct nlmsghdr *nlh = commp->nlh; struct net *net = sock_net(in_skb->sk); struct sk_buff *rep; int err; err = sock_diag_check_cookie(sk, req->id.idiag_cookie); if (err) goto out; err = -ENOMEM; rep = nlmsg_new(inet_assoc_attr_size(assoc), GFP_KERNEL); if (!rep) goto out; lock_sock(sk); if (sk != assoc->base.sk) { release_sock(sk); sk = assoc->base.sk; lock_sock(sk); } err = inet_sctp_diag_fill(sk, assoc, rep, req, sk_user_ns(NETLINK_CB(in_skb).sk), NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 0, nlh, commp->net_admin); release_sock(sk); if (err < 0) { WARN_ON(err == -EMSGSIZE); kfree_skb(rep); goto out; } err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid, MSG_DONTWAIT); if (err > 0) err = 0; out: return err; } static int sctp_sock_dump(struct sctp_endpoint *ep, struct sctp_transport *tsp, void *p) { struct sctp_comm_param *commp = p; struct sock *sk = 
ep->base.sk; struct sk_buff *skb = commp->skb; struct netlink_callback *cb = commp->cb; const struct inet_diag_req_v2 *r = commp->r; struct sctp_association *assoc; int err = 0; lock_sock(sk); if (ep != tsp->asoc->ep) goto release; list_for_each_entry(assoc, &ep->asocs, asocs) { if (cb->args[4] < cb->args[1]) goto next; if (r->id.idiag_sport != htons(assoc->base.bind_addr.port) && r->id.idiag_sport) goto next; if (r->id.idiag_dport != htons(assoc->peer.port) && r->id.idiag_dport) goto next; if (!cb->args[3] && inet_sctp_diag_fill(sk, NULL, skb, r, sk_user_ns(NETLINK_CB(cb->skb).sk), NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh, commp->net_admin) < 0) { err = 1; goto release; } cb->args[3] = 1; if (inet_sctp_diag_fill(sk, assoc, skb, r, sk_user_ns(NETLINK_CB(cb->skb).sk), NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 0, cb->nlh, commp->net_admin) < 0) { err = 1; goto release; } next: cb->args[4]++; } cb->args[1] = 0; cb->args[3] = 0; cb->args[4] = 0; release: release_sock(sk); return err; } static int sctp_sock_filter(struct sctp_endpoint *ep, struct sctp_transport *tsp, void *p) { struct sctp_comm_param *commp = p; struct sock *sk = ep->base.sk; const struct inet_diag_req_v2 *r = commp->r; struct sctp_association *assoc = list_entry(ep->asocs.next, struct sctp_association, asocs); /* find the ep only once through the transports by this condition */ if (tsp->asoc != assoc) return 0; if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family) return 0; return 1; } static int sctp_ep_dump(struct sctp_endpoint *ep, void *p) { struct sctp_comm_param *commp = p; struct sock *sk = ep->base.sk; struct sk_buff *skb = commp->skb; struct netlink_callback *cb = commp->cb; const struct inet_diag_req_v2 *r = commp->r; struct net *net = sock_net(skb->sk); struct inet_sock *inet = inet_sk(sk); int err = 0; if (!net_eq(sock_net(sk), net)) goto out; if (cb->args[4] < cb->args[1]) goto next; if (!(r->idiag_states & TCPF_LISTEN) && !list_empty(&ep->asocs)) goto next; if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family) goto next; if (r->id.idiag_sport != inet->inet_sport && r->id.idiag_sport) goto next; if (r->id.idiag_dport != inet->inet_dport && r->id.idiag_dport) goto next; if (inet_sctp_diag_fill(sk, NULL, skb, r, sk_user_ns(NETLINK_CB(cb->skb).sk), NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh, commp->net_admin) < 0) { err = 2; goto out; } next: cb->args[4]++; out: return err; } /* define the functions for sctp_diag_handler*/ static void sctp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, void *info) { struct sctp_infox *infox = (struct sctp_infox *)info; if (infox->asoc) { r->idiag_rqueue = atomic_read(&infox->asoc->rmem_alloc); r->idiag_wqueue = infox->asoc->sndbuf_used; } else { r->idiag_rqueue = sk->sk_ack_backlog; r->idiag_wqueue = sk->sk_max_ack_backlog; } if (infox->sctpinfo) sctp_get_sctp_info(sk, infox->asoc, infox->sctpinfo); } static int sctp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, const struct inet_diag_req_v2 *req) { struct net *net = sock_net(in_skb->sk); union sctp_addr laddr, paddr; struct sctp_comm_param commp = { .skb = in_skb, .r = req, .nlh = nlh, .net_admin = netlink_net_capable(in_skb, CAP_NET_ADMIN), }; if (req->sdiag_family == AF_INET) { laddr.v4.sin_port = req->id.idiag_sport; laddr.v4.sin_addr.s_addr = req->id.idiag_src[0]; laddr.v4.sin_family = AF_INET; paddr.v4.sin_port = req->id.idiag_dport; paddr.v4.sin_addr.s_addr = req->id.idiag_dst[0]; paddr.v4.sin_family = 
AF_INET; } else { laddr.v6.sin6_port = req->id.idiag_sport; memcpy(&laddr.v6.sin6_addr, req->id.idiag_src, sizeof(laddr.v6.sin6_addr)); laddr.v6.sin6_family = AF_INET6; paddr.v6.sin6_port = req->id.idiag_dport; memcpy(&paddr.v6.sin6_addr, req->id.idiag_dst, sizeof(paddr.v6.sin6_addr)); paddr.v6.sin6_family = AF_INET6; } return sctp_transport_lookup_process(sctp_tsp_dump_one, net, &laddr, &paddr, &commp); } static void sctp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r, struct nlattr *bc) { u32 idiag_states = r->idiag_states; struct net *net = sock_net(skb->sk); struct sctp_comm_param commp = { .skb = skb, .cb = cb, .r = r, .net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN), }; int pos = cb->args[2]; /* eps hashtable dumps * args: * 0 : if it will traversal listen sock * 1 : to record the sock pos of this time's traversal * 4 : to work as a temporary variable to traversal list */ if (cb->args[0] == 0) { if (!(idiag_states & TCPF_LISTEN)) goto skip; if (sctp_for_each_endpoint(sctp_ep_dump, &commp)) goto done; skip: cb->args[0] = 1; cb->args[1] = 0; cb->args[4] = 0; } /* asocs by transport hashtable dump * args: * 1 : to record the assoc pos of this time's traversal * 2 : to record the transport pos of this time's traversal * 3 : to mark if we have dumped the ep info of the current asoc * 4 : to work as a temporary variable to traversal list * 5 : to save the sk we get from travelsing the tsp list. */ if (!(idiag_states & ~(TCPF_LISTEN | TCPF_CLOSE))) goto done; sctp_transport_traverse_process(sctp_sock_filter, sctp_sock_dump, net, &pos, &commp); cb->args[2] = pos; done: cb->args[1] = cb->args[4]; cb->args[4] = 0; } static const struct inet_diag_handler sctp_diag_handler = { .dump = sctp_diag_dump, .dump_one = sctp_diag_dump_one, .idiag_get_info = sctp_diag_get_info, .idiag_type = IPPROTO_SCTP, .idiag_info_size = sizeof(struct sctp_info), }; static int __init sctp_diag_init(void) { return inet_diag_register(&sctp_diag_handler); } static void __exit sctp_diag_exit(void) { inet_diag_unregister(&sctp_diag_handler); } module_init(sctp_diag_init); module_exit(sctp_diag_exit); MODULE_LICENSE("GPL"); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-132);
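The diag handler registered above is driven from userspace through the generic sock_diag netlink interface. Below is a minimal userspace sketch (reply parsing and most error handling omitted) of the inet_diag_req_v2 dump request that ultimately reaches sctp_diag_dump(); it relies only on the uapi structures and constants from <linux/inet_diag.h>, <linux/sock_diag.h> and <linux/netlink.h>, and is an illustration rather than part of this module.

/*
 * Illustrative userspace sketch only (not part of this module): ask the
 * kernel to dump all IPv4 SCTP sockets over NETLINK_SOCK_DIAG.
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/netlink.h>
#include <linux/sock_diag.h>
#include <linux/inet_diag.h>

int main(void)
{
	struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
	struct {
		struct nlmsghdr nlh;
		struct inet_diag_req_v2 req;
	} msg = {
		.nlh = {
			.nlmsg_len   = sizeof(msg),
			.nlmsg_type  = SOCK_DIAG_BY_FAMILY,
			.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
		},
		.req = {
			.sdiag_family   = AF_INET,
			.sdiag_protocol = IPPROTO_SCTP,
			.idiag_states   = 0xffffffff,	/* all states, incl. listeners */
		},
	};
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);

	if (fd < 0 || sendto(fd, &msg, sizeof(msg), 0,
			     (struct sockaddr *)&nladdr, sizeof(nladdr)) < 0) {
		perror("sock_diag");
		return 1;
	}
	/* A real tool would now recv() and walk the NLMSG_OK() replies. */
	close(fd);
	return 0;
}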
/* * CALIPSO - Common Architecture Label IPv6 Security Option * * This is an implementation of the CALIPSO protocol as specified in * RFC 5570. * * Authors: Paul Moore <paul.moore@hp.com> * Huw Davies <huw@codeweavers.com> * */ /* (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008 * (c) Copyright Huw Davies <huw@codeweavers.com>, 2015 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, see <http://www.gnu.org/licenses/>. * */ #include <linux/init.h> #include <linux/types.h> #include <linux/rcupdate.h> #include <linux/list.h> #include <linux/spinlock.h> #include <linux/string.h> #include <linux/jhash.h> #include <linux/audit.h> #include <linux/slab.h> #include <net/ip.h> #include <net/icmp.h> #include <net/tcp.h> #include <net/netlabel.h> #include <net/calipso.h> #include <linux/atomic.h> #include <linux/bug.h> #include <asm/unaligned.h> #include <linux/crc-ccitt.h> /* Maximum size of the calipso option including * the two-byte TLV header. */ #define CALIPSO_OPT_LEN_MAX (2 + 252) /* Size of the minimum calipso option including * the two-byte TLV header. */ #define CALIPSO_HDR_LEN (2 + 8) /* Maximum size of the calipso option including * the two-byte TLV header and up to 3 bytes of * leading pad and 7 bytes of trailing pad. */ #define CALIPSO_OPT_LEN_MAX_WITH_PAD (3 + CALIPSO_OPT_LEN_MAX + 7) /* Maximum size of u32 aligned buffer required to hold calipso * option. Max of 3 initial pad bytes starting from buffer + 3. * i.e. the worst case is when the previous tlv finishes on 4n + 3. */ #define CALIPSO_MAX_BUFFER (6 + CALIPSO_OPT_LEN_MAX) /* List of available DOI definitions */ static DEFINE_SPINLOCK(calipso_doi_list_lock); static LIST_HEAD(calipso_doi_list); /* Label mapping cache */ int calipso_cache_enabled = 1; int calipso_cache_bucketsize = 10; #define CALIPSO_CACHE_BUCKETBITS 7 #define CALIPSO_CACHE_BUCKETS BIT(CALIPSO_CACHE_BUCKETBITS) #define CALIPSO_CACHE_REORDERLIMIT 10 struct calipso_map_cache_bkt { spinlock_t lock; u32 size; struct list_head list; }; struct calipso_map_cache_entry { u32 hash; unsigned char *key; size_t key_len; struct netlbl_lsm_cache *lsm_data; u32 activity; struct list_head list; }; static struct calipso_map_cache_bkt *calipso_cache; static void calipso_cache_invalidate(void); static void calipso_doi_putdef(struct calipso_doi *doi_def); /* Label Mapping Cache Functions */ /** * calipso_cache_entry_free - Frees a cache entry * @entry: the entry to free * * Description: * This function frees the memory associated with a cache entry including the * LSM cache data if there are no longer any users, i.e. reference count == 0. * */ static void calipso_cache_entry_free(struct calipso_map_cache_entry *entry) { if (entry->lsm_data) netlbl_secattr_cache_free(entry->lsm_data); kfree(entry->key); kfree(entry); } /** * calipso_map_cache_hash - Hashing function for the CALIPSO cache * @key: the hash key * @key_len: the length of the key in bytes * * Description: * The CALIPSO tag hashing function. Returns a 32-bit hash value. * */ static u32 calipso_map_cache_hash(const unsigned char *key, u32 key_len) { return jhash(key, key_len, 0); } /** * calipso_cache_init - Initialize the CALIPSO cache * * Description: * Initializes the CALIPSO label mapping cache; this function should be called * before any of the other functions defined in this file. Returns zero on * success, negative values on error.
* */ static int __init calipso_cache_init(void) { u32 iter; calipso_cache = kcalloc(CALIPSO_CACHE_BUCKETS, sizeof(struct calipso_map_cache_bkt), GFP_KERNEL); if (!calipso_cache) return -ENOMEM; for (iter = 0; iter < CALIPSO_CACHE_BUCKETS; iter++) { spin_lock_init(&calipso_cache[iter].lock); calipso_cache[iter].size = 0; INIT_LIST_HEAD(&calipso_cache[iter].list); } return 0; } /** * calipso_cache_invalidate - Invalidates the current CALIPSO cache * * Description: * Invalidates and frees any entries in the CALIPSO cache. Returns zero on * success and negative values on failure. * */ static void calipso_cache_invalidate(void) { struct calipso_map_cache_entry *entry, *tmp_entry; u32 iter; for (iter = 0; iter < CALIPSO_CACHE_BUCKETS; iter++) { spin_lock_bh(&calipso_cache[iter].lock); list_for_each_entry_safe(entry, tmp_entry, &calipso_cache[iter].list, list) { list_del(&entry->list); calipso_cache_entry_free(entry); } calipso_cache[iter].size = 0; spin_unlock_bh(&calipso_cache[iter].lock); } } /** * calipso_cache_check - Check the CALIPSO cache for a label mapping * @key: the buffer to check * @key_len: buffer length in bytes * @secattr: the security attribute struct to use * * Description: * This function checks the cache to see if a label mapping already exists for * the given key. If there is a match then the cache is adjusted and the * @secattr struct is populated with the correct LSM security attributes. The * cache is adjusted in the following manner if the entry is not already the * first in the cache bucket: * * 1. The cache entry's activity counter is incremented * 2. The previous (higher ranking) entry's activity counter is decremented * 3. If the difference between the two activity counters is geater than * CALIPSO_CACHE_REORDERLIMIT the two entries are swapped * * Returns zero on success, -ENOENT for a cache miss, and other negative values * on error. * */ static int calipso_cache_check(const unsigned char *key, u32 key_len, struct netlbl_lsm_secattr *secattr) { u32 bkt; struct calipso_map_cache_entry *entry; struct calipso_map_cache_entry *prev_entry = NULL; u32 hash; if (!calipso_cache_enabled) return -ENOENT; hash = calipso_map_cache_hash(key, key_len); bkt = hash & (CALIPSO_CACHE_BUCKETS - 1); spin_lock_bh(&calipso_cache[bkt].lock); list_for_each_entry(entry, &calipso_cache[bkt].list, list) { if (entry->hash == hash && entry->key_len == key_len && memcmp(entry->key, key, key_len) == 0) { entry->activity += 1; refcount_inc(&entry->lsm_data->refcount); secattr->cache = entry->lsm_data; secattr->flags |= NETLBL_SECATTR_CACHE; secattr->type = NETLBL_NLTYPE_CALIPSO; if (!prev_entry) { spin_unlock_bh(&calipso_cache[bkt].lock); return 0; } if (prev_entry->activity > 0) prev_entry->activity -= 1; if (entry->activity > prev_entry->activity && entry->activity - prev_entry->activity > CALIPSO_CACHE_REORDERLIMIT) { __list_del(entry->list.prev, entry->list.next); __list_add(&entry->list, prev_entry->list.prev, &prev_entry->list); } spin_unlock_bh(&calipso_cache[bkt].lock); return 0; } prev_entry = entry; } spin_unlock_bh(&calipso_cache[bkt].lock); return -ENOENT; } /** * calipso_cache_add - Add an entry to the CALIPSO cache * @calipso_ptr: the CALIPSO option * @secattr: the packet's security attributes * * Description: * Add a new entry into the CALIPSO label mapping cache. Add the new entry to * head of the cache bucket's list, if the cache bucket is out of room remove * the last entry in the list first. It is important to note that there is * currently no checking for duplicate keys. 
Returns zero on success, * negative values on failure. The key stored starts at calipso_ptr + 2, * i.e. the type and length bytes are not stored, this corresponds to * calipso_ptr[1] bytes of data. * */ static int calipso_cache_add(const unsigned char *calipso_ptr, const struct netlbl_lsm_secattr *secattr) { int ret_val = -EPERM; u32 bkt; struct calipso_map_cache_entry *entry = NULL; struct calipso_map_cache_entry *old_entry = NULL; u32 calipso_ptr_len; if (!calipso_cache_enabled || calipso_cache_bucketsize <= 0) return 0; calipso_ptr_len = calipso_ptr[1]; entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (!entry) return -ENOMEM; entry->key = kmemdup(calipso_ptr + 2, calipso_ptr_len, GFP_ATOMIC); if (!entry->key) { ret_val = -ENOMEM; goto cache_add_failure; } entry->key_len = calipso_ptr_len; entry->hash = calipso_map_cache_hash(calipso_ptr, calipso_ptr_len); refcount_inc(&secattr->cache->refcount); entry->lsm_data = secattr->cache; bkt = entry->hash & (CALIPSO_CACHE_BUCKETS - 1); spin_lock_bh(&calipso_cache[bkt].lock); if (calipso_cache[bkt].size < calipso_cache_bucketsize) { list_add(&entry->list, &calipso_cache[bkt].list); calipso_cache[bkt].size += 1; } else { old_entry = list_entry(calipso_cache[bkt].list.prev, struct calipso_map_cache_entry, list); list_del(&old_entry->list); list_add(&entry->list, &calipso_cache[bkt].list); calipso_cache_entry_free(old_entry); } spin_unlock_bh(&calipso_cache[bkt].lock); return 0; cache_add_failure: if (entry) calipso_cache_entry_free(entry); return ret_val; } /* DOI List Functions */ /** * calipso_doi_search - Searches for a DOI definition * @doi: the DOI to search for * * Description: * Search the DOI definition list for a DOI definition with a DOI value that * matches @doi. The caller is responsible for calling rcu_read_[un]lock(). * Returns a pointer to the DOI definition on success and NULL on failure. */ static struct calipso_doi *calipso_doi_search(u32 doi) { struct calipso_doi *iter; list_for_each_entry_rcu(iter, &calipso_doi_list, list) if (iter->doi == doi && refcount_read(&iter->refcount)) return iter; return NULL; } /** * calipso_doi_add - Add a new DOI to the CALIPSO protocol engine * @doi_def: the DOI structure * @audit_info: NetLabel audit information * * Description: * The caller defines a new DOI for use by the CALIPSO engine and calls this * function to add it to the list of acceptable domains. The caller must * ensure that the mapping table specified in @doi_def->map meets all of the * requirements of the mapping type (see calipso.h for details). Returns * zero on success and non-zero on failure. * */ static int calipso_doi_add(struct calipso_doi *doi_def, struct netlbl_audit *audit_info) { int ret_val = -EINVAL; u32 doi; u32 doi_type; struct audit_buffer *audit_buf; doi = doi_def->doi; doi_type = doi_def->type; if (doi_def->doi == CALIPSO_DOI_UNKNOWN) goto doi_add_return; refcount_set(&doi_def->refcount, 1); spin_lock(&calipso_doi_list_lock); if (calipso_doi_search(doi_def->doi)) { spin_unlock(&calipso_doi_list_lock); ret_val = -EEXIST; goto doi_add_return; } list_add_tail_rcu(&doi_def->list, &calipso_doi_list); spin_unlock(&calipso_doi_list_lock); ret_val = 0; doi_add_return: audit_buf = netlbl_audit_start(AUDIT_MAC_CALIPSO_ADD, audit_info); if (audit_buf) { const char *type_str; switch (doi_type) { case CALIPSO_MAP_PASS: type_str = "pass"; break; default: type_str = "(unknown)"; } audit_log_format(audit_buf, " calipso_doi=%u calipso_type=%s res=%u", doi, type_str, ret_val == 0 ? 
1 : 0); audit_log_end(audit_buf); } return ret_val; } /** * calipso_doi_free - Frees a DOI definition * @doi_def: the DOI definition * * Description: * This function frees all of the memory associated with a DOI definition. * */ static void calipso_doi_free(struct calipso_doi *doi_def) { kfree(doi_def); } /** * calipso_doi_free_rcu - Frees a DOI definition via the RCU pointer * @entry: the entry's RCU field * * Description: * This function is designed to be used as a callback to the call_rcu() * function so that the memory allocated to the DOI definition can be released * safely. * */ static void calipso_doi_free_rcu(struct rcu_head *entry) { struct calipso_doi *doi_def; doi_def = container_of(entry, struct calipso_doi, rcu); calipso_doi_free(doi_def); } /** * calipso_doi_remove - Remove an existing DOI from the CALIPSO protocol engine * @doi: the DOI value * @audit_secid: the LSM secid to use in the audit message * * Description: * Removes a DOI definition from the CALIPSO engine. The NetLabel routines will * be called to release their own LSM domain mappings as well as our own * domain list. Returns zero on success and negative values on failure. * */ static int calipso_doi_remove(u32 doi, struct netlbl_audit *audit_info) { int ret_val; struct calipso_doi *doi_def; struct audit_buffer *audit_buf; spin_lock(&calipso_doi_list_lock); doi_def = calipso_doi_search(doi); if (!doi_def) { spin_unlock(&calipso_doi_list_lock); ret_val = -ENOENT; goto doi_remove_return; } list_del_rcu(&doi_def->list); spin_unlock(&calipso_doi_list_lock); calipso_doi_putdef(doi_def); ret_val = 0; doi_remove_return: audit_buf = netlbl_audit_start(AUDIT_MAC_CALIPSO_DEL, audit_info); if (audit_buf) { audit_log_format(audit_buf, " calipso_doi=%u res=%u", doi, ret_val == 0 ? 1 : 0); audit_log_end(audit_buf); } return ret_val; } /** * calipso_doi_getdef - Returns a reference to a valid DOI definition * @doi: the DOI value * * Description: * Searches for a valid DOI definition and if one is found it is returned to * the caller. Otherwise NULL is returned. The caller must ensure that * calipso_doi_putdef() is called when the caller is done. * */ static struct calipso_doi *calipso_doi_getdef(u32 doi) { struct calipso_doi *doi_def; rcu_read_lock(); doi_def = calipso_doi_search(doi); if (!doi_def) goto doi_getdef_return; if (!refcount_inc_not_zero(&doi_def->refcount)) doi_def = NULL; doi_getdef_return: rcu_read_unlock(); return doi_def; } /** * calipso_doi_putdef - Releases a reference for the given DOI definition * @doi_def: the DOI definition * * Description: * Releases a DOI definition reference obtained from calipso_doi_getdef(). * */ static void calipso_doi_putdef(struct calipso_doi *doi_def) { if (!doi_def) return; if (!refcount_dec_and_test(&doi_def->refcount)) return; calipso_cache_invalidate(); call_rcu(&doi_def->rcu, calipso_doi_free_rcu); } /** * calipso_doi_walk - Iterate through the DOI definitions * @skip_cnt: skip past this number of DOI definitions, updated * @callback: callback for each DOI definition * @cb_arg: argument for the callback function * * Description: * Iterate over the DOI definition list, skipping the first @skip_cnt entries. * For each entry call @callback, if @callback returns a negative value stop * 'walking' through the list and return. Updates the value in @skip_cnt upon * return. Returns zero on success, negative values on failure. 
* */ static int calipso_doi_walk(u32 *skip_cnt, int (*callback)(struct calipso_doi *doi_def, void *arg), void *cb_arg) { int ret_val = -ENOENT; u32 doi_cnt = 0; struct calipso_doi *iter_doi; rcu_read_lock(); list_for_each_entry_rcu(iter_doi, &calipso_doi_list, list) if (refcount_read(&iter_doi->refcount) > 0) { if (doi_cnt++ < *skip_cnt) continue; ret_val = callback(iter_doi, cb_arg); if (ret_val < 0) { doi_cnt--; goto doi_walk_return; } } doi_walk_return: rcu_read_unlock(); *skip_cnt = doi_cnt; return ret_val; } /** * calipso_validate - Validate a CALIPSO option * @skb: the packet * @option: the start of the option * * Description: * This routine is called to validate a CALIPSO option. * If the option is valid then %true is returned, otherwise * %false is returned. * * The caller should have already checked that the length of the * option (including the TLV header) is >= 10 and that the catmap * length is consistent with the option length. * * We leave checks on the level and categories to the socket layer. */ bool calipso_validate(const struct sk_buff *skb, const unsigned char *option) { struct calipso_doi *doi_def; bool ret_val; u16 crc, len = option[1] + 2; static const u8 zero[2]; /* The original CRC runs over the option including the TLV header * with the CRC-16 field (at offset 8) zeroed out. */ crc = crc_ccitt(0xffff, option, 8); crc = crc_ccitt(crc, zero, sizeof(zero)); if (len > 10) crc = crc_ccitt(crc, option + 10, len - 10); crc = ~crc; if (option[8] != (crc & 0xff) || option[9] != ((crc >> 8) & 0xff)) return false; rcu_read_lock(); doi_def = calipso_doi_search(get_unaligned_be32(option + 2)); ret_val = !!doi_def; rcu_read_unlock(); return ret_val; } /** * calipso_map_cat_hton - Perform a category mapping from host to network * @doi_def: the DOI definition * @secattr: the security attributes * @net_cat: the zero'd out category bitmap in network/CALIPSO format * @net_cat_len: the length of the CALIPSO bitmap in bytes * * Description: * Perform a label mapping to translate a local MLS category bitmap to the * correct CALIPSO bitmap using the given DOI definition. Returns the minimum * size in bytes of the network bitmap on success, negative values otherwise. * */ static int calipso_map_cat_hton(const struct calipso_doi *doi_def, const struct netlbl_lsm_secattr *secattr, unsigned char *net_cat, u32 net_cat_len) { int spot = -1; u32 net_spot_max = 0; u32 net_clen_bits = net_cat_len * 8; for (;;) { spot = netlbl_catmap_walk(secattr->attr.mls.cat, spot + 1); if (spot < 0) break; if (spot >= net_clen_bits) return -ENOSPC; netlbl_bitmap_setbit(net_cat, spot, 1); if (spot > net_spot_max) net_spot_max = spot; } return (net_spot_max / 32 + 1) * 4; } /** * calipso_map_cat_ntoh - Perform a category mapping from network to host * @doi_def: the DOI definition * @net_cat: the category bitmap in network/CALIPSO format * @net_cat_len: the length of the CALIPSO bitmap in bytes * @secattr: the security attributes * * Description: * Perform a label mapping to translate a CALIPSO bitmap to the correct local * MLS category bitmap using the given DOI definition. Returns zero on * success, negative values on failure. 
* */ static int calipso_map_cat_ntoh(const struct calipso_doi *doi_def, const unsigned char *net_cat, u32 net_cat_len, struct netlbl_lsm_secattr *secattr) { int ret_val; int spot = -1; u32 net_clen_bits = net_cat_len * 8; for (;;) { spot = netlbl_bitmap_walk(net_cat, net_clen_bits, spot + 1, 1); if (spot < 0) { if (spot == -2) return -EFAULT; return 0; } ret_val = netlbl_catmap_setbit(&secattr->attr.mls.cat, spot, GFP_ATOMIC); if (ret_val != 0) return ret_val; } return -EINVAL; } /** * calipso_pad_write - Writes pad bytes in TLV format * @buf: the buffer * @offset: offset from start of buffer to write padding * @count: number of pad bytes to write * * Description: * Write @count bytes of TLV padding into @buffer starting at offset @offset. * @count should be less than 8 - see RFC 4942. * */ static int calipso_pad_write(unsigned char *buf, unsigned int offset, unsigned int count) { if (WARN_ON_ONCE(count >= 8)) return -EINVAL; switch (count) { case 0: break; case 1: buf[offset] = IPV6_TLV_PAD1; break; default: buf[offset] = IPV6_TLV_PADN; buf[offset + 1] = count - 2; if (count > 2) memset(buf + offset + 2, 0, count - 2); break; } return 0; } /** * calipso_genopt - Generate a CALIPSO option * @buf: the option buffer * @start: offset from which to write * @buf_len: the size of opt_buf * @doi_def: the CALIPSO DOI to use * @secattr: the security attributes * * Description: * Generate a CALIPSO option using the DOI definition and security attributes * passed to the function. This also generates upto three bytes of leading * padding that ensures that the option is 4n + 2 aligned. It returns the * number of bytes written (including any initial padding). */ static int calipso_genopt(unsigned char *buf, u32 start, u32 buf_len, const struct calipso_doi *doi_def, const struct netlbl_lsm_secattr *secattr) { int ret_val; u32 len, pad; u16 crc; static const unsigned char padding[4] = {2, 1, 0, 3}; unsigned char *calipso; /* CALIPSO has 4n + 2 alignment */ pad = padding[start & 3]; if (buf_len <= start + pad + CALIPSO_HDR_LEN) return -ENOSPC; if ((secattr->flags & NETLBL_SECATTR_MLS_LVL) == 0) return -EPERM; len = CALIPSO_HDR_LEN; if (secattr->flags & NETLBL_SECATTR_MLS_CAT) { ret_val = calipso_map_cat_hton(doi_def, secattr, buf + start + pad + len, buf_len - start - pad - len); if (ret_val < 0) return ret_val; len += ret_val; } calipso_pad_write(buf, start, pad); calipso = buf + start + pad; calipso[0] = IPV6_TLV_CALIPSO; calipso[1] = len - 2; *(__be32 *)(calipso + 2) = htonl(doi_def->doi); calipso[6] = (len - CALIPSO_HDR_LEN) / 4; calipso[7] = secattr->attr.mls.lvl, crc = ~crc_ccitt(0xffff, calipso, len); calipso[8] = crc & 0xff; calipso[9] = (crc >> 8) & 0xff; return pad + len; } /* Hop-by-hop hdr helper functions */ /** * calipso_opt_update - Replaces socket's hop options with a new set * @sk: the socket * @hop: new hop options * * Description: * Replaces @sk's hop options with @hop. @hop may be NULL to leave * the socket with no hop options. 
* */ static int calipso_opt_update(struct sock *sk, struct ipv6_opt_hdr *hop) { struct ipv6_txoptions *old = txopt_get(inet6_sk(sk)), *txopts; txopts = ipv6_renew_options(sk, old, IPV6_HOPOPTS, hop); txopt_put(old); if (IS_ERR(txopts)) return PTR_ERR(txopts); txopts = ipv6_update_options(sk, txopts); if (txopts) { atomic_sub(txopts->tot_len, &sk->sk_omem_alloc); txopt_put(txopts); } return 0; } /** * calipso_tlv_len - Returns the length of the TLV * @opt: the option header * @offset: offset of the TLV within the header * * Description: * Returns the length of the TLV option at offset @offset within * the option header @opt. Checks that the entire TLV fits inside * the option header, returns a negative value if this is not the case. */ static int calipso_tlv_len(struct ipv6_opt_hdr *opt, unsigned int offset) { unsigned char *tlv = (unsigned char *)opt; unsigned int opt_len = ipv6_optlen(opt), tlv_len; if (offset < sizeof(*opt) || offset >= opt_len) return -EINVAL; if (tlv[offset] == IPV6_TLV_PAD1) return 1; if (offset + 1 >= opt_len) return -EINVAL; tlv_len = tlv[offset + 1] + 2; if (offset + tlv_len > opt_len) return -EINVAL; return tlv_len; } /** * calipso_opt_find - Finds the CALIPSO option in an IPv6 hop options header * @hop: the hop options header * @start: on return holds the offset of any leading padding * @end: on return holds the offset of the first non-pad TLV after CALIPSO * * Description: * Finds the space occupied by a CALIPSO option (including any leading and * trailing padding). * * If a CALIPSO option exists set @start and @end to the * offsets within @hop of the start of padding before the first * CALIPSO option and the end of padding after the first CALIPSO * option. In this case the function returns 0. * * In the absence of a CALIPSO option, @start and @end will be * set to the start and end of any trailing padding in the header. * This is useful when appending a new option, as the caller may want * to overwrite some of this padding. In this case the function will * return -ENOENT. */ static int calipso_opt_find(struct ipv6_opt_hdr *hop, unsigned int *start, unsigned int *end) { int ret_val = -ENOENT, tlv_len; unsigned int opt_len, offset, offset_s = 0, offset_e = 0; unsigned char *opt = (unsigned char *)hop; opt_len = ipv6_optlen(hop); offset = sizeof(*hop); while (offset < opt_len) { tlv_len = calipso_tlv_len(hop, offset); if (tlv_len < 0) return tlv_len; switch (opt[offset]) { case IPV6_TLV_PAD1: case IPV6_TLV_PADN: if (offset_e) offset_e = offset; break; case IPV6_TLV_CALIPSO: ret_val = 0; offset_e = offset; break; default: if (offset_e == 0) offset_s = offset; else goto out; } offset += tlv_len; } out: if (offset_s) *start = offset_s + calipso_tlv_len(hop, offset_s); else *start = sizeof(*hop); if (offset_e) *end = offset_e + calipso_tlv_len(hop, offset_e); else *end = opt_len; return ret_val; } /** * calipso_opt_insert - Inserts a CALIPSO option into an IPv6 hop opt hdr * @hop: the original hop options header * @doi_def: the CALIPSO DOI to use * @secattr: the specific security attributes of the socket * * Description: * Creates a new hop options header based on @hop with a * CALIPSO option added to it. If @hop already contains a CALIPSO * option this is overwritten, otherwise the new option is appended * after any existing options. If @hop is NULL then the new header * will contain just the CALIPSO option and any needed padding. 
* */ static struct ipv6_opt_hdr * calipso_opt_insert(struct ipv6_opt_hdr *hop, const struct calipso_doi *doi_def, const struct netlbl_lsm_secattr *secattr) { unsigned int start, end, buf_len, pad, hop_len; struct ipv6_opt_hdr *new; int ret_val; if (hop) { hop_len = ipv6_optlen(hop); ret_val = calipso_opt_find(hop, &start, &end); if (ret_val && ret_val != -ENOENT) return ERR_PTR(ret_val); } else { hop_len = 0; start = sizeof(*hop); end = 0; } buf_len = hop_len + start - end + CALIPSO_OPT_LEN_MAX_WITH_PAD; new = kzalloc(buf_len, GFP_ATOMIC); if (!new) return ERR_PTR(-ENOMEM); if (start > sizeof(*hop)) memcpy(new, hop, start); ret_val = calipso_genopt((unsigned char *)new, start, buf_len, doi_def, secattr); if (ret_val < 0) { kfree(new); return ERR_PTR(ret_val); } buf_len = start + ret_val; /* At this point buf_len aligns to 4n, so (buf_len & 4) pads to 8n */ pad = ((buf_len & 4) + (end & 7)) & 7; calipso_pad_write((unsigned char *)new, buf_len, pad); buf_len += pad; if (end != hop_len) { memcpy((char *)new + buf_len, (char *)hop + end, hop_len - end); buf_len += hop_len - end; } new->nexthdr = 0; new->hdrlen = buf_len / 8 - 1; return new; } /** * calipso_opt_del - Removes the CALIPSO option from an option header * @hop: the original header * @new: the new header * * Description: * Creates a new header based on @hop without any CALIPSO option. If @hop * doesn't contain a CALIPSO option it returns -ENOENT. If @hop contains * no other non-padding options, it returns zero with @new set to NULL. * Otherwise it returns zero, creates a new header without the CALIPSO * option (and removing as much padding as possible) and returns with * @new set to that header. * */ static int calipso_opt_del(struct ipv6_opt_hdr *hop, struct ipv6_opt_hdr **new) { int ret_val; unsigned int start, end, delta, pad, hop_len; ret_val = calipso_opt_find(hop, &start, &end); if (ret_val) return ret_val; hop_len = ipv6_optlen(hop); if (start == sizeof(*hop) && end == hop_len) { /* There's no other option in the header so return NULL */ *new = NULL; return 0; } delta = (end - start) & ~7; *new = kzalloc(hop_len - delta, GFP_ATOMIC); if (!*new) return -ENOMEM; memcpy(*new, hop, start); (*new)->hdrlen -= delta / 8; pad = (end - start) & 7; calipso_pad_write((unsigned char *)*new, start, pad); if (end != hop_len) memcpy((char *)*new + start + pad, (char *)hop + end, hop_len - end); return 0; } /** * calipso_opt_getattr - Get the security attributes from a memory block * @calipso: the CALIPSO option * @secattr: the security attributes * * Description: * Inspect @calipso and return the security attributes in @secattr. * Returns zero on success and negative values on failure. 
* */ static int calipso_opt_getattr(const unsigned char *calipso, struct netlbl_lsm_secattr *secattr) { int ret_val = -ENOMSG; u32 doi, len = calipso[1], cat_len = calipso[6] * 4; struct calipso_doi *doi_def; if (cat_len + 8 > len) return -EINVAL; if (calipso_cache_check(calipso + 2, calipso[1], secattr) == 0) return 0; doi = get_unaligned_be32(calipso + 2); rcu_read_lock(); doi_def = calipso_doi_search(doi); if (!doi_def) goto getattr_return; secattr->attr.mls.lvl = calipso[7]; secattr->flags |= NETLBL_SECATTR_MLS_LVL; if (cat_len) { ret_val = calipso_map_cat_ntoh(doi_def, calipso + 10, cat_len, secattr); if (ret_val != 0) { netlbl_catmap_free(secattr->attr.mls.cat); goto getattr_return; } if (secattr->attr.mls.cat) secattr->flags |= NETLBL_SECATTR_MLS_CAT; } secattr->type = NETLBL_NLTYPE_CALIPSO; getattr_return: rcu_read_unlock(); return ret_val; } /* sock functions. */ /** * calipso_sock_getattr - Get the security attributes from a sock * @sk: the sock * @secattr: the security attributes * * Description: * Query @sk to see if there is a CALIPSO option attached to the sock and if * there is return the CALIPSO security attributes in @secattr. This function * requires that @sk be locked, or privately held, but it does not do any * locking itself. Returns zero on success and negative values on failure. * */ static int calipso_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) { struct ipv6_opt_hdr *hop; int opt_len, len, ret_val = -ENOMSG, offset; unsigned char *opt; struct ipv6_txoptions *txopts = txopt_get(inet6_sk(sk)); if (!txopts || !txopts->hopopt) goto done; hop = txopts->hopopt; opt = (unsigned char *)hop; opt_len = ipv6_optlen(hop); offset = sizeof(*hop); while (offset < opt_len) { len = calipso_tlv_len(hop, offset); if (len < 0) { ret_val = len; goto done; } switch (opt[offset]) { case IPV6_TLV_CALIPSO: if (len < CALIPSO_HDR_LEN) ret_val = -EINVAL; else ret_val = calipso_opt_getattr(&opt[offset], secattr); goto done; default: offset += len; break; } } done: txopt_put(txopts); return ret_val; } /** * calipso_sock_setattr - Add a CALIPSO option to a socket * @sk: the socket * @doi_def: the CALIPSO DOI to use * @secattr: the specific security attributes of the socket * * Description: * Set the CALIPSO option on the given socket using the DOI definition and * security attributes passed to the function. This function requires * exclusive access to @sk, which means it either needs to be in the * process of being created or locked. Returns zero on success and negative * values on failure. * */ static int calipso_sock_setattr(struct sock *sk, const struct calipso_doi *doi_def, const struct netlbl_lsm_secattr *secattr) { int ret_val; struct ipv6_opt_hdr *old, *new; struct ipv6_txoptions *txopts = txopt_get(inet6_sk(sk)); old = NULL; if (txopts) old = txopts->hopopt; new = calipso_opt_insert(old, doi_def, secattr); txopt_put(txopts); if (IS_ERR(new)) return PTR_ERR(new); ret_val = calipso_opt_update(sk, new); kfree(new); return ret_val; } /** * calipso_sock_delattr - Delete the CALIPSO option from a socket * @sk: the socket * * Description: * Removes the CALIPSO option from a socket, if present. * */ static void calipso_sock_delattr(struct sock *sk) { struct ipv6_opt_hdr *new_hop; struct ipv6_txoptions *txopts = txopt_get(inet6_sk(sk)); if (!txopts || !txopts->hopopt) goto done; if (calipso_opt_del(txopts->hopopt, &new_hop)) goto done; calipso_opt_update(sk, new_hop); kfree(new_hop); done: txopt_put(txopts); } /* request sock functions. 
*/ /** * calipso_req_setattr - Add a CALIPSO option to a connection request socket * @req: the connection request socket * @doi_def: the CALIPSO DOI to use * @secattr: the specific security attributes of the socket * * Description: * Set the CALIPSO option on the given socket using the DOI definition and * security attributes passed to the function. Returns zero on success and * negative values on failure. * */ static int calipso_req_setattr(struct request_sock *req, const struct calipso_doi *doi_def, const struct netlbl_lsm_secattr *secattr) { struct ipv6_txoptions *txopts; struct inet_request_sock *req_inet = inet_rsk(req); struct ipv6_opt_hdr *old, *new; struct sock *sk = sk_to_full_sk(req_to_sk(req)); if (req_inet->ipv6_opt && req_inet->ipv6_opt->hopopt) old = req_inet->ipv6_opt->hopopt; else old = NULL; new = calipso_opt_insert(old, doi_def, secattr); if (IS_ERR(new)) return PTR_ERR(new); txopts = ipv6_renew_options(sk, req_inet->ipv6_opt, IPV6_HOPOPTS, new); kfree(new); if (IS_ERR(txopts)) return PTR_ERR(txopts); txopts = xchg(&req_inet->ipv6_opt, txopts); if (txopts) { atomic_sub(txopts->tot_len, &sk->sk_omem_alloc); txopt_put(txopts); } return 0; } /** * calipso_req_delattr - Delete the CALIPSO option from a request socket * @reg: the request socket * * Description: * Removes the CALIPSO option from a request socket, if present. * */ static void calipso_req_delattr(struct request_sock *req) { struct inet_request_sock *req_inet = inet_rsk(req); struct ipv6_opt_hdr *new; struct ipv6_txoptions *txopts; struct sock *sk = sk_to_full_sk(req_to_sk(req)); if (!req_inet->ipv6_opt || !req_inet->ipv6_opt->hopopt) return; if (calipso_opt_del(req_inet->ipv6_opt->hopopt, &new)) return; /* Nothing to do */ txopts = ipv6_renew_options(sk, req_inet->ipv6_opt, IPV6_HOPOPTS, new); if (!IS_ERR(txopts)) { txopts = xchg(&req_inet->ipv6_opt, txopts); if (txopts) { atomic_sub(txopts->tot_len, &sk->sk_omem_alloc); txopt_put(txopts); } } kfree(new); } /* skbuff functions. */ /** * calipso_skbuff_optptr - Find the CALIPSO option in the packet * @skb: the packet * * Description: * Parse the packet's IP header looking for a CALIPSO option. Returns a pointer * to the start of the CALIPSO option on success, NULL if one if not found. * */ static unsigned char *calipso_skbuff_optptr(const struct sk_buff *skb) { const struct ipv6hdr *ip6_hdr = ipv6_hdr(skb); int offset; if (ip6_hdr->nexthdr != NEXTHDR_HOP) return NULL; offset = ipv6_find_tlv(skb, sizeof(*ip6_hdr), IPV6_TLV_CALIPSO); if (offset >= 0) return (unsigned char *)ip6_hdr + offset; return NULL; } /** * calipso_skbuff_setattr - Set the CALIPSO option on a packet * @skb: the packet * @doi_def: the CALIPSO DOI to use * @secattr: the security attributes * * Description: * Set the CALIPSO option on the given packet based on the security attributes. * Returns a pointer to the IP header on success and NULL on failure. 
* */ static int calipso_skbuff_setattr(struct sk_buff *skb, const struct calipso_doi *doi_def, const struct netlbl_lsm_secattr *secattr) { int ret_val; struct ipv6hdr *ip6_hdr; struct ipv6_opt_hdr *hop; unsigned char buf[CALIPSO_MAX_BUFFER]; int len_delta, new_end, pad, payload; unsigned int start, end; ip6_hdr = ipv6_hdr(skb); if (ip6_hdr->nexthdr == NEXTHDR_HOP) { hop = (struct ipv6_opt_hdr *)(ip6_hdr + 1); ret_val = calipso_opt_find(hop, &start, &end); if (ret_val && ret_val != -ENOENT) return ret_val; } else { start = 0; end = 0; } memset(buf, 0, sizeof(buf)); ret_val = calipso_genopt(buf, start & 3, sizeof(buf), doi_def, secattr); if (ret_val < 0) return ret_val; new_end = start + ret_val; /* At this point new_end aligns to 4n, so (new_end & 4) pads to 8n */ pad = ((new_end & 4) + (end & 7)) & 7; len_delta = new_end - (int)end + pad; ret_val = skb_cow(skb, skb_headroom(skb) + len_delta); if (ret_val < 0) return ret_val; ip6_hdr = ipv6_hdr(skb); /* Reset as skb_cow() may have moved it */ if (len_delta) { if (len_delta > 0) skb_push(skb, len_delta); else skb_pull(skb, -len_delta); memmove((char *)ip6_hdr - len_delta, ip6_hdr, sizeof(*ip6_hdr) + start); skb_reset_network_header(skb); ip6_hdr = ipv6_hdr(skb); payload = ntohs(ip6_hdr->payload_len); ip6_hdr->payload_len = htons(payload + len_delta); } hop = (struct ipv6_opt_hdr *)(ip6_hdr + 1); if (start == 0) { struct ipv6_opt_hdr *new_hop = (struct ipv6_opt_hdr *)buf; new_hop->nexthdr = ip6_hdr->nexthdr; new_hop->hdrlen = len_delta / 8 - 1; ip6_hdr->nexthdr = NEXTHDR_HOP; } else { hop->hdrlen += len_delta / 8; } memcpy((char *)hop + start, buf + (start & 3), new_end - start); calipso_pad_write((unsigned char *)hop, new_end, pad); return 0; } /** * calipso_skbuff_delattr - Delete any CALIPSO options from a packet * @skb: the packet * * Description: * Removes any and all CALIPSO options from the given packet. Returns zero on * success, negative values on failure. * */ static int calipso_skbuff_delattr(struct sk_buff *skb) { int ret_val; struct ipv6hdr *ip6_hdr; struct ipv6_opt_hdr *old_hop; u32 old_hop_len, start = 0, end = 0, delta, size, pad; if (!calipso_skbuff_optptr(skb)) return 0; /* since we are changing the packet we should make a copy */ ret_val = skb_cow(skb, skb_headroom(skb)); if (ret_val < 0) return ret_val; ip6_hdr = ipv6_hdr(skb); old_hop = (struct ipv6_opt_hdr *)(ip6_hdr + 1); old_hop_len = ipv6_optlen(old_hop); ret_val = calipso_opt_find(old_hop, &start, &end); if (ret_val) return ret_val; if (start == sizeof(*old_hop) && end == old_hop_len) { /* There's no other option in the header so we delete * the whole thing. 
*/ delta = old_hop_len; size = sizeof(*ip6_hdr); ip6_hdr->nexthdr = old_hop->nexthdr; } else { delta = (end - start) & ~7; if (delta) old_hop->hdrlen -= delta / 8; pad = (end - start) & 7; size = sizeof(*ip6_hdr) + start + pad; calipso_pad_write((unsigned char *)old_hop, start, pad); } if (delta) { skb_pull(skb, delta); memmove((char *)ip6_hdr + delta, ip6_hdr, size); skb_reset_network_header(skb); } return 0; } static const struct netlbl_calipso_ops ops = { .doi_add = calipso_doi_add, .doi_free = calipso_doi_free, .doi_remove = calipso_doi_remove, .doi_getdef = calipso_doi_getdef, .doi_putdef = calipso_doi_putdef, .doi_walk = calipso_doi_walk, .sock_getattr = calipso_sock_getattr, .sock_setattr = calipso_sock_setattr, .sock_delattr = calipso_sock_delattr, .req_setattr = calipso_req_setattr, .req_delattr = calipso_req_delattr, .opt_getattr = calipso_opt_getattr, .skbuff_optptr = calipso_skbuff_optptr, .skbuff_setattr = calipso_skbuff_setattr, .skbuff_delattr = calipso_skbuff_delattr, .cache_invalidate = calipso_cache_invalidate, .cache_add = calipso_cache_add }; /** * calipso_init - Initialize the CALIPSO module * * Description: * Initialize the CALIPSO module and prepare it for use. Returns zero on * success and negative values on failure. * */ int __init calipso_init(void) { int ret_val; ret_val = calipso_cache_init(); if (!ret_val) netlbl_calipso_ops_register(&ops); return ret_val; } void calipso_exit(void) { netlbl_calipso_ops_register(NULL); calipso_cache_invalidate(); kfree(calipso_cache); }
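The 4n + 2 alignment rule that calipso_genopt() implements with its {2, 1, 0, 3} padding table is easy to check in isolation. Below is a minimal userspace sketch (illustration only, not part of calipso.c or the kernel build) that reproduces the same table and verifies that the option's type byte always lands at an offset of the form 4n + 2, whatever the starting offset is.

/* Userspace illustration of the leading-pad computation used by
 * calipso_genopt(); not kernel code.
 */
#include <assert.h>
#include <stdio.h>

int main(void)
{
	static const unsigned char padding[4] = {2, 1, 0, 3};
	unsigned int start;

	for (start = 0; start < 16; start++) {
		unsigned int pad = padding[start & 3];
		unsigned int opt_off = start + pad;	/* where calipso[0], the TLV type byte, lands */

		assert(pad <= 3);
		assert(opt_off % 4 == 2);	/* the 4n + 2 alignment CALIPSO requires */
		printf("start=%2u pad=%u option offset=%2u\n", start, pad, opt_off);
	}
	return 0;
}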
/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved. * Authors: David Chinner and Glauber Costa * * Generic LRU infrastructure */ #ifndef _LRU_LIST_H #define _LRU_LIST_H #include <linux/list.h> #include <linux/nodemask.h> #include <linux/shrinker.h> struct mem_cgroup; /* list_lru_walk_cb has to always return one of those */ enum lru_status { LRU_REMOVED, /* item removed from list */ LRU_REMOVED_RETRY, /* item removed, but lock has been dropped and reacquired */ LRU_ROTATE, /* item referenced, give another pass */ LRU_SKIP, /* item cannot be locked, skip */ LRU_RETRY, /* item not freeable. May drop the lock internally, but has to return locked. */ }; struct list_lru_one { struct list_head list; /* may become negative during memcg reparenting */ long nr_items; }; struct list_lru_memcg { struct rcu_head rcu; /* array of per cgroup lists, indexed by memcg_cache_id */ struct list_lru_one *lru[0]; }; struct list_lru_node { /* protects all lists on the node, including per cgroup */ spinlock_t lock; /* global list, used for the root cgroup in cgroup aware lrus */ struct list_lru_one lru; #ifdef CONFIG_MEMCG_KMEM /* for cgroup aware lrus points to per cgroup lists, otherwise NULL */ struct list_lru_memcg __rcu *memcg_lrus; #endif long nr_items; } ____cacheline_aligned_in_smp; struct list_lru { struct list_lru_node *node; #ifdef CONFIG_MEMCG_KMEM struct list_head list; int shrinker_id; bool memcg_aware; #endif }; void list_lru_destroy(struct list_lru *lru); int __list_lru_init(struct list_lru *lru, bool memcg_aware, struct lock_class_key *key, struct shrinker *shrinker); #define list_lru_init(lru) \ __list_lru_init((lru), false, NULL, NULL) #define list_lru_init_key(lru, key) \ __list_lru_init((lru), false, (key), NULL) #define list_lru_init_memcg(lru, shrinker) \ __list_lru_init((lru), true, NULL, shrinker) int memcg_update_all_list_lrus(int num_memcgs); void memcg_drain_all_list_lrus(int src_idx, struct mem_cgroup *dst_memcg); /** * list_lru_add: add an element to the lru list's tail * @list_lru: the lru pointer * @item: the item to be added. * * If the element is already part of a list, this function returns doing * nothing. Therefore the caller does not need to keep state about whether or * not the element already belongs in the list and is allowed to lazy update * it. Note however that this is valid for *a* list, not *this* list.
If * the caller organizes itself in a way that elements can be in more than * one type of list, it is up to the caller to fully remove the item from * the previous list (with list_lru_del() for instance) before moving it * to @list_lru * * Return value: true if the list was updated, false otherwise */ bool list_lru_add(struct list_lru *lru, struct list_head *item); /** * list_lru_del: delete an element from the lru list * @list_lru: the lru pointer * @item: the item to be deleted. * * This function works analogously to list_lru_add in terms of list * manipulation. The comments about an element already pertaining to * a list are also valid for list_lru_del. * * Return value: true if the list was updated, false otherwise */ bool list_lru_del(struct list_lru *lru, struct list_head *item); /** * list_lru_count_one: return the number of objects currently held by @lru * @lru: the lru pointer. * @nid: the node id to count from. * @memcg: the cgroup to count from. * * Always returns a non-negative number, 0 for empty lists. There is no * guarantee that the list is not updated while the count is being computed. * Callers that want such a guarantee need to provide an outer lock. */ unsigned long list_lru_count_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg); unsigned long list_lru_count_node(struct list_lru *lru, int nid); static inline unsigned long list_lru_shrink_count(struct list_lru *lru, struct shrink_control *sc) { return list_lru_count_one(lru, sc->nid, sc->memcg); } static inline unsigned long list_lru_count(struct list_lru *lru) { long count = 0; int nid; for_each_node_state(nid, N_NORMAL_MEMORY) count += list_lru_count_node(lru, nid); return count; } void list_lru_isolate(struct list_lru_one *list, struct list_head *item); void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item, struct list_head *head); typedef enum lru_status (*list_lru_walk_cb)(struct list_head *item, struct list_lru_one *list, spinlock_t *lock, void *cb_arg); /** * list_lru_walk_one: walk a list_lru, isolating and disposing freeable items. * @lru: the lru pointer. * @nid: the node id to scan from. * @memcg: the cgroup to scan from. * @isolate: callback function that is responsible for deciding what to do with * the item currently being scanned * @cb_arg: opaque type that will be passed to @isolate * @nr_to_walk: how many items to scan. * * This function will scan all elements in a particular list_lru, calling the * @isolate callback for each of those items, along with the current list * spinlock and a caller-provided opaque. The @isolate callback can choose to * drop the lock internally, but *must* return with the lock held. The callback * will return an enum lru_status telling the list_lru infrastructure what to * do with the object being scanned. * * Please note that nr_to_walk does not mean how many objects will be freed, * just how many objects will be scanned. * * Return value: the number of objects effectively removed from the LRU. */ unsigned long list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg, list_lru_walk_cb isolate, void *cb_arg, unsigned long *nr_to_walk); /** * list_lru_walk_one_irq: walk a list_lru, isolating and disposing freeable items. * @lru: the lru pointer. * @nid: the node id to scan from. * @memcg: the cgroup to scan from. * @isolate: callback function that is responsible for deciding what to do with * the item currently being scanned * @cb_arg: opaque type that will be passed to @isolate * @nr_to_walk: how many items to scan.
* * Same as @list_lru_walk_one except that the spinlock is acquired with * spin_lock_irq(). */ unsigned long list_lru_walk_one_irq(struct list_lru *lru, int nid, struct mem_cgroup *memcg, list_lru_walk_cb isolate, void *cb_arg, unsigned long *nr_to_walk); unsigned long list_lru_walk_node(struct list_lru *lru, int nid, list_lru_walk_cb isolate, void *cb_arg, unsigned long *nr_to_walk); static inline unsigned long list_lru_shrink_walk(struct list_lru *lru, struct shrink_control *sc, list_lru_walk_cb isolate, void *cb_arg) { return list_lru_walk_one(lru, sc->nid, sc->memcg, isolate, cb_arg, &sc->nr_to_scan); } static inline unsigned long list_lru_shrink_walk_irq(struct list_lru *lru, struct shrink_control *sc, list_lru_walk_cb isolate, void *cb_arg) { return list_lru_walk_one_irq(lru, sc->nid, sc->memcg, isolate, cb_arg, &sc->nr_to_scan); } static inline unsigned long list_lru_walk(struct list_lru *lru, list_lru_walk_cb isolate, void *cb_arg, unsigned long nr_to_walk) { long isolated = 0; int nid; for_each_node_state(nid, N_NORMAL_MEMORY) { isolated += list_lru_walk_node(lru, nid, isolate, cb_arg, &nr_to_walk); if (nr_to_walk <= 0) break; } return isolated; } #endif /* _LRU_LIST_H */
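The header above only declares the list_lru API; a typical consumer pairs it with a shrinker. The sketch below is a hypothetical usage example, not taken from any real driver: the names my_obj, my_lru, my_isolate and my_shrink_scan are invented, the include path is assumed, and it would only build inside a kernel tree. It shows the usual pattern of putting an object on the LRU once it becomes reclaimable and freeing it from the walk callback via list_lru_isolate().

/* Hypothetical usage sketch for the list_lru API declared above; names are
 * invented for illustration and this is not part of any real driver.
 */
#include <linux/kernel.h>
#include <linux/list_lru.h>	/* assumed include path for the header above */
#include <linux/slab.h>

struct my_obj {
	struct list_head lru_node;	/* linked into my_lru while reclaimable */
	/* ... object payload ... */
};

static struct list_lru my_lru;

static int my_cache_init(void)
{
	/* non-memcg-aware LRU; returns 0 on success */
	return list_lru_init(&my_lru);
}

static void my_obj_unused(struct my_obj *obj)
{
	/* put the object on the LRU tail once nothing references it */
	list_lru_add(&my_lru, &obj->lru_node);
}

static enum lru_status my_isolate(struct list_head *item,
				  struct list_lru_one *list,
				  spinlock_t *lock, void *cb_arg)
{
	struct my_obj *obj = container_of(item, struct my_obj, lru_node);

	/* drop the item from the per-node/per-memcg list while the lock is held */
	list_lru_isolate(list, item);
	kfree(obj);			/* kfree() does not sleep, so it is safe here */
	return LRU_REMOVED;
}

static unsigned long my_shrink_scan(struct shrinker *shrink,
				    struct shrink_control *sc)
{
	/* scan up to sc->nr_to_scan objects on sc->nid / sc->memcg */
	return list_lru_shrink_walk(&my_lru, sc, my_isolate, NULL);
}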
/* * af_llc.c - LLC User Interface SAPs * Description: * Functions in this module are implementation of socket based llc * communications for the Linux operating system. Support of llc class * one and class two is provided via SOCK_DGRAM and SOCK_STREAM * respectively. * * An llc2 connection is (mac + sap), only one llc2 sap connection * is allowed per mac. Though one sap may have multiple mac + sap * connections. * * Copyright (c) 2001 by Jay Schulist <jschlst@samba.org> * 2002-2003 by Arnaldo Carvalho de Melo <acme@conectiva.com.br> * * This program can be redistributed or modified under the terms of the * GNU General Public License as published by the Free Software Foundation. * This program is distributed without any warranty or implied warranty * of merchantability or fitness for a particular purpose. * * See the GNU General Public License for more details.
*/ #include <linux/compiler.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/rtnetlink.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/sched/signal.h> #include <net/llc.h> #include <net/llc_sap.h> #include <net/llc_pdu.h> #include <net/llc_conn.h> #include <net/tcp_states.h> /* remember: uninitialized global data is zeroed because its in .bss */ static u16 llc_ui_sap_last_autoport = LLC_SAP_DYN_START; static u16 llc_ui_sap_link_no_max[256]; static struct sockaddr_llc llc_ui_addrnull; static const struct proto_ops llc_ui_ops; static bool llc_ui_wait_for_conn(struct sock *sk, long timeout); static int llc_ui_wait_for_disc(struct sock *sk, long timeout); static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout); #if 0 #define dprintk(args...) printk(KERN_DEBUG args) #else #define dprintk(args...) #endif /* Maybe we'll add some more in the future. */ #define LLC_CMSG_PKTINFO 1 /** * llc_ui_next_link_no - return the next unused link number for a sap * @sap: Address of sap to get link number from. * * Return the next unused link number for a given sap. */ static inline u16 llc_ui_next_link_no(int sap) { return llc_ui_sap_link_no_max[sap]++; } /** * llc_proto_type - return eth protocol for ARP header type * @arphrd: ARP header type. * * Given an ARP header type return the corresponding ethernet protocol. */ static inline __be16 llc_proto_type(u16 arphrd) { return htons(ETH_P_802_2); } /** * llc_ui_addr_null - determines if a address structure is null * @addr: Address to test if null. */ static inline u8 llc_ui_addr_null(struct sockaddr_llc *addr) { return !memcmp(addr, &llc_ui_addrnull, sizeof(*addr)); } /** * llc_ui_header_len - return length of llc header based on operation * @sk: Socket which contains a valid llc socket type. * @addr: Complete sockaddr_llc structure received from the user. * * Provide the length of the llc header depending on what kind of * operation the user would like to perform and the type of socket. * Returns the correct llc header length. */ static inline u8 llc_ui_header_len(struct sock *sk, struct sockaddr_llc *addr) { u8 rc = LLC_PDU_LEN_U; if (addr->sllc_test) rc = LLC_PDU_LEN_U; else if (addr->sllc_xid) /* We need to expand header to sizeof(struct llc_xid_info) * since llc_pdu_init_as_xid_cmd() sets 4,5,6 bytes of LLC header * as XID PDU. In llc_ui_sendmsg() we reserved header size and then * filled all other space with user data. If we won't reserve this * bytes, llc_pdu_init_as_xid_cmd() will overwrite user data */ rc = LLC_PDU_LEN_U_XID; else if (sk->sk_type == SOCK_STREAM) rc = LLC_PDU_LEN_I; return rc; } /** * llc_ui_send_data - send data via reliable llc2 connection * @sk: Connection the socket is using. * @skb: Data the user wishes to send. * @noblock: can we block waiting for data? * * Send data via reliable llc2 connection. * Returns 0 upon success, non-zero if action did not succeed. * * This function always consumes a reference to the skb. 
*/ static int llc_ui_send_data(struct sock* sk, struct sk_buff *skb, int noblock) { struct llc_sock* llc = llc_sk(sk); if (unlikely(llc_data_accept_state(llc->state) || llc->remote_busy_flag || llc->p_flag)) { long timeout = sock_sndtimeo(sk, noblock); int rc; rc = llc_ui_wait_for_busy_core(sk, timeout); if (rc) { kfree_skb(skb); return rc; } } return llc_build_and_send_pkt(sk, skb); } static void llc_ui_sk_init(struct socket *sock, struct sock *sk) { sock_graft(sk, sock); sk->sk_type = sock->type; sock->ops = &llc_ui_ops; } static struct proto llc_proto = { .name = "LLC", .owner = THIS_MODULE, .obj_size = sizeof(struct llc_sock), .slab_flags = SLAB_TYPESAFE_BY_RCU, }; /** * llc_ui_create - alloc and init a new llc_ui socket * @net: network namespace (must be default network) * @sock: Socket to initialize and attach allocated sk to. * @protocol: Unused. * @kern: on behalf of kernel or userspace * * Allocate and initialize a new llc_ui socket, validate the user wants a * socket type we have available. * Returns 0 upon success, negative upon failure. */ static int llc_ui_create(struct net *net, struct socket *sock, int protocol, int kern) { struct sock *sk; int rc = -ESOCKTNOSUPPORT; if (!ns_capable(net->user_ns, CAP_NET_RAW)) return -EPERM; if (!net_eq(net, &init_net)) return -EAFNOSUPPORT; if (likely(sock->type == SOCK_DGRAM || sock->type == SOCK_STREAM)) { rc = -ENOMEM; sk = llc_sk_alloc(net, PF_LLC, GFP_KERNEL, &llc_proto, kern); if (sk) { rc = 0; llc_ui_sk_init(sock, sk); } } return rc; } /** * llc_ui_release - shutdown socket * @sock: Socket to release. * * Shutdown and deallocate an existing socket. */ static int llc_ui_release(struct socket *sock) { struct sock *sk = sock->sk; struct llc_sock *llc; if (unlikely(sk == NULL)) goto out; sock_hold(sk); lock_sock(sk); llc = llc_sk(sk); dprintk("%s: closing local(%02X) remote(%02X)\n", __func__, llc->laddr.lsap, llc->daddr.lsap); if (!llc_send_disc(sk)) llc_ui_wait_for_disc(sk, sk->sk_rcvtimeo); if (!sock_flag(sk, SOCK_ZAPPED)) { struct llc_sap *sap = llc->sap; /* Hold this for release_sock(), so that llc_backlog_rcv() * could still use it. */ llc_sap_hold(sap); llc_sap_remove_socket(llc->sap, sk); release_sock(sk); llc_sap_put(sap); } else { release_sock(sk); } if (llc->dev) dev_put(llc->dev); sock_put(sk); llc_sk_free(sk); out: return 0; } /** * llc_ui_autoport - provide dynamically allocate SAP number * * Provide the caller with a dynamically allocated SAP number according * to the rules that are set in this function. Returns: 0, upon failure, * SAP number otherwise. */ static int llc_ui_autoport(void) { struct llc_sap *sap; int i, tries = 0; while (tries < LLC_SAP_DYN_TRIES) { for (i = llc_ui_sap_last_autoport; i < LLC_SAP_DYN_STOP; i += 2) { sap = llc_sap_find(i); if (!sap) { llc_ui_sap_last_autoport = i + 2; goto out; } llc_sap_put(sap); } llc_ui_sap_last_autoport = LLC_SAP_DYN_START; tries++; } i = 0; out: return i; } /** * llc_ui_autobind - automatically bind a socket to a sap * @sock: socket to bind * @addr: address to connect to * * Used by llc_ui_connect and llc_ui_sendmsg when the user hasn't * specifically used llc_ui_bind to bind to an specific address/sap * * Returns: 0 upon success, negative otherwise. 
*/ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr) { struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); struct net_device *dev = NULL; struct llc_sap *sap; int rc = -EINVAL; if (!sock_flag(sk, SOCK_ZAPPED)) goto out; if (!addr->sllc_arphrd) addr->sllc_arphrd = ARPHRD_ETHER; if (addr->sllc_arphrd != ARPHRD_ETHER) goto out; rc = -ENODEV; if (sk->sk_bound_dev_if) { dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if); if (dev && addr->sllc_arphrd != dev->type) { dev_put(dev); dev = NULL; } } else dev = dev_getfirstbyhwtype(&init_net, addr->sllc_arphrd); if (!dev) goto out; rc = -EUSERS; llc->laddr.lsap = llc_ui_autoport(); if (!llc->laddr.lsap) goto out; rc = -EBUSY; /* some other network layer is using the sap */ sap = llc_sap_open(llc->laddr.lsap, NULL); if (!sap) goto out; /* Note: We do not expect errors from this point. */ llc->dev = dev; dev = NULL; memcpy(llc->laddr.mac, llc->dev->dev_addr, IFHWADDRLEN); memcpy(&llc->addr, addr, sizeof(llc->addr)); /* assign new connection to its SAP */ llc_sap_add_socket(sap, sk); sock_reset_flag(sk, SOCK_ZAPPED); rc = 0; out: dev_put(dev); return rc; } /** * llc_ui_bind - bind a socket to a specific address. * @sock: Socket to bind an address to. * @uaddr: Address the user wants the socket bound to. * @addrlen: Length of the uaddr structure. * * Bind a socket to a specific address. For llc a user is able to bind to * a specific sap only or mac + sap. * If the user desires to bind to a specific mac + sap, it is possible to * have multiple sap connections via multiple macs. * Bind and autobind for that matter must enforce the correct sap usage * otherwise all hell will break loose. * Returns: 0 upon success, negative otherwise. */ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) { struct sockaddr_llc *addr = (struct sockaddr_llc *)uaddr; struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); struct net_device *dev = NULL; struct llc_sap *sap; int rc = -EINVAL; dprintk("%s: binding %02X\n", __func__, addr->sllc_sap); lock_sock(sk); if (unlikely(!sock_flag(sk, SOCK_ZAPPED) || addrlen != sizeof(*addr))) goto out; rc = -EAFNOSUPPORT; if (!addr->sllc_arphrd) addr->sllc_arphrd = ARPHRD_ETHER; if (unlikely(addr->sllc_family != AF_LLC || addr->sllc_arphrd != ARPHRD_ETHER)) goto out; rc = -ENODEV; rcu_read_lock(); if (sk->sk_bound_dev_if) { dev = dev_get_by_index_rcu(&init_net, sk->sk_bound_dev_if); if (dev) { if (is_zero_ether_addr(addr->sllc_mac)) memcpy(addr->sllc_mac, dev->dev_addr, IFHWADDRLEN); if (addr->sllc_arphrd != dev->type || !ether_addr_equal(addr->sllc_mac, dev->dev_addr)) { rc = -EINVAL; dev = NULL; } } } else { dev = dev_getbyhwaddr_rcu(&init_net, addr->sllc_arphrd, addr->sllc_mac); } if (dev) dev_hold(dev); rcu_read_unlock(); if (!dev) goto out; if (!addr->sllc_sap) { rc = -EUSERS; addr->sllc_sap = llc_ui_autoport(); if (!addr->sllc_sap) goto out; } sap = llc_sap_find(addr->sllc_sap); if (!sap) { sap = llc_sap_open(addr->sllc_sap, NULL); rc = -EBUSY; /* some other network layer is using the sap */ if (!sap) goto out; } else { struct llc_addr laddr, daddr; struct sock *ask; memset(&laddr, 0, sizeof(laddr)); memset(&daddr, 0, sizeof(daddr)); /* * FIXME: check if the address is multicast, * only SOCK_DGRAM can do this. */ memcpy(laddr.mac, addr->sllc_mac, IFHWADDRLEN); laddr.lsap = addr->sllc_sap; rc = -EADDRINUSE; /* mac + sap clash. 
*/ ask = llc_lookup_established(sap, &daddr, &laddr); if (ask) { sock_put(ask); goto out_put; } } /* Note: We do not expect errors from this point. */ llc->dev = dev; dev = NULL; llc->laddr.lsap = addr->sllc_sap; memcpy(llc->laddr.mac, addr->sllc_mac, IFHWADDRLEN); memcpy(&llc->addr, addr, sizeof(llc->addr)); /* assign new connection to its SAP */ llc_sap_add_socket(sap, sk); sock_reset_flag(sk, SOCK_ZAPPED); rc = 0; out_put: llc_sap_put(sap); out: dev_put(dev); release_sock(sk); return rc; } /** * llc_ui_shutdown - shutdown a connect llc2 socket. * @sock: Socket to shutdown. * @how: What part of the socket to shutdown. * * Shutdown a connected llc2 socket. Currently this function only supports * shutting down both sends and receives (2), we could probably make this * function such that a user can shutdown only half the connection but not * right now. * Returns: 0 upon success, negative otherwise. */ static int llc_ui_shutdown(struct socket *sock, int how) { struct sock *sk = sock->sk; int rc = -ENOTCONN; lock_sock(sk); if (unlikely(sk->sk_state != TCP_ESTABLISHED)) goto out; rc = -EINVAL; if (how != 2) goto out; rc = llc_send_disc(sk); if (!rc) rc = llc_ui_wait_for_disc(sk, sk->sk_rcvtimeo); /* Wake up anyone sleeping in poll */ sk->sk_state_change(sk); out: release_sock(sk); return rc; } /** * llc_ui_connect - Connect to a remote llc2 mac + sap. * @sock: Socket which will be connected to the remote destination. * @uaddr: Remote and possibly the local address of the new connection. * @addrlen: Size of uaddr structure. * @flags: Operational flags specified by the user. * * Connect to a remote llc2 mac + sap. The caller must specify the * destination mac and address to connect to. If the user hasn't previously * called bind(2) with a smac the address of the first interface of the * specified arp type will be used. * This function will autobind if user did not previously call bind. * Returns: 0 upon success, negative otherwise. */ static int llc_ui_connect(struct socket *sock, struct sockaddr *uaddr, int addrlen, int flags) { struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); struct sockaddr_llc *addr = (struct sockaddr_llc *)uaddr; int rc = -EINVAL; lock_sock(sk); if (unlikely(addrlen != sizeof(*addr))) goto out; rc = -EAFNOSUPPORT; if (unlikely(addr->sllc_family != AF_LLC)) goto out; if (unlikely(sk->sk_type != SOCK_STREAM)) goto out; rc = -EALREADY; if (unlikely(sock->state == SS_CONNECTING)) goto out; /* bind connection to sap if user hasn't done it. */ if (sock_flag(sk, SOCK_ZAPPED)) { /* bind to sap with null dev, exclusive */ rc = llc_ui_autobind(sock, addr); if (rc) goto out; } llc->daddr.lsap = addr->sllc_sap; memcpy(llc->daddr.mac, addr->sllc_mac, IFHWADDRLEN); sock->state = SS_CONNECTING; sk->sk_state = TCP_SYN_SENT; llc->link = llc_ui_next_link_no(llc->sap->laddr.lsap); rc = llc_establish_connection(sk, llc->dev->dev_addr, addr->sllc_mac, addr->sllc_sap); if (rc) { dprintk("%s: llc_ui_send_conn failed :-(\n", __func__); sock->state = SS_UNCONNECTED; sk->sk_state = TCP_CLOSE; goto out; } if (sk->sk_state == TCP_SYN_SENT) { const long timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); if (!timeo || !llc_ui_wait_for_conn(sk, timeo)) goto out; rc = sock_intr_errno(timeo); if (signal_pending(current)) goto out; } if (sk->sk_state == TCP_CLOSE) goto sock_error; sock->state = SS_CONNECTED; rc = 0; out: release_sock(sk); return rc; sock_error: rc = sock_error(sk) ? 
: -ECONNABORTED; sock->state = SS_UNCONNECTED; goto out; } /** * llc_ui_listen - allow a normal socket to accept incoming connections * @sock: Socket to allow incoming connections on. * @backlog: Number of connections to queue. * * Allow a normal socket to accept incoming connections. * Returns 0 upon success, negative otherwise. */ static int llc_ui_listen(struct socket *sock, int backlog) { struct sock *sk = sock->sk; int rc = -EINVAL; lock_sock(sk); if (unlikely(sock->state != SS_UNCONNECTED)) goto out; rc = -EOPNOTSUPP; if (unlikely(sk->sk_type != SOCK_STREAM)) goto out; rc = -EAGAIN; if (sock_flag(sk, SOCK_ZAPPED)) goto out; rc = 0; if (!(unsigned int)backlog) /* BSDism */ backlog = 1; sk->sk_max_ack_backlog = backlog; if (sk->sk_state != TCP_LISTEN) { sk->sk_ack_backlog = 0; sk->sk_state = TCP_LISTEN; } sk->sk_socket->flags |= __SO_ACCEPTCON; out: release_sock(sk); return rc; } static int llc_ui_wait_for_disc(struct sock *sk, long timeout) { DEFINE_WAIT_FUNC(wait, woken_wake_function); int rc = 0; add_wait_queue(sk_sleep(sk), &wait); while (1) { if (sk_wait_event(sk, &timeout, sk->sk_state == TCP_CLOSE, &wait)) break; rc = -ERESTARTSYS; if (signal_pending(current)) break; rc = -EAGAIN; if (!timeout) break; rc = 0; } remove_wait_queue(sk_sleep(sk), &wait); return rc; } static bool llc_ui_wait_for_conn(struct sock *sk, long timeout) { DEFINE_WAIT_FUNC(wait, woken_wake_function); add_wait_queue(sk_sleep(sk), &wait); while (1) { if (sk_wait_event(sk, &timeout, sk->sk_state != TCP_SYN_SENT, &wait)) break; if (signal_pending(current) || !timeout) break; } remove_wait_queue(sk_sleep(sk), &wait); return timeout; } static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout) { DEFINE_WAIT_FUNC(wait, woken_wake_function); struct llc_sock *llc = llc_sk(sk); int rc; add_wait_queue(sk_sleep(sk), &wait); while (1) { rc = 0; if (sk_wait_event(sk, &timeout, (sk->sk_shutdown & RCV_SHUTDOWN) || (!llc_data_accept_state(llc->state) && !llc->remote_busy_flag && !llc->p_flag), &wait)) break; rc = -ERESTARTSYS; if (signal_pending(current)) break; rc = -EAGAIN; if (!timeout) break; } remove_wait_queue(sk_sleep(sk), &wait); return rc; } static int llc_wait_data(struct sock *sk, long timeo) { int rc; while (1) { /* * POSIX 1003.1g mandates this order. */ rc = sock_error(sk); if (rc) break; rc = 0; if (sk->sk_shutdown & RCV_SHUTDOWN) break; rc = -EAGAIN; if (!timeo) break; rc = sock_intr_errno(timeo); if (signal_pending(current)) break; rc = 0; if (sk_wait_data(sk, &timeo, NULL)) break; } return rc; } static void llc_cmsg_rcv(struct msghdr *msg, struct sk_buff *skb) { struct llc_sock *llc = llc_sk(skb->sk); if (llc->cmsg_flags & LLC_CMSG_PKTINFO) { struct llc_pktinfo info; memset(&info, 0, sizeof(info)); info.lpi_ifindex = llc_sk(skb->sk)->dev->ifindex; llc_pdu_decode_dsap(skb, &info.lpi_sap); llc_pdu_decode_da(skb, info.lpi_mac); put_cmsg(msg, SOL_LLC, LLC_OPT_PKTINFO, sizeof(info), &info); } } /** * llc_ui_accept - accept a new incoming connection. * @sock: Socket which connections arrive on. * @newsock: Socket to move incoming connection to. * @flags: User specified operational flags. * @kern: If the socket is kernel internal * * Accept a new incoming connection. * Returns 0 upon success, negative otherwise. 
*/ static int llc_ui_accept(struct socket *sock, struct socket *newsock, int flags, bool kern) { struct sock *sk = sock->sk, *newsk; struct llc_sock *llc, *newllc; struct sk_buff *skb; int rc = -EOPNOTSUPP; dprintk("%s: accepting on %02X\n", __func__, llc_sk(sk)->laddr.lsap); lock_sock(sk); if (unlikely(sk->sk_type != SOCK_STREAM)) goto out; rc = -EINVAL; if (unlikely(sock->state != SS_UNCONNECTED || sk->sk_state != TCP_LISTEN)) goto out; /* wait for a connection to arrive. */ if (skb_queue_empty(&sk->sk_receive_queue)) { rc = llc_wait_data(sk, sk->sk_rcvtimeo); if (rc) goto out; } dprintk("%s: got a new connection on %02X\n", __func__, llc_sk(sk)->laddr.lsap); skb = skb_dequeue(&sk->sk_receive_queue); rc = -EINVAL; if (!skb->sk) goto frees; rc = 0; newsk = skb->sk; /* attach connection to a new socket. */ llc_ui_sk_init(newsock, newsk); sock_reset_flag(newsk, SOCK_ZAPPED); newsk->sk_state = TCP_ESTABLISHED; newsock->state = SS_CONNECTED; llc = llc_sk(sk); newllc = llc_sk(newsk); memcpy(&newllc->addr, &llc->addr, sizeof(newllc->addr)); newllc->link = llc_ui_next_link_no(newllc->laddr.lsap); /* put original socket back into a clean listen state. */ sk->sk_state = TCP_LISTEN; sk->sk_ack_backlog--; dprintk("%s: ok success on %02X, client on %02X\n", __func__, llc_sk(sk)->addr.sllc_sap, newllc->daddr.lsap); frees: kfree_skb(skb); out: release_sock(sk); return rc; } /** * llc_ui_recvmsg - copy received data to the socket user. * @sock: Socket to copy data from. * @msg: Various user space related information. * @len: Size of user buffer. * @flags: User specified flags. * * Copy received data to the socket user. * Returns non-negative upon success, negative otherwise. */ static int llc_ui_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { DECLARE_SOCKADDR(struct sockaddr_llc *, uaddr, msg->msg_name); const int nonblock = flags & MSG_DONTWAIT; struct sk_buff *skb = NULL; struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); size_t copied = 0; u32 peek_seq = 0; u32 *seq, skb_len; unsigned long used; int target; /* Read at least this many bytes */ long timeo; lock_sock(sk); copied = -ENOTCONN; if (unlikely(sk->sk_type == SOCK_STREAM && sk->sk_state == TCP_LISTEN)) goto out; timeo = sock_rcvtimeo(sk, nonblock); seq = &llc->copied_seq; if (flags & MSG_PEEK) { peek_seq = llc->copied_seq; seq = &peek_seq; } target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); copied = 0; do { u32 offset; /* * We need to check signals first, to get correct SIGURG * handling. FIXME: Need to check this doesn't impact 1003.1g * and move it down to the bottom of the loop */ if (signal_pending(current)) { if (copied) break; copied = timeo ? sock_intr_errno(timeo) : -EAGAIN; break; } /* Next get a buffer. */ skb = skb_peek(&sk->sk_receive_queue); if (skb) { offset = *seq; goto found_ok_skb; } /* Well, if we have backlog, try to process it now yet. */ if (copied >= target && !READ_ONCE(sk->sk_backlog.tail)) break; if (copied) { if (sk->sk_err || sk->sk_state == TCP_CLOSE || (sk->sk_shutdown & RCV_SHUTDOWN) || !timeo || (flags & MSG_PEEK)) break; } else { if (sock_flag(sk, SOCK_DONE)) break; if (sk->sk_err) { copied = sock_error(sk); break; } if (sk->sk_shutdown & RCV_SHUTDOWN) break; if (sk->sk_type == SOCK_STREAM && sk->sk_state == TCP_CLOSE) { if (!sock_flag(sk, SOCK_DONE)) { /* * This occurs when user tries to read * from never connected socket. */ copied = -ENOTCONN; break; } break; } if (!timeo) { copied = -EAGAIN; break; } } if (copied >= target) { /* Do not sleep, just process backlog. 
*/ release_sock(sk); lock_sock(sk); } else sk_wait_data(sk, &timeo, NULL); if ((flags & MSG_PEEK) && peek_seq != llc->copied_seq) { net_dbg_ratelimited("LLC(%s:%d): Application bug, race in MSG_PEEK\n", current->comm, task_pid_nr(current)); peek_seq = llc->copied_seq; } continue; found_ok_skb: skb_len = skb->len; /* Ok so how much can we use? */ used = skb->len - offset; if (len < used) used = len; if (!(flags & MSG_TRUNC)) { int rc = skb_copy_datagram_msg(skb, offset, msg, used); if (rc) { /* Exception. Bailout! */ if (!copied) copied = -EFAULT; break; } } *seq += used; copied += used; len -= used; /* For non stream protcols we get one packet per recvmsg call */ if (sk->sk_type != SOCK_STREAM) goto copy_uaddr; if (!(flags & MSG_PEEK)) { skb_unlink(skb, &sk->sk_receive_queue); kfree_skb(skb); *seq = 0; } /* Partial read */ if (used + offset < skb_len) continue; } while (len > 0); out: release_sock(sk); return copied; copy_uaddr: if (uaddr != NULL && skb != NULL) { memcpy(uaddr, llc_ui_skb_cb(skb), sizeof(*uaddr)); msg->msg_namelen = sizeof(*uaddr); } if (llc_sk(sk)->cmsg_flags) llc_cmsg_rcv(msg, skb); if (!(flags & MSG_PEEK)) { skb_unlink(skb, &sk->sk_receive_queue); kfree_skb(skb); *seq = 0; } goto out; } /** * llc_ui_sendmsg - Transmit data provided by the socket user. * @sock: Socket to transmit data from. * @msg: Various user related information. * @len: Length of data to transmit. * * Transmit data provided by the socket user. * Returns non-negative upon success, negative otherwise. */ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); DECLARE_SOCKADDR(struct sockaddr_llc *, addr, msg->msg_name); int flags = msg->msg_flags; int noblock = flags & MSG_DONTWAIT; struct sk_buff *skb = NULL; size_t size = 0; int rc = -EINVAL, copied = 0, hdrlen; dprintk("%s: sending from %02X to %02X\n", __func__, llc->laddr.lsap, llc->daddr.lsap); lock_sock(sk); if (addr) { if (msg->msg_namelen < sizeof(*addr)) goto out; } else { if (llc_ui_addr_null(&llc->addr)) goto out; addr = &llc->addr; } /* must bind connection to sap if user hasn't done it. */ if (sock_flag(sk, SOCK_ZAPPED)) { /* bind to sap with null dev, exclusive. */ rc = llc_ui_autobind(sock, addr); if (rc) goto out; } hdrlen = llc->dev->hard_header_len + llc_ui_header_len(sk, addr); size = hdrlen + len; if (size > llc->dev->mtu) size = llc->dev->mtu; copied = size - hdrlen; rc = -EINVAL; if (copied < 0) goto out; release_sock(sk); skb = sock_alloc_send_skb(sk, size, noblock, &rc); lock_sock(sk); if (!skb) goto out; skb->dev = llc->dev; skb->protocol = llc_proto_type(addr->sllc_arphrd); skb_reserve(skb, hdrlen); rc = memcpy_from_msg(skb_put(skb, copied), msg, copied); if (rc) goto out; if (sk->sk_type == SOCK_DGRAM || addr->sllc_ua) { llc_build_and_send_ui_pkt(llc->sap, skb, addr->sllc_mac, addr->sllc_sap); skb = NULL; goto out; } if (addr->sllc_test) { llc_build_and_send_test_pkt(llc->sap, skb, addr->sllc_mac, addr->sllc_sap); skb = NULL; goto out; } if (addr->sllc_xid) { llc_build_and_send_xid_pkt(llc->sap, skb, addr->sllc_mac, addr->sllc_sap); skb = NULL; goto out; } rc = -ENOPROTOOPT; if (!(sk->sk_type == SOCK_STREAM && !addr->sllc_ua)) goto out; rc = llc_ui_send_data(sk, skb, noblock); skb = NULL; out: kfree_skb(skb); if (rc) dprintk("%s: failed sending from %02X to %02X: %d\n", __func__, llc->laddr.lsap, llc->daddr.lsap, rc); release_sock(sk); return rc ? 
: copied; } /** * llc_ui_getname - return the address info of a socket * @sock: Socket to get address of. * @uaddr: Address structure to return information. * @uaddrlen: Length of address structure. * @peer: Does user want local or remote address information. * * Return the address information of a socket. */ static int llc_ui_getname(struct socket *sock, struct sockaddr *uaddr, int peer) { struct sockaddr_llc sllc; struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); int rc = -EBADF; memset(&sllc, 0, sizeof(sllc)); lock_sock(sk); if (sock_flag(sk, SOCK_ZAPPED)) goto out; if (peer) { rc = -ENOTCONN; if (sk->sk_state != TCP_ESTABLISHED) goto out; if(llc->dev) sllc.sllc_arphrd = llc->dev->type; sllc.sllc_sap = llc->daddr.lsap; memcpy(&sllc.sllc_mac, &llc->daddr.mac, IFHWADDRLEN); } else { rc = -EINVAL; if (!llc->sap) goto out; sllc.sllc_sap = llc->sap->laddr.lsap; if (llc->dev) { sllc.sllc_arphrd = llc->dev->type; memcpy(&sllc.sllc_mac, llc->dev->dev_addr, IFHWADDRLEN); } } sllc.sllc_family = AF_LLC; memcpy(uaddr, &sllc, sizeof(sllc)); rc = sizeof(sllc); out: release_sock(sk); return rc; } /** * llc_ui_ioctl - io controls for PF_LLC * @sock: Socket to get/set info * @cmd: command * @arg: optional argument for cmd * * get/set info on llc sockets */ static int llc_ui_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { return -ENOIOCTLCMD; } /** * llc_ui_setsockopt - set various connection specific parameters. * @sock: Socket to set options on. * @level: Socket level user is requesting operations on. * @optname: Operation name. * @optval: User provided operation data. * @optlen: Length of optval. * * Set various connection specific parameters. */ static int llc_ui_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen) { struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); unsigned int opt; int rc = -EINVAL; lock_sock(sk); if (unlikely(level != SOL_LLC || optlen != sizeof(int))) goto out; rc = get_user(opt, (int __user *)optval); if (rc) goto out; rc = -EINVAL; switch (optname) { case LLC_OPT_RETRY: if (opt > LLC_OPT_MAX_RETRY) goto out; llc->n2 = opt; break; case LLC_OPT_SIZE: if (opt > LLC_OPT_MAX_SIZE) goto out; llc->n1 = opt; break; case LLC_OPT_ACK_TMR_EXP: if (opt > LLC_OPT_MAX_ACK_TMR_EXP) goto out; llc->ack_timer.expire = opt * HZ; break; case LLC_OPT_P_TMR_EXP: if (opt > LLC_OPT_MAX_P_TMR_EXP) goto out; llc->pf_cycle_timer.expire = opt * HZ; break; case LLC_OPT_REJ_TMR_EXP: if (opt > LLC_OPT_MAX_REJ_TMR_EXP) goto out; llc->rej_sent_timer.expire = opt * HZ; break; case LLC_OPT_BUSY_TMR_EXP: if (opt > LLC_OPT_MAX_BUSY_TMR_EXP) goto out; llc->busy_state_timer.expire = opt * HZ; break; case LLC_OPT_TX_WIN: if (opt > LLC_OPT_MAX_WIN) goto out; llc->k = opt; break; case LLC_OPT_RX_WIN: if (opt > LLC_OPT_MAX_WIN) goto out; llc->rw = opt; break; case LLC_OPT_PKTINFO: if (opt) llc->cmsg_flags |= LLC_CMSG_PKTINFO; else llc->cmsg_flags &= ~LLC_CMSG_PKTINFO; break; default: rc = -ENOPROTOOPT; goto out; } rc = 0; out: release_sock(sk); return rc; } /** * llc_ui_getsockopt - get connection specific socket info * @sock: Socket to get information from. * @level: Socket level user is requesting operations on. * @optname: Operation name. * @optval: Variable to return operation data in. * @optlen: Length of optval. * * Get connection specific socket information. 
*/ static int llc_ui_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); int val = 0, len = 0, rc = -EINVAL; lock_sock(sk); if (unlikely(level != SOL_LLC)) goto out; rc = get_user(len, optlen); if (rc) goto out; rc = -EINVAL; if (len != sizeof(int)) goto out; switch (optname) { case LLC_OPT_RETRY: val = llc->n2; break; case LLC_OPT_SIZE: val = llc->n1; break; case LLC_OPT_ACK_TMR_EXP: val = llc->ack_timer.expire / HZ; break; case LLC_OPT_P_TMR_EXP: val = llc->pf_cycle_timer.expire / HZ; break; case LLC_OPT_REJ_TMR_EXP: val = llc->rej_sent_timer.expire / HZ; break; case LLC_OPT_BUSY_TMR_EXP: val = llc->busy_state_timer.expire / HZ; break; case LLC_OPT_TX_WIN: val = llc->k; break; case LLC_OPT_RX_WIN: val = llc->rw; break; case LLC_OPT_PKTINFO: val = (llc->cmsg_flags & LLC_CMSG_PKTINFO) != 0; break; default: rc = -ENOPROTOOPT; goto out; } rc = 0; if (put_user(len, optlen) || copy_to_user(optval, &val, len)) rc = -EFAULT; out: release_sock(sk); return rc; } static const struct net_proto_family llc_ui_family_ops = { .family = PF_LLC, .create = llc_ui_create, .owner = THIS_MODULE, }; static const struct proto_ops llc_ui_ops = { .family = PF_LLC, .owner = THIS_MODULE, .release = llc_ui_release, .bind = llc_ui_bind, .connect = llc_ui_connect, .socketpair = sock_no_socketpair, .accept = llc_ui_accept, .getname = llc_ui_getname, .poll = datagram_poll, .ioctl = llc_ui_ioctl, .listen = llc_ui_listen, .shutdown = llc_ui_shutdown, .setsockopt = llc_ui_setsockopt, .getsockopt = llc_ui_getsockopt, .sendmsg = llc_ui_sendmsg, .recvmsg = llc_ui_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, }; static const char llc_proc_err_msg[] __initconst = KERN_CRIT "LLC: Unable to register the proc_fs entries\n"; static const char llc_sysctl_err_msg[] __initconst = KERN_CRIT "LLC: Unable to register the sysctl entries\n"; static const char llc_sock_err_msg[] __initconst = KERN_CRIT "LLC: Unable to register the network family\n"; static int __init llc2_init(void) { int rc = proto_register(&llc_proto, 0); if (rc != 0) goto out; llc_build_offset_table(); llc_station_init(); llc_ui_sap_last_autoport = LLC_SAP_DYN_START; rc = llc_proc_init(); if (rc != 0) { printk(llc_proc_err_msg); goto out_station; } rc = llc_sysctl_init(); if (rc) { printk(llc_sysctl_err_msg); goto out_proc; } rc = sock_register(&llc_ui_family_ops); if (rc) { printk(llc_sock_err_msg); goto out_sysctl; } llc_add_pack(LLC_DEST_SAP, llc_sap_handler); llc_add_pack(LLC_DEST_CONN, llc_conn_handler); out: return rc; out_sysctl: llc_sysctl_exit(); out_proc: llc_proc_exit(); out_station: llc_station_exit(); proto_unregister(&llc_proto); goto out; } static void __exit llc2_exit(void) { llc_station_exit(); llc_remove_pack(LLC_DEST_SAP); llc_remove_pack(LLC_DEST_CONN); sock_unregister(PF_LLC); llc_proc_exit(); llc_sysctl_exit(); proto_unregister(&llc_proto); } module_init(llc2_init); module_exit(llc2_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Procom 1997, Jay Schullist 2001, Arnaldo C. Melo 2001-2003"); MODULE_DESCRIPTION("IEEE 802.2 PF_LLC support"); MODULE_ALIAS_NETPROTO(PF_LLC);
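/*
 * Editor's note: illustrative userspace sketch, not part of af_llc.c.  It
 * shows one way the PF_LLC datagram interface above can be exercised: an
 * unbound socket is autobound to a dynamic SAP by llc_ui_sendmsg(), and the
 * payload goes out as an LLC UI PDU.  The destination MAC and SAP below are
 * placeholders, an Ethernet interface must be present, and the caller needs
 * CAP_NET_RAW (enforced in llc_ui_create()).
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/llc.h>
#include <linux/if_arp.h>

int llc_ui_example_send(void)
{
	const unsigned char dst_mac[6] = { 0x02, 0, 0, 0, 0, 0x01 }; /* placeholder */
	const char payload[] = "hello";
	struct sockaddr_llc to;
	int fd;

	fd = socket(PF_LLC, SOCK_DGRAM, 0);
	if (fd < 0) {
		perror("socket(PF_LLC, SOCK_DGRAM)");
		return -1;
	}

	memset(&to, 0, sizeof(to));
	to.sllc_family = AF_LLC;
	to.sllc_arphrd = ARPHRD_ETHER;
	to.sllc_sap = 0x42;			/* placeholder SAP */
	memcpy(to.sllc_mac, dst_mac, sizeof(to.sllc_mac));

	/* No bind(): the send path autobinds a dynamic SAP (llc_ui_autobind). */
	if (sendto(fd, payload, sizeof(payload), 0,
		   (struct sockaddr *)&to, sizeof(to)) < 0)
		perror("sendto");

	close(fd);
	return 0;
}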
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _VT_KERN_H #define _VT_KERN_H /* * this really is an extension of the vc_cons structure in console.c, but * with information needed by the vt package */ #include <linux/vt.h> #include <linux/kd.h> #include <linux/tty.h> #include <linux/mutex.h> #include <linux/console_struct.h> #include <linux/mm.h> #include <linux/consolemap.h> #include <linux/notifier.h> /* * Presently, a lot of graphics programs do not restore the contents of * the higher font pages. Defining this flag will avoid use of them, but * will lose support for PIO_FONTRESET. Note that many font operations are * not likely to work with these programs anyway; they need to be * fixed. The linux/Documentation directory includes a code snippet * to save and restore the text font. */ #ifdef CONFIG_VGA_CONSOLE #define BROKEN_GRAPHICS_PROGRAMS 1 #endif extern void kd_mksound(unsigned int hz, unsigned int ticks); extern int kbd_rate(struct kbd_repeat *rep); extern int fg_console, last_console, want_console; /* console.c */ int vc_allocate(unsigned int console); int vc_cons_allocated(unsigned int console); int vc_resize(struct vc_data *vc, unsigned int cols, unsigned int lines); struct vc_data *vc_deallocate(unsigned int console); void reset_palette(struct vc_data *vc); void do_blank_screen(int entering_gfx); void do_unblank_screen(int leaving_gfx); void unblank_screen(void); void poke_blanked_console(void); int con_font_op(struct vc_data *vc, struct console_font_op *op); int con_set_cmap(unsigned char __user *cmap); int con_get_cmap(unsigned char __user *cmap); void scrollback(struct vc_data *vc); void scrollfront(struct vc_data *vc, int lines); void clear_buffer_attributes(struct vc_data *vc); void update_region(struct vc_data *vc, unsigned long start, int count); void redraw_screen(struct vc_data *vc, int is_switch); #define update_screen(x) redraw_screen(x, 0) #define switch_screen(x) redraw_screen(x, 1) struct tty_struct; int tioclinux(struct tty_struct *tty, unsigned long arg); #ifdef CONFIG_CONSOLE_TRANSLATIONS /* consolemap.c */ struct unipair; int con_set_trans_old(unsigned char __user * table); int con_get_trans_old(unsigned char __user * table); int con_set_trans_new(unsigned short __user * table); int con_get_trans_new(unsigned short __user * table); int con_clear_unimap(struct vc_data *vc); int con_set_unimap(struct vc_data *vc, ushort ct, struct unipair __user *list); int con_get_unimap(struct vc_data *vc, ushort ct, ushort __user *uct, struct unipair __user *list); int con_set_default_unimap(struct vc_data *vc); void con_free_unimap(struct vc_data *vc); int con_copy_unimap(struct vc_data *dst_vc, struct vc_data *src_vc); #define vc_translate(vc, c) ((vc)->vc_translate[(c) | \ ((vc)->vc_toggle_meta ?
0x80 : 0)]) #else static inline int con_set_trans_old(unsigned char __user *table) { return 0; } static inline int con_get_trans_old(unsigned char __user *table) { return -EINVAL; } static inline int con_set_trans_new(unsigned short __user *table) { return 0; } static inline int con_get_trans_new(unsigned short __user *table) { return -EINVAL; } static inline int con_clear_unimap(struct vc_data *vc) { return 0; } static inline int con_set_unimap(struct vc_data *vc, ushort ct, struct unipair __user *list) { return 0; } static inline int con_get_unimap(struct vc_data *vc, ushort ct, ushort __user *uct, struct unipair __user *list) { return -EINVAL; } static inline int con_set_default_unimap(struct vc_data *vc) { return 0; } static inline void con_free_unimap(struct vc_data *vc) { } static inline void con_protect_unimap(struct vc_data *vc, int rdonly) { } static inline int con_copy_unimap(struct vc_data *dst_vc, struct vc_data *src_vc) { return 0; } #define vc_translate(vc, c) (c) #endif /* vt.c */ void vt_event_post(unsigned int event, unsigned int old, unsigned int new); int vt_waitactive(int n); void change_console(struct vc_data *new_vc); void reset_vc(struct vc_data *vc); extern int do_unbind_con_driver(const struct consw *csw, int first, int last, int deflt); int vty_init(const struct file_operations *console_fops); static inline bool vt_force_oops_output(struct vc_data *vc) { if (oops_in_progress && vc->vc_panic_force_write && panic_timeout >= 0) return true; return false; } extern bool vt_dont_switch; extern int default_utf8; extern int global_cursor_default; struct vt_spawn_console { spinlock_t lock; struct pid *pid; int sig; }; extern struct vt_spawn_console vt_spawn_con; extern int vt_move_to_console(unsigned int vt, int alloc); /* Interfaces for VC notification of character events (for accessibility etc) */ struct vt_notifier_param { struct vc_data *vc; /* VC on which the update happened */ unsigned int c; /* Printed char */ }; extern int register_vt_notifier(struct notifier_block *nb); extern int unregister_vt_notifier(struct notifier_block *nb); extern void hide_boot_cursor(bool hide); /* keyboard provided interfaces */ extern int vt_do_diacrit(unsigned int cmd, void __user *up, int eperm); extern int vt_do_kdskbmode(int console, unsigned int arg); extern int vt_do_kdskbmeta(int console, unsigned int arg); extern int vt_do_kbkeycode_ioctl(int cmd, struct kbkeycode __user *user_kbkc, int perm); extern int vt_do_kdsk_ioctl(int cmd, struct kbentry __user *user_kbe, int perm, int console); extern int vt_do_kdgkb_ioctl(int cmd, struct kbsentry __user *user_kdgkb, int perm); extern int vt_do_kdskled(int console, int cmd, unsigned long arg, int perm); extern int vt_do_kdgkbmode(int console); extern int vt_do_kdgkbmeta(int console); extern void vt_reset_unicode(int console); extern int vt_get_shift_state(void); extern void vt_reset_keyboard(int console); extern int vt_get_leds(int console, int flag); extern int vt_get_kbd_mode_bit(int console, int bit); extern void vt_set_kbd_mode_bit(int console, int bit); extern void vt_clr_kbd_mode_bit(int console, int bit); extern void vt_set_led_state(int console, int leds); extern void vt_set_led_state(int console, int leds); extern void vt_kbd_con_start(int console); extern void vt_kbd_con_stop(int console); void vc_scrolldelta_helper(struct vc_data *c, int lines, unsigned int rolled_over, void *_base, unsigned int size); #endif /* _VT_KERN_H */
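/*
 * Editor's note: illustrative sketch, not part of vt_kern.h.  It shows a
 * minimal consumer of the VT notifier interface declared above, in the
 * spirit of the accessibility drivers: a notifier_block is registered so the
 * callback sees character events posted by the console code.  The names are
 * hypothetical and the event code is only logged, not interpreted.
 */
#include <linux/module.h>
#include <linux/notifier.h>
#include <linux/vt_kern.h>

static int example_vt_event(struct notifier_block *nb, unsigned long code,
			    void *data)
{
	struct vt_notifier_param *param = data;

	pr_debug("vt event %lu, char 0x%x\n", code, param->c);
	return NOTIFY_OK;
}

static struct notifier_block example_vt_nb = {
	.notifier_call = example_vt_event,
};

static int __init example_vt_init(void)
{
	return register_vt_notifier(&example_vt_nb);
}

static void __exit example_vt_exit(void)
{
	unregister_vt_notifier(&example_vt_nb);
}

module_init(example_vt_init);
module_exit(example_vt_exit);
MODULE_LICENSE("GPL");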
/* * net/l3mdev/l3mdev.c - L3 master device implementation * Copyright (c) 2015 Cumulus Networks * Copyright (c) 2015 David Ahern <dsa@cumulusnetworks.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. */ #include <linux/netdevice.h> #include <net/fib_rules.h> #include <net/l3mdev.h> /** * l3mdev_master_ifindex - get index of L3 master device * @dev: targeted interface */ int l3mdev_master_ifindex_rcu(const struct net_device *dev) { int ifindex = 0; if (!dev) return 0; if (netif_is_l3_master(dev)) { ifindex = dev->ifindex; } else if (netif_is_l3_slave(dev)) { struct net_device *master; struct net_device *_dev = (struct net_device *)dev; /* netdev_master_upper_dev_get_rcu calls * list_first_or_null_rcu to walk the upper dev list. * list_first_or_null_rcu does not handle a const arg. We aren't * making changes, just want the master device from that list so * typecast to remove the const */ master = netdev_master_upper_dev_get_rcu(_dev); if (master) ifindex = master->ifindex; } return ifindex; } EXPORT_SYMBOL_GPL(l3mdev_master_ifindex_rcu); /** * l3mdev_fib_table - get FIB table id associated with an L3 * master interface * @dev: targeted interface */ u32 l3mdev_fib_table_rcu(const struct net_device *dev) { u32 tb_id = 0; if (!dev) return 0; if (netif_is_l3_master(dev)) { if (dev->l3mdev_ops->l3mdev_fib_table) tb_id = dev->l3mdev_ops->l3mdev_fib_table(dev); } else if (netif_is_l3_slave(dev)) { /* Users of netdev_master_upper_dev_get_rcu need non-const, * but current inet_*type functions take a const */ struct net_device *_dev = (struct net_device *) dev; const struct net_device *master; master = netdev_master_upper_dev_get_rcu(_dev); if (master && master->l3mdev_ops->l3mdev_fib_table) tb_id = master->l3mdev_ops->l3mdev_fib_table(master); } return tb_id; } EXPORT_SYMBOL_GPL(l3mdev_fib_table_rcu); u32 l3mdev_fib_table_by_index(struct net *net, int ifindex) { struct net_device *dev; u32 tb_id = 0; if (!ifindex) return 0; rcu_read_lock(); dev = dev_get_by_index_rcu(net, ifindex); if (dev) tb_id = l3mdev_fib_table_rcu(dev); rcu_read_unlock(); return tb_id; } EXPORT_SYMBOL_GPL(l3mdev_fib_table_by_index); /** * l3mdev_link_scope_lookup - IPv6 route lookup based on flow for link * local and multicast addresses * @net: network namespace for device index lookup * @fl6: IPv6 flow struct for lookup */ struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6) { struct dst_entry *dst = NULL; struct net_device *dev; if (fl6->flowi6_oif) { rcu_read_lock(); dev = dev_get_by_index_rcu(net, fl6->flowi6_oif); if (dev &&
netif_is_l3_slave(dev)) dev = netdev_master_upper_dev_get_rcu(dev); if (dev && netif_is_l3_master(dev) && dev->l3mdev_ops->l3mdev_link_scope_lookup) dst = dev->l3mdev_ops->l3mdev_link_scope_lookup(dev, fl6); rcu_read_unlock(); } return dst; } EXPORT_SYMBOL_GPL(l3mdev_link_scope_lookup); /** * l3mdev_fib_rule_match - Determine if flowi references an * L3 master device * @net: network namespace for device index lookup * @fl: flow struct */ int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, struct fib_lookup_arg *arg) { struct net_device *dev; int rc = 0; rcu_read_lock(); dev = dev_get_by_index_rcu(net, fl->flowi_oif); if (dev && netif_is_l3_master(dev) && dev->l3mdev_ops->l3mdev_fib_table) { arg->table = dev->l3mdev_ops->l3mdev_fib_table(dev); rc = 1; goto out; } dev = dev_get_by_index_rcu(net, fl->flowi_iif); if (dev && netif_is_l3_master(dev) && dev->l3mdev_ops->l3mdev_fib_table) { arg->table = dev->l3mdev_ops->l3mdev_fib_table(dev); rc = 1; goto out; } out: rcu_read_unlock(); return rc; } void l3mdev_update_flow(struct net *net, struct flowi *fl) { struct net_device *dev; int ifindex; rcu_read_lock(); if (fl->flowi_oif) { dev = dev_get_by_index_rcu(net, fl->flowi_oif); if (dev) { ifindex = l3mdev_master_ifindex_rcu(dev); if (ifindex) { fl->flowi_oif = ifindex; fl->flowi_flags |= FLOWI_FLAG_SKIP_NH_OIF; goto out; } } } if (fl->flowi_iif) { dev = dev_get_by_index_rcu(net, fl->flowi_iif); if (dev) { ifindex = l3mdev_master_ifindex_rcu(dev); if (ifindex) { fl->flowi_iif = ifindex; fl->flowi_flags |= FLOWI_FLAG_SKIP_NH_OIF; } } } out: rcu_read_unlock(); } EXPORT_SYMBOL_GPL(l3mdev_update_flow);
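/*
 * Editor's note: illustrative sketch, not part of l3mdev.c.  It shows the
 * calling convention for the _rcu helpers above: they must run inside an RCU
 * read-side critical section and return 0 when the device is not related to
 * an L3 master.  The shape mirrors l3mdev_fib_table_by_index() above; the
 * function name here is hypothetical.
 */
#include <linux/netdevice.h>
#include <net/l3mdev.h>

static u32 example_fib_table_of(struct net *net, int ifindex)
{
	struct net_device *dev;
	u32 tb_id = 0;

	rcu_read_lock();
	dev = dev_get_by_index_rcu(net, ifindex);
	if (dev)
		tb_id = l3mdev_fib_table_rcu(dev);	/* 0 unless dev is L3 master/slave */
	rcu_read_unlock();

	return tb_id;
}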
/* BlueZ - Bluetooth protocol stack for Linux Copyright (c) 2000-2001, 2010, Code Aurora Forum. All rights reserved.
Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License version 2 as published by the Free Software Foundation; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS SOFTWARE IS DISCLAIMED. */ #ifndef __HCI_CORE_H #define __HCI_CORE_H #include <linux/leds.h> #include <linux/rculist.h> #include <net/bluetooth/hci.h> #include <net/bluetooth/hci_sock.h> /* HCI priority */ #define HCI_PRIO_MAX 7 /* HCI maximum id value */ #define HCI_MAX_ID 10000 /* HCI Core structures */ struct inquiry_data { bdaddr_t bdaddr; __u8 pscan_rep_mode; __u8 pscan_period_mode; __u8 pscan_mode; __u8 dev_class[3]; __le16 clock_offset; __s8 rssi; __u8 ssp_mode; }; struct inquiry_entry { struct list_head all; /* inq_cache.all */ struct list_head list; /* unknown or resolve */ enum { NAME_NOT_KNOWN, NAME_NEEDED, NAME_PENDING, NAME_KNOWN, } name_state; __u32 timestamp; struct inquiry_data data; }; struct discovery_state { int type; enum { DISCOVERY_STOPPED, DISCOVERY_STARTING, DISCOVERY_FINDING, DISCOVERY_RESOLVING, DISCOVERY_STOPPING, } state; struct list_head all; /* All devices found during inquiry */ struct list_head unknown; /* Name state not known */ struct list_head resolve; /* Name needs to be resolved */ __u32 timestamp; bdaddr_t last_adv_addr; u8 last_adv_addr_type; s8 last_adv_rssi; u32 last_adv_flags; u8 last_adv_data[HCI_MAX_AD_LENGTH]; u8 last_adv_data_len; bool report_invalid_rssi; bool result_filtering; bool limited; s8 rssi; u16 uuid_count; u8 (*uuids)[16]; unsigned long scan_start; unsigned long scan_duration; }; struct hci_conn_hash { struct list_head list; unsigned int acl_num; unsigned int amp_num; unsigned int sco_num; unsigned int le_num; unsigned int le_num_slave; }; struct bdaddr_list { struct list_head list; bdaddr_t bdaddr; u8 bdaddr_type; }; struct bt_uuid { struct list_head list; u8 uuid[16]; u8 size; u8 svc_hint; }; struct smp_csrk { bdaddr_t bdaddr; u8 bdaddr_type; u8 type; u8 val[16]; }; struct smp_ltk { struct list_head list; struct rcu_head rcu; bdaddr_t bdaddr; u8 bdaddr_type; u8 authenticated; u8 type; u8 enc_size; __le16 ediv; __le64 rand; u8 val[16]; }; struct smp_irk { struct list_head list; struct rcu_head rcu; bdaddr_t rpa; bdaddr_t bdaddr; u8 addr_type; u8 val[16]; }; struct link_key { struct list_head list; struct rcu_head rcu; bdaddr_t bdaddr; u8 type; u8 val[HCI_LINK_KEY_SIZE]; u8 pin_len; }; struct oob_data { struct list_head list; bdaddr_t bdaddr; u8 bdaddr_type; u8 present; u8 hash192[16]; u8 rand192[16]; u8 hash256[16]; u8 rand256[16]; }; struct adv_info { struct list_head list; bool pending; __u8 instance; __u32 flags; __u16 timeout; __u16 remaining_time; __u16 duration; __u16 adv_data_len; __u8 adv_data[HCI_MAX_AD_LENGTH]; __u16 scan_rsp_len; __u8 scan_rsp_data[HCI_MAX_AD_LENGTH]; __s8 tx_power; bdaddr_t 
random_addr; bool rpa_expired; struct delayed_work rpa_expired_cb; }; #define HCI_MAX_ADV_INSTANCES 5 #define HCI_DEFAULT_ADV_DURATION 2 #define HCI_MAX_SHORT_NAME_LENGTH 10 /* Min encryption key size to match with SMP */ #define HCI_MIN_ENC_KEY_SIZE 7 /* Default LE RPA expiry time, 15 minutes */ #define HCI_DEFAULT_RPA_TIMEOUT (15 * 60) /* Default min/max age of connection information (1s/3s) */ #define DEFAULT_CONN_INFO_MIN_AGE 1000 #define DEFAULT_CONN_INFO_MAX_AGE 3000 struct amp_assoc { __u16 len; __u16 offset; __u16 rem_len; __u16 len_so_far; __u8 data[HCI_MAX_AMP_ASSOC_SIZE]; }; #define HCI_MAX_PAGES 3 struct hci_dev { struct list_head list; struct mutex lock; char name[8]; unsigned long flags; __u16 id; __u8 bus; __u8 dev_type; bdaddr_t bdaddr; bdaddr_t setup_addr; bdaddr_t public_addr; bdaddr_t random_addr; bdaddr_t static_addr; __u8 adv_addr_type; __u8 dev_name[HCI_MAX_NAME_LENGTH]; __u8 short_name[HCI_MAX_SHORT_NAME_LENGTH]; __u8 eir[HCI_MAX_EIR_LENGTH]; __u16 appearance; __u8 dev_class[3]; __u8 major_class; __u8 minor_class; __u8 max_page; __u8 features[HCI_MAX_PAGES][8]; __u8 le_features[8]; __u8 le_white_list_size; __u8 le_resolv_list_size; __u8 le_num_of_adv_sets; __u8 le_states[8]; __u8 commands[64]; __u8 hci_ver; __u16 hci_rev; __u8 lmp_ver; __u16 manufacturer; __u16 lmp_subver; __u16 voice_setting; __u8 num_iac; __u8 stored_max_keys; __u8 stored_num_keys; __u8 io_capability; __s8 inq_tx_power; __u16 page_scan_interval; __u16 page_scan_window; __u8 page_scan_type; __u8 le_adv_channel_map; __u16 le_adv_min_interval; __u16 le_adv_max_interval; __u8 le_scan_type; __u16 le_scan_interval; __u16 le_scan_window; __u16 le_conn_min_interval; __u16 le_conn_max_interval; __u16 le_conn_latency; __u16 le_supv_timeout; __u16 le_def_tx_len; __u16 le_def_tx_time; __u16 le_max_tx_len; __u16 le_max_tx_time; __u16 le_max_rx_len; __u16 le_max_rx_time; __u8 le_max_key_size; __u8 le_min_key_size; __u16 discov_interleaved_timeout; __u16 conn_info_min_age; __u16 conn_info_max_age; __u8 ssp_debug_mode; __u8 hw_error_code; __u32 clock; __u16 devid_source; __u16 devid_vendor; __u16 devid_product; __u16 devid_version; __u16 pkt_type; __u16 esco_type; __u16 link_policy; __u16 link_mode; __u32 idle_timeout; __u16 sniff_min_interval; __u16 sniff_max_interval; __u8 amp_status; __u32 amp_total_bw; __u32 amp_max_bw; __u32 amp_min_latency; __u32 amp_max_pdu; __u8 amp_type; __u16 amp_pal_cap; __u16 amp_assoc_size; __u32 amp_max_flush_to; __u32 amp_be_flush_to; struct amp_assoc loc_assoc; __u8 flow_ctl_mode; unsigned int auto_accept_delay; unsigned long quirks; atomic_t cmd_cnt; unsigned int acl_cnt; unsigned int sco_cnt; unsigned int le_cnt; unsigned int acl_mtu; unsigned int sco_mtu; unsigned int le_mtu; unsigned int acl_pkts; unsigned int sco_pkts; unsigned int le_pkts; __u16 block_len; __u16 block_mtu; __u16 num_blocks; __u16 block_cnt; unsigned long acl_last_tx; unsigned long sco_last_tx; unsigned long le_last_tx; __u8 le_tx_def_phys; __u8 le_rx_def_phys; struct workqueue_struct *workqueue; struct workqueue_struct *req_workqueue; struct work_struct power_on; struct delayed_work power_off; struct work_struct error_reset; __u16 discov_timeout; struct delayed_work discov_off; struct delayed_work service_cache; struct delayed_work cmd_timer; struct work_struct rx_work; struct work_struct cmd_work; struct work_struct tx_work; struct work_struct discov_update; struct work_struct bg_scan_update; struct work_struct scan_update; struct work_struct connectable_update; struct work_struct discoverable_update; struct 
delayed_work le_scan_disable; struct delayed_work le_scan_restart; struct sk_buff_head rx_q; struct sk_buff_head raw_q; struct sk_buff_head cmd_q; struct sk_buff *sent_cmd; struct mutex req_lock; wait_queue_head_t req_wait_q; __u32 req_status; __u32 req_result; struct sk_buff *req_skb; void *smp_data; void *smp_bredr_data; struct discovery_state discovery; struct hci_conn_hash conn_hash; struct list_head mgmt_pending; struct list_head blacklist; struct list_head whitelist; struct list_head uuids; struct list_head link_keys; struct list_head long_term_keys; struct list_head identity_resolving_keys; struct list_head remote_oob_data; struct list_head le_white_list; struct list_head le_resolv_list; struct list_head le_conn_params; struct list_head pend_le_conns; struct list_head pend_le_reports; struct hci_dev_stats stat; atomic_t promisc; const char *hw_info; const char *fw_info; struct dentry *debugfs; struct device dev; struct rfkill *rfkill; DECLARE_BITMAP(dev_flags, __HCI_NUM_FLAGS); __s8 adv_tx_power; __u8 adv_data[HCI_MAX_AD_LENGTH]; __u8 adv_data_len; __u8 scan_rsp_data[HCI_MAX_AD_LENGTH]; __u8 scan_rsp_data_len; struct list_head adv_instances; unsigned int adv_instance_cnt; __u8 cur_adv_instance; __u16 adv_instance_timeout; struct delayed_work adv_instance_expire; __u8 irk[16]; __u32 rpa_timeout; struct delayed_work rpa_expired; bdaddr_t rpa; #if IS_ENABLED(CONFIG_BT_LEDS) struct led_trigger *power_led; #endif int (*open)(struct hci_dev *hdev); int (*close)(struct hci_dev *hdev); int (*flush)(struct hci_dev *hdev); int (*setup)(struct hci_dev *hdev); int (*shutdown)(struct hci_dev *hdev); int (*send)(struct hci_dev *hdev, struct sk_buff *skb); void (*notify)(struct hci_dev *hdev, unsigned int evt); void (*hw_error)(struct hci_dev *hdev, u8 code); int (*post_init)(struct hci_dev *hdev); int (*set_diag)(struct hci_dev *hdev, bool enable); int (*set_bdaddr)(struct hci_dev *hdev, const bdaddr_t *bdaddr); }; #define HCI_PHY_HANDLE(handle) (handle & 0xff) struct hci_conn { struct list_head list; atomic_t refcnt; bdaddr_t dst; __u8 dst_type; bdaddr_t src; __u8 src_type; bdaddr_t init_addr; __u8 init_addr_type; bdaddr_t resp_addr; __u8 resp_addr_type; __u16 handle; __u16 state; __u8 mode; __u8 type; __u8 role; bool out; __u8 attempt; __u8 dev_class[3]; __u8 features[HCI_MAX_PAGES][8]; __u16 pkt_type; __u16 link_policy; __u8 key_type; __u8 auth_type; __u8 sec_level; __u8 pending_sec_level; __u8 pin_length; __u8 enc_key_size; __u8 io_capability; __u32 passkey_notify; __u8 passkey_entered; __u16 disc_timeout; __u16 conn_timeout; __u16 setting; __u16 le_conn_min_interval; __u16 le_conn_max_interval; __u16 le_conn_interval; __u16 le_conn_latency; __u16 le_supv_timeout; __u8 le_adv_data[HCI_MAX_AD_LENGTH]; __u8 le_adv_data_len; __s8 rssi; __s8 tx_power; __s8 max_tx_power; unsigned long flags; __u32 clock; __u16 clock_accuracy; unsigned long conn_info_timestamp; __u8 remote_cap; __u8 remote_auth; __u8 remote_id; unsigned int sent; struct sk_buff_head data_q; struct list_head chan_list; struct delayed_work disc_work; struct delayed_work auto_accept_work; struct delayed_work idle_work; struct delayed_work le_conn_timeout; struct work_struct le_scan_cleanup; struct device dev; struct dentry *debugfs; struct hci_dev *hdev; void *l2cap_data; void *sco_data; struct amp_mgr *amp_mgr; struct hci_conn *link; void (*connect_cfm_cb) (struct hci_conn *conn, u8 status); void (*security_cfm_cb) (struct hci_conn *conn, u8 status); void (*disconn_cfm_cb) (struct hci_conn *conn, u8 reason); }; struct hci_chan { 
struct list_head list; __u16 handle; struct hci_conn *conn; struct sk_buff_head data_q; unsigned int sent; __u8 state; bool amp; }; struct hci_conn_params { struct list_head list; struct list_head action; bdaddr_t addr; u8 addr_type; u16 conn_min_interval; u16 conn_max_interval; u16 conn_latency; u16 supervision_timeout; enum { HCI_AUTO_CONN_DISABLED, HCI_AUTO_CONN_REPORT, HCI_AUTO_CONN_DIRECT, HCI_AUTO_CONN_ALWAYS, HCI_AUTO_CONN_LINK_LOSS, HCI_AUTO_CONN_EXPLICIT, } auto_connect; struct hci_conn *conn; bool explicit_connect; }; extern struct list_head hci_dev_list; extern struct list_head hci_cb_list; extern rwlock_t hci_dev_list_lock; extern struct mutex hci_cb_list_lock; #define hci_dev_set_flag(hdev, nr) set_bit((nr), (hdev)->dev_flags) #define hci_dev_clear_flag(hdev, nr) clear_bit((nr), (hdev)->dev_flags) #define hci_dev_change_flag(hdev, nr) change_bit((nr), (hdev)->dev_flags) #define hci_dev_test_flag(hdev, nr) test_bit((nr), (hdev)->dev_flags) #define hci_dev_test_and_set_flag(hdev, nr) test_and_set_bit((nr), (hdev)->dev_flags) #define hci_dev_test_and_clear_flag(hdev, nr) test_and_clear_bit((nr), (hdev)->dev_flags) #define hci_dev_test_and_change_flag(hdev, nr) test_and_change_bit((nr), (hdev)->dev_flags) #define hci_dev_clear_volatile_flags(hdev) \ do { \ hci_dev_clear_flag(hdev, HCI_LE_SCAN); \ hci_dev_clear_flag(hdev, HCI_LE_ADV); \ hci_dev_clear_flag(hdev, HCI_PERIODIC_INQ); \ } while (0) /* ----- HCI interface to upper protocols ----- */ int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr); int l2cap_disconn_ind(struct hci_conn *hcon); void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags); #if IS_ENABLED(CONFIG_BT_BREDR) int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags); void sco_recv_scodata(struct hci_conn *hcon, struct sk_buff *skb); #else static inline int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) { return 0; } static inline void sco_recv_scodata(struct hci_conn *hcon, struct sk_buff *skb) { } #endif /* ----- Inquiry cache ----- */ #define INQUIRY_CACHE_AGE_MAX (HZ*30) /* 30 seconds */ #define INQUIRY_ENTRY_AGE_MAX (HZ*60) /* 60 seconds */ static inline void discovery_init(struct hci_dev *hdev) { hdev->discovery.state = DISCOVERY_STOPPED; INIT_LIST_HEAD(&hdev->discovery.all); INIT_LIST_HEAD(&hdev->discovery.unknown); INIT_LIST_HEAD(&hdev->discovery.resolve); hdev->discovery.report_invalid_rssi = true; hdev->discovery.rssi = HCI_RSSI_INVALID; } static inline void hci_discovery_filter_clear(struct hci_dev *hdev) { hdev->discovery.result_filtering = false; hdev->discovery.report_invalid_rssi = true; hdev->discovery.rssi = HCI_RSSI_INVALID; hdev->discovery.uuid_count = 0; kfree(hdev->discovery.uuids); hdev->discovery.uuids = NULL; hdev->discovery.scan_start = 0; hdev->discovery.scan_duration = 0; } bool hci_discovery_active(struct hci_dev *hdev); void hci_discovery_set_state(struct hci_dev *hdev, int state); static inline int inquiry_cache_empty(struct hci_dev *hdev) { return list_empty(&hdev->discovery.all); } static inline long inquiry_cache_age(struct hci_dev *hdev) { struct discovery_state *c = &hdev->discovery; return jiffies - c->timestamp; } static inline long inquiry_entry_age(struct inquiry_entry *e) { return jiffies - e->timestamp; } struct inquiry_entry *hci_inquiry_cache_lookup(struct hci_dev *hdev, bdaddr_t *bdaddr); struct inquiry_entry *hci_inquiry_cache_lookup_unknown(struct hci_dev *hdev, bdaddr_t *bdaddr); struct inquiry_entry *hci_inquiry_cache_lookup_resolve(struct hci_dev 
*hdev, bdaddr_t *bdaddr, int state); void hci_inquiry_cache_update_resolve(struct hci_dev *hdev, struct inquiry_entry *ie); u32 hci_inquiry_cache_update(struct hci_dev *hdev, struct inquiry_data *data, bool name_known); void hci_inquiry_cache_flush(struct hci_dev *hdev); /* ----- HCI Connections ----- */ enum { HCI_CONN_AUTH_PEND, HCI_CONN_REAUTH_PEND, HCI_CONN_ENCRYPT_PEND, HCI_CONN_RSWITCH_PEND, HCI_CONN_MODE_CHANGE_PEND, HCI_CONN_SCO_SETUP_PEND, HCI_CONN_MGMT_CONNECTED, HCI_CONN_SSP_ENABLED, HCI_CONN_SC_ENABLED, HCI_CONN_AES_CCM, HCI_CONN_POWER_SAVE, HCI_CONN_FLUSH_KEY, HCI_CONN_ENCRYPT, HCI_CONN_AUTH, HCI_CONN_SECURE, HCI_CONN_FIPS, HCI_CONN_STK_ENCRYPT, HCI_CONN_AUTH_INITIATOR, HCI_CONN_DROP, HCI_CONN_PARAM_REMOVAL_PEND, HCI_CONN_NEW_LINK_KEY, HCI_CONN_SCANNING, HCI_CONN_AUTH_FAILURE, }; static inline bool hci_conn_ssp_enabled(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; return hci_dev_test_flag(hdev, HCI_SSP_ENABLED) && test_bit(HCI_CONN_SSP_ENABLED, &conn->flags); } static inline bool hci_conn_sc_enabled(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; return hci_dev_test_flag(hdev, HCI_SC_ENABLED) && test_bit(HCI_CONN_SC_ENABLED, &conn->flags); } static inline void hci_conn_hash_add(struct hci_dev *hdev, struct hci_conn *c) { struct hci_conn_hash *h = &hdev->conn_hash; list_add_rcu(&c->list, &h->list); switch (c->type) { case ACL_LINK: h->acl_num++; break; case AMP_LINK: h->amp_num++; break; case LE_LINK: h->le_num++; if (c->role == HCI_ROLE_SLAVE) h->le_num_slave++; break; case SCO_LINK: case ESCO_LINK: h->sco_num++; break; } } static inline void hci_conn_hash_del(struct hci_dev *hdev, struct hci_conn *c) { struct hci_conn_hash *h = &hdev->conn_hash; list_del_rcu(&c->list); synchronize_rcu(); switch (c->type) { case ACL_LINK: h->acl_num--; break; case AMP_LINK: h->amp_num--; break; case LE_LINK: h->le_num--; if (c->role == HCI_ROLE_SLAVE) h->le_num_slave--; break; case SCO_LINK: case ESCO_LINK: h->sco_num--; break; } } static inline unsigned int hci_conn_num(struct hci_dev *hdev, __u8 type) { struct hci_conn_hash *h = &hdev->conn_hash; switch (type) { case ACL_LINK: return h->acl_num; case AMP_LINK: return h->amp_num; case LE_LINK: return h->le_num; case SCO_LINK: case ESCO_LINK: return h->sco_num; default: return 0; } } static inline unsigned int hci_conn_count(struct hci_dev *hdev) { struct hci_conn_hash *c = &hdev->conn_hash; return c->acl_num + c->amp_num + c->sco_num + c->le_num; } static inline __u8 hci_conn_lookup_type(struct hci_dev *hdev, __u16 handle) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *c; __u8 type = INVALID_LINK; rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { if (c->handle == handle) { type = c->type; break; } } rcu_read_unlock(); return type; } static inline struct hci_conn *hci_conn_hash_lookup_handle(struct hci_dev *hdev, __u16 handle) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *c; rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { if (c->handle == handle) { rcu_read_unlock(); return c; } } rcu_read_unlock(); return NULL; } static inline struct hci_conn *hci_conn_hash_lookup_ba(struct hci_dev *hdev, __u8 type, bdaddr_t *ba) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *c; rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { if (c->type == type && !bacmp(&c->dst, ba)) { rcu_read_unlock(); return c; } } rcu_read_unlock(); return NULL; } static inline struct hci_conn *hci_conn_hash_lookup_le(struct hci_dev *hdev, bdaddr_t *ba, __u8 ba_type) { 
struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *c; rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { if (c->type != LE_LINK) continue; if (ba_type == c->dst_type && !bacmp(&c->dst, ba)) { rcu_read_unlock(); return c; } } rcu_read_unlock(); return NULL; } static inline struct hci_conn *hci_conn_hash_lookup_state(struct hci_dev *hdev, __u8 type, __u16 state) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *c; rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { if (c->type == type && c->state == state) { rcu_read_unlock(); return c; } } rcu_read_unlock(); return NULL; } static inline struct hci_conn *hci_lookup_le_connect(struct hci_dev *hdev) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *c; rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { if (c->type == LE_LINK && c->state == BT_CONNECT && !test_bit(HCI_CONN_SCANNING, &c->flags)) { rcu_read_unlock(); return c; } } rcu_read_unlock(); return NULL; } int hci_disconnect(struct hci_conn *conn, __u8 reason); bool hci_setup_sync(struct hci_conn *conn, __u16 handle); void hci_sco_setup(struct hci_conn *conn, __u8 status); struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst, u8 role); int hci_conn_del(struct hci_conn *conn); void hci_conn_hash_flush(struct hci_dev *hdev); void hci_conn_check_pending(struct hci_dev *hdev); struct hci_chan *hci_chan_create(struct hci_conn *conn); void hci_chan_del(struct hci_chan *chan); void hci_chan_list_flush(struct hci_conn *conn); struct hci_chan *hci_chan_lookup_handle(struct hci_dev *hdev, __u16 handle); struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst, u8 dst_type, u8 sec_level, u16 conn_timeout); struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, u8 dst_type, u8 sec_level, u16 conn_timeout, u8 role, bdaddr_t *direct_rpa); struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst, u8 sec_level, u8 auth_type); struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst, __u16 setting); int hci_conn_check_link_mode(struct hci_conn *conn); int hci_conn_check_secure(struct hci_conn *conn, __u8 sec_level); int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type, bool initiator); int hci_conn_switch_role(struct hci_conn *conn, __u8 role); void hci_conn_enter_active_mode(struct hci_conn *conn, __u8 force_active); void hci_le_conn_failed(struct hci_conn *conn, u8 status); /* * hci_conn_get() and hci_conn_put() are used to control the life-time of an * "hci_conn" object. They do not guarantee that the hci_conn object is running, * working or anything else. They just guarantee that the object is available * and can be dereferenced. So you can use its locks, local variables and any * other constant data. * Before accessing runtime data, you _must_ lock the object and then check that * it is still running. As soon as you release the locks, the connection might * get dropped, though. * * On the other hand, hci_conn_hold() and hci_conn_drop() are used to control * how long the underlying connection is held. So every channel that runs on the * hci_conn object calls this to prevent the connection from disappearing. As * long as you hold a device, you must also guarantee that you have a valid * reference to the device via hci_conn_get() (or the initial reference from * hci_conn_add()). * The hold()/drop() ref-count is known to drop below 0 sometimes, which doesn't * break because nobody cares for that. 
But this means, we cannot use * _get()/_drop() in it, but require the caller to have a valid ref (FIXME). */ static inline struct hci_conn *hci_conn_get(struct hci_conn *conn) { get_device(&conn->dev); return conn; } static inline void hci_conn_put(struct hci_conn *conn) { put_device(&conn->dev); } static inline void hci_conn_hold(struct hci_conn *conn) { BT_DBG("hcon %p orig refcnt %d", conn, atomic_read(&conn->refcnt)); atomic_inc(&conn->refcnt); cancel_delayed_work(&conn->disc_work); } static inline void hci_conn_drop(struct hci_conn *conn) { BT_DBG("hcon %p orig refcnt %d", conn, atomic_read(&conn->refcnt)); if (atomic_dec_and_test(&conn->refcnt)) { unsigned long timeo; switch (conn->type) { case ACL_LINK: case LE_LINK: cancel_delayed_work(&conn->idle_work); if (conn->state == BT_CONNECTED) { timeo = conn->disc_timeout; if (!conn->out) timeo *= 2; } else { timeo = 0; } break; case AMP_LINK: timeo = conn->disc_timeout; break; default: timeo = 0; break; } cancel_delayed_work(&conn->disc_work); queue_delayed_work(conn->hdev->workqueue, &conn->disc_work, timeo); } } /* ----- HCI Devices ----- */ static inline void hci_dev_put(struct hci_dev *d) { BT_DBG("%s orig refcnt %d", d->name, kref_read(&d->dev.kobj.kref)); put_device(&d->dev); } static inline struct hci_dev *hci_dev_hold(struct hci_dev *d) { BT_DBG("%s orig refcnt %d", d->name, kref_read(&d->dev.kobj.kref)); get_device(&d->dev); return d; } #define hci_dev_lock(d) mutex_lock(&d->lock) #define hci_dev_unlock(d) mutex_unlock(&d->lock) #define to_hci_dev(d) container_of(d, struct hci_dev, dev) #define to_hci_conn(c) container_of(c, struct hci_conn, dev) static inline void *hci_get_drvdata(struct hci_dev *hdev) { return dev_get_drvdata(&hdev->dev); } static inline void hci_set_drvdata(struct hci_dev *hdev, void *data) { dev_set_drvdata(&hdev->dev, data); } struct hci_dev *hci_dev_get(int index); struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src, u8 src_type); struct hci_dev *hci_alloc_dev(void); void hci_free_dev(struct hci_dev *hdev); int hci_register_dev(struct hci_dev *hdev); void hci_unregister_dev(struct hci_dev *hdev); void hci_cleanup_dev(struct hci_dev *hdev); int hci_suspend_dev(struct hci_dev *hdev); int hci_resume_dev(struct hci_dev *hdev); int hci_reset_dev(struct hci_dev *hdev); int hci_recv_frame(struct hci_dev *hdev, struct sk_buff *skb); int hci_recv_diag(struct hci_dev *hdev, struct sk_buff *skb); __printf(2, 3) void hci_set_hw_info(struct hci_dev *hdev, const char *fmt, ...); __printf(2, 3) void hci_set_fw_info(struct hci_dev *hdev, const char *fmt, ...); int hci_dev_open(__u16 dev); int hci_dev_close(__u16 dev); int hci_dev_do_close(struct hci_dev *hdev); int hci_dev_reset(__u16 dev); int hci_dev_reset_stat(__u16 dev); int hci_dev_cmd(unsigned int cmd, void __user *arg); int hci_get_dev_list(void __user *arg); int hci_get_dev_info(void __user *arg); int hci_get_conn_list(void __user *arg); int hci_get_conn_info(struct hci_dev *hdev, void __user *arg); int hci_get_auth_info(struct hci_dev *hdev, void __user *arg); int hci_inquiry(void __user *arg); struct bdaddr_list *hci_bdaddr_list_lookup(struct list_head *list, bdaddr_t *bdaddr, u8 type); int hci_bdaddr_list_add(struct list_head *list, bdaddr_t *bdaddr, u8 type); int hci_bdaddr_list_del(struct list_head *list, bdaddr_t *bdaddr, u8 type); void hci_bdaddr_list_clear(struct list_head *list); struct hci_conn_params *hci_conn_params_lookup(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type); struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev, 
bdaddr_t *addr, u8 addr_type); void hci_conn_params_del(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type); void hci_conn_params_clear_disabled(struct hci_dev *hdev); struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list, bdaddr_t *addr, u8 addr_type); void hci_uuids_clear(struct hci_dev *hdev); void hci_link_keys_clear(struct hci_dev *hdev); struct link_key *hci_find_link_key(struct hci_dev *hdev, bdaddr_t *bdaddr); struct link_key *hci_add_link_key(struct hci_dev *hdev, struct hci_conn *conn, bdaddr_t *bdaddr, u8 *val, u8 type, u8 pin_len, bool *persistent); struct smp_ltk *hci_add_ltk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type, u8 type, u8 authenticated, u8 tk[16], u8 enc_size, __le16 ediv, __le64 rand); struct smp_ltk *hci_find_ltk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type, u8 role); int hci_remove_ltk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 bdaddr_type); void hci_smp_ltks_clear(struct hci_dev *hdev); int hci_remove_link_key(struct hci_dev *hdev, bdaddr_t *bdaddr); struct smp_irk *hci_find_irk_by_rpa(struct hci_dev *hdev, bdaddr_t *rpa); struct smp_irk *hci_find_irk_by_addr(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type); struct smp_irk *hci_add_irk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type, u8 val[16], bdaddr_t *rpa); void hci_remove_irk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type); void hci_smp_irks_clear(struct hci_dev *hdev); bool hci_bdaddr_is_paired(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type); void hci_remote_oob_data_clear(struct hci_dev *hdev); struct oob_data *hci_find_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 bdaddr_type); int hci_add_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 bdaddr_type, u8 *hash192, u8 *rand192, u8 *hash256, u8 *rand256); int hci_remove_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 bdaddr_type); void hci_adv_instances_clear(struct hci_dev *hdev); struct adv_info *hci_find_adv_instance(struct hci_dev *hdev, u8 instance); struct adv_info *hci_get_next_instance(struct hci_dev *hdev, u8 instance); int hci_add_adv_instance(struct hci_dev *hdev, u8 instance, u32 flags, u16 adv_data_len, u8 *adv_data, u16 scan_rsp_len, u8 *scan_rsp_data, u16 timeout, u16 duration); int hci_remove_adv_instance(struct hci_dev *hdev, u8 instance); void hci_adv_instances_set_rpa_expired(struct hci_dev *hdev, bool rpa_expired); void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb); void hci_init_sysfs(struct hci_dev *hdev); void hci_conn_init_sysfs(struct hci_conn *conn); void hci_conn_add_sysfs(struct hci_conn *conn); void hci_conn_del_sysfs(struct hci_conn *conn); #define SET_HCIDEV_DEV(hdev, pdev) ((hdev)->dev.parent = (pdev)) /* ----- LMP capabilities ----- */ #define lmp_encrypt_capable(dev) ((dev)->features[0][0] & LMP_ENCRYPT) #define lmp_rswitch_capable(dev) ((dev)->features[0][0] & LMP_RSWITCH) #define lmp_hold_capable(dev) ((dev)->features[0][0] & LMP_HOLD) #define lmp_sniff_capable(dev) ((dev)->features[0][0] & LMP_SNIFF) #define lmp_park_capable(dev) ((dev)->features[0][1] & LMP_PARK) #define lmp_inq_rssi_capable(dev) ((dev)->features[0][3] & LMP_RSSI_INQ) #define lmp_esco_capable(dev) ((dev)->features[0][3] & LMP_ESCO) #define lmp_bredr_capable(dev) (!((dev)->features[0][4] & LMP_NO_BREDR)) #define lmp_le_capable(dev) ((dev)->features[0][4] & LMP_LE) #define lmp_sniffsubr_capable(dev) ((dev)->features[0][5] & LMP_SNIFF_SUBR) #define lmp_pause_enc_capable(dev) ((dev)->features[0][5] & LMP_PAUSE_ENC) #define lmp_ext_inq_capable(dev) 
((dev)->features[0][6] & LMP_EXT_INQ) #define lmp_le_br_capable(dev) (!!((dev)->features[0][6] & LMP_SIMUL_LE_BR)) #define lmp_ssp_capable(dev) ((dev)->features[0][6] & LMP_SIMPLE_PAIR) #define lmp_no_flush_capable(dev) ((dev)->features[0][6] & LMP_NO_FLUSH) #define lmp_lsto_capable(dev) ((dev)->features[0][7] & LMP_LSTO) #define lmp_inq_tx_pwr_capable(dev) ((dev)->features[0][7] & LMP_INQ_TX_PWR) #define lmp_ext_feat_capable(dev) ((dev)->features[0][7] & LMP_EXTFEATURES) #define lmp_transp_capable(dev) ((dev)->features[0][2] & LMP_TRANSPARENT) #define lmp_edr_2m_capable(dev) ((dev)->features[0][3] & LMP_EDR_2M) #define lmp_edr_3m_capable(dev) ((dev)->features[0][3] & LMP_EDR_3M) #define lmp_edr_3slot_capable(dev) ((dev)->features[0][4] & LMP_EDR_3SLOT) #define lmp_edr_5slot_capable(dev) ((dev)->features[0][5] & LMP_EDR_5SLOT) /* ----- Extended LMP capabilities ----- */ #define lmp_csb_master_capable(dev) ((dev)->features[2][0] & LMP_CSB_MASTER) #define lmp_csb_slave_capable(dev) ((dev)->features[2][0] & LMP_CSB_SLAVE) #define lmp_sync_train_capable(dev) ((dev)->features[2][0] & LMP_SYNC_TRAIN) #define lmp_sync_scan_capable(dev) ((dev)->features[2][0] & LMP_SYNC_SCAN) #define lmp_sc_capable(dev) ((dev)->features[2][1] & LMP_SC) #define lmp_ping_capable(dev) ((dev)->features[2][1] & LMP_PING) /* ----- Host capabilities ----- */ #define lmp_host_ssp_capable(dev) ((dev)->features[1][0] & LMP_HOST_SSP) #define lmp_host_sc_capable(dev) ((dev)->features[1][0] & LMP_HOST_SC) #define lmp_host_le_capable(dev) (!!((dev)->features[1][0] & LMP_HOST_LE)) #define lmp_host_le_br_capable(dev) (!!((dev)->features[1][0] & LMP_HOST_LE_BREDR)) #define hdev_is_powered(dev) (test_bit(HCI_UP, &(dev)->flags) && \ !hci_dev_test_flag(dev, HCI_AUTO_OFF)) #define bredr_sc_enabled(dev) (lmp_sc_capable(dev) && \ hci_dev_test_flag(dev, HCI_SC_ENABLED)) #define scan_1m(dev) (((dev)->le_tx_def_phys & HCI_LE_SET_PHY_1M) || \ ((dev)->le_rx_def_phys & HCI_LE_SET_PHY_1M)) #define scan_2m(dev) (((dev)->le_tx_def_phys & HCI_LE_SET_PHY_2M) || \ ((dev)->le_rx_def_phys & HCI_LE_SET_PHY_2M)) #define scan_coded(dev) (((dev)->le_tx_def_phys & HCI_LE_SET_PHY_CODED) || \ ((dev)->le_rx_def_phys & HCI_LE_SET_PHY_CODED)) /* Use ext scanning if set ext scan param and ext scan enable is supported */ #define use_ext_scan(dev) (((dev)->commands[37] & 0x20) && \ ((dev)->commands[37] & 0x40)) /* Use ext create connection if command is supported */ #define use_ext_conn(dev) ((dev)->commands[37] & 0x80) /* Extended advertising support */ #define ext_adv_capable(dev) (((dev)->le_features[1] & HCI_LE_EXT_ADV)) /* ----- HCI protocols ----- */ #define HCI_PROTO_DEFER 0x01 static inline int hci_proto_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 type, __u8 *flags) { switch (type) { case ACL_LINK: return l2cap_connect_ind(hdev, bdaddr); case SCO_LINK: case ESCO_LINK: return sco_connect_ind(hdev, bdaddr, flags); default: BT_ERR("unknown link type %d", type); return -EINVAL; } } static inline int hci_proto_disconn_ind(struct hci_conn *conn) { if (conn->type != ACL_LINK && conn->type != LE_LINK) return HCI_ERROR_REMOTE_USER_TERM; return l2cap_disconn_ind(conn); } /* ----- HCI callbacks ----- */ struct hci_cb { struct list_head list; char *name; void (*connect_cfm) (struct hci_conn *conn, __u8 status); void (*disconn_cfm) (struct hci_conn *conn, __u8 status); void (*security_cfm) (struct hci_conn *conn, __u8 status, __u8 encrypt); void (*key_change_cfm) (struct hci_conn *conn, __u8 status); void (*role_switch_cfm) (struct hci_conn *conn, __u8 
status, __u8 role); }; static inline void hci_connect_cfm(struct hci_conn *conn, __u8 status) { struct hci_cb *cb; mutex_lock(&hci_cb_list_lock); list_for_each_entry(cb, &hci_cb_list, list) { if (cb->connect_cfm) cb->connect_cfm(conn, status); } mutex_unlock(&hci_cb_list_lock); if (conn->connect_cfm_cb) conn->connect_cfm_cb(conn, status); } static inline void hci_disconn_cfm(struct hci_conn *conn, __u8 reason) { struct hci_cb *cb; mutex_lock(&hci_cb_list_lock); list_for_each_entry(cb, &hci_cb_list, list) { if (cb->disconn_cfm) cb->disconn_cfm(conn, reason); } mutex_unlock(&hci_cb_list_lock); if (conn->disconn_cfm_cb) conn->disconn_cfm_cb(conn, reason); } static inline void hci_auth_cfm(struct hci_conn *conn, __u8 status) { struct hci_cb *cb; __u8 encrypt; if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags)) return; encrypt = test_bit(HCI_CONN_ENCRYPT, &conn->flags) ? 0x01 : 0x00; mutex_lock(&hci_cb_list_lock); list_for_each_entry(cb, &hci_cb_list, list) { if (cb->security_cfm) cb->security_cfm(conn, status, encrypt); } mutex_unlock(&hci_cb_list_lock); if (conn->security_cfm_cb) conn->security_cfm_cb(conn, status); } static inline void hci_encrypt_cfm(struct hci_conn *conn, __u8 status) { struct hci_cb *cb; __u8 encrypt; if (conn->state == BT_CONFIG) { if (!status) conn->state = BT_CONNECTED; hci_connect_cfm(conn, status); hci_conn_drop(conn); return; } if (!test_bit(HCI_CONN_ENCRYPT, &conn->flags)) encrypt = 0x00; else if (test_bit(HCI_CONN_AES_CCM, &conn->flags)) encrypt = 0x02; else encrypt = 0x01; if (!status) { if (conn->sec_level == BT_SECURITY_SDP) conn->sec_level = BT_SECURITY_LOW; if (conn->pending_sec_level > conn->sec_level) conn->sec_level = conn->pending_sec_level; } mutex_lock(&hci_cb_list_lock); list_for_each_entry(cb, &hci_cb_list, list) { if (cb->security_cfm) cb->security_cfm(conn, status, encrypt); } mutex_unlock(&hci_cb_list_lock); if (conn->security_cfm_cb) conn->security_cfm_cb(conn, status); } static inline void hci_key_change_cfm(struct hci_conn *conn, __u8 status) { struct hci_cb *cb; mutex_lock(&hci_cb_list_lock); list_for_each_entry(cb, &hci_cb_list, list) { if (cb->key_change_cfm) cb->key_change_cfm(conn, status); } mutex_unlock(&hci_cb_list_lock); } static inline void hci_role_switch_cfm(struct hci_conn *conn, __u8 status, __u8 role) { struct hci_cb *cb; mutex_lock(&hci_cb_list_lock); list_for_each_entry(cb, &hci_cb_list, list) { if (cb->role_switch_cfm) cb->role_switch_cfm(conn, status, role); } mutex_unlock(&hci_cb_list_lock); } static inline void *eir_get_data(u8 *eir, size_t eir_len, u8 type, size_t *data_len) { size_t parsed = 0; if (eir_len < 2) return NULL; while (parsed < eir_len - 1) { u8 field_len = eir[0]; if (field_len == 0) break; parsed += field_len + 1; if (parsed > eir_len) break; if (eir[1] != type) { eir += field_len + 1; continue; } /* Zero length data */ if (field_len == 1) return NULL; if (data_len) *data_len = field_len - 1; return &eir[2]; } return NULL; } static inline bool hci_bdaddr_is_rpa(bdaddr_t *bdaddr, u8 addr_type) { if (addr_type != ADDR_LE_DEV_RANDOM) return false; if ((bdaddr->b[5] & 0xc0) == 0x40) return true; return false; } static inline bool hci_is_identity_address(bdaddr_t *addr, u8 addr_type) { if (addr_type == ADDR_LE_DEV_PUBLIC) return true; /* Check for Random Static address type */ if ((addr->b[5] & 0xc0) == 0xc0) return true; return false; } static inline struct smp_irk *hci_get_irk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type) { if (!hci_bdaddr_is_rpa(bdaddr, addr_type)) return NULL; return 
hci_find_irk_by_rpa(hdev, bdaddr); } static inline int hci_check_conn_params(u16 min, u16 max, u16 latency, u16 to_multiplier) { u16 max_latency; if (min > max || min < 6 || max > 3200) return -EINVAL; if (to_multiplier < 10 || to_multiplier > 3200) return -EINVAL; if (max >= to_multiplier * 8) return -EINVAL; max_latency = (to_multiplier * 4 / max) - 1; if (latency > 499 || latency > max_latency) return -EINVAL; return 0; } int hci_register_cb(struct hci_cb *hcb); int hci_unregister_cb(struct hci_cb *hcb); struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param, u32 timeout); struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param, u8 event, u32 timeout); int __hci_cmd_send(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param); int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, const void *param); void hci_send_acl(struct hci_chan *chan, struct sk_buff *skb, __u16 flags); void hci_send_sco(struct hci_conn *conn, struct sk_buff *skb); void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode); struct sk_buff *hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param, u32 timeout); /* ----- HCI Sockets ----- */ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb); void hci_send_to_channel(unsigned short channel, struct sk_buff *skb, int flag, struct sock *skip_sk); void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb); void hci_send_monitor_ctrl_event(struct hci_dev *hdev, u16 event, void *data, u16 data_len, ktime_t tstamp, int flag, struct sock *skip_sk); void hci_sock_dev_event(struct hci_dev *hdev, int event); #define HCI_MGMT_VAR_LEN BIT(0) #define HCI_MGMT_NO_HDEV BIT(1) #define HCI_MGMT_UNTRUSTED BIT(2) #define HCI_MGMT_UNCONFIGURED BIT(3) struct hci_mgmt_handler { int (*func) (struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len); size_t data_len; unsigned long flags; }; struct hci_mgmt_chan { struct list_head list; unsigned short channel; size_t handler_count; const struct hci_mgmt_handler *handlers; void (*hdev_init) (struct sock *sk, struct hci_dev *hdev); }; int hci_mgmt_chan_register(struct hci_mgmt_chan *c); void hci_mgmt_chan_unregister(struct hci_mgmt_chan *c); /* Management interface */ #define DISCOV_TYPE_BREDR (BIT(BDADDR_BREDR)) #define DISCOV_TYPE_LE (BIT(BDADDR_LE_PUBLIC) | \ BIT(BDADDR_LE_RANDOM)) #define DISCOV_TYPE_INTERLEAVED (BIT(BDADDR_BREDR) | \ BIT(BDADDR_LE_PUBLIC) | \ BIT(BDADDR_LE_RANDOM)) /* These LE scan and inquiry parameters were chosen according to LE General * Discovery Procedure specification. 
*/ #define DISCOV_LE_SCAN_WIN 0x12 #define DISCOV_LE_SCAN_INT 0x12 #define DISCOV_LE_TIMEOUT 10240 /* msec */ #define DISCOV_INTERLEAVED_TIMEOUT 5120 /* msec */ #define DISCOV_INTERLEAVED_INQUIRY_LEN 0x04 #define DISCOV_BREDR_INQUIRY_LEN 0x08 #define DISCOV_LE_RESTART_DELAY msecs_to_jiffies(200) /* msec */ void mgmt_fill_version_info(void *ver); int mgmt_new_settings(struct hci_dev *hdev); void mgmt_index_added(struct hci_dev *hdev); void mgmt_index_removed(struct hci_dev *hdev); void mgmt_set_powered_failed(struct hci_dev *hdev, int err); void mgmt_power_on(struct hci_dev *hdev, int err); void __mgmt_power_off(struct hci_dev *hdev); void mgmt_new_link_key(struct hci_dev *hdev, struct link_key *key, bool persistent); void mgmt_device_connected(struct hci_dev *hdev, struct hci_conn *conn, u32 flags, u8 *name, u8 name_len); void mgmt_device_disconnected(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 reason, bool mgmt_connected); void mgmt_disconnect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 status); void mgmt_connect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 status); void mgmt_pin_code_request(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 secure); void mgmt_pin_code_reply_complete(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 status); void mgmt_pin_code_neg_reply_complete(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 status); int mgmt_user_confirm_request(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u32 value, u8 confirm_hint); int mgmt_user_confirm_reply_complete(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 status); int mgmt_user_confirm_neg_reply_complete(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 status); int mgmt_user_passkey_request(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type); int mgmt_user_passkey_reply_complete(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 status); int mgmt_user_passkey_neg_reply_complete(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 status); int mgmt_user_passkey_notify(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u32 passkey, u8 entered); void mgmt_auth_failed(struct hci_conn *conn, u8 status); void mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status); void mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status); void mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class, u8 status); void mgmt_set_local_name_complete(struct hci_dev *hdev, u8 *name, u8 status); void mgmt_start_discovery_complete(struct hci_dev *hdev, u8 status); void mgmt_stop_discovery_complete(struct hci_dev *hdev, u8 status); void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 *dev_class, s8 rssi, u32 flags, u8 *eir, u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len); void mgmt_remote_name(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, s8 rssi, u8 *name, u8 name_len); void mgmt_discovering(struct hci_dev *hdev, u8 discovering); bool mgmt_powering_down(struct hci_dev *hdev); void mgmt_new_ltk(struct hci_dev *hdev, struct smp_ltk *key, bool persistent); void mgmt_new_irk(struct hci_dev *hdev, struct smp_irk *irk, bool persistent); void mgmt_new_csrk(struct hci_dev *hdev, struct smp_csrk *csrk, bool persistent); void mgmt_new_conn_param(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 bdaddr_type, u8 store_hint, u16 min_interval, u16 max_interval, u16 latency, u16 
timeout); void mgmt_smp_complete(struct hci_conn *conn, bool complete); bool mgmt_get_connectable(struct hci_dev *hdev); void mgmt_set_connectable_complete(struct hci_dev *hdev, u8 status); void mgmt_set_discoverable_complete(struct hci_dev *hdev, u8 status); u8 mgmt_get_adv_discov_flags(struct hci_dev *hdev); void mgmt_advertising_added(struct sock *sk, struct hci_dev *hdev, u8 instance); void mgmt_advertising_removed(struct sock *sk, struct hci_dev *hdev, u8 instance); int mgmt_phy_configuration_changed(struct hci_dev *hdev, struct sock *skip); u8 hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency, u16 to_multiplier); void hci_le_start_enc(struct hci_conn *conn, __le16 ediv, __le64 rand, __u8 ltk[16], __u8 key_size); void hci_copy_identity_address(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 *bdaddr_type); #define SCO_AIRMODE_MASK 0x0003 #define SCO_AIRMODE_CVSD 0x0000 #define SCO_AIRMODE_TRANSP 0x0003 #endif /* __HCI_CORE_H */
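The long comment above hci_conn_get()/hci_conn_put() and hci_conn_hold()/hci_conn_drop() describes two independent reference counts; the following is a minimal illustrative sketch (not part of hci_core.h) of how a caller would typically pair them, with the actual work elided.

/*
 * Illustrative sketch only, not part of hci_core.h: pairing the two
 * reference counts described above. hci_conn_get()/hci_conn_put() keep
 * the hci_conn object itself safe to dereference; hci_conn_hold()/
 * hci_conn_drop() keep the underlying connection from being torn down.
 */
static void my_use_connection(struct hci_conn *conn)
{
	hci_conn_get(conn);	/* object stays safe to dereference  */
	hci_conn_hold(conn);	/* link stays up while we work on it */

	/* ... work on the connection here ... */

	hci_conn_drop(conn);	/* may schedule the disconnect work  */
	hci_conn_put(conn);	/* release the object reference      */
}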
// SPDX-License-Identifier: GPL-2.0 /* * Detect hard and soft lockups on a system * * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc. * * Note: Most of this code is borrowed heavily from the original softlockup * detector, so thanks to Ingo for the initial implementation. * Some chunks also taken from the old x86-specific nmi watchdog code, thanks * to those contributors as well. 
*/ #define pr_fmt(fmt) "watchdog: " fmt #include <linux/mm.h> #include <linux/cpu.h> #include <linux/nmi.h> #include <linux/init.h> #include <linux/module.h> #include <linux/sysctl.h> #include <linux/tick.h> #include <linux/sched/clock.h> #include <linux/sched/debug.h> #include <linux/sched/isolation.h> #include <linux/stop_machine.h> #include <asm/irq_regs.h> #include <linux/kvm_para.h> static DEFINE_MUTEX(watchdog_mutex); #if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG) # define WATCHDOG_DEFAULT (SOFT_WATCHDOG_ENABLED | NMI_WATCHDOG_ENABLED) # define NMI_WATCHDOG_DEFAULT 1 #else # define WATCHDOG_DEFAULT (SOFT_WATCHDOG_ENABLED) # define NMI_WATCHDOG_DEFAULT 0 #endif unsigned long __read_mostly watchdog_enabled; int __read_mostly watchdog_user_enabled = 1; int __read_mostly nmi_watchdog_user_enabled = NMI_WATCHDOG_DEFAULT; int __read_mostly soft_watchdog_user_enabled = 1; int __read_mostly watchdog_thresh = 10; int __read_mostly nmi_watchdog_available; struct cpumask watchdog_allowed_mask __read_mostly; struct cpumask watchdog_cpumask __read_mostly; unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask); #ifdef CONFIG_HARDLOCKUP_DETECTOR /* * Should we panic when a soft-lockup or hard-lockup occurs: */ unsigned int __read_mostly hardlockup_panic = CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE; /* * We may not want to enable hard lockup detection by default in all cases, * for example when running the kernel as a guest on a hypervisor. In these * cases this function can be called to disable hard lockup detection. This * function should only be executed once by the boot processor before the * kernel command line parameters are parsed, because otherwise it is not * possible to override this in hardlockup_panic_setup(). */ void __init hardlockup_detector_disable(void) { nmi_watchdog_user_enabled = 0; } static int __init hardlockup_panic_setup(char *str) { if (!strncmp(str, "panic", 5)) hardlockup_panic = 1; else if (!strncmp(str, "nopanic", 7)) hardlockup_panic = 0; else if (!strncmp(str, "0", 1)) nmi_watchdog_user_enabled = 0; else if (!strncmp(str, "1", 1)) nmi_watchdog_user_enabled = 1; return 1; } __setup("nmi_watchdog=", hardlockup_panic_setup); # ifdef CONFIG_SMP int __read_mostly sysctl_hardlockup_all_cpu_backtrace; static int __init hardlockup_all_cpu_backtrace_setup(char *str) { sysctl_hardlockup_all_cpu_backtrace = !!simple_strtol(str, NULL, 0); return 1; } __setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup); # endif /* CONFIG_SMP */ #endif /* CONFIG_HARDLOCKUP_DETECTOR */ /* * These functions can be overridden if an architecture implements its * own hardlockup detector. * * watchdog_nmi_enable/disable can be implemented to start and stop when * softlockup watchdog threads start and stop. The arch must select the * SOFTLOCKUP_DETECTOR Kconfig. */ int __weak watchdog_nmi_enable(unsigned int cpu) { hardlockup_detector_perf_enable(); return 0; } void __weak watchdog_nmi_disable(unsigned int cpu) { hardlockup_detector_perf_disable(); } /* Return 0, if a NMI watchdog is available. Error code otherwise */ int __weak __init watchdog_nmi_probe(void) { return hardlockup_detector_perf_init(); } /** * watchdog_nmi_stop - Stop the watchdog for reconfiguration * * The reconfiguration steps are: * watchdog_nmi_stop(); * update_variables(); * watchdog_nmi_start(); */ void __weak watchdog_nmi_stop(void) { } /** * watchdog_nmi_start - Start the watchdog after reconfiguration * * Counterpart to watchdog_nmi_stop(). 
* * The following variables have been updated in update_variables() and * contain the currently valid configuration: * - watchdog_enabled * - watchdog_thresh * - watchdog_cpumask */ void __weak watchdog_nmi_start(void) { } /** * lockup_detector_update_enable - Update the sysctl enable bit * * Caller needs to make sure that the NMI/perf watchdogs are off, so this * can't race with watchdog_nmi_disable(). */ static void lockup_detector_update_enable(void) { watchdog_enabled = 0; if (!watchdog_user_enabled) return; if (nmi_watchdog_available && nmi_watchdog_user_enabled) watchdog_enabled |= NMI_WATCHDOG_ENABLED; if (soft_watchdog_user_enabled) watchdog_enabled |= SOFT_WATCHDOG_ENABLED; } #ifdef CONFIG_SOFTLOCKUP_DETECTOR #define SOFTLOCKUP_RESET ULONG_MAX /* Global variables, exported for sysctl */ unsigned int __read_mostly softlockup_panic = CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE; static bool softlockup_initialized __read_mostly; static u64 __read_mostly sample_period; static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer); static DEFINE_PER_CPU(bool, softlockup_touch_sync); static DEFINE_PER_CPU(bool, soft_watchdog_warn); static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts); static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt); static DEFINE_PER_CPU(struct task_struct *, softlockup_task_ptr_saved); static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); static unsigned long soft_lockup_nmi_warn; static int __init softlockup_panic_setup(char *str) { softlockup_panic = simple_strtoul(str, NULL, 0); return 1; } __setup("softlockup_panic=", softlockup_panic_setup); static int __init nowatchdog_setup(char *str) { watchdog_user_enabled = 0; return 1; } __setup("nowatchdog", nowatchdog_setup); static int __init nosoftlockup_setup(char *str) { soft_watchdog_user_enabled = 0; return 1; } __setup("nosoftlockup", nosoftlockup_setup); #ifdef CONFIG_SMP int __read_mostly sysctl_softlockup_all_cpu_backtrace; static int __init softlockup_all_cpu_backtrace_setup(char *str) { sysctl_softlockup_all_cpu_backtrace = !!simple_strtol(str, NULL, 0); return 1; } __setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup); #endif static void __lockup_detector_cleanup(void); /* * Hard-lockup warnings should be triggered after just a few seconds. Soft- * lockups can have false positives under extreme conditions. So we generally * want a higher threshold for soft lockups than for hard lockups. So we couple * the thresholds with a factor: we make the soft threshold twice the amount of * time the hard threshold is. */ static int get_softlockup_thresh(void) { return watchdog_thresh * 2; } /* * Returns seconds, approximately. We don't need nanosecond * resolution, and we don't need to waste time with a big divide when * 2^30ns == 1.074s. 
*/ static unsigned long get_timestamp(void) { return running_clock() >> 30LL; /* 2^30 ~= 10^9 */ } static void set_sample_period(void) { /* * convert watchdog_thresh from seconds to ns * the divide by 5 is to give hrtimer several chances (two * or three with the current relation between the soft * and hard thresholds) to increment before the * hardlockup detector generates a warning */ sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5); watchdog_update_hrtimer_threshold(sample_period); } /* Commands for resetting the watchdog */ static void __touch_watchdog(void) { __this_cpu_write(watchdog_touch_ts, get_timestamp()); } /** * touch_softlockup_watchdog_sched - touch watchdog on scheduler stalls * * Call when the scheduler may have stalled for legitimate reasons * preventing the watchdog task from executing - e.g. the scheduler * entering idle state. This should only be used for scheduler events. * Use touch_softlockup_watchdog() for everything else. */ notrace void touch_softlockup_watchdog_sched(void) { /* * Preemption can be enabled. It doesn't matter which CPU's timestamp * gets zeroed here, so use the raw_ operation. */ raw_cpu_write(watchdog_touch_ts, SOFTLOCKUP_RESET); } notrace void touch_softlockup_watchdog(void) { touch_softlockup_watchdog_sched(); wq_watchdog_touch(raw_smp_processor_id()); } EXPORT_SYMBOL(touch_softlockup_watchdog); void touch_all_softlockup_watchdogs(void) { int cpu; /* * watchdog_mutex cannpt be taken here, as this might be called * from (soft)interrupt context, so the access to * watchdog_allowed_cpumask might race with a concurrent update. * * The watchdog time stamp can race against a concurrent real * update as well, the only side effect might be a cycle delay for * the softlockup check. */ for_each_cpu(cpu, &watchdog_allowed_mask) per_cpu(watchdog_touch_ts, cpu) = SOFTLOCKUP_RESET; wq_watchdog_touch(-1); } void touch_softlockup_watchdog_sync(void) { __this_cpu_write(softlockup_touch_sync, true); __this_cpu_write(watchdog_touch_ts, SOFTLOCKUP_RESET); } static int is_softlockup(unsigned long touch_ts) { unsigned long now = get_timestamp(); if ((watchdog_enabled & SOFT_WATCHDOG_ENABLED) && watchdog_thresh){ /* Warn about unreasonable delays. */ if (time_after(now, touch_ts + get_softlockup_thresh())) return now - touch_ts; } return 0; } /* watchdog detector functions */ bool is_hardlockup(void) { unsigned long hrint = __this_cpu_read(hrtimer_interrupts); if (__this_cpu_read(hrtimer_interrupts_saved) == hrint) return true; __this_cpu_write(hrtimer_interrupts_saved, hrint); return false; } static void watchdog_interrupt_count(void) { __this_cpu_inc(hrtimer_interrupts); } static DEFINE_PER_CPU(struct completion, softlockup_completion); static DEFINE_PER_CPU(struct cpu_stop_work, softlockup_stop_work); /* * The watchdog thread function - touches the timestamp. * * It only runs once every sample_period seconds (4 seconds by * default) to reset the softlockup timestamp. If this gets delayed * for more than 2*watchdog_thresh seconds then the debug-printout * triggers in watchdog_timer_fn(). 
*/ static int softlockup_fn(void *data) { __this_cpu_write(soft_lockup_hrtimer_cnt, __this_cpu_read(hrtimer_interrupts)); __touch_watchdog(); complete(this_cpu_ptr(&softlockup_completion)); return 0; } /* watchdog kicker functions */ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) { unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts); struct pt_regs *regs = get_irq_regs(); int duration; int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace; if (!watchdog_enabled) return HRTIMER_NORESTART; /* kick the hardlockup detector */ watchdog_interrupt_count(); /* kick the softlockup detector */ if (completion_done(this_cpu_ptr(&softlockup_completion))) { reinit_completion(this_cpu_ptr(&softlockup_completion)); stop_one_cpu_nowait(smp_processor_id(), softlockup_fn, NULL, this_cpu_ptr(&softlockup_stop_work)); } /* .. and repeat */ hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period)); if (touch_ts == SOFTLOCKUP_RESET) { if (unlikely(__this_cpu_read(softlockup_touch_sync))) { /* * If the time stamp was touched atomically * make sure the scheduler tick is up to date. */ __this_cpu_write(softlockup_touch_sync, false); sched_clock_tick(); } /* Clear the guest paused flag on watchdog reset */ kvm_check_and_clear_guest_paused(); __touch_watchdog(); return HRTIMER_RESTART; } /* check for a softlockup * This is done by making sure a high priority task is * being scheduled. The task touches the watchdog to * indicate it is getting cpu time. If it hasn't then * this is a good indication some task is hogging the cpu */ duration = is_softlockup(touch_ts); if (unlikely(duration)) { /* * If a virtual machine is stopped by the host it can look to * the watchdog like a soft lockup, check to see if the host * stopped the vm before we issue the warning */ if (kvm_check_and_clear_guest_paused()) return HRTIMER_RESTART; /* only warn once */ if (__this_cpu_read(soft_watchdog_warn) == true) { /* * When multiple processes are causing softlockups the * softlockup detector only warns on the first one * because the code relies on a full quiet cycle to * re-arm. The second process prevents the quiet cycle * and never gets reported. Use task pointers to detect * this. */ if (__this_cpu_read(softlockup_task_ptr_saved) != current) { __this_cpu_write(soft_watchdog_warn, false); __touch_watchdog(); } return HRTIMER_RESTART; } if (softlockup_all_cpu_backtrace) { /* Prevent multiple soft-lockup reports if one cpu is already * engaged in dumping cpu back traces */ if (test_and_set_bit(0, &soft_lockup_nmi_warn)) { /* Someone else will report us. Let's give up */ __this_cpu_write(soft_watchdog_warn, true); return HRTIMER_RESTART; } } pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! 
[%s:%d]\n", smp_processor_id(), duration, current->comm, task_pid_nr(current)); __this_cpu_write(softlockup_task_ptr_saved, current); print_modules(); print_irqtrace_events(current); if (regs) show_regs(regs); else dump_stack(); if (softlockup_all_cpu_backtrace) { /* Avoid generating two back traces for current * given that one is already made above */ trigger_allbutself_cpu_backtrace(); clear_bit(0, &soft_lockup_nmi_warn); /* Barrier to sync with other cpus */ smp_mb__after_atomic(); } add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK); if (softlockup_panic) panic("softlockup: hung tasks"); __this_cpu_write(soft_watchdog_warn, true); } else __this_cpu_write(soft_watchdog_warn, false); return HRTIMER_RESTART; } static void watchdog_enable(unsigned int cpu) { struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer); struct completion *done = this_cpu_ptr(&softlockup_completion); WARN_ON_ONCE(cpu != smp_processor_id()); init_completion(done); complete(done); /* * Start the timer first to prevent the NMI watchdog triggering * before the timer has a chance to fire. */ hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); hrtimer->function = watchdog_timer_fn; hrtimer_start(hrtimer, ns_to_ktime(sample_period), HRTIMER_MODE_REL_PINNED); /* Initialize timestamp */ __touch_watchdog(); /* Enable the perf event */ if (watchdog_enabled & NMI_WATCHDOG_ENABLED) watchdog_nmi_enable(cpu); } static void watchdog_disable(unsigned int cpu) { struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer); WARN_ON_ONCE(cpu != smp_processor_id()); /* * Disable the perf event first. That prevents that a large delay * between disabling the timer and disabling the perf event causes * the perf NMI to detect a false positive. */ watchdog_nmi_disable(cpu); hrtimer_cancel(hrtimer); wait_for_completion(this_cpu_ptr(&softlockup_completion)); } static int softlockup_stop_fn(void *data) { watchdog_disable(smp_processor_id()); return 0; } static void softlockup_stop_all(void) { int cpu; if (!softlockup_initialized) return; for_each_cpu(cpu, &watchdog_allowed_mask) smp_call_on_cpu(cpu, softlockup_stop_fn, NULL, false); cpumask_clear(&watchdog_allowed_mask); } static int softlockup_start_fn(void *data) { watchdog_enable(smp_processor_id()); return 0; } static void softlockup_start_all(void) { int cpu; cpumask_copy(&watchdog_allowed_mask, &watchdog_cpumask); for_each_cpu(cpu, &watchdog_allowed_mask) smp_call_on_cpu(cpu, softlockup_start_fn, NULL, false); } int lockup_detector_online_cpu(unsigned int cpu) { if (cpumask_test_cpu(cpu, &watchdog_allowed_mask)) watchdog_enable(cpu); return 0; } int lockup_detector_offline_cpu(unsigned int cpu) { if (cpumask_test_cpu(cpu, &watchdog_allowed_mask)) watchdog_disable(cpu); return 0; } static void __lockup_detector_reconfigure(void) { cpus_read_lock(); watchdog_nmi_stop(); softlockup_stop_all(); set_sample_period(); lockup_detector_update_enable(); if (watchdog_enabled && watchdog_thresh) softlockup_start_all(); watchdog_nmi_start(); cpus_read_unlock(); /* * Must be called outside the cpus locked section to prevent * recursive locking in the perf code. */ __lockup_detector_cleanup(); } void lockup_detector_reconfigure(void) { mutex_lock(&watchdog_mutex); __lockup_detector_reconfigure(); mutex_unlock(&watchdog_mutex); } /* * Create the watchdog thread infrastructure and configure the detector(s). * * The threads are not unparked as watchdog_allowed_mask is empty. 
When * the threads are sucessfully initialized, take the proper locks and * unpark the threads in the watchdog_cpumask if the watchdog is enabled. */ static __init void lockup_detector_setup(void) { /* * If sysctl is off and watchdog got disabled on the command line, * nothing to do here. */ lockup_detector_update_enable(); if (!IS_ENABLED(CONFIG_SYSCTL) && !(watchdog_enabled && watchdog_thresh)) return; mutex_lock(&watchdog_mutex); __lockup_detector_reconfigure(); softlockup_initialized = true; mutex_unlock(&watchdog_mutex); } #else /* CONFIG_SOFTLOCKUP_DETECTOR */ static void __lockup_detector_reconfigure(void) { cpus_read_lock(); watchdog_nmi_stop(); lockup_detector_update_enable(); watchdog_nmi_start(); cpus_read_unlock(); } void lockup_detector_reconfigure(void) { __lockup_detector_reconfigure(); } static inline void lockup_detector_setup(void) { __lockup_detector_reconfigure(); } #endif /* !CONFIG_SOFTLOCKUP_DETECTOR */ static void __lockup_detector_cleanup(void) { lockdep_assert_held(&watchdog_mutex); hardlockup_detector_perf_cleanup(); } /** * lockup_detector_cleanup - Cleanup after cpu hotplug or sysctl changes * * Caller must not hold the cpu hotplug rwsem. */ void lockup_detector_cleanup(void) { mutex_lock(&watchdog_mutex); __lockup_detector_cleanup(); mutex_unlock(&watchdog_mutex); } /** * lockup_detector_soft_poweroff - Interface to stop lockup detector(s) * * Special interface for parisc. It prevents lockup detector warnings from * the default pm_poweroff() function which busy loops forever. */ void lockup_detector_soft_poweroff(void) { watchdog_enabled = 0; } #ifdef CONFIG_SYSCTL /* Propagate any changes to the watchdog threads */ static void proc_watchdog_update(void) { /* Remove impossible cpus to keep sysctl output clean. */ cpumask_and(&watchdog_cpumask, &watchdog_cpumask, cpu_possible_mask); __lockup_detector_reconfigure(); } /* * common function for watchdog, nmi_watchdog and soft_watchdog parameter * * caller | table->data points to | 'which' * -------------------|----------------------------|-------------------------- * proc_watchdog | watchdog_user_enabled | NMI_WATCHDOG_ENABLED | * | | SOFT_WATCHDOG_ENABLED * -------------------|----------------------------|-------------------------- * proc_nmi_watchdog | nmi_watchdog_user_enabled | NMI_WATCHDOG_ENABLED * -------------------|----------------------------|-------------------------- * proc_soft_watchdog | soft_watchdog_user_enabled | SOFT_WATCHDOG_ENABLED */ static int proc_watchdog_common(int which, struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int err, old, *param = table->data; mutex_lock(&watchdog_mutex); if (!write) { /* * On read synchronize the userspace interface. This is a * racy snapshot. 
*/ *param = (watchdog_enabled & which) != 0; err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); } else { old = READ_ONCE(*param); err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (!err && old != READ_ONCE(*param)) proc_watchdog_update(); } mutex_unlock(&watchdog_mutex); return err; } /* * /proc/sys/kernel/watchdog */ int proc_watchdog(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { return proc_watchdog_common(NMI_WATCHDOG_ENABLED|SOFT_WATCHDOG_ENABLED, table, write, buffer, lenp, ppos); } /* * /proc/sys/kernel/nmi_watchdog */ int proc_nmi_watchdog(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { if (!nmi_watchdog_available && write) return -ENOTSUPP; return proc_watchdog_common(NMI_WATCHDOG_ENABLED, table, write, buffer, lenp, ppos); } /* * /proc/sys/kernel/soft_watchdog */ int proc_soft_watchdog(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { return proc_watchdog_common(SOFT_WATCHDOG_ENABLED, table, write, buffer, lenp, ppos); } /* * /proc/sys/kernel/watchdog_thresh */ int proc_watchdog_thresh(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int err, old; mutex_lock(&watchdog_mutex); old = READ_ONCE(watchdog_thresh); err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (!err && write && old != READ_ONCE(watchdog_thresh)) proc_watchdog_update(); mutex_unlock(&watchdog_mutex); return err; } /* * The cpumask is the mask of possible cpus that the watchdog can run * on, not the mask of cpus it is actually running on. This allows the * user to specify a mask that will include cpus that have not yet * been brought online, if desired. */ int proc_watchdog_cpumask(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int err; mutex_lock(&watchdog_mutex); err = proc_do_large_bitmap(table, write, buffer, lenp, ppos); if (!err && write) proc_watchdog_update(); mutex_unlock(&watchdog_mutex); return err; } #endif /* CONFIG_SYSCTL */ void __init lockup_detector_init(void) { if (tick_nohz_full_enabled()) pr_info("Disabling watchdog on nohz_full cores by default\n"); cpumask_copy(&watchdog_cpumask, housekeeping_cpumask(HK_FLAG_TIMER)); if (!watchdog_nmi_probe()) nmi_watchdog_available = true; lockup_detector_setup(); }
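The comments in watchdog.c above spell out the timing relationship between watchdog_thresh, the soft-lockup threshold (twice the hard threshold), and the hrtimer sample period (one fifth of the soft threshold). The small standalone sketch below (not part of watchdog.c) just reproduces that arithmetic for the default watchdog_thresh of 10 seconds.

/*
 * Illustrative, standalone userspace sketch (not kernel code): the timing
 * relationship described by get_softlockup_thresh() and set_sample_period().
 * With the default watchdog_thresh of 10s the soft threshold is 20s and the
 * per-CPU hrtimer fires every 4s, so the touch timestamp is normally
 * refreshed several times within one threshold window.
 */
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL

int main(void)
{
	int watchdog_thresh = 10;		/* seconds, default          */
	int soft_thresh = watchdog_thresh * 2;	/* get_softlockup_thresh()   */
	unsigned long long sample_period =
		(unsigned long long)soft_thresh * (NSEC_PER_SEC / 5);

	printf("hard threshold : %d s\n", watchdog_thresh);
	printf("soft threshold : %d s\n", soft_thresh);
	printf("sample period  : %llu ns (%.1f s)\n",
	       sample_period, sample_period / 1e9);
	return 0;
}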
/* * net/dccp/proto.c * * An implementation of the DCCP protocol * Arnaldo Carvalho de Melo <acme@conectiva.com.br> * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ #include <linux/dccp.h> #include <linux/module.h> #include <linux/types.h> #include <linux/sched.h> #include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/in.h> #include <linux/if_arp.h> #include <linux/init.h> #include <linux/random.h> #include <linux/slab.h> #include <net/checksum.h> #include <net/inet_sock.h> #include <net/inet_common.h> #include <net/sock.h> #include <net/xfrm.h> #include <asm/ioctls.h> #include <linux/spinlock.h> #include <linux/timer.h> #include <linux/delay.h> #include <linux/poll.h> #include "ccid.h" #include "dccp.h" #include "feat.h" #define CREATE_TRACE_POINTS #include "trace.h" DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly; EXPORT_SYMBOL_GPL(dccp_statistics); struct percpu_counter dccp_orphan_count; EXPORT_SYMBOL_GPL(dccp_orphan_count); struct inet_hashinfo dccp_hashinfo; EXPORT_SYMBOL_GPL(dccp_hashinfo); /* the maximum queue length for tx in packets. 
0 is no limit */ int sysctl_dccp_tx_qlen __read_mostly = 5; #ifdef CONFIG_IP_DCCP_DEBUG static const char *dccp_state_name(const int state) { static const char *const dccp_state_names[] = { [DCCP_OPEN] = "OPEN", [DCCP_REQUESTING] = "REQUESTING", [DCCP_PARTOPEN] = "PARTOPEN", [DCCP_LISTEN] = "LISTEN", [DCCP_RESPOND] = "RESPOND", [DCCP_CLOSING] = "CLOSING", [DCCP_ACTIVE_CLOSEREQ] = "CLOSEREQ", [DCCP_PASSIVE_CLOSE] = "PASSIVE_CLOSE", [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ", [DCCP_TIME_WAIT] = "TIME_WAIT", [DCCP_CLOSED] = "CLOSED", }; if (state >= DCCP_MAX_STATES) return "INVALID STATE!"; else return dccp_state_names[state]; } #endif void dccp_set_state(struct sock *sk, const int state) { const int oldstate = sk->sk_state; dccp_pr_debug("%s(%p) %s --> %s\n", dccp_role(sk), sk, dccp_state_name(oldstate), dccp_state_name(state)); WARN_ON(state == oldstate); switch (state) { case DCCP_OPEN: if (oldstate != DCCP_OPEN) DCCP_INC_STATS(DCCP_MIB_CURRESTAB); /* Client retransmits all Confirm options until entering OPEN */ if (oldstate == DCCP_PARTOPEN) dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg); break; case DCCP_CLOSED: if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ || oldstate == DCCP_CLOSING) DCCP_INC_STATS(DCCP_MIB_ESTABRESETS); sk->sk_prot->unhash(sk); if (inet_csk(sk)->icsk_bind_hash != NULL && !(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) inet_put_port(sk); /* fall through */ default: if (oldstate == DCCP_OPEN) DCCP_DEC_STATS(DCCP_MIB_CURRESTAB); } /* Change state AFTER socket is unhashed to avoid closed * socket sitting in hash tables. */ inet_sk_set_state(sk, state); } EXPORT_SYMBOL_GPL(dccp_set_state); static void dccp_finish_passive_close(struct sock *sk) { switch (sk->sk_state) { case DCCP_PASSIVE_CLOSE: /* Node (client or server) has received Close packet. */ dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED); dccp_set_state(sk, DCCP_CLOSED); break; case DCCP_PASSIVE_CLOSEREQ: /* * Client received CloseReq. We set the `active' flag so that * dccp_send_close() retransmits the Close as per RFC 4340, 8.3. 
*/ dccp_send_close(sk, 1); dccp_set_state(sk, DCCP_CLOSING); } } void dccp_done(struct sock *sk) { dccp_set_state(sk, DCCP_CLOSED); dccp_clear_xmit_timers(sk); sk->sk_shutdown = SHUTDOWN_MASK; if (!sock_flag(sk, SOCK_DEAD)) sk->sk_state_change(sk); else inet_csk_destroy_sock(sk); } EXPORT_SYMBOL_GPL(dccp_done); const char *dccp_packet_name(const int type) { static const char *const dccp_packet_names[] = { [DCCP_PKT_REQUEST] = "REQUEST", [DCCP_PKT_RESPONSE] = "RESPONSE", [DCCP_PKT_DATA] = "DATA", [DCCP_PKT_ACK] = "ACK", [DCCP_PKT_DATAACK] = "DATAACK", [DCCP_PKT_CLOSEREQ] = "CLOSEREQ", [DCCP_PKT_CLOSE] = "CLOSE", [DCCP_PKT_RESET] = "RESET", [DCCP_PKT_SYNC] = "SYNC", [DCCP_PKT_SYNCACK] = "SYNCACK", }; if (type >= DCCP_NR_PKT_TYPES) return "INVALID"; else return dccp_packet_names[type]; } EXPORT_SYMBOL_GPL(dccp_packet_name); static void dccp_sk_destruct(struct sock *sk) { struct dccp_sock *dp = dccp_sk(sk); ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk); dp->dccps_hc_tx_ccid = NULL; inet_sock_destruct(sk); } int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) { struct dccp_sock *dp = dccp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); icsk->icsk_rto = DCCP_TIMEOUT_INIT; icsk->icsk_syn_retries = sysctl_dccp_request_retries; sk->sk_state = DCCP_CLOSED; sk->sk_write_space = dccp_write_space; sk->sk_destruct = dccp_sk_destruct; icsk->icsk_sync_mss = dccp_sync_mss; dp->dccps_mss_cache = 536; dp->dccps_rate_last = jiffies; dp->dccps_role = DCCP_ROLE_UNDEFINED; dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT; dp->dccps_tx_qlen = sysctl_dccp_tx_qlen; dccp_init_xmit_timers(sk); INIT_LIST_HEAD(&dp->dccps_featneg); /* control socket doesn't need feat nego */ if (likely(ctl_sock_initialized)) return dccp_feat_init(sk); return 0; } EXPORT_SYMBOL_GPL(dccp_init_sock); void dccp_destroy_sock(struct sock *sk) { struct dccp_sock *dp = dccp_sk(sk); __skb_queue_purge(&sk->sk_write_queue); if (sk->sk_send_head != NULL) { kfree_skb(sk->sk_send_head); sk->sk_send_head = NULL; } /* Clean up a referenced DCCP bind bucket. */ if (inet_csk(sk)->icsk_bind_hash != NULL) inet_put_port(sk); kfree(dp->dccps_service_list); dp->dccps_service_list = NULL; if (dp->dccps_hc_rx_ackvec != NULL) { dccp_ackvec_free(dp->dccps_hc_rx_ackvec); dp->dccps_hc_rx_ackvec = NULL; } ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk); dp->dccps_hc_rx_ccid = NULL; /* clean up feature negotiation state */ dccp_feat_list_purge(&dp->dccps_featneg); } EXPORT_SYMBOL_GPL(dccp_destroy_sock); static inline int dccp_listen_start(struct sock *sk, int backlog) { struct dccp_sock *dp = dccp_sk(sk); dp->dccps_role = DCCP_ROLE_LISTEN; /* do not start to listen if feature negotiation setup fails */ if (dccp_feat_finalise_settings(dp)) return -EPROTO; return inet_csk_listen_start(sk, backlog); } static inline int dccp_need_reset(int state) { return state != DCCP_CLOSED && state != DCCP_LISTEN && state != DCCP_REQUESTING; } int dccp_disconnect(struct sock *sk, int flags) { struct inet_connection_sock *icsk = inet_csk(sk); struct inet_sock *inet = inet_sk(sk); struct dccp_sock *dp = dccp_sk(sk); int err = 0; const int old_state = sk->sk_state; if (old_state != DCCP_CLOSED) dccp_set_state(sk, DCCP_CLOSED); /* * This corresponds to the ABORT function of RFC793, sec. 3.8 * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted". 
*/ if (old_state == DCCP_LISTEN) { inet_csk_listen_stop(sk); } else if (dccp_need_reset(old_state)) { dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED); sk->sk_err = ECONNRESET; } else if (old_state == DCCP_REQUESTING) sk->sk_err = ECONNRESET; dccp_clear_xmit_timers(sk); ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk); dp->dccps_hc_rx_ccid = NULL; __skb_queue_purge(&sk->sk_receive_queue); __skb_queue_purge(&sk->sk_write_queue); if (sk->sk_send_head != NULL) { __kfree_skb(sk->sk_send_head); sk->sk_send_head = NULL; } inet->inet_dport = 0; if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) inet_reset_saddr(sk); sk->sk_shutdown = 0; sock_reset_flag(sk, SOCK_DONE); icsk->icsk_backoff = 0; inet_csk_delack_init(sk); __sk_dst_reset(sk); WARN_ON(inet->inet_num && !icsk->icsk_bind_hash); sk->sk_error_report(sk); return err; } EXPORT_SYMBOL_GPL(dccp_disconnect); /* * Wait for a DCCP event. * * Note that we don't need to lock the socket, as the upper poll layers * take care of normal races (between the test and the event) and we don't * go look at any of the socket buffers directly. */ __poll_t dccp_poll(struct file *file, struct socket *sock, poll_table *wait) { __poll_t mask; struct sock *sk = sock->sk; sock_poll_wait(file, sock, wait); if (sk->sk_state == DCCP_LISTEN) return inet_csk_listen_poll(sk); /* Socket is not locked. We are protected from async events by poll logic and correct handling of state changes made by another threads is impossible in any case. */ mask = 0; if (sk->sk_err) mask = EPOLLERR; if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED) mask |= EPOLLHUP; if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; /* Connected? */ if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) { if (atomic_read(&sk->sk_rmem_alloc) > 0) mask |= EPOLLIN | EPOLLRDNORM; if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { if (sk_stream_is_writeable(sk)) { mask |= EPOLLOUT | EPOLLWRNORM; } else { /* send SIGIO later */ sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); /* Race breaker. If space is freed after * wspace test but before the flags are set, * IO signal will be lost. */ if (sk_stream_is_writeable(sk)) mask |= EPOLLOUT | EPOLLWRNORM; } } } return mask; } EXPORT_SYMBOL_GPL(dccp_poll); int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg) { int rc = -ENOTCONN; lock_sock(sk); if (sk->sk_state == DCCP_LISTEN) goto out; switch (cmd) { case SIOCINQ: { struct sk_buff *skb; unsigned long amount = 0; skb = skb_peek(&sk->sk_receive_queue); if (skb != NULL) { /* * We will only return the amount of this packet since * that is all that will be read. 
*/ amount = skb->len; } rc = put_user(amount, (int __user *)arg); } break; default: rc = -ENOIOCTLCMD; break; } out: release_sock(sk); return rc; } EXPORT_SYMBOL_GPL(dccp_ioctl); static int dccp_setsockopt_service(struct sock *sk, const __be32 service, char __user *optval, unsigned int optlen) { struct dccp_sock *dp = dccp_sk(sk); struct dccp_service_list *sl = NULL; if (service == DCCP_SERVICE_INVALID_VALUE || optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32)) return -EINVAL; if (optlen > sizeof(service)) { sl = kmalloc(optlen, GFP_KERNEL); if (sl == NULL) return -ENOMEM; sl->dccpsl_nr = optlen / sizeof(u32) - 1; if (copy_from_user(sl->dccpsl_list, optval + sizeof(service), optlen - sizeof(service)) || dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) { kfree(sl); return -EFAULT; } } lock_sock(sk); dp->dccps_service = service; kfree(dp->dccps_service_list); dp->dccps_service_list = sl; release_sock(sk); return 0; } static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx) { u8 *list, len; int i, rc; if (cscov < 0 || cscov > 15) return -EINVAL; /* * Populate a list of permissible values, in the range cscov...15. This * is necessary since feature negotiation of single values only works if * both sides incidentally choose the same value. Since the list starts * lowest-value first, negotiation will pick the smallest shared value. */ if (cscov == 0) return 0; len = 16 - cscov; list = kmalloc(len, GFP_KERNEL); if (list == NULL) return -ENOBUFS; for (i = 0; i < len; i++) list[i] = cscov++; rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len); if (rc == 0) { if (rx) dccp_sk(sk)->dccps_pcrlen = cscov; else dccp_sk(sk)->dccps_pcslen = cscov; } kfree(list); return rc; } static int dccp_setsockopt_ccid(struct sock *sk, int type, char __user *optval, unsigned int optlen) { u8 *val; int rc = 0; if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS) return -EINVAL; val = memdup_user(optval, optlen); if (IS_ERR(val)) return PTR_ERR(val); lock_sock(sk); if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID) rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen); if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID)) rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen); release_sock(sk); kfree(val); return rc; } static int do_dccp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { struct dccp_sock *dp = dccp_sk(sk); int val, err = 0; switch (optname) { case DCCP_SOCKOPT_PACKET_SIZE: DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n"); return 0; case DCCP_SOCKOPT_CHANGE_L: case DCCP_SOCKOPT_CHANGE_R: DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n"); return 0; case DCCP_SOCKOPT_CCID: case DCCP_SOCKOPT_RX_CCID: case DCCP_SOCKOPT_TX_CCID: return dccp_setsockopt_ccid(sk, optname, optval, optlen); } if (optlen < (int)sizeof(int)) return -EINVAL; if (get_user(val, (int __user *)optval)) return -EFAULT; if (optname == DCCP_SOCKOPT_SERVICE) return dccp_setsockopt_service(sk, val, optval, optlen); lock_sock(sk); switch (optname) { case DCCP_SOCKOPT_SERVER_TIMEWAIT: if (dp->dccps_role != DCCP_ROLE_SERVER) err = -EOPNOTSUPP; else dp->dccps_server_timewait = (val != 0); break; case DCCP_SOCKOPT_SEND_CSCOV: err = dccp_setsockopt_cscov(sk, val, false); break; case DCCP_SOCKOPT_RECV_CSCOV: err = dccp_setsockopt_cscov(sk, val, true); break; case DCCP_SOCKOPT_QPOLICY_ID: if (sk->sk_state != DCCP_CLOSED) err = -EISCONN; else if (val < 0 || val >= DCCPQ_POLICY_MAX) err = -EINVAL; else 
dp->dccps_qpolicy = val; break; case DCCP_SOCKOPT_QPOLICY_TXQLEN: if (val < 0) err = -EINVAL; else dp->dccps_tx_qlen = val; break; default: err = -ENOPROTOOPT; break; } release_sock(sk); return err; } int dccp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { if (level != SOL_DCCP) return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level, optname, optval, optlen); return do_dccp_setsockopt(sk, level, optname, optval, optlen); } EXPORT_SYMBOL_GPL(dccp_setsockopt); #ifdef CONFIG_COMPAT int compat_dccp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { if (level != SOL_DCCP) return inet_csk_compat_setsockopt(sk, level, optname, optval, optlen); return do_dccp_setsockopt(sk, level, optname, optval, optlen); } EXPORT_SYMBOL_GPL(compat_dccp_setsockopt); #endif static int dccp_getsockopt_service(struct sock *sk, int len, __be32 __user *optval, int __user *optlen) { const struct dccp_sock *dp = dccp_sk(sk); const struct dccp_service_list *sl; int err = -ENOENT, slen = 0, total_len = sizeof(u32); lock_sock(sk); if ((sl = dp->dccps_service_list) != NULL) { slen = sl->dccpsl_nr * sizeof(u32); total_len += slen; } err = -EINVAL; if (total_len > len) goto out; err = 0; if (put_user(total_len, optlen) || put_user(dp->dccps_service, optval) || (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen))) err = -EFAULT; out: release_sock(sk); return err; } static int do_dccp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { struct dccp_sock *dp; int val, len; if (get_user(len, optlen)) return -EFAULT; if (len < (int)sizeof(int)) return -EINVAL; dp = dccp_sk(sk); switch (optname) { case DCCP_SOCKOPT_PACKET_SIZE: DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n"); return 0; case DCCP_SOCKOPT_SERVICE: return dccp_getsockopt_service(sk, len, (__be32 __user *)optval, optlen); case DCCP_SOCKOPT_GET_CUR_MPS: val = dp->dccps_mss_cache; break; case DCCP_SOCKOPT_AVAILABLE_CCIDS: return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen); case DCCP_SOCKOPT_TX_CCID: val = ccid_get_current_tx_ccid(dp); if (val < 0) return -ENOPROTOOPT; break; case DCCP_SOCKOPT_RX_CCID: val = ccid_get_current_rx_ccid(dp); if (val < 0) return -ENOPROTOOPT; break; case DCCP_SOCKOPT_SERVER_TIMEWAIT: val = dp->dccps_server_timewait; break; case DCCP_SOCKOPT_SEND_CSCOV: val = dp->dccps_pcslen; break; case DCCP_SOCKOPT_RECV_CSCOV: val = dp->dccps_pcrlen; break; case DCCP_SOCKOPT_QPOLICY_ID: val = dp->dccps_qpolicy; break; case DCCP_SOCKOPT_QPOLICY_TXQLEN: val = dp->dccps_tx_qlen; break; case 128 ... 191: return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname, len, (u32 __user *)optval, optlen); case 192 ... 
255: return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname, len, (u32 __user *)optval, optlen); default: return -ENOPROTOOPT; } len = sizeof(val); if (put_user(len, optlen) || copy_to_user(optval, &val, len)) return -EFAULT; return 0; } int dccp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { if (level != SOL_DCCP) return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level, optname, optval, optlen); return do_dccp_getsockopt(sk, level, optname, optval, optlen); } EXPORT_SYMBOL_GPL(dccp_getsockopt); #ifdef CONFIG_COMPAT int compat_dccp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { if (level != SOL_DCCP) return inet_csk_compat_getsockopt(sk, level, optname, optval, optlen); return do_dccp_getsockopt(sk, level, optname, optval, optlen); } EXPORT_SYMBOL_GPL(compat_dccp_getsockopt); #endif static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb) { struct cmsghdr *cmsg; /* * Assign an (opaque) qpolicy priority value to skb->priority. * * We are overloading this skb field for use with the qpolicy subystem. * The skb->priority is normally used for the SO_PRIORITY option, which * is initialised from sk_priority. Since the assignment of sk_priority * to skb->priority happens later (on layer 3), we overload this field * for use with queueing priorities as long as the skb is on layer 4. * The default priority value (if nothing is set) is 0. */ skb->priority = 0; for_each_cmsghdr(cmsg, msg) { if (!CMSG_OK(msg, cmsg)) return -EINVAL; if (cmsg->cmsg_level != SOL_DCCP) continue; if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX && !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type)) return -EINVAL; switch (cmsg->cmsg_type) { case DCCP_SCM_PRIORITY: if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32))) return -EINVAL; skb->priority = *(__u32 *)CMSG_DATA(cmsg); break; default: return -EINVAL; } } return 0; } int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { const struct dccp_sock *dp = dccp_sk(sk); const int flags = msg->msg_flags; const int noblock = flags & MSG_DONTWAIT; struct sk_buff *skb; int rc, size; long timeo; trace_dccp_probe(sk, len); if (len > dp->dccps_mss_cache) return -EMSGSIZE; lock_sock(sk); timeo = sock_sndtimeo(sk, noblock); /* * We have to use sk_stream_wait_connect here to set sk_write_pending, * so that the trick in dccp_rcv_request_sent_state_process. */ /* Wait for a connection to finish. */ if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN)) if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0) goto out_release; size = sk->sk_prot->max_header + len; release_sock(sk); skb = sock_alloc_send_skb(sk, size, noblock, &rc); lock_sock(sk); if (skb == NULL) goto out_release; if (dccp_qpolicy_full(sk)) { rc = -EAGAIN; goto out_discard; } if (sk->sk_state == DCCP_CLOSED) { rc = -ENOTCONN; goto out_discard; } skb_reserve(skb, sk->sk_prot->max_header); rc = memcpy_from_msg(skb_put(skb, len), msg, len); if (rc != 0) goto out_discard; rc = dccp_msghdr_parse(msg, skb); if (rc != 0) goto out_discard; dccp_qpolicy_push(sk, skb); /* * The xmit_timer is set if the TX CCID is rate-based and will expire * when congestion control permits to release further packets into the * network. Window-based CCIDs do not use this timer. */ if (!timer_pending(&dp->dccps_xmit_timer)) dccp_write_xmit(sk); out_release: release_sock(sk); return rc ? 
: len; out_discard: kfree_skb(skb); goto out_release; } EXPORT_SYMBOL_GPL(dccp_sendmsg); int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len) { const struct dccp_hdr *dh; long timeo; lock_sock(sk); if (sk->sk_state == DCCP_LISTEN) { len = -ENOTCONN; goto out; } timeo = sock_rcvtimeo(sk, nonblock); do { struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); if (skb == NULL) goto verify_sock_status; dh = dccp_hdr(skb); switch (dh->dccph_type) { case DCCP_PKT_DATA: case DCCP_PKT_DATAACK: goto found_ok_skb; case DCCP_PKT_CLOSE: case DCCP_PKT_CLOSEREQ: if (!(flags & MSG_PEEK)) dccp_finish_passive_close(sk); /* fall through */ case DCCP_PKT_RESET: dccp_pr_debug("found fin (%s) ok!\n", dccp_packet_name(dh->dccph_type)); len = 0; goto found_fin_ok; default: dccp_pr_debug("packet_type=%s\n", dccp_packet_name(dh->dccph_type)); sk_eat_skb(sk, skb); } verify_sock_status: if (sock_flag(sk, SOCK_DONE)) { len = 0; break; } if (sk->sk_err) { len = sock_error(sk); break; } if (sk->sk_shutdown & RCV_SHUTDOWN) { len = 0; break; } if (sk->sk_state == DCCP_CLOSED) { if (!sock_flag(sk, SOCK_DONE)) { /* This occurs when user tries to read * from never connected socket. */ len = -ENOTCONN; break; } len = 0; break; } if (!timeo) { len = -EAGAIN; break; } if (signal_pending(current)) { len = sock_intr_errno(timeo); break; } sk_wait_data(sk, &timeo, NULL); continue; found_ok_skb: if (len > skb->len) len = skb->len; else if (len < skb->len) msg->msg_flags |= MSG_TRUNC; if (skb_copy_datagram_msg(skb, 0, msg, len)) { /* Exception. Bailout! */ len = -EFAULT; break; } if (flags & MSG_TRUNC) len = skb->len; found_fin_ok: if (!(flags & MSG_PEEK)) sk_eat_skb(sk, skb); break; } while (1); out: release_sock(sk); return len; } EXPORT_SYMBOL_GPL(dccp_recvmsg); int inet_dccp_listen(struct socket *sock, int backlog) { struct sock *sk = sock->sk; unsigned char old_state; int err; lock_sock(sk); err = -EINVAL; if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP) goto out; old_state = sk->sk_state; if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN))) goto out; /* Really, if the socket is already in listen state * we can only allow the backlog to be adjusted. */ if (old_state != DCCP_LISTEN) { /* * FIXME: here it probably should be sk->sk_prot->listen_start * see tcp_listen_start */ err = dccp_listen_start(sk, backlog); if (err) goto out; } sk->sk_max_ack_backlog = backlog; err = 0; out: release_sock(sk); return err; } EXPORT_SYMBOL_GPL(inet_dccp_listen); static void dccp_terminate_connection(struct sock *sk) { u8 next_state = DCCP_CLOSED; switch (sk->sk_state) { case DCCP_PASSIVE_CLOSE: case DCCP_PASSIVE_CLOSEREQ: dccp_finish_passive_close(sk); break; case DCCP_PARTOPEN: dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk); inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); /* fall through */ case DCCP_OPEN: dccp_send_close(sk, 1); if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER && !dccp_sk(sk)->dccps_server_timewait) next_state = DCCP_ACTIVE_CLOSEREQ; else next_state = DCCP_CLOSING; /* fall through */ default: dccp_set_state(sk, next_state); } } void dccp_close(struct sock *sk, long timeout) { struct dccp_sock *dp = dccp_sk(sk); struct sk_buff *skb; u32 data_was_unread = 0; int state; lock_sock(sk); sk->sk_shutdown = SHUTDOWN_MASK; if (sk->sk_state == DCCP_LISTEN) { dccp_set_state(sk, DCCP_CLOSED); /* Special case. */ inet_csk_listen_stop(sk); goto adjudge_to_death; } sk_stop_timer(sk, &dp->dccps_xmit_timer); /* * We need to flush the recv. buffs. 
We do this only on the * descriptor close, not protocol-sourced closes, because the *reader process may not have drained the data yet! */ while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { data_was_unread += skb->len; __kfree_skb(skb); } /* If socket has been already reset kill it. */ if (sk->sk_state == DCCP_CLOSED) goto adjudge_to_death; if (data_was_unread) { /* Unread data was tossed, send an appropriate Reset Code */ DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread); dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED); dccp_set_state(sk, DCCP_CLOSED); } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { /* Check zero linger _after_ checking for unread data. */ sk->sk_prot->disconnect(sk, 0); } else if (sk->sk_state != DCCP_CLOSED) { /* * Normal connection termination. May need to wait if there are * still packets in the TX queue that are delayed by the CCID. */ dccp_flush_write_queue(sk, &timeout); dccp_terminate_connection(sk); } /* * Flush write queue. This may be necessary in several cases: * - we have been closed by the peer but still have application data; * - abortive termination (unread data or zero linger time), * - normal termination but queue could not be flushed within time limit */ __skb_queue_purge(&sk->sk_write_queue); sk_stream_wait_close(sk, timeout); adjudge_to_death: state = sk->sk_state; sock_hold(sk); sock_orphan(sk); /* * It is the last release_sock in its life. It will remove backlog. */ release_sock(sk); /* * Now socket is owned by kernel and we acquire BH lock * to finish close. No need to check for user refs. */ local_bh_disable(); bh_lock_sock(sk); WARN_ON(sock_owned_by_user(sk)); percpu_counter_inc(sk->sk_prot->orphan_count); /* Have we already been destroyed by a softirq or backlog? */ if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED) goto out; if (sk->sk_state == DCCP_CLOSED) inet_csk_destroy_sock(sk); /* Otherwise, socket is reprieved until protocol close. */ out: bh_unlock_sock(sk); local_bh_enable(); sock_put(sk); } EXPORT_SYMBOL_GPL(dccp_close); void dccp_shutdown(struct sock *sk, int how) { dccp_pr_debug("called shutdown(%x)\n", how); } EXPORT_SYMBOL_GPL(dccp_shutdown); static inline int __init dccp_mib_init(void) { dccp_statistics = alloc_percpu(struct dccp_mib); if (!dccp_statistics) return -ENOMEM; return 0; } static inline void dccp_mib_exit(void) { free_percpu(dccp_statistics); } static int thash_entries; module_param(thash_entries, int, 0444); MODULE_PARM_DESC(thash_entries, "Number of ehash buckets"); #ifdef CONFIG_IP_DCCP_DEBUG bool dccp_debug; module_param(dccp_debug, bool, 0644); MODULE_PARM_DESC(dccp_debug, "Enable debug messages"); EXPORT_SYMBOL_GPL(dccp_debug); #endif static int __init dccp_init(void) { unsigned long goal; int ehash_order, bhash_order, i; int rc; BUILD_BUG_ON(sizeof(struct dccp_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb)); rc = percpu_counter_init(&dccp_orphan_count, 0, GFP_KERNEL); if (rc) goto out_fail; rc = -ENOBUFS; inet_hashinfo_init(&dccp_hashinfo); dccp_hashinfo.bind_bucket_cachep = kmem_cache_create("dccp_bind_bucket", sizeof(struct inet_bind_bucket), 0, SLAB_HWCACHE_ALIGN, NULL); if (!dccp_hashinfo.bind_bucket_cachep) goto out_free_percpu; /* * Size and allocate the main established and bind bucket * hash tables. * * The methodology is similar to that of the buffer cache. 
*/ if (totalram_pages >= (128 * 1024)) goal = totalram_pages >> (21 - PAGE_SHIFT); else goal = totalram_pages >> (23 - PAGE_SHIFT); if (thash_entries) goal = (thash_entries * sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT; for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++) ; do { unsigned long hash_size = (1UL << ehash_order) * PAGE_SIZE / sizeof(struct inet_ehash_bucket); while (hash_size & (hash_size - 1)) hash_size--; dccp_hashinfo.ehash_mask = hash_size - 1; dccp_hashinfo.ehash = (struct inet_ehash_bucket *) __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, ehash_order); } while (!dccp_hashinfo.ehash && --ehash_order > 0); if (!dccp_hashinfo.ehash) { DCCP_CRIT("Failed to allocate DCCP established hash table"); goto out_free_bind_bucket_cachep; } for (i = 0; i <= dccp_hashinfo.ehash_mask; i++) INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i); if (inet_ehash_locks_alloc(&dccp_hashinfo)) goto out_free_dccp_ehash; bhash_order = ehash_order; do { dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE / sizeof(struct inet_bind_hashbucket); if ((dccp_hashinfo.bhash_size > (64 * 1024)) && bhash_order > 0) continue; dccp_hashinfo.bhash = (struct inet_bind_hashbucket *) __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, bhash_order); } while (!dccp_hashinfo.bhash && --bhash_order >= 0); if (!dccp_hashinfo.bhash) { DCCP_CRIT("Failed to allocate DCCP bind hash table"); goto out_free_dccp_locks; } for (i = 0; i < dccp_hashinfo.bhash_size; i++) { spin_lock_init(&dccp_hashinfo.bhash[i].lock); INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain); } rc = dccp_mib_init(); if (rc) goto out_free_dccp_bhash; rc = dccp_ackvec_init(); if (rc) goto out_free_dccp_mib; rc = dccp_sysctl_init(); if (rc) goto out_ackvec_exit; rc = ccid_initialize_builtins(); if (rc) goto out_sysctl_exit; dccp_timestamping_init(); return 0; out_sysctl_exit: dccp_sysctl_exit(); out_ackvec_exit: dccp_ackvec_exit(); out_free_dccp_mib: dccp_mib_exit(); out_free_dccp_bhash: free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order); out_free_dccp_locks: inet_ehash_locks_free(&dccp_hashinfo); out_free_dccp_ehash: free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order); out_free_bind_bucket_cachep: kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); out_free_percpu: percpu_counter_destroy(&dccp_orphan_count); out_fail: dccp_hashinfo.bhash = NULL; dccp_hashinfo.ehash = NULL; dccp_hashinfo.bind_bucket_cachep = NULL; return rc; } static void __exit dccp_fini(void) { ccid_cleanup_builtins(); dccp_mib_exit(); free_pages((unsigned long)dccp_hashinfo.bhash, get_order(dccp_hashinfo.bhash_size * sizeof(struct inet_bind_hashbucket))); free_pages((unsigned long)dccp_hashinfo.ehash, get_order((dccp_hashinfo.ehash_mask + 1) * sizeof(struct inet_ehash_bucket))); inet_ehash_locks_free(&dccp_hashinfo); kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); dccp_ackvec_exit(); dccp_sysctl_exit(); percpu_counter_destroy(&dccp_orphan_count); } module_init(dccp_init); module_exit(dccp_fini); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>"); MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");
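/*
 * Illustrative user-space sketch (not part of the kernel file above): it
 * exercises the do_dccp_setsockopt()/do_dccp_getsockopt() paths shown
 * earlier via SOL_DCCP. The option names come from <linux/dccp.h>; the
 * port, service code and queue length are arbitrary example values and
 * error handling is trimmed.
 */
#include <stdio.h>
#include <stdint.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <linux/dccp.h>

#ifndef SOL_DCCP
#define SOL_DCCP 269	/* socket level value from the kernel's socket.h */
#endif

int dccp_client_example(void)
{
	int fd = socket(AF_INET, SOCK_DCCP, IPPROTO_DCCP);
	uint32_t service = htonl(42);	/* hypothetical service code (__be32) */
	int txqlen = 10;		/* bounds dccps_tx_qlen, see above */
	int mps;
	socklen_t len = sizeof(mps);
	struct sockaddr_in dst = {
		.sin_family = AF_INET,
		.sin_port   = htons(5001),
		.sin_addr   = { htonl(INADDR_LOOPBACK) },
	};

	if (fd < 0)
		return -1;
	/* handled by dccp_setsockopt_service(), must happen before connect() */
	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE, &service, sizeof(service));
	/* handled by the DCCP_SOCKOPT_QPOLICY_TXQLEN case above */
	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_QPOLICY_TXQLEN, &txqlen, sizeof(txqlen));

	if (connect(fd, (struct sockaddr *)&dst, sizeof(dst)) == 0) {
		/* reads dccps_mss_cache via do_dccp_getsockopt() */
		getsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_GET_CUR_MPS, &mps, &len);
		printf("current maximum packet size: %d\n", mps);
	}
	return fd;
}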
11 758 110 838 68 35 10 8 49 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_FIB_RULES_H #define __NET_FIB_RULES_H #include <linux/types.h> #include <linux/slab.h> #include <linux/netdevice.h> #include <linux/fib_rules.h> #include <linux/refcount.h> #include <net/flow.h> #include <net/rtnetlink.h> #include <net/fib_notifier.h> struct fib_kuid_range { kuid_t start; kuid_t end; }; struct fib_rule { struct list_head list; int iifindex; int oifindex; u32 mark; u32 mark_mask; u32 flags; u32 table; u8 action; u8 l3mdev; u8 proto; u8 ip_proto; u32 target; __be64 tun_id; struct fib_rule __rcu *ctarget; struct net *fr_net; refcount_t refcnt; u32 pref; int suppress_ifgroup; int suppress_prefixlen; char iifname[IFNAMSIZ]; char oifname[IFNAMSIZ]; struct fib_kuid_range uid_range; struct fib_rule_port_range sport_range; struct fib_rule_port_range dport_range; struct rcu_head rcu; }; struct fib_lookup_arg { void *lookup_ptr; const void *lookup_data; void *result; struct fib_rule *rule; u32 table; int flags; #define FIB_LOOKUP_NOREF 1 #define FIB_LOOKUP_IGNORE_LINKSTATE 2 }; struct fib_rules_ops { int family; struct list_head list; int rule_size; int addr_size; int unresolved_rules; int nr_goto_rules; unsigned int fib_rules_seq; int (*action)(struct fib_rule *, struct flowi *, int, struct fib_lookup_arg *); bool (*suppress)(struct fib_rule *, struct fib_lookup_arg *); int (*match)(struct fib_rule *, struct flowi *, int); int (*configure)(struct fib_rule *, struct sk_buff *, struct fib_rule_hdr *, struct nlattr **, struct netlink_ext_ack *); int (*delete)(struct fib_rule *); int (*compare)(struct fib_rule *, struct fib_rule_hdr *, struct nlattr **); int (*fill)(struct fib_rule *, struct sk_buff *, struct fib_rule_hdr *); size_t (*nlmsg_payload)(struct fib_rule *); /* Called after modifications to the rules set, must flush * the route cache if one exists. 
*/ void (*flush_cache)(struct fib_rules_ops *ops); int nlgroup; const struct nla_policy *policy; struct list_head rules_list; struct module *owner; struct net *fro_net; struct rcu_head rcu; }; struct fib_rule_notifier_info { struct fib_notifier_info info; /* must be first */ struct fib_rule *rule; }; #define FRA_GENERIC_POLICY \ [FRA_IIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \ [FRA_OIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \ [FRA_PRIORITY] = { .type = NLA_U32 }, \ [FRA_FWMARK] = { .type = NLA_U32 }, \ [FRA_TUN_ID] = { .type = NLA_U64 }, \ [FRA_FWMASK] = { .type = NLA_U32 }, \ [FRA_TABLE] = { .type = NLA_U32 }, \ [FRA_SUPPRESS_PREFIXLEN] = { .type = NLA_U32 }, \ [FRA_SUPPRESS_IFGROUP] = { .type = NLA_U32 }, \ [FRA_GOTO] = { .type = NLA_U32 }, \ [FRA_L3MDEV] = { .type = NLA_U8 }, \ [FRA_UID_RANGE] = { .len = sizeof(struct fib_rule_uid_range) }, \ [FRA_PROTOCOL] = { .type = NLA_U8 }, \ [FRA_IP_PROTO] = { .type = NLA_U8 }, \ [FRA_SPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }, \ [FRA_DPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) } static inline void fib_rule_get(struct fib_rule *rule) { refcount_inc(&rule->refcnt); } static inline void fib_rule_put(struct fib_rule *rule) { if (refcount_dec_and_test(&rule->refcnt)) kfree_rcu(rule, rcu); } #ifdef CONFIG_NET_L3_MASTER_DEV static inline u32 fib_rule_get_table(struct fib_rule *rule, struct fib_lookup_arg *arg) { return rule->l3mdev ? arg->table : rule->table; } #else static inline u32 fib_rule_get_table(struct fib_rule *rule, struct fib_lookup_arg *arg) { return rule->table; } #endif static inline u32 frh_get_table(struct fib_rule_hdr *frh, struct nlattr **nla) { if (nla[FRA_TABLE]) return nla_get_u32(nla[FRA_TABLE]); return frh->table; } static inline bool fib_rule_port_range_set(const struct fib_rule_port_range *range) { return range->start != 0 && range->end != 0; } static inline bool fib_rule_port_inrange(const struct fib_rule_port_range *a, __be16 port) { return ntohs(port) >= a->start && ntohs(port) <= a->end; } static inline bool fib_rule_port_range_valid(const struct fib_rule_port_range *a) { return a->start != 0 && a->end != 0 && a->end < 0xffff && a->start <= a->end; } static inline bool fib_rule_port_range_compare(struct fib_rule_port_range *a, struct fib_rule_port_range *b) { return a->start == b->start && a->end == b->end; } static inline bool fib_rule_requires_fldissect(struct fib_rule *rule) { return rule->ip_proto || fib_rule_port_range_set(&rule->sport_range) || fib_rule_port_range_set(&rule->dport_range); } struct fib_rules_ops *fib_rules_register(const struct fib_rules_ops *, struct net *); void fib_rules_unregister(struct fib_rules_ops *); int fib_rules_lookup(struct fib_rules_ops *, struct flowi *, int flags, struct fib_lookup_arg *); int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table, u32 flags); bool fib_rule_matchall(const struct fib_rule *rule); int fib_rules_dump(struct net *net, struct notifier_block *nb, int family); unsigned int fib_rules_seq_read(struct net *net, int family); int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack); int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack); #endif
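/*
 * Illustrative sketch (not from the kernel tree): how a rule's sport/dport
 * ranges, declared in struct fib_rule above, interact with the inline
 * helpers defined in this header. The matching policy mirrors what a
 * family-specific match() implementation would do; function name is
 * hypothetical.
 */
#include <net/fib_rules.h>

static bool example_rule_matches_ports(const struct fib_rule *rule,
				       __be16 sport, __be16 dport)
{
	/* A zero start/end means "no port range configured" for that side. */
	if (fib_rule_port_range_set(&rule->sport_range) &&
	    !fib_rule_port_inrange(&rule->sport_range, sport))
		return false;

	if (fib_rule_port_range_set(&rule->dport_range) &&
	    !fib_rule_port_inrange(&rule->dport_range, dport))
		return false;

	return true;
}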
// SPDX-License-Identifier: GPL-2.0
/*
 *  fs/partitions/osf.c
 *
 *  Code extracted from drivers/block/genhd.c
 *
 *  Copyright (C) 1991-1998  Linus Torvalds
 *  Re-organised Feb 1998 Russell King
 */

#include "check.h"
#include "osf.h"

#define MAX_OSF_PARTITIONS 18

int osf_partition(struct parsed_partitions *state)
{
	int i;
	int slot = 1;
	unsigned int npartitions;
	Sector sect;
	unsigned char *data;
	struct disklabel {
		__le32 d_magic;
		__le16 d_type,d_subtype;
		u8 d_typename[16];
		u8 d_packname[16];
		__le32 d_secsize;
		__le32 d_nsectors;
		__le32 d_ntracks;
		__le32 d_ncylinders;
		__le32 d_secpercyl;
		__le32 d_secprtunit;
		__le16 d_sparespertrack;
		__le16 d_sparespercyl;
		__le32 d_acylinders;
		__le16 d_rpm, d_interleave, d_trackskew, d_cylskew;
		__le32 d_headswitch, d_trkseek, d_flags;
		__le32 d_drivedata[5];
		__le32 d_spare[5];
		__le32 d_magic2;
		__le16 d_checksum;
		__le16 d_npartitions;
		__le32 d_bbsize, d_sbsize;
		struct d_partition {
			__le32 p_size;
			__le32 p_offset;
			__le32 p_fsize;
			u8  p_fstype;
			u8  p_frag;
			__le16 p_cpg;
		} d_partitions[MAX_OSF_PARTITIONS];
	} * label;
	struct d_partition * partition;

	data = read_part_sector(state, 0, &sect);
	if (!data)
		return -1;

	label = (struct disklabel *) (data+64);
	partition = label->d_partitions;
	if (le32_to_cpu(label->d_magic) != DISKLABELMAGIC) {
		put_dev_sector(sect);
		return 0;
	}
	if (le32_to_cpu(label->d_magic2) != DISKLABELMAGIC) {
		put_dev_sector(sect);
		return 0;
	}
	npartitions = le16_to_cpu(label->d_npartitions);
	if (npartitions > MAX_OSF_PARTITIONS) {
		put_dev_sector(sect);
		return 0;
	}
	for (i = 0 ; i < npartitions; i++, partition++) {
		if (slot == state->limit)
			break;
		if (le32_to_cpu(partition->p_size))
			put_partition(state, slot,
				le32_to_cpu(partition->p_offset),
				le32_to_cpu(partition->p_size));
		slot++;
	}
	strlcat(state->pp_buf, "\n", PAGE_SIZE);
	put_dev_sector(sect);
	return 1;
}
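/*
 * Illustrative user-space sketch (not part of the parser above): apply the
 * same OSF/BSD disklabel presence check to a raw 512-byte sector read from
 * a device. The 64-byte label offset mirrors the kernel parser; the magic
 * value is assumed to match the DISKLABELMAGIC definition in osf.h, and the
 * struct layout is abbreviated to the single field actually checked here.
 */
#include <stdint.h>
#include <string.h>
#include <endian.h>

#define OSF_LABEL_OFFSET	64
#define OSF_DISKLABELMAGIC	0x82564557UL	/* assumed BSD disklabel magic */

static int osf_label_present(const unsigned char sector[512])
{
	uint32_t magic;

	/* d_magic sits at the start of the label, 64 bytes into the sector */
	memcpy(&magic, sector + OSF_LABEL_OFFSET, sizeof(magic));

	/* the on-disk value is little-endian, as le32_to_cpu() above implies;
	 * the kernel additionally verifies d_magic2 at the end of the label */
	return le32toh(magic) == OSF_DISKLABELMAGIC;
}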
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_SCHED_COREDUMP_H
#define _LINUX_SCHED_COREDUMP_H

#include <linux/mm_types.h>

#define SUID_DUMP_DISABLE	0	/* No setuid dumping */
#define SUID_DUMP_USER		1	/* Dump as user of process */
#define SUID_DUMP_ROOT		2	/* Dump as root */

/* mm flags */

/* for SUID_DUMP_* above */
#define MMF_DUMPABLE_BITS 2
#define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1)

extern void set_dumpable(struct mm_struct *mm, int value);
/*
 * This returns the actual value of the suid_dumpable flag. For things
 * that are using this for checking for privilege transitions, it must
 * test against SUID_DUMP_USER rather than treating it as a boolean
 * value.
 */
static inline int __get_dumpable(unsigned long mm_flags)
{
	return mm_flags & MMF_DUMPABLE_MASK;
}

static inline int get_dumpable(struct mm_struct *mm)
{
	return __get_dumpable(mm->flags);
}

/* coredump filter bits */
#define MMF_DUMP_ANON_PRIVATE	2
#define MMF_DUMP_ANON_SHARED	3
#define MMF_DUMP_MAPPED_PRIVATE	4
#define MMF_DUMP_MAPPED_SHARED	5
#define MMF_DUMP_ELF_HEADERS	6
#define MMF_DUMP_HUGETLB_PRIVATE 7
#define MMF_DUMP_HUGETLB_SHARED  8
#define MMF_DUMP_DAX_PRIVATE	9
#define MMF_DUMP_DAX_SHARED	10

#define MMF_DUMP_FILTER_SHIFT	MMF_DUMPABLE_BITS
#define MMF_DUMP_FILTER_BITS	9
#define MMF_DUMP_FILTER_MASK \
	(((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT)
#define MMF_DUMP_FILTER_DEFAULT \
	((1 << MMF_DUMP_ANON_PRIVATE) |	(1 << MMF_DUMP_ANON_SHARED) |\
	 (1 << MMF_DUMP_HUGETLB_PRIVATE) | MMF_DUMP_MASK_DEFAULT_ELF)

#ifdef CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS
# define MMF_DUMP_MASK_DEFAULT_ELF	(1 << MMF_DUMP_ELF_HEADERS)
#else
# define MMF_DUMP_MASK_DEFAULT_ELF	0
#endif
					/* leave room for more dump flags */
#define MMF_VM_MERGEABLE	16	/* KSM may merge identical pages */
#define MMF_VM_HUGEPAGE		17	/* set when VM_HUGEPAGE is set on vma */

/*
 * This one-shot flag is dropped due to necessity of changing exe once again
 * on NFS restore
 */
//#define MMF_EXE_FILE_CHANGED	18	/* see prctl_set_mm_exe_file() */

#define MMF_HAS_UPROBES		19	/* has uprobes */
#define MMF_RECALC_UPROBES	20	/* MMF_HAS_UPROBES can be wrong */
#define MMF_OOM_SKIP		21	/* mm is of no interest for the OOM killer */
#define MMF_UNSTABLE		22	/* mm is unstable for copy_from_user */
#define MMF_HUGE_ZERO_PAGE	23	/* mm has ever used the global huge zero page */
#define MMF_DISABLE_THP		24	/* disable THP for all VMAs */
#define MMF_OOM_VICTIM		25	/* mm is the oom victim */
#define MMF_OOM_REAP_QUEUED	26	/* mm was queued for oom_reaper */
#define MMF_MULTIPROCESS	27	/* mm is shared between processes */
#define MMF_DISABLE_THP_MASK	(1 << MMF_DISABLE_THP)

#define MMF_INIT_MASK		(MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\
				 MMF_DISABLE_THP_MASK)

#endif /* _LINUX_SCHED_COREDUMP_H */
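/*
 * Illustrative sketch (not part of the header above): how the MMF_DUMP_*
 * bit positions relate to the value user space sees in
 * /proc/<pid>/coredump_filter. That proc file exposes the filter bits
 * shifted down by MMF_DUMP_FILTER_SHIFT, so bit 0 there corresponds to
 * MMF_DUMP_ANON_PRIVATE here. The 0x33 result assumes
 * CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y.
 */
#include <linux/sched/coredump.h>

static unsigned int example_default_coredump_filter(void)
{
	unsigned long mm_flags = MMF_DUMP_FILTER_DEFAULT;

	/* 0x33: anon private + anon shared + ELF headers + hugetlb private */
	return (mm_flags & MMF_DUMP_FILTER_MASK) >> MMF_DUMP_FILTER_SHIFT;
}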
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 /* include/net/xdp.h * * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc. * Released under terms in GPL version 2. See COPYING. */ #ifndef __LINUX_NET_XDP_H__ #define __LINUX_NET_XDP_H__ /** * DOC: XDP RX-queue information * * The XDP RX-queue info (xdp_rxq_info) is associated with the driver * level RX-ring queues. It is information that is specific to how * the driver have configured a given RX-ring queue. * * Each xdp_buff frame received in the driver carry a (pointer) * reference to this xdp_rxq_info structure. This provides the XDP * data-path read-access to RX-info for both kernel and bpf-side * (limited subset). * * For now, direct access is only safe while running in NAPI/softirq * context. Contents is read-mostly and must not be updated during * driver NAPI/softirq poll. * * The driver usage API is a register and unregister API. * * The struct is not directly tied to the XDP prog. A new XDP prog * can be attached as long as it doesn't change the underlying * RX-ring. If the RX-ring does change significantly, the NIC driver * naturally need to stop the RX-ring before purging and reallocating * memory. In that process the driver MUST call unregistor (which * also apply for driver shutdown and unload). The register API is * also mandatory during RX-ring setup. */ enum xdp_mem_type { MEM_TYPE_PAGE_SHARED = 0, /* Split-page refcnt based model */ MEM_TYPE_PAGE_ORDER0, /* Orig XDP full page model */ MEM_TYPE_PAGE_POOL, MEM_TYPE_ZERO_COPY, MEM_TYPE_MAX, }; /* XDP flags for ndo_xdp_xmit */ #define XDP_XMIT_FLUSH (1U << 0) /* doorbell signal consumer */ #define XDP_XMIT_FLAGS_MASK XDP_XMIT_FLUSH struct xdp_mem_info { u32 type; /* enum xdp_mem_type, but known size type */ u32 id; }; struct page_pool; struct zero_copy_allocator { void (*free)(struct zero_copy_allocator *zca, unsigned long handle); }; struct xdp_rxq_info { struct net_device *dev; u32 queue_index; u32 reg_state; struct xdp_mem_info mem; } ____cacheline_aligned; /* perf critical, avoid false-sharing */ struct xdp_buff { void *data; void *data_end; void *data_meta; void *data_hard_start; unsigned long handle; struct xdp_rxq_info *rxq; }; struct xdp_frame { void *data; u16 len; u16 headroom; u16 metasize; /* Lifetime of xdp_rxq_info is limited to NAPI/enqueue time, * while mem info is valid on remote CPU. */ struct xdp_mem_info mem; struct net_device *dev_rx; /* used by cpumap */ }; /* Clear kernel pointers in xdp_frame */ static inline void xdp_scrub_frame(struct xdp_frame *frame) { frame->data = NULL; frame->dev_rx = NULL; } /* Convert xdp_buff to xdp_frame */ static inline struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp) { struct xdp_frame *xdp_frame; int metasize; int headroom; /* TODO: implement clone, copy, use "native" MEM_TYPE */ if (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY) return NULL; /* Assure headroom is available for storing info */ headroom = xdp->data - xdp->data_hard_start; metasize = xdp->data - xdp->data_meta; metasize = metasize > 0 ? 
metasize : 0; if (unlikely((headroom - metasize) < sizeof(*xdp_frame))) return NULL; /* Store info in top of packet */ xdp_frame = xdp->data_hard_start; xdp_frame->data = xdp->data; xdp_frame->len = xdp->data_end - xdp->data; xdp_frame->headroom = headroom - sizeof(*xdp_frame); xdp_frame->metasize = metasize; /* rxq only valid until napi_schedule ends, convert to xdp_mem_info */ xdp_frame->mem = xdp->rxq->mem; return xdp_frame; } void xdp_return_frame(struct xdp_frame *xdpf); void xdp_return_frame_rx_napi(struct xdp_frame *xdpf); void xdp_return_buff(struct xdp_buff *xdp); int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, struct net_device *dev, u32 queue_index); void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq); void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq); bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq); int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq, enum xdp_mem_type type, void *allocator); /* Drivers not supporting XDP metadata can use this helper, which * rejects any room expansion for metadata as a result. */ static __always_inline void xdp_set_data_meta_invalid(struct xdp_buff *xdp) { xdp->data_meta = xdp->data + 1; } static __always_inline bool xdp_data_meta_unsupported(const struct xdp_buff *xdp) { return unlikely(xdp->data_meta > xdp->data); } struct xdp_attachment_info { struct bpf_prog *prog; u32 flags; }; struct netdev_bpf; int xdp_attachment_query(struct xdp_attachment_info *info, struct netdev_bpf *bpf); bool xdp_attachment_flags_ok(struct xdp_attachment_info *info, struct netdev_bpf *bpf); void xdp_attachment_setup(struct xdp_attachment_info *info, struct netdev_bpf *bpf); #endif /* __LINUX_NET_XDP_H__ */
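/*
 * Illustrative driver-side sketch (not taken from any real driver): the
 * register/unregister sequence the DOC comment at the top of this header
 * describes. "my_rx_ring" and its fields are hypothetical; a real driver
 * embeds struct xdp_rxq_info in its per-ring structure and calls these
 * during RX-ring setup and teardown.
 */
#include <linux/netdevice.h>
#include <net/xdp.h>

struct my_rx_ring {
	struct net_device *netdev;
	u32 queue_index;
	struct xdp_rxq_info xdp_rxq;
};

static int my_ring_setup_xdp(struct my_rx_ring *ring)
{
	int err;

	err = xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, ring->queue_index);
	if (err)
		return err;

	/* pages recycled by refcount: the original split-page model */
	err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
					 MEM_TYPE_PAGE_SHARED, NULL);
	if (err)
		xdp_rxq_info_unreg(&ring->xdp_rxq);
	return err;
}

static void my_ring_teardown_xdp(struct my_rx_ring *ring)
{
	/* mandatory before the RX-ring memory is purged or reallocated */
	xdp_rxq_info_unreg(&ring->xdp_rxq);
}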
/* This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
* * Authors: * Alexander Aring <aar@pengutronix.de> * * Based on: net/wireless/nl80211.c */ #include <linux/rtnetlink.h> #include <net/cfg802154.h> #include <net/genetlink.h> #include <net/mac802154.h> #include <net/netlink.h> #include <net/nl802154.h> #include <net/sock.h> #include "nl802154.h" #include "rdev-ops.h" #include "core.h" /* the netlink family */ static struct genl_family nl802154_fam; /* multicast groups */ enum nl802154_multicast_groups { NL802154_MCGRP_CONFIG, }; static const struct genl_multicast_group nl802154_mcgrps[] = { [NL802154_MCGRP_CONFIG] = { .name = "config", }, }; /* returns ERR_PTR values */ static struct wpan_dev * __cfg802154_wpan_dev_from_attrs(struct net *netns, struct nlattr **attrs) { struct cfg802154_registered_device *rdev; struct wpan_dev *result = NULL; bool have_ifidx = attrs[NL802154_ATTR_IFINDEX]; bool have_wpan_dev_id = attrs[NL802154_ATTR_WPAN_DEV]; u64 wpan_dev_id; int wpan_phy_idx = -1; int ifidx = -1; ASSERT_RTNL(); if (!have_ifidx && !have_wpan_dev_id) return ERR_PTR(-EINVAL); if (have_ifidx) ifidx = nla_get_u32(attrs[NL802154_ATTR_IFINDEX]); if (have_wpan_dev_id) { wpan_dev_id = nla_get_u64(attrs[NL802154_ATTR_WPAN_DEV]); wpan_phy_idx = wpan_dev_id >> 32; } list_for_each_entry(rdev, &cfg802154_rdev_list, list) { struct wpan_dev *wpan_dev; if (wpan_phy_net(&rdev->wpan_phy) != netns) continue; if (have_wpan_dev_id && rdev->wpan_phy_idx != wpan_phy_idx) continue; list_for_each_entry(wpan_dev, &rdev->wpan_dev_list, list) { if (have_ifidx && wpan_dev->netdev && wpan_dev->netdev->ifindex == ifidx) { result = wpan_dev; break; } if (have_wpan_dev_id && wpan_dev->identifier == (u32)wpan_dev_id) { result = wpan_dev; break; } } if (result) break; } if (result) return result; return ERR_PTR(-ENODEV); } static struct cfg802154_registered_device * __cfg802154_rdev_from_attrs(struct net *netns, struct nlattr **attrs) { struct cfg802154_registered_device *rdev = NULL, *tmp; struct net_device *netdev; ASSERT_RTNL(); if (!attrs[NL802154_ATTR_WPAN_PHY] && !attrs[NL802154_ATTR_IFINDEX] && !attrs[NL802154_ATTR_WPAN_DEV]) return ERR_PTR(-EINVAL); if (attrs[NL802154_ATTR_WPAN_PHY]) rdev = cfg802154_rdev_by_wpan_phy_idx( nla_get_u32(attrs[NL802154_ATTR_WPAN_PHY])); if (attrs[NL802154_ATTR_WPAN_DEV]) { u64 wpan_dev_id = nla_get_u64(attrs[NL802154_ATTR_WPAN_DEV]); struct wpan_dev *wpan_dev; bool found = false; tmp = cfg802154_rdev_by_wpan_phy_idx(wpan_dev_id >> 32); if (tmp) { /* make sure wpan_dev exists */ list_for_each_entry(wpan_dev, &tmp->wpan_dev_list, list) { if (wpan_dev->identifier != (u32)wpan_dev_id) continue; found = true; break; } if (!found) tmp = NULL; if (rdev && tmp != rdev) return ERR_PTR(-EINVAL); rdev = tmp; } } if (attrs[NL802154_ATTR_IFINDEX]) { int ifindex = nla_get_u32(attrs[NL802154_ATTR_IFINDEX]); netdev = __dev_get_by_index(netns, ifindex); if (netdev) { if (netdev->ieee802154_ptr) tmp = wpan_phy_to_rdev( netdev->ieee802154_ptr->wpan_phy); else tmp = NULL; /* not wireless device -- return error */ if (!tmp) return ERR_PTR(-EINVAL); /* mismatch -- return error */ if (rdev && tmp != rdev) return ERR_PTR(-EINVAL); rdev = tmp; } } if (!rdev) return ERR_PTR(-ENODEV); if (netns != wpan_phy_net(&rdev->wpan_phy)) return ERR_PTR(-ENODEV); return rdev; } /* This function returns a pointer to the driver * that the genl_info item that is passed refers to. * * The result of this can be a PTR_ERR and hence must * be checked with IS_ERR() for errors. 
*/ static struct cfg802154_registered_device * cfg802154_get_dev_from_info(struct net *netns, struct genl_info *info) { return __cfg802154_rdev_from_attrs(netns, info->attrs); } /* policy for the attributes */ static const struct nla_policy nl802154_policy[NL802154_ATTR_MAX+1] = { [NL802154_ATTR_WPAN_PHY] = { .type = NLA_U32 }, [NL802154_ATTR_WPAN_PHY_NAME] = { .type = NLA_NUL_STRING, .len = 20-1 }, [NL802154_ATTR_IFINDEX] = { .type = NLA_U32 }, [NL802154_ATTR_IFTYPE] = { .type = NLA_U32 }, [NL802154_ATTR_IFNAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ-1 }, [NL802154_ATTR_WPAN_DEV] = { .type = NLA_U64 }, [NL802154_ATTR_PAGE] = { .type = NLA_U8, }, [NL802154_ATTR_CHANNEL] = { .type = NLA_U8, }, [NL802154_ATTR_TX_POWER] = { .type = NLA_S32, }, [NL802154_ATTR_CCA_MODE] = { .type = NLA_U32, }, [NL802154_ATTR_CCA_OPT] = { .type = NLA_U32, }, [NL802154_ATTR_CCA_ED_LEVEL] = { .type = NLA_S32, }, [NL802154_ATTR_SUPPORTED_CHANNEL] = { .type = NLA_U32, }, [NL802154_ATTR_PAN_ID] = { .type = NLA_U16, }, [NL802154_ATTR_EXTENDED_ADDR] = { .type = NLA_U64 }, [NL802154_ATTR_SHORT_ADDR] = { .type = NLA_U16, }, [NL802154_ATTR_MIN_BE] = { .type = NLA_U8, }, [NL802154_ATTR_MAX_BE] = { .type = NLA_U8, }, [NL802154_ATTR_MAX_CSMA_BACKOFFS] = { .type = NLA_U8, }, [NL802154_ATTR_MAX_FRAME_RETRIES] = { .type = NLA_S8, }, [NL802154_ATTR_LBT_MODE] = { .type = NLA_U8, }, [NL802154_ATTR_WPAN_PHY_CAPS] = { .type = NLA_NESTED }, [NL802154_ATTR_SUPPORTED_COMMANDS] = { .type = NLA_NESTED }, [NL802154_ATTR_ACKREQ_DEFAULT] = { .type = NLA_U8 }, [NL802154_ATTR_PID] = { .type = NLA_U32 }, [NL802154_ATTR_NETNS_FD] = { .type = NLA_U32 }, #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL [NL802154_ATTR_SEC_ENABLED] = { .type = NLA_U8, }, [NL802154_ATTR_SEC_OUT_LEVEL] = { .type = NLA_U32, }, [NL802154_ATTR_SEC_OUT_KEY_ID] = { .type = NLA_NESTED, }, [NL802154_ATTR_SEC_FRAME_COUNTER] = { .type = NLA_U32 }, [NL802154_ATTR_SEC_LEVEL] = { .type = NLA_NESTED }, [NL802154_ATTR_SEC_DEVICE] = { .type = NLA_NESTED }, [NL802154_ATTR_SEC_DEVKEY] = { .type = NLA_NESTED }, [NL802154_ATTR_SEC_KEY] = { .type = NLA_NESTED }, #endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */ }; #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL static int nl802154_prepare_wpan_dev_dump(struct sk_buff *skb, struct netlink_callback *cb, struct cfg802154_registered_device **rdev, struct wpan_dev **wpan_dev) { int err; rtnl_lock(); if (!cb->args[0]) { err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl802154_fam.hdrsize, genl_family_attrbuf(&nl802154_fam), nl802154_fam.maxattr, nl802154_policy, NULL); if (err) goto out_unlock; *wpan_dev = __cfg802154_wpan_dev_from_attrs(sock_net(skb->sk), genl_family_attrbuf(&nl802154_fam)); if (IS_ERR(*wpan_dev)) { err = PTR_ERR(*wpan_dev); goto out_unlock; } *rdev = wpan_phy_to_rdev((*wpan_dev)->wpan_phy); /* 0 is the first index - add 1 to parse only once */ cb->args[0] = (*rdev)->wpan_phy_idx + 1; cb->args[1] = (*wpan_dev)->identifier; } else { /* subtract the 1 again here */ struct wpan_phy *wpan_phy = wpan_phy_idx_to_wpan_phy(cb->args[0] - 1); struct wpan_dev *tmp; if (!wpan_phy) { err = -ENODEV; goto out_unlock; } *rdev = wpan_phy_to_rdev(wpan_phy); *wpan_dev = NULL; list_for_each_entry(tmp, &(*rdev)->wpan_dev_list, list) { if (tmp->identifier == cb->args[1]) { *wpan_dev = tmp; break; } } if (!*wpan_dev) { err = -ENODEV; goto out_unlock; } } return 0; out_unlock: rtnl_unlock(); return err; } static void nl802154_finish_wpan_dev_dump(struct cfg802154_registered_device *rdev) { rtnl_unlock(); } #endif /* 
CONFIG_IEEE802154_NL802154_EXPERIMENTAL */ /* message building helper */ static inline void *nl802154hdr_put(struct sk_buff *skb, u32 portid, u32 seq, int flags, u8 cmd) { /* since there is no private header just add the generic one */ return genlmsg_put(skb, portid, seq, &nl802154_fam, flags, cmd); } static int nl802154_put_flags(struct sk_buff *msg, int attr, u32 mask) { struct nlattr *nl_flags = nla_nest_start(msg, attr); int i; if (!nl_flags) return -ENOBUFS; i = 0; while (mask) { if ((mask & 1) && nla_put_flag(msg, i)) return -ENOBUFS; mask >>= 1; i++; } nla_nest_end(msg, nl_flags); return 0; } static int nl802154_send_wpan_phy_channels(struct cfg802154_registered_device *rdev, struct sk_buff *msg) { struct nlattr *nl_page; unsigned long page; nl_page = nla_nest_start(msg, NL802154_ATTR_CHANNELS_SUPPORTED); if (!nl_page) return -ENOBUFS; for (page = 0; page <= IEEE802154_MAX_PAGE; page++) { if (nla_put_u32(msg, NL802154_ATTR_SUPPORTED_CHANNEL, rdev->wpan_phy.supported.channels[page])) return -ENOBUFS; } nla_nest_end(msg, nl_page); return 0; } static int nl802154_put_capabilities(struct sk_buff *msg, struct cfg802154_registered_device *rdev) { const struct wpan_phy_supported *caps = &rdev->wpan_phy.supported; struct nlattr *nl_caps, *nl_channels; int i; nl_caps = nla_nest_start(msg, NL802154_ATTR_WPAN_PHY_CAPS); if (!nl_caps) return -ENOBUFS; nl_channels = nla_nest_start(msg, NL802154_CAP_ATTR_CHANNELS); if (!nl_channels) return -ENOBUFS; for (i = 0; i <= IEEE802154_MAX_PAGE; i++) { if (caps->channels[i]) { if (nl802154_put_flags(msg, i, caps->channels[i])) return -ENOBUFS; } } nla_nest_end(msg, nl_channels); if (rdev->wpan_phy.flags & WPAN_PHY_FLAG_CCA_ED_LEVEL) { struct nlattr *nl_ed_lvls; nl_ed_lvls = nla_nest_start(msg, NL802154_CAP_ATTR_CCA_ED_LEVELS); if (!nl_ed_lvls) return -ENOBUFS; for (i = 0; i < caps->cca_ed_levels_size; i++) { if (nla_put_s32(msg, i, caps->cca_ed_levels[i])) return -ENOBUFS; } nla_nest_end(msg, nl_ed_lvls); } if (rdev->wpan_phy.flags & WPAN_PHY_FLAG_TXPOWER) { struct nlattr *nl_tx_pwrs; nl_tx_pwrs = nla_nest_start(msg, NL802154_CAP_ATTR_TX_POWERS); if (!nl_tx_pwrs) return -ENOBUFS; for (i = 0; i < caps->tx_powers_size; i++) { if (nla_put_s32(msg, i, caps->tx_powers[i])) return -ENOBUFS; } nla_nest_end(msg, nl_tx_pwrs); } if (rdev->wpan_phy.flags & WPAN_PHY_FLAG_CCA_MODE) { if (nl802154_put_flags(msg, NL802154_CAP_ATTR_CCA_MODES, caps->cca_modes) || nl802154_put_flags(msg, NL802154_CAP_ATTR_CCA_OPTS, caps->cca_opts)) return -ENOBUFS; } if (nla_put_u8(msg, NL802154_CAP_ATTR_MIN_MINBE, caps->min_minbe) || nla_put_u8(msg, NL802154_CAP_ATTR_MAX_MINBE, caps->max_minbe) || nla_put_u8(msg, NL802154_CAP_ATTR_MIN_MAXBE, caps->min_maxbe) || nla_put_u8(msg, NL802154_CAP_ATTR_MAX_MAXBE, caps->max_maxbe) || nla_put_u8(msg, NL802154_CAP_ATTR_MIN_CSMA_BACKOFFS, caps->min_csma_backoffs) || nla_put_u8(msg, NL802154_CAP_ATTR_MAX_CSMA_BACKOFFS, caps->max_csma_backoffs) || nla_put_s8(msg, NL802154_CAP_ATTR_MIN_FRAME_RETRIES, caps->min_frame_retries) || nla_put_s8(msg, NL802154_CAP_ATTR_MAX_FRAME_RETRIES, caps->max_frame_retries) || nl802154_put_flags(msg, NL802154_CAP_ATTR_IFTYPES, caps->iftypes) || nla_put_u32(msg, NL802154_CAP_ATTR_LBT, caps->lbt)) return -ENOBUFS; nla_nest_end(msg, nl_caps); return 0; } static int nl802154_send_wpan_phy(struct cfg802154_registered_device *rdev, enum nl802154_commands cmd, struct sk_buff *msg, u32 portid, u32 seq, int flags) { struct nlattr *nl_cmds; void *hdr; int i; hdr = nl802154hdr_put(msg, portid, seq, flags, cmd); if (!hdr) return 
-ENOBUFS; if (nla_put_u32(msg, NL802154_ATTR_WPAN_PHY, rdev->wpan_phy_idx) || nla_put_string(msg, NL802154_ATTR_WPAN_PHY_NAME, wpan_phy_name(&rdev->wpan_phy)) || nla_put_u32(msg, NL802154_ATTR_GENERATION, cfg802154_rdev_list_generation)) goto nla_put_failure; if (cmd != NL802154_CMD_NEW_WPAN_PHY) goto finish; /* DUMP PHY PIB */ /* current channel settings */ if (nla_put_u8(msg, NL802154_ATTR_PAGE, rdev->wpan_phy.current_page) || nla_put_u8(msg, NL802154_ATTR_CHANNEL, rdev->wpan_phy.current_channel)) goto nla_put_failure; /* TODO remove this behaviour, we still keep support it for a while * so users can change the behaviour to the new one. */ if (nl802154_send_wpan_phy_channels(rdev, msg)) goto nla_put_failure; /* cca mode */ if (rdev->wpan_phy.flags & WPAN_PHY_FLAG_CCA_MODE) { if (nla_put_u32(msg, NL802154_ATTR_CCA_MODE, rdev->wpan_phy.cca.mode)) goto nla_put_failure; if (rdev->wpan_phy.cca.mode == NL802154_CCA_ENERGY_CARRIER) { if (nla_put_u32(msg, NL802154_ATTR_CCA_OPT, rdev->wpan_phy.cca.opt)) goto nla_put_failure; } } if (rdev->wpan_phy.flags & WPAN_PHY_FLAG_TXPOWER) { if (nla_put_s32(msg, NL802154_ATTR_TX_POWER, rdev->wpan_phy.transmit_power)) goto nla_put_failure; } if (rdev->wpan_phy.flags & WPAN_PHY_FLAG_CCA_ED_LEVEL) { if (nla_put_s32(msg, NL802154_ATTR_CCA_ED_LEVEL, rdev->wpan_phy.cca_ed_level)) goto nla_put_failure; } if (nl802154_put_capabilities(msg, rdev)) goto nla_put_failure; nl_cmds = nla_nest_start(msg, NL802154_ATTR_SUPPORTED_COMMANDS); if (!nl_cmds) goto nla_put_failure; i = 0; #define CMD(op, n) \ do { \ if (rdev->ops->op) { \ i++; \ if (nla_put_u32(msg, i, NL802154_CMD_ ## n)) \ goto nla_put_failure; \ } \ } while (0) CMD(add_virtual_intf, NEW_INTERFACE); CMD(del_virtual_intf, DEL_INTERFACE); CMD(set_channel, SET_CHANNEL); CMD(set_pan_id, SET_PAN_ID); CMD(set_short_addr, SET_SHORT_ADDR); CMD(set_backoff_exponent, SET_BACKOFF_EXPONENT); CMD(set_max_csma_backoffs, SET_MAX_CSMA_BACKOFFS); CMD(set_max_frame_retries, SET_MAX_FRAME_RETRIES); CMD(set_lbt_mode, SET_LBT_MODE); CMD(set_ackreq_default, SET_ACKREQ_DEFAULT); if (rdev->wpan_phy.flags & WPAN_PHY_FLAG_TXPOWER) CMD(set_tx_power, SET_TX_POWER); if (rdev->wpan_phy.flags & WPAN_PHY_FLAG_CCA_ED_LEVEL) CMD(set_cca_ed_level, SET_CCA_ED_LEVEL); if (rdev->wpan_phy.flags & WPAN_PHY_FLAG_CCA_MODE) CMD(set_cca_mode, SET_CCA_MODE); #undef CMD nla_nest_end(msg, nl_cmds); finish: genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } struct nl802154_dump_wpan_phy_state { s64 filter_wpan_phy; long start; }; static int nl802154_dump_wpan_phy_parse(struct sk_buff *skb, struct netlink_callback *cb, struct nl802154_dump_wpan_phy_state *state) { struct nlattr **tb = genl_family_attrbuf(&nl802154_fam); int ret = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl802154_fam.hdrsize, tb, nl802154_fam.maxattr, nl802154_policy, NULL); /* TODO check if we can handle error here, * we have no backward compatibility */ if (ret) return 0; if (tb[NL802154_ATTR_WPAN_PHY]) state->filter_wpan_phy = nla_get_u32(tb[NL802154_ATTR_WPAN_PHY]); if (tb[NL802154_ATTR_WPAN_DEV]) state->filter_wpan_phy = nla_get_u64(tb[NL802154_ATTR_WPAN_DEV]) >> 32; if (tb[NL802154_ATTR_IFINDEX]) { struct net_device *netdev; struct cfg802154_registered_device *rdev; int ifidx = nla_get_u32(tb[NL802154_ATTR_IFINDEX]); netdev = __dev_get_by_index(&init_net, ifidx); if (!netdev) return -ENODEV; if (netdev->ieee802154_ptr) { rdev = wpan_phy_to_rdev( netdev->ieee802154_ptr->wpan_phy); state->filter_wpan_phy = rdev->wpan_phy_idx; } } return 0; } static int 
nl802154_dump_wpan_phy(struct sk_buff *skb, struct netlink_callback *cb) { int idx = 0, ret; struct nl802154_dump_wpan_phy_state *state = (void *)cb->args[0]; struct cfg802154_registered_device *rdev; rtnl_lock(); if (!state) { state = kzalloc(sizeof(*state), GFP_KERNEL); if (!state) { rtnl_unlock(); return -ENOMEM; } state->filter_wpan_phy = -1; ret = nl802154_dump_wpan_phy_parse(skb, cb, state); if (ret) { kfree(state); rtnl_unlock(); return ret; } cb->args[0] = (long)state; } list_for_each_entry(rdev, &cfg802154_rdev_list, list) { if (!net_eq(wpan_phy_net(&rdev->wpan_phy), sock_net(skb->sk))) continue; if (++idx <= state->start) continue; if (state->filter_wpan_phy != -1 && state->filter_wpan_phy != rdev->wpan_phy_idx) continue; /* attempt to fit multiple wpan_phy data chunks into the skb */ ret = nl802154_send_wpan_phy(rdev, NL802154_CMD_NEW_WPAN_PHY, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI); if (ret < 0) { if ((ret == -ENOBUFS || ret == -EMSGSIZE) && !skb->len && cb->min_dump_alloc < 4096) { cb->min_dump_alloc = 4096; rtnl_unlock(); return 1; } idx--; break; } break; } rtnl_unlock(); state->start = idx; return skb->len; } static int nl802154_dump_wpan_phy_done(struct netlink_callback *cb) { kfree((void *)cb->args[0]); return 0; } static int nl802154_get_wpan_phy(struct sk_buff *skb, struct genl_info *info) { struct sk_buff *msg; struct cfg802154_registered_device *rdev = info->user_ptr[0]; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; if (nl802154_send_wpan_phy(rdev, NL802154_CMD_NEW_WPAN_PHY, msg, info->snd_portid, info->snd_seq, 0) < 0) { nlmsg_free(msg); return -ENOBUFS; } return genlmsg_reply(msg, info); } static inline u64 wpan_dev_id(struct wpan_dev *wpan_dev) { return (u64)wpan_dev->identifier | ((u64)wpan_phy_to_rdev(wpan_dev->wpan_phy)->wpan_phy_idx << 32); } #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL #include <net/ieee802154_netdev.h> static int ieee802154_llsec_send_key_id(struct sk_buff *msg, const struct ieee802154_llsec_key_id *desc) { struct nlattr *nl_dev_addr; if (nla_put_u32(msg, NL802154_KEY_ID_ATTR_MODE, desc->mode)) return -ENOBUFS; switch (desc->mode) { case NL802154_KEY_ID_MODE_IMPLICIT: nl_dev_addr = nla_nest_start(msg, NL802154_KEY_ID_ATTR_IMPLICIT); if (!nl_dev_addr) return -ENOBUFS; if (nla_put_le16(msg, NL802154_DEV_ADDR_ATTR_PAN_ID, desc->device_addr.pan_id) || nla_put_u32(msg, NL802154_DEV_ADDR_ATTR_MODE, desc->device_addr.mode)) return -ENOBUFS; switch (desc->device_addr.mode) { case NL802154_DEV_ADDR_SHORT: if (nla_put_le16(msg, NL802154_DEV_ADDR_ATTR_SHORT, desc->device_addr.short_addr)) return -ENOBUFS; break; case NL802154_DEV_ADDR_EXTENDED: if (nla_put_le64(msg, NL802154_DEV_ADDR_ATTR_EXTENDED, desc->device_addr.extended_addr, NL802154_DEV_ADDR_ATTR_PAD)) return -ENOBUFS; break; default: /* userspace should handle unknown */ break; } nla_nest_end(msg, nl_dev_addr); break; case NL802154_KEY_ID_MODE_INDEX: break; case NL802154_KEY_ID_MODE_INDEX_SHORT: /* TODO renmae short_source? */ if (nla_put_le32(msg, NL802154_KEY_ID_ATTR_SOURCE_SHORT, desc->short_source)) return -ENOBUFS; break; case NL802154_KEY_ID_MODE_INDEX_EXTENDED: if (nla_put_le64(msg, NL802154_KEY_ID_ATTR_SOURCE_EXTENDED, desc->extended_source, NL802154_KEY_ID_ATTR_PAD)) return -ENOBUFS; break; default: /* userspace should handle unknown */ break; } /* TODO key_id to key_idx ? 
Check naming */ if (desc->mode != NL802154_KEY_ID_MODE_IMPLICIT) { if (nla_put_u8(msg, NL802154_KEY_ID_ATTR_INDEX, desc->id)) return -ENOBUFS; } return 0; } static int nl802154_get_llsec_params(struct sk_buff *msg, struct cfg802154_registered_device *rdev, struct wpan_dev *wpan_dev) { struct nlattr *nl_key_id; struct ieee802154_llsec_params params; int ret; ret = rdev_get_llsec_params(rdev, wpan_dev, &params); if (ret < 0) return ret; if (nla_put_u8(msg, NL802154_ATTR_SEC_ENABLED, params.enabled) || nla_put_u32(msg, NL802154_ATTR_SEC_OUT_LEVEL, params.out_level) || nla_put_be32(msg, NL802154_ATTR_SEC_FRAME_COUNTER, params.frame_counter)) return -ENOBUFS; nl_key_id = nla_nest_start(msg, NL802154_ATTR_SEC_OUT_KEY_ID); if (!nl_key_id) return -ENOBUFS; ret = ieee802154_llsec_send_key_id(msg, &params.out_key); if (ret < 0) return ret; nla_nest_end(msg, nl_key_id); return 0; } #endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */ static int nl802154_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct cfg802154_registered_device *rdev, struct wpan_dev *wpan_dev) { struct net_device *dev = wpan_dev->netdev; void *hdr; hdr = nl802154hdr_put(msg, portid, seq, flags, NL802154_CMD_NEW_INTERFACE); if (!hdr) return -1; if (dev && (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex) || nla_put_string(msg, NL802154_ATTR_IFNAME, dev->name))) goto nla_put_failure; if (nla_put_u32(msg, NL802154_ATTR_WPAN_PHY, rdev->wpan_phy_idx) || nla_put_u32(msg, NL802154_ATTR_IFTYPE, wpan_dev->iftype) || nla_put_u64_64bit(msg, NL802154_ATTR_WPAN_DEV, wpan_dev_id(wpan_dev), NL802154_ATTR_PAD) || nla_put_u32(msg, NL802154_ATTR_GENERATION, rdev->devlist_generation ^ (cfg802154_rdev_list_generation << 2))) goto nla_put_failure; /* address settings */ if (nla_put_le64(msg, NL802154_ATTR_EXTENDED_ADDR, wpan_dev->extended_addr, NL802154_ATTR_PAD) || nla_put_le16(msg, NL802154_ATTR_SHORT_ADDR, wpan_dev->short_addr) || nla_put_le16(msg, NL802154_ATTR_PAN_ID, wpan_dev->pan_id)) goto nla_put_failure; /* ARET handling */ if (nla_put_s8(msg, NL802154_ATTR_MAX_FRAME_RETRIES, wpan_dev->frame_retries) || nla_put_u8(msg, NL802154_ATTR_MAX_BE, wpan_dev->max_be) || nla_put_u8(msg, NL802154_ATTR_MAX_CSMA_BACKOFFS, wpan_dev->csma_retries) || nla_put_u8(msg, NL802154_ATTR_MIN_BE, wpan_dev->min_be)) goto nla_put_failure; /* listen before transmit */ if (nla_put_u8(msg, NL802154_ATTR_LBT_MODE, wpan_dev->lbt)) goto nla_put_failure; /* ackreq default behaviour */ if (nla_put_u8(msg, NL802154_ATTR_ACKREQ_DEFAULT, wpan_dev->ackreq)) goto nla_put_failure; #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR) goto out; if (nl802154_get_llsec_params(msg, rdev, wpan_dev) < 0) goto nla_put_failure; out: #endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */ genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } static int nl802154_dump_interface(struct sk_buff *skb, struct netlink_callback *cb) { int wp_idx = 0; int if_idx = 0; int wp_start = cb->args[0]; int if_start = cb->args[1]; struct cfg802154_registered_device *rdev; struct wpan_dev *wpan_dev; rtnl_lock(); list_for_each_entry(rdev, &cfg802154_rdev_list, list) { if (!net_eq(wpan_phy_net(&rdev->wpan_phy), sock_net(skb->sk))) continue; if (wp_idx < wp_start) { wp_idx++; continue; } if_idx = 0; list_for_each_entry(wpan_dev, &rdev->wpan_dev_list, list) { if (if_idx < if_start) { if_idx++; continue; } if (nl802154_send_iface(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, rdev, 
wpan_dev) < 0) { goto out; } if_idx++; } wp_idx++; } out: rtnl_unlock(); cb->args[0] = wp_idx; cb->args[1] = if_idx; return skb->len; } static int nl802154_get_interface(struct sk_buff *skb, struct genl_info *info) { struct sk_buff *msg; struct cfg802154_registered_device *rdev = info->user_ptr[0]; struct wpan_dev *wdev = info->user_ptr[1]; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; if (nl802154_send_iface(msg, info->snd_portid, info->snd_seq, 0, rdev, wdev) < 0) { nlmsg_free(msg); return -ENOBUFS; } return genlmsg_reply(msg, info); } static int nl802154_new_interface(struct sk_buff *skb, struct genl_info *info) { struct cfg802154_registered_device *rdev = info->user_ptr[0]; enum nl802154_iftype type = NL802154_IFTYPE_UNSPEC; __le64 extended_addr = cpu_to_le64(0x0000000000000000ULL); /* TODO avoid failing a new interface * creation due to pending removal? */ if (!info->attrs[NL802154_ATTR_IFNAME]) return -EINVAL; if (info->attrs[NL802154_ATTR_IFTYPE]) { type = nla_get_u32(info->attrs[NL802154_ATTR_IFTYPE]); if (type > NL802154_IFTYPE_MAX || !(rdev->wpan_phy.supported.iftypes & BIT(type))) return -EINVAL; } if (info->attrs[NL802154_ATTR_EXTENDED_ADDR]) extended_addr = nla_get_le64(info->attrs[NL802154_ATTR_EXTENDED_ADDR]); if (!rdev->ops->add_virtual_intf) return -EOPNOTSUPP; return rdev_add_virtual_intf(rdev, nla_data(info->attrs[NL802154_ATTR_IFNAME]), NET_NAME_USER, type, extended_addr); } static int nl802154_del_interface(struct sk_buff *skb, struct genl_info *info) { struct cfg802154_registered_device *rdev = info->user_ptr[0]; struct wpan_dev *wpan_dev = info->user_ptr[1]; if (!rdev->ops->del_virtual_intf) return -EOPNOTSUPP; /* If we remove a wpan device without a netdev then clear * user_ptr[1] so that nl802154_post_doit won't dereference it * to check if it needs to do dev_put(). Otherwise it crashes * since the wpan_dev has been freed, unlike with a netdev where * we need the dev_put() for the netdev to really be freed. 
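 *
 * For reference, the NEED_WPAN_DEV branch of nl802154_post_doit() is
 * essentially:
 *
 *	if (info->user_ptr[1]) {
 *		struct wpan_dev *wpan_dev = info->user_ptr[1];
 *
 *		if (wpan_dev->netdev)
 *			dev_put(wpan_dev->netdev);
 *	}
 *
 * so clearing user_ptr[1] below is what keeps it away from the freed
 * wpan_dev.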
*/ if (!wpan_dev->netdev) info->user_ptr[1] = NULL; return rdev_del_virtual_intf(rdev, wpan_dev); } static int nl802154_set_channel(struct sk_buff *skb, struct genl_info *info) { struct cfg802154_registered_device *rdev = info->user_ptr[0]; u8 channel, page; if (!info->attrs[NL802154_ATTR_PAGE] || !info->attrs[NL802154_ATTR_CHANNEL]) return -EINVAL; page = nla_get_u8(info->attrs[NL802154_ATTR_PAGE]); channel = nla_get_u8(info->attrs[NL802154_ATTR_CHANNEL]); /* check 802.15.4 constraints */ if (page > IEEE802154_MAX_PAGE || channel > IEEE802154_MAX_CHANNEL || !(rdev->wpan_phy.supported.channels[page] & BIT(channel))) return -EINVAL; return rdev_set_channel(rdev, page, channel); } static int nl802154_set_cca_mode(struct sk_buff *skb, struct genl_info *info) { struct cfg802154_registered_device *rdev = info->user_ptr[0]; struct wpan_phy_cca cca; if (!(rdev->wpan_phy.flags & WPAN_PHY_FLAG_CCA_MODE)) return -EOPNOTSUPP; if (!info->attrs[NL802154_ATTR_CCA_MODE]) return -EINVAL; cca.mode = nla_get_u32(info->attrs[NL802154_ATTR_CCA_MODE]); /* checking 802.15.4 constraints */ if (cca.mode < NL802154_CCA_ENERGY || cca.mode > NL802154_CCA_ATTR_MAX || !(rdev->wpan_phy.supported.cca_modes & BIT(cca.mode))) return -EINVAL; if (cca.mode == NL802154_CCA_ENERGY_CARRIER) { if (!info->attrs[NL802154_ATTR_CCA_OPT]) return -EINVAL; cca.opt = nla_get_u32(info->attrs[NL802154_ATTR_CCA_OPT]); if (cca.opt > NL802154_CCA_OPT_ATTR_MAX || !(rdev->wpan_phy.supported.cca_opts & BIT(cca.opt))) return -EINVAL; } return rdev_set_cca_mode(rdev, &cca); } static int nl802154_set_cca_ed_level(struct sk_buff *skb, struct genl_info *info) { struct cfg802154_registered_device *rdev = info->user_ptr[0]; s32 ed_level; int i; if (!(rdev->wpan_phy.flags & WPAN_PHY_FLAG_CCA_ED_LEVEL)) return -EOPNOTSUPP; if (!info->attrs[NL802154_ATTR_CCA_ED_LEVEL]) return -EINVAL; ed_level = nla_get_s32(info->attrs[NL802154_ATTR_CCA_ED_LEVEL]); for (i = 0; i < rdev->wpan_phy.supported.cca_ed_levels_size; i++) { if (ed_level == rdev->wpan_phy.supported.cca_ed_levels[i]) return rdev_set_cca_ed_level(rdev, ed_level); } return -EINVAL; } static int nl802154_set_tx_power(struct sk_buff *skb, struct genl_info *info) { struct cfg802154_registered_device *rdev = info->user_ptr[0]; s32 power; int i; if (!(rdev->wpan_phy.flags & WPAN_PHY_FLAG_TXPOWER)) return -EOPNOTSUPP; if (!info->attrs[NL802154_ATTR_TX_POWER]) return -EINVAL; power = nla_get_s32(info->attrs[NL802154_ATTR_TX_POWER]); for (i = 0; i < rdev->wpan_phy.supported.tx_powers_size; i++) { if (power == rdev->wpan_phy.supported.tx_powers[i]) return rdev_set_tx_power(rdev, power); } return -EINVAL; } static int nl802154_set_pan_id(struct sk_buff *skb, struct genl_info *info) { struct cfg802154_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wpan_dev *wpan_dev = dev->ieee802154_ptr; __le16 pan_id; /* conflict here while tx/rx calls */ if (netif_running(dev)) return -EBUSY; if (wpan_dev->lowpan_dev) { if (netif_running(wpan_dev->lowpan_dev)) return -EBUSY; } /* don't change address fields on monitor */ if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR || !info->attrs[NL802154_ATTR_PAN_ID]) return -EINVAL; pan_id = nla_get_le16(info->attrs[NL802154_ATTR_PAN_ID]); /* TODO * I am not sure about to check here on broadcast pan_id. * Broadcast is a valid setting, comment from 802.15.4: * If this value is 0xffff, the device is not associated. * * This could useful to simple deassociate an device. 
*/ if (pan_id == cpu_to_le16(IEEE802154_PAN_ID_BROADCAST)) return -EINVAL; return rdev_set_pan_id(rdev, wpan_dev, pan_id); } static int nl802154_set_short_addr(struct sk_buff *skb, struct genl_info *info) { struct cfg802154_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wpan_dev *wpan_dev = dev->ieee802154_ptr; __le16 short_addr; /* conflict here while tx/rx calls */ if (netif_running(dev)) return -EBUSY; if (wpan_dev->lowpan_dev) { if (netif_running(wpan_dev->lowpan_dev)) return -EBUSY; } /* don't change address fields on monitor */ if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR || !info->attrs[NL802154_ATTR_SHORT_ADDR]) return -EINVAL; short_addr = nla_get_le16(info->attrs[NL802154_ATTR_SHORT_ADDR]); /* TODO * I am not sure about to check here on broadcast short_addr. * Broadcast is a valid setting, comment from 802.15.4: * A value of 0xfffe indicates that the device has * associated but has not been allocated an address. A * value of 0xffff indicates that the device does not * have a short address. * * I think we should allow to set these settings but * don't allow to allow socket communication with it. */ if (short_addr == cpu_to_le16(IEEE802154_ADDR_SHORT_UNSPEC) || short_addr == cpu_to_le16(IEEE802154_ADDR_SHORT_BROADCAST)) return -EINVAL; return rdev_set_short_addr(rdev, wpan_dev, short_addr); } static int nl802154_set_backoff_exponent(struct sk_buff *skb, struct genl_info *info) { struct cfg802154_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wpan_dev *wpan_dev = dev->ieee802154_ptr; u8 min_be, max_be; /* should be set on netif open inside phy settings */ if (netif_running(dev)) return -EBUSY; if (!info->attrs[NL802154_ATTR_MIN_BE] || !info->attrs[NL802154_ATTR_MAX_BE]) return -EINVAL; min_be = nla_get_u8(info->attrs[NL802154_ATTR_MIN_BE]); max_be = nla_get_u8(info->attrs[NL802154_ATTR_MAX_BE]); /* check 802.15.4 constraints */ if (min_be < rdev->wpan_phy.supported.min_minbe || min_be > rdev->wpan_phy.supported.max_minbe || max_be < rdev->wpan_phy.supported.min_maxbe || max_be > rdev->wpan_phy.supported.max_maxbe || min_be > max_be) return -EINVAL; return rdev_set_backoff_exponent(rdev, wpan_dev, min_be, max_be); } static int nl802154_set_max_csma_backoffs(struct sk_buff *skb, struct genl_info *info) { struct cfg802154_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wpan_dev *wpan_dev = dev->ieee802154_ptr; u8 max_csma_backoffs; /* conflict here while other running iface settings */ if (netif_running(dev)) return -EBUSY; if (!info->attrs[NL802154_ATTR_MAX_CSMA_BACKOFFS]) return -EINVAL; max_csma_backoffs = nla_get_u8( info->attrs[NL802154_ATTR_MAX_CSMA_BACKOFFS]); /* check 802.15.4 constraints */ if (max_csma_backoffs < rdev->wpan_phy.supported.min_csma_backoffs || max_csma_backoffs > rdev->wpan_phy.supported.max_csma_backoffs) return -EINVAL; return rdev_set_max_csma_backoffs(rdev, wpan_dev, max_csma_backoffs); } static int nl802154_set_max_frame_retries(struct sk_buff *skb, struct genl_info *info) { struct cfg802154_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wpan_dev *wpan_dev = dev->ieee802154_ptr; s8 max_frame_retries; if (netif_running(dev)) return -EBUSY; if (!info->attrs[NL802154_ATTR_MAX_FRAME_RETRIES]) return -EINVAL; max_frame_retries = nla_get_s8( info->attrs[NL802154_ATTR_MAX_FRAME_RETRIES]); /* check 802.15.4 constraints */ if (max_frame_retries < 
rdev->wpan_phy.supported.min_frame_retries || max_frame_retries > rdev->wpan_phy.supported.max_frame_retries) return -EINVAL; return rdev_set_max_frame_retries(rdev, wpan_dev, max_frame_retries); } static int nl802154_set_lbt_mode(struct sk_buff *skb, struct genl_info *info) { struct cfg802154_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wpan_dev *wpan_dev = dev->ieee802154_ptr; int mode; if (netif_running(dev)) return -EBUSY; if (!info->attrs[NL802154_ATTR_LBT_MODE]) return -EINVAL; mode = nla_get_u8(info->attrs[NL802154_ATTR_LBT_MODE]); if (mode != 0 && mode != 1) return -EINVAL; if (!wpan_phy_supported_bool(mode, rdev->wpan_phy.supported.lbt)) return -EINVAL; return rdev_set_lbt_mode(rdev, wpan_dev, mode); } static int nl802154_set_ackreq_default(struct sk_buff *skb, struct genl_info *info) { struct cfg802154_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wpan_dev *wpan_dev = dev->ieee802154_ptr; int ackreq; if (netif_running(dev)) return -EBUSY; if (!info->attrs[NL802154_ATTR_ACKREQ_DEFAULT]) return -EINVAL; ackreq = nla_get_u8(info->attrs[NL802154_ATTR_ACKREQ_DEFAULT]); if (ackreq != 0 && ackreq != 1) return -EINVAL; return rdev_set_ackreq_default(rdev, wpan_dev, ackreq); } static int nl802154_wpan_phy_netns(struct sk_buff *skb, struct genl_info *info) { struct cfg802154_registered_device *rdev = info->user_ptr[0]; struct net *net; int err; if (info->attrs[NL802154_ATTR_PID]) { u32 pid = nla_get_u32(info->attrs[NL802154_ATTR_PID]); net = get_net_ns_by_pid(pid); } else if (info->attrs[NL802154_ATTR_NETNS_FD]) { u32 fd = nla_get_u32(info->attrs[NL802154_ATTR_NETNS_FD]); net = get_net_ns_by_fd(fd); } else { return -EINVAL; } if (IS_ERR(net)) return PTR_ERR(net); err = 0; /* check if anything to do */ if (!net_eq(wpan_phy_net(&rdev->wpan_phy), net)) err = cfg802154_switch_netns(rdev, net); put_net(net); return err; } #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL static const struct nla_policy nl802154_dev_addr_policy[NL802154_DEV_ADDR_ATTR_MAX + 1] = { [NL802154_DEV_ADDR_ATTR_PAN_ID] = { .type = NLA_U16 }, [NL802154_DEV_ADDR_ATTR_MODE] = { .type = NLA_U32 }, [NL802154_DEV_ADDR_ATTR_SHORT] = { .type = NLA_U16 }, [NL802154_DEV_ADDR_ATTR_EXTENDED] = { .type = NLA_U64 }, }; static int ieee802154_llsec_parse_dev_addr(struct nlattr *nla, struct ieee802154_addr *addr) { struct nlattr *attrs[NL802154_DEV_ADDR_ATTR_MAX + 1]; if (!nla || nla_parse_nested(attrs, NL802154_DEV_ADDR_ATTR_MAX, nla, nl802154_dev_addr_policy, NULL)) return -EINVAL; if (!attrs[NL802154_DEV_ADDR_ATTR_PAN_ID] || !attrs[NL802154_DEV_ADDR_ATTR_MODE]) return -EINVAL; addr->pan_id = nla_get_le16(attrs[NL802154_DEV_ADDR_ATTR_PAN_ID]); addr->mode = nla_get_u32(attrs[NL802154_DEV_ADDR_ATTR_MODE]); switch (addr->mode) { case NL802154_DEV_ADDR_SHORT: if (!attrs[NL802154_DEV_ADDR_ATTR_SHORT]) return -EINVAL; addr->short_addr = nla_get_le16(attrs[NL802154_DEV_ADDR_ATTR_SHORT]); break; case NL802154_DEV_ADDR_EXTENDED: if (!attrs[NL802154_DEV_ADDR_ATTR_EXTENDED]) return -EINVAL; addr->extended_addr = nla_get_le64(attrs[NL802154_DEV_ADDR_ATTR_EXTENDED]); break; default: return -EINVAL; } return 0; } static const struct nla_policy nl802154_key_id_policy[NL802154_KEY_ID_ATTR_MAX + 1] = { [NL802154_KEY_ID_ATTR_MODE] = { .type = NLA_U32 }, [NL802154_KEY_ID_ATTR_INDEX] = { .type = NLA_U8 }, [NL802154_KEY_ID_ATTR_IMPLICIT] = { .type = NLA_NESTED }, [NL802154_KEY_ID_ATTR_SOURCE_SHORT] = { .type = NLA_U32 }, [NL802154_KEY_ID_ATTR_SOURCE_EXTENDED] 
= { .type = NLA_U64 }, }; static int ieee802154_llsec_parse_key_id(struct nlattr *nla, struct ieee802154_llsec_key_id *desc) { struct nlattr *attrs[NL802154_KEY_ID_ATTR_MAX + 1]; if (!nla || nla_parse_nested(attrs, NL802154_KEY_ID_ATTR_MAX, nla, nl802154_key_id_policy, NULL)) return -EINVAL; if (!attrs[NL802154_KEY_ID_ATTR_MODE]) return -EINVAL; desc->mode = nla_get_u32(attrs[NL802154_KEY_ID_ATTR_MODE]); switch (desc->mode) { case NL802154_KEY_ID_MODE_IMPLICIT: if (!attrs[NL802154_KEY_ID_ATTR_IMPLICIT]) return -EINVAL; if (ieee802154_llsec_parse_dev_addr(attrs[NL802154_KEY_ID_ATTR_IMPLICIT], &desc->device_addr) < 0) return -EINVAL; break; case NL802154_KEY_ID_MODE_INDEX: break; case NL802154_KEY_ID_MODE_INDEX_SHORT: if (!attrs[NL802154_KEY_ID_ATTR_SOURCE_SHORT]) return -EINVAL; desc->short_source = nla_get_le32(attrs[NL802154_KEY_ID_ATTR_SOURCE_SHORT]); break; case NL802154_KEY_ID_MODE_INDEX_EXTENDED: if (!attrs[NL802154_KEY_ID_ATTR_SOURCE_EXTENDED]) return -EINVAL; desc->extended_source = nla_get_le64(attrs[NL802154_KEY_ID_ATTR_SOURCE_EXTENDED]); break; default: return -EINVAL; } if (desc->mode != NL802154_KEY_ID_MODE_IMPLICIT) { if (!attrs[NL802154_KEY_ID_ATTR_INDEX]) return -EINVAL; /* TODO change id to idx */ desc->id = nla_get_u8(attrs[NL802154_KEY_ID_ATTR_INDEX]); } return 0; } static int nl802154_set_llsec_params(struct sk_buff *skb, struct genl_info *info) { struct cfg802154_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wpan_dev *wpan_dev = dev->ieee802154_ptr; struct ieee802154_llsec_params params; u32 changed = 0; int ret; if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR) return -EOPNOTSUPP; if (info->attrs[NL802154_ATTR_SEC_ENABLED]) { u8 enabled; enabled = nla_get_u8(info->attrs[NL802154_ATTR_SEC_ENABLED]); if (enabled != 0 && enabled != 1) return -EINVAL; params.enabled = nla_get_u8(info->attrs[NL802154_ATTR_SEC_ENABLED]); changed |= IEEE802154_LLSEC_PARAM_ENABLED; } if (info->attrs[NL802154_ATTR_SEC_OUT_KEY_ID]) { ret = ieee802154_llsec_parse_key_id(info->attrs[NL802154_ATTR_SEC_OUT_KEY_ID], &params.out_key); if (ret < 0) return ret; changed |= IEEE802154_LLSEC_PARAM_OUT_KEY; } if (info->attrs[NL802154_ATTR_SEC_OUT_LEVEL]) { params.out_level = nla_get_u32(info->attrs[NL802154_ATTR_SEC_OUT_LEVEL]); if (params.out_level > NL802154_SECLEVEL_MAX) return -EINVAL; changed |= IEEE802154_LLSEC_PARAM_OUT_LEVEL; } if (info->attrs[NL802154_ATTR_SEC_FRAME_COUNTER]) { params.frame_counter = nla_get_be32(info->attrs[NL802154_ATTR_SEC_FRAME_COUNTER]); changed |= IEEE802154_LLSEC_PARAM_FRAME_COUNTER; } return rdev_set_llsec_params(rdev, wpan_dev, &params, changed); } static int nl802154_send_key(struct sk_buff *msg, u32 cmd, u32 portid, u32 seq, int flags, struct cfg802154_registered_device *rdev, struct net_device *dev, const struct ieee802154_llsec_key_entry *key) { void *hdr; u32 commands[NL802154_CMD_FRAME_NR_IDS / 32]; struct nlattr *nl_key, *nl_key_id; hdr = nl802154hdr_put(msg, portid, seq, flags, cmd); if (!hdr) return -ENOBUFS; if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex)) goto nla_put_failure; nl_key = nla_nest_start(msg, NL802154_ATTR_SEC_KEY); if (!nl_key) goto nla_put_failure; nl_key_id = nla_nest_start(msg, NL802154_KEY_ATTR_ID); if (!nl_key_id) goto nla_put_failure; if (ieee802154_llsec_send_key_id(msg, &key->id) < 0) goto nla_put_failure; nla_nest_end(msg, nl_key_id); if (nla_put_u8(msg, NL802154_KEY_ATTR_USAGE_FRAMES, key->key->frame_types)) goto nla_put_failure; if (key->key->frame_types & 
BIT(NL802154_FRAME_CMD)) { /* TODO for each nested */ memset(commands, 0, sizeof(commands)); commands[7] = key->key->cmd_frame_ids; if (nla_put(msg, NL802154_KEY_ATTR_USAGE_CMDS, sizeof(commands), commands)) goto nla_put_failure; } if (nla_put(msg, NL802154_KEY_ATTR_BYTES, NL802154_KEY_SIZE, key->key->key)) goto nla_put_failure; nla_nest_end(msg, nl_key); genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } static int nl802154_dump_llsec_key(struct sk_buff *skb, struct netlink_callback *cb) { struct cfg802154_registered_device *rdev = NULL; struct ieee802154_llsec_key_entry *key; struct ieee802154_llsec_table *table; struct wpan_dev *wpan_dev; int err; err = nl802154_prepare_wpan_dev_dump(skb, cb, &rdev, &wpan_dev); if (err) return err; if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR) { err = skb->len; goto out_err; } if (!wpan_dev->netdev) { err = -EINVAL; goto out_err; } rdev_lock_llsec_table(rdev, wpan_dev); rdev_get_llsec_table(rdev, wpan_dev, &table); /* TODO make it like station dump */ if (cb->args[2]) goto out; list_for_each_entry(key, &table->keys, list) { if (nl802154_send_key(skb, NL802154_CMD_NEW_SEC_KEY, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, rdev, wpan_dev->netdev, key) < 0) { /* TODO */ err = -EIO; rdev_unlock_llsec_table(rdev, wpan_dev); goto out_err; } } cb->args[2] = 1; out: rdev_unlock_llsec_table(rdev, wpan_dev); err = skb->len; out_err: nl802154_finish_wpan_dev_dump(rdev); return err; } static const struct nla_policy nl802154_key_policy[NL802154_KEY_ATTR_MAX + 1] = { [NL802154_KEY_ATTR_ID] = { NLA_NESTED }, /* TODO handle it as for_each_nested and NLA_FLAG? */ [NL802154_KEY_ATTR_USAGE_FRAMES] = { NLA_U8 }, /* TODO handle it as for_each_nested, not static array? */ [NL802154_KEY_ATTR_USAGE_CMDS] = { .len = NL802154_CMD_FRAME_NR_IDS / 8 }, [NL802154_KEY_ATTR_BYTES] = { .len = NL802154_KEY_SIZE }, }; static int nl802154_add_llsec_key(struct sk_buff *skb, struct genl_info *info) { struct cfg802154_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wpan_dev *wpan_dev = dev->ieee802154_ptr; struct nlattr *attrs[NL802154_KEY_ATTR_MAX + 1]; struct ieee802154_llsec_key key = { }; struct ieee802154_llsec_key_id id = { }; u32 commands[NL802154_CMD_FRAME_NR_IDS / 32] = { }; if (!info->attrs[NL802154_ATTR_SEC_KEY] || nla_parse_nested(attrs, NL802154_KEY_ATTR_MAX, info->attrs[NL802154_ATTR_SEC_KEY], nl802154_key_policy, info->extack)) return -EINVAL; if (!attrs[NL802154_KEY_ATTR_USAGE_FRAMES] || !attrs[NL802154_KEY_ATTR_BYTES]) return -EINVAL; if (ieee802154_llsec_parse_key_id(attrs[NL802154_KEY_ATTR_ID], &id) < 0) return -ENOBUFS; key.frame_types = nla_get_u8(attrs[NL802154_KEY_ATTR_USAGE_FRAMES]); if (key.frame_types > BIT(NL802154_FRAME_MAX) || ((key.frame_types & BIT(NL802154_FRAME_CMD)) && !attrs[NL802154_KEY_ATTR_USAGE_CMDS])) return -EINVAL; if (attrs[NL802154_KEY_ATTR_USAGE_CMDS]) { /* TODO for each nested */ nla_memcpy(commands, attrs[NL802154_KEY_ATTR_USAGE_CMDS], NL802154_CMD_FRAME_NR_IDS / 8); /* TODO understand the -EINVAL logic here? 
last condition */ if (commands[0] || commands[1] || commands[2] || commands[3] || commands[4] || commands[5] || commands[6] || commands[7] > BIT(NL802154_CMD_FRAME_MAX)) return -EINVAL; key.cmd_frame_ids = commands[7]; } else { key.cmd_frame_ids = 0; } nla_memcpy(key.key, attrs[NL802154_KEY_ATTR_BYTES], NL802154_KEY_SIZE); if (ieee802154_llsec_parse_key_id(attrs[NL802154_KEY_ATTR_ID], &id) < 0) return -ENOBUFS; return rdev_add_llsec_key(rdev, wpan_dev, &id, &key); } static int nl802154_del_llsec_key(struct sk_buff *skb, struct genl_info *info) { struct cfg802154_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wpan_dev *wpan_dev = dev->ieee802154_ptr; struct nlattr *attrs[NL802154_KEY_ATTR_MAX + 1]; struct ieee802154_llsec_key_id id; if (!info->attrs[NL802154_ATTR_SEC_KEY] || nla_parse_nested(attrs, NL802154_KEY_ATTR_MAX, info->attrs[NL802154_ATTR_SEC_KEY], nl802154_key_policy, info->extack)) return -EINVAL; if (ieee802154_llsec_parse_key_id(attrs[NL802154_KEY_ATTR_ID], &id) < 0) return -ENOBUFS; return rdev_del_llsec_key(rdev, wpan_dev, &id); } static int nl802154_send_device(struct sk_buff *msg, u32 cmd, u32 portid, u32 seq, int flags, struct cfg802154_registered_device *rdev, struct net_device *dev, const struct ieee802154_llsec_device *dev_desc) { void *hdr; struct nlattr *nl_device; hdr = nl802154hdr_put(msg, portid, seq, flags, cmd); if (!hdr) return -ENOBUFS; if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex)) goto nla_put_failure; nl_device = nla_nest_start(msg, NL802154_ATTR_SEC_DEVICE); if (!nl_device) goto nla_put_failure; if (nla_put_u32(msg, NL802154_DEV_ATTR_FRAME_COUNTER, dev_desc->frame_counter) || nla_put_le16(msg, NL802154_DEV_ATTR_PAN_ID, dev_desc->pan_id) || nla_put_le16(msg, NL802154_DEV_ATTR_SHORT_ADDR, dev_desc->short_addr) || nla_put_le64(msg, NL802154_DEV_ATTR_EXTENDED_ADDR, dev_desc->hwaddr, NL802154_DEV_ATTR_PAD) || nla_put_u8(msg, NL802154_DEV_ATTR_SECLEVEL_EXEMPT, dev_desc->seclevel_exempt) || nla_put_u32(msg, NL802154_DEV_ATTR_KEY_MODE, dev_desc->key_mode)) goto nla_put_failure; nla_nest_end(msg, nl_device); genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } static int nl802154_dump_llsec_dev(struct sk_buff *skb, struct netlink_callback *cb) { struct cfg802154_registered_device *rdev = NULL; struct ieee802154_llsec_device *dev; struct ieee802154_llsec_table *table; struct wpan_dev *wpan_dev; int err; err = nl802154_prepare_wpan_dev_dump(skb, cb, &rdev, &wpan_dev); if (err) return err; if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR) { err = skb->len; goto out_err; } if (!wpan_dev->netdev) { err = -EINVAL; goto out_err; } rdev_lock_llsec_table(rdev, wpan_dev); rdev_get_llsec_table(rdev, wpan_dev, &table); /* TODO make it like station dump */ if (cb->args[2]) goto out; list_for_each_entry(dev, &table->devices, list) { if (nl802154_send_device(skb, NL802154_CMD_NEW_SEC_LEVEL, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, rdev, wpan_dev->netdev, dev) < 0) { /* TODO */ err = -EIO; rdev_unlock_llsec_table(rdev, wpan_dev); goto out_err; } } cb->args[2] = 1; out: rdev_unlock_llsec_table(rdev, wpan_dev); err = skb->len; out_err: nl802154_finish_wpan_dev_dump(rdev); return err; } static const struct nla_policy nl802154_dev_policy[NL802154_DEV_ATTR_MAX + 1] = { [NL802154_DEV_ATTR_FRAME_COUNTER] = { NLA_U32 }, [NL802154_DEV_ATTR_PAN_ID] = { .type = NLA_U16 }, [NL802154_DEV_ATTR_SHORT_ADDR] = { .type = NLA_U16 }, [NL802154_DEV_ATTR_EXTENDED_ADDR] = { .type = 
NLA_U64 }, [NL802154_DEV_ATTR_SECLEVEL_EXEMPT] = { NLA_U8 }, [NL802154_DEV_ATTR_KEY_MODE] = { NLA_U32 }, }; static int ieee802154_llsec_parse_device(struct nlattr *nla, struct ieee802154_llsec_device *dev) { struct nlattr *attrs[NL802154_DEV_ATTR_MAX + 1]; if (!nla || nla_parse_nested(attrs, NL802154_DEV_ATTR_MAX, nla, nl802154_dev_policy, NULL)) return -EINVAL; memset(dev, 0, sizeof(*dev)); if (!attrs[NL802154_DEV_ATTR_FRAME_COUNTER] || !attrs[NL802154_DEV_ATTR_PAN_ID] || !attrs[NL802154_DEV_ATTR_SHORT_ADDR] || !attrs[NL802154_DEV_ATTR_EXTENDED_ADDR] || !attrs[NL802154_DEV_ATTR_SECLEVEL_EXEMPT] || !attrs[NL802154_DEV_ATTR_KEY_MODE]) return -EINVAL; /* TODO be32 */ dev->frame_counter = nla_get_u32(attrs[NL802154_DEV_ATTR_FRAME_COUNTER]); dev->pan_id = nla_get_le16(attrs[NL802154_DEV_ATTR_PAN_ID]); dev->short_addr = nla_get_le16(attrs[NL802154_DEV_ATTR_SHORT_ADDR]); /* TODO rename hwaddr to extended_addr */ dev->hwaddr = nla_get_le64(attrs[NL802154_DEV_ATTR_EXTENDED_ADDR]); dev->seclevel_exempt = nla_get_u8(attrs[NL802154_DEV_ATTR_SECLEVEL_EXEMPT]); dev->key_mode = nla_get_u32(attrs[NL802154_DEV_ATTR_KEY_MODE]); if (dev->key_mode > NL802154_DEVKEY_MAX || (dev->seclevel_exempt != 0 && dev->seclevel_exempt != 1)) return -EINVAL; return 0; } static int nl802154_add_llsec_dev(struct sk_buff *skb, struct genl_info *info) { struct cfg802154_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wpan_dev *wpan_dev = dev->ieee802154_ptr; struct ieee802154_llsec_device dev_desc; if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR) return -EOPNOTSUPP; if (ieee802154_llsec_parse_device(info->attrs[NL802154_ATTR_SEC_DEVICE], &dev_desc) < 0) return -EINVAL; return rdev_add_device(rdev, wpan_dev, &dev_desc); } static int nl802154_del_llsec_dev(struct sk_buff *skb, struct genl_info *info) { struct cfg802154_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wpan_dev *wpan_dev = dev->ieee802154_ptr; struct nlattr *attrs[NL802154_DEV_ATTR_MAX + 1]; __le64 extended_addr; if (!info->attrs[NL802154_ATTR_SEC_DEVICE] || nla_parse_nested(attrs, NL802154_DEV_ATTR_MAX, info->attrs[NL802154_ATTR_SEC_DEVICE], nl802154_dev_policy, info->extack)) return -EINVAL; if (!attrs[NL802154_DEV_ATTR_EXTENDED_ADDR]) return -EINVAL; extended_addr = nla_get_le64(attrs[NL802154_DEV_ATTR_EXTENDED_ADDR]); return rdev_del_device(rdev, wpan_dev, extended_addr); } static int nl802154_send_devkey(struct sk_buff *msg, u32 cmd, u32 portid, u32 seq, int flags, struct cfg802154_registered_device *rdev, struct net_device *dev, __le64 extended_addr, const struct ieee802154_llsec_device_key *devkey) { void *hdr; struct nlattr *nl_devkey, *nl_key_id; hdr = nl802154hdr_put(msg, portid, seq, flags, cmd); if (!hdr) return -ENOBUFS; if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex)) goto nla_put_failure; nl_devkey = nla_nest_start(msg, NL802154_ATTR_SEC_DEVKEY); if (!nl_devkey) goto nla_put_failure; if (nla_put_le64(msg, NL802154_DEVKEY_ATTR_EXTENDED_ADDR, extended_addr, NL802154_DEVKEY_ATTR_PAD) || nla_put_u32(msg, NL802154_DEVKEY_ATTR_FRAME_COUNTER, devkey->frame_counter)) goto nla_put_failure; nl_key_id = nla_nest_start(msg, NL802154_DEVKEY_ATTR_ID); if (!nl_key_id) goto nla_put_failure; if (ieee802154_llsec_send_key_id(msg, &devkey->key_id) < 0) goto nla_put_failure; nla_nest_end(msg, nl_key_id); nla_nest_end(msg, nl_devkey); genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } static int 
nl802154_dump_llsec_devkey(struct sk_buff *skb, struct netlink_callback *cb) { struct cfg802154_registered_device *rdev = NULL; struct ieee802154_llsec_device_key *kpos; struct ieee802154_llsec_device *dpos; struct ieee802154_llsec_table *table; struct wpan_dev *wpan_dev; int err; err = nl802154_prepare_wpan_dev_dump(skb, cb, &rdev, &wpan_dev); if (err) return err; if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR) { err = skb->len; goto out_err; } if (!wpan_dev->netdev) { err = -EINVAL; goto out_err; } rdev_lock_llsec_table(rdev, wpan_dev); rdev_get_llsec_table(rdev, wpan_dev, &table); /* TODO make it like station dump */ if (cb->args[2]) goto out; /* TODO look if remove devkey and do some nested attribute */ list_for_each_entry(dpos, &table->devices, list) { list_for_each_entry(kpos, &dpos->keys, list) { if (nl802154_send_devkey(skb, NL802154_CMD_NEW_SEC_LEVEL, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, rdev, wpan_dev->netdev, dpos->hwaddr, kpos) < 0) { /* TODO */ err = -EIO; rdev_unlock_llsec_table(rdev, wpan_dev); goto out_err; } } } cb->args[2] = 1; out: rdev_unlock_llsec_table(rdev, wpan_dev); err = skb->len; out_err: nl802154_finish_wpan_dev_dump(rdev); return err; } static const struct nla_policy nl802154_devkey_policy[NL802154_DEVKEY_ATTR_MAX + 1] = { [NL802154_DEVKEY_ATTR_FRAME_COUNTER] = { NLA_U32 }, [NL802154_DEVKEY_ATTR_EXTENDED_ADDR] = { NLA_U64 }, [NL802154_DEVKEY_ATTR_ID] = { NLA_NESTED }, }; static int nl802154_add_llsec_devkey(struct sk_buff *skb, struct genl_info *info) { struct cfg802154_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wpan_dev *wpan_dev = dev->ieee802154_ptr; struct nlattr *attrs[NL802154_DEVKEY_ATTR_MAX + 1]; struct ieee802154_llsec_device_key key; __le64 extended_addr; if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR) return -EOPNOTSUPP; if (!info->attrs[NL802154_ATTR_SEC_DEVKEY] || nla_parse_nested(attrs, NL802154_DEVKEY_ATTR_MAX, info->attrs[NL802154_ATTR_SEC_DEVKEY], nl802154_devkey_policy, info->extack) < 0) return -EINVAL; if (!attrs[NL802154_DEVKEY_ATTR_FRAME_COUNTER] || !attrs[NL802154_DEVKEY_ATTR_EXTENDED_ADDR]) return -EINVAL; /* TODO change key.id ? */ if (ieee802154_llsec_parse_key_id(attrs[NL802154_DEVKEY_ATTR_ID], &key.key_id) < 0) return -ENOBUFS; /* TODO be32 */ key.frame_counter = nla_get_u32(attrs[NL802154_DEVKEY_ATTR_FRAME_COUNTER]); /* TODO change naming hwaddr -> extended_addr * check unique identifier short+pan OR extended_addr */ extended_addr = nla_get_le64(attrs[NL802154_DEVKEY_ATTR_EXTENDED_ADDR]); return rdev_add_devkey(rdev, wpan_dev, extended_addr, &key); } static int nl802154_del_llsec_devkey(struct sk_buff *skb, struct genl_info *info) { struct cfg802154_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wpan_dev *wpan_dev = dev->ieee802154_ptr; struct nlattr *attrs[NL802154_DEVKEY_ATTR_MAX + 1]; struct ieee802154_llsec_device_key key; __le64 extended_addr; if (!info->attrs[NL802154_ATTR_SEC_DEVKEY] || nla_parse_nested(attrs, NL802154_DEVKEY_ATTR_MAX, info->attrs[NL802154_ATTR_SEC_DEVKEY], nl802154_devkey_policy, info->extack)) return -EINVAL; if (!attrs[NL802154_DEVKEY_ATTR_EXTENDED_ADDR]) return -EINVAL; /* TODO change key.id ? 
*/ if (ieee802154_llsec_parse_key_id(attrs[NL802154_DEVKEY_ATTR_ID], &key.key_id) < 0) return -ENOBUFS; /* TODO change naming hwaddr -> extended_addr * check unique identifier short+pan OR extended_addr */ extended_addr = nla_get_le64(attrs[NL802154_DEVKEY_ATTR_EXTENDED_ADDR]); return rdev_del_devkey(rdev, wpan_dev, extended_addr, &key); } static int nl802154_send_seclevel(struct sk_buff *msg, u32 cmd, u32 portid, u32 seq, int flags, struct cfg802154_registered_device *rdev, struct net_device *dev, const struct ieee802154_llsec_seclevel *sl) { void *hdr; struct nlattr *nl_seclevel; hdr = nl802154hdr_put(msg, portid, seq, flags, cmd); if (!hdr) return -ENOBUFS; if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex)) goto nla_put_failure; nl_seclevel = nla_nest_start(msg, NL802154_ATTR_SEC_LEVEL); if (!nl_seclevel) goto nla_put_failure; if (nla_put_u32(msg, NL802154_SECLEVEL_ATTR_FRAME, sl->frame_type) || nla_put_u32(msg, NL802154_SECLEVEL_ATTR_LEVELS, sl->sec_levels) || nla_put_u8(msg, NL802154_SECLEVEL_ATTR_DEV_OVERRIDE, sl->device_override)) goto nla_put_failure; if (sl->frame_type == NL802154_FRAME_CMD) { if (nla_put_u32(msg, NL802154_SECLEVEL_ATTR_CMD_FRAME, sl->cmd_frame_id)) goto nla_put_failure; } nla_nest_end(msg, nl_seclevel); genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } static int nl802154_dump_llsec_seclevel(struct sk_buff *skb, struct netlink_callback *cb) { struct cfg802154_registered_device *rdev = NULL; struct ieee802154_llsec_seclevel *sl; struct ieee802154_llsec_table *table; struct wpan_dev *wpan_dev; int err; err = nl802154_prepare_wpan_dev_dump(skb, cb, &rdev, &wpan_dev); if (err) return err; if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR) { err = skb->len; goto out_err; } if (!wpan_dev->netdev) { err = -EINVAL; goto out_err; } rdev_lock_llsec_table(rdev, wpan_dev); rdev_get_llsec_table(rdev, wpan_dev, &table); /* TODO make it like station dump */ if (cb->args[2]) goto out; list_for_each_entry(sl, &table->security_levels, list) { if (nl802154_send_seclevel(skb, NL802154_CMD_NEW_SEC_LEVEL, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, rdev, wpan_dev->netdev, sl) < 0) { /* TODO */ err = -EIO; rdev_unlock_llsec_table(rdev, wpan_dev); goto out_err; } } cb->args[2] = 1; out: rdev_unlock_llsec_table(rdev, wpan_dev); err = skb->len; out_err: nl802154_finish_wpan_dev_dump(rdev); return err; } static const struct nla_policy nl802154_seclevel_policy[NL802154_SECLEVEL_ATTR_MAX + 1] = { [NL802154_SECLEVEL_ATTR_LEVELS] = { .type = NLA_U8 }, [NL802154_SECLEVEL_ATTR_FRAME] = { .type = NLA_U32 }, [NL802154_SECLEVEL_ATTR_CMD_FRAME] = { .type = NLA_U32 }, [NL802154_SECLEVEL_ATTR_DEV_OVERRIDE] = { .type = NLA_U8 }, }; static int llsec_parse_seclevel(struct nlattr *nla, struct ieee802154_llsec_seclevel *sl) { struct nlattr *attrs[NL802154_SECLEVEL_ATTR_MAX + 1]; if (!nla || nla_parse_nested(attrs, NL802154_SECLEVEL_ATTR_MAX, nla, nl802154_seclevel_policy, NULL)) return -EINVAL; memset(sl, 0, sizeof(*sl)); if (!attrs[NL802154_SECLEVEL_ATTR_LEVELS] || !attrs[NL802154_SECLEVEL_ATTR_FRAME] || !attrs[NL802154_SECLEVEL_ATTR_DEV_OVERRIDE]) return -EINVAL; sl->sec_levels = nla_get_u8(attrs[NL802154_SECLEVEL_ATTR_LEVELS]); sl->frame_type = nla_get_u32(attrs[NL802154_SECLEVEL_ATTR_FRAME]); sl->device_override = nla_get_u8(attrs[NL802154_SECLEVEL_ATTR_DEV_OVERRIDE]); if (sl->frame_type > NL802154_FRAME_MAX || (sl->device_override != 0 && sl->device_override != 1)) return -EINVAL; if (sl->frame_type == NL802154_FRAME_CMD) { if 
(!attrs[NL802154_SECLEVEL_ATTR_CMD_FRAME]) return -EINVAL; sl->cmd_frame_id = nla_get_u32(attrs[NL802154_SECLEVEL_ATTR_CMD_FRAME]); if (sl->cmd_frame_id > NL802154_CMD_FRAME_MAX) return -EINVAL; } return 0; } static int nl802154_add_llsec_seclevel(struct sk_buff *skb, struct genl_info *info) { struct cfg802154_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wpan_dev *wpan_dev = dev->ieee802154_ptr; struct ieee802154_llsec_seclevel sl; if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR) return -EOPNOTSUPP; if (llsec_parse_seclevel(info->attrs[NL802154_ATTR_SEC_LEVEL], &sl) < 0) return -EINVAL; return rdev_add_seclevel(rdev, wpan_dev, &sl); } static int nl802154_del_llsec_seclevel(struct sk_buff *skb, struct genl_info *info) { struct cfg802154_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wpan_dev *wpan_dev = dev->ieee802154_ptr; struct ieee802154_llsec_seclevel sl; if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR) return -EOPNOTSUPP; if (!info->attrs[NL802154_ATTR_SEC_LEVEL] || llsec_parse_seclevel(info->attrs[NL802154_ATTR_SEC_LEVEL], &sl) < 0) return -EINVAL; return rdev_del_seclevel(rdev, wpan_dev, &sl); } #endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */ #define NL802154_FLAG_NEED_WPAN_PHY 0x01 #define NL802154_FLAG_NEED_NETDEV 0x02 #define NL802154_FLAG_NEED_RTNL 0x04 #define NL802154_FLAG_CHECK_NETDEV_UP 0x08 #define NL802154_FLAG_NEED_NETDEV_UP (NL802154_FLAG_NEED_NETDEV |\ NL802154_FLAG_CHECK_NETDEV_UP) #define NL802154_FLAG_NEED_WPAN_DEV 0x10 #define NL802154_FLAG_NEED_WPAN_DEV_UP (NL802154_FLAG_NEED_WPAN_DEV |\ NL802154_FLAG_CHECK_NETDEV_UP) static int nl802154_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info) { struct cfg802154_registered_device *rdev; struct wpan_dev *wpan_dev; struct net_device *dev; bool rtnl = ops->internal_flags & NL802154_FLAG_NEED_RTNL; if (rtnl) rtnl_lock(); if (ops->internal_flags & NL802154_FLAG_NEED_WPAN_PHY) { rdev = cfg802154_get_dev_from_info(genl_info_net(info), info); if (IS_ERR(rdev)) { if (rtnl) rtnl_unlock(); return PTR_ERR(rdev); } info->user_ptr[0] = rdev; } else if (ops->internal_flags & NL802154_FLAG_NEED_NETDEV || ops->internal_flags & NL802154_FLAG_NEED_WPAN_DEV) { ASSERT_RTNL(); wpan_dev = __cfg802154_wpan_dev_from_attrs(genl_info_net(info), info->attrs); if (IS_ERR(wpan_dev)) { if (rtnl) rtnl_unlock(); return PTR_ERR(wpan_dev); } dev = wpan_dev->netdev; rdev = wpan_phy_to_rdev(wpan_dev->wpan_phy); if (ops->internal_flags & NL802154_FLAG_NEED_NETDEV) { if (!dev) { if (rtnl) rtnl_unlock(); return -EINVAL; } info->user_ptr[1] = dev; } else { info->user_ptr[1] = wpan_dev; } if (dev) { if (ops->internal_flags & NL802154_FLAG_CHECK_NETDEV_UP && !netif_running(dev)) { if (rtnl) rtnl_unlock(); return -ENETDOWN; } dev_hold(dev); } info->user_ptr[0] = rdev; } return 0; } static void nl802154_post_doit(const struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info) { if (info->user_ptr[1]) { if (ops->internal_flags & NL802154_FLAG_NEED_WPAN_DEV) { struct wpan_dev *wpan_dev = info->user_ptr[1]; if (wpan_dev->netdev) dev_put(wpan_dev->netdev); } else { dev_put(info->user_ptr[1]); } } if (ops->internal_flags & NL802154_FLAG_NEED_RTNL) rtnl_unlock(); } static const struct genl_ops nl802154_ops[] = { { .cmd = NL802154_CMD_GET_WPAN_PHY, .doit = nl802154_get_wpan_phy, .dumpit = nl802154_dump_wpan_phy, .done = nl802154_dump_wpan_phy_done, .policy = nl802154_policy, /* can be retrieved by unprivileged users */ 
.internal_flags = NL802154_FLAG_NEED_WPAN_PHY | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_GET_INTERFACE, .doit = nl802154_get_interface, .dumpit = nl802154_dump_interface, .policy = nl802154_policy, /* can be retrieved by unprivileged users */ .internal_flags = NL802154_FLAG_NEED_WPAN_DEV | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_NEW_INTERFACE, .doit = nl802154_new_interface, .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_WPAN_PHY | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_DEL_INTERFACE, .doit = nl802154_del_interface, .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_WPAN_DEV | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_SET_CHANNEL, .doit = nl802154_set_channel, .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_WPAN_PHY | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_SET_CCA_MODE, .doit = nl802154_set_cca_mode, .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_WPAN_PHY | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_SET_CCA_ED_LEVEL, .doit = nl802154_set_cca_ed_level, .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_WPAN_PHY | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_SET_TX_POWER, .doit = nl802154_set_tx_power, .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_WPAN_PHY | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_SET_WPAN_PHY_NETNS, .doit = nl802154_wpan_phy_netns, .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_WPAN_PHY | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_SET_PAN_ID, .doit = nl802154_set_pan_id, .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_SET_SHORT_ADDR, .doit = nl802154_set_short_addr, .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_SET_BACKOFF_EXPONENT, .doit = nl802154_set_backoff_exponent, .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_SET_MAX_CSMA_BACKOFFS, .doit = nl802154_set_max_csma_backoffs, .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_SET_MAX_FRAME_RETRIES, .doit = nl802154_set_max_frame_retries, .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_SET_LBT_MODE, .doit = nl802154_set_lbt_mode, .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_SET_ACKREQ_DEFAULT, .doit = nl802154_set_ackreq_default, .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, }, #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL { .cmd = NL802154_CMD_SET_SEC_PARAMS, .doit = nl802154_set_llsec_params, .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_GET_SEC_KEY, /* TODO .doit by matching key id? 
*/ .dumpit = nl802154_dump_llsec_key, .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_NEW_SEC_KEY, .doit = nl802154_add_llsec_key, .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_DEL_SEC_KEY, .doit = nl802154_del_llsec_key, .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, }, /* TODO unique identifier must short+pan OR extended_addr */ { .cmd = NL802154_CMD_GET_SEC_DEV, /* TODO .doit by matching extended_addr? */ .dumpit = nl802154_dump_llsec_dev, .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_NEW_SEC_DEV, .doit = nl802154_add_llsec_dev, .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_DEL_SEC_DEV, .doit = nl802154_del_llsec_dev, .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, }, /* TODO remove complete devkey, put it as nested? */ { .cmd = NL802154_CMD_GET_SEC_DEVKEY, /* TODO doit by matching ??? */ .dumpit = nl802154_dump_llsec_devkey, .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_NEW_SEC_DEVKEY, .doit = nl802154_add_llsec_devkey, .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_DEL_SEC_DEVKEY, .doit = nl802154_del_llsec_devkey, .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_GET_SEC_LEVEL, /* TODO .doit by matching frame_type? */ .dumpit = nl802154_dump_llsec_seclevel, .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_NEW_SEC_LEVEL, .doit = nl802154_add_llsec_seclevel, .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_DEL_SEC_LEVEL, /* TODO match frame_type only? */ .doit = nl802154_del_llsec_seclevel, .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, }, #endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */ }; static struct genl_family nl802154_fam __ro_after_init = { .name = NL802154_GENL_NAME, /* have users key off the name instead */ .hdrsize = 0, /* no private header */ .version = 1, /* no particular meaning now */ .maxattr = NL802154_ATTR_MAX, .netnsok = true, .pre_doit = nl802154_pre_doit, .post_doit = nl802154_post_doit, .module = THIS_MODULE, .ops = nl802154_ops, .n_ops = ARRAY_SIZE(nl802154_ops), .mcgrps = nl802154_mcgrps, .n_mcgrps = ARRAY_SIZE(nl802154_mcgrps), }; /* initialisation/exit functions */ int __init nl802154_init(void) { return genl_register_family(&nl802154_fam); } void nl802154_exit(void) { genl_unregister_family(&nl802154_fam); }
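/*
 * Editorial note (illustrative sketch, not part of the original source):
 * the NL802154_FLAG_* internal_flags used in the operation table above are
 * consumed by nl802154_pre_doit(), which looks up and pins the objects a
 * handler needs before its .doit runs, and by nl802154_post_doit(), which
 * releases them afterwards.  A hypothetical handler registered with
 *
 *	.internal_flags = NL802154_FLAG_NEED_NETDEV_UP |
 *			  NL802154_FLAG_NEED_RTNL,
 *
 * could therefore rely on roughly the following (names are invented):
 *
 *	static int nl802154_example_doit(struct sk_buff *skb,
 *					 struct genl_info *info)
 *	{
 *		struct cfg802154_registered_device *rdev = info->user_ptr[0];
 *		struct net_device *dev = info->user_ptr[1];
 *
 *		ASSERT_RTNL();			// rtnl held by pre_doit
 *		WARN_ON(!netif_running(dev));	// CHECK_NETDEV_UP enforced
 *		// dev was dev_hold()ed by pre_doit; post_doit drops it
 *		return 0;
 *	}
 */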
/* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM v4l2 #if !defined(_TRACE_V4L2_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_V4L2_H #include <linux/tracepoint.h> #include <media/videobuf2-v4l2.h> /* Enums require being exported to userspace, for user tool parsing */ #undef EM #undef EMe #define EM(a, b) TRACE_DEFINE_ENUM(a); #define EMe(a, b) TRACE_DEFINE_ENUM(a); #define show_type(type) \ __print_symbolic(type, SHOW_TYPE) #define SHOW_TYPE \ EM( V4L2_BUF_TYPE_VIDEO_CAPTURE, "VIDEO_CAPTURE" ) \ EM( V4L2_BUF_TYPE_VIDEO_OUTPUT, "VIDEO_OUTPUT" ) \ EM( V4L2_BUF_TYPE_VIDEO_OVERLAY, "VIDEO_OVERLAY" ) \ EM( V4L2_BUF_TYPE_VBI_CAPTURE, "VBI_CAPTURE" ) \ EM( V4L2_BUF_TYPE_VBI_OUTPUT, "VBI_OUTPUT" ) \ EM( V4L2_BUF_TYPE_SLICED_VBI_CAPTURE, "SLICED_VBI_CAPTURE" ) \ EM( V4L2_BUF_TYPE_SLICED_VBI_OUTPUT, "SLICED_VBI_OUTPUT" ) \ EM( V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY, "VIDEO_OUTPUT_OVERLAY" ) \ EM( V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE, "VIDEO_CAPTURE_MPLANE" ) \ EM( V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE, "VIDEO_OUTPUT_MPLANE" ) \ EM( V4L2_BUF_TYPE_SDR_CAPTURE, "SDR_CAPTURE" ) \ EM( V4L2_BUF_TYPE_SDR_OUTPUT, "SDR_OUTPUT" ) \ EM( V4L2_BUF_TYPE_META_CAPTURE, "META_CAPTURE" ) \ EMe(V4L2_BUF_TYPE_PRIVATE, "PRIVATE" ) SHOW_TYPE #define show_field(field) \ __print_symbolic(field, SHOW_FIELD) #define SHOW_FIELD \ EM( V4L2_FIELD_ANY, "ANY" ) \ EM( V4L2_FIELD_NONE, "NONE" ) \ EM( V4L2_FIELD_TOP, "TOP" ) \ EM( V4L2_FIELD_BOTTOM, "BOTTOM" ) \ EM( V4L2_FIELD_INTERLACED, "INTERLACED" ) \ EM( V4L2_FIELD_SEQ_TB, "SEQ_TB" ) \ EM( V4L2_FIELD_SEQ_BT, "SEQ_BT" ) \ EM( V4L2_FIELD_ALTERNATE, "ALTERNATE" ) \ EM( V4L2_FIELD_INTERLACED_TB, "INTERLACED_TB" ) \ EMe( V4L2_FIELD_INTERLACED_BT, "INTERLACED_BT" ) SHOW_FIELD /* * Now redefine the EM() and EMe() macros to map the enums to the strings * that will be printed in the output.
*/ #undef EM #undef EMe #define EM(a, b) {a, b}, #define EMe(a, b) {a, b} /* V4L2_TC_TYPE_* are macros, not defines, they do not need processing */ #define show_timecode_type(type) \ __print_symbolic(type, \ { V4L2_TC_TYPE_24FPS, "24FPS" }, \ { V4L2_TC_TYPE_25FPS, "25FPS" }, \ { V4L2_TC_TYPE_30FPS, "30FPS" }, \ { V4L2_TC_TYPE_50FPS, "50FPS" }, \ { V4L2_TC_TYPE_60FPS, "60FPS" }) #define show_flags(flags) \ __print_flags(flags, "|", \ { V4L2_BUF_FLAG_MAPPED, "MAPPED" }, \ { V4L2_BUF_FLAG_QUEUED, "QUEUED" }, \ { V4L2_BUF_FLAG_DONE, "DONE" }, \ { V4L2_BUF_FLAG_KEYFRAME, "KEYFRAME" }, \ { V4L2_BUF_FLAG_PFRAME, "PFRAME" }, \ { V4L2_BUF_FLAG_BFRAME, "BFRAME" }, \ { V4L2_BUF_FLAG_ERROR, "ERROR" }, \ { V4L2_BUF_FLAG_TIMECODE, "TIMECODE" }, \ { V4L2_BUF_FLAG_PREPARED, "PREPARED" }, \ { V4L2_BUF_FLAG_NO_CACHE_INVALIDATE, "NO_CACHE_INVALIDATE" }, \ { V4L2_BUF_FLAG_NO_CACHE_CLEAN, "NO_CACHE_CLEAN" }, \ { V4L2_BUF_FLAG_TIMESTAMP_MASK, "TIMESTAMP_MASK" }, \ { V4L2_BUF_FLAG_TIMESTAMP_UNKNOWN, "TIMESTAMP_UNKNOWN" }, \ { V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC, "TIMESTAMP_MONOTONIC" }, \ { V4L2_BUF_FLAG_TIMESTAMP_COPY, "TIMESTAMP_COPY" }, \ { V4L2_BUF_FLAG_LAST, "LAST" }) #define show_timecode_flags(flags) \ __print_flags(flags, "|", \ { V4L2_TC_FLAG_DROPFRAME, "DROPFRAME" }, \ { V4L2_TC_FLAG_COLORFRAME, "COLORFRAME" }, \ { V4L2_TC_USERBITS_USERDEFINED, "USERBITS_USERDEFINED" }, \ { V4L2_TC_USERBITS_8BITCHARS, "USERBITS_8BITCHARS" }) DECLARE_EVENT_CLASS(v4l2_event_class, TP_PROTO(int minor, struct v4l2_buffer *buf), TP_ARGS(minor, buf), TP_STRUCT__entry( __field(int, minor) __field(u32, index) __field(u32, type) __field(u32, bytesused) __field(u32, flags) __field(u32, field) __field(s64, timestamp) __field(u32, timecode_type) __field(u32, timecode_flags) __field(u8, timecode_frames) __field(u8, timecode_seconds) __field(u8, timecode_minutes) __field(u8, timecode_hours) __field(u8, timecode_userbits0) __field(u8, timecode_userbits1) __field(u8, timecode_userbits2) __field(u8, timecode_userbits3) __field(u32, sequence) ), TP_fast_assign( __entry->minor = minor; __entry->index = buf->index; __entry->type = buf->type; __entry->bytesused = buf->bytesused; __entry->flags = buf->flags; __entry->field = buf->field; __entry->timestamp = timeval_to_ns(&buf->timestamp); __entry->timecode_type = buf->timecode.type; __entry->timecode_flags = buf->timecode.flags; __entry->timecode_frames = buf->timecode.frames; __entry->timecode_seconds = buf->timecode.seconds; __entry->timecode_minutes = buf->timecode.minutes; __entry->timecode_hours = buf->timecode.hours; __entry->timecode_userbits0 = buf->timecode.userbits[0]; __entry->timecode_userbits1 = buf->timecode.userbits[1]; __entry->timecode_userbits2 = buf->timecode.userbits[2]; __entry->timecode_userbits3 = buf->timecode.userbits[3]; __entry->sequence = buf->sequence; ), TP_printk("minor = %d, index = %u, type = %s, bytesused = %u, " "flags = %s, field = %s, timestamp = %llu, " "timecode = { type = %s, flags = %s, frames = %u, " "seconds = %u, minutes = %u, hours = %u, " "userbits = { %u %u %u %u } }, sequence = %u", __entry->minor, __entry->index, show_type(__entry->type), __entry->bytesused, show_flags(__entry->flags), show_field(__entry->field), __entry->timestamp, show_timecode_type(__entry->timecode_type), show_timecode_flags(__entry->timecode_flags), __entry->timecode_frames, __entry->timecode_seconds, __entry->timecode_minutes, __entry->timecode_hours, __entry->timecode_userbits0, __entry->timecode_userbits1, __entry->timecode_userbits2, __entry->timecode_userbits3, 
__entry->sequence ) ) DEFINE_EVENT(v4l2_event_class, v4l2_dqbuf, TP_PROTO(int minor, struct v4l2_buffer *buf), TP_ARGS(minor, buf) ); DEFINE_EVENT(v4l2_event_class, v4l2_qbuf, TP_PROTO(int minor, struct v4l2_buffer *buf), TP_ARGS(minor, buf) ); DECLARE_EVENT_CLASS(vb2_v4l2_event_class, TP_PROTO(struct vb2_queue *q, struct vb2_buffer *vb), TP_ARGS(q, vb), TP_STRUCT__entry( __field(int, minor) __field(u32, flags) __field(u32, field) __field(u64, timestamp) __field(u32, timecode_type) __field(u32, timecode_flags) __field(u8, timecode_frames) __field(u8, timecode_seconds) __field(u8, timecode_minutes) __field(u8, timecode_hours) __field(u8, timecode_userbits0) __field(u8, timecode_userbits1) __field(u8, timecode_userbits2) __field(u8, timecode_userbits3) __field(u32, sequence) ), TP_fast_assign( struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); struct v4l2_fh *owner = q->owner; __entry->minor = owner ? owner->vdev->minor : -1; __entry->flags = vbuf->flags; __entry->field = vbuf->field; __entry->timestamp = vb->timestamp; __entry->timecode_type = vbuf->timecode.type; __entry->timecode_flags = vbuf->timecode.flags; __entry->timecode_frames = vbuf->timecode.frames; __entry->timecode_seconds = vbuf->timecode.seconds; __entry->timecode_minutes = vbuf->timecode.minutes; __entry->timecode_hours = vbuf->timecode.hours; __entry->timecode_userbits0 = vbuf->timecode.userbits[0]; __entry->timecode_userbits1 = vbuf->timecode.userbits[1]; __entry->timecode_userbits2 = vbuf->timecode.userbits[2]; __entry->timecode_userbits3 = vbuf->timecode.userbits[3]; __entry->sequence = vbuf->sequence; ), TP_printk("minor=%d flags = %s, field = %s, " "timestamp = %llu, timecode = { type = %s, flags = %s, " "frames = %u, seconds = %u, minutes = %u, hours = %u, " "userbits = { %u %u %u %u } }, sequence = %u", __entry->minor, show_flags(__entry->flags), show_field(__entry->field), __entry->timestamp, show_timecode_type(__entry->timecode_type), show_timecode_flags(__entry->timecode_flags), __entry->timecode_frames, __entry->timecode_seconds, __entry->timecode_minutes, __entry->timecode_hours, __entry->timecode_userbits0, __entry->timecode_userbits1, __entry->timecode_userbits2, __entry->timecode_userbits3, __entry->sequence ) ) DEFINE_EVENT(vb2_v4l2_event_class, vb2_v4l2_buf_done, TP_PROTO(struct vb2_queue *q, struct vb2_buffer *vb), TP_ARGS(q, vb) ); DEFINE_EVENT(vb2_v4l2_event_class, vb2_v4l2_buf_queue, TP_PROTO(struct vb2_queue *q, struct vb2_buffer *vb), TP_ARGS(q, vb) ); DEFINE_EVENT(vb2_v4l2_event_class, vb2_v4l2_dqbuf, TP_PROTO(struct vb2_queue *q, struct vb2_buffer *vb), TP_ARGS(q, vb) ); DEFINE_EVENT(vb2_v4l2_event_class, vb2_v4l2_qbuf, TP_PROTO(struct vb2_queue *q, struct vb2_buffer *vb), TP_ARGS(q, vb) ); #endif /* if !defined(_TRACE_V4L2_H) || defined(TRACE_HEADER_MULTI_READ) */ /* This part must be outside protection */ #include <trace/define_trace.h>
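/*
 * Editorial note (illustrative, not part of the original source): the
 * EM()/EMe() macros above are expanded twice.  The first definition turns
 * every SHOW_TYPE/SHOW_FIELD entry into a TRACE_DEFINE_ENUM() so the enum
 * values are exported for userspace tools; the redefinition turns the same
 * lists into the { value, "string" } tables consumed by __print_symbolic().
 * show_type(__entry->type) therefore expands to something like:
 *
 *	__print_symbolic(__entry->type,
 *			 { V4L2_BUF_TYPE_VIDEO_CAPTURE, "VIDEO_CAPTURE" },
 *			 { V4L2_BUF_TYPE_VIDEO_OUTPUT, "VIDEO_OUTPUT" },
 *			 ...)
 *
 * Callers emit these events through the helpers generated by DEFINE_EVENT(),
 * e.g. trace_v4l2_qbuf(minor, buf) or trace_vb2_v4l2_buf_done(q, vb).
 */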
/* * CIPSO - Commercial IP Security Option * * This is an implementation of the CIPSO 2.2 protocol as specified in * draft-ietf-cipso-ipsecurity-01.txt with additional tag
types as found in * FIPS-188. While CIPSO never became a full IETF RFC standard many vendors * have chosen to adopt the protocol and over the years it has become a * de-facto standard for labeled networking. * * The CIPSO draft specification can be found in the kernel's Documentation * directory as well as the following URL: * http://tools.ietf.org/id/draft-ietf-cipso-ipsecurity-01.txt * The FIPS-188 specification can be found at the following URL: * http://www.itl.nist.gov/fipspubs/fip188.htm * * Author: Paul Moore <paul.moore@hp.com> * */ /* * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, see <http://www.gnu.org/licenses/>. * */ #include <linux/init.h> #include <linux/types.h> #include <linux/rcupdate.h> #include <linux/list.h> #include <linux/spinlock.h> #include <linux/string.h> #include <linux/jhash.h> #include <linux/audit.h> #include <linux/slab.h> #include <net/ip.h> #include <net/icmp.h> #include <net/tcp.h> #include <net/netlabel.h> #include <net/cipso_ipv4.h> #include <linux/atomic.h> #include <linux/bug.h> #include <asm/unaligned.h> /* List of available DOI definitions */ /* XXX - This currently assumes a minimal number of different DOIs in use, * if in practice there are a lot of different DOIs this list should * probably be turned into a hash table or something similar so we * can do quick lookups. */ static DEFINE_SPINLOCK(cipso_v4_doi_list_lock); static LIST_HEAD(cipso_v4_doi_list); /* Label mapping cache */ int cipso_v4_cache_enabled = 1; int cipso_v4_cache_bucketsize = 10; #define CIPSO_V4_CACHE_BUCKETBITS 7 #define CIPSO_V4_CACHE_BUCKETS (1 << CIPSO_V4_CACHE_BUCKETBITS) #define CIPSO_V4_CACHE_REORDERLIMIT 10 struct cipso_v4_map_cache_bkt { spinlock_t lock; u32 size; struct list_head list; }; struct cipso_v4_map_cache_entry { u32 hash; unsigned char *key; size_t key_len; struct netlbl_lsm_cache *lsm_data; u32 activity; struct list_head list; }; static struct cipso_v4_map_cache_bkt *cipso_v4_cache; /* Restricted bitmap (tag #1) flags */ int cipso_v4_rbm_optfmt = 0; int cipso_v4_rbm_strictvalid = 1; /* * Protocol Constants */ /* Maximum size of the CIPSO IP option, derived from the fact that the maximum * IPv4 header size is 60 bytes and the base IPv4 header is 20 bytes long. */ #define CIPSO_V4_OPT_LEN_MAX 40 /* Length of the base CIPSO option, this includes the option type (1 byte), the * option length (1 byte), and the DOI (4 bytes). */ #define CIPSO_V4_HDR_LEN 6 /* Base length of the restrictive category bitmap tag (tag #1). */ #define CIPSO_V4_TAG_RBM_BLEN 4 /* Base length of the enumerated category tag (tag #2). */ #define CIPSO_V4_TAG_ENUM_BLEN 4 /* Base length of the ranged categories bitmap tag (tag #5). */ #define CIPSO_V4_TAG_RNG_BLEN 4 /* The maximum number of category ranges permitted in the ranged category tag * (tag #5). 
You may note that the IETF draft states that the maximum number * of category ranges is 7, but if the low end of the last category range is * zero then it is possible to fit 8 category ranges because the zero should * be omitted. */ #define CIPSO_V4_TAG_RNG_CAT_MAX 8 /* Base length of the local tag (non-standard tag). * Tag definition (may change between kernel versions) * * 0 8 16 24 32 * +----------+----------+----------+----------+ * | 10000000 | 00000110 | 32-bit secid value | * +----------+----------+----------+----------+ * | in (host byte order)| * +----------+----------+ * */ #define CIPSO_V4_TAG_LOC_BLEN 6 /* * Helper Functions */ /** * cipso_v4_cache_entry_free - Frees a cache entry * @entry: the entry to free * * Description: * This function frees the memory associated with a cache entry including the * LSM cache data if there are no longer any users, i.e. reference count == 0. * */ static void cipso_v4_cache_entry_free(struct cipso_v4_map_cache_entry *entry) { if (entry->lsm_data) netlbl_secattr_cache_free(entry->lsm_data); kfree(entry->key); kfree(entry); } /** * cipso_v4_map_cache_hash - Hashing function for the CIPSO cache * @key: the hash key * @key_len: the length of the key in bytes * * Description: * The CIPSO tag hashing function. Returns a 32-bit hash value. * */ static u32 cipso_v4_map_cache_hash(const unsigned char *key, u32 key_len) { return jhash(key, key_len, 0); } /* * Label Mapping Cache Functions */ /** * cipso_v4_cache_init - Initialize the CIPSO cache * * Description: * Initializes the CIPSO label mapping cache, this function should be called * before any of the other functions defined in this file. Returns zero on * success, negative values on error. * */ static int __init cipso_v4_cache_init(void) { u32 iter; cipso_v4_cache = kcalloc(CIPSO_V4_CACHE_BUCKETS, sizeof(struct cipso_v4_map_cache_bkt), GFP_KERNEL); if (!cipso_v4_cache) return -ENOMEM; for (iter = 0; iter < CIPSO_V4_CACHE_BUCKETS; iter++) { spin_lock_init(&cipso_v4_cache[iter].lock); cipso_v4_cache[iter].size = 0; INIT_LIST_HEAD(&cipso_v4_cache[iter].list); } return 0; } /** * cipso_v4_cache_invalidate - Invalidates the current CIPSO cache * * Description: * Invalidates and frees any entries in the CIPSO cache. Returns zero on * success and negative values on failure. * */ void cipso_v4_cache_invalidate(void) { struct cipso_v4_map_cache_entry *entry, *tmp_entry; u32 iter; for (iter = 0; iter < CIPSO_V4_CACHE_BUCKETS; iter++) { spin_lock_bh(&cipso_v4_cache[iter].lock); list_for_each_entry_safe(entry, tmp_entry, &cipso_v4_cache[iter].list, list) { list_del(&entry->list); cipso_v4_cache_entry_free(entry); } cipso_v4_cache[iter].size = 0; spin_unlock_bh(&cipso_v4_cache[iter].lock); } } /** * cipso_v4_cache_check - Check the CIPSO cache for a label mapping * @key: the buffer to check * @key_len: buffer length in bytes * @secattr: the security attribute struct to use * * Description: * This function checks the cache to see if a label mapping already exists for * the given key. If there is a match then the cache is adjusted and the * @secattr struct is populated with the correct LSM security attributes. The * cache is adjusted in the following manner if the entry is not already the * first in the cache bucket: * * 1. The cache entry's activity counter is incremented * 2. The previous (higher ranking) entry's activity counter is decremented * 3. 
If the difference between the two activity counters is geater than * CIPSO_V4_CACHE_REORDERLIMIT the two entries are swapped * * Returns zero on success, -ENOENT for a cache miss, and other negative values * on error. * */ static int cipso_v4_cache_check(const unsigned char *key, u32 key_len, struct netlbl_lsm_secattr *secattr) { u32 bkt; struct cipso_v4_map_cache_entry *entry; struct cipso_v4_map_cache_entry *prev_entry = NULL; u32 hash; if (!READ_ONCE(cipso_v4_cache_enabled)) return -ENOENT; hash = cipso_v4_map_cache_hash(key, key_len); bkt = hash & (CIPSO_V4_CACHE_BUCKETS - 1); spin_lock_bh(&cipso_v4_cache[bkt].lock); list_for_each_entry(entry, &cipso_v4_cache[bkt].list, list) { if (entry->hash == hash && entry->key_len == key_len && memcmp(entry->key, key, key_len) == 0) { entry->activity += 1; refcount_inc(&entry->lsm_data->refcount); secattr->cache = entry->lsm_data; secattr->flags |= NETLBL_SECATTR_CACHE; secattr->type = NETLBL_NLTYPE_CIPSOV4; if (!prev_entry) { spin_unlock_bh(&cipso_v4_cache[bkt].lock); return 0; } if (prev_entry->activity > 0) prev_entry->activity -= 1; if (entry->activity > prev_entry->activity && entry->activity - prev_entry->activity > CIPSO_V4_CACHE_REORDERLIMIT) { __list_del(entry->list.prev, entry->list.next); __list_add(&entry->list, prev_entry->list.prev, &prev_entry->list); } spin_unlock_bh(&cipso_v4_cache[bkt].lock); return 0; } prev_entry = entry; } spin_unlock_bh(&cipso_v4_cache[bkt].lock); return -ENOENT; } /** * cipso_v4_cache_add - Add an entry to the CIPSO cache * @skb: the packet * @secattr: the packet's security attributes * * Description: * Add a new entry into the CIPSO label mapping cache. Add the new entry to * head of the cache bucket's list, if the cache bucket is out of room remove * the last entry in the list first. It is important to note that there is * currently no checking for duplicate keys. Returns zero on success, * negative values on failure. * */ int cipso_v4_cache_add(const unsigned char *cipso_ptr, const struct netlbl_lsm_secattr *secattr) { int bkt_size = READ_ONCE(cipso_v4_cache_bucketsize); int ret_val = -EPERM; u32 bkt; struct cipso_v4_map_cache_entry *entry = NULL; struct cipso_v4_map_cache_entry *old_entry = NULL; u32 cipso_ptr_len; if (!READ_ONCE(cipso_v4_cache_enabled) || bkt_size <= 0) return 0; cipso_ptr_len = cipso_ptr[1]; entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (!entry) return -ENOMEM; entry->key = kmemdup(cipso_ptr, cipso_ptr_len, GFP_ATOMIC); if (!entry->key) { ret_val = -ENOMEM; goto cache_add_failure; } entry->key_len = cipso_ptr_len; entry->hash = cipso_v4_map_cache_hash(cipso_ptr, cipso_ptr_len); refcount_inc(&secattr->cache->refcount); entry->lsm_data = secattr->cache; bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETS - 1); spin_lock_bh(&cipso_v4_cache[bkt].lock); if (cipso_v4_cache[bkt].size < bkt_size) { list_add(&entry->list, &cipso_v4_cache[bkt].list); cipso_v4_cache[bkt].size += 1; } else { old_entry = list_entry(cipso_v4_cache[bkt].list.prev, struct cipso_v4_map_cache_entry, list); list_del(&old_entry->list); list_add(&entry->list, &cipso_v4_cache[bkt].list); cipso_v4_cache_entry_free(old_entry); } spin_unlock_bh(&cipso_v4_cache[bkt].lock); return 0; cache_add_failure: if (entry) cipso_v4_cache_entry_free(entry); return ret_val; } /* * DOI List Functions */ /** * cipso_v4_doi_search - Searches for a DOI definition * @doi: the DOI to search for * * Description: * Search the DOI definition list for a DOI definition with a DOI value that * matches @doi. 
The caller is responsible for calling rcu_read_[un]lock(). * Returns a pointer to the DOI definition on success and NULL on failure. */ static struct cipso_v4_doi *cipso_v4_doi_search(u32 doi) { struct cipso_v4_doi *iter; list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list) if (iter->doi == doi && refcount_read(&iter->refcount)) return iter; return NULL; } /** * cipso_v4_doi_add - Add a new DOI to the CIPSO protocol engine * @doi_def: the DOI structure * @audit_info: NetLabel audit information * * Description: * The caller defines a new DOI for use by the CIPSO engine and calls this * function to add it to the list of acceptable domains. The caller must * ensure that the mapping table specified in @doi_def->map meets all of the * requirements of the mapping type (see cipso_ipv4.h for details). Returns * zero on success and non-zero on failure. * */ int cipso_v4_doi_add(struct cipso_v4_doi *doi_def, struct netlbl_audit *audit_info) { int ret_val = -EINVAL; u32 iter; u32 doi; u32 doi_type; struct audit_buffer *audit_buf; doi = doi_def->doi; doi_type = doi_def->type; if (doi_def->doi == CIPSO_V4_DOI_UNKNOWN) goto doi_add_return; for (iter = 0; iter < CIPSO_V4_TAG_MAXCNT; iter++) { switch (doi_def->tags[iter]) { case CIPSO_V4_TAG_RBITMAP: break; case CIPSO_V4_TAG_RANGE: case CIPSO_V4_TAG_ENUM: if (doi_def->type != CIPSO_V4_MAP_PASS) goto doi_add_return; break; case CIPSO_V4_TAG_LOCAL: if (doi_def->type != CIPSO_V4_MAP_LOCAL) goto doi_add_return; break; case CIPSO_V4_TAG_INVALID: if (iter == 0) goto doi_add_return; break; default: goto doi_add_return; } } refcount_set(&doi_def->refcount, 1); spin_lock(&cipso_v4_doi_list_lock); if (cipso_v4_doi_search(doi_def->doi)) { spin_unlock(&cipso_v4_doi_list_lock); ret_val = -EEXIST; goto doi_add_return; } list_add_tail_rcu(&doi_def->list, &cipso_v4_doi_list); spin_unlock(&cipso_v4_doi_list_lock); ret_val = 0; doi_add_return: audit_buf = netlbl_audit_start(AUDIT_MAC_CIPSOV4_ADD, audit_info); if (audit_buf) { const char *type_str; switch (doi_type) { case CIPSO_V4_MAP_TRANS: type_str = "trans"; break; case CIPSO_V4_MAP_PASS: type_str = "pass"; break; case CIPSO_V4_MAP_LOCAL: type_str = "local"; break; default: type_str = "(unknown)"; } audit_log_format(audit_buf, " cipso_doi=%u cipso_type=%s res=%u", doi, type_str, ret_val == 0 ? 1 : 0); audit_log_end(audit_buf); } return ret_val; } /** * cipso_v4_doi_free - Frees a DOI definition * @doi_def: the DOI definition * * Description: * This function frees all of the memory associated with a DOI definition. * */ void cipso_v4_doi_free(struct cipso_v4_doi *doi_def) { if (!doi_def) return; switch (doi_def->type) { case CIPSO_V4_MAP_TRANS: kfree(doi_def->map.std->lvl.cipso); kfree(doi_def->map.std->lvl.local); kfree(doi_def->map.std->cat.cipso); kfree(doi_def->map.std->cat.local); kfree(doi_def->map.std); break; } kfree(doi_def); } /** * cipso_v4_doi_free_rcu - Frees a DOI definition via the RCU pointer * @entry: the entry's RCU field * * Description: * This function is designed to be used as a callback to the call_rcu() * function so that the memory allocated to the DOI definition can be released * safely. * */ static void cipso_v4_doi_free_rcu(struct rcu_head *entry) { struct cipso_v4_doi *doi_def; doi_def = container_of(entry, struct cipso_v4_doi, rcu); cipso_v4_doi_free(doi_def); } /** * cipso_v4_doi_remove - Remove an existing DOI from the CIPSO protocol engine * @doi: the DOI value * @audit_secid: the LSM secid to use in the audit message * * Description: * Removes a DOI definition from the CIPSO engine. 
The NetLabel routines will * be called to release their own LSM domain mappings as well as our own * domain list. Returns zero on success and negative values on failure. * */ int cipso_v4_doi_remove(u32 doi, struct netlbl_audit *audit_info) { int ret_val; struct cipso_v4_doi *doi_def; struct audit_buffer *audit_buf; spin_lock(&cipso_v4_doi_list_lock); doi_def = cipso_v4_doi_search(doi); if (!doi_def) { spin_unlock(&cipso_v4_doi_list_lock); ret_val = -ENOENT; goto doi_remove_return; } list_del_rcu(&doi_def->list); spin_unlock(&cipso_v4_doi_list_lock); cipso_v4_doi_putdef(doi_def); ret_val = 0; doi_remove_return: audit_buf = netlbl_audit_start(AUDIT_MAC_CIPSOV4_DEL, audit_info); if (audit_buf) { audit_log_format(audit_buf, " cipso_doi=%u res=%u", doi, ret_val == 0 ? 1 : 0); audit_log_end(audit_buf); } return ret_val; } /** * cipso_v4_doi_getdef - Returns a reference to a valid DOI definition * @doi: the DOI value * * Description: * Searches for a valid DOI definition and if one is found it is returned to * the caller. Otherwise NULL is returned. The caller must ensure that * rcu_read_lock() is held while accessing the returned definition and the DOI * definition reference count is decremented when the caller is done. * */ struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi) { struct cipso_v4_doi *doi_def; rcu_read_lock(); doi_def = cipso_v4_doi_search(doi); if (!doi_def) goto doi_getdef_return; if (!refcount_inc_not_zero(&doi_def->refcount)) doi_def = NULL; doi_getdef_return: rcu_read_unlock(); return doi_def; } /** * cipso_v4_doi_putdef - Releases a reference for the given DOI definition * @doi_def: the DOI definition * * Description: * Releases a DOI definition reference obtained from cipso_v4_doi_getdef(). * */ void cipso_v4_doi_putdef(struct cipso_v4_doi *doi_def) { if (!doi_def) return; if (!refcount_dec_and_test(&doi_def->refcount)) return; cipso_v4_cache_invalidate(); call_rcu(&doi_def->rcu, cipso_v4_doi_free_rcu); } /** * cipso_v4_doi_walk - Iterate through the DOI definitions * @skip_cnt: skip past this number of DOI definitions, updated * @callback: callback for each DOI definition * @cb_arg: argument for the callback function * * Description: * Iterate over the DOI definition list, skipping the first @skip_cnt entries. * For each entry call @callback, if @callback returns a negative value stop * 'walking' through the list and return. Updates the value in @skip_cnt upon * return. Returns zero on success, negative values on failure. * */ int cipso_v4_doi_walk(u32 *skip_cnt, int (*callback) (struct cipso_v4_doi *doi_def, void *arg), void *cb_arg) { int ret_val = -ENOENT; u32 doi_cnt = 0; struct cipso_v4_doi *iter_doi; rcu_read_lock(); list_for_each_entry_rcu(iter_doi, &cipso_v4_doi_list, list) if (refcount_read(&iter_doi->refcount) > 0) { if (doi_cnt++ < *skip_cnt) continue; ret_val = callback(iter_doi, cb_arg); if (ret_val < 0) { doi_cnt--; goto doi_walk_return; } } doi_walk_return: rcu_read_unlock(); *skip_cnt = doi_cnt; return ret_val; } /* * Label Mapping Functions */ /** * cipso_v4_map_lvl_valid - Checks to see if the given level is understood * @doi_def: the DOI definition * @level: the level to check * * Description: * Checks the given level against the given DOI definition and returns a * negative value if the level does not have a valid mapping and a zero value * if the level is defined by the DOI. 
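 *
 * Illustrative example (editorial addition, not from the original source;
 * the table contents are hypothetical): with a CIPSO_V4_MAP_TRANS DOI whose
 * lvl.cipso[] table only maps CIPSO levels 0-2, a level of 1 is accepted
 * (returns zero) while a level of 7 is rejected with -EFAULT; a
 * CIPSO_V4_MAP_PASS DOI accepts any level.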
* */ static int cipso_v4_map_lvl_valid(const struct cipso_v4_doi *doi_def, u8 level) { switch (doi_def->type) { case CIPSO_V4_MAP_PASS: return 0; case CIPSO_V4_MAP_TRANS: if ((level < doi_def->map.std->lvl.cipso_size) && (doi_def->map.std->lvl.cipso[level] < CIPSO_V4_INV_LVL)) return 0; break; } return -EFAULT; } /** * cipso_v4_map_lvl_hton - Perform a level mapping from the host to the network * @doi_def: the DOI definition * @host_lvl: the host MLS level * @net_lvl: the network/CIPSO MLS level * * Description: * Perform a label mapping to translate a local MLS level to the correct * CIPSO level using the given DOI definition. Returns zero on success, * negative values otherwise. * */ static int cipso_v4_map_lvl_hton(const struct cipso_v4_doi *doi_def, u32 host_lvl, u32 *net_lvl) { switch (doi_def->type) { case CIPSO_V4_MAP_PASS: *net_lvl = host_lvl; return 0; case CIPSO_V4_MAP_TRANS: if (host_lvl < doi_def->map.std->lvl.local_size && doi_def->map.std->lvl.local[host_lvl] < CIPSO_V4_INV_LVL) { *net_lvl = doi_def->map.std->lvl.local[host_lvl]; return 0; } return -EPERM; } return -EINVAL; } /** * cipso_v4_map_lvl_ntoh - Perform a level mapping from the network to the host * @doi_def: the DOI definition * @net_lvl: the network/CIPSO MLS level * @host_lvl: the host MLS level * * Description: * Perform a label mapping to translate a CIPSO level to the correct local MLS * level using the given DOI definition. Returns zero on success, negative * values otherwise. * */ static int cipso_v4_map_lvl_ntoh(const struct cipso_v4_doi *doi_def, u32 net_lvl, u32 *host_lvl) { struct cipso_v4_std_map_tbl *map_tbl; switch (doi_def->type) { case CIPSO_V4_MAP_PASS: *host_lvl = net_lvl; return 0; case CIPSO_V4_MAP_TRANS: map_tbl = doi_def->map.std; if (net_lvl < map_tbl->lvl.cipso_size && map_tbl->lvl.cipso[net_lvl] < CIPSO_V4_INV_LVL) { *host_lvl = doi_def->map.std->lvl.cipso[net_lvl]; return 0; } return -EPERM; } return -EINVAL; } /** * cipso_v4_map_cat_rbm_valid - Checks to see if the category bitmap is valid * @doi_def: the DOI definition * @bitmap: category bitmap * @bitmap_len: bitmap length in bytes * * Description: * Checks the given category bitmap against the given DOI definition and * returns a negative value if any of the categories in the bitmap do not have * a valid mapping and a zero value if all of the categories are valid. * */ static int cipso_v4_map_cat_rbm_valid(const struct cipso_v4_doi *doi_def, const unsigned char *bitmap, u32 bitmap_len) { int cat = -1; u32 bitmap_len_bits = bitmap_len * 8; u32 cipso_cat_size; u32 *cipso_array; switch (doi_def->type) { case CIPSO_V4_MAP_PASS: return 0; case CIPSO_V4_MAP_TRANS: cipso_cat_size = doi_def->map.std->cat.cipso_size; cipso_array = doi_def->map.std->cat.cipso; for (;;) { cat = netlbl_bitmap_walk(bitmap, bitmap_len_bits, cat + 1, 1); if (cat < 0) break; if (cat >= cipso_cat_size || cipso_array[cat] >= CIPSO_V4_INV_CAT) return -EFAULT; } if (cat == -1) return 0; break; } return -EFAULT; } /** * cipso_v4_map_cat_rbm_hton - Perform a category mapping from host to network * @doi_def: the DOI definition * @secattr: the security attributes * @net_cat: the zero'd out category bitmap in network/CIPSO format * @net_cat_len: the length of the CIPSO bitmap in bytes * * Description: * Perform a label mapping to translate a local MLS category bitmap to the * correct CIPSO bitmap using the given DOI definition. Returns the minimum * size in bytes of the network bitmap on success, negative values otherwise. 
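 *
 * Illustrative example (editorial addition, not from the original source):
 * if the highest CIPSO category bit set after translation is bit 10, the
 * bitmap needs 11 bits and the function returns 2, i.e. the smallest whole
 * number of octets able to hold the highest set bit.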
* */ static int cipso_v4_map_cat_rbm_hton(const struct cipso_v4_doi *doi_def, const struct netlbl_lsm_secattr *secattr, unsigned char *net_cat, u32 net_cat_len) { int host_spot = -1; u32 net_spot = CIPSO_V4_INV_CAT; u32 net_spot_max = 0; u32 net_clen_bits = net_cat_len * 8; u32 host_cat_size = 0; u32 *host_cat_array = NULL; if (doi_def->type == CIPSO_V4_MAP_TRANS) { host_cat_size = doi_def->map.std->cat.local_size; host_cat_array = doi_def->map.std->cat.local; } for (;;) { host_spot = netlbl_catmap_walk(secattr->attr.mls.cat, host_spot + 1); if (host_spot < 0) break; switch (doi_def->type) { case CIPSO_V4_MAP_PASS: net_spot = host_spot; break; case CIPSO_V4_MAP_TRANS: if (host_spot >= host_cat_size) return -EPERM; net_spot = host_cat_array[host_spot]; if (net_spot >= CIPSO_V4_INV_CAT) return -EPERM; break; } if (net_spot >= net_clen_bits) return -ENOSPC; netlbl_bitmap_setbit(net_cat, net_spot, 1); if (net_spot > net_spot_max) net_spot_max = net_spot; } if (++net_spot_max % 8) return net_spot_max / 8 + 1; return net_spot_max / 8; } /** * cipso_v4_map_cat_rbm_ntoh - Perform a category mapping from network to host * @doi_def: the DOI definition * @net_cat: the category bitmap in network/CIPSO format * @net_cat_len: the length of the CIPSO bitmap in bytes * @secattr: the security attributes * * Description: * Perform a label mapping to translate a CIPSO bitmap to the correct local * MLS category bitmap using the given DOI definition. Returns zero on * success, negative values on failure. * */ static int cipso_v4_map_cat_rbm_ntoh(const struct cipso_v4_doi *doi_def, const unsigned char *net_cat, u32 net_cat_len, struct netlbl_lsm_secattr *secattr) { int ret_val; int net_spot = -1; u32 host_spot = CIPSO_V4_INV_CAT; u32 net_clen_bits = net_cat_len * 8; u32 net_cat_size = 0; u32 *net_cat_array = NULL; if (doi_def->type == CIPSO_V4_MAP_TRANS) { net_cat_size = doi_def->map.std->cat.cipso_size; net_cat_array = doi_def->map.std->cat.cipso; } for (;;) { net_spot = netlbl_bitmap_walk(net_cat, net_clen_bits, net_spot + 1, 1); if (net_spot < 0) { if (net_spot == -2) return -EFAULT; return 0; } switch (doi_def->type) { case CIPSO_V4_MAP_PASS: host_spot = net_spot; break; case CIPSO_V4_MAP_TRANS: if (net_spot >= net_cat_size) return -EPERM; host_spot = net_cat_array[net_spot]; if (host_spot >= CIPSO_V4_INV_CAT) return -EPERM; break; } ret_val = netlbl_catmap_setbit(&secattr->attr.mls.cat, host_spot, GFP_ATOMIC); if (ret_val != 0) return ret_val; } return -EINVAL; } /** * cipso_v4_map_cat_enum_valid - Checks to see if the categories are valid * @doi_def: the DOI definition * @enumcat: category list * @enumcat_len: length of the category list in bytes * * Description: * Checks the given categories against the given DOI definition and returns a * negative value if any of the categories do not have a valid mapping and a * zero value if all of the categories are valid. 
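 *
 * Illustrative example (editorial addition, category values are made up):
 * the enumerated tag carries a list of 16-bit category values that must be
 * strictly increasing, so { 2, 5, 9 } is accepted while { 2, 9, 5 } or a
 * list with an odd byte length is rejected with -EFAULT.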
* */ static int cipso_v4_map_cat_enum_valid(const struct cipso_v4_doi *doi_def, const unsigned char *enumcat, u32 enumcat_len) { u16 cat; int cat_prev = -1; u32 iter; if (doi_def->type != CIPSO_V4_MAP_PASS || enumcat_len & 0x01) return -EFAULT; for (iter = 0; iter < enumcat_len; iter += 2) { cat = get_unaligned_be16(&enumcat[iter]); if (cat <= cat_prev) return -EFAULT; cat_prev = cat; } return 0; } /** * cipso_v4_map_cat_enum_hton - Perform a category mapping from host to network * @doi_def: the DOI definition * @secattr: the security attributes * @net_cat: the zero'd out category list in network/CIPSO format * @net_cat_len: the length of the CIPSO category list in bytes * * Description: * Perform a label mapping to translate a local MLS category bitmap to the * correct CIPSO category list using the given DOI definition. Returns the * size in bytes of the network category bitmap on success, negative values * otherwise. * */ static int cipso_v4_map_cat_enum_hton(const struct cipso_v4_doi *doi_def, const struct netlbl_lsm_secattr *secattr, unsigned char *net_cat, u32 net_cat_len) { int cat = -1; u32 cat_iter = 0; for (;;) { cat = netlbl_catmap_walk(secattr->attr.mls.cat, cat + 1); if (cat < 0) break; if ((cat_iter + 2) > net_cat_len) return -ENOSPC; *((__be16 *)&net_cat[cat_iter]) = htons(cat); cat_iter += 2; } return cat_iter; } /** * cipso_v4_map_cat_enum_ntoh - Perform a category mapping from network to host * @doi_def: the DOI definition * @net_cat: the category list in network/CIPSO format * @net_cat_len: the length of the CIPSO bitmap in bytes * @secattr: the security attributes * * Description: * Perform a label mapping to translate a CIPSO category list to the correct * local MLS category bitmap using the given DOI definition. Returns zero on * success, negative values on failure. * */ static int cipso_v4_map_cat_enum_ntoh(const struct cipso_v4_doi *doi_def, const unsigned char *net_cat, u32 net_cat_len, struct netlbl_lsm_secattr *secattr) { int ret_val; u32 iter; for (iter = 0; iter < net_cat_len; iter += 2) { ret_val = netlbl_catmap_setbit(&secattr->attr.mls.cat, get_unaligned_be16(&net_cat[iter]), GFP_ATOMIC); if (ret_val != 0) return ret_val; } return 0; } /** * cipso_v4_map_cat_rng_valid - Checks to see if the categories are valid * @doi_def: the DOI definition * @rngcat: category list * @rngcat_len: length of the category list in bytes * * Description: * Checks the given categories against the given DOI definition and returns a * negative value if any of the categories do not have a valid mapping and a * zero value if all of the categories are valid. 
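 *
 * Illustrative example (editorial addition, category values are made up):
 * the ranged tag stores 16-bit "top of range"/"bottom of range" pairs in
 * descending order, with the bottom of the final range omitted when it is
 * zero.  The ranges 16-20 and 0-4 can therefore be encoded as the three
 * 16-bit values { 20, 16, 4 }, which this function accepts.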
* */ static int cipso_v4_map_cat_rng_valid(const struct cipso_v4_doi *doi_def, const unsigned char *rngcat, u32 rngcat_len) { u16 cat_high; u16 cat_low; u32 cat_prev = CIPSO_V4_MAX_REM_CATS + 1; u32 iter; if (doi_def->type != CIPSO_V4_MAP_PASS || rngcat_len & 0x01) return -EFAULT; for (iter = 0; iter < rngcat_len; iter += 4) { cat_high = get_unaligned_be16(&rngcat[iter]); if ((iter + 4) <= rngcat_len) cat_low = get_unaligned_be16(&rngcat[iter + 2]); else cat_low = 0; if (cat_high > cat_prev) return -EFAULT; cat_prev = cat_low; } return 0; } /** * cipso_v4_map_cat_rng_hton - Perform a category mapping from host to network * @doi_def: the DOI definition * @secattr: the security attributes * @net_cat: the zero'd out category list in network/CIPSO format * @net_cat_len: the length of the CIPSO category list in bytes * * Description: * Perform a label mapping to translate a local MLS category bitmap to the * correct CIPSO category list using the given DOI definition. Returns the * size in bytes of the network category bitmap on success, negative values * otherwise. * */ static int cipso_v4_map_cat_rng_hton(const struct cipso_v4_doi *doi_def, const struct netlbl_lsm_secattr *secattr, unsigned char *net_cat, u32 net_cat_len) { int iter = -1; u16 array[CIPSO_V4_TAG_RNG_CAT_MAX * 2]; u32 array_cnt = 0; u32 cat_size = 0; /* make sure we don't overflow the 'array[]' variable */ if (net_cat_len > (CIPSO_V4_OPT_LEN_MAX - CIPSO_V4_HDR_LEN - CIPSO_V4_TAG_RNG_BLEN)) return -ENOSPC; for (;;) { iter = netlbl_catmap_walk(secattr->attr.mls.cat, iter + 1); if (iter < 0) break; cat_size += (iter == 0 ? 0 : sizeof(u16)); if (cat_size > net_cat_len) return -ENOSPC; array[array_cnt++] = iter; iter = netlbl_catmap_walkrng(secattr->attr.mls.cat, iter); if (iter < 0) return -EFAULT; cat_size += sizeof(u16); if (cat_size > net_cat_len) return -ENOSPC; array[array_cnt++] = iter; } for (iter = 0; array_cnt > 0;) { *((__be16 *)&net_cat[iter]) = htons(array[--array_cnt]); iter += 2; array_cnt--; if (array[array_cnt] != 0) { *((__be16 *)&net_cat[iter]) = htons(array[array_cnt]); iter += 2; } } return cat_size; } /** * cipso_v4_map_cat_rng_ntoh - Perform a category mapping from network to host * @doi_def: the DOI definition * @net_cat: the category list in network/CIPSO format * @net_cat_len: the length of the CIPSO bitmap in bytes * @secattr: the security attributes * * Description: * Perform a label mapping to translate a CIPSO category list to the correct * local MLS category bitmap using the given DOI definition. Returns zero on * success, negative values on failure. * */ static int cipso_v4_map_cat_rng_ntoh(const struct cipso_v4_doi *doi_def, const unsigned char *net_cat, u32 net_cat_len, struct netlbl_lsm_secattr *secattr) { int ret_val; u32 net_iter; u16 cat_low; u16 cat_high; for (net_iter = 0; net_iter < net_cat_len; net_iter += 4) { cat_high = get_unaligned_be16(&net_cat[net_iter]); if ((net_iter + 4) <= net_cat_len) cat_low = get_unaligned_be16(&net_cat[net_iter + 2]); else cat_low = 0; ret_val = netlbl_catmap_setrng(&secattr->attr.mls.cat, cat_low, cat_high, GFP_ATOMIC); if (ret_val != 0) return ret_val; } return 0; } /* * Protocol Handling Functions */ /** * cipso_v4_gentag_hdr - Generate a CIPSO option header * @doi_def: the DOI definition * @len: the total tag length in bytes, not including this header * @buf: the CIPSO option buffer * * Description: * Write a CIPSO header into the beginning of @buffer. 
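 *
 * Illustrative example (editorial addition, the DOI value is made up): for
 * a DOI of 3 and a 10 byte tag the header written is
 *
 *	{ IPOPT_CIPSO, 16, 0x00, 0x00, 0x00, 0x03 }
 *
 * i.e. the option type, the total option length (CIPSO_V4_HDR_LEN + len)
 * and the DOI in network byte order; the tag itself starts at &buf[6].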
* */ static void cipso_v4_gentag_hdr(const struct cipso_v4_doi *doi_def, unsigned char *buf, u32 len) { buf[0] = IPOPT_CIPSO; buf[1] = CIPSO_V4_HDR_LEN + len; *(__be32 *)&buf[2] = htonl(doi_def->doi); } /** * cipso_v4_gentag_rbm - Generate a CIPSO restricted bitmap tag (type #1) * @doi_def: the DOI definition * @secattr: the security attributes * @buffer: the option buffer * @buffer_len: length of buffer in bytes * * Description: * Generate a CIPSO option using the restricted bitmap tag, tag type #1. The * actual buffer length may be larger than the indicated size due to * translation between host and network category bitmaps. Returns the size of * the tag on success, negative values on failure. * */ static int cipso_v4_gentag_rbm(const struct cipso_v4_doi *doi_def, const struct netlbl_lsm_secattr *secattr, unsigned char *buffer, u32 buffer_len) { int ret_val; u32 tag_len; u32 level; if ((secattr->flags & NETLBL_SECATTR_MLS_LVL) == 0) return -EPERM; ret_val = cipso_v4_map_lvl_hton(doi_def, secattr->attr.mls.lvl, &level); if (ret_val != 0) return ret_val; if (secattr->flags & NETLBL_SECATTR_MLS_CAT) { ret_val = cipso_v4_map_cat_rbm_hton(doi_def, secattr, &buffer[4], buffer_len - 4); if (ret_val < 0) return ret_val; /* This will send packets using the "optimized" format when * possible as specified in section 3.4.2.6 of the * CIPSO draft. */ if (READ_ONCE(cipso_v4_rbm_optfmt) && ret_val > 0 && ret_val <= 10) tag_len = 14; else tag_len = 4 + ret_val; } else tag_len = 4; buffer[0] = CIPSO_V4_TAG_RBITMAP; buffer[1] = tag_len; buffer[3] = level; return tag_len; } /** * cipso_v4_parsetag_rbm - Parse a CIPSO restricted bitmap tag * @doi_def: the DOI definition * @tag: the CIPSO tag * @secattr: the security attributes * * Description: * Parse a CIPSO restricted bitmap tag (tag type #1) and return the security * attributes in @secattr. Return zero on success, negatives values on * failure. * */ static int cipso_v4_parsetag_rbm(const struct cipso_v4_doi *doi_def, const unsigned char *tag, struct netlbl_lsm_secattr *secattr) { int ret_val; u8 tag_len = tag[1]; u32 level; ret_val = cipso_v4_map_lvl_ntoh(doi_def, tag[3], &level); if (ret_val != 0) return ret_val; secattr->attr.mls.lvl = level; secattr->flags |= NETLBL_SECATTR_MLS_LVL; if (tag_len > 4) { ret_val = cipso_v4_map_cat_rbm_ntoh(doi_def, &tag[4], tag_len - 4, secattr); if (ret_val != 0) { netlbl_catmap_free(secattr->attr.mls.cat); return ret_val; } if (secattr->attr.mls.cat) secattr->flags |= NETLBL_SECATTR_MLS_CAT; } return 0; } /** * cipso_v4_gentag_enum - Generate a CIPSO enumerated tag (type #2) * @doi_def: the DOI definition * @secattr: the security attributes * @buffer: the option buffer * @buffer_len: length of buffer in bytes * * Description: * Generate a CIPSO option using the enumerated tag, tag type #2. Returns the * size of the tag on success, negative values on failure. 
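 *
 * Illustrative example (editorial addition, level and category values are
 * made up): for MLS level 5 and categories { 2, 9 }, and assuming the
 * caller handed in a zeroed buffer, the generated tag is
 *
 *	{ CIPSO_V4_TAG_ENUM, 8, 0x00, 5, 0x00, 0x02, 0x00, 0x09 }
 *
 * i.e. tag type, tag length, an unused alignment octet, the mapped level
 * and one 16-bit category value per category.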
* */ static int cipso_v4_gentag_enum(const struct cipso_v4_doi *doi_def, const struct netlbl_lsm_secattr *secattr, unsigned char *buffer, u32 buffer_len) { int ret_val; u32 tag_len; u32 level; if (!(secattr->flags & NETLBL_SECATTR_MLS_LVL)) return -EPERM; ret_val = cipso_v4_map_lvl_hton(doi_def, secattr->attr.mls.lvl, &level); if (ret_val != 0) return ret_val; if (secattr->flags & NETLBL_SECATTR_MLS_CAT) { ret_val = cipso_v4_map_cat_enum_hton(doi_def, secattr, &buffer[4], buffer_len - 4); if (ret_val < 0) return ret_val; tag_len = 4 + ret_val; } else tag_len = 4; buffer[0] = CIPSO_V4_TAG_ENUM; buffer[1] = tag_len; buffer[3] = level; return tag_len; } /** * cipso_v4_parsetag_enum - Parse a CIPSO enumerated tag * @doi_def: the DOI definition * @tag: the CIPSO tag * @secattr: the security attributes * * Description: * Parse a CIPSO enumerated tag (tag type #2) and return the security * attributes in @secattr. Return zero on success, negatives values on * failure. * */ static int cipso_v4_parsetag_enum(const struct cipso_v4_doi *doi_def, const unsigned char *tag, struct netlbl_lsm_secattr *secattr) { int ret_val; u8 tag_len = tag[1]; u32 level; ret_val = cipso_v4_map_lvl_ntoh(doi_def, tag[3], &level); if (ret_val != 0) return ret_val; secattr->attr.mls.lvl = level; secattr->flags |= NETLBL_SECATTR_MLS_LVL; if (tag_len > 4) { ret_val = cipso_v4_map_cat_enum_ntoh(doi_def, &tag[4], tag_len - 4, secattr); if (ret_val != 0) { netlbl_catmap_free(secattr->attr.mls.cat); return ret_val; } secattr->flags |= NETLBL_SECATTR_MLS_CAT; } return 0; } /** * cipso_v4_gentag_rng - Generate a CIPSO ranged tag (type #5) * @doi_def: the DOI definition * @secattr: the security attributes * @buffer: the option buffer * @buffer_len: length of buffer in bytes * * Description: * Generate a CIPSO option using the ranged tag, tag type #5. Returns the * size of the tag on success, negative values on failure. * */ static int cipso_v4_gentag_rng(const struct cipso_v4_doi *doi_def, const struct netlbl_lsm_secattr *secattr, unsigned char *buffer, u32 buffer_len) { int ret_val; u32 tag_len; u32 level; if (!(secattr->flags & NETLBL_SECATTR_MLS_LVL)) return -EPERM; ret_val = cipso_v4_map_lvl_hton(doi_def, secattr->attr.mls.lvl, &level); if (ret_val != 0) return ret_val; if (secattr->flags & NETLBL_SECATTR_MLS_CAT) { ret_val = cipso_v4_map_cat_rng_hton(doi_def, secattr, &buffer[4], buffer_len - 4); if (ret_val < 0) return ret_val; tag_len = 4 + ret_val; } else tag_len = 4; buffer[0] = CIPSO_V4_TAG_RANGE; buffer[1] = tag_len; buffer[3] = level; return tag_len; } /** * cipso_v4_parsetag_rng - Parse a CIPSO ranged tag * @doi_def: the DOI definition * @tag: the CIPSO tag * @secattr: the security attributes * * Description: * Parse a CIPSO ranged tag (tag type #5) and return the security attributes * in @secattr. Return zero on success, negatives values on failure. 
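 *
 * Illustrative example (editorial addition, category values are made up):
 * a ranged tag whose category data is the 16-bit sequence { 12, 10, 3 } is
 * parsed as the ranges 10-12 and 0-3 (the omitted low end of the final
 * range defaults to zero), and each range is added to the local category
 * map with netlbl_catmap_setrng().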
* */ static int cipso_v4_parsetag_rng(const struct cipso_v4_doi *doi_def, const unsigned char *tag, struct netlbl_lsm_secattr *secattr) { int ret_val; u8 tag_len = tag[1]; u32 level; ret_val = cipso_v4_map_lvl_ntoh(doi_def, tag[3], &level); if (ret_val != 0) return ret_val; secattr->attr.mls.lvl = level; secattr->flags |= NETLBL_SECATTR_MLS_LVL; if (tag_len > 4) { ret_val = cipso_v4_map_cat_rng_ntoh(doi_def, &tag[4], tag_len - 4, secattr); if (ret_val != 0) { netlbl_catmap_free(secattr->attr.mls.cat); return ret_val; } if (secattr->attr.mls.cat) secattr->flags |= NETLBL_SECATTR_MLS_CAT; } return 0; } /** * cipso_v4_gentag_loc - Generate a CIPSO local tag (non-standard) * @doi_def: the DOI definition * @secattr: the security attributes * @buffer: the option buffer * @buffer_len: length of buffer in bytes * * Description: * Generate a CIPSO option using the local tag. Returns the size of the tag * on success, negative values on failure. * */ static int cipso_v4_gentag_loc(const struct cipso_v4_doi *doi_def, const struct netlbl_lsm_secattr *secattr, unsigned char *buffer, u32 buffer_len) { if (!(secattr->flags & NETLBL_SECATTR_SECID)) return -EPERM; buffer[0] = CIPSO_V4_TAG_LOCAL; buffer[1] = CIPSO_V4_TAG_LOC_BLEN; *(u32 *)&buffer[2] = secattr->attr.secid; return CIPSO_V4_TAG_LOC_BLEN; } /** * cipso_v4_parsetag_loc - Parse a CIPSO local tag * @doi_def: the DOI definition * @tag: the CIPSO tag * @secattr: the security attributes * * Description: * Parse a CIPSO local tag and return the security attributes in @secattr. * Return zero on success, negatives values on failure. * */ static int cipso_v4_parsetag_loc(const struct cipso_v4_doi *doi_def, const unsigned char *tag, struct netlbl_lsm_secattr *secattr) { secattr->attr.secid = *(u32 *)&tag[2]; secattr->flags |= NETLBL_SECATTR_SECID; return 0; } /** * cipso_v4_optptr - Find the CIPSO option in the packet * @skb: the packet * * Description: * Parse the packet's IP header looking for a CIPSO option. Returns a pointer * to the start of the CIPSO option on success, NULL if one is not found. * */ unsigned char *cipso_v4_optptr(const struct sk_buff *skb) { const struct iphdr *iph = ip_hdr(skb); unsigned char *optptr = (unsigned char *)&(ip_hdr(skb)[1]); int optlen; int taglen; for (optlen = iph->ihl*4 - sizeof(struct iphdr); optlen > 1; ) { switch (optptr[0]) { case IPOPT_END: return NULL; case IPOPT_NOOP: taglen = 1; break; default: taglen = optptr[1]; } if (!taglen || taglen > optlen) return NULL; if (optptr[0] == IPOPT_CIPSO) return optptr; optlen -= taglen; optptr += taglen; } return NULL; } /** * cipso_v4_validate - Validate a CIPSO option * @option: the start of the option, on error it is set to point to the error * * Description: * This routine is called to validate a CIPSO option, it checks all of the * fields to ensure that they are at least valid, see the draft snippet below * for details. If the option is valid then a zero value is returned and * the value of @option is unchanged. If the option is invalid then a * non-zero value is returned and @option is adjusted to point to the * offending portion of the option. From the IETF draft ... * * "If any field within the CIPSO options, such as the DOI identifier, is not * recognized the IP datagram is discarded and an ICMP 'parameter problem' * (type 12) is generated and returned. The ICMP code field is set to 'bad * parameter' (code 0) and the pointer is set to the start of the CIPSO field * that is unrecognized." 
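 *
 * (An illustrative note on the checks below, not text from the draft: the
 * option must be at least eight bytes long, i.e. the six byte CIPSO header
 * of option type 0x86, total length and a four byte DOI in network byte
 * order, plus at least a tag type and tag length octet; each tag type then
 * enforces its own minimum length.)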
* */ int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option) { unsigned char *opt = *option; unsigned char *tag; unsigned char opt_iter; unsigned char err_offset = 0; u8 opt_len; u8 tag_len; struct cipso_v4_doi *doi_def = NULL; u32 tag_iter; /* caller already checks for length values that are too large */ opt_len = opt[1]; if (opt_len < 8) { err_offset = 1; goto validate_return; } rcu_read_lock(); doi_def = cipso_v4_doi_search(get_unaligned_be32(&opt[2])); if (!doi_def) { err_offset = 2; goto validate_return_locked; } opt_iter = CIPSO_V4_HDR_LEN; tag = opt + opt_iter; while (opt_iter < opt_len) { for (tag_iter = 0; doi_def->tags[tag_iter] != tag[0];) if (doi_def->tags[tag_iter] == CIPSO_V4_TAG_INVALID || ++tag_iter == CIPSO_V4_TAG_MAXCNT) { err_offset = opt_iter; goto validate_return_locked; } if (opt_iter + 1 == opt_len) { err_offset = opt_iter; goto validate_return_locked; } tag_len = tag[1]; if (tag_len > (opt_len - opt_iter)) { err_offset = opt_iter + 1; goto validate_return_locked; } switch (tag[0]) { case CIPSO_V4_TAG_RBITMAP: if (tag_len < CIPSO_V4_TAG_RBM_BLEN) { err_offset = opt_iter + 1; goto validate_return_locked; } /* We are already going to do all the verification * necessary at the socket layer so from our point of * view it is safe to turn these checks off (and less * work), however, the CIPSO draft says we should do * all the CIPSO validations here but it doesn't * really specify _exactly_ what we need to validate * ... so, just make it a sysctl tunable. */ if (READ_ONCE(cipso_v4_rbm_strictvalid)) { if (cipso_v4_map_lvl_valid(doi_def, tag[3]) < 0) { err_offset = opt_iter + 3; goto validate_return_locked; } if (tag_len > CIPSO_V4_TAG_RBM_BLEN && cipso_v4_map_cat_rbm_valid(doi_def, &tag[4], tag_len - 4) < 0) { err_offset = opt_iter + 4; goto validate_return_locked; } } break; case CIPSO_V4_TAG_ENUM: if (tag_len < CIPSO_V4_TAG_ENUM_BLEN) { err_offset = opt_iter + 1; goto validate_return_locked; } if (cipso_v4_map_lvl_valid(doi_def, tag[3]) < 0) { err_offset = opt_iter + 3; goto validate_return_locked; } if (tag_len > CIPSO_V4_TAG_ENUM_BLEN && cipso_v4_map_cat_enum_valid(doi_def, &tag[4], tag_len - 4) < 0) { err_offset = opt_iter + 4; goto validate_return_locked; } break; case CIPSO_V4_TAG_RANGE: if (tag_len < CIPSO_V4_TAG_RNG_BLEN) { err_offset = opt_iter + 1; goto validate_return_locked; } if (cipso_v4_map_lvl_valid(doi_def, tag[3]) < 0) { err_offset = opt_iter + 3; goto validate_return_locked; } if (tag_len > CIPSO_V4_TAG_RNG_BLEN && cipso_v4_map_cat_rng_valid(doi_def, &tag[4], tag_len - 4) < 0) { err_offset = opt_iter + 4; goto validate_return_locked; } break; case CIPSO_V4_TAG_LOCAL: /* This is a non-standard tag that we only allow for * local connections, so if the incoming interface is * not the loopback device drop the packet. Further, * there is no legitimate reason for setting this from * userspace so reject it if skb is NULL. 
*/ if (!skb || !(skb->dev->flags & IFF_LOOPBACK)) { err_offset = opt_iter; goto validate_return_locked; } if (tag_len != CIPSO_V4_TAG_LOC_BLEN) { err_offset = opt_iter + 1; goto validate_return_locked; } break; default: err_offset = opt_iter; goto validate_return_locked; } tag += tag_len; opt_iter += tag_len; } validate_return_locked: rcu_read_unlock(); validate_return: *option = opt + err_offset; return err_offset; } /** * cipso_v4_error - Send the correct response for a bad packet * @skb: the packet * @error: the error code * @gateway: CIPSO gateway flag * * Description: * Based on the error code given in @error, send an ICMP error message back to * the originating host. From the IETF draft ... * * "If the contents of the CIPSO [option] are valid but the security label is * outside of the configured host or port label range, the datagram is * discarded and an ICMP 'destination unreachable' (type 3) is generated and * returned. The code field of the ICMP is set to 'communication with * destination network administratively prohibited' (code 9) or to * 'communication with destination host administratively prohibited' * (code 10). The value of the code is dependent on whether the originator * of the ICMP message is acting as a CIPSO host or a CIPSO gateway. The * recipient of the ICMP message MUST be able to handle either value. The * same procedure is performed if a CIPSO [option] can not be added to an * IP packet because it is too large to fit in the IP options area." * * "If the error is triggered by receipt of an ICMP message, the message is * discarded and no response is permitted (consistent with general ICMP * processing rules)." * */ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway) { unsigned char optbuf[sizeof(struct ip_options) + 40]; struct ip_options *opt = (struct ip_options *)optbuf; int res; if (ip_hdr(skb)->protocol == IPPROTO_ICMP || error != -EACCES) return; /* * We might be called above the IP layer, * so we can not use icmp_send and IPCB here. */ memset(opt, 0, sizeof(struct ip_options)); opt->optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr); rcu_read_lock(); res = __ip_options_compile(dev_net(skb->dev), opt, skb, NULL); rcu_read_unlock(); if (res) return; if (gateway) __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0, opt); else __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0, opt); } /** * cipso_v4_genopt - Generate a CIPSO option * @buf: the option buffer * @buf_len: the size of opt_buf * @doi_def: the CIPSO DOI to use * @secattr: the security attributes * * Description: * Generate a CIPSO option using the DOI definition and security attributes * passed to the function. Returns the length of the option on success and * negative values on failure. * */ static int cipso_v4_genopt(unsigned char *buf, u32 buf_len, const struct cipso_v4_doi *doi_def, const struct netlbl_lsm_secattr *secattr) { int ret_val; u32 iter; if (buf_len <= CIPSO_V4_HDR_LEN) return -ENOSPC; /* XXX - This code assumes only one tag per CIPSO option which isn't * really a good assumption to make but since we only support the MAC * tags right now it is a safe assumption. 
*/ iter = 0; do { memset(buf, 0, buf_len); switch (doi_def->tags[iter]) { case CIPSO_V4_TAG_RBITMAP: ret_val = cipso_v4_gentag_rbm(doi_def, secattr, &buf[CIPSO_V4_HDR_LEN], buf_len - CIPSO_V4_HDR_LEN); break; case CIPSO_V4_TAG_ENUM: ret_val = cipso_v4_gentag_enum(doi_def, secattr, &buf[CIPSO_V4_HDR_LEN], buf_len - CIPSO_V4_HDR_LEN); break; case CIPSO_V4_TAG_RANGE: ret_val = cipso_v4_gentag_rng(doi_def, secattr, &buf[CIPSO_V4_HDR_LEN], buf_len - CIPSO_V4_HDR_LEN); break; case CIPSO_V4_TAG_LOCAL: ret_val = cipso_v4_gentag_loc(doi_def, secattr, &buf[CIPSO_V4_HDR_LEN], buf_len - CIPSO_V4_HDR_LEN); break; default: return -EPERM; } iter++; } while (ret_val < 0 && iter < CIPSO_V4_TAG_MAXCNT && doi_def->tags[iter] != CIPSO_V4_TAG_INVALID); if (ret_val < 0) return ret_val; cipso_v4_gentag_hdr(doi_def, buf, ret_val); return CIPSO_V4_HDR_LEN + ret_val; } /** * cipso_v4_sock_setattr - Add a CIPSO option to a socket * @sk: the socket * @doi_def: the CIPSO DOI to use * @secattr: the specific security attributes of the socket * * Description: * Set the CIPSO option on the given socket using the DOI definition and * security attributes passed to the function. This function requires * exclusive access to @sk, which means it either needs to be in the * process of being created or locked. Returns zero on success and negative * values on failure. * */ int cipso_v4_sock_setattr(struct sock *sk, const struct cipso_v4_doi *doi_def, const struct netlbl_lsm_secattr *secattr) { int ret_val = -EPERM; unsigned char *buf = NULL; u32 buf_len; u32 opt_len; struct ip_options_rcu *old, *opt = NULL; struct inet_sock *sk_inet; struct inet_connection_sock *sk_conn; /* In the case of sock_create_lite(), the sock->sk field is not * defined yet but it is not a problem as the only users of these * "lite" PF_INET sockets are functions which do an accept() call * afterwards so we will label the socket as part of the accept(). */ if (!sk) return 0; /* We allocate the maximum CIPSO option size here so we are probably * being a little wasteful, but it makes our life _much_ easier later * on and after all we are only talking about 40 bytes. */ buf_len = CIPSO_V4_OPT_LEN_MAX; buf = kmalloc(buf_len, GFP_ATOMIC); if (!buf) { ret_val = -ENOMEM; goto socket_setattr_failure; } ret_val = cipso_v4_genopt(buf, buf_len, doi_def, secattr); if (ret_val < 0) goto socket_setattr_failure; buf_len = ret_val; /* We can't use ip_options_get() directly because it makes a call to * ip_options_get_alloc() which allocates memory with GFP_KERNEL and * we won't always have CAP_NET_RAW even though we _always_ want to * set the IPOPT_CIPSO option. 
*/ opt_len = (buf_len + 3) & ~3; opt = kzalloc(sizeof(*opt) + opt_len, GFP_ATOMIC); if (!opt) { ret_val = -ENOMEM; goto socket_setattr_failure; } memcpy(opt->opt.__data, buf, buf_len); opt->opt.optlen = opt_len; opt->opt.cipso = sizeof(struct iphdr); kfree(buf); buf = NULL; sk_inet = inet_sk(sk); old = rcu_dereference_protected(sk_inet->inet_opt, lockdep_sock_is_held(sk)); if (sk_inet->is_icsk) { sk_conn = inet_csk(sk); if (old) sk_conn->icsk_ext_hdr_len -= old->opt.optlen; sk_conn->icsk_ext_hdr_len += opt->opt.optlen; sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie); } rcu_assign_pointer(sk_inet->inet_opt, opt); if (old) kfree_rcu(old, rcu); return 0; socket_setattr_failure: kfree(buf); kfree(opt); return ret_val; } /** * cipso_v4_req_setattr - Add a CIPSO option to a connection request socket * @req: the connection request socket * @doi_def: the CIPSO DOI to use * @secattr: the specific security attributes of the socket * * Description: * Set the CIPSO option on the given socket using the DOI definition and * security attributes passed to the function. Returns zero on success and * negative values on failure. * */ int cipso_v4_req_setattr(struct request_sock *req, const struct cipso_v4_doi *doi_def, const struct netlbl_lsm_secattr *secattr) { int ret_val = -EPERM; unsigned char *buf = NULL; u32 buf_len; u32 opt_len; struct ip_options_rcu *opt = NULL; struct inet_request_sock *req_inet; /* We allocate the maximum CIPSO option size here so we are probably * being a little wasteful, but it makes our life _much_ easier later * on and after all we are only talking about 40 bytes. */ buf_len = CIPSO_V4_OPT_LEN_MAX; buf = kmalloc(buf_len, GFP_ATOMIC); if (!buf) { ret_val = -ENOMEM; goto req_setattr_failure; } ret_val = cipso_v4_genopt(buf, buf_len, doi_def, secattr); if (ret_val < 0) goto req_setattr_failure; buf_len = ret_val; /* We can't use ip_options_get() directly because it makes a call to * ip_options_get_alloc() which allocates memory with GFP_KERNEL and * we won't always have CAP_NET_RAW even though we _always_ want to * set the IPOPT_CIPSO option. */ opt_len = (buf_len + 3) & ~3; opt = kzalloc(sizeof(*opt) + opt_len, GFP_ATOMIC); if (!opt) { ret_val = -ENOMEM; goto req_setattr_failure; } memcpy(opt->opt.__data, buf, buf_len); opt->opt.optlen = opt_len; opt->opt.cipso = sizeof(struct iphdr); kfree(buf); buf = NULL; req_inet = inet_rsk(req); opt = xchg((__force struct ip_options_rcu **)&req_inet->ireq_opt, opt); if (opt) kfree_rcu(opt, rcu); return 0; req_setattr_failure: kfree(buf); kfree(opt); return ret_val; } /** * cipso_v4_delopt - Delete the CIPSO option from a set of IP options * @opt_ptr: IP option pointer * * Description: * Deletes the CIPSO IP option from a set of IP options and makes the necessary * adjustments to the IP option structure. Returns zero on success, negative * values on failure. 
* */ static int cipso_v4_delopt(struct ip_options_rcu __rcu **opt_ptr) { struct ip_options_rcu *opt = rcu_dereference_protected(*opt_ptr, 1); int hdr_delta = 0; if (!opt || opt->opt.cipso == 0) return 0; if (opt->opt.srr || opt->opt.rr || opt->opt.ts || opt->opt.router_alert) { u8 cipso_len; u8 cipso_off; unsigned char *cipso_ptr; int iter; int optlen_new; cipso_off = opt->opt.cipso - sizeof(struct iphdr); cipso_ptr = &opt->opt.__data[cipso_off]; cipso_len = cipso_ptr[1]; if (opt->opt.srr > opt->opt.cipso) opt->opt.srr -= cipso_len; if (opt->opt.rr > opt->opt.cipso) opt->opt.rr -= cipso_len; if (opt->opt.ts > opt->opt.cipso) opt->opt.ts -= cipso_len; if (opt->opt.router_alert > opt->opt.cipso) opt->opt.router_alert -= cipso_len; opt->opt.cipso = 0; memmove(cipso_ptr, cipso_ptr + cipso_len, opt->opt.optlen - cipso_off - cipso_len); /* determining the new total option length is tricky because of * the padding necessary, the only thing i can think to do at * this point is walk the options one-by-one, skipping the * padding at the end to determine the actual option size and * from there we can determine the new total option length */ iter = 0; optlen_new = 0; while (iter < opt->opt.optlen) if (opt->opt.__data[iter] != IPOPT_NOP) { iter += opt->opt.__data[iter + 1]; optlen_new = iter; } else iter++; hdr_delta = opt->opt.optlen; opt->opt.optlen = (optlen_new + 3) & ~3; hdr_delta -= opt->opt.optlen; } else { /* only the cipso option was present on the socket so we can * remove the entire option struct */ *opt_ptr = NULL; hdr_delta = opt->opt.optlen; kfree_rcu(opt, rcu); } return hdr_delta; } /** * cipso_v4_sock_delattr - Delete the CIPSO option from a socket * @sk: the socket * * Description: * Removes the CIPSO option from a socket, if present. * */ void cipso_v4_sock_delattr(struct sock *sk) { struct inet_sock *sk_inet; int hdr_delta; sk_inet = inet_sk(sk); hdr_delta = cipso_v4_delopt(&sk_inet->inet_opt); if (sk_inet->is_icsk && hdr_delta > 0) { struct inet_connection_sock *sk_conn = inet_csk(sk); sk_conn->icsk_ext_hdr_len -= hdr_delta; sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie); } } /** * cipso_v4_req_delattr - Delete the CIPSO option from a request socket * @reg: the request socket * * Description: * Removes the CIPSO option from a request socket, if present. * */ void cipso_v4_req_delattr(struct request_sock *req) { cipso_v4_delopt(&inet_rsk(req)->ireq_opt); } /** * cipso_v4_getattr - Helper function for the cipso_v4_*_getattr functions * @cipso: the CIPSO v4 option * @secattr: the security attributes * * Description: * Inspect @cipso and return the security attributes in @secattr. Returns zero * on success and negative values on failure. * */ int cipso_v4_getattr(const unsigned char *cipso, struct netlbl_lsm_secattr *secattr) { int ret_val = -ENOMSG; u32 doi; struct cipso_v4_doi *doi_def; if (cipso_v4_cache_check(cipso, cipso[1], secattr) == 0) return 0; doi = get_unaligned_be32(&cipso[2]); rcu_read_lock(); doi_def = cipso_v4_doi_search(doi); if (!doi_def) goto getattr_return; /* XXX - This code assumes only one tag per CIPSO option which isn't * really a good assumption to make but since we only support the MAC * tags right now it is a safe assumption. 
*/ switch (cipso[6]) { case CIPSO_V4_TAG_RBITMAP: ret_val = cipso_v4_parsetag_rbm(doi_def, &cipso[6], secattr); break; case CIPSO_V4_TAG_ENUM: ret_val = cipso_v4_parsetag_enum(doi_def, &cipso[6], secattr); break; case CIPSO_V4_TAG_RANGE: ret_val = cipso_v4_parsetag_rng(doi_def, &cipso[6], secattr); break; case CIPSO_V4_TAG_LOCAL: ret_val = cipso_v4_parsetag_loc(doi_def, &cipso[6], secattr); break; } if (ret_val == 0) secattr->type = NETLBL_NLTYPE_CIPSOV4; getattr_return: rcu_read_unlock(); return ret_val; } /** * cipso_v4_sock_getattr - Get the security attributes from a sock * @sk: the sock * @secattr: the security attributes * * Description: * Query @sk to see if there is a CIPSO option attached to the sock and if * there is return the CIPSO security attributes in @secattr. This function * requires that @sk be locked, or privately held, but it does not do any * locking itself. Returns zero on success and negative values on failure. * */ int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) { struct ip_options_rcu *opt; int res = -ENOMSG; rcu_read_lock(); opt = rcu_dereference(inet_sk(sk)->inet_opt); if (opt && opt->opt.cipso) res = cipso_v4_getattr(opt->opt.__data + opt->opt.cipso - sizeof(struct iphdr), secattr); rcu_read_unlock(); return res; } /** * cipso_v4_skbuff_setattr - Set the CIPSO option on a packet * @skb: the packet * @secattr: the security attributes * * Description: * Set the CIPSO option on the given packet based on the security attributes. * Returns a pointer to the IP header on success and NULL on failure. * */ int cipso_v4_skbuff_setattr(struct sk_buff *skb, const struct cipso_v4_doi *doi_def, const struct netlbl_lsm_secattr *secattr) { int ret_val; struct iphdr *iph; struct ip_options *opt = &IPCB(skb)->opt; unsigned char buf[CIPSO_V4_OPT_LEN_MAX]; u32 buf_len = CIPSO_V4_OPT_LEN_MAX; u32 opt_len; int len_delta; ret_val = cipso_v4_genopt(buf, buf_len, doi_def, secattr); if (ret_val < 0) return ret_val; buf_len = ret_val; opt_len = (buf_len + 3) & ~3; /* we overwrite any existing options to ensure that we have enough * room for the CIPSO option, the reason is that we _need_ to guarantee * that the security label is applied to the packet - we do the same * thing when using the socket options and it hasn't caused a problem, * if we need to we can always revisit this choice later */ len_delta = opt_len - opt->optlen; /* if we don't ensure enough headroom we could panic on the skb_push() * call below so make sure we have enough, we are also "mangling" the * packet so we should probably do a copy-on-write call anyway */ ret_val = skb_cow(skb, skb_headroom(skb) + len_delta); if (ret_val < 0) return ret_val; if (len_delta > 0) { /* we assume that the header + opt->optlen have already been * "pushed" in ip_options_build() or similar */ iph = ip_hdr(skb); skb_push(skb, len_delta); memmove((char *)iph - len_delta, iph, iph->ihl << 2); skb_reset_network_header(skb); iph = ip_hdr(skb); } else if (len_delta < 0) { iph = ip_hdr(skb); memset(iph + 1, IPOPT_NOP, opt->optlen); } else iph = ip_hdr(skb); if (opt->optlen > 0) memset(opt, 0, sizeof(*opt)); opt->optlen = opt_len; opt->cipso = sizeof(struct iphdr); opt->is_changed = 1; /* we have to do the following because we are being called from a * netfilter hook which means the packet already has had the header * fields populated and the checksum calculated - yes this means we * are doing more work than needed but we do it to keep the core * stack clean and tidy */ memcpy(iph + 1, buf, buf_len); if (opt_len > 
buf_len) memset((char *)(iph + 1) + buf_len, 0, opt_len - buf_len); if (len_delta != 0) { iph->ihl = 5 + (opt_len >> 2); iph->tot_len = htons(skb->len); } ip_send_check(iph); return 0; } /** * cipso_v4_skbuff_delattr - Delete any CIPSO options from a packet * @skb: the packet * * Description: * Removes any and all CIPSO options from the given packet. Returns zero on * success, negative values on failure. * */ int cipso_v4_skbuff_delattr(struct sk_buff *skb) { int ret_val; struct iphdr *iph; struct ip_options *opt = &IPCB(skb)->opt; unsigned char *cipso_ptr; if (opt->cipso == 0) return 0; /* since we are changing the packet we should make a copy */ ret_val = skb_cow(skb, skb_headroom(skb)); if (ret_val < 0) return ret_val; /* the easiest thing to do is just replace the cipso option with noop * options since we don't change the size of the packet, although we * still need to recalculate the checksum */ iph = ip_hdr(skb); cipso_ptr = (unsigned char *)iph + opt->cipso; memset(cipso_ptr, IPOPT_NOOP, cipso_ptr[1]); opt->cipso = 0; opt->is_changed = 1; ip_send_check(iph); return 0; } /* * Setup Functions */ /** * cipso_v4_init - Initialize the CIPSO module * * Description: * Initialize the CIPSO module and prepare it for use. Returns zero on success * and negative values on failure. * */ static int __init cipso_v4_init(void) { int ret_val; ret_val = cipso_v4_cache_init(); if (ret_val != 0) panic("Failed to initialize the CIPSO/IPv4 cache (%d)\n", ret_val); return 0; } subsys_initcall(cipso_v4_init);
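/*
 * Illustrative userspace sketch (not kernel code): it assembles the same
 * byte layout that cipso_v4_gentag_hdr() and cipso_v4_gentag_rbm() above
 * produce for a level-only option.  The DOI value (3) and level (5) are
 * made-up example values.
 */
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

int main(void)
{
	unsigned char opt[10];
	uint32_t doi = htonl(3);	/* example DOI, network byte order */

	/* restricted bitmap tag (type #1) carrying only a level: 4 bytes */
	opt[6] = 1;			/* tag type */
	opt[7] = 4;			/* tag length */
	opt[8] = 0;			/* alignment octet */
	opt[9] = 5;			/* example sensitivity level */

	/* 6 byte CIPSO header: option type 134 (0x86), total length, DOI */
	opt[0] = 134;
	opt[1] = 6 + opt[7];
	memcpy(&opt[2], &doi, sizeof(doi));

	for (size_t i = 0; i < sizeof(opt); i++)
		printf("%02x ", opt[i]);
	printf("\n");
	return 0;
}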
851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 /* * Copyright (C) 2011 Intel Corporation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #define pr_fmt(fmt) "llcp: %s: " fmt, __func__ #include <linux/init.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/nfc.h> #include <linux/sched/signal.h> #include "nfc.h" #include "llcp.h" static int sock_wait_state(struct sock *sk, int state, unsigned long timeo) { DECLARE_WAITQUEUE(wait, current); int err = 0; pr_debug("sk %p", sk); add_wait_queue(sk_sleep(sk), &wait); set_current_state(TASK_INTERRUPTIBLE); while (sk->sk_state != state) { if (!timeo) { err = -EINPROGRESS; break; } if (signal_pending(current)) { err = sock_intr_errno(timeo); break; } release_sock(sk); timeo = schedule_timeout(timeo); lock_sock(sk); set_current_state(TASK_INTERRUPTIBLE); err = sock_error(sk); if (err) break; } __set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); return err; } static struct proto llcp_sock_proto = { .name = "NFC_LLCP", .owner = THIS_MODULE, .obj_size = sizeof(struct nfc_llcp_sock), }; static int llcp_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) { struct sock *sk = sock->sk; struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk); struct nfc_llcp_local *local; struct nfc_dev *dev; struct sockaddr_nfc_llcp llcp_addr; int len, ret = 0; if (!addr || alen < offsetofend(struct sockaddr, sa_family) || addr->sa_family != AF_NFC) return -EINVAL; pr_debug("sk %p addr %p family %d\n", sk, addr, addr->sa_family); memset(&llcp_addr, 0, sizeof(llcp_addr)); len = min_t(unsigned int, sizeof(llcp_addr), alen); memcpy(&llcp_addr, addr, len); /* This is going to be a listening socket, dsap must be 0 */ if (llcp_addr.dsap != 0) return -EINVAL; lock_sock(sk); if (sk->sk_state != LLCP_CLOSED) { ret = -EBADFD; goto error; } dev = nfc_get_device(llcp_addr.dev_idx); if (dev == NULL) { ret = -ENODEV; goto error; } local = nfc_llcp_find_local(dev); if (local == NULL) { ret = -ENODEV; goto put_dev; } llcp_sock->dev = dev; llcp_sock->local 
= nfc_llcp_local_get(local); llcp_sock->nfc_protocol = llcp_addr.nfc_protocol; llcp_sock->service_name_len = min_t(unsigned int, llcp_addr.service_name_len, NFC_LLCP_MAX_SERVICE_NAME); llcp_sock->service_name = kmemdup(llcp_addr.service_name, llcp_sock->service_name_len, GFP_KERNEL); if (!llcp_sock->service_name) { nfc_llcp_local_put(llcp_sock->local); llcp_sock->local = NULL; llcp_sock->dev = NULL; ret = -ENOMEM; goto put_dev; } llcp_sock->ssap = nfc_llcp_get_sdp_ssap(local, llcp_sock); if (llcp_sock->ssap == LLCP_SAP_MAX) { nfc_llcp_local_put(llcp_sock->local); llcp_sock->local = NULL; kfree(llcp_sock->service_name); llcp_sock->service_name = NULL; llcp_sock->dev = NULL; ret = -EADDRINUSE; goto put_dev; } llcp_sock->reserved_ssap = llcp_sock->ssap; nfc_llcp_sock_link(&local->sockets, sk); pr_debug("Socket bound to SAP %d\n", llcp_sock->ssap); sk->sk_state = LLCP_BOUND; put_dev: nfc_put_device(dev); error: release_sock(sk); return ret; } static int llcp_raw_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) { struct sock *sk = sock->sk; struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk); struct nfc_llcp_local *local; struct nfc_dev *dev; struct sockaddr_nfc_llcp llcp_addr; int len, ret = 0; if (!addr || alen < offsetofend(struct sockaddr, sa_family) || addr->sa_family != AF_NFC) return -EINVAL; pr_debug("sk %p addr %p family %d\n", sk, addr, addr->sa_family); memset(&llcp_addr, 0, sizeof(llcp_addr)); len = min_t(unsigned int, sizeof(llcp_addr), alen); memcpy(&llcp_addr, addr, len); lock_sock(sk); if (sk->sk_state != LLCP_CLOSED) { ret = -EBADFD; goto error; } dev = nfc_get_device(llcp_addr.dev_idx); if (dev == NULL) { ret = -ENODEV; goto error; } local = nfc_llcp_find_local(dev); if (local == NULL) { ret = -ENODEV; goto put_dev; } llcp_sock->dev = dev; llcp_sock->local = nfc_llcp_local_get(local); llcp_sock->nfc_protocol = llcp_addr.nfc_protocol; nfc_llcp_sock_link(&local->raw_sockets, sk); sk->sk_state = LLCP_BOUND; put_dev: nfc_put_device(dev); error: release_sock(sk); return ret; } static int llcp_sock_listen(struct socket *sock, int backlog) { struct sock *sk = sock->sk; int ret = 0; pr_debug("sk %p backlog %d\n", sk, backlog); lock_sock(sk); if ((sock->type != SOCK_SEQPACKET && sock->type != SOCK_STREAM) || sk->sk_state != LLCP_BOUND) { ret = -EBADFD; goto error; } sk->sk_max_ack_backlog = backlog; sk->sk_ack_backlog = 0; pr_debug("Socket listening\n"); sk->sk_state = LLCP_LISTEN; error: release_sock(sk); return ret; } static int nfc_llcp_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen) { struct sock *sk = sock->sk; struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk); u32 opt; int err = 0; pr_debug("%p optname %d\n", sk, optname); if (level != SOL_NFC) return -ENOPROTOOPT; lock_sock(sk); switch (optname) { case NFC_LLCP_RW: if (sk->sk_state == LLCP_CONNECTED || sk->sk_state == LLCP_BOUND || sk->sk_state == LLCP_LISTEN) { err = -EINVAL; break; } if (get_user(opt, (u32 __user *) optval)) { err = -EFAULT; break; } if (opt > LLCP_MAX_RW) { err = -EINVAL; break; } llcp_sock->rw = (u8) opt; break; case NFC_LLCP_MIUX: if (sk->sk_state == LLCP_CONNECTED || sk->sk_state == LLCP_BOUND || sk->sk_state == LLCP_LISTEN) { err = -EINVAL; break; } if (get_user(opt, (u32 __user *) optval)) { err = -EFAULT; break; } if (opt > LLCP_MAX_MIUX) { err = -EINVAL; break; } llcp_sock->miux = cpu_to_be16((u16) opt); break; default: err = -ENOPROTOOPT; break; } release_sock(sk); pr_debug("%p rw %d miux %d\n", llcp_sock, llcp_sock->rw, 
llcp_sock->miux); return err; } static int nfc_llcp_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { struct nfc_llcp_local *local; struct sock *sk = sock->sk; struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk); int len, err = 0; u16 miux, remote_miu; u8 rw; pr_debug("%p optname %d\n", sk, optname); if (level != SOL_NFC) return -ENOPROTOOPT; if (get_user(len, optlen)) return -EFAULT; local = llcp_sock->local; if (!local) return -ENODEV; len = min_t(u32, len, sizeof(u32)); lock_sock(sk); switch (optname) { case NFC_LLCP_RW: rw = llcp_sock->rw > LLCP_MAX_RW ? local->rw : llcp_sock->rw; if (put_user(rw, (u32 __user *) optval)) err = -EFAULT; break; case NFC_LLCP_MIUX: miux = be16_to_cpu(llcp_sock->miux) > LLCP_MAX_MIUX ? be16_to_cpu(local->miux) : be16_to_cpu(llcp_sock->miux); if (put_user(miux, (u32 __user *) optval)) err = -EFAULT; break; case NFC_LLCP_REMOTE_MIU: remote_miu = llcp_sock->remote_miu > LLCP_MAX_MIU ? local->remote_miu : llcp_sock->remote_miu; if (put_user(remote_miu, (u32 __user *) optval)) err = -EFAULT; break; case NFC_LLCP_REMOTE_LTO: if (put_user(local->remote_lto / 10, (u32 __user *) optval)) err = -EFAULT; break; case NFC_LLCP_REMOTE_RW: if (put_user(llcp_sock->remote_rw, (u32 __user *) optval)) err = -EFAULT; break; default: err = -ENOPROTOOPT; break; } release_sock(sk); if (put_user(len, optlen)) return -EFAULT; return err; } void nfc_llcp_accept_unlink(struct sock *sk) { struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk); pr_debug("state %d\n", sk->sk_state); list_del_init(&llcp_sock->accept_queue); sk_acceptq_removed(llcp_sock->parent); llcp_sock->parent = NULL; sock_put(sk); } void nfc_llcp_accept_enqueue(struct sock *parent, struct sock *sk) { struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk); struct nfc_llcp_sock *llcp_sock_parent = nfc_llcp_sock(parent); /* Lock will be free from unlink */ sock_hold(sk); list_add_tail(&llcp_sock->accept_queue, &llcp_sock_parent->accept_queue); llcp_sock->parent = parent; sk_acceptq_added(parent); } struct sock *nfc_llcp_accept_dequeue(struct sock *parent, struct socket *newsock) { struct nfc_llcp_sock *lsk, *n, *llcp_parent; struct sock *sk; llcp_parent = nfc_llcp_sock(parent); list_for_each_entry_safe(lsk, n, &llcp_parent->accept_queue, accept_queue) { sk = &lsk->sk; lock_sock(sk); if (sk->sk_state == LLCP_CLOSED) { release_sock(sk); nfc_llcp_accept_unlink(sk); continue; } if (sk->sk_state == LLCP_CONNECTED || !newsock) { list_del_init(&lsk->accept_queue); sock_put(sk); if (newsock) sock_graft(sk, newsock); release_sock(sk); pr_debug("Returning sk state %d\n", sk->sk_state); sk_acceptq_removed(parent); return sk; } release_sock(sk); } return NULL; } static int llcp_sock_accept(struct socket *sock, struct socket *newsock, int flags, bool kern) { DECLARE_WAITQUEUE(wait, current); struct sock *sk = sock->sk, *new_sk; long timeo; int ret = 0; pr_debug("parent %p\n", sk); lock_sock_nested(sk, SINGLE_DEPTH_NESTING); if (sk->sk_state != LLCP_LISTEN) { ret = -EBADFD; goto error; } timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); /* Wait for an incoming connection. 
*/ add_wait_queue_exclusive(sk_sleep(sk), &wait); while (!(new_sk = nfc_llcp_accept_dequeue(sk, newsock))) { set_current_state(TASK_INTERRUPTIBLE); if (!timeo) { ret = -EAGAIN; break; } if (signal_pending(current)) { ret = sock_intr_errno(timeo); break; } release_sock(sk); timeo = schedule_timeout(timeo); lock_sock_nested(sk, SINGLE_DEPTH_NESTING); } __set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); if (ret) goto error; newsock->state = SS_CONNECTED; pr_debug("new socket %p\n", new_sk); error: release_sock(sk); return ret; } static int llcp_sock_getname(struct socket *sock, struct sockaddr *uaddr, int peer) { struct sock *sk = sock->sk; struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk); DECLARE_SOCKADDR(struct sockaddr_nfc_llcp *, llcp_addr, uaddr); if (llcp_sock == NULL || llcp_sock->dev == NULL) return -EBADFD; pr_debug("%p %d %d %d\n", sk, llcp_sock->target_idx, llcp_sock->dsap, llcp_sock->ssap); memset(llcp_addr, 0, sizeof(*llcp_addr)); lock_sock(sk); if (!llcp_sock->dev) { release_sock(sk); return -EBADFD; } llcp_addr->sa_family = AF_NFC; llcp_addr->dev_idx = llcp_sock->dev->idx; llcp_addr->target_idx = llcp_sock->target_idx; llcp_addr->nfc_protocol = llcp_sock->nfc_protocol; llcp_addr->dsap = llcp_sock->dsap; llcp_addr->ssap = llcp_sock->ssap; llcp_addr->service_name_len = llcp_sock->service_name_len; memcpy(llcp_addr->service_name, llcp_sock->service_name, llcp_addr->service_name_len); release_sock(sk); return sizeof(struct sockaddr_nfc_llcp); } static inline __poll_t llcp_accept_poll(struct sock *parent) { struct nfc_llcp_sock *llcp_sock, *parent_sock; struct sock *sk; parent_sock = nfc_llcp_sock(parent); list_for_each_entry(llcp_sock, &parent_sock->accept_queue, accept_queue) { sk = &llcp_sock->sk; if (sk->sk_state == LLCP_CONNECTED) return EPOLLIN | EPOLLRDNORM; } return 0; } static __poll_t llcp_sock_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; __poll_t mask = 0; pr_debug("%p\n", sk); sock_poll_wait(file, sock, wait); if (sk->sk_state == LLCP_LISTEN) return llcp_accept_poll(sk); if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) mask |= EPOLLERR | (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? 
EPOLLPRI : 0); if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= EPOLLIN | EPOLLRDNORM; if (sk->sk_state == LLCP_CLOSED) mask |= EPOLLHUP; if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; if (sk->sk_shutdown == SHUTDOWN_MASK) mask |= EPOLLHUP; if (sock_writeable(sk) && sk->sk_state == LLCP_CONNECTED) mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; else sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); pr_debug("mask 0x%x\n", mask); return mask; } static int llcp_sock_release(struct socket *sock) { struct sock *sk = sock->sk; struct nfc_llcp_local *local; struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk); int err = 0; if (!sk) return 0; pr_debug("%p\n", sk); local = llcp_sock->local; if (local == NULL) { err = -ENODEV; goto out; } lock_sock(sk); /* Send a DISC */ if (sk->sk_state == LLCP_CONNECTED) nfc_llcp_send_disconnect(llcp_sock); if (sk->sk_state == LLCP_LISTEN) { struct nfc_llcp_sock *lsk, *n; struct sock *accept_sk; list_for_each_entry_safe(lsk, n, &llcp_sock->accept_queue, accept_queue) { accept_sk = &lsk->sk; lock_sock(accept_sk); nfc_llcp_send_disconnect(lsk); nfc_llcp_accept_unlink(accept_sk); release_sock(accept_sk); } } if (llcp_sock->reserved_ssap < LLCP_SAP_MAX) nfc_llcp_put_ssap(llcp_sock->local, llcp_sock->ssap); release_sock(sk); /* Keep this sock alive and therefore do not remove it from the sockets * list until the DISC PDU has been actually sent. Otherwise we would * reply with DM PDUs before sending the DISC one. */ if (sk->sk_state == LLCP_DISCONNECTING) return err; if (sock->type == SOCK_RAW) nfc_llcp_sock_unlink(&local->raw_sockets, sk); else nfc_llcp_sock_unlink(&local->sockets, sk); out: sock_orphan(sk); sock_put(sk); return err; } static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr, int len, int flags) { struct sock *sk = sock->sk; struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk); struct sockaddr_nfc_llcp *addr = (struct sockaddr_nfc_llcp *)_addr; struct nfc_dev *dev; struct nfc_llcp_local *local; int ret = 0; pr_debug("sock %p sk %p flags 0x%x\n", sock, sk, flags); if (!addr || len < sizeof(*addr) || addr->sa_family != AF_NFC) return -EINVAL; if (addr->service_name_len == 0 && addr->dsap == 0) return -EINVAL; pr_debug("addr dev_idx=%u target_idx=%u protocol=%u\n", addr->dev_idx, addr->target_idx, addr->nfc_protocol); lock_sock(sk); if (sk->sk_state == LLCP_CONNECTED) { ret = -EISCONN; goto error; } if (sk->sk_state == LLCP_CONNECTING) { ret = -EINPROGRESS; goto error; } dev = nfc_get_device(addr->dev_idx); if (dev == NULL) { ret = -ENODEV; goto error; } local = nfc_llcp_find_local(dev); if (local == NULL) { ret = -ENODEV; goto put_dev; } device_lock(&dev->dev); if (dev->dep_link_up == false) { ret = -ENOLINK; device_unlock(&dev->dev); goto put_dev; } device_unlock(&dev->dev); if (local->rf_mode == NFC_RF_INITIATOR && addr->target_idx != local->target_idx) { ret = -ENOLINK; goto put_dev; } llcp_sock->dev = dev; llcp_sock->local = nfc_llcp_local_get(local); llcp_sock->ssap = nfc_llcp_get_local_ssap(local); if (llcp_sock->ssap == LLCP_SAP_MAX) { nfc_llcp_local_put(llcp_sock->local); llcp_sock->local = NULL; ret = -ENOMEM; goto put_dev; } llcp_sock->reserved_ssap = llcp_sock->ssap; if (addr->service_name_len == 0) llcp_sock->dsap = addr->dsap; else llcp_sock->dsap = LLCP_SAP_SDP; llcp_sock->nfc_protocol = addr->nfc_protocol; llcp_sock->service_name_len = min_t(unsigned int, addr->service_name_len, NFC_LLCP_MAX_SERVICE_NAME); llcp_sock->service_name = kmemdup(addr->service_name, llcp_sock->service_name_len, 
GFP_KERNEL); nfc_llcp_sock_link(&local->connecting_sockets, sk); ret = nfc_llcp_send_connect(llcp_sock); if (ret) goto sock_unlink; sk->sk_state = LLCP_CONNECTING; ret = sock_wait_state(sk, LLCP_CONNECTED, sock_sndtimeo(sk, flags & O_NONBLOCK)); if (ret && ret != -EINPROGRESS) goto sock_unlink; release_sock(sk); return ret; sock_unlink: nfc_llcp_put_ssap(local, llcp_sock->ssap); nfc_llcp_local_put(llcp_sock->local); llcp_sock->local = NULL; nfc_llcp_sock_unlink(&local->connecting_sockets, sk); kfree(llcp_sock->service_name); llcp_sock->service_name = NULL; put_dev: nfc_put_device(dev); error: release_sock(sk); return ret; } static int llcp_sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk); int ret; pr_debug("sock %p sk %p", sock, sk); ret = sock_error(sk); if (ret) return ret; if (msg->msg_flags & MSG_OOB) return -EOPNOTSUPP; lock_sock(sk); if (!llcp_sock->local) { release_sock(sk); return -ENODEV; } if (sk->sk_type == SOCK_DGRAM) { DECLARE_SOCKADDR(struct sockaddr_nfc_llcp *, addr, msg->msg_name); if (msg->msg_namelen < sizeof(*addr)) { release_sock(sk); return -EINVAL; } release_sock(sk); return nfc_llcp_send_ui_frame(llcp_sock, addr->dsap, addr->ssap, msg, len); } if (sk->sk_state != LLCP_CONNECTED) { release_sock(sk); return -ENOTCONN; } release_sock(sk); return nfc_llcp_send_i_frame(llcp_sock, msg, len); } static int llcp_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { int noblock = flags & MSG_DONTWAIT; struct sock *sk = sock->sk; unsigned int copied, rlen; struct sk_buff *skb, *cskb; int err = 0; pr_debug("%p %zu\n", sk, len); lock_sock(sk); if (sk->sk_state == LLCP_CLOSED && skb_queue_empty(&sk->sk_receive_queue)) { release_sock(sk); return 0; } release_sock(sk); if (flags & (MSG_OOB)) return -EOPNOTSUPP; skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) { pr_err("Recv datagram failed state %d %d %d", sk->sk_state, err, sock_error(sk)); if (sk->sk_shutdown & RCV_SHUTDOWN) return 0; return err; } rlen = skb->len; /* real length of skb */ copied = min_t(unsigned int, rlen, len); cskb = skb; if (skb_copy_datagram_msg(cskb, 0, msg, copied)) { if (!(flags & MSG_PEEK)) skb_queue_head(&sk->sk_receive_queue, skb); return -EFAULT; } sock_recv_timestamp(msg, sk, skb); if (sk->sk_type == SOCK_DGRAM && msg->msg_name) { struct nfc_llcp_ui_cb *ui_cb = nfc_llcp_ui_skb_cb(skb); DECLARE_SOCKADDR(struct sockaddr_nfc_llcp *, sockaddr, msg->msg_name); msg->msg_namelen = sizeof(struct sockaddr_nfc_llcp); pr_debug("Datagram socket %d %d\n", ui_cb->dsap, ui_cb->ssap); memset(sockaddr, 0, sizeof(*sockaddr)); sockaddr->sa_family = AF_NFC; sockaddr->nfc_protocol = NFC_PROTO_NFC_DEP; sockaddr->dsap = ui_cb->dsap; sockaddr->ssap = ui_cb->ssap; } /* Mark read part of skb as used */ if (!(flags & MSG_PEEK)) { /* SOCK_STREAM: re-queue skb if it contains unreceived data */ if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) { skb_pull(skb, copied); if (skb->len) { skb_queue_head(&sk->sk_receive_queue, skb); goto done; } } kfree_skb(skb); } /* XXX Queue backlogged skbs */ done: /* SOCK_SEQPACKET: return real length if MSG_TRUNC is set */ if (sk->sk_type == SOCK_SEQPACKET && (flags & MSG_TRUNC)) copied = rlen; return copied; } static const struct proto_ops llcp_sock_ops = { .family = PF_NFC, .owner = THIS_MODULE, .bind = llcp_sock_bind, .connect = llcp_sock_connect, .release = llcp_sock_release, .socketpair = sock_no_socketpair, .accept = 
llcp_sock_accept, .getname = llcp_sock_getname, .poll = llcp_sock_poll, .ioctl = sock_no_ioctl, .listen = llcp_sock_listen, .shutdown = sock_no_shutdown, .setsockopt = nfc_llcp_setsockopt, .getsockopt = nfc_llcp_getsockopt, .sendmsg = llcp_sock_sendmsg, .recvmsg = llcp_sock_recvmsg, .mmap = sock_no_mmap, }; static const struct proto_ops llcp_rawsock_ops = { .family = PF_NFC, .owner = THIS_MODULE, .bind = llcp_raw_sock_bind, .connect = sock_no_connect, .release = llcp_sock_release, .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = llcp_sock_getname, .poll = llcp_sock_poll, .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = sock_no_setsockopt, .getsockopt = sock_no_getsockopt, .sendmsg = sock_no_sendmsg, .recvmsg = llcp_sock_recvmsg, .mmap = sock_no_mmap, }; static void llcp_sock_destruct(struct sock *sk) { struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk); pr_debug("%p\n", sk); if (sk->sk_state == LLCP_CONNECTED) nfc_put_device(llcp_sock->dev); skb_queue_purge(&sk->sk_receive_queue); nfc_llcp_sock_free(llcp_sock); if (!sock_flag(sk, SOCK_DEAD)) { pr_err("Freeing alive NFC LLCP socket %p\n", sk); return; } } struct sock *nfc_llcp_sock_alloc(struct socket *sock, int type, gfp_t gfp, int kern) { struct sock *sk; struct nfc_llcp_sock *llcp_sock; sk = sk_alloc(&init_net, PF_NFC, gfp, &llcp_sock_proto, kern); if (!sk) return NULL; llcp_sock = nfc_llcp_sock(sk); sock_init_data(sock, sk); sk->sk_state = LLCP_CLOSED; sk->sk_protocol = NFC_SOCKPROTO_LLCP; sk->sk_type = type; sk->sk_destruct = llcp_sock_destruct; llcp_sock->ssap = 0; llcp_sock->dsap = LLCP_SAP_SDP; llcp_sock->rw = LLCP_MAX_RW + 1; llcp_sock->miux = cpu_to_be16(LLCP_MAX_MIUX + 1); llcp_sock->send_n = llcp_sock->send_ack_n = 0; llcp_sock->recv_n = llcp_sock->recv_ack_n = 0; llcp_sock->remote_ready = 1; llcp_sock->reserved_ssap = LLCP_SAP_MAX; nfc_llcp_socket_remote_param_init(llcp_sock); skb_queue_head_init(&llcp_sock->tx_queue); skb_queue_head_init(&llcp_sock->tx_pending_queue); INIT_LIST_HEAD(&llcp_sock->accept_queue); if (sock != NULL) sock->state = SS_UNCONNECTED; return sk; } void nfc_llcp_sock_free(struct nfc_llcp_sock *sock) { kfree(sock->service_name); skb_queue_purge(&sock->tx_queue); skb_queue_purge(&sock->tx_pending_queue); list_del_init(&sock->accept_queue); sock->parent = NULL; nfc_llcp_local_put(sock->local); } static int llcp_sock_create(struct net *net, struct socket *sock, const struct nfc_protocol *nfc_proto, int kern) { struct sock *sk; pr_debug("%p\n", sock); if (sock->type != SOCK_STREAM && sock->type != SOCK_DGRAM && sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; if (sock->type == SOCK_RAW) { if (!capable(CAP_NET_RAW)) return -EPERM; sock->ops = &llcp_rawsock_ops; } else { sock->ops = &llcp_sock_ops; } sk = nfc_llcp_sock_alloc(sock, sock->type, GFP_ATOMIC, kern); if (sk == NULL) return -ENOMEM; return 0; } static const struct nfc_protocol llcp_nfc_proto = { .id = NFC_SOCKPROTO_LLCP, .proto = &llcp_sock_proto, .owner = THIS_MODULE, .create = llcp_sock_create }; int __init nfc_llcp_sock_init(void) { return nfc_proto_register(&llcp_nfc_proto); } void nfc_llcp_sock_exit(void) { nfc_proto_unregister(&llcp_nfc_proto); }
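/*
 * Minimal userspace sketch of the bind path handled by llcp_sock_bind()
 * above.  Assumptions: an NFC adapter with index 0 exists and <linux/nfc.h>
 * provides AF_NFC, NFC_SOCKPROTO_LLCP and struct sockaddr_nfc_llcp; the
 * service name is only an example.  As required by llcp_sock_bind(), the
 * address family is AF_NFC and dsap is left at 0 for a listening socket.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/nfc.h>

int main(void)
{
	static const char name[] = "urn:nfc:sn:example";
	struct sockaddr_nfc_llcp addr;
	int fd;

	fd = socket(AF_NFC, SOCK_STREAM, NFC_SOCKPROTO_LLCP);
	if (fd < 0) {
		perror("socket");
		return 1;
	}

	memset(&addr, 0, sizeof(addr));
	addr.sa_family = AF_NFC;
	addr.dev_idx = 0;			/* example adapter index */
	addr.nfc_protocol = NFC_PROTO_NFC_DEP;
	addr.service_name_len = strlen(name);
	memcpy(addr.service_name, name, addr.service_name_len);

	if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0 ||
	    listen(fd, 1) < 0) {
		perror("bind/listen");
		close(fd);
		return 1;
	}

	close(fd);
	return 0;
}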
5 276 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_ICMPV6_H #define _LINUX_ICMPV6_H #include <linux/skbuff.h> #include <linux/ipv6.h> #include <uapi/linux/icmpv6.h> static inline struct icmp6hdr *icmp6_hdr(const struct sk_buff *skb) { return (struct icmp6hdr *)skb_transport_header(skb); } #include <linux/netdevice.h> #if IS_ENABLED(CONFIG_IPV6) typedef void ip6_icmp_send_t(struct sk_buff *skb, u8 type, u8 code, __u32 info, const struct in6_addr *force_saddr, const struct inet6_skb_parm *parm); void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, const struct in6_addr *force_saddr, const struct inet6_skb_parm *parm); #if IS_BUILTIN(CONFIG_IPV6) static inline void __icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, const struct inet6_skb_parm *parm) { icmp6_send(skb, type, code, info, NULL, parm); } static inline int inet6_register_icmp_sender(ip6_icmp_send_t *fn) { BUILD_BUG_ON(fn != icmp6_send); return 0; } static inline int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn) { BUILD_BUG_ON(fn != icmp6_send); return 0; } #else extern void __icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, const struct inet6_skb_parm *parm); extern int inet6_register_icmp_sender(ip6_icmp_send_t *fn); extern int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn); #endif static inline void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) { __icmpv6_send(skb, type, code, info, IP6CB(skb)); } int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type, unsigned int data_len); #if IS_ENABLED(CONFIG_NF_NAT) void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info); #else static inline void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info) { struct inet6_skb_parm parm = { 0 }; __icmpv6_send(skb_in, type, code, info, &parm); } #endif #else static inline void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) { } static inline void icmpv6_ndo_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) { } #endif extern int icmpv6_init(void); extern int icmpv6_err_convert(u8 type, u8 code, int *err); extern void icmpv6_cleanup(void); extern void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos); struct flowi6; struct in6_addr; extern void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6, u8 type, const struct in6_addr *saddr, const struct in6_addr *daddr, int oif); #endif
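/*
 * Sketch (kernel build context assumed; example_report_pmtu is a made-up
 * name, not an existing function): how a tunnel driver's transmit path
 * might report a path-MTU problem through the NAT-aware wrapper declared
 * above.
 */
#include <linux/skbuff.h>
#include <linux/icmpv6.h>

static void example_report_pmtu(struct sk_buff *skb, unsigned int mtu)
{
	/* ICMPV6_PKT_TOOBIG carries the usable MTU in the info field */
	icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
}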
7 6 7 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 /* * Copyright (c) 2006 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ #include <linux/module.h> #include <linux/init.h> #include <linux/skbuff.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_NFLOG.h> #include <net/netfilter/nf_log.h> MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_DESCRIPTION("Xtables: packet logging to netlink using NFLOG"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_NFLOG"); MODULE_ALIAS("ip6t_NFLOG"); static unsigned int nflog_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_nflog_info *info = par->targinfo; struct net *net = xt_net(par); struct nf_loginfo li; li.type = NF_LOG_TYPE_ULOG; li.u.ulog.copy_len = info->len; li.u.ulog.group = info->group; li.u.ulog.qthreshold = info->threshold; li.u.ulog.flags = 0; if (info->flags & XT_NFLOG_F_COPY_LEN) li.u.ulog.flags |= NF_LOG_F_COPY_LEN; nf_log_packet(net, xt_family(par), xt_hooknum(par), skb, xt_in(par), xt_out(par), &li, "%s", info->prefix); return XT_CONTINUE; } static int nflog_tg_check(const struct xt_tgchk_param *par) { const struct xt_nflog_info *info = par->targinfo; if (info->flags & ~XT_NFLOG_MASK) return -EINVAL; if (info->prefix[sizeof(info->prefix) - 1] != '\0') return -EINVAL; return nf_logger_find_get(par->family, NF_LOG_TYPE_ULOG); } static void nflog_tg_destroy(const struct xt_tgdtor_param *par) { nf_logger_put(par->family, NF_LOG_TYPE_ULOG); } static struct xt_target nflog_tg_reg __read_mostly = { .name = "NFLOG", .revision = 0, .family = NFPROTO_UNSPEC, .checkentry = nflog_tg_check, .destroy = nflog_tg_destroy, .target = nflog_tg, .targetsize = sizeof(struct xt_nflog_info), .me = THIS_MODULE, }; static int __init nflog_tg_init(void) { return xt_register_target(&nflog_tg_reg); } static void __exit nflog_tg_exit(void) { xt_unregister_target(&nflog_tg_reg); } module_init(nflog_tg_init); module_exit(nflog_tg_exit);
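/*
 * Usage note (an illustrative example from iptables-extensions, not part of
 * this file): the target is typically attached from userspace with a rule
 * such as
 *   iptables -A INPUT -j NFLOG --nflog-group 1 --nflog-prefix "input:"
 * whose group and prefix values arrive in the xt_nflog_info structure
 * validated by nflog_tg_check() above.
 */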
/*
 * Copyright (C) 2011 IBM Corporation
 *
 * Author:
 * Mimi Zohar <zohar@us.ibm.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, version 2 of the License.
 */

#include <linux/module.h>
#include <linux/xattr.h>
#include <linux/evm.h>

int posix_xattr_acl(const char *xattr)
{
	int xattr_len = strlen(xattr);

	if ((strlen(XATTR_NAME_POSIX_ACL_ACCESS) == xattr_len) &&
	    (strncmp(XATTR_NAME_POSIX_ACL_ACCESS, xattr, xattr_len) == 0))
		return 1;
	if ((strlen(XATTR_NAME_POSIX_ACL_DEFAULT) == xattr_len) &&
	    (strncmp(XATTR_NAME_POSIX_ACL_DEFAULT, xattr, xattr_len) == 0))
		return 1;
	return 0;
}
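/*
 * Userspace sketch (an illustration, not kernel code) of the semantics of
 * posix_xattr_acl() above: the exact-length comparison means only the two
 * full ACL attribute names match, never a shorter prefix of them.
 */
#include <stdio.h>
#include <string.h>

static int is_posix_acl_xattr(const char *name)
{
	return strcmp(name, "system.posix_acl_access") == 0 ||
	       strcmp(name, "system.posix_acl_default") == 0;
}

int main(void)
{
	printf("%d\n", is_posix_acl_xattr("system.posix_acl_access"));	/* 1 */
	printf("%d\n", is_posix_acl_xattr("system.posix_acl"));	/* 0 */
	return 0;
}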
1 1 516 123 123 40 40 39 39 123 73 72 70 3 2 107 107 107 69 6 107 6 3 6 129 129 1 1 1 128 128 129 291 395 395 102 102 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Generic INET6 transport hashtables * * Authors: Lotsa people, from code originally in tcp, generalised here * by Arnaldo Carvalho de Melo <acme@mandriva.com> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ #include <linux/module.h> #include <linux/random.h> #include <net/addrconf.h> #include <net/inet_connection_sock.h> #include <net/inet_hashtables.h> #include <net/inet6_hashtables.h> #include <net/secure_seq.h> #include <net/ip.h> #include <net/sock_reuseport.h> u32 inet6_ehashfn(const struct net *net, const struct in6_addr *laddr, const u16 lport, const struct in6_addr *faddr, const __be16 fport) { static u32 inet6_ehash_secret __read_mostly; static u32 ipv6_hash_secret __read_mostly; u32 lhash, fhash; net_get_random_once(&inet6_ehash_secret, sizeof(inet6_ehash_secret)); net_get_random_once(&ipv6_hash_secret, sizeof(ipv6_hash_secret)); lhash = (__force u32)laddr->s6_addr32[3]; fhash = __ipv6_addr_jhash(faddr, ipv6_hash_secret); return __inet6_ehashfn(lhash, lport, fhash, fport, inet6_ehash_secret + net_hash_mix(net)); } /* * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM * * The sockhash lock must be held as a reader here. */ struct sock *__inet6_lookup_established(struct net *net, struct inet_hashinfo *hashinfo, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const u16 hnum, const int dif, const int sdif) { struct sock *sk; const struct hlist_nulls_node *node; const __portpair ports = INET_COMBINED_PORTS(sport, hnum); /* Optimize here for direct hit, only listening connections can * have wildcards anyways. 
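 * (Added note: the lookup below runs under RCU; a match is only returned
 * after its refcount has been raised with refcount_inc_not_zero() and the
 * keys re-checked, and reaching a nulls value that belongs to another slot
 * restarts the walk from the beginning of the chain.)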
*/ unsigned int hash = inet6_ehashfn(net, daddr, hnum, saddr, sport); unsigned int slot = hash & hashinfo->ehash_mask; struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; begin: sk_nulls_for_each_rcu(sk, node, &head->chain) { if (sk->sk_hash != hash) continue; if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif, sdif)) continue; if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt))) goto out; if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, ports, dif, sdif))) { sock_gen_put(sk); goto begin; } goto found; } if (get_nulls_value(node) != slot) goto begin; out: sk = NULL; found: return sk; } EXPORT_SYMBOL(__inet6_lookup_established); static inline int compute_score(struct sock *sk, struct net *net, const unsigned short hnum, const struct in6_addr *daddr, const int dif, const int sdif, bool exact_dif) { int score = -1; if (net_eq(sock_net(sk), net) && inet_sk(sk)->inet_num == hnum && sk->sk_family == PF_INET6) { score = 1; if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) return -1; score++; } if (sk->sk_bound_dev_if || exact_dif) { bool dev_match = (sk->sk_bound_dev_if == dif || sk->sk_bound_dev_if == sdif); if (!dev_match) return -1; if (sk->sk_bound_dev_if) score++; } if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) score++; } return score; } /* called with rcu_read_lock() */ static struct sock *inet6_lhash2_lookup(struct net *net, struct inet_listen_hashbucket *ilb2, struct sk_buff *skb, int doff, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const unsigned short hnum, const int dif, const int sdif) { bool exact_dif = inet6_exact_dif_match(net, skb); struct inet_connection_sock *icsk; struct sock *sk, *result = NULL; int score, hiscore = 0; u32 phash = 0; inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) { sk = (struct sock *)icsk; score = compute_score(sk, net, hnum, daddr, dif, sdif, exact_dif); if (score > hiscore) { if (sk->sk_reuseport) { phash = inet6_ehashfn(net, daddr, hnum, saddr, sport); result = reuseport_select_sock(sk, phash, skb, doff); if (result) return result; } result = sk; hiscore = score; } } return result; } struct sock *inet6_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const unsigned short hnum, const int dif, const int sdif) { unsigned int hash = inet_lhashfn(net, hnum); struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; bool exact_dif = inet6_exact_dif_match(net, skb); struct inet_listen_hashbucket *ilb2; struct sock *sk, *result = NULL; struct hlist_nulls_node *node; int score, hiscore = 0; unsigned int hash2; u32 phash = 0; if (ilb->count <= 10 || !hashinfo->lhash2) goto port_lookup; /* Too many sk in the ilb bucket (which is hashed by port alone). * Try lhash2 (which is hashed by port and addr) instead. 
*/ hash2 = ipv6_portaddr_hash(net, daddr, hnum); ilb2 = inet_lhash2_bucket(hashinfo, hash2); if (ilb2->count > ilb->count) goto port_lookup; result = inet6_lhash2_lookup(net, ilb2, skb, doff, saddr, sport, daddr, hnum, dif, sdif); if (result) goto done; /* Lookup lhash2 with in6addr_any */ hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum); ilb2 = inet_lhash2_bucket(hashinfo, hash2); if (ilb2->count > ilb->count) goto port_lookup; result = inet6_lhash2_lookup(net, ilb2, skb, doff, saddr, sport, daddr, hnum, dif, sdif); goto done; port_lookup: sk_nulls_for_each(sk, node, &ilb->nulls_head) { score = compute_score(sk, net, hnum, daddr, dif, sdif, exact_dif); if (score > hiscore) { if (sk->sk_reuseport) { phash = inet6_ehashfn(net, daddr, hnum, saddr, sport); result = reuseport_select_sock(sk, phash, skb, doff); if (result) goto done; } result = sk; hiscore = score; } } done: if (unlikely(IS_ERR(result))) return NULL; return result; } EXPORT_SYMBOL_GPL(inet6_lookup_listener); struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const __be16 dport, const int dif) { struct sock *sk; bool refcounted; sk = __inet6_lookup(net, hashinfo, skb, doff, saddr, sport, daddr, ntohs(dport), dif, 0, &refcounted); if (sk && !refcounted && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; return sk; } EXPORT_SYMBOL_GPL(inet6_lookup); static int __inet6_check_established(struct inet_timewait_death_row *death_row, struct sock *sk, const __u16 lport, struct inet_timewait_sock **twp) { struct inet_hashinfo *hinfo = death_row->hashinfo; struct inet_sock *inet = inet_sk(sk); const struct in6_addr *daddr = &sk->sk_v6_rcv_saddr; const struct in6_addr *saddr = &sk->sk_v6_daddr; const int dif = sk->sk_bound_dev_if; struct net *net = sock_net(sk); const int sdif = l3mdev_master_ifindex_by_index(net, dif); const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport); const unsigned int hash = inet6_ehashfn(net, daddr, lport, saddr, inet->inet_dport); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); spinlock_t *lock = inet_ehash_lockp(hinfo, hash); struct sock *sk2; const struct hlist_nulls_node *node; struct inet_timewait_sock *tw = NULL; spin_lock(lock); sk_nulls_for_each(sk2, node, &head->chain) { if (sk2->sk_hash != hash) continue; if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports, dif, sdif))) { if (sk2->sk_state == TCP_TIME_WAIT) { tw = inet_twsk(sk2); if (twsk_unique(sk, sk2, twp)) break; } goto not_unique; } } /* Must record num and sport now. Otherwise we will see * in hash table socket with a funny identity. */ inet->inet_num = lport; inet->inet_sport = htons(lport); sk->sk_hash = hash; WARN_ON(!sk_unhashed(sk)); __sk_nulls_add_node_rcu(sk, &head->chain); if (tw) { sk_nulls_del_node_init_rcu((struct sock *)tw); __NET_INC_STATS(net, LINUX_MIB_TIMEWAITRECYCLED); } spin_unlock(lock); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); if (twp) { *twp = tw; } else if (tw) { /* Silly. Should hash-dance instead... 
*/ inet_twsk_deschedule_put(tw); } return 0; not_unique: spin_unlock(lock); return -EADDRNOTAVAIL; } static u64 inet6_sk_port_offset(const struct sock *sk) { const struct inet_sock *inet = inet_sk(sk); return secure_ipv6_port_ephemeral(sk->sk_v6_rcv_saddr.s6_addr32, sk->sk_v6_daddr.s6_addr32, inet->inet_dport); } int inet6_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk) { u64 port_offset = 0; if (!inet_sk(sk)->inet_num) port_offset = inet6_sk_port_offset(sk); return __inet_hash_connect(death_row, sk, port_offset, __inet6_check_established); } EXPORT_SYMBOL_GPL(inet6_hash_connect); int inet6_hash(struct sock *sk) { int err = 0; if (sk->sk_state != TCP_CLOSE) { local_bh_disable(); err = __inet_hash(sk, NULL); local_bh_enable(); } return err; } EXPORT_SYMBOL_GPL(inet6_hash);
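__inet6_lookup_established() derives one hash from the connection 4-tuple, masks it with ehash_mask to pick a bucket, and walks that bucket's nulls chain, re-checking the match after taking a reference. The sketch below shows only the "hash then mask into a power-of-two bucket array" step; the mixing function is a toy stand-in, not the kernel's secret-keyed __inet6_ehashfn():

/* Toy sketch of "hash the 4-tuple, mask into a power-of-two bucket array".
 * The mixing function below is a stand-in, not the kernel's __inet6_ehashfn(). */
#include <stdint.h>
#include <stdio.h>

#define EHASH_BUCKETS	256u			/* must be a power of two */
#define EHASH_MASK	(EHASH_BUCKETS - 1)	/* plays the role of ehash_mask */

static uint32_t toy_mix(uint32_t a, uint32_t b, uint32_t c, uint32_t d)
{
	uint32_t h = a * 2654435761u;		/* multiplicative mixing step */

	h ^= b + 0x9e3779b9u + (h << 6) + (h >> 2);
	h ^= c + 0x9e3779b9u + (h << 6) + (h >> 2);
	h ^= d + 0x9e3779b9u + (h << 6) + (h >> 2);
	return h;
}

int main(void)
{
	/* pretend laddr/faddr are the low 32 bits of the IPv6 addresses,
	 * analogous to what the lookup hash uses for the local side */
	uint32_t laddr = 0x00000001, faddr = 0x20010db8;
	uint16_t lport = 443, fport = 51512;

	uint32_t hash = toy_mix(laddr, lport, faddr, fport);
	uint32_t slot = hash & EHASH_MASK;

	printf("hash=0x%08x slot=%u of %u\n",
	       (unsigned)hash, (unsigned)slot, (unsigned)EHASH_BUCKETS);
	return 0;
}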
/*
 * HT handling
 *
 * Copyright 2003, Jouni Malinen <jkmaline@cc.hut.fi>
 * Copyright 2002-2005, Instant802 Networks, Inc.
 * Copyright 2005-2006, Devicescape Software, Inc.
 * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
 * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
 * Copyright 2007-2010, Intel Corporation
 * Copyright(c) 2015-2017 Intel Deutschland GmbH
 * Copyright (C) 2018 Intel Corporation
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

/**
 * DOC: RX A-MPDU aggregation
 *
 * Aggregation on the RX side requires only implementing the
 * @ampdu_action callback that is invoked to start/stop any
 * block-ack sessions for RX aggregation.
 *
 * When RX aggregation is started by the peer, the driver is
 * notified via the @ampdu_action callback with the
 * %IEEE80211_AMPDU_RX_START action. It may reject the request,
 * in which case a negative response is sent to the peer; if it
 * accepts, a positive response is sent.
 *
 * While the session is active, the device/driver is required
 * to de-aggregate frames and pass them up one by one to mac80211,
 * which will handle the reorder buffer.
 *
 * When the aggregation session is stopped again by the peer or
 * ourselves, the driver's @ampdu_action function will be called
 * with the action %IEEE80211_AMPDU_RX_STOP. In this case, the
 * call must not fail.
*/ #include <linux/ieee80211.h> #include <linux/slab.h> #include <linux/export.h> #include <net/mac80211.h> #include "ieee80211_i.h" #include "driver-ops.h" static void ieee80211_free_tid_rx(struct rcu_head *h) { struct tid_ampdu_rx *tid_rx = container_of(h, struct tid_ampdu_rx, rcu_head); int i; for (i = 0; i < tid_rx->buf_size; i++) __skb_queue_purge(&tid_rx->reorder_buf[i]); kfree(tid_rx->reorder_buf); kfree(tid_rx->reorder_time); kfree(tid_rx); } void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, u16 initiator, u16 reason, bool tx) { struct ieee80211_local *local = sta->local; struct tid_ampdu_rx *tid_rx; struct ieee80211_ampdu_params params = { .sta = &sta->sta, .action = IEEE80211_AMPDU_RX_STOP, .tid = tid, .amsdu = false, .timeout = 0, .ssn = 0, }; lockdep_assert_held(&sta->ampdu_mlme.mtx); tid_rx = rcu_dereference_protected(sta->ampdu_mlme.tid_rx[tid], lockdep_is_held(&sta->ampdu_mlme.mtx)); if (!test_bit(tid, sta->ampdu_mlme.agg_session_valid)) return; RCU_INIT_POINTER(sta->ampdu_mlme.tid_rx[tid], NULL); __clear_bit(tid, sta->ampdu_mlme.agg_session_valid); ht_dbg(sta->sdata, "Rx BA session stop requested for %pM tid %u %s reason: %d\n", sta->sta.addr, tid, initiator == WLAN_BACK_RECIPIENT ? "recipient" : "initiator", (int)reason); if (drv_ampdu_action(local, sta->sdata, &params)) sdata_info(sta->sdata, "HW problem - can not stop rx aggregation for %pM tid %d\n", sta->sta.addr, tid); /* check if this is a self generated aggregation halt */ if (initiator == WLAN_BACK_RECIPIENT && tx) ieee80211_send_delba(sta->sdata, sta->sta.addr, tid, WLAN_BACK_RECIPIENT, reason); /* * return here in case tid_rx is not assigned - which will happen if * IEEE80211_HW_SUPPORTS_REORDERING_BUFFER is set. */ if (!tid_rx) return; del_timer_sync(&tid_rx->session_timer); /* make sure ieee80211_sta_reorder_release() doesn't re-arm the timer */ spin_lock_bh(&tid_rx->reorder_lock); tid_rx->removed = true; spin_unlock_bh(&tid_rx->reorder_lock); del_timer_sync(&tid_rx->reorder_timer); call_rcu(&tid_rx->rcu_head, ieee80211_free_tid_rx); } void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, u16 initiator, u16 reason, bool tx) { mutex_lock(&sta->ampdu_mlme.mtx); ___ieee80211_stop_rx_ba_session(sta, tid, initiator, reason, tx); mutex_unlock(&sta->ampdu_mlme.mtx); } void ieee80211_stop_rx_ba_session(struct ieee80211_vif *vif, u16 ba_rx_bitmap, const u8 *addr) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct sta_info *sta; int i; rcu_read_lock(); sta = sta_info_get_bss(sdata, addr); if (!sta) { rcu_read_unlock(); return; } for (i = 0; i < IEEE80211_NUM_TIDS; i++) if (ba_rx_bitmap & BIT(i)) set_bit(i, sta->ampdu_mlme.tid_rx_stop_requested); ieee80211_queue_work(&sta->local->hw, &sta->ampdu_mlme.work); rcu_read_unlock(); } EXPORT_SYMBOL(ieee80211_stop_rx_ba_session); /* * After accepting the AddBA Request we activated a timer, * resetting it after each frame that arrives from the originator. 
*/ static void sta_rx_agg_session_timer_expired(struct timer_list *t) { struct tid_ampdu_rx *tid_rx = from_timer(tid_rx, t, session_timer); struct sta_info *sta = tid_rx->sta; u8 tid = tid_rx->tid; unsigned long timeout; timeout = tid_rx->last_rx + TU_TO_JIFFIES(tid_rx->timeout); if (time_is_after_jiffies(timeout)) { mod_timer(&tid_rx->session_timer, timeout); return; } ht_dbg(sta->sdata, "RX session timer expired on %pM tid %d\n", sta->sta.addr, tid); set_bit(tid, sta->ampdu_mlme.tid_rx_timer_expired); ieee80211_queue_work(&sta->local->hw, &sta->ampdu_mlme.work); } static void sta_rx_agg_reorder_timer_expired(struct timer_list *t) { struct tid_ampdu_rx *tid_rx = from_timer(tid_rx, t, reorder_timer); rcu_read_lock(); ieee80211_release_reorder_timeout(tid_rx->sta, tid_rx->tid); rcu_read_unlock(); } static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *da, u16 tid, u8 dialog_token, u16 status, u16 policy, u16 buf_size, u16 timeout) { struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct ieee80211_mgmt *mgmt; bool amsdu = ieee80211_hw_check(&local->hw, SUPPORTS_AMSDU_IN_AMPDU); u16 capab; skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom); if (!skb) return; skb_reserve(skb, local->hw.extra_tx_headroom); mgmt = skb_put_zero(skb, 24); memcpy(mgmt->da, da, ETH_ALEN); memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); if (sdata->vif.type == NL80211_IFTYPE_AP || sdata->vif.type == NL80211_IFTYPE_AP_VLAN || sdata->vif.type == NL80211_IFTYPE_MESH_POINT) memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN); else if (sdata->vif.type == NL80211_IFTYPE_STATION) memcpy(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN); else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) memcpy(mgmt->bssid, sdata->u.ibss.bssid, ETH_ALEN); mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION); skb_put(skb, 1 + sizeof(mgmt->u.action.u.addba_resp)); mgmt->u.action.category = WLAN_CATEGORY_BACK; mgmt->u.action.u.addba_resp.action_code = WLAN_ACTION_ADDBA_RESP; mgmt->u.action.u.addba_resp.dialog_token = dialog_token; capab = (u16)(amsdu << 0); /* bit 0 A-MSDU support */ capab |= (u16)(policy << 1); /* bit 1 aggregation policy */ capab |= (u16)(tid << 2); /* bit 5:2 TID number */ capab |= (u16)(buf_size << 6); /* bit 15:6 max size of aggregation */ mgmt->u.action.u.addba_resp.capab = cpu_to_le16(capab); mgmt->u.action.u.addba_resp.timeout = cpu_to_le16(timeout); mgmt->u.action.u.addba_resp.status = cpu_to_le16(status); ieee80211_tx_skb(sdata, skb); } void ___ieee80211_start_rx_ba_session(struct sta_info *sta, u8 dialog_token, u16 timeout, u16 start_seq_num, u16 ba_policy, u16 tid, u16 buf_size, bool tx, bool auto_seq) { struct ieee80211_local *local = sta->sdata->local; struct tid_ampdu_rx *tid_agg_rx; struct ieee80211_ampdu_params params = { .sta = &sta->sta, .action = IEEE80211_AMPDU_RX_START, .tid = tid, .amsdu = false, .timeout = timeout, .ssn = start_seq_num, }; int i, ret = -EOPNOTSUPP; u16 status = WLAN_STATUS_REQUEST_DECLINED; u16 max_buf_size; if (tid >= IEEE80211_FIRST_TSPEC_TSID) { ht_dbg(sta->sdata, "STA %pM requests BA session on unsupported tid %d\n", sta->sta.addr, tid); goto end; } if (!sta->sta.ht_cap.ht_supported) { ht_dbg(sta->sdata, "STA %pM erroneously requests BA session on tid %d w/o QoS\n", sta->sta.addr, tid); /* send a response anyway, it's an error case if we get here */ goto end; } if (test_sta_flag(sta, WLAN_STA_BLOCK_BA)) { ht_dbg(sta->sdata, "Suspend in progress - Denying ADDBA request (%pM tid %d)\n", sta->sta.addr, tid); goto end; 
} if (sta->sta.he_cap.has_he) max_buf_size = IEEE80211_MAX_AMPDU_BUF; else max_buf_size = IEEE80211_MAX_AMPDU_BUF_HT; /* sanity check for incoming parameters: * check if configuration can support the BA policy * and if buffer size does not exceeds max value */ /* XXX: check own ht delayed BA capability?? */ if (((ba_policy != 1) && (!(sta->sta.ht_cap.cap & IEEE80211_HT_CAP_DELAY_BA))) || (buf_size > max_buf_size)) { status = WLAN_STATUS_INVALID_QOS_PARAM; ht_dbg_ratelimited(sta->sdata, "AddBA Req with bad params from %pM on tid %u. policy %d, buffer size %d\n", sta->sta.addr, tid, ba_policy, buf_size); goto end; } /* determine default buffer size */ if (buf_size == 0) buf_size = max_buf_size; /* make sure the size doesn't exceed the maximum supported by the hw */ if (buf_size > sta->sta.max_rx_aggregation_subframes) buf_size = sta->sta.max_rx_aggregation_subframes; params.buf_size = buf_size; ht_dbg(sta->sdata, "AddBA Req buf_size=%d for %pM\n", buf_size, sta->sta.addr); /* examine state machine */ lockdep_assert_held(&sta->ampdu_mlme.mtx); if (test_bit(tid, sta->ampdu_mlme.agg_session_valid)) { if (sta->ampdu_mlme.tid_rx_token[tid] == dialog_token) { struct tid_ampdu_rx *tid_rx; ht_dbg_ratelimited(sta->sdata, "updated AddBA Req from %pM on tid %u\n", sta->sta.addr, tid); /* We have no API to update the timeout value in the * driver so reject the timeout update if the timeout * changed. If if did not change, i.e., no real update, * just reply with success. */ rcu_read_lock(); tid_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]); if (tid_rx && tid_rx->timeout == timeout) status = WLAN_STATUS_SUCCESS; else status = WLAN_STATUS_REQUEST_DECLINED; rcu_read_unlock(); goto end; } ht_dbg_ratelimited(sta->sdata, "unexpected AddBA Req from %pM on tid %u\n", sta->sta.addr, tid); /* delete existing Rx BA session on the same tid */ ___ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_RECIPIENT, WLAN_STATUS_UNSPECIFIED_QOS, false); } if (ieee80211_hw_check(&local->hw, SUPPORTS_REORDERING_BUFFER)) { ret = drv_ampdu_action(local, sta->sdata, &params); ht_dbg(sta->sdata, "Rx A-MPDU request on %pM tid %d result %d\n", sta->sta.addr, tid, ret); if (!ret) status = WLAN_STATUS_SUCCESS; goto end; } /* prepare A-MPDU MLME for Rx aggregation */ tid_agg_rx = kzalloc(sizeof(*tid_agg_rx), GFP_KERNEL); if (!tid_agg_rx) goto end; spin_lock_init(&tid_agg_rx->reorder_lock); /* rx timer */ timer_setup(&tid_agg_rx->session_timer, sta_rx_agg_session_timer_expired, TIMER_DEFERRABLE); /* rx reorder timer */ timer_setup(&tid_agg_rx->reorder_timer, sta_rx_agg_reorder_timer_expired, 0); /* prepare reordering buffer */ tid_agg_rx->reorder_buf = kcalloc(buf_size, sizeof(struct sk_buff_head), GFP_KERNEL); tid_agg_rx->reorder_time = kcalloc(buf_size, sizeof(unsigned long), GFP_KERNEL); if (!tid_agg_rx->reorder_buf || !tid_agg_rx->reorder_time) { kfree(tid_agg_rx->reorder_buf); kfree(tid_agg_rx->reorder_time); kfree(tid_agg_rx); goto end; } for (i = 0; i < buf_size; i++) __skb_queue_head_init(&tid_agg_rx->reorder_buf[i]); ret = drv_ampdu_action(local, sta->sdata, &params); ht_dbg(sta->sdata, "Rx A-MPDU request on %pM tid %d result %d\n", sta->sta.addr, tid, ret); if (ret) { kfree(tid_agg_rx->reorder_buf); kfree(tid_agg_rx->reorder_time); kfree(tid_agg_rx); goto end; } /* update data */ tid_agg_rx->ssn = start_seq_num; tid_agg_rx->head_seq_num = start_seq_num; tid_agg_rx->buf_size = buf_size; tid_agg_rx->timeout = timeout; tid_agg_rx->stored_mpdu_num = 0; tid_agg_rx->auto_seq = auto_seq; tid_agg_rx->started = false; 
tid_agg_rx->reorder_buf_filtered = 0; tid_agg_rx->tid = tid; tid_agg_rx->sta = sta; status = WLAN_STATUS_SUCCESS; /* activate it for RX */ rcu_assign_pointer(sta->ampdu_mlme.tid_rx[tid], tid_agg_rx); if (timeout) { mod_timer(&tid_agg_rx->session_timer, TU_TO_EXP_TIME(timeout)); tid_agg_rx->last_rx = jiffies; } end: if (status == WLAN_STATUS_SUCCESS) { __set_bit(tid, sta->ampdu_mlme.agg_session_valid); __clear_bit(tid, sta->ampdu_mlme.unexpected_agg); sta->ampdu_mlme.tid_rx_token[tid] = dialog_token; } if (tx) ieee80211_send_addba_resp(sta->sdata, sta->sta.addr, tid, dialog_token, status, 1, buf_size, timeout); } static void __ieee80211_start_rx_ba_session(struct sta_info *sta, u8 dialog_token, u16 timeout, u16 start_seq_num, u16 ba_policy, u16 tid, u16 buf_size, bool tx, bool auto_seq) { mutex_lock(&sta->ampdu_mlme.mtx); ___ieee80211_start_rx_ba_session(sta, dialog_token, timeout, start_seq_num, ba_policy, tid, buf_size, tx, auto_seq); mutex_unlock(&sta->ampdu_mlme.mtx); } void ieee80211_process_addba_request(struct ieee80211_local *local, struct sta_info *sta, struct ieee80211_mgmt *mgmt, size_t len) { u16 capab, tid, timeout, ba_policy, buf_size, start_seq_num; u8 dialog_token; /* extract session parameters from addba request frame */ dialog_token = mgmt->u.action.u.addba_req.dialog_token; timeout = le16_to_cpu(mgmt->u.action.u.addba_req.timeout); start_seq_num = le16_to_cpu(mgmt->u.action.u.addba_req.start_seq_num) >> 4; capab = le16_to_cpu(mgmt->u.action.u.addba_req.capab); ba_policy = (capab & IEEE80211_ADDBA_PARAM_POLICY_MASK) >> 1; tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2; buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6; __ieee80211_start_rx_ba_session(sta, dialog_token, timeout, start_seq_num, ba_policy, tid, buf_size, true, false); } void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif, const u8 *addr, unsigned int tid) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_local *local = sdata->local; struct sta_info *sta; rcu_read_lock(); sta = sta_info_get_bss(sdata, addr); if (!sta) goto unlock; set_bit(tid, sta->ampdu_mlme.tid_rx_manage_offl); ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work); unlock: rcu_read_unlock(); } EXPORT_SYMBOL(ieee80211_manage_rx_ba_offl); void ieee80211_rx_ba_timer_expired(struct ieee80211_vif *vif, const u8 *addr, unsigned int tid) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_local *local = sdata->local; struct sta_info *sta; rcu_read_lock(); sta = sta_info_get_bss(sdata, addr); if (!sta) goto unlock; set_bit(tid, sta->ampdu_mlme.tid_rx_timer_expired); ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work); unlock: rcu_read_unlock(); } EXPORT_SYMBOL(ieee80211_rx_ba_timer_expired);
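ieee80211_send_addba_resp() and ieee80211_process_addba_request() above pack and unpack the same 16-bit Block Ack parameter set: bit 0 A-MSDU support, bit 1 aggregation policy, bits 5:2 TID, bits 15:6 buffer size. A self-contained round trip of that encoding, with the mask values spelled out here instead of taken from ieee80211.h:

/* Round-trip of the ADDBA "Block Ack parameter set" field used above:
 * bit 0 = A-MSDU support, bit 1 = policy, bits 5:2 = TID, bits 15:6 = buf size.
 * Mask constants are written out; in the kernel they come from ieee80211.h. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define ADDBA_AMSDU_MASK	0x0001u
#define ADDBA_POLICY_MASK	0x0002u
#define ADDBA_TID_MASK		0x003Cu
#define ADDBA_BUF_SIZE_MASK	0xFFC0u

static uint16_t addba_pack(unsigned amsdu, unsigned policy,
			   unsigned tid, unsigned buf_size)
{
	uint16_t capab = 0;

	capab |= (uint16_t)(amsdu << 0);	/* bit 0 A-MSDU support */
	capab |= (uint16_t)(policy << 1);	/* bit 1 aggregation policy */
	capab |= (uint16_t)(tid << 2);		/* bits 5:2 TID number */
	capab |= (uint16_t)(buf_size << 6);	/* bits 15:6 buffer size */
	return capab;
}

int main(void)
{
	uint16_t capab = addba_pack(1, 1, 5, 64);

	unsigned amsdu    = (capab & ADDBA_AMSDU_MASK) >> 0;
	unsigned policy   = (capab & ADDBA_POLICY_MASK) >> 1;
	unsigned tid      = (capab & ADDBA_TID_MASK) >> 2;
	unsigned buf_size = (capab & ADDBA_BUF_SIZE_MASK) >> 6;

	printf("capab=0x%04x amsdu=%u policy=%u tid=%u buf_size=%u\n",
	       capab, amsdu, policy, tid, buf_size);
	assert(amsdu == 1 && policy == 1 && tid == 5 && buf_size == 64);
	return 0;
}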
1655 1655 1655 1656 1654 2032 2035 2 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 /* * This is the 1999 rewrite of IP Firewalling, aiming for kernel 2.3.x. * * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ #include <linux/module.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <net/sock.h> #include <net/route.h> #include <linux/ip.h> #include <net/ip.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("iptables mangle table"); #define MANGLE_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \ (1 << NF_INET_LOCAL_IN) | \ (1 << NF_INET_FORWARD) | \ (1 << NF_INET_LOCAL_OUT) | \ (1 << NF_INET_POST_ROUTING)) static int __net_init iptable_mangle_table_init(struct net *net); static const struct xt_table packet_mangler = { .name = "mangle", .valid_hooks = MANGLE_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV4, .priority = NF_IP_PRI_MANGLE, .table_init = iptable_mangle_table_init, }; static unsigned int ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state) { unsigned int ret; const struct iphdr *iph; u_int8_t tos; __be32 saddr, daddr; u_int32_t mark; int err; /* Save things which could affect route */ mark = skb->mark; iph = ip_hdr(skb); saddr = iph->saddr; daddr = iph->daddr; tos = iph->tos; ret = ipt_do_table(skb, state, state->net->ipv4.iptable_mangle); /* Reroute for ANY change. */ if (ret != NF_DROP && ret != NF_STOLEN) { iph = ip_hdr(skb); if (iph->saddr != saddr || iph->daddr != daddr || skb->mark != mark || iph->tos != tos) { err = ip_route_me_harder(state->net, state->sk, skb, RTN_UNSPEC); if (err < 0) ret = NF_DROP_ERR(err); } } return ret; } /* The work comes in here from netfilter.c. 
*/ static unsigned int iptable_mangle_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { if (state->hook == NF_INET_LOCAL_OUT) return ipt_mangle_out(skb, state); return ipt_do_table(skb, state, state->net->ipv4.iptable_mangle); } static struct nf_hook_ops *mangle_ops __read_mostly; static int __net_init iptable_mangle_table_init(struct net *net) { struct ipt_replace *repl; int ret; if (net->ipv4.iptable_mangle) return 0; repl = ipt_alloc_initial_table(&packet_mangler); if (repl == NULL) return -ENOMEM; ret = ipt_register_table(net, &packet_mangler, repl, mangle_ops, &net->ipv4.iptable_mangle); kfree(repl); return ret; } static void __net_exit iptable_mangle_net_exit(struct net *net) { if (!net->ipv4.iptable_mangle) return; ipt_unregister_table(net, net->ipv4.iptable_mangle, mangle_ops); net->ipv4.iptable_mangle = NULL; } static struct pernet_operations iptable_mangle_net_ops = { .exit = iptable_mangle_net_exit, }; static int __init iptable_mangle_init(void) { int ret; mangle_ops = xt_hook_ops_alloc(&packet_mangler, iptable_mangle_hook); if (IS_ERR(mangle_ops)) { ret = PTR_ERR(mangle_ops); return ret; } ret = register_pernet_subsys(&iptable_mangle_net_ops); if (ret < 0) { kfree(mangle_ops); return ret; } ret = iptable_mangle_table_init(&init_net); if (ret) { unregister_pernet_subsys(&iptable_mangle_net_ops); kfree(mangle_ops); } return ret; } static void __exit iptable_mangle_fini(void) { unregister_pernet_subsys(&iptable_mangle_net_ops); kfree(mangle_ops); } module_init(iptable_mangle_init); module_exit(iptable_mangle_fini);
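ipt_mangle_out() snapshots the fields that can influence routing (mark, source/destination address, TOS), runs the mangle table, and calls ip_route_me_harder() only if one of them changed. A userspace sketch of just that compare-and-decide step, using a simplified stand-in struct:

/* Sketch of the ipt_mangle_out() pattern: snapshot the fields that can
 * influence routing, run the rules, and re-route only if one changed.
 * The struct and the "rule fired" step are illustrative stand-ins. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct route_key {
	uint32_t saddr;
	uint32_t daddr;
	uint32_t mark;
	uint8_t  tos;
};

static bool needs_reroute(const struct route_key *before,
			  const struct route_key *after)
{
	return before->saddr != after->saddr ||
	       before->daddr != after->daddr ||
	       before->mark  != after->mark  ||
	       before->tos   != after->tos;
}

int main(void)
{
	struct route_key before = { 0x0a000001, 0x0a000002, 0, 0x00 };
	struct route_key after  = before;

	after.mark = 0x42;	/* pretend a MARK rule fired */

	printf("reroute needed: %s\n",
	       needs_reroute(&before, &after) ? "yes" : "no");
	return 0;
}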
3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 /* SPDX-License-Identifier: GPL-2.0 */ /* * QNX6 file system, Linux implementation. * * Version : 1.0.0 * * History : * * 01-02-2012 by Kai Bankett (chaosman@ontika.net) : first release. * 16-02-2012 page map extension by Al Viro * */ #ifdef pr_fmt #undef pr_fmt #endif #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/fs.h> #include <linux/pagemap.h> typedef __u16 __bitwise __fs16; typedef __u32 __bitwise __fs32; typedef __u64 __bitwise __fs64; #include <linux/qnx6_fs.h> struct qnx6_sb_info { struct buffer_head *sb_buf; /* superblock buffer */ struct qnx6_super_block *sb; /* our superblock */ int s_blks_off; /* blkoffset fs-startpoint */ int s_ptrbits; /* indirect pointer bitfield */ unsigned long s_mount_opt; /* all mount options */ int s_bytesex; /* holds endianess info */ struct inode * inodes; struct inode * longfile; }; struct qnx6_inode_info { __fs32 di_block_ptr[QNX6_NO_DIRECT_POINTERS]; __u8 di_filelevels; __u32 i_dir_start_lookup; struct inode vfs_inode; }; extern struct inode *qnx6_iget(struct super_block *sb, unsigned ino); extern struct dentry *qnx6_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags); #ifdef CONFIG_QNX6FS_DEBUG extern void qnx6_superblock_debug(struct qnx6_super_block *, struct super_block *); #endif extern const struct inode_operations qnx6_dir_inode_operations; extern const struct file_operations qnx6_dir_operations; static inline struct qnx6_sb_info *QNX6_SB(struct super_block *sb) { return sb->s_fs_info; } static inline struct qnx6_inode_info *QNX6_I(struct inode *inode) { return container_of(inode, struct qnx6_inode_info, vfs_inode); } #define clear_opt(o, opt) (o &= ~(QNX6_MOUNT_##opt)) #define set_opt(o, opt) (o |= (QNX6_MOUNT_##opt)) #define test_opt(sb, opt) (QNX6_SB(sb)->s_mount_opt & \ QNX6_MOUNT_##opt) enum { BYTESEX_LE, BYTESEX_BE, }; static inline __u64 fs64_to_cpu(struct qnx6_sb_info *sbi, __fs64 n) { if (sbi->s_bytesex == BYTESEX_LE) return le64_to_cpu((__force __le64)n); else return be64_to_cpu((__force __be64)n); } static inline __fs64 cpu_to_fs64(struct qnx6_sb_info *sbi, __u64 n) { if (sbi->s_bytesex == BYTESEX_LE) return (__force __fs64)cpu_to_le64(n); else return (__force __fs64)cpu_to_be64(n); } static inline __u32 fs32_to_cpu(struct qnx6_sb_info *sbi, __fs32 n) { if (sbi->s_bytesex == BYTESEX_LE) return le32_to_cpu((__force __le32)n); else return be32_to_cpu((__force __be32)n); } static inline __fs32 cpu_to_fs32(struct qnx6_sb_info *sbi, __u32 n) { if (sbi->s_bytesex == BYTESEX_LE) return (__force __fs32)cpu_to_le32(n); else return (__force __fs32)cpu_to_be32(n); } static inline __u16 fs16_to_cpu(struct qnx6_sb_info *sbi, __fs16 n) { if (sbi->s_bytesex == BYTESEX_LE) return le16_to_cpu((__force __le16)n); else return be16_to_cpu((__force __be16)n); } static inline __fs16 cpu_to_fs16(struct qnx6_sb_info *sbi, __u16 n) { if (sbi->s_bytesex == BYTESEX_LE) return (__force __fs16)cpu_to_le16(n); else return (__force __fs16)cpu_to_be16(n); } extern struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, int silent); static inline void qnx6_put_page(struct page *page) { 
kunmap(page); put_page(page); } extern unsigned qnx6_find_entry(int len, struct inode *dir, const char *name, struct page **res_page);
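The fsNN_to_cpu()/cpu_to_fsNN() helpers above pick little- or big-endian conversion at run time from the s_bytesex flag recorded in the superblock info. A userspace sketch of the same pattern for 64-bit fields, using the glibc/BSD <endian.h> helpers (an assumption of this example, not something the kernel code uses):

/* Userspace sketch of the fs64_to_cpu() pattern above: one superblock flag
 * decides whether on-disk fields are read as little- or big-endian.
 * <endian.h> (le64toh/be64toh) is a glibc/BSD extension, not standard C. */
#include <endian.h>
#include <stdint.h>
#include <stdio.h>

enum { BYTESEX_LE, BYTESEX_BE };

struct demo_sb_info {
	int s_bytesex;		/* holds endianness info, as in qnx6_sb_info */
};

static uint64_t demo_fs64_to_cpu(const struct demo_sb_info *sbi, uint64_t raw)
{
	if (sbi->s_bytesex == BYTESEX_LE)
		return le64toh(raw);
	return be64toh(raw);
}

int main(void)
{
	/* the same in-memory byte pattern, interpreted both ways */
	uint64_t raw = 0x0100000000000000ULL;
	struct demo_sb_info le = { BYTESEX_LE }, be = { BYTESEX_BE };

	printf("as LE: %llu\n", (unsigned long long)demo_fs64_to_cpu(&le, raw));
	printf("as BE: %llu\n", (unsigned long long)demo_fs64_to_cpu(&be, raw));
	return 0;
}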
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
* * Copyright (C) Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk) * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) * Copyright (C) Steven Whitehouse GW7RRM (stevew@acm.org) * Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de) * Copyright (C) Hans-Joachim Hetscher DD8NE (dd8ne@bnv-bamberg.de) * Copyright (C) Frederic Rible F1OAT (frible@teaser.fr) */ #include <linux/capability.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/timer.h> #include <linux/in.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/string.h> #include <linux/sockios.h> #include <linux/net.h> #include <linux/slab.h> #include <net/ax25.h> #include <linux/inet.h> #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/skbuff.h> #include <linux/spinlock.h> #include <net/sock.h> #include <linux/uaccess.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/init.h> #include <linux/seq_file.h> #include <linux/export.h> static ax25_route *ax25_route_list; DEFINE_RWLOCK(ax25_route_lock); void ax25_rt_device_down(struct net_device *dev) { ax25_route *s, *t, *ax25_rt; write_lock_bh(&ax25_route_lock); ax25_rt = ax25_route_list; while (ax25_rt != NULL) { s = ax25_rt; ax25_rt = ax25_rt->next; if (s->dev == dev) { if (ax25_route_list == s) { ax25_route_list = s->next; kfree(s->digipeat); kfree(s); } else { for (t = ax25_route_list; t != NULL; t = t->next) { if (t->next == s) { t->next = s->next; kfree(s->digipeat); kfree(s); break; } } } } } write_unlock_bh(&ax25_route_lock); } static int __must_check ax25_rt_add(struct ax25_routes_struct *route) { ax25_route *ax25_rt; ax25_dev *ax25_dev; int i; if (route->digi_count > AX25_MAX_DIGIS) return -EINVAL; ax25_dev = ax25_addr_ax25dev(&route->port_addr); if (!ax25_dev) return -EINVAL; write_lock_bh(&ax25_route_lock); ax25_rt = ax25_route_list; while (ax25_rt != NULL) { if (ax25cmp(&ax25_rt->callsign, &route->dest_addr) == 0 && ax25_rt->dev == ax25_dev->dev) { kfree(ax25_rt->digipeat); ax25_rt->digipeat = NULL; if (route->digi_count != 0) { if ((ax25_rt->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) { write_unlock_bh(&ax25_route_lock); ax25_dev_put(ax25_dev); return -ENOMEM; } ax25_rt->digipeat->lastrepeat = -1; ax25_rt->digipeat->ndigi = route->digi_count; for (i = 0; i < route->digi_count; i++) { ax25_rt->digipeat->repeated[i] = 0; ax25_rt->digipeat->calls[i] = route->digi_addr[i]; } } write_unlock_bh(&ax25_route_lock); ax25_dev_put(ax25_dev); return 0; } ax25_rt = ax25_rt->next; } if ((ax25_rt = kmalloc(sizeof(ax25_route), GFP_ATOMIC)) == NULL) { write_unlock_bh(&ax25_route_lock); ax25_dev_put(ax25_dev); return -ENOMEM; } refcount_set(&ax25_rt->refcount, 1); ax25_rt->callsign = route->dest_addr; ax25_rt->dev = ax25_dev->dev; ax25_rt->digipeat = NULL; ax25_rt->ip_mode = ' '; if (route->digi_count != 0) { if ((ax25_rt->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) { write_unlock_bh(&ax25_route_lock); kfree(ax25_rt); ax25_dev_put(ax25_dev); return -ENOMEM; } ax25_rt->digipeat->lastrepeat = -1; ax25_rt->digipeat->ndigi = route->digi_count; for (i = 0; i < route->digi_count; i++) { ax25_rt->digipeat->repeated[i] = 0; ax25_rt->digipeat->calls[i] = route->digi_addr[i]; } } ax25_rt->next = ax25_route_list; ax25_route_list = ax25_rt; write_unlock_bh(&ax25_route_lock); ax25_dev_put(ax25_dev); return 0; } void __ax25_put_route(ax25_route *ax25_rt) { kfree(ax25_rt->digipeat); kfree(ax25_rt); } static int ax25_rt_del(struct ax25_routes_struct 
*route) { ax25_route *s, *t, *ax25_rt; ax25_dev *ax25_dev; if ((ax25_dev = ax25_addr_ax25dev(&route->port_addr)) == NULL) return -EINVAL; write_lock_bh(&ax25_route_lock); ax25_rt = ax25_route_list; while (ax25_rt != NULL) { s = ax25_rt; ax25_rt = ax25_rt->next; if (s->dev == ax25_dev->dev && ax25cmp(&route->dest_addr, &s->callsign) == 0) { if (ax25_route_list == s) { ax25_route_list = s->next; ax25_put_route(s); } else { for (t = ax25_route_list; t != NULL; t = t->next) { if (t->next == s) { t->next = s->next; ax25_put_route(s); break; } } } } } write_unlock_bh(&ax25_route_lock); ax25_dev_put(ax25_dev); return 0; } static int ax25_rt_opt(struct ax25_route_opt_struct *rt_option) { ax25_route *ax25_rt; ax25_dev *ax25_dev; int err = 0; if ((ax25_dev = ax25_addr_ax25dev(&rt_option->port_addr)) == NULL) return -EINVAL; write_lock_bh(&ax25_route_lock); ax25_rt = ax25_route_list; while (ax25_rt != NULL) { if (ax25_rt->dev == ax25_dev->dev && ax25cmp(&rt_option->dest_addr, &ax25_rt->callsign) == 0) { switch (rt_option->cmd) { case AX25_SET_RT_IPMODE: switch (rt_option->arg) { case ' ': case 'D': case 'V': ax25_rt->ip_mode = rt_option->arg; break; default: err = -EINVAL; goto out; } break; default: err = -EINVAL; goto out; } } ax25_rt = ax25_rt->next; } out: write_unlock_bh(&ax25_route_lock); ax25_dev_put(ax25_dev); return err; } int ax25_rt_ioctl(unsigned int cmd, void __user *arg) { struct ax25_route_opt_struct rt_option; struct ax25_routes_struct route; switch (cmd) { case SIOCADDRT: if (copy_from_user(&route, arg, sizeof(route))) return -EFAULT; return ax25_rt_add(&route); case SIOCDELRT: if (copy_from_user(&route, arg, sizeof(route))) return -EFAULT; return ax25_rt_del(&route); case SIOCAX25OPTRT: if (copy_from_user(&rt_option, arg, sizeof(rt_option))) return -EFAULT; return ax25_rt_opt(&rt_option); default: return -EINVAL; } } #ifdef CONFIG_PROC_FS static void *ax25_rt_seq_start(struct seq_file *seq, loff_t *pos) __acquires(ax25_route_lock) { struct ax25_route *ax25_rt; int i = 1; read_lock(&ax25_route_lock); if (*pos == 0) return SEQ_START_TOKEN; for (ax25_rt = ax25_route_list; ax25_rt != NULL; ax25_rt = ax25_rt->next) { if (i == *pos) return ax25_rt; ++i; } return NULL; } static void *ax25_rt_seq_next(struct seq_file *seq, void *v, loff_t *pos) { ++*pos; return (v == SEQ_START_TOKEN) ? ax25_route_list : ((struct ax25_route *) v)->next; } static void ax25_rt_seq_stop(struct seq_file *seq, void *v) __releases(ax25_route_lock) { read_unlock(&ax25_route_lock); } static int ax25_rt_seq_show(struct seq_file *seq, void *v) { char buf[11]; if (v == SEQ_START_TOKEN) seq_puts(seq, "callsign dev mode digipeaters\n"); else { struct ax25_route *ax25_rt = v; const char *callsign; int i; if (ax25cmp(&ax25_rt->callsign, &null_ax25_address) == 0) callsign = "default"; else callsign = ax2asc(buf, &ax25_rt->callsign); seq_printf(seq, "%-9s %-4s", callsign, ax25_rt->dev ? ax25_rt->dev->name : "???"); switch (ax25_rt->ip_mode) { case 'V': seq_puts(seq, " vc"); break; case 'D': seq_puts(seq, " dg"); break; default: seq_puts(seq, " *"); break; } if (ax25_rt->digipeat != NULL) for (i = 0; i < ax25_rt->digipeat->ndigi; i++) seq_printf(seq, " %s", ax2asc(buf, &ax25_rt->digipeat->calls[i])); seq_puts(seq, "\n"); } return 0; } const struct seq_operations ax25_rt_seqops = { .start = ax25_rt_seq_start, .next = ax25_rt_seq_next, .stop = ax25_rt_seq_stop, .show = ax25_rt_seq_show, }; #endif /* * Find AX.25 route * * Only routes with a reference count of zero can be destroyed. 
* Must be called with ax25_route_lock read locked. */ ax25_route *ax25_get_route(ax25_address *addr, struct net_device *dev) { ax25_route *ax25_spe_rt = NULL; ax25_route *ax25_def_rt = NULL; ax25_route *ax25_rt; /* * Bind to the physical interface we heard them on, or the default * route if none is found; */ for (ax25_rt = ax25_route_list; ax25_rt != NULL; ax25_rt = ax25_rt->next) { if (dev == NULL) { if (ax25cmp(&ax25_rt->callsign, addr) == 0 && ax25_rt->dev != NULL) ax25_spe_rt = ax25_rt; if (ax25cmp(&ax25_rt->callsign, &null_ax25_address) == 0 && ax25_rt->dev != NULL) ax25_def_rt = ax25_rt; } else { if (ax25cmp(&ax25_rt->callsign, addr) == 0 && ax25_rt->dev == dev) ax25_spe_rt = ax25_rt; if (ax25cmp(&ax25_rt->callsign, &null_ax25_address) == 0 && ax25_rt->dev == dev) ax25_def_rt = ax25_rt; } } ax25_rt = ax25_def_rt; if (ax25_spe_rt != NULL) ax25_rt = ax25_spe_rt; return ax25_rt; } /* * Adjust path: If you specify a default route and want to connect * a target on the digipeater path but w/o having a special route * set before, the path has to be truncated from your target on. */ static inline void ax25_adjust_path(ax25_address *addr, ax25_digi *digipeat) { int k; for (k = 0; k < digipeat->ndigi; k++) { if (ax25cmp(addr, &digipeat->calls[k]) == 0) break; } digipeat->ndigi = k; } /* * Find which interface to use. */ int ax25_rt_autobind(ax25_cb *ax25, ax25_address *addr) { ax25_uid_assoc *user; ax25_route *ax25_rt; int err = 0; ax25_route_lock_use(); ax25_rt = ax25_get_route(addr, NULL); if (!ax25_rt) { ax25_route_lock_unuse(); return -EHOSTUNREACH; } if ((ax25->ax25_dev = ax25_dev_ax25dev(ax25_rt->dev)) == NULL) { err = -EHOSTUNREACH; goto put; } user = ax25_findbyuid(current_euid()); if (user) { ax25->source_addr = user->call; ax25_uid_put(user); } else { if (ax25_uid_policy && !capable(CAP_NET_BIND_SERVICE)) { err = -EPERM; goto put; } ax25->source_addr = *(ax25_address *)ax25->ax25_dev->dev->dev_addr; } if (ax25_rt->digipeat != NULL) { ax25->digipeat = kmemdup(ax25_rt->digipeat, sizeof(ax25_digi), GFP_ATOMIC); if (ax25->digipeat == NULL) { err = -ENOMEM; goto put; } ax25_adjust_path(addr, ax25->digipeat); } if (ax25->sk != NULL) { local_bh_disable(); bh_lock_sock(ax25->sk); sock_reset_flag(ax25->sk, SOCK_ZAPPED); bh_unlock_sock(ax25->sk); local_bh_enable(); } put: ax25_route_lock_unuse(); return err; } struct sk_buff *ax25_rt_build_path(struct sk_buff *skb, ax25_address *src, ax25_address *dest, ax25_digi *digi) { struct sk_buff *skbn; unsigned char *bp; int len; len = digi->ndigi * AX25_ADDR_LEN; if (skb_headroom(skb) < len) { if ((skbn = skb_realloc_headroom(skb, len)) == NULL) { printk(KERN_CRIT "AX.25: ax25_dg_build_path - out of memory\n"); return NULL; } if (skb->sk != NULL) skb_set_owner_w(skbn, skb->sk); consume_skb(skb); skb = skbn; } bp = skb_push(skb, len); ax25_addr_build(bp, src, dest, digi, AX25_COMMAND, AX25_MODULUS); return skb; } /* * Free all memory associated with routing structures. */ void __exit ax25_rt_free(void) { ax25_route *s, *ax25_rt = ax25_route_list; write_lock_bh(&ax25_route_lock); while (ax25_rt != NULL) { s = ax25_rt; ax25_rt = ax25_rt->next; kfree(s->digipeat); kfree(s); } write_unlock_bh(&ax25_route_lock); }
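ax25_get_route() remembers two candidates while walking the route list, the entry whose callsign matches exactly and the default entry with the null callsign, and prefers the specific one. A simplified userspace sketch of that selection rule, with strings standing in for AX.25 callsigns:

/* Sketch of the ax25_get_route() selection rule: remember both the exact
 * match and the default entry while walking the list, prefer the exact one.
 * The route struct and callsign type are simplified stand-ins. */
#include <stdio.h>
#include <string.h>

struct demo_route {
	const char *callsign;	/* "" plays the role of null_ax25_address */
	const char *dev;
	struct demo_route *next;
};

static const struct demo_route *demo_get_route(const struct demo_route *list,
					       const char *addr)
{
	const struct demo_route *spe = NULL, *def = NULL;

	for (const struct demo_route *rt = list; rt; rt = rt->next) {
		if (strcmp(rt->callsign, addr) == 0)
			spe = rt;		/* specific route */
		else if (rt->callsign[0] == '\0')
			def = rt;		/* default route */
	}
	return spe ? spe : def;
}

int main(void)
{
	struct demo_route def = { "",        "ax0", NULL };
	struct demo_route r1  = { "G4KLX-1", "ax1", &def };

	printf("G4KLX-1 -> %s\n", demo_get_route(&r1, "G4KLX-1")->dev);
	printf("F1OAT-2 -> %s\n", demo_get_route(&r1, "F1OAT-2")->dev);
	return 0;
}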
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM skb

#if !defined(_TRACE_SKB_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_SKB_H

#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/tracepoint.h>

/*
 * Tracepoint for freeing an sk_buff:
 */
TRACE_EVENT(kfree_skb,

	TP_PROTO(struct sk_buff *skb, void *location),

	TP_ARGS(skb, location),

	TP_STRUCT__entry(
		__field(	void *,		skbaddr		)
		__field(	void *,		location	)
		__field(	unsigned short,	protocol	)
	),

	TP_fast_assign(
		__entry->skbaddr = skb;
		__entry->location = location;
		__entry->protocol = ntohs(skb->protocol);
	),

	TP_printk("skbaddr=%p protocol=%u location=%p",
		__entry->skbaddr, __entry->protocol, __entry->location)
);

TRACE_EVENT(consume_skb,

	TP_PROTO(struct sk_buff *skb),

	TP_ARGS(skb),

	TP_STRUCT__entry(
		__field(	void *,	skbaddr	)
	),

	TP_fast_assign(
		__entry->skbaddr = skb;
	),

	TP_printk("skbaddr=%p", __entry->skbaddr)
);

TRACE_EVENT(skb_copy_datagram_iovec,

	TP_PROTO(const struct sk_buff *skb, int len),

	TP_ARGS(skb, len),

	TP_STRUCT__entry(
		__field(	const void *,	skbaddr	)
		__field(	int,		len	)
	),

	TP_fast_assign(
		__entry->skbaddr = skb;
		__entry->len = len;
	),

	TP_printk("skbaddr=%p len=%d",
		__entry->skbaddr, __entry->len)
);

#endif /* _TRACE_SKB_H */

/* This part must be outside protection */
#include <trace/define_trace.h>
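The kfree_skb, consume_skb and skb_copy_datagram_iovec events above become visible through tracefs once enabled. The sketch below is one way a userspace reader might consume the kfree_skb event; it assumes tracefs is mounted at /sys/kernel/tracing (older setups use /sys/kernel/debug/tracing) and needs root:

/* Sketch of consuming the kfree_skb tracepoint from userspace via tracefs.
 * Paths assume tracefs at /sys/kernel/tracing; reading trace_pipe needs root. */
#include <stdio.h>

static int write_str(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	fputs(val, f);
	fclose(f);
	return 0;
}

int main(void)
{
	char line[512];
	FILE *pipe;

	if (write_str("/sys/kernel/tracing/events/skb/kfree_skb/enable", "1")) {
		perror("enable kfree_skb");
		return 1;
	}

	pipe = fopen("/sys/kernel/tracing/trace_pipe", "r");
	if (!pipe) {
		perror("trace_pipe");
		return 1;
	}

	/* each line carries the TP_printk() output: skbaddr, protocol, location */
	for (int i = 0; i < 5 && fgets(line, sizeof(line), pipe); i++)
		fputs(line, stdout);

	fclose(pipe);
	return 0;
}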
/* * NET4: Implementation of BSD Unix domain sockets. * * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * * Fixes: * Linus Torvalds : Assorted bug cures. * Niibe Yutaka : async I/O support. * Carsten Paeth : PF_UNIX check, address fixes. * Alan Cox : Limit size of allocated blocks. * Alan Cox : Fixed the stupid socketpair bug. * Alan Cox : BSD compatibility fine tuning. * Alan Cox : Fixed a bug in connect when interrupted. * Alan Cox : Sorted out a proper draft version of * file descriptor passing hacked up from * Mike Shaver's work. * Marty Leisner : Fixes to fd passing * Nick Nevin : recvmsg bugfix. * Alan Cox : Started proper garbage collector * Heiko EiBfeldt : Missing verify_area check * Alan Cox : Started POSIXisms * Andreas Schwab : Replace inode by dentry for proper * reference counting * Kirk Petersen : Made this a module * Christoph Rohland : Elegant non-blocking accept/connect algorithm. * Lots of bug fixes. * Alexey Kuznetsov : Repaired (I hope) bugs introduced * by the above two patches. * Andrea Arcangeli : If possible we block in connect(2) * if the max backlog of the listen socket * has been reached. This won't break * old apps and it will avoid a huge amount * of socks hashed (this is for unix_gc() * performance reasons). * Security fix that limits the max * number of socks to 2*max_files and * the number of skbs queueable in the * dgram receiver. * Artur Skawina : Hash function optimizations * Alexey Kuznetsov : Full scale SMP. Lots of bugs are introduced 8) * Malcolm Beattie : Set peercred for socketpair * Michal Ostrowski : Module initialization cleanup. * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT, * the core infrastructure is doing that * for all net proto families now (2.5.69+) * * * Known differences from reference BSD that was tested: * * [TO FIX] * ECONNREFUSED is not returned from one end of a connected() socket to the * other the moment one end closes. * fstat() doesn't return st_dev=0, and gives the blksize as high water mark * and a fake inode identifier (nor the BSD first socket fstat twice bug). * [NOT TO FIX] * accept() returns a path name even if the connecting socket has closed * in the meantime (BSD loses the path and gives up). * accept() returns 0 length path for an unbound connector. BSD returns 16 * and a null first byte in the path (but not for gethost/peername - BSD bug ??) * socketpair(...SOCK_RAW..) doesn't panic the kernel. * BSD af_unix apparently has connect forgetting to block properly. * (need to check this with the POSIX spec in detail) * * Differences from 2.0.0-11-...
(ANK) * Bug fixes and improvements. * - client shutdown killed server socket. * - removed all useless cli/sti pairs. * * Semantic changes/extensions. * - generic control message passing. * - SCM_CREDENTIALS control message. * - "Abstract" (not FS based) socket bindings. * Abstract names are sequences of bytes (not zero terminated) * started by 0, so that this name space does not intersect * with BSD names. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/kernel.h> #include <linux/signal.h> #include <linux/sched/signal.h> #include <linux/errno.h> #include <linux/string.h> #include <linux/stat.h> #include <linux/dcache.h> #include <linux/namei.h> #include <linux/socket.h> #include <linux/un.h> #include <linux/fcntl.h> #include <linux/termios.h> #include <linux/sockios.h> #include <linux/net.h> #include <linux/in.h> #include <linux/fs.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/skbuff.h> #include <linux/netdevice.h> #include <net/net_namespace.h> #include <net/sock.h> #include <net/tcp_states.h> #include <net/af_unix.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <net/scm.h> #include <linux/init.h> #include <linux/poll.h> #include <linux/rtnetlink.h> #include <linux/mount.h> #include <net/checksum.h> #include <linux/security.h> #include <linux/freezer.h> #include <linux/file.h> #include "scm.h" struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE]; EXPORT_SYMBOL_GPL(unix_socket_table); DEFINE_SPINLOCK(unix_table_lock); EXPORT_SYMBOL_GPL(unix_table_lock); static atomic_long_t unix_nr_socks; static struct hlist_head *unix_sockets_unbound(void *addr) { unsigned long hash = (unsigned long)addr; hash ^= hash >> 16; hash ^= hash >> 8; hash %= UNIX_HASH_SIZE; return &unix_socket_table[UNIX_HASH_SIZE + hash]; } #define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE) #ifdef CONFIG_SECURITY_NETWORK static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) { UNIXCB(skb).secid = scm->secid; } static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) { scm->secid = UNIXCB(skb).secid; } static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb) { return (scm->secid == UNIXCB(skb).secid); } #else static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) { } static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) { } static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb) { return true; } #endif /* CONFIG_SECURITY_NETWORK */ /* * SMP locking strategy: * hash table is protected with spinlock unix_table_lock * each socket state is protected by separate spin lock. 
*/ static inline unsigned int unix_hash_fold(__wsum n) { unsigned int hash = (__force unsigned int)csum_fold(n); hash ^= hash>>8; return hash&(UNIX_HASH_SIZE-1); } #define unix_peer(sk) (unix_sk(sk)->peer) static inline int unix_our_peer(struct sock *sk, struct sock *osk) { return unix_peer(osk) == sk; } static inline int unix_may_send(struct sock *sk, struct sock *osk) { return unix_peer(osk) == NULL || unix_our_peer(sk, osk); } static inline int unix_recvq_full(const struct sock *sk) { return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog; } static inline int unix_recvq_full_lockless(const struct sock *sk) { return skb_queue_len_lockless(&sk->sk_receive_queue) > READ_ONCE(sk->sk_max_ack_backlog); } struct sock *unix_peer_get(struct sock *s) { struct sock *peer; unix_state_lock(s); peer = unix_peer(s); if (peer) sock_hold(peer); unix_state_unlock(s); return peer; } EXPORT_SYMBOL_GPL(unix_peer_get); static inline void unix_release_addr(struct unix_address *addr) { if (refcount_dec_and_test(&addr->refcnt)) kfree(addr); } /* * Check unix socket name: * - should be not zero length. * - if started by not zero, should be NULL terminated (FS object) * - if started by zero, it is abstract name. */ static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp) { *hashp = 0; if (len <= sizeof(short) || len > sizeof(*sunaddr)) return -EINVAL; if (!sunaddr || sunaddr->sun_family != AF_UNIX) return -EINVAL; if (sunaddr->sun_path[0]) { /* * This may look like an off by one error but it is a bit more * subtle. 108 is the longest valid AF_UNIX path for a binding. * sun_path[108] doesn't as such exist. However in kernel space * we are guaranteed that it is a valid memory location in our * kernel address buffer. */ ((char *)sunaddr)[len] = 0; len = strlen(sunaddr->sun_path)+1+sizeof(short); return len; } *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0)); return len; } static void __unix_remove_socket(struct sock *sk) { sk_del_node_init(sk); } static void __unix_insert_socket(struct hlist_head *list, struct sock *sk) { WARN_ON(!sk_unhashed(sk)); sk_add_node(sk, list); } static inline void unix_remove_socket(struct sock *sk) { spin_lock(&unix_table_lock); __unix_remove_socket(sk); spin_unlock(&unix_table_lock); } static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk) { spin_lock(&unix_table_lock); __unix_insert_socket(list, sk); spin_unlock(&unix_table_lock); } static struct sock *__unix_find_socket_byname(struct net *net, struct sockaddr_un *sunname, int len, int type, unsigned int hash) { struct sock *s; sk_for_each(s, &unix_socket_table[hash ^ type]) { struct unix_sock *u = unix_sk(s); if (!net_eq(sock_net(s), net)) continue; if (u->addr->len == len && !memcmp(u->addr->name, sunname, len)) goto found; } s = NULL; found: return s; } static inline struct sock *unix_find_socket_byname(struct net *net, struct sockaddr_un *sunname, int len, int type, unsigned int hash) { struct sock *s; spin_lock(&unix_table_lock); s = __unix_find_socket_byname(net, sunname, len, type, hash); if (s) sock_hold(s); spin_unlock(&unix_table_lock); return s; } static struct sock *unix_find_socket_byinode(struct inode *i) { struct sock *s; spin_lock(&unix_table_lock); sk_for_each(s, &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) { struct dentry *dentry = unix_sk(s)->path.dentry; if (dentry && d_backing_inode(dentry) == i) { sock_hold(s); goto found; } } s = NULL; found: spin_unlock(&unix_table_lock); return s; } /* Support code for asymmetrically connected dgram 
sockets * * If a datagram socket is connected to a socket not itself connected * to the first socket (eg, /dev/log), clients may only enqueue more * messages if the present receive queue of the server socket is not * "too large". This means there's a second writeability condition * poll and sendmsg need to test. The dgram recv code will do a wake * up on the peer_wait wait queue of a socket upon reception of a * datagram which needs to be propagated to sleeping would-be writers * since these might not have sent anything so far. This can't be * accomplished via poll_wait because the lifetime of the server * socket might be less than that of its clients if these break their * association with it or if the server socket is closed while clients * are still connected to it and there's no way to inform "a polling * implementation" that it should let go of a certain wait queue * * In order to propagate a wake up, a wait_queue_entry_t of the client * socket is enqueued on the peer_wait queue of the server socket * whose wake function does a wake_up on the ordinary client socket * wait queue. This connection is established whenever a write (or * poll for write) hit the flow control condition and broken when the * association to the server socket is dissolved or after a wake up * was relayed. */ static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags, void *key) { struct unix_sock *u; wait_queue_head_t *u_sleep; u = container_of(q, struct unix_sock, peer_wake); __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait, q); u->peer_wake.private = NULL; /* relaying can only happen while the wq still exists */ u_sleep = sk_sleep(&u->sk); if (u_sleep) wake_up_interruptible_poll(u_sleep, key_to_poll(key)); return 0; } static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other) { struct unix_sock *u, *u_other; int rc; u = unix_sk(sk); u_other = unix_sk(other); rc = 0; spin_lock(&u_other->peer_wait.lock); if (!u->peer_wake.private) { u->peer_wake.private = other; __add_wait_queue(&u_other->peer_wait, &u->peer_wake); rc = 1; } spin_unlock(&u_other->peer_wait.lock); return rc; } static void unix_dgram_peer_wake_disconnect(struct sock *sk, struct sock *other) { struct unix_sock *u, *u_other; u = unix_sk(sk); u_other = unix_sk(other); spin_lock(&u_other->peer_wait.lock); if (u->peer_wake.private == other) { __remove_wait_queue(&u_other->peer_wait, &u->peer_wake); u->peer_wake.private = NULL; } spin_unlock(&u_other->peer_wait.lock); } static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk, struct sock *other) { unix_dgram_peer_wake_disconnect(sk, other); wake_up_interruptible_poll(sk_sleep(sk), EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND); } /* preconditions: * - unix_peer(sk) == other * - association is stable */ static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other) { int connected; connected = unix_dgram_peer_wake_connect(sk, other); /* If other is SOCK_DEAD, we want to make sure we signal * POLLOUT, such that a subsequent write() can get a * -ECONNREFUSED. Otherwise, if we haven't queued any skbs * to other and its full, we will hang waiting for POLLOUT. 
*/ if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD)) return 1; if (connected) unix_dgram_peer_wake_disconnect(sk, other); return 0; } static int unix_writable(const struct sock *sk) { return sk->sk_state != TCP_LISTEN && (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf; } static void unix_write_space(struct sock *sk) { struct socket_wq *wq; rcu_read_lock(); if (unix_writable(sk)) { wq = rcu_dereference(sk->sk_wq); if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } rcu_read_unlock(); } /* When dgram socket disconnects (or changes its peer), we clear its receive * queue of packets arrived from previous peer. First, it allows to do * flow control based only on wmem_alloc; second, sk connected to peer * may receive messages only from that peer. */ static void unix_dgram_disconnected(struct sock *sk, struct sock *other) { if (!skb_queue_empty(&sk->sk_receive_queue)) { skb_queue_purge(&sk->sk_receive_queue); wake_up_interruptible_all(&unix_sk(sk)->peer_wait); /* If one link of bidirectional dgram pipe is disconnected, * we signal error. Messages are lost. Do not make this, * when peer was not connected to us. */ if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) { other->sk_err = ECONNRESET; other->sk_error_report(other); } } } static void unix_sock_destructor(struct sock *sk) { struct unix_sock *u = unix_sk(sk); skb_queue_purge(&sk->sk_receive_queue); WARN_ON(refcount_read(&sk->sk_wmem_alloc)); WARN_ON(!sk_unhashed(sk)); WARN_ON(sk->sk_socket); if (!sock_flag(sk, SOCK_DEAD)) { pr_info("Attempt to release alive unix socket: %p\n", sk); return; } if (u->addr) unix_release_addr(u->addr); atomic_long_dec(&unix_nr_socks); local_bh_disable(); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); local_bh_enable(); #ifdef UNIX_REFCNT_DEBUG pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk, atomic_long_read(&unix_nr_socks)); #endif } static void unix_release_sock(struct sock *sk, int embrion) { struct unix_sock *u = unix_sk(sk); struct path path; struct sock *skpair; struct sk_buff *skb; int state; unix_remove_socket(sk); /* Clear state */ unix_state_lock(sk); sock_orphan(sk); sk->sk_shutdown = SHUTDOWN_MASK; path = u->path; u->path.dentry = NULL; u->path.mnt = NULL; state = sk->sk_state; sk->sk_state = TCP_CLOSE; skpair = unix_peer(sk); unix_peer(sk) = NULL; unix_state_unlock(sk); wake_up_interruptible_all(&u->peer_wait); if (skpair != NULL) { if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) { unix_state_lock(skpair); /* No more writes */ skpair->sk_shutdown = SHUTDOWN_MASK; if (!skb_queue_empty(&sk->sk_receive_queue) || embrion) skpair->sk_err = ECONNRESET; unix_state_unlock(skpair); skpair->sk_state_change(skpair); sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP); } unix_dgram_peer_wake_disconnect(sk, skpair); sock_put(skpair); /* It may now die */ } /* Try to flush out this socket. Throw out buffers at least */ while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { if (state == TCP_LISTEN) unix_release_sock(skb->sk, 1); /* passed fds are erased in the kfree_skb hook */ UNIXCB(skb).consumed = skb->len; kfree_skb(skb); } if (path.dentry) path_put(&path); sock_put(sk); /* ---- Socket is dead now and most probably destroyed ---- */ /* * Fixme: BSD difference: In BSD all sockets connected to us get * ECONNRESET and we die on the spot. In Linux we behave * like files and pipes do and wait for the last * dereference. 
* * Can't we simply set sock->err? * * What the above comment does talk about? --ANK(980817) */ if (unix_tot_inflight) unix_gc(); /* Garbage collect fds */ } static void init_peercred(struct sock *sk) { const struct cred *old_cred; struct pid *old_pid; spin_lock(&sk->sk_peer_lock); old_pid = sk->sk_peer_pid; old_cred = sk->sk_peer_cred; sk->sk_peer_pid = get_pid(task_tgid(current)); sk->sk_peer_cred = get_current_cred(); spin_unlock(&sk->sk_peer_lock); put_pid(old_pid); put_cred(old_cred); } static void copy_peercred(struct sock *sk, struct sock *peersk) { const struct cred *old_cred; struct pid *old_pid; if (sk < peersk) { spin_lock(&sk->sk_peer_lock); spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING); } else { spin_lock(&peersk->sk_peer_lock); spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING); } old_pid = sk->sk_peer_pid; old_cred = sk->sk_peer_cred; sk->sk_peer_pid = get_pid(peersk->sk_peer_pid); sk->sk_peer_cred = get_cred(peersk->sk_peer_cred); spin_unlock(&sk->sk_peer_lock); spin_unlock(&peersk->sk_peer_lock); put_pid(old_pid); put_cred(old_cred); } static int unix_listen(struct socket *sock, int backlog) { int err; struct sock *sk = sock->sk; struct unix_sock *u = unix_sk(sk); struct pid *old_pid = NULL; err = -EOPNOTSUPP; if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET) goto out; /* Only stream/seqpacket sockets accept */ err = -EINVAL; if (!u->addr) goto out; /* No listens on an unbound socket */ unix_state_lock(sk); if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN) goto out_unlock; if (backlog > sk->sk_max_ack_backlog) wake_up_interruptible_all(&u->peer_wait); sk->sk_max_ack_backlog = backlog; sk->sk_state = TCP_LISTEN; /* set credentials so connect can copy them */ init_peercred(sk); err = 0; out_unlock: unix_state_unlock(sk); put_pid(old_pid); out: return err; } static int unix_release(struct socket *); static int unix_bind(struct socket *, struct sockaddr *, int); static int unix_stream_connect(struct socket *, struct sockaddr *, int addr_len, int flags); static int unix_socketpair(struct socket *, struct socket *); static int unix_accept(struct socket *, struct socket *, int, bool); static int unix_getname(struct socket *, struct sockaddr *, int); static __poll_t unix_poll(struct file *, struct socket *, poll_table *); static __poll_t unix_dgram_poll(struct file *, struct socket *, poll_table *); static int unix_ioctl(struct socket *, unsigned int, unsigned long); #ifdef CONFIG_COMPAT static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); #endif static int unix_shutdown(struct socket *, int); static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t); static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int); static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset, size_t size, int flags); static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos, struct pipe_inode_info *, size_t size, unsigned int flags); static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t); static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int); static int unix_dgram_connect(struct socket *, struct sockaddr *, int, int); static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t); static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t, int); static int unix_set_peek_off(struct sock *sk, int val) { struct unix_sock *u = unix_sk(sk); if (mutex_lock_interruptible(&u->iolock)) return -EINTR; 
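/*
 * The field set just below backs the SO_PEEK_OFF socket option for
 * AF_UNIX sockets: once a non-negative peek offset is stored here,
 * MSG_PEEK reads start that many bytes into the queued data, and the
 * offset advances by the amount peeked (sk_peek_offset_fwd() in the
 * receive paths below). A minimal userspace sketch, illustrative only
 * and not part of this file ("fd" is assumed to be a connected
 * AF_UNIX socket with data already queued):
 *
 *	int off = 0;
 *	char buf[64];
 *
 *	setsockopt(fd, SOL_SOCKET, SO_PEEK_OFF, &off, sizeof(off));
 *	recv(fd, buf, sizeof(buf), MSG_PEEK);	peeks from offset 0
 *	recv(fd, buf, sizeof(buf), MSG_PEEK);	continues after the bytes
 *						peeked by the first call
 */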
sk->sk_peek_off = val; mutex_unlock(&u->iolock); return 0; } static const struct proto_ops unix_stream_ops = { .family = PF_UNIX, .owner = THIS_MODULE, .release = unix_release, .bind = unix_bind, .connect = unix_stream_connect, .socketpair = unix_socketpair, .accept = unix_accept, .getname = unix_getname, .poll = unix_poll, .ioctl = unix_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = unix_compat_ioctl, #endif .listen = unix_listen, .shutdown = unix_shutdown, .setsockopt = sock_no_setsockopt, .getsockopt = sock_no_getsockopt, .sendmsg = unix_stream_sendmsg, .recvmsg = unix_stream_recvmsg, .mmap = sock_no_mmap, .sendpage = unix_stream_sendpage, .splice_read = unix_stream_splice_read, .set_peek_off = unix_set_peek_off, }; static const struct proto_ops unix_dgram_ops = { .family = PF_UNIX, .owner = THIS_MODULE, .release = unix_release, .bind = unix_bind, .connect = unix_dgram_connect, .socketpair = unix_socketpair, .accept = sock_no_accept, .getname = unix_getname, .poll = unix_dgram_poll, .ioctl = unix_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = unix_compat_ioctl, #endif .listen = sock_no_listen, .shutdown = unix_shutdown, .setsockopt = sock_no_setsockopt, .getsockopt = sock_no_getsockopt, .sendmsg = unix_dgram_sendmsg, .recvmsg = unix_dgram_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, .set_peek_off = unix_set_peek_off, }; static const struct proto_ops unix_seqpacket_ops = { .family = PF_UNIX, .owner = THIS_MODULE, .release = unix_release, .bind = unix_bind, .connect = unix_stream_connect, .socketpair = unix_socketpair, .accept = unix_accept, .getname = unix_getname, .poll = unix_dgram_poll, .ioctl = unix_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = unix_compat_ioctl, #endif .listen = unix_listen, .shutdown = unix_shutdown, .setsockopt = sock_no_setsockopt, .getsockopt = sock_no_getsockopt, .sendmsg = unix_seqpacket_sendmsg, .recvmsg = unix_seqpacket_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, .set_peek_off = unix_set_peek_off, }; static struct proto unix_proto = { .name = "UNIX", .owner = THIS_MODULE, .obj_size = sizeof(struct unix_sock), }; static struct sock *unix_create1(struct net *net, struct socket *sock, int kern) { struct sock *sk = NULL; struct unix_sock *u; atomic_long_inc(&unix_nr_socks); if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) goto out; sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern); if (!sk) goto out; sock_init_data(sock, sk); sk->sk_allocation = GFP_KERNEL_ACCOUNT; sk->sk_write_space = unix_write_space; sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen; sk->sk_destruct = unix_sock_destructor; u = unix_sk(sk); u->path.dentry = NULL; u->path.mnt = NULL; spin_lock_init(&u->lock); atomic_long_set(&u->inflight, 0); INIT_LIST_HEAD(&u->link); mutex_init(&u->iolock); /* single task reading lock */ mutex_init(&u->bindlock); /* single task binding lock */ init_waitqueue_head(&u->peer_wait); init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay); unix_insert_socket(unix_sockets_unbound(sk), sk); out: if (sk == NULL) atomic_long_dec(&unix_nr_socks); else { local_bh_disable(); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); local_bh_enable(); } return sk; } static int unix_create(struct net *net, struct socket *sock, int protocol, int kern) { if (protocol && protocol != PF_UNIX) return -EPROTONOSUPPORT; sock->state = SS_UNCONNECTED; switch (sock->type) { case SOCK_STREAM: sock->ops = &unix_stream_ops; break; /* * Believe it or not BSD has AF_UNIX, SOCK_RAW though * nothing uses it. 
*/ case SOCK_RAW: sock->type = SOCK_DGRAM; /* fall through */ case SOCK_DGRAM: sock->ops = &unix_dgram_ops; break; case SOCK_SEQPACKET: sock->ops = &unix_seqpacket_ops; break; default: return -ESOCKTNOSUPPORT; } return unix_create1(net, sock, kern) ? 0 : -ENOMEM; } static int unix_release(struct socket *sock) { struct sock *sk = sock->sk; if (!sk) return 0; unix_release_sock(sk, 0); sock->sk = NULL; return 0; } static int unix_autobind(struct socket *sock) { struct sock *sk = sock->sk; struct net *net = sock_net(sk); struct unix_sock *u = unix_sk(sk); static u32 ordernum = 1; struct unix_address *addr; int err; unsigned int retries = 0; err = mutex_lock_interruptible(&u->bindlock); if (err) return err; err = 0; if (u->addr) goto out; err = -ENOMEM; addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL); if (!addr) goto out; addr->name->sun_family = AF_UNIX; refcount_set(&addr->refcnt, 1); retry: addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short); addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0)); spin_lock(&unix_table_lock); ordernum = (ordernum+1)&0xFFFFF; if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type, addr->hash)) { spin_unlock(&unix_table_lock); /* * __unix_find_socket_byname() may take long time if many names * are already in use. */ cond_resched(); /* Give up if all names seems to be in use. */ if (retries++ == 0xFFFFF) { err = -ENOSPC; kfree(addr); goto out; } goto retry; } addr->hash ^= sk->sk_type; __unix_remove_socket(sk); smp_store_release(&u->addr, addr); __unix_insert_socket(&unix_socket_table[addr->hash], sk); spin_unlock(&unix_table_lock); err = 0; out: mutex_unlock(&u->bindlock); return err; } static struct sock *unix_find_other(struct net *net, struct sockaddr_un *sunname, int len, int type, unsigned int hash, int *error) { struct sock *u; struct path path; int err = 0; if (sunname->sun_path[0]) { struct inode *inode; err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path); if (err) goto fail; inode = d_backing_inode(path.dentry); err = inode_permission(inode, MAY_WRITE); if (err) goto put_fail; err = -ECONNREFUSED; if (!S_ISSOCK(inode->i_mode)) goto put_fail; u = unix_find_socket_byinode(inode); if (!u) goto put_fail; if (u->sk_type == type) touch_atime(&path); path_put(&path); err = -EPROTOTYPE; if (u->sk_type != type) { sock_put(u); goto fail; } } else { err = -ECONNREFUSED; u = unix_find_socket_byname(net, sunname, len, type, hash); if (u) { struct dentry *dentry; dentry = unix_sk(u)->path.dentry; if (dentry) touch_atime(&unix_sk(u)->path); } else goto fail; } return u; put_fail: path_put(&path); fail: *error = err; return NULL; } static int unix_mknod(const char *sun_path, umode_t mode, struct path *res) { struct dentry *dentry; struct path path; int err = 0; /* * Get the parent directory, calculate the hash for last * component. */ dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0); err = PTR_ERR(dentry); if (IS_ERR(dentry)) return err; /* * All right, let's create it. 
*/ err = security_path_mknod(&path, dentry, mode, 0); if (!err) { err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0); if (!err) { res->mnt = mntget(path.mnt); res->dentry = dget(dentry); } } done_path_create(&path, dentry); return err; } static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sock *sk = sock->sk; struct net *net = sock_net(sk); struct unix_sock *u = unix_sk(sk); struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; char *sun_path = sunaddr->sun_path; int err; unsigned int hash; struct unix_address *addr; struct hlist_head *list; struct path path = { }; err = -EINVAL; if (addr_len < offsetofend(struct sockaddr_un, sun_family) || sunaddr->sun_family != AF_UNIX) goto out; if (addr_len == sizeof(short)) { err = unix_autobind(sock); goto out; } err = unix_mkname(sunaddr, addr_len, &hash); if (err < 0) goto out; addr_len = err; if (sun_path[0]) { umode_t mode = S_IFSOCK | (SOCK_INODE(sock)->i_mode & ~current_umask()); err = unix_mknod(sun_path, mode, &path); if (err) { if (err == -EEXIST) err = -EADDRINUSE; goto out; } } err = mutex_lock_interruptible(&u->bindlock); if (err) goto out_put; err = -EINVAL; if (u->addr) goto out_up; err = -ENOMEM; addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL); if (!addr) goto out_up; memcpy(addr->name, sunaddr, addr_len); addr->len = addr_len; addr->hash = hash ^ sk->sk_type; refcount_set(&addr->refcnt, 1); if (sun_path[0]) { addr->hash = UNIX_HASH_SIZE; hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1); spin_lock(&unix_table_lock); u->path = path; list = &unix_socket_table[hash]; } else { spin_lock(&unix_table_lock); err = -EADDRINUSE; if (__unix_find_socket_byname(net, sunaddr, addr_len, sk->sk_type, hash)) { unix_release_addr(addr); goto out_unlock; } list = &unix_socket_table[addr->hash]; } err = 0; __unix_remove_socket(sk); smp_store_release(&u->addr, addr); __unix_insert_socket(list, sk); out_unlock: spin_unlock(&unix_table_lock); out_up: mutex_unlock(&u->bindlock); out_put: if (err) path_put(&path); out: return err; } static void unix_state_double_lock(struct sock *sk1, struct sock *sk2) { if (unlikely(sk1 == sk2) || !sk2) { unix_state_lock(sk1); return; } if (sk1 < sk2) { unix_state_lock(sk1); unix_state_lock_nested(sk2); } else { unix_state_lock(sk2); unix_state_lock_nested(sk1); } } static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2) { if (unlikely(sk1 == sk2) || !sk2) { unix_state_unlock(sk1); return; } unix_state_unlock(sk1); unix_state_unlock(sk2); } static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, int alen, int flags) { struct sock *sk = sock->sk; struct net *net = sock_net(sk); struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr; struct sock *other; unsigned int hash; int err; err = -EINVAL; if (alen < offsetofend(struct sockaddr, sa_family)) goto out; if (addr->sa_family != AF_UNSPEC) { err = unix_mkname(sunaddr, alen, &hash); if (err < 0) goto out; alen = err; if (test_bit(SOCK_PASSCRED, &sock->flags) && !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0) goto out; restart: other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err); if (!other) goto out; unix_state_double_lock(sk, other); /* Apparently VFS overslept socket death. Retry. 
*/ if (sock_flag(other, SOCK_DEAD)) { unix_state_double_unlock(sk, other); sock_put(other); goto restart; } err = -EPERM; if (!unix_may_send(sk, other)) goto out_unlock; err = security_unix_may_send(sk->sk_socket, other->sk_socket); if (err) goto out_unlock; } else { /* * 1003.1g breaking connected state with AF_UNSPEC */ other = NULL; unix_state_double_lock(sk, other); } /* * If it was connected, reconnect. */ if (unix_peer(sk)) { struct sock *old_peer = unix_peer(sk); unix_peer(sk) = other; unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer); unix_state_double_unlock(sk, other); if (other != old_peer) unix_dgram_disconnected(sk, old_peer); sock_put(old_peer); } else { unix_peer(sk) = other; unix_state_double_unlock(sk, other); } return 0; out_unlock: unix_state_double_unlock(sk, other); sock_put(other); out: return err; } static long unix_wait_for_peer(struct sock *other, long timeo) { struct unix_sock *u = unix_sk(other); int sched; DEFINE_WAIT(wait); prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE); sched = !sock_flag(other, SOCK_DEAD) && !(other->sk_shutdown & RCV_SHUTDOWN) && unix_recvq_full(other); unix_state_unlock(other); if (sched) timeo = schedule_timeout(timeo); finish_wait(&u->peer_wait, &wait); return timeo; } static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags) { struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; struct sock *sk = sock->sk; struct net *net = sock_net(sk); struct unix_sock *u = unix_sk(sk), *newu, *otheru; struct sock *newsk = NULL; struct sock *other = NULL; struct sk_buff *skb = NULL; unsigned int hash; int st; int err; long timeo; err = unix_mkname(sunaddr, addr_len, &hash); if (err < 0) goto out; addr_len = err; if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr && (err = unix_autobind(sock)) != 0) goto out; timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); /* First of all allocate resources. If we will make it after state is locked, we will have to recheck all again in any case. */ err = -ENOMEM; /* create new sock for complete connection */ newsk = unix_create1(sock_net(sk), NULL, 0); if (newsk == NULL) goto out; /* Allocate skb for sending to listening sock */ skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL); if (skb == NULL) goto out; restart: /* Find listening sock. */ other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err); if (!other) goto out; /* Latch state of peer */ unix_state_lock(other); /* Apparently VFS overslept socket death. Retry. */ if (sock_flag(other, SOCK_DEAD)) { unix_state_unlock(other); sock_put(other); goto restart; } err = -ECONNREFUSED; if (other->sk_state != TCP_LISTEN) goto out_unlock; if (other->sk_shutdown & RCV_SHUTDOWN) goto out_unlock; if (unix_recvq_full(other)) { err = -EAGAIN; if (!timeo) goto out_unlock; timeo = unix_wait_for_peer(other, timeo); err = sock_intr_errno(timeo); if (signal_pending(current)) goto out; sock_put(other); goto restart; } /* Latch our state. It is tricky place. We need to grab our state lock and cannot drop lock on peer. It is dangerous because deadlock is possible. Connect to self case and simultaneous attempt to connect are eliminated by checking socket state. other is TCP_LISTEN, if sk is TCP_LISTEN we check this before attempt to grab lock. Well, and we have to recheck the state after socket locked. */ st = sk->sk_state; switch (st) { case TCP_CLOSE: /* This is ok... 
continue with connect */ break; case TCP_ESTABLISHED: /* Socket is already connected */ err = -EISCONN; goto out_unlock; default: err = -EINVAL; goto out_unlock; } unix_state_lock_nested(sk); if (sk->sk_state != st) { unix_state_unlock(sk); unix_state_unlock(other); sock_put(other); goto restart; } err = security_unix_stream_connect(sk, other, newsk); if (err) { unix_state_unlock(sk); goto out_unlock; } /* The way is open! Fastly set all the necessary fields... */ sock_hold(sk); unix_peer(newsk) = sk; newsk->sk_state = TCP_ESTABLISHED; newsk->sk_type = sk->sk_type; init_peercred(newsk); newu = unix_sk(newsk); RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq); otheru = unix_sk(other); /* copy address information from listening to new sock * * The contents of *(otheru->addr) and otheru->path * are seen fully set up here, since we have found * otheru in hash under unix_table_lock. Insertion * into the hash chain we'd found it in had been done * in an earlier critical area protected by unix_table_lock, * the same one where we'd set *(otheru->addr) contents, * as well as otheru->path and otheru->addr itself. * * Using smp_store_release() here to set newu->addr * is enough to make those stores, as well as stores * to newu->path visible to anyone who gets newu->addr * by smp_load_acquire(). IOW, the same warranties * as for unix_sock instances bound in unix_bind() or * in unix_autobind(). */ if (otheru->path.dentry) { path_get(&otheru->path); newu->path = otheru->path; } refcount_inc(&otheru->addr->refcnt); smp_store_release(&newu->addr, otheru->addr); /* Set credentials */ copy_peercred(sk, other); sock->state = SS_CONNECTED; sk->sk_state = TCP_ESTABLISHED; sock_hold(newsk); smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */ unix_peer(sk) = newsk; unix_state_unlock(sk); /* take ten and and send info to listening sock */ spin_lock(&other->sk_receive_queue.lock); __skb_queue_tail(&other->sk_receive_queue, skb); spin_unlock(&other->sk_receive_queue.lock); unix_state_unlock(other); other->sk_data_ready(other); sock_put(other); return 0; out_unlock: if (other) unix_state_unlock(other); out: kfree_skb(skb); if (newsk) unix_release_sock(newsk, 0); if (other) sock_put(other); return err; } static int unix_socketpair(struct socket *socka, struct socket *sockb) { struct sock *ska = socka->sk, *skb = sockb->sk; /* Join our sockets back to back */ sock_hold(ska); sock_hold(skb); unix_peer(ska) = skb; unix_peer(skb) = ska; init_peercred(ska); init_peercred(skb); if (ska->sk_type != SOCK_DGRAM) { ska->sk_state = TCP_ESTABLISHED; skb->sk_state = TCP_ESTABLISHED; socka->state = SS_CONNECTED; sockb->state = SS_CONNECTED; } return 0; } static void unix_sock_inherit_flags(const struct socket *old, struct socket *new) { if (test_bit(SOCK_PASSCRED, &old->flags)) set_bit(SOCK_PASSCRED, &new->flags); if (test_bit(SOCK_PASSSEC, &old->flags)) set_bit(SOCK_PASSSEC, &new->flags); } static int unix_accept(struct socket *sock, struct socket *newsock, int flags, bool kern) { struct sock *sk = sock->sk; struct sock *tsk; struct sk_buff *skb; int err; err = -EOPNOTSUPP; if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET) goto out; err = -EINVAL; if (sk->sk_state != TCP_LISTEN) goto out; /* If socket state is TCP_LISTEN it cannot change (for now...), * so that no locks are necessary. */ skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err); if (!skb) { /* This means receive shutdown. 
*/ if (err == 0) err = -EINVAL; goto out; } tsk = skb->sk; skb_free_datagram(sk, skb); wake_up_interruptible(&unix_sk(sk)->peer_wait); /* attach accepted sock to socket */ unix_state_lock(tsk); newsock->state = SS_CONNECTED; unix_sock_inherit_flags(sock, newsock); sock_graft(tsk, newsock); unix_state_unlock(tsk); return 0; out: return err; } static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer) { struct sock *sk = sock->sk; struct unix_address *addr; DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr); int err = 0; if (peer) { sk = unix_peer_get(sk); err = -ENOTCONN; if (!sk) goto out; err = 0; } else { sock_hold(sk); } addr = smp_load_acquire(&unix_sk(sk)->addr); if (!addr) { sunaddr->sun_family = AF_UNIX; sunaddr->sun_path[0] = 0; err = sizeof(short); } else { err = addr->len; memcpy(sunaddr, addr->name, addr->len); } sock_put(sk); out: return err; } static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb) { scm->fp = scm_fp_dup(UNIXCB(skb).fp); /* * Garbage collection of unix sockets starts by selecting a set of * candidate sockets which have reference only from being in flight * (total_refs == inflight_refs). This condition is checked once during * the candidate collection phase, and candidates are marked as such, so * that non-candidates can later be ignored. While inflight_refs is * protected by unix_gc_lock, total_refs (file count) is not, hence this * is an instantaneous decision. * * Once a candidate, however, the socket must not be reinstalled into a * file descriptor while the garbage collection is in progress. * * If the above conditions are met, then the directed graph of * candidates (*) does not change while unix_gc_lock is held. * * Any operations that changes the file count through file descriptors * (dup, close, sendmsg) does not change the graph since candidates are * not installed in fds. * * Dequeing a candidate via recvmsg would install it into an fd, but * that takes unix_gc_lock to decrement the inflight count, so it's * serialized with garbage collection. * * MSG_PEEK is special in that it does not change the inflight count, * yet does install the socket into an fd. The following lock/unlock * pair is to ensure serialization with garbage collection. It must be * done between incrementing the file count and installing the file into * an fd. * * If garbage collection starts after the barrier provided by the * lock/unlock, then it will see the elevated refcount and not mark this * as a candidate. If a garbage collection is already in progress * before the file count was incremented, then the lock/unlock pair will * ensure that garbage collection is finished before progressing to * installing the fd. * * (*) A -> B where B is on the queue of A or B is on the queue of C * which is on the queue of listening socket A. 
*/ spin_lock(&unix_gc_lock); spin_unlock(&unix_gc_lock); } static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) { int err = 0; UNIXCB(skb).pid = get_pid(scm->pid); UNIXCB(skb).uid = scm->creds.uid; UNIXCB(skb).gid = scm->creds.gid; UNIXCB(skb).fp = NULL; unix_get_secdata(scm, skb); if (scm->fp && send_fds) err = unix_attach_fds(scm, skb); skb->destructor = unix_destruct_scm; return err; } static bool unix_passcred_enabled(const struct socket *sock, const struct sock *other) { return test_bit(SOCK_PASSCRED, &sock->flags) || !other->sk_socket || test_bit(SOCK_PASSCRED, &other->sk_socket->flags); } /* * Some apps rely on write() giving SCM_CREDENTIALS * We include credentials if source or destination socket * asserted SOCK_PASSCRED. */ static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, const struct sock *other) { if (UNIXCB(skb).pid) return; if (unix_passcred_enabled(sock, other)) { UNIXCB(skb).pid = get_pid(task_tgid(current)); current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid); } } static int maybe_init_creds(struct scm_cookie *scm, struct socket *socket, const struct sock *other) { int err; struct msghdr msg = { .msg_controllen = 0 }; err = scm_send(socket, &msg, scm, false); if (err) return err; if (unix_passcred_enabled(socket, other)) { scm->pid = get_pid(task_tgid(current)); current_uid_gid(&scm->creds.uid, &scm->creds.gid); } return err; } static bool unix_skb_scm_eq(struct sk_buff *skb, struct scm_cookie *scm) { const struct unix_skb_parms *u = &UNIXCB(skb); return u->pid == scm->pid && uid_eq(u->uid, scm->creds.uid) && gid_eq(u->gid, scm->creds.gid) && unix_secdata_eq(scm, skb); } /* * Send AF_UNIX data. */ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct net *net = sock_net(sk); struct unix_sock *u = unix_sk(sk); DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name); struct sock *other = NULL; int namelen = 0; /* fake GCC */ int err; unsigned int hash; struct sk_buff *skb; long timeo; struct scm_cookie scm; int data_len = 0; int sk_locked; wait_for_unix_gc(); err = scm_send(sock, msg, &scm, false); if (err < 0) return err; err = -EOPNOTSUPP; if (msg->msg_flags&MSG_OOB) goto out; if (msg->msg_namelen) { err = unix_mkname(sunaddr, msg->msg_namelen, &hash); if (err < 0) goto out; namelen = err; } else { sunaddr = NULL; err = -ENOTCONN; other = unix_peer_get(sk); if (!other) goto out; } if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr && (err = unix_autobind(sock)) != 0) goto out; err = -EMSGSIZE; if (len > sk->sk_sndbuf - 32) goto out; if (len > SKB_MAX_ALLOC) { data_len = min_t(size_t, len - SKB_MAX_ALLOC, MAX_SKB_FRAGS * PAGE_SIZE); data_len = PAGE_ALIGN(data_len); BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE); } skb = sock_alloc_send_pskb(sk, len - data_len, data_len, msg->msg_flags & MSG_DONTWAIT, &err, PAGE_ALLOC_COSTLY_ORDER); if (skb == NULL) goto out; err = unix_scm_to_skb(&scm, skb, true); if (err < 0) goto out_free; skb_put(skb, len - data_len); skb->data_len = data_len; skb->len = len; err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len); if (err) goto out_free; timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); restart: if (!other) { err = -ECONNRESET; if (sunaddr == NULL) goto out_free; other = unix_find_other(net, sunaddr, namelen, sk->sk_type, hash, &err); if (other == NULL) goto out_free; } if (sk_filter(other, skb) < 0) { /* Toss the packet but do not return any error to the sender */ err = len; goto out_free; } 
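/*
 * At this point "other" is the destination socket: either the peer
 * obtained via unix_peer_get() above for a connected sender, or the
 * result of the unix_find_other() lookup when an explicit address was
 * supplied. A minimal userspace sketch of the unconnected case,
 * illustrative only and not part of this file (the path name is made
 * up for the example):
 *
 *	struct sockaddr_un dst = { .sun_family = AF_UNIX };
 *	int fd = socket(AF_UNIX, SOCK_DGRAM, 0);
 *
 *	strcpy(dst.sun_path, "/tmp/example.sock");
 *	sendto(fd, "hi", 2, 0, (struct sockaddr *)&dst, sizeof(dst));
 *
 * An abstract-namespace destination would instead keep sun_path[0] as
 * '\0', put the name in the following bytes, and pass an address
 * length covering exactly the bytes used.
 */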
sk_locked = 0; unix_state_lock(other); restart_locked: err = -EPERM; if (!unix_may_send(sk, other)) goto out_unlock; if (unlikely(sock_flag(other, SOCK_DEAD))) { /* * Check with 1003.1g - what should * datagram error */ unix_state_unlock(other); sock_put(other); if (!sk_locked) unix_state_lock(sk); err = 0; if (unix_peer(sk) == other) { unix_peer(sk) = NULL; unix_dgram_peer_wake_disconnect_wakeup(sk, other); unix_state_unlock(sk); unix_dgram_disconnected(sk, other); sock_put(other); err = -ECONNREFUSED; } else { unix_state_unlock(sk); } other = NULL; if (err) goto out_free; goto restart; } err = -EPIPE; if (other->sk_shutdown & RCV_SHUTDOWN) goto out_unlock; if (sk->sk_type != SOCK_SEQPACKET) { err = security_unix_may_send(sk->sk_socket, other->sk_socket); if (err) goto out_unlock; } /* other == sk && unix_peer(other) != sk if * - unix_peer(sk) == NULL, destination address bound to sk * - unix_peer(sk) == sk by time of get but disconnected before lock */ if (other != sk && unlikely(unix_peer(other) != sk && unix_recvq_full_lockless(other))) { if (timeo) { timeo = unix_wait_for_peer(other, timeo); err = sock_intr_errno(timeo); if (signal_pending(current)) goto out_free; goto restart; } if (!sk_locked) { unix_state_unlock(other); unix_state_double_lock(sk, other); } if (unix_peer(sk) != other || unix_dgram_peer_wake_me(sk, other)) { err = -EAGAIN; sk_locked = 1; goto out_unlock; } if (!sk_locked) { sk_locked = 1; goto restart_locked; } } if (unlikely(sk_locked)) unix_state_unlock(sk); if (sock_flag(other, SOCK_RCVTSTAMP)) __net_timestamp(skb); maybe_add_creds(skb, sock, other); skb_queue_tail(&other->sk_receive_queue, skb); unix_state_unlock(other); other->sk_data_ready(other); sock_put(other); scm_destroy(&scm); return len; out_unlock: if (sk_locked) unix_state_unlock(sk); unix_state_unlock(other); out_free: kfree_skb(skb); out: if (other) sock_put(other); scm_destroy(&scm); return err; } /* We use paged skbs for stream sockets, and limit occupancy to 32768 * bytes, and a minimum of a full page. */ #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768)) static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct sock *other = NULL; int err, size; struct sk_buff *skb; int sent = 0; struct scm_cookie scm; bool fds_sent = false; int data_len; wait_for_unix_gc(); err = scm_send(sock, msg, &scm, false); if (err < 0) return err; err = -EOPNOTSUPP; if (msg->msg_flags&MSG_OOB) goto out_err; if (msg->msg_namelen) { err = sk->sk_state == TCP_ESTABLISHED ? 
-EISCONN : -EOPNOTSUPP; goto out_err; } else { err = -ENOTCONN; other = unix_peer(sk); if (!other) goto out_err; } if (sk->sk_shutdown & SEND_SHUTDOWN) goto pipe_err; while (sent < len) { size = len - sent; /* Keep two messages in the pipe so it schedules better */ size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64); /* allow fallback to order-0 allocations */ size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ); data_len = max_t(int, 0, size - SKB_MAX_HEAD(0)); data_len = min_t(size_t, size, PAGE_ALIGN(data_len)); skb = sock_alloc_send_pskb(sk, size - data_len, data_len, msg->msg_flags & MSG_DONTWAIT, &err, get_order(UNIX_SKB_FRAGS_SZ)); if (!skb) goto out_err; /* Only send the fds in the first buffer */ err = unix_scm_to_skb(&scm, skb, !fds_sent); if (err < 0) { kfree_skb(skb); goto out_err; } fds_sent = true; skb_put(skb, size - data_len); skb->data_len = data_len; skb->len = size; err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size); if (err) { kfree_skb(skb); goto out_err; } unix_state_lock(other); if (sock_flag(other, SOCK_DEAD) || (other->sk_shutdown & RCV_SHUTDOWN)) goto pipe_err_free; maybe_add_creds(skb, sock, other); skb_queue_tail(&other->sk_receive_queue, skb); unix_state_unlock(other); other->sk_data_ready(other); sent += size; } scm_destroy(&scm); return sent; pipe_err_free: unix_state_unlock(other); kfree_skb(skb); pipe_err: if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL)) send_sig(SIGPIPE, current, 0); err = -EPIPE; out_err: scm_destroy(&scm); return sent ? : err; } static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page, int offset, size_t size, int flags) { int err; bool send_sigpipe = false; bool init_scm = true; struct scm_cookie scm; struct sock *other, *sk = socket->sk; struct sk_buff *skb, *newskb = NULL, *tail = NULL; if (flags & MSG_OOB) return -EOPNOTSUPP; other = unix_peer(sk); if (!other || sk->sk_state != TCP_ESTABLISHED) return -ENOTCONN; if (false) { alloc_skb: unix_state_unlock(other); mutex_unlock(&unix_sk(other)->iolock); newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT, &err, 0); if (!newskb) goto err; } /* we must acquire iolock as we modify already present * skbs in the sk_receive_queue and mess with skb->len */ err = mutex_lock_interruptible(&unix_sk(other)->iolock); if (err) { err = flags & MSG_DONTWAIT ? 
-EAGAIN : -ERESTARTSYS; goto err; } if (sk->sk_shutdown & SEND_SHUTDOWN) { err = -EPIPE; send_sigpipe = true; goto err_unlock; } unix_state_lock(other); if (sock_flag(other, SOCK_DEAD) || other->sk_shutdown & RCV_SHUTDOWN) { err = -EPIPE; send_sigpipe = true; goto err_state_unlock; } if (init_scm) { err = maybe_init_creds(&scm, socket, other); if (err) goto err_state_unlock; init_scm = false; } skb = skb_peek_tail(&other->sk_receive_queue); if (tail && tail == skb) { skb = newskb; } else if (!skb || !unix_skb_scm_eq(skb, &scm)) { if (newskb) { skb = newskb; } else { tail = skb; goto alloc_skb; } } else if (newskb) { /* this is fast path, we don't necessarily need to * call to kfree_skb even though with newskb == NULL * this - does no harm */ consume_skb(newskb); newskb = NULL; } if (skb_append_pagefrags(skb, page, offset, size)) { tail = skb; goto alloc_skb; } skb->len += size; skb->data_len += size; skb->truesize += size; refcount_add(size, &sk->sk_wmem_alloc); if (newskb) { err = unix_scm_to_skb(&scm, skb, false); if (err) goto err_state_unlock; spin_lock(&other->sk_receive_queue.lock); __skb_queue_tail(&other->sk_receive_queue, newskb); spin_unlock(&other->sk_receive_queue.lock); } unix_state_unlock(other); mutex_unlock(&unix_sk(other)->iolock); other->sk_data_ready(other); scm_destroy(&scm); return size; err_state_unlock: unix_state_unlock(other); err_unlock: mutex_unlock(&unix_sk(other)->iolock); err: kfree_skb(newskb); if (send_sigpipe && !(flags & MSG_NOSIGNAL)) send_sig(SIGPIPE, current, 0); if (!init_scm) scm_destroy(&scm); return err; } static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { int err; struct sock *sk = sock->sk; err = sock_error(sk); if (err) return err; if (sk->sk_state != TCP_ESTABLISHED) return -ENOTCONN; if (msg->msg_namelen) msg->msg_namelen = 0; return unix_dgram_sendmsg(sock, msg, len); } static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct sock *sk = sock->sk; if (sk->sk_state != TCP_ESTABLISHED) return -ENOTCONN; return unix_dgram_recvmsg(sock, msg, size, flags); } static void unix_copy_addr(struct msghdr *msg, struct sock *sk) { struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr); if (addr) { msg->msg_namelen = addr->len; memcpy(msg->msg_name, addr->name, addr->len); } } static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct scm_cookie scm; struct sock *sk = sock->sk; struct unix_sock *u = unix_sk(sk); struct sk_buff *skb, *last; long timeo; int err; int peeked, skip; err = -EOPNOTSUPP; if (flags&MSG_OOB) goto out; timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); do { mutex_lock(&u->iolock); skip = sk_peek_offset(sk, flags); skb = __skb_try_recv_datagram(sk, flags, NULL, &peeked, &skip, &err, &last); if (skb) break; mutex_unlock(&u->iolock); if (err != -EAGAIN) break; } while (timeo && !__skb_wait_for_more_packets(sk, &err, &timeo, last)); if (!skb) { /* implies iolock unlocked */ unix_state_lock(sk); /* Signal EOF on disconnected non-blocking SEQPACKET socket. 
*/ if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN && (sk->sk_shutdown & RCV_SHUTDOWN)) err = 0; unix_state_unlock(sk); goto out; } if (wq_has_sleeper(&u->peer_wait)) wake_up_interruptible_sync_poll(&u->peer_wait, EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND); if (msg->msg_name) unix_copy_addr(msg, skb->sk); if (size > skb->len - skip) size = skb->len - skip; else if (size < skb->len - skip) msg->msg_flags |= MSG_TRUNC; err = skb_copy_datagram_msg(skb, skip, msg, size); if (err) goto out_free; if (sock_flag(sk, SOCK_RCVTSTAMP)) __sock_recv_timestamp(msg, sk, skb); memset(&scm, 0, sizeof(scm)); scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); unix_set_secdata(&scm, skb); if (!(flags & MSG_PEEK)) { if (UNIXCB(skb).fp) unix_detach_fds(&scm, skb); sk_peek_offset_bwd(sk, skb->len); } else { /* It is questionable: on PEEK we could: - do not return fds - good, but too simple 8) - return fds, and do not return them on read (old strategy, apparently wrong) - clone fds (I chose it for now, it is the most universal solution) POSIX 1003.1g does not actually define this clearly at all. POSIX 1003.1g doesn't define a lot of things clearly however! */ sk_peek_offset_fwd(sk, size); if (UNIXCB(skb).fp) unix_peek_fds(&scm, skb); } err = (flags & MSG_TRUNC) ? skb->len - skip : size; scm_recv(sock, msg, &scm, flags); out_free: skb_free_datagram(sk, skb); mutex_unlock(&u->iolock); out: return err; } /* * Sleep until more data has arrived. But check for races.. */ static long unix_stream_data_wait(struct sock *sk, long timeo, struct sk_buff *last, unsigned int last_len, bool freezable) { struct sk_buff *tail; DEFINE_WAIT(wait); unix_state_lock(sk); for (;;) { prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); tail = skb_peek_tail(&sk->sk_receive_queue); if (tail != last || (tail && tail->len != last_len) || sk->sk_err || (sk->sk_shutdown & RCV_SHUTDOWN) || signal_pending(current) || !timeo) break; sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); unix_state_unlock(sk); if (freezable) timeo = freezable_schedule_timeout(timeo); else timeo = schedule_timeout(timeo); unix_state_lock(sk); if (sock_flag(sk, SOCK_DEAD)) break; sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); } finish_wait(sk_sleep(sk), &wait); unix_state_unlock(sk); return timeo; } static unsigned int unix_skb_len(const struct sk_buff *skb) { return skb->len - UNIXCB(skb).consumed; } struct unix_stream_read_state { int (*recv_actor)(struct sk_buff *, int, int, struct unix_stream_read_state *); struct socket *socket; struct msghdr *msg; struct pipe_inode_info *pipe; size_t size; int flags; unsigned int splice_flags; }; static int unix_stream_read_generic(struct unix_stream_read_state *state, bool freezable) { struct scm_cookie scm; struct socket *sock = state->socket; struct sock *sk = sock->sk; struct unix_sock *u = unix_sk(sk); int copied = 0; int flags = state->flags; int noblock = flags & MSG_DONTWAIT; bool check_creds = false; int target; int err = 0; long timeo; int skip; size_t size = state->size; unsigned int last_len; if (unlikely(sk->sk_state != TCP_ESTABLISHED)) { err = -EINVAL; goto out; } if (unlikely(flags & MSG_OOB)) { err = -EOPNOTSUPP; goto out; } target = sock_rcvlowat(sk, flags & MSG_WAITALL, size); timeo = sock_rcvtimeo(sk, noblock); memset(&scm, 0, sizeof(scm)); /* Lock the socket to prevent queue disordering * while sleeps in memcpy_tomsg */ mutex_lock(&u->iolock); skip = max(sk_peek_offset(sk, flags), 0); do { int chunk; bool drop_skb; struct sk_buff *skb, *last; redo: unix_state_lock(sk); if (sock_flag(sk, SOCK_DEAD)) { 
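/*
 * The socket has been orphaned (sock_orphan() in unix_release_sock()
 * sets SOCK_DEAD); no more data can arrive on it, so report the
 * stream as reset instead of sleeping on a dying socket.
 */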
err = -ECONNRESET; goto unlock; } last = skb = skb_peek(&sk->sk_receive_queue); last_len = last ? last->len : 0; again: if (skb == NULL) { if (copied >= target) goto unlock; /* * POSIX 1003.1g mandates this order. */ err = sock_error(sk); if (err) goto unlock; if (sk->sk_shutdown & RCV_SHUTDOWN) goto unlock; unix_state_unlock(sk); if (!timeo) { err = -EAGAIN; break; } mutex_unlock(&u->iolock); timeo = unix_stream_data_wait(sk, timeo, last, last_len, freezable); if (signal_pending(current)) { err = sock_intr_errno(timeo); scm_destroy(&scm); goto out; } mutex_lock(&u->iolock); goto redo; unlock: unix_state_unlock(sk); break; } while (skip >= unix_skb_len(skb)) { skip -= unix_skb_len(skb); last = skb; last_len = skb->len; skb = skb_peek_next(skb, &sk->sk_receive_queue); if (!skb) goto again; } unix_state_unlock(sk); if (check_creds) { /* Never glue messages from different writers */ if (!unix_skb_scm_eq(skb, &scm)) break; } else if (test_bit(SOCK_PASSCRED, &sock->flags)) { /* Copy credentials */ scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); unix_set_secdata(&scm, skb); check_creds = true; } /* Copy address just once */ if (state->msg && state->msg->msg_name) { DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, state->msg->msg_name); unix_copy_addr(state->msg, skb->sk); sunaddr = NULL; } chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size); skb_get(skb); chunk = state->recv_actor(skb, skip, chunk, state); drop_skb = !unix_skb_len(skb); /* skb is only safe to use if !drop_skb */ consume_skb(skb); if (chunk < 0) { if (copied == 0) copied = -EFAULT; break; } copied += chunk; size -= chunk; if (drop_skb) { /* the skb was touched by a concurrent reader; * we should not expect anything from this skb * anymore and assume it invalid - we can be * sure it was dropped from the socket queue * * let's report a short read */ err = 0; break; } /* Mark read part of skb as used */ if (!(flags & MSG_PEEK)) { UNIXCB(skb).consumed += chunk; sk_peek_offset_bwd(sk, chunk); if (UNIXCB(skb).fp) unix_detach_fds(&scm, skb); if (unix_skb_len(skb)) break; skb_unlink(skb, &sk->sk_receive_queue); consume_skb(skb); if (scm.fp) break; } else { /* It is questionable, see note in unix_dgram_recvmsg. */ if (UNIXCB(skb).fp) unix_peek_fds(&scm, skb); sk_peek_offset_fwd(sk, chunk); if (UNIXCB(skb).fp) break; skip = 0; last = skb; last_len = skb->len; unix_state_lock(sk); skb = skb_peek_next(skb, &sk->sk_receive_queue); if (skb) goto again; unix_state_unlock(sk); break; } } while (size); mutex_unlock(&u->iolock); if (state->msg) scm_recv(sock, state->msg, &scm, flags); else scm_destroy(&scm); out: return copied ? 
: err; } static int unix_stream_read_actor(struct sk_buff *skb, int skip, int chunk, struct unix_stream_read_state *state) { int ret; ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip, state->msg, chunk); return ret ?: chunk; } static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct unix_stream_read_state state = { .recv_actor = unix_stream_read_actor, .socket = sock, .msg = msg, .size = size, .flags = flags }; return unix_stream_read_generic(&state, true); } static int unix_stream_splice_actor(struct sk_buff *skb, int skip, int chunk, struct unix_stream_read_state *state) { return skb_splice_bits(skb, state->socket->sk, UNIXCB(skb).consumed + skip, state->pipe, chunk, state->splice_flags); } static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos, struct pipe_inode_info *pipe, size_t size, unsigned int flags) { struct unix_stream_read_state state = { .recv_actor = unix_stream_splice_actor, .socket = sock, .pipe = pipe, .size = size, .splice_flags = flags, }; if (unlikely(*ppos)) return -ESPIPE; if (sock->file->f_flags & O_NONBLOCK || flags & SPLICE_F_NONBLOCK) state.flags = MSG_DONTWAIT; return unix_stream_read_generic(&state, false); } static int unix_shutdown(struct socket *sock, int mode) { struct sock *sk = sock->sk; struct sock *other; if (mode < SHUT_RD || mode > SHUT_RDWR) return -EINVAL; /* This maps: * SHUT_RD (0) -> RCV_SHUTDOWN (1) * SHUT_WR (1) -> SEND_SHUTDOWN (2) * SHUT_RDWR (2) -> SHUTDOWN_MASK (3) */ ++mode; unix_state_lock(sk); sk->sk_shutdown |= mode; other = unix_peer(sk); if (other) sock_hold(other); unix_state_unlock(sk); sk->sk_state_change(sk); if (other && (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) { int peer_mode = 0; if (mode&RCV_SHUTDOWN) peer_mode |= SEND_SHUTDOWN; if (mode&SEND_SHUTDOWN) peer_mode |= RCV_SHUTDOWN; unix_state_lock(other); other->sk_shutdown |= peer_mode; unix_state_unlock(other); other->sk_state_change(other); if (peer_mode == SHUTDOWN_MASK) sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP); else if (peer_mode & RCV_SHUTDOWN) sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN); } if (other) sock_put(other); return 0; } long unix_inq_len(struct sock *sk) { struct sk_buff *skb; long amount = 0; if (sk->sk_state == TCP_LISTEN) return -EINVAL; spin_lock(&sk->sk_receive_queue.lock); if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) { skb_queue_walk(&sk->sk_receive_queue, skb) amount += unix_skb_len(skb); } else { skb = skb_peek(&sk->sk_receive_queue); if (skb) amount = skb->len; } spin_unlock(&sk->sk_receive_queue.lock); return amount; } EXPORT_SYMBOL_GPL(unix_inq_len); long unix_outq_len(struct sock *sk) { return sk_wmem_alloc_get(sk); } EXPORT_SYMBOL_GPL(unix_outq_len); static int unix_open_file(struct sock *sk) { struct path path; struct file *f; int fd; if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) return -EPERM; if (!smp_load_acquire(&unix_sk(sk)->addr)) return -ENOENT; path = unix_sk(sk)->path; if (!path.dentry) return -ENOENT; path_get(&path); fd = get_unused_fd_flags(O_CLOEXEC); if (fd < 0) goto out; f = dentry_open(&path, O_PATH, current_cred()); if (IS_ERR(f)) { put_unused_fd(fd); fd = PTR_ERR(f); goto out; } fd_install(fd, f); out: path_put(&path); return fd; } static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock->sk; long amount = 0; int err; switch (cmd) { case SIOCOUTQ: amount = unix_outq_len(sk); err = put_user(amount, (int __user *)arg); break; case SIOCINQ: amount = 
unix_inq_len(sk); if (amount < 0) err = amount; else err = put_user(amount, (int __user *)arg); break; case SIOCUNIXFILE: err = unix_open_file(sk); break; default: err = -ENOIOCTLCMD; break; } return err; } #ifdef CONFIG_COMPAT static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg)); } #endif static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; __poll_t mask; sock_poll_wait(file, sock, wait); mask = 0; /* exceptional events? */ if (sk->sk_err) mask |= EPOLLERR; if (sk->sk_shutdown == SHUTDOWN_MASK) mask |= EPOLLHUP; if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; /* readable? */ if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= EPOLLIN | EPOLLRDNORM; /* Connection-based need to check for termination and startup */ if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && sk->sk_state == TCP_CLOSE) mask |= EPOLLHUP; /* * we set writable also when the other side has shut down the * connection. This prevents stuck sockets. */ if (unix_writable(sk)) mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; return mask; } static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk, *other; unsigned int writable; __poll_t mask; sock_poll_wait(file, sock, wait); mask = 0; /* exceptional events? */ if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) mask |= EPOLLERR | (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0); if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; if (sk->sk_shutdown == SHUTDOWN_MASK) mask |= EPOLLHUP; /* readable? */ if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= EPOLLIN | EPOLLRDNORM; /* Connection-based need to check for termination and startup */ if (sk->sk_type == SOCK_SEQPACKET) { if (sk->sk_state == TCP_CLOSE) mask |= EPOLLHUP; /* connection hasn't started yet? */ if (sk->sk_state == TCP_SYN_SENT) return mask; } /* No write status requested, avoid expensive OUT tests. 
*/ if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT))) return mask; writable = unix_writable(sk); if (writable) { unix_state_lock(sk); other = unix_peer(sk); if (other && unix_peer(other) != sk && unix_recvq_full_lockless(other) && unix_dgram_peer_wake_me(sk, other)) writable = 0; unix_state_unlock(sk); } if (writable) mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; else sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); return mask; } #ifdef CONFIG_PROC_FS #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1) #define get_bucket(x) ((x) >> BUCKET_SPACE) #define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1)) #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos) { unsigned long offset = get_offset(*pos); unsigned long bucket = get_bucket(*pos); struct sock *sk; unsigned long count = 0; for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) { if (sock_net(sk) != seq_file_net(seq)) continue; if (++count == offset) break; } return sk; } static struct sock *unix_next_socket(struct seq_file *seq, struct sock *sk, loff_t *pos) { unsigned long bucket; while (sk > (struct sock *)SEQ_START_TOKEN) { sk = sk_next(sk); if (!sk) goto next_bucket; if (sock_net(sk) == seq_file_net(seq)) return sk; } do { sk = unix_from_bucket(seq, pos); if (sk) return sk; next_bucket: bucket = get_bucket(*pos) + 1; *pos = set_bucket_offset(bucket, 1); } while (bucket < ARRAY_SIZE(unix_socket_table)); return NULL; } static void *unix_seq_start(struct seq_file *seq, loff_t *pos) __acquires(unix_table_lock) { spin_lock(&unix_table_lock); if (!*pos) return SEQ_START_TOKEN; if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table)) return NULL; return unix_next_socket(seq, NULL, pos); } static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) { ++*pos; return unix_next_socket(seq, v, pos); } static void unix_seq_stop(struct seq_file *seq, void *v) __releases(unix_table_lock) { spin_unlock(&unix_table_lock); } static int unix_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) seq_puts(seq, "Num RefCount Protocol Flags Type St " "Inode Path\n"); else { struct sock *s = v; struct unix_sock *u = unix_sk(s); unix_state_lock(s); seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu", s, refcount_read(&s->sk_refcnt), 0, s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0, s->sk_type, s->sk_socket ? (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) : (s->sk_state == TCP_ESTABLISHED ? 
SS_CONNECTING : SS_DISCONNECTING), sock_i_ino(s)); if (u->addr) { // under unix_table_lock here int i, len; seq_putc(seq, ' '); i = 0; len = u->addr->len - sizeof(short); if (!UNIX_ABSTRACT(s)) len--; else { seq_putc(seq, '@'); i++; } for ( ; i < len; i++) seq_putc(seq, u->addr->name->sun_path[i] ?: '@'); } unix_state_unlock(s); seq_putc(seq, '\n'); } return 0; } static const struct seq_operations unix_seq_ops = { .start = unix_seq_start, .next = unix_seq_next, .stop = unix_seq_stop, .show = unix_seq_show, }; #endif static const struct net_proto_family unix_family_ops = { .family = PF_UNIX, .create = unix_create, .owner = THIS_MODULE, }; static int __net_init unix_net_init(struct net *net) { int error = -ENOMEM; net->unx.sysctl_max_dgram_qlen = 10; if (unix_sysctl_register(net)) goto out; #ifdef CONFIG_PROC_FS if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops, sizeof(struct seq_net_private))) { unix_sysctl_unregister(net); goto out; } #endif error = 0; out: return error; } static void __net_exit unix_net_exit(struct net *net) { unix_sysctl_unregister(net); remove_proc_entry("unix", net->proc_net); } static struct pernet_operations unix_net_ops = { .init = unix_net_init, .exit = unix_net_exit, }; static int __init af_unix_init(void) { int rc = -1; BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb)); rc = proto_register(&unix_proto, 1); if (rc != 0) { pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__); goto out; } sock_register(&unix_family_ops); register_pernet_subsys(&unix_net_ops); out: return rc; } static void __exit af_unix_exit(void) { sock_unregister(PF_UNIX); proto_unregister(&unix_proto); unregister_pernet_subsys(&unix_net_ops); } /* Earlier than device_initcall() so that other drivers invoking request_module() don't end up in a loop when modprobe tries to use a UNIX socket. But later than subsys_initcall() because we depend on stuff initialised there */ fs_initcall(af_unix_init); module_exit(af_unix_exit); MODULE_LICENSE("GPL"); MODULE_ALIAS_NETPROTO(PF_UNIX);
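/*
 * Illustrative sketch (not part of the kernel sources above): a minimal
 * user-space program exercising the SIOCINQ/SIOCOUTQ ioctls that
 * unix_ioctl() implements via unix_inq_len()/unix_outq_len().  For a
 * SOCK_STREAM AF_UNIX socket, SIOCINQ sums the unread bytes of every skb
 * on the receive queue, so the reader below sees 10 queued bytes;
 * SIOCOUTQ reports the write memory still charged to the sender, so its
 * exact value depends on skb accounting.  The byte string and counts are
 * assumptions made up for the example.
 */
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <linux/sockios.h>

int main(void)
{
	int sv[2];
	int inq = 0, outq = 0;

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv))
		return 1;

	/* Queue 10 bytes on sv[1]'s receive queue. */
	send(sv[0], "0123456789", 10, 0);

	ioctl(sv[1], SIOCINQ, &inq);	/* unread bytes on the receiver */
	ioctl(sv[0], SIOCOUTQ, &outq);	/* write memory still held by the sender */

	printf("SIOCINQ=%d SIOCOUTQ=%d\n", inq, outq);
	return 0;
}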
// SPDX-License-Identifier: GPL-2.0 /* * linux/fs/ext4/readpage.c * * Copyright (C) 2002, Linus Torvalds. * Copyright (C) 2015, Google, Inc. * * This was originally taken from fs/mpage.c * * The ext4_mpage_readpages() function here is intended * to replace mpage_readpages() in the general case, not just for * encrypted files. It has some limitations (see below), where it * will fall back to block_read_full_page(), but these limitations * should only be hit when page_size != block_size. * * This will allow us to attach a callback function to support ext4 * encryption. * * If anything unusual happens, such as: * * - encountering a page which has buffers * - encountering a page which has a non-hole after a hole * - encountering a page with non-contiguous blocks * * then this code just gives up and calls the buffer_head-based read function. * It does handle a page which has holes at the end - that is a common case: * the end-of-file on blocksize < PAGE_SIZE setups. * */ #include <linux/kernel.h> #include <linux/export.h> #include <linux/mm.h> #include <linux/kdev_t.h> #include <linux/gfp.h> #include <linux/bio.h> #include <linux/fs.h> #include <linux/buffer_head.h> #include <linux/blkdev.h> #include <linux/highmem.h> #include <linux/prefetch.h> #include <linux/mpage.h> #include <linux/writeback.h> #include <linux/backing-dev.h> #include <linux/pagevec.h> #include <linux/cleancache.h> #include "ext4.h" static inline bool ext4_bio_encrypted(struct bio *bio) { #ifdef CONFIG_EXT4_FS_ENCRYPTION return unlikely(bio->bi_private != NULL); #else return false; #endif } /* * I/O completion handler for multipage BIOs. * * The mpage code never puts partial pages into a BIO (except for end-of-file). * If a page does not map to a contiguous run of blocks then it simply falls * back to block_read_full_page(). * * Why is this? If a page's completion depends on a number of different BIOs * which can complete in any order (or at the same time) then determining the * status of that page is hard. See end_buffer_async_read() for the details. * There is no point in duplicating all that complexity. 
*/ static void mpage_end_io(struct bio *bio) { struct bio_vec *bv; int i; if (ext4_bio_encrypted(bio)) { if (bio->bi_status) { fscrypt_release_ctx(bio->bi_private); } else { fscrypt_enqueue_decrypt_bio(bio->bi_private, bio); return; } } bio_for_each_segment_all(bv, bio, i) { struct page *page = bv->bv_page; if (!bio->bi_status) { SetPageUptodate(page); } else { ClearPageUptodate(page); SetPageError(page); } unlock_page(page); } bio_put(bio); } int ext4_mpage_readpages(struct address_space *mapping, struct list_head *pages, struct page *page, unsigned nr_pages, bool is_readahead) { struct bio *bio = NULL; sector_t last_block_in_bio = 0; struct inode *inode = mapping->host; const unsigned blkbits = inode->i_blkbits; const unsigned blocks_per_page = PAGE_SIZE >> blkbits; const unsigned blocksize = 1 << blkbits; sector_t block_in_file; sector_t last_block; sector_t last_block_in_file; sector_t blocks[MAX_BUF_PER_PAGE]; unsigned page_block; struct block_device *bdev = inode->i_sb->s_bdev; int length; unsigned relative_block = 0; struct ext4_map_blocks map; map.m_pblk = 0; map.m_lblk = 0; map.m_len = 0; map.m_flags = 0; for (; nr_pages; nr_pages--) { int fully_mapped = 1; unsigned first_hole = blocks_per_page; prefetchw(&page->flags); if (pages) { page = list_entry(pages->prev, struct page, lru); list_del(&page->lru); if (add_to_page_cache_lru(page, mapping, page->index, readahead_gfp_mask(mapping))) goto next_page; } if (page_has_buffers(page)) goto confused; block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits); last_block = block_in_file + nr_pages * blocks_per_page; last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits; if (last_block > last_block_in_file) last_block = last_block_in_file; page_block = 0; /* * Map blocks using the previous result first. */ if ((map.m_flags & EXT4_MAP_MAPPED) && block_in_file > map.m_lblk && block_in_file < (map.m_lblk + map.m_len)) { unsigned map_offset = block_in_file - map.m_lblk; unsigned last = map.m_len - map_offset; for (relative_block = 0; ; relative_block++) { if (relative_block == last) { /* needed? */ map.m_flags &= ~EXT4_MAP_MAPPED; break; } if (page_block == blocks_per_page) break; blocks[page_block] = map.m_pblk + map_offset + relative_block; page_block++; block_in_file++; } } /* * Then do more ext4_map_blocks() calls until we are * done with this page. */ while (page_block < blocks_per_page) { if (block_in_file < last_block) { map.m_lblk = block_in_file; map.m_len = last_block - block_in_file; if (ext4_map_blocks(NULL, inode, &map, 0) < 0) { set_error_page: SetPageError(page); zero_user_segment(page, 0, PAGE_SIZE); unlock_page(page); goto next_page; } } if ((map.m_flags & EXT4_MAP_MAPPED) == 0) { fully_mapped = 0; if (first_hole == blocks_per_page) first_hole = page_block; page_block++; block_in_file++; continue; } if (first_hole != blocks_per_page) goto confused; /* hole -> non-hole */ /* Contiguous blocks? */ if (page_block && blocks[page_block-1] != map.m_pblk-1) goto confused; for (relative_block = 0; ; relative_block++) { if (relative_block == map.m_len) { /* needed? 
*/ map.m_flags &= ~EXT4_MAP_MAPPED; break; } else if (page_block == blocks_per_page) break; blocks[page_block] = map.m_pblk+relative_block; page_block++; block_in_file++; } } if (first_hole != blocks_per_page) { zero_user_segment(page, first_hole << blkbits, PAGE_SIZE); if (first_hole == 0) { SetPageUptodate(page); unlock_page(page); goto next_page; } } else if (fully_mapped) { SetPageMappedToDisk(page); } if (fully_mapped && blocks_per_page == 1 && !PageUptodate(page) && cleancache_get_page(page) == 0) { SetPageUptodate(page); goto confused; } /* * This page will go to BIO. Do we need to send this * BIO off first? */ if (bio && (last_block_in_bio != blocks[0] - 1)) { submit_and_realloc: submit_bio(bio); bio = NULL; } if (bio == NULL) { struct fscrypt_ctx *ctx = NULL; if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode)) { ctx = fscrypt_get_ctx(inode, GFP_NOFS); if (IS_ERR(ctx)) goto set_error_page; } bio = bio_alloc(GFP_KERNEL, min_t(int, nr_pages, BIO_MAX_PAGES)); if (!bio) { if (ctx) fscrypt_release_ctx(ctx); goto set_error_page; } bio_set_dev(bio, bdev); bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9); bio->bi_end_io = mpage_end_io; bio->bi_private = ctx; bio_set_op_attrs(bio, REQ_OP_READ, is_readahead ? REQ_RAHEAD : 0); } length = first_hole << blkbits; if (bio_add_page(bio, page, length, 0) < length) goto submit_and_realloc; if (((map.m_flags & EXT4_MAP_BOUNDARY) && (relative_block == map.m_len)) || (first_hole != blocks_per_page)) { submit_bio(bio); bio = NULL; } else last_block_in_bio = blocks[blocks_per_page - 1]; goto next_page; confused: if (bio) { submit_bio(bio); bio = NULL; } if (!PageUptodate(page)) block_read_full_page(page, ext4_get_block); else unlock_page(page); next_page: if (pages) put_page(page); } BUG_ON(pages && !list_empty(pages)); if (bio) submit_bio(bio); return 0; }
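/*
 * Illustrative sketch (not part of the kernel sources above): a stand-alone
 * worked example of the index arithmetic used by ext4_mpage_readpages().
 * With 4 KiB pages and a 1 KiB block size (blkbits = 10), page index 2
 * covers file blocks 8..11, while a 10000-byte file (an assumed size) only
 * occupies blocks 0..9, so just the first two blocks of that page are read
 * and the remainder is zeroed (the "first_hole" case handled above).
 */
#include <stdio.h>

int main(void)
{
	const unsigned int page_shift = 12;		/* 4 KiB pages */
	const unsigned int blkbits = 10;		/* 1 KiB blocks */
	const unsigned int blocks_per_page = 1u << (page_shift - blkbits);
	const unsigned long long i_size = 10000;	/* assumed file size in bytes */
	const unsigned long long index = 2;		/* page->index */

	unsigned long long block_in_file = index << (page_shift - blkbits);
	unsigned long long last_block_in_file =
		(i_size + (1u << blkbits) - 1) >> blkbits;

	printf("first block covered by page: %llu\n", block_in_file);
	printf("blocks per page:             %u\n", blocks_per_page);
	printf("first block beyond EOF:      %llu\n", last_block_in_file);
	return 0;
}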
/* * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README */ /* * Written by Anatoly P. 
Pinchuk pap@namesys.botik.ru * Programm System Institute * Pereslavl-Zalessky Russia */ #include <linux/time.h> #include <linux/string.h> #include <linux/pagemap.h> #include <linux/bio.h> #include "reiserfs.h" #include <linux/buffer_head.h> #include <linux/quotaops.h> /* Does the buffer contain a disk block which is in the tree. */ inline int B_IS_IN_TREE(const struct buffer_head *bh) { RFALSE(B_LEVEL(bh) > MAX_HEIGHT, "PAP-1010: block (%b) has too big level (%z)", bh, bh); return (B_LEVEL(bh) != FREE_LEVEL); } /* to get item head in le form */ inline void copy_item_head(struct item_head *to, const struct item_head *from) { memcpy(to, from, IH_SIZE); } /* * k1 is pointer to on-disk structure which is stored in little-endian * form. k2 is pointer to cpu variable. For key of items of the same * object this returns 0. * Returns: -1 if key1 < key2 * 0 if key1 == key2 * 1 if key1 > key2 */ inline int comp_short_keys(const struct reiserfs_key *le_key, const struct cpu_key *cpu_key) { __u32 n; n = le32_to_cpu(le_key->k_dir_id); if (n < cpu_key->on_disk_key.k_dir_id) return -1; if (n > cpu_key->on_disk_key.k_dir_id) return 1; n = le32_to_cpu(le_key->k_objectid); if (n < cpu_key->on_disk_key.k_objectid) return -1; if (n > cpu_key->on_disk_key.k_objectid) return 1; return 0; } /* * k1 is pointer to on-disk structure which is stored in little-endian * form. k2 is pointer to cpu variable. * Compare keys using all 4 key fields. * Returns: -1 if key1 < key2 0 * if key1 = key2 1 if key1 > key2 */ static inline int comp_keys(const struct reiserfs_key *le_key, const struct cpu_key *cpu_key) { int retval; retval = comp_short_keys(le_key, cpu_key); if (retval) return retval; if (le_key_k_offset(le_key_version(le_key), le_key) < cpu_key_k_offset(cpu_key)) return -1; if (le_key_k_offset(le_key_version(le_key), le_key) > cpu_key_k_offset(cpu_key)) return 1; if (cpu_key->key_length == 3) return 0; /* this part is needed only when tail conversion is in progress */ if (le_key_k_type(le_key_version(le_key), le_key) < cpu_key_k_type(cpu_key)) return -1; if (le_key_k_type(le_key_version(le_key), le_key) > cpu_key_k_type(cpu_key)) return 1; return 0; } inline int comp_short_le_keys(const struct reiserfs_key *key1, const struct reiserfs_key *key2) { __u32 *k1_u32, *k2_u32; int key_length = REISERFS_SHORT_KEY_LEN; k1_u32 = (__u32 *) key1; k2_u32 = (__u32 *) key2; for (; key_length--; ++k1_u32, ++k2_u32) { if (le32_to_cpu(*k1_u32) < le32_to_cpu(*k2_u32)) return -1; if (le32_to_cpu(*k1_u32) > le32_to_cpu(*k2_u32)) return 1; } return 0; } inline void le_key2cpu_key(struct cpu_key *to, const struct reiserfs_key *from) { int version; to->on_disk_key.k_dir_id = le32_to_cpu(from->k_dir_id); to->on_disk_key.k_objectid = le32_to_cpu(from->k_objectid); /* find out version of the key */ version = le_key_version(from); to->version = version; to->on_disk_key.k_offset = le_key_k_offset(version, from); to->on_disk_key.k_type = le_key_k_type(version, from); } /* * this does not say which one is bigger, it only returns 1 if keys * are not equal, 0 otherwise */ inline int comp_le_keys(const struct reiserfs_key *k1, const struct reiserfs_key *k2) { return memcmp(k1, k2, sizeof(struct reiserfs_key)); } /************************************************************************** * Binary search toolkit function * * Search for an item in the array by the item key * * Returns: 1 if found, 0 if not found; * * *pos = number of the searched element if found, else the * * number of the first element that is larger than key. 
* **************************************************************************/ /* * For those not familiar with binary search: lbound is the leftmost item * that it could be, rbound the rightmost item that it could be. We examine * the item halfway between lbound and rbound, and that tells us either * that we can increase lbound, or decrease rbound, or that we have found it, * or if lbound <= rbound that there are no possible items, and we have not * found it. With each examination we cut the number of possible items it * could be by one more than half rounded down, or we find it. */ static inline int bin_search(const void *key, /* Key to search for. */ const void *base, /* First item in the array. */ int num, /* Number of items in the array. */ /* * Item size in the array. searched. Lest the * reader be confused, note that this is crafted * as a general function, and when it is applied * specifically to the array of item headers in a * node, width is actually the item header size * not the item size. */ int width, int *pos /* Number of the searched for element. */ ) { int rbound, lbound, j; for (j = ((rbound = num - 1) + (lbound = 0)) / 2; lbound <= rbound; j = (rbound + lbound) / 2) switch (comp_keys ((struct reiserfs_key *)((char *)base + j * width), (struct cpu_key *)key)) { case -1: lbound = j + 1; continue; case 1: rbound = j - 1; continue; case 0: *pos = j; return ITEM_FOUND; /* Key found in the array. */ } /* * bin_search did not find given key, it returns position of key, * that is minimal and greater than the given one. */ *pos = lbound; return ITEM_NOT_FOUND; } /* Minimal possible key. It is never in the tree. */ const struct reiserfs_key MIN_KEY = { 0, 0, {{0, 0},} }; /* Maximal possible key. It is never in the tree. */ static const struct reiserfs_key MAX_KEY = { cpu_to_le32(0xffffffff), cpu_to_le32(0xffffffff), {{cpu_to_le32(0xffffffff), cpu_to_le32(0xffffffff)},} }; /* * Get delimiting key of the buffer by looking for it in the buffers in the * path, starting from the bottom of the path, and going upwards. We must * check the path's validity at each step. If the key is not in the path, * there is no delimiting key in the tree (buffer is first or last buffer * in tree), and in this case we return a special key, either MIN_KEY or * MAX_KEY. */ static inline const struct reiserfs_key *get_lkey(const struct treepath *chk_path, const struct super_block *sb) { int position, path_offset = chk_path->path_length; struct buffer_head *parent; RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET, "PAP-5010: invalid offset in the path"); /* While not higher in path than first element. */ while (path_offset-- > FIRST_PATH_ELEMENT_OFFSET) { RFALSE(!buffer_uptodate (PATH_OFFSET_PBUFFER(chk_path, path_offset)), "PAP-5020: parent is not uptodate"); /* Parent at the path is not in the tree now. */ if (!B_IS_IN_TREE (parent = PATH_OFFSET_PBUFFER(chk_path, path_offset))) return &MAX_KEY; /* Check whether position in the parent is correct. */ if ((position = PATH_OFFSET_POSITION(chk_path, path_offset)) > B_NR_ITEMS(parent)) return &MAX_KEY; /* Check whether parent at the path really points to the child. */ if (B_N_CHILD_NUM(parent, position) != PATH_OFFSET_PBUFFER(chk_path, path_offset + 1)->b_blocknr) return &MAX_KEY; /* * Return delimiting key if position in the parent * is not equal to zero. */ if (position) return internal_key(parent, position - 1); } /* Return MIN_KEY if we are in the root of the buffer tree. 
*/ if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)-> b_blocknr == SB_ROOT_BLOCK(sb)) return &MIN_KEY; return &MAX_KEY; } /* Get delimiting key of the buffer at the path and its right neighbor. */ inline const struct reiserfs_key *get_rkey(const struct treepath *chk_path, const struct super_block *sb) { int position, path_offset = chk_path->path_length; struct buffer_head *parent; RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET, "PAP-5030: invalid offset in the path"); while (path_offset-- > FIRST_PATH_ELEMENT_OFFSET) { RFALSE(!buffer_uptodate (PATH_OFFSET_PBUFFER(chk_path, path_offset)), "PAP-5040: parent is not uptodate"); /* Parent at the path is not in the tree now. */ if (!B_IS_IN_TREE (parent = PATH_OFFSET_PBUFFER(chk_path, path_offset))) return &MIN_KEY; /* Check whether position in the parent is correct. */ if ((position = PATH_OFFSET_POSITION(chk_path, path_offset)) > B_NR_ITEMS(parent)) return &MIN_KEY; /* * Check whether parent at the path really points * to the child. */ if (B_N_CHILD_NUM(parent, position) != PATH_OFFSET_PBUFFER(chk_path, path_offset + 1)->b_blocknr) return &MIN_KEY; /* * Return delimiting key if position in the parent * is not the last one. */ if (position != B_NR_ITEMS(parent)) return internal_key(parent, position); } /* Return MAX_KEY if we are in the root of the buffer tree. */ if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)-> b_blocknr == SB_ROOT_BLOCK(sb)) return &MAX_KEY; return &MIN_KEY; } /* * Check whether a key is contained in the tree rooted from a buffer at a path. * This works by looking at the left and right delimiting keys for the buffer * in the last path_element in the path. These delimiting keys are stored * at least one level above that buffer in the tree. If the buffer is the * first or last node in the tree order then one of the delimiting keys may * be absent, and in this case get_lkey and get_rkey return a special key * which is MIN_KEY or MAX_KEY. */ static inline int key_in_buffer( /* Path which should be checked. */ struct treepath *chk_path, /* Key which should be checked. */ const struct cpu_key *key, struct super_block *sb ) { RFALSE(!key || chk_path->path_length < FIRST_PATH_ELEMENT_OFFSET || chk_path->path_length > MAX_HEIGHT, "PAP-5050: pointer to the key(%p) is NULL or invalid path length(%d)", key, chk_path->path_length); RFALSE(!PATH_PLAST_BUFFER(chk_path)->b_bdev, "PAP-5060: device must not be NODEV"); if (comp_keys(get_lkey(chk_path, sb), key) == 1) /* left delimiting key is bigger, that the key we look for */ return 0; /* if ( comp_keys(key, get_rkey(chk_path, sb)) != -1 ) */ if (comp_keys(get_rkey(chk_path, sb), key) != 1) /* key must be less than right delimitiing key */ return 0; return 1; } int reiserfs_check_path(struct treepath *p) { RFALSE(p->path_length != ILLEGAL_PATH_ELEMENT_OFFSET, "path not properly relsed"); return 0; } /* * Drop the reference to each buffer in a path and restore * dirty bits clean when preparing the buffer for the log. 
* This version should only be called from fix_nodes() */ void pathrelse_and_restore(struct super_block *sb, struct treepath *search_path) { int path_offset = search_path->path_length; RFALSE(path_offset < ILLEGAL_PATH_ELEMENT_OFFSET, "clm-4000: invalid path offset"); while (path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) { struct buffer_head *bh; bh = PATH_OFFSET_PBUFFER(search_path, path_offset--); reiserfs_restore_prepared_buffer(sb, bh); brelse(bh); } search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET; } /* Drop the reference to each buffer in a path */ void pathrelse(struct treepath *search_path) { int path_offset = search_path->path_length; RFALSE(path_offset < ILLEGAL_PATH_ELEMENT_OFFSET, "PAP-5090: invalid path offset"); while (path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) brelse(PATH_OFFSET_PBUFFER(search_path, path_offset--)); search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET; } static int has_valid_deh_location(struct buffer_head *bh, struct item_head *ih) { struct reiserfs_de_head *deh; int i; deh = B_I_DEH(bh, ih); for (i = 0; i < ih_entry_count(ih); i++) { if (deh_location(&deh[i]) > ih_item_len(ih)) { reiserfs_warning(NULL, "reiserfs-5094", "directory entry location seems wrong %h", &deh[i]); return 0; } } return 1; } static int is_leaf(char *buf, int blocksize, struct buffer_head *bh) { struct block_head *blkh; struct item_head *ih; int used_space; int prev_location; int i; int nr; blkh = (struct block_head *)buf; if (blkh_level(blkh) != DISK_LEAF_NODE_LEVEL) { reiserfs_warning(NULL, "reiserfs-5080", "this should be caught earlier"); return 0; } nr = blkh_nr_item(blkh); if (nr < 1 || nr > ((blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN))) { /* item number is too big or too small */ reiserfs_warning(NULL, "reiserfs-5081", "nr_item seems wrong: %z", bh); return 0; } ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1; used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location(ih)); /* free space does not match to calculated amount of use space */ if (used_space != blocksize - blkh_free_space(blkh)) { reiserfs_warning(NULL, "reiserfs-5082", "free space seems wrong: %z", bh); return 0; } /* * FIXME: it is_leaf will hit performance too much - we may have * return 1 here */ /* check tables of item heads */ ih = (struct item_head *)(buf + BLKH_SIZE); prev_location = blocksize; for (i = 0; i < nr; i++, ih++) { if (le_ih_k_type(ih) == TYPE_ANY) { reiserfs_warning(NULL, "reiserfs-5083", "wrong item type for item %h", ih); return 0; } if (ih_location(ih) >= blocksize || ih_location(ih) < IH_SIZE * nr) { reiserfs_warning(NULL, "reiserfs-5084", "item location seems wrong: %h", ih); return 0; } if (ih_item_len(ih) < 1 || ih_item_len(ih) > MAX_ITEM_LEN(blocksize)) { reiserfs_warning(NULL, "reiserfs-5085", "item length seems wrong: %h", ih); return 0; } if (prev_location - ih_location(ih) != ih_item_len(ih)) { reiserfs_warning(NULL, "reiserfs-5086", "item location seems wrong " "(second one): %h", ih); return 0; } if (is_direntry_le_ih(ih)) { if (ih_item_len(ih) < (ih_entry_count(ih) * IH_SIZE)) { reiserfs_warning(NULL, "reiserfs-5093", "item entry count seems wrong %h", ih); return 0; } return has_valid_deh_location(bh, ih); } prev_location = ih_location(ih); } /* one may imagine many more checks */ return 1; } /* returns 1 if buf looks like an internal node, 0 otherwise */ static int is_internal(char *buf, int blocksize, struct buffer_head *bh) { struct block_head *blkh; int nr; int used_space; blkh = (struct block_head *)buf; nr = blkh_level(blkh); if (nr <= 
DISK_LEAF_NODE_LEVEL || nr > MAX_HEIGHT) { /* this level is not possible for internal nodes */ reiserfs_warning(NULL, "reiserfs-5087", "this should be caught earlier"); return 0; } nr = blkh_nr_item(blkh); /* for internal which is not root we might check min number of keys */ if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) { reiserfs_warning(NULL, "reiserfs-5088", "number of key seems wrong: %z", bh); return 0; } used_space = BLKH_SIZE + KEY_SIZE * nr + DC_SIZE * (nr + 1); if (used_space != blocksize - blkh_free_space(blkh)) { reiserfs_warning(NULL, "reiserfs-5089", "free space seems wrong: %z", bh); return 0; } /* one may imagine many more checks */ return 1; } /* * make sure that bh contains formatted node of reiserfs tree of * 'level'-th level */ static int is_tree_node(struct buffer_head *bh, int level) { if (B_LEVEL(bh) != level) { reiserfs_warning(NULL, "reiserfs-5090", "node level %d does " "not match to the expected one %d", B_LEVEL(bh), level); return 0; } if (level == DISK_LEAF_NODE_LEVEL) return is_leaf(bh->b_data, bh->b_size, bh); return is_internal(bh->b_data, bh->b_size, bh); } #define SEARCH_BY_KEY_READA 16 /* * The function is NOT SCHEDULE-SAFE! * It might unlock the write lock if we needed to wait for a block * to be read. Note that in this case it won't recover the lock to avoid * high contention resulting from too much lock requests, especially * the caller (search_by_key) will perform other schedule-unsafe * operations just after calling this function. * * @return depth of lock to be restored after read completes */ static int search_by_key_reada(struct super_block *s, struct buffer_head **bh, b_blocknr_t *b, int num) { int i, j; int depth = -1; for (i = 0; i < num; i++) { bh[i] = sb_getblk(s, b[i]); } /* * We are going to read some blocks on which we * have a reference. It's safe, though we might be * reading blocks concurrently changed if we release * the lock. But it's still fine because we check later * if the tree changed */ for (j = 0; j < i; j++) { /* * note, this needs attention if we are getting rid of the BKL * you have to make sure the prepared bit isn't set on this * buffer */ if (!buffer_uptodate(bh[j])) { if (depth == -1) depth = reiserfs_write_unlock_nested(s); ll_rw_block(REQ_OP_READ, REQ_RAHEAD, 1, bh + j); } brelse(bh[j]); } return depth; } /* * This function fills up the path from the root to the leaf as it * descends the tree looking for the key. It uses reiserfs_bread to * try to find buffers in the cache given their block number. If it * does not find them in the cache it reads them from disk. For each * node search_by_key finds using reiserfs_bread it then uses * bin_search to look through that node. bin_search will find the * position of the block_number of the next node if it is looking * through an internal node. If it is looking through a leaf node * bin_search will find the position of the item which has key either * equal to given key, or which is the maximal key less than the given * key. search_by_key returns a path that must be checked for the * correctness of the top of the path but need not be checked for the * correctness of the bottom of the path */ /* * search_by_key - search for key (and item) in stree * @sb: superblock * @key: pointer to key to search for * @search_path: Allocated and initialized struct treepath; Returned filled * on success. * @stop_level: How far down the tree to search, Use DISK_LEAF_NODE_LEVEL to * stop at leaf level. * * The function is NOT SCHEDULE-SAFE! 
*/ int search_by_key(struct super_block *sb, const struct cpu_key *key, struct treepath *search_path, int stop_level) { b_blocknr_t block_number; int expected_level; struct buffer_head *bh; struct path_element *last_element; int node_level, retval; int right_neighbor_of_leaf_node; int fs_gen; struct buffer_head *reada_bh[SEARCH_BY_KEY_READA]; b_blocknr_t reada_blocks[SEARCH_BY_KEY_READA]; int reada_count = 0; #ifdef CONFIG_REISERFS_CHECK int repeat_counter = 0; #endif PROC_INFO_INC(sb, search_by_key); /* * As we add each node to a path we increase its count. This means * that we must be careful to release all nodes in a path before we * either discard the path struct or re-use the path struct, as we * do here. */ pathrelse(search_path); right_neighbor_of_leaf_node = 0; /* * With each iteration of this loop we search through the items in the * current node, and calculate the next current node(next path element) * for the next iteration of this loop.. */ block_number = SB_ROOT_BLOCK(sb); expected_level = -1; while (1) { #ifdef CONFIG_REISERFS_CHECK if (!(++repeat_counter % 50000)) reiserfs_warning(sb, "PAP-5100", "%s: there were %d iterations of " "while loop looking for key %K", current->comm, repeat_counter, key); #endif /* prep path to have another element added to it. */ last_element = PATH_OFFSET_PELEMENT(search_path, ++search_path->path_length); fs_gen = get_generation(sb); /* * Read the next tree node, and set the last element * in the path to have a pointer to it. */ if ((bh = last_element->pe_buffer = sb_getblk(sb, block_number))) { /* * We'll need to drop the lock if we encounter any * buffers that need to be read. If all of them are * already up to date, we don't need to drop the lock. */ int depth = -1; if (!buffer_uptodate(bh) && reada_count > 1) depth = search_by_key_reada(sb, reada_bh, reada_blocks, reada_count); if (!buffer_uptodate(bh) && depth == -1) depth = reiserfs_write_unlock_nested(sb); ll_rw_block(REQ_OP_READ, 0, 1, &bh); wait_on_buffer(bh); if (depth != -1) reiserfs_write_lock_nested(sb, depth); if (!buffer_uptodate(bh)) goto io_error; } else { io_error: search_path->path_length--; pathrelse(search_path); return IO_ERROR; } reada_count = 0; if (expected_level == -1) expected_level = SB_TREE_HEIGHT(sb); expected_level--; /* * It is possible that schedule occurred. We must check * whether the key to search is still in the tree rooted * from the current buffer. If not then repeat search * from the root. */ if (fs_changed(fs_gen, sb) && (!B_IS_IN_TREE(bh) || B_LEVEL(bh) != expected_level || !key_in_buffer(search_path, key, sb))) { PROC_INFO_INC(sb, search_by_key_fs_changed); PROC_INFO_INC(sb, search_by_key_restarted); PROC_INFO_INC(sb, sbk_restarted[expected_level - 1]); pathrelse(search_path); /* * Get the root block number so that we can * repeat the search starting from the root. */ block_number = SB_ROOT_BLOCK(sb); expected_level = -1; right_neighbor_of_leaf_node = 0; /* repeat search from the root */ continue; } /* * only check that the key is in the buffer if key is not * equal to the MAX_KEY. Latter case is only possible in * "finish_unfinished()" processing during mount. 
*/ RFALSE(comp_keys(&MAX_KEY, key) && !key_in_buffer(search_path, key, sb), "PAP-5130: key is not in the buffer"); #ifdef CONFIG_REISERFS_CHECK if (REISERFS_SB(sb)->cur_tb) { print_cur_tb("5140"); reiserfs_panic(sb, "PAP-5140", "schedule occurred in do_balance!"); } #endif /* * make sure, that the node contents look like a node of * certain level */ if (!is_tree_node(bh, expected_level)) { reiserfs_error(sb, "vs-5150", "invalid format found in block %ld. " "Fsck?", bh->b_blocknr); pathrelse(search_path); return IO_ERROR; } /* ok, we have acquired next formatted node in the tree */ node_level = B_LEVEL(bh); PROC_INFO_BH_STAT(sb, bh, node_level - 1); RFALSE(node_level < stop_level, "vs-5152: tree level (%d) is less than stop level (%d)", node_level, stop_level); retval = bin_search(key, item_head(bh, 0), B_NR_ITEMS(bh), (node_level == DISK_LEAF_NODE_LEVEL) ? IH_SIZE : KEY_SIZE, &last_element->pe_position); if (node_level == stop_level) { return retval; } /* we are not in the stop level */ /* * item has been found, so we choose the pointer which * is to the right of the found one */ if (retval == ITEM_FOUND) last_element->pe_position++; /* * if item was not found we choose the position which is to * the left of the found item. This requires no code, * bin_search did it already. */ /* * So we have chosen a position in the current node which is * an internal node. Now we calculate child block number by * position in the node. */ block_number = B_N_CHILD_NUM(bh, last_element->pe_position); /* * if we are going to read leaf nodes, try for read * ahead as well */ if ((search_path->reada & PATH_READA) && node_level == DISK_LEAF_NODE_LEVEL + 1) { int pos = last_element->pe_position; int limit = B_NR_ITEMS(bh); struct reiserfs_key *le_key; if (search_path->reada & PATH_READA_BACK) limit = 0; while (reada_count < SEARCH_BY_KEY_READA) { if (pos == limit) break; reada_blocks[reada_count++] = B_N_CHILD_NUM(bh, pos); if (search_path->reada & PATH_READA_BACK) pos--; else pos++; /* * check to make sure we're in the same object */ le_key = internal_key(bh, pos); if (le32_to_cpu(le_key->k_objectid) != key->on_disk_key.k_objectid) { break; } } } } } /* * Form the path to an item and position in this item which contains * file byte defined by key. If there is no such item * corresponding to the key, we point the path to the item with * maximal key less than key, and *pos_in_item is set to one * past the last entry/byte in the item. If searching for entry in a * directory item, and it is not found, *pos_in_item is set to one * entry more than the entry with maximal key which is less than the * sought key. * * Note that if there is no entry in this same node which is one more, * then we point to an imaginary entry. for direct items, the * position is in units of bytes, for indirect items the position is * in units of blocknr entries, for directory items the position is in * units of directory entries. */ /* The function is NOT SCHEDULE-SAFE! */ int search_for_position_by_key(struct super_block *sb, /* Key to search (cpu variable) */ const struct cpu_key *p_cpu_key, /* Filled up by this function. */ struct treepath *search_path) { struct item_head *p_le_ih; /* pointer to on-disk structure */ int blk_size; loff_t item_offset, offset; struct reiserfs_dir_entry de; int retval; /* If searching for directory entry. */ if (is_direntry_cpu_key(p_cpu_key)) return search_by_entry_key(sb, p_cpu_key, search_path, &de); /* If not searching for directory entry. */ /* If item is found. 
*/ retval = search_item(sb, p_cpu_key, search_path); if (retval == IO_ERROR) return retval; if (retval == ITEM_FOUND) { RFALSE(!ih_item_len (item_head (PATH_PLAST_BUFFER(search_path), PATH_LAST_POSITION(search_path))), "PAP-5165: item length equals zero"); pos_in_item(search_path) = 0; return POSITION_FOUND; } RFALSE(!PATH_LAST_POSITION(search_path), "PAP-5170: position equals zero"); /* Item is not found. Set path to the previous item. */ p_le_ih = item_head(PATH_PLAST_BUFFER(search_path), --PATH_LAST_POSITION(search_path)); blk_size = sb->s_blocksize; if (comp_short_keys(&p_le_ih->ih_key, p_cpu_key)) return FILE_NOT_FOUND; /* FIXME: quite ugly this far */ item_offset = le_ih_k_offset(p_le_ih); offset = cpu_key_k_offset(p_cpu_key); /* Needed byte is contained in the item pointed to by the path. */ if (item_offset <= offset && item_offset + op_bytes_number(p_le_ih, blk_size) > offset) { pos_in_item(search_path) = offset - item_offset; if (is_indirect_le_ih(p_le_ih)) { pos_in_item(search_path) /= blk_size; } return POSITION_FOUND; } /* * Needed byte is not contained in the item pointed to by the * path. Set pos_in_item out of the item. */ if (is_indirect_le_ih(p_le_ih)) pos_in_item(search_path) = ih_item_len(p_le_ih) / UNFM_P_SIZE; else pos_in_item(search_path) = ih_item_len(p_le_ih); return POSITION_NOT_FOUND; } /* Compare given item and item pointed to by the path. */ int comp_items(const struct item_head *stored_ih, const struct treepath *path) { struct buffer_head *bh = PATH_PLAST_BUFFER(path); struct item_head *ih; /* Last buffer at the path is not in the tree. */ if (!B_IS_IN_TREE(bh)) return 1; /* Last path position is invalid. */ if (PATH_LAST_POSITION(path) >= B_NR_ITEMS(bh)) return 1; /* we need only to know, whether it is the same item */ ih = tp_item_head(path); return memcmp(stored_ih, ih, IH_SIZE); } /* unformatted nodes are not logged anymore, ever. This is safe now */ #define held_by_others(bh) (atomic_read(&(bh)->b_count) > 1) /* block can not be forgotten as it is in I/O or held by someone */ #define block_in_use(bh) (buffer_locked(bh) || (held_by_others(bh))) /* prepare for delete or cut of direct item */ static inline int prepare_for_direct_item(struct treepath *path, struct item_head *le_ih, struct inode *inode, loff_t new_file_length, int *cut_size) { loff_t round_len; if (new_file_length == max_reiserfs_offset(inode)) { /* item has to be deleted */ *cut_size = -(IH_SIZE + ih_item_len(le_ih)); return M_DELETE; } /* new file gets truncated */ if (get_inode_item_key_version(inode) == KEY_FORMAT_3_6) { round_len = ROUND_UP(new_file_length); /* this was new_file_length < le_ih ... */ if (round_len < le_ih_k_offset(le_ih)) { *cut_size = -(IH_SIZE + ih_item_len(le_ih)); return M_DELETE; /* Delete this item. */ } /* Calculate first position and size for cutting from item. */ pos_in_item(path) = round_len - (le_ih_k_offset(le_ih) - 1); *cut_size = -(ih_item_len(le_ih) - pos_in_item(path)); return M_CUT; /* Cut from this item. */ } /* old file: items may have any length */ if (new_file_length < le_ih_k_offset(le_ih)) { *cut_size = -(IH_SIZE + ih_item_len(le_ih)); return M_DELETE; /* Delete this item. */ } /* Calculate first position and size for cutting from item. */ *cut_size = -(ih_item_len(le_ih) - (pos_in_item(path) = new_file_length + 1 - le_ih_k_offset(le_ih))); return M_CUT; /* Cut from this item. 
*/ } static inline int prepare_for_direntry_item(struct treepath *path, struct item_head *le_ih, struct inode *inode, loff_t new_file_length, int *cut_size) { if (le_ih_k_offset(le_ih) == DOT_OFFSET && new_file_length == max_reiserfs_offset(inode)) { RFALSE(ih_entry_count(le_ih) != 2, "PAP-5220: incorrect empty directory item (%h)", le_ih); *cut_size = -(IH_SIZE + ih_item_len(le_ih)); /* Delete the directory item containing "." and ".." entry. */ return M_DELETE; } if (ih_entry_count(le_ih) == 1) { /* * Delete the directory item such as there is one record only * in this item */ *cut_size = -(IH_SIZE + ih_item_len(le_ih)); return M_DELETE; } /* Cut one record from the directory item. */ *cut_size = -(DEH_SIZE + entry_length(get_last_bh(path), le_ih, pos_in_item(path))); return M_CUT; } #define JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD (2 * JOURNAL_PER_BALANCE_CNT + 1) /* * If the path points to a directory or direct item, calculate mode * and the size cut, for balance. * If the path points to an indirect item, remove some number of its * unformatted nodes. * In case of file truncate calculate whether this item must be * deleted/truncated or last unformatted node of this item will be * converted to a direct item. * This function returns a determination of what balance mode the * calling function should employ. */ static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, struct inode *inode, struct treepath *path, const struct cpu_key *item_key, /* * Number of unformatted nodes * which were removed from end * of the file. */ int *removed, int *cut_size, /* MAX_KEY_OFFSET in case of delete. */ unsigned long long new_file_length ) { struct super_block *sb = inode->i_sb; struct item_head *p_le_ih = tp_item_head(path); struct buffer_head *bh = PATH_PLAST_BUFFER(path); BUG_ON(!th->t_trans_id); /* Stat_data item. */ if (is_statdata_le_ih(p_le_ih)) { RFALSE(new_file_length != max_reiserfs_offset(inode), "PAP-5210: mode must be M_DELETE"); *cut_size = -(IH_SIZE + ih_item_len(p_le_ih)); return M_DELETE; } /* Directory item. */ if (is_direntry_le_ih(p_le_ih)) return prepare_for_direntry_item(path, p_le_ih, inode, new_file_length, cut_size); /* Direct item. */ if (is_direct_le_ih(p_le_ih)) return prepare_for_direct_item(path, p_le_ih, inode, new_file_length, cut_size); /* Case of an indirect item. */ { int blk_size = sb->s_blocksize; struct item_head s_ih; int need_re_search; int delete = 0; int result = M_CUT; int pos = 0; if ( new_file_length == max_reiserfs_offset (inode) ) { /* * prepare_for_delete_or_cut() is called by * reiserfs_delete_item() */ new_file_length = 0; delete = 1; } do { need_re_search = 0; *cut_size = 0; bh = PATH_PLAST_BUFFER(path); copy_item_head(&s_ih, tp_item_head(path)); pos = I_UNFM_NUM(&s_ih); while (le_ih_k_offset (&s_ih) + (pos - 1) * blk_size > new_file_length) { __le32 *unfm; __u32 block; /* * Each unformatted block deletion may involve * one additional bitmap block into the transaction, * thereby the initial journal space reservation * might not be enough. 
*/ if (!delete && (*cut_size) != 0 && reiserfs_transaction_free_space(th) < JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) break; unfm = (__le32 *)ih_item_body(bh, &s_ih) + pos - 1; block = get_block_num(unfm, 0); if (block != 0) { reiserfs_prepare_for_journal(sb, bh, 1); put_block_num(unfm, 0, 0); journal_mark_dirty(th, bh); reiserfs_free_block(th, inode, block, 1); } reiserfs_cond_resched(sb); if (item_moved (&s_ih, path)) { need_re_search = 1; break; } pos --; (*removed)++; (*cut_size) -= UNFM_P_SIZE; if (pos == 0) { (*cut_size) -= IH_SIZE; result = M_DELETE; break; } } /* * a trick. If the buffer has been logged, this will * do nothing. If we've broken the loop without logging * it, it will restore the buffer */ reiserfs_restore_prepared_buffer(sb, bh); } while (need_re_search && search_for_position_by_key(sb, item_key, path) == POSITION_FOUND); pos_in_item(path) = pos * UNFM_P_SIZE; if (*cut_size == 0) { /* * Nothing was cut. maybe convert last unformatted node to the * direct item? */ result = M_CONVERT; } return result; } } /* Calculate number of bytes which will be deleted or cut during balance */ static int calc_deleted_bytes_number(struct tree_balance *tb, char mode) { int del_size; struct item_head *p_le_ih = tp_item_head(tb->tb_path); if (is_statdata_le_ih(p_le_ih)) return 0; del_size = (mode == M_DELETE) ? ih_item_len(p_le_ih) : -tb->insert_size[0]; if (is_direntry_le_ih(p_le_ih)) { /* * return EMPTY_DIR_SIZE; We delete emty directories only. * we can't use EMPTY_DIR_SIZE, as old format dirs have a * different empty size. ick. FIXME, is this right? */ return del_size; } if (is_indirect_le_ih(p_le_ih)) del_size = (del_size / UNFM_P_SIZE) * (PATH_PLAST_BUFFER(tb->tb_path)->b_size); return del_size; } static void init_tb_struct(struct reiserfs_transaction_handle *th, struct tree_balance *tb, struct super_block *sb, struct treepath *path, int size) { BUG_ON(!th->t_trans_id); memset(tb, '\0', sizeof(struct tree_balance)); tb->transaction_handle = th; tb->tb_sb = sb; tb->tb_path = path; PATH_OFFSET_PBUFFER(path, ILLEGAL_PATH_ELEMENT_OFFSET) = NULL; PATH_OFFSET_POSITION(path, ILLEGAL_PATH_ELEMENT_OFFSET) = 0; tb->insert_size[0] = size; } void padd_item(char *item, int total_length, int length) { int i; for (i = total_length; i > length;) item[--i] = 0; } #ifdef REISERQUOTA_DEBUG char key2type(struct reiserfs_key *ih) { if (is_direntry_le_key(2, ih)) return 'd'; if (is_direct_le_key(2, ih)) return 'D'; if (is_indirect_le_key(2, ih)) return 'i'; if (is_statdata_le_key(2, ih)) return 's'; return 'u'; } char head2type(struct item_head *ih) { if (is_direntry_le_ih(ih)) return 'd'; if (is_direct_le_ih(ih)) return 'D'; if (is_indirect_le_ih(ih)) return 'i'; if (is_statdata_le_ih(ih)) return 's'; return 'u'; } #endif /* * Delete object item. 
* th - active transaction handle * path - path to the deleted item * item_key - key to search for the deleted item * indode - used for updating i_blocks and quotas * un_bh - NULL or unformatted node pointer */ int reiserfs_delete_item(struct reiserfs_transaction_handle *th, struct treepath *path, const struct cpu_key *item_key, struct inode *inode, struct buffer_head *un_bh) { struct super_block *sb = inode->i_sb; struct tree_balance s_del_balance; struct item_head s_ih; struct item_head *q_ih; int quota_cut_bytes; int ret_value, del_size, removed; int depth; #ifdef CONFIG_REISERFS_CHECK char mode; int iter = 0; #endif BUG_ON(!th->t_trans_id); init_tb_struct(th, &s_del_balance, sb, path, 0 /*size is unknown */ ); while (1) { removed = 0; #ifdef CONFIG_REISERFS_CHECK iter++; mode = #endif prepare_for_delete_or_cut(th, inode, path, item_key, &removed, &del_size, max_reiserfs_offset(inode)); RFALSE(mode != M_DELETE, "PAP-5320: mode must be M_DELETE"); copy_item_head(&s_ih, tp_item_head(path)); s_del_balance.insert_size[0] = del_size; ret_value = fix_nodes(M_DELETE, &s_del_balance, NULL, NULL); if (ret_value != REPEAT_SEARCH) break; PROC_INFO_INC(sb, delete_item_restarted); /* file system changed, repeat search */ ret_value = search_for_position_by_key(sb, item_key, path); if (ret_value == IO_ERROR) break; if (ret_value == FILE_NOT_FOUND) { reiserfs_warning(sb, "vs-5340", "no items of the file %K found", item_key); break; } } /* while (1) */ if (ret_value != CARRY_ON) { unfix_nodes(&s_del_balance); return 0; } /* reiserfs_delete_item returns item length when success */ ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE); q_ih = tp_item_head(path); quota_cut_bytes = ih_item_len(q_ih); /* * hack so the quota code doesn't have to guess if the file has a * tail. On tail insert, we allocate quota for 1 unformatted node. * We test the offset because the tail might have been * split into multiple items, and we only want to decrement for * the unfm node once */ if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(q_ih)) { if ((le_ih_k_offset(q_ih) & (sb->s_blocksize - 1)) == 1) { quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE; } else { quota_cut_bytes = 0; } } if (un_bh) { int off; char *data; /* * We are in direct2indirect conversion, so move tail contents * to the unformatted node */ /* * note, we do the copy before preparing the buffer because we * don't care about the contents of the unformatted node yet. * the only thing we really care about is the direct item's * data is in the unformatted node. * * Otherwise, we would have to call * reiserfs_prepare_for_journal on the unformatted node, * which might schedule, meaning we'd have to loop all the * way back up to the start of the while loop. * * The unformatted node must be dirtied later on. We can't be * sure here if the entire tail has been deleted yet. * * un_bh is from the page cache (all unformatted nodes are * from the page cache) and might be a highmem page. So, we * can't use un_bh->b_data. * -clm */ data = kmap_atomic(un_bh->b_page); off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_SIZE - 1)); memcpy(data + off, ih_item_body(PATH_PLAST_BUFFER(path), &s_ih), ret_value); kunmap_atomic(data); } /* Perform balancing after all resources have been collected at once. 
*/ do_balance(&s_del_balance, NULL, NULL, M_DELETE); #ifdef REISERQUOTA_DEBUG reiserfs_debug(sb, REISERFS_DEBUG_CODE, "reiserquota delete_item(): freeing %u, id=%u type=%c", quota_cut_bytes, inode->i_uid, head2type(&s_ih)); #endif depth = reiserfs_write_unlock_nested(inode->i_sb); dquot_free_space_nodirty(inode, quota_cut_bytes); reiserfs_write_lock_nested(inode->i_sb, depth); /* Return deleted body length */ return ret_value; } /* * Summary Of Mechanisms For Handling Collisions Between Processes: * * deletion of the body of the object is performed by iput(), with the * result that if multiple processes are operating on a file, the * deletion of the body of the file is deferred until the last process * that has an open inode performs its iput(). * * writes and truncates are protected from collisions by use of * semaphores. * * creates, linking, and mknod are protected from collisions with other * processes by making the reiserfs_add_entry() the last step in the * creation, and then rolling back all changes if there was a collision. * - Hans */ /* this deletes item which never gets split */ void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th, struct inode *inode, struct reiserfs_key *key) { struct super_block *sb = th->t_super; struct tree_balance tb; INITIALIZE_PATH(path); int item_len = 0; int tb_init = 0; struct cpu_key cpu_key; int retval; int quota_cut_bytes = 0; BUG_ON(!th->t_trans_id); le_key2cpu_key(&cpu_key, key); while (1) { retval = search_item(th->t_super, &cpu_key, &path); if (retval == IO_ERROR) { reiserfs_error(th->t_super, "vs-5350", "i/o failure occurred trying " "to delete %K", &cpu_key); break; } if (retval != ITEM_FOUND) { pathrelse(&path); /* * No need for a warning, if there is just no free * space to insert '..' item into the * newly-created subdir */ if (! ((unsigned long long) GET_HASH_VALUE(le_key_k_offset (le_key_version(key), key)) == 0 && (unsigned long long) GET_GENERATION_NUMBER(le_key_k_offset (le_key_version(key), key)) == 1)) reiserfs_warning(th->t_super, "vs-5355", "%k not found", key); break; } if (!tb_init) { tb_init = 1; item_len = ih_item_len(tp_item_head(&path)); init_tb_struct(th, &tb, th->t_super, &path, -(IH_SIZE + item_len)); } quota_cut_bytes = ih_item_len(tp_item_head(&path)); retval = fix_nodes(M_DELETE, &tb, NULL, NULL); if (retval == REPEAT_SEARCH) { PROC_INFO_INC(th->t_super, delete_solid_item_restarted); continue; } if (retval == CARRY_ON) { do_balance(&tb, NULL, NULL, M_DELETE); /* * Should we count quota for item? (we don't * count quotas for save-links) */ if (inode) { int depth; #ifdef REISERQUOTA_DEBUG reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE, "reiserquota delete_solid_item(): freeing %u id=%u type=%c", quota_cut_bytes, inode->i_uid, key2type(key)); #endif depth = reiserfs_write_unlock_nested(sb); dquot_free_space_nodirty(inode, quota_cut_bytes); reiserfs_write_lock_nested(sb, depth); } break; } /* IO_ERROR, NO_DISK_SPACE, etc */ reiserfs_warning(th->t_super, "vs-5360", "could not delete %K due to fix_nodes failure", &cpu_key); unfix_nodes(&tb); break; } reiserfs_check_path(&path); } int reiserfs_delete_object(struct reiserfs_transaction_handle *th, struct inode *inode) { int err; inode->i_size = 0; BUG_ON(!th->t_trans_id); /* for directory this deletes item containing "." and ".." 
*/ err = reiserfs_do_truncate(th, inode, NULL, 0 /*no timestamp updates */ ); if (err) return err; #if defined( USE_INODE_GENERATION_COUNTER ) if (!old_format_only(th->t_super)) { __le32 *inode_generation; inode_generation = &REISERFS_SB(th->t_super)->s_rs->s_inode_generation; le32_add_cpu(inode_generation, 1); } /* USE_INODE_GENERATION_COUNTER */ #endif reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode)); return err; } static void unmap_buffers(struct page *page, loff_t pos) { struct buffer_head *bh; struct buffer_head *head; struct buffer_head *next; unsigned long tail_index; unsigned long cur_index; if (page) { if (page_has_buffers(page)) { tail_index = pos & (PAGE_SIZE - 1); cur_index = 0; head = page_buffers(page); bh = head; do { next = bh->b_this_page; /* * we want to unmap the buffers that contain * the tail, and all the buffers after it * (since the tail must be at the end of the * file). We don't want to unmap file data * before the tail, since it might be dirty * and waiting to reach disk */ cur_index += bh->b_size; if (cur_index > tail_index) { reiserfs_unmap_buffer(bh); } bh = next; } while (bh != head); } } } static int maybe_indirect_to_direct(struct reiserfs_transaction_handle *th, struct inode *inode, struct page *page, struct treepath *path, const struct cpu_key *item_key, loff_t new_file_size, char *mode) { struct super_block *sb = inode->i_sb; int block_size = sb->s_blocksize; int cut_bytes; BUG_ON(!th->t_trans_id); BUG_ON(new_file_size != inode->i_size); /* * the page being sent in could be NULL if there was an i/o error * reading in the last block. The user will hit problems trying to * read the file, but for now we just skip the indirect2direct */ if (atomic_read(&inode->i_count) > 1 || !tail_has_to_be_packed(inode) || !page || (REISERFS_I(inode)->i_flags & i_nopack_mask)) { /* leave tail in an unformatted node */ *mode = M_SKIP_BALANCING; cut_bytes = block_size - (new_file_size & (block_size - 1)); pathrelse(path); return cut_bytes; } /* Perform the conversion to a direct_item. */ return indirect2direct(th, inode, page, path, item_key, new_file_size, mode); } /* * we did indirect_to_direct conversion. And we have inserted direct * item successesfully, but there were no disk space to cut unfm * pointer being converted. 
Therefore we have to delete inserted * direct item(s) */ static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th, struct inode *inode, struct treepath *path) { struct cpu_key tail_key; int tail_len; int removed; BUG_ON(!th->t_trans_id); make_cpu_key(&tail_key, inode, inode->i_size + 1, TYPE_DIRECT, 4); tail_key.key_length = 4; tail_len = (cpu_key_k_offset(&tail_key) & (inode->i_sb->s_blocksize - 1)) - 1; while (tail_len) { /* look for the last byte of the tail */ if (search_for_position_by_key(inode->i_sb, &tail_key, path) == POSITION_NOT_FOUND) reiserfs_panic(inode->i_sb, "vs-5615", "found invalid item"); RFALSE(path->pos_in_item != ih_item_len(tp_item_head(path)) - 1, "vs-5616: appended bytes found"); PATH_LAST_POSITION(path)--; removed = reiserfs_delete_item(th, path, &tail_key, inode, NULL /*unbh not needed */ ); RFALSE(removed <= 0 || removed > tail_len, "vs-5617: there was tail %d bytes, removed item length %d bytes", tail_len, removed); tail_len -= removed; set_cpu_key_k_offset(&tail_key, cpu_key_k_offset(&tail_key) - removed); } reiserfs_warning(inode->i_sb, "reiserfs-5091", "indirect_to_direct " "conversion has been rolled back due to " "lack of disk space"); mark_inode_dirty(inode); } /* (Truncate or cut entry) or delete object item. Returns < 0 on failure */ int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th, struct treepath *path, struct cpu_key *item_key, struct inode *inode, struct page *page, loff_t new_file_size) { struct super_block *sb = inode->i_sb; /* * Every function which is going to call do_balance must first * create a tree_balance structure. Then it must fill up this * structure by using the init_tb_struct and fix_nodes functions. * After that we can make tree balancing. */ struct tree_balance s_cut_balance; struct item_head *p_le_ih; int cut_size = 0; /* Amount to be cut. */ int ret_value = CARRY_ON; int removed = 0; /* Number of the removed unformatted nodes. */ int is_inode_locked = 0; char mode; /* Mode of the balance. */ int retval2 = -1; int quota_cut_bytes; loff_t tail_pos = 0; int depth; BUG_ON(!th->t_trans_id); init_tb_struct(th, &s_cut_balance, inode->i_sb, path, cut_size); /* * Repeat this loop until we either cut the item without needing * to balance, or we fix_nodes without schedule occurring */ while (1) { /* * Determine the balance mode, position of the first byte to * be cut, and size to be cut. In case of the indirect item * free unformatted nodes which are pointed to by the cut * pointers. */ mode = prepare_for_delete_or_cut(th, inode, path, item_key, &removed, &cut_size, new_file_size); if (mode == M_CONVERT) { /* * convert last unformatted node to direct item or * leave tail in the unformatted node */ RFALSE(ret_value != CARRY_ON, "PAP-5570: can not convert twice"); ret_value = maybe_indirect_to_direct(th, inode, page, path, item_key, new_file_size, &mode); if (mode == M_SKIP_BALANCING) /* tail has been left in the unformatted node */ return ret_value; is_inode_locked = 1; /* * removing of last unformatted node will * change value we have to return to truncate. * Save it */ retval2 = ret_value; /* * So, we have performed the first part of the * conversion: * inserting the new direct item. Now we are * removing the last unformatted node pointer. * Set key to search for it. 
*/ set_cpu_key_k_type(item_key, TYPE_INDIRECT); item_key->key_length = 4; new_file_size -= (new_file_size & (sb->s_blocksize - 1)); tail_pos = new_file_size; set_cpu_key_k_offset(item_key, new_file_size + 1); if (search_for_position_by_key (sb, item_key, path) == POSITION_NOT_FOUND) { print_block(PATH_PLAST_BUFFER(path), 3, PATH_LAST_POSITION(path) - 1, PATH_LAST_POSITION(path) + 1); reiserfs_panic(sb, "PAP-5580", "item to " "convert does not exist (%K)", item_key); } continue; } if (cut_size == 0) { pathrelse(path); return 0; } s_cut_balance.insert_size[0] = cut_size; ret_value = fix_nodes(mode, &s_cut_balance, NULL, NULL); if (ret_value != REPEAT_SEARCH) break; PROC_INFO_INC(sb, cut_from_item_restarted); ret_value = search_for_position_by_key(sb, item_key, path); if (ret_value == POSITION_FOUND) continue; reiserfs_warning(sb, "PAP-5610", "item %K not found", item_key); unfix_nodes(&s_cut_balance); return (ret_value == IO_ERROR) ? -EIO : -ENOENT; } /* while */ /* check fix_nodes results (IO_ERROR or NO_DISK_SPACE) */ if (ret_value != CARRY_ON) { if (is_inode_locked) { /* * FIXME: this seems to be not needed: we are always * able to cut item */ indirect_to_direct_roll_back(th, inode, path); } if (ret_value == NO_DISK_SPACE) reiserfs_warning(sb, "reiserfs-5092", "NO_DISK_SPACE"); unfix_nodes(&s_cut_balance); return -EIO; } /* go ahead and perform balancing */ RFALSE(mode == M_PASTE || mode == M_INSERT, "invalid mode"); /* Calculate number of bytes that need to be cut from the item. */ quota_cut_bytes = (mode == M_DELETE) ? ih_item_len(tp_item_head(path)) : -s_cut_balance. insert_size[0]; if (retval2 == -1) ret_value = calc_deleted_bytes_number(&s_cut_balance, mode); else ret_value = retval2; /* * For direct items, we only change the quota when deleting the last * item. */ p_le_ih = tp_item_head(s_cut_balance.tb_path); if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(p_le_ih)) { if (mode == M_DELETE && (le_ih_k_offset(p_le_ih) & (sb->s_blocksize - 1)) == 1) { /* FIXME: this is to keep 3.5 happy */ REISERFS_I(inode)->i_first_direct_byte = U32_MAX; quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE; } else { quota_cut_bytes = 0; } } #ifdef CONFIG_REISERFS_CHECK if (is_inode_locked) { struct item_head *le_ih = tp_item_head(s_cut_balance.tb_path); /* * we are going to complete indirect2direct conversion. Make * sure, that we exactly remove last unformatted node pointer * of the item */ if (!is_indirect_le_ih(le_ih)) reiserfs_panic(sb, "vs-5652", "item must be indirect %h", le_ih); if (mode == M_DELETE && ih_item_len(le_ih) != UNFM_P_SIZE) reiserfs_panic(sb, "vs-5653", "completing " "indirect2direct conversion indirect " "item %h being deleted must be of " "4 byte long", le_ih); if (mode == M_CUT && s_cut_balance.insert_size[0] != -UNFM_P_SIZE) { reiserfs_panic(sb, "vs-5654", "can not complete " "indirect2direct conversion of %h " "(CUT, insert_size==%d)", le_ih, s_cut_balance.insert_size[0]); } /* * it would be useful to make sure, that right neighboring * item is direct item of this file */ } #endif do_balance(&s_cut_balance, NULL, NULL, mode); if (is_inode_locked) { /* * we've done an indirect->direct conversion. 
when the * data block was freed, it was removed from the list of * blocks that must be flushed before the transaction * commits, make sure to unmap and invalidate it */ unmap_buffers(page, tail_pos); REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; } #ifdef REISERQUOTA_DEBUG reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE, "reiserquota cut_from_item(): freeing %u id=%u type=%c", quota_cut_bytes, inode->i_uid, '?'); #endif depth = reiserfs_write_unlock_nested(sb); dquot_free_space_nodirty(inode, quota_cut_bytes); reiserfs_write_lock_nested(sb, depth); return ret_value; } static void truncate_directory(struct reiserfs_transaction_handle *th, struct inode *inode) { BUG_ON(!th->t_trans_id); if (inode->i_nlink) reiserfs_error(inode->i_sb, "vs-5655", "link count != 0"); set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), DOT_OFFSET); set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_DIRENTRY); reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode)); reiserfs_update_sd(th, inode); set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), SD_OFFSET); set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_STAT_DATA); } /* * Truncate file to the new size. Note, this must be called with a * transaction already started */ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, struct inode *inode, /* ->i_size contains new size */ struct page *page, /* up to date for last block */ /* * when it is called by file_release to convert * the tail - no timestamps should be updated */ int update_timestamps ) { INITIALIZE_PATH(s_search_path); /* Path to the current object item. */ struct item_head *p_le_ih; /* Pointer to an item header. */ /* Key to search for a previous file item. */ struct cpu_key s_item_key; loff_t file_size, /* Old file size. */ new_file_size; /* New file size. */ int deleted; /* Number of deleted or truncated bytes. */ int retval; int err = 0; BUG_ON(!th->t_trans_id); if (! (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) return 0; /* deletion of directory - no need to update timestamps */ if (S_ISDIR(inode->i_mode)) { truncate_directory(th, inode); return 0; } /* Get new file size. */ new_file_size = inode->i_size; /* FIXME: note, that key type is unimportant here */ make_cpu_key(&s_item_key, inode, max_reiserfs_offset(inode), TYPE_DIRECT, 3); retval = search_for_position_by_key(inode->i_sb, &s_item_key, &s_search_path); if (retval == IO_ERROR) { reiserfs_error(inode->i_sb, "vs-5657", "i/o failure occurred trying to truncate %K", &s_item_key); err = -EIO; goto out; } if (retval == POSITION_FOUND || retval == FILE_NOT_FOUND) { reiserfs_error(inode->i_sb, "PAP-5660", "wrong result %d of search for %K", retval, &s_item_key); err = -EIO; goto out; } s_search_path.pos_in_item--; /* Get real file size (total length of all file items) */ p_le_ih = tp_item_head(&s_search_path); if (is_statdata_le_ih(p_le_ih)) file_size = 0; else { loff_t offset = le_ih_k_offset(p_le_ih); int bytes = op_bytes_number(p_le_ih, inode->i_sb->s_blocksize); /* * this may mismatch with real file size: if last direct item * had no padding zeros and last unformatted node had no free * space, this file would have this file size */ file_size = offset + bytes - 1; } /* * are we doing a full truncate or delete, if so * kick in the reada code */ if (new_file_size == 0) s_search_path.reada = PATH_READA | PATH_READA_BACK; if (file_size == 0 || file_size < new_file_size) { goto update_and_out; } /* Update key to search for the last file item. 
*/ set_cpu_key_k_offset(&s_item_key, file_size); do { /* Cut or delete file item. */ deleted = reiserfs_cut_from_item(th, &s_search_path, &s_item_key, inode, page, new_file_size); if (deleted < 0) { reiserfs_warning(inode->i_sb, "vs-5665", "reiserfs_cut_from_item failed"); reiserfs_check_path(&s_search_path); return 0; } RFALSE(deleted > file_size, "PAP-5670: reiserfs_cut_from_item: too many bytes deleted: deleted %d, file_size %lu, item_key %K", deleted, file_size, &s_item_key); /* Change key to search the last file item. */ file_size -= deleted; set_cpu_key_k_offset(&s_item_key, file_size); /* * While there are bytes to truncate and previous * file item is presented in the tree. */ /* * This loop could take a really long time, and could log * many more blocks than a transaction can hold. So, we do * a polite journal end here, and if the transaction needs * ending, we make sure the file is consistent before ending * the current trans and starting a new one */ if (journal_transaction_should_end(th, 0) || reiserfs_transaction_free_space(th) <= JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) { pathrelse(&s_search_path); if (update_timestamps) { inode->i_mtime = current_time(inode); inode->i_ctime = current_time(inode); } reiserfs_update_sd(th, inode); err = journal_end(th); if (err) goto out; err = journal_begin(th, inode->i_sb, JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD + JOURNAL_PER_BALANCE_CNT * 4) ; if (err) goto out; reiserfs_update_inode_transaction(inode); } } while (file_size > ROUND_UP(new_file_size) && search_for_position_by_key(inode->i_sb, &s_item_key, &s_search_path) == POSITION_FOUND); RFALSE(file_size > ROUND_UP(new_file_size), "PAP-5680: truncate did not finish: new_file_size %lld, current %lld, oid %d", new_file_size, file_size, s_item_key.on_disk_key.k_objectid); update_and_out: if (update_timestamps) { /* this is truncate, not file closing */ inode->i_mtime = current_time(inode); inode->i_ctime = current_time(inode); } reiserfs_update_sd(th, inode); out: pathrelse(&s_search_path); return err; } #ifdef CONFIG_REISERFS_CHECK /* this makes sure, that we __append__, not overwrite or add holes */ static void check_research_for_paste(struct treepath *path, const struct cpu_key *key) { struct item_head *found_ih = tp_item_head(path); if (is_direct_le_ih(found_ih)) { if (le_ih_k_offset(found_ih) + op_bytes_number(found_ih, get_last_bh(path)->b_size) != cpu_key_k_offset(key) || op_bytes_number(found_ih, get_last_bh(path)->b_size) != pos_in_item(path)) reiserfs_panic(NULL, "PAP-5720", "found direct item " "%h or position (%d) does not match " "to key %K", found_ih, pos_in_item(path), key); } if (is_indirect_le_ih(found_ih)) { if (le_ih_k_offset(found_ih) + op_bytes_number(found_ih, get_last_bh(path)->b_size) != cpu_key_k_offset(key) || I_UNFM_NUM(found_ih) != pos_in_item(path) || get_ih_free_space(found_ih) != 0) reiserfs_panic(NULL, "PAP-5730", "found indirect " "item (%h) or position (%d) does not " "match to key (%K)", found_ih, pos_in_item(path), key); } } #endif /* config reiserfs check */ /* * Paste bytes to the existing item. * Returns bytes number pasted into the item. */ int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, /* Path to the pasted item. */ struct treepath *search_path, /* Key to search for the needed item. */ const struct cpu_key *key, /* Inode item belongs to */ struct inode *inode, /* Pointer to the bytes to paste. */ const char *body, /* Size of pasted bytes. 
*/ int pasted_size) { struct super_block *sb = inode->i_sb; struct tree_balance s_paste_balance; int retval; int fs_gen; int depth; BUG_ON(!th->t_trans_id); fs_gen = get_generation(inode->i_sb); #ifdef REISERQUOTA_DEBUG reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE, "reiserquota paste_into_item(): allocating %u id=%u type=%c", pasted_size, inode->i_uid, key2type(&key->on_disk_key)); #endif depth = reiserfs_write_unlock_nested(sb); retval = dquot_alloc_space_nodirty(inode, pasted_size); reiserfs_write_lock_nested(sb, depth); if (retval) { pathrelse(search_path); return retval; } init_tb_struct(th, &s_paste_balance, th->t_super, search_path, pasted_size); #ifdef DISPLACE_NEW_PACKING_LOCALITIES s_paste_balance.key = key->on_disk_key; #endif /* DQUOT_* can schedule, must check before the fix_nodes */ if (fs_changed(fs_gen, inode->i_sb)) { goto search_again; } while ((retval = fix_nodes(M_PASTE, &s_paste_balance, NULL, body)) == REPEAT_SEARCH) { search_again: /* file system changed while we were in the fix_nodes */ PROC_INFO_INC(th->t_super, paste_into_item_restarted); retval = search_for_position_by_key(th->t_super, key, search_path); if (retval == IO_ERROR) { retval = -EIO; goto error_out; } if (retval == POSITION_FOUND) { reiserfs_warning(inode->i_sb, "PAP-5710", "entry or pasted byte (%K) exists", key); retval = -EEXIST; goto error_out; } #ifdef CONFIG_REISERFS_CHECK check_research_for_paste(search_path, key); #endif } /* * Perform balancing after all resources are collected by fix_nodes, * and accessing them will not risk triggering schedule. */ if (retval == CARRY_ON) { do_balance(&s_paste_balance, NULL /*ih */ , body, M_PASTE); return 0; } retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO; error_out: /* this also releases the path */ unfix_nodes(&s_paste_balance); #ifdef REISERQUOTA_DEBUG reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE, "reiserquota paste_into_item(): freeing %u id=%u type=%c", pasted_size, inode->i_uid, key2type(&key->on_disk_key)); #endif depth = reiserfs_write_unlock_nested(sb); dquot_free_space_nodirty(inode, pasted_size); reiserfs_write_lock_nested(sb, depth); return retval; } /* * Insert new item into the buffer at the path. * th - active transaction handle * path - path to the inserted item * ih - pointer to the item header to insert * body - pointer to the bytes to insert */ int reiserfs_insert_item(struct reiserfs_transaction_handle *th, struct treepath *path, const struct cpu_key *key, struct item_head *ih, struct inode *inode, const char *body) { struct tree_balance s_ins_balance; int retval; int fs_gen = 0; int quota_bytes = 0; BUG_ON(!th->t_trans_id); if (inode) { /* Do we count quotas for item? */ int depth; fs_gen = get_generation(inode->i_sb); quota_bytes = ih_item_len(ih); /* * hack so the quota code doesn't have to guess * if the file has a tail, links are always tails, * so there's no guessing needed */ if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(ih)) quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE; #ifdef REISERQUOTA_DEBUG reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE, "reiserquota insert_item(): allocating %u id=%u type=%c", quota_bytes, inode->i_uid, head2type(ih)); #endif /* * We can't dirty inode here. It would be immediately * written but appropriate stat item isn't inserted yet... 
*/ depth = reiserfs_write_unlock_nested(inode->i_sb); retval = dquot_alloc_space_nodirty(inode, quota_bytes); reiserfs_write_lock_nested(inode->i_sb, depth); if (retval) { pathrelse(path); return retval; } } init_tb_struct(th, &s_ins_balance, th->t_super, path, IH_SIZE + ih_item_len(ih)); #ifdef DISPLACE_NEW_PACKING_LOCALITIES s_ins_balance.key = key->on_disk_key; #endif /* * DQUOT_* can schedule, must check to be sure calling * fix_nodes is safe */ if (inode && fs_changed(fs_gen, inode->i_sb)) { goto search_again; } while ((retval = fix_nodes(M_INSERT, &s_ins_balance, ih, body)) == REPEAT_SEARCH) { search_again: /* file system changed while we were in the fix_nodes */ PROC_INFO_INC(th->t_super, insert_item_restarted); retval = search_item(th->t_super, key, path); if (retval == IO_ERROR) { retval = -EIO; goto error_out; } if (retval == ITEM_FOUND) { reiserfs_warning(th->t_super, "PAP-5760", "key %K already exists in the tree", key); retval = -EEXIST; goto error_out; } } /* make balancing after all resources will be collected at a time */ if (retval == CARRY_ON) { do_balance(&s_ins_balance, ih, body, M_INSERT); return 0; } retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO; error_out: /* also releases the path */ unfix_nodes(&s_ins_balance); #ifdef REISERQUOTA_DEBUG if (inode) reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE, "reiserquota insert_item(): freeing %u id=%u type=%c", quota_bytes, inode->i_uid, head2type(ih)); #endif if (inode) { int depth = reiserfs_write_unlock_nested(inode->i_sb); dquot_free_space_nodirty(inode, quota_bytes); reiserfs_write_lock_nested(inode->i_sb, depth); } return retval; }
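/*
 * A minimal userspace sketch (not kernel code) of the control flow shared by
 * reiserfs_delete_item(), reiserfs_paste_into_item() and
 * reiserfs_insert_item() above: set up a tree_balance, call fix_nodes(),
 * re-run the key search and retry whenever fix_nodes() returns REPEAT_SEARCH
 * (the tree moved while fix_nodes() slept), and only call do_balance() once
 * it answers CARRY_ON with all buffers reserved; anything else is unwound
 * with unfix_nodes() and mapped to an errno. The stub_* functions below are
 * hypothetical stand-ins; only the return-code names mirror reiserfs.
 */
#include <stdio.h>
#include <stdlib.h>

enum { CARRY_ON, REPEAT_SEARCH, NO_DISK_SPACE };
enum { ITEM_FOUND, ITEM_NOT_FOUND };

/* Pretend the tree changes once under us before settling down. */
static int stub_fix_nodes(int *tries)
{
	return (*tries)++ < 1 ? REPEAT_SEARCH : CARRY_ON;
}

static int stub_search_item(void)  { return ITEM_NOT_FOUND; }
static void stub_do_balance(void)  { puts("balanced: item inserted"); }
static void stub_unfix_nodes(void) { puts("reserved buffers released"); }

static int insert_item_like(void)
{
	int tries = 0, ret;

	while ((ret = stub_fix_nodes(&tries)) == REPEAT_SEARCH) {
		/* The tree changed while fix_nodes() slept: search again. */
		if (stub_search_item() == ITEM_FOUND)
			return -17;		/* -EEXIST in the kernel code */
	}

	if (ret == CARRY_ON) {
		/* All resources collected; balancing cannot sleep now. */
		stub_do_balance();
		return 0;
	}

	stub_unfix_nodes();
	return ret == NO_DISK_SPACE ? -28 : -5;	/* -ENOSPC : -EIO */
}

int main(void)
{
	return insert_item_like() ? EXIT_FAILURE : EXIT_SUCCESS;
}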
2082 22 1 22 1 1 1 1 1 7 2 2083 92 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 /* Copyright 2011-2014 Autronica Fire and Security AS * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free * Software Foundation; either version 2 of the License, or (at your option) * any later version. * * Author(s): * 2011-2014 Arvid Brodin, arvid.brodin@alten.se */ #include <linux/netdevice.h> #include <linux/rculist.h> #include <linux/timer.h> #include <linux/etherdevice.h> #include "hsr_main.h" #include "hsr_device.h" #include "hsr_netlink.h" #include "hsr_framereg.h" #include "hsr_slave.h" static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event, void *ptr) { struct net_device *dev; struct hsr_port *port, *master; struct hsr_priv *hsr; int mtu_max; int res; dev = netdev_notifier_info_to_dev(ptr); port = hsr_port_get_rtnl(dev); if (port == NULL) { if (!is_hsr_master(dev)) return NOTIFY_DONE; /* Not an HSR device */ hsr = netdev_priv(dev); port = hsr_port_get_hsr(hsr, HSR_PT_MASTER); if (port == NULL) { /* Resend of notification concerning removed device? */ return NOTIFY_DONE; } } else { hsr = port->hsr; } switch (event) { case NETDEV_UP: /* Administrative state DOWN */ case NETDEV_DOWN: /* Administrative state UP */ case NETDEV_CHANGE: /* Link (carrier) state changes */ hsr_check_carrier_and_operstate(hsr); break; case NETDEV_CHANGEADDR: if (port->type == HSR_PT_MASTER) { /* This should not happen since there's no * ndo_set_mac_address() for HSR devices - i.e. not * supported. */ break; } master = hsr_port_get_hsr(hsr, HSR_PT_MASTER); if (port->type == HSR_PT_SLAVE_A) { ether_addr_copy(master->dev->dev_addr, dev->dev_addr); call_netdevice_notifiers(NETDEV_CHANGEADDR, master->dev); } /* Make sure we recognize frames from ourselves in hsr_rcv() */ port = hsr_port_get_hsr(hsr, HSR_PT_SLAVE_B); res = hsr_create_self_node(&hsr->self_node_db, master->dev->dev_addr, port ? port->dev->dev_addr : master->dev->dev_addr); if (res) netdev_warn(master->dev, "Could not update HSR node address.\n"); break; case NETDEV_CHANGEMTU: if (port->type == HSR_PT_MASTER) break; /* Handled in ndo_change_mtu() */ mtu_max = hsr_get_max_mtu(port->hsr); master = hsr_port_get_hsr(port->hsr, HSR_PT_MASTER); master->dev->mtu = mtu_max; break; case NETDEV_UNREGISTER: hsr_del_port(port); break; case NETDEV_PRE_TYPE_CHANGE: /* HSR works only on Ethernet devices. Refuse slave to change * its type. 
*/ return NOTIFY_BAD; } return NOTIFY_DONE; } struct hsr_port *hsr_port_get_hsr(struct hsr_priv *hsr, enum hsr_port_type pt) { struct hsr_port *port; hsr_for_each_port(hsr, port) if (port->type == pt) return port; return NULL; } static struct notifier_block hsr_nb = { .notifier_call = hsr_netdev_notify, /* Slave event notifications */ }; static int __init hsr_init(void) { int res; BUILD_BUG_ON(sizeof(struct hsr_tag) != HSR_HLEN); register_netdevice_notifier(&hsr_nb); res = hsr_netlink_init(); return res; } static void __exit hsr_exit(void) { unregister_netdevice_notifier(&hsr_nb); hsr_netlink_exit(); } module_init(hsr_init); module_exit(hsr_exit); MODULE_LICENSE("GPL");
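/*
 * A minimal userspace sketch (not kernel code) of the notifier pattern used
 * above: hsr_netdev_notify() is one callback on the netdevice notifier
 * chain, invoked for every NETDEV_* event, and it returns NOTIFY_DONE to let
 * the event proceed or NOTIFY_BAD to veto it (as HSR does for
 * NETDEV_PRE_TYPE_CHANGE). The chain walk, the EV_* event names and
 * sketch_hsr_notify() below are made up for the demo; only the
 * NOTIFY_DONE/NOTIFY_BAD values mirror the kernel's encoding.
 */
#include <stdio.h>

#define NOTIFY_DONE	0x0000
#define NOTIFY_BAD	0x8002		/* "stop the chain and fail the change" */

enum { EV_UP, EV_CHANGEMTU, EV_PRE_TYPE_CHANGE };

typedef int (*notifier_fn)(int event);

static int sketch_hsr_notify(int event)
{
	switch (event) {
	case EV_UP:
	case EV_CHANGEMTU:
		printf("hsr-like notifier: handled event %d\n", event);
		return NOTIFY_DONE;
	case EV_PRE_TYPE_CHANGE:
		return NOTIFY_BAD;	/* refuse: only Ethernet-type devices */
	}
	return NOTIFY_DONE;
}

static int call_chain(notifier_fn *chain, int n, int event)
{
	int i, ret = NOTIFY_DONE;

	for (i = 0; i < n; i++) {
		ret = chain[i](event);
		if (ret == NOTIFY_BAD)	/* a callback vetoed the change */
			break;
	}
	return ret;
}

int main(void)
{
	notifier_fn chain[] = { sketch_hsr_notify };

	call_chain(chain, 1, EV_CHANGEMTU);
	return call_chain(chain, 1, EV_PRE_TYPE_CHANGE) == NOTIFY_BAD ? 0 : 1;
}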
47 47 47 47 47 47 47 47 47 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 /* * upcase.c - Generate the full NTFS Unicode upcase table in little endian. * Part of the Linux-NTFS project. * * Copyright (c) 2001 Richard Russon <ntfs@flatcap.org> * Copyright (c) 2001-2006 Anton Altaparmakov * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free * Software Foundation; either version 2 of the License, or (at your option) * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program (in the main directory of the Linux-NTFS source * in the file COPYING); if not, write to the Free Software Foundation, * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include "malloc.h" #include "ntfs.h" ntfschar *generate_default_upcase(void) { static const int uc_run_table[][3] = { /* Start, End, Add */ {0x0061, 0x007B, -32}, {0x0451, 0x045D, -80}, {0x1F70, 0x1F72, 74}, {0x00E0, 0x00F7, -32}, {0x045E, 0x0460, -80}, {0x1F72, 0x1F76, 86}, {0x00F8, 0x00FF, -32}, {0x0561, 0x0587, -48}, {0x1F76, 0x1F78, 100}, {0x0256, 0x0258, -205}, {0x1F00, 0x1F08, 8}, {0x1F78, 0x1F7A, 128}, {0x028A, 0x028C, -217}, {0x1F10, 0x1F16, 8}, {0x1F7A, 0x1F7C, 112}, {0x03AC, 0x03AD, -38}, {0x1F20, 0x1F28, 8}, {0x1F7C, 0x1F7E, 126}, {0x03AD, 0x03B0, -37}, {0x1F30, 0x1F38, 8}, {0x1FB0, 0x1FB2, 8}, {0x03B1, 0x03C2, -32}, {0x1F40, 0x1F46, 8}, {0x1FD0, 0x1FD2, 8}, {0x03C2, 0x03C3, -31}, {0x1F51, 0x1F52, 8}, {0x1FE0, 0x1FE2, 8}, {0x03C3, 0x03CC, -32}, {0x1F53, 0x1F54, 8}, {0x1FE5, 0x1FE6, 7}, {0x03CC, 0x03CD, -64}, {0x1F55, 0x1F56, 8}, {0x2170, 0x2180, -16}, {0x03CD, 0x03CF, -63}, {0x1F57, 0x1F58, 8}, {0x24D0, 0x24EA, -26}, {0x0430, 0x0450, -32}, {0x1F60, 0x1F68, 8}, {0xFF41, 0xFF5B, -32}, {0} }; static const int uc_dup_table[][2] = { /* Start, End */ {0x0100, 0x012F}, {0x01A0, 0x01A6}, {0x03E2, 0x03EF}, {0x04CB, 0x04CC}, {0x0132, 0x0137}, {0x01B3, 0x01B7}, {0x0460, 0x0481}, {0x04D0, 0x04EB}, {0x0139, 0x0149}, {0x01CD, 0x01DD}, {0x0490, 0x04BF}, {0x04EE, 0x04F5}, {0x014A, 0x0178}, {0x01DE, 0x01EF}, {0x04BF, 0x04BF}, {0x04F8, 0x04F9}, {0x0179, 0x017E}, {0x01F4, 0x01F5}, {0x04C1, 0x04C4}, {0x1E00, 0x1E95}, {0x018B, 0x018B}, {0x01FA, 0x0218}, {0x04C7, 0x04C8}, {0x1EA0, 0x1EF9}, {0} }; static const int uc_word_table[][2] = { /* Offset, Value */ {0x00FF, 0x0178}, {0x01AD, 0x01AC}, {0x01F3, 0x01F1}, {0x0269, 0x0196}, {0x0183, 0x0182}, {0x01B0, 0x01AF}, {0x0253, 0x0181}, {0x026F, 0x019C}, {0x0185, 0x0184}, {0x01B9, 0x01B8}, {0x0254, 0x0186}, {0x0272, 0x019D}, {0x0188, 0x0187}, {0x01BD, 0x01BC}, {0x0259, 0x018F}, {0x0275, 0x019F}, {0x018C, 0x018B}, {0x01C6, 0x01C4}, {0x025B, 0x0190}, {0x0283, 0x01A9}, {0x0192, 0x0191}, {0x01C9, 0x01C7}, {0x0260, 0x0193}, {0x0288, 0x01AE}, {0x0199, 0x0198}, {0x01CC, 0x01CA}, {0x0263, 0x0194}, {0x0292, 0x01B7}, {0x01A8, 0x01A7}, {0x01DD, 0x018E}, {0x0268, 0x0197}, {0} }; int i, r; ntfschar *uc; uc = ntfs_malloc_nofs(default_upcase_len * sizeof(ntfschar)); if (!uc) return uc; memset(uc, 0, default_upcase_len * 
sizeof(ntfschar)); /* Generate the little endian Unicode upcase table used by ntfs. */ for (i = 0; i < default_upcase_len; i++) uc[i] = cpu_to_le16(i); for (r = 0; uc_run_table[r][0]; r++) for (i = uc_run_table[r][0]; i < uc_run_table[r][1]; i++) le16_add_cpu(&uc[i], uc_run_table[r][2]); for (r = 0; uc_dup_table[r][0]; r++) for (i = uc_dup_table[r][0]; i < uc_dup_table[r][1]; i += 2) le16_add_cpu(&uc[i + 1], -1); for (r = 0; uc_word_table[r][0]; r++) uc[uc_word_table[r][0]] = cpu_to_le16(uc_word_table[r][1]); return uc; }
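/*
 * A minimal userspace sketch (not kernel code) of how generate_default_upcase()
 * above builds its table: start from an identity map (uc[i] == i), then patch
 * it with compressed tables -- "runs" where a whole range shifts by one delta,
 * "dup" ranges where every second code point maps to the one before it, and
 * single-word exceptions. The demo below applies one run (ASCII 'a'..'z') and
 * one exception in host byte order; the kernel version stores the full 64Ki
 * table little endian via cpu_to_le16()/le16_add_cpu(). TABLE_LEN and the
 * local variable names are made up for the demo.
 */
#include <stdio.h>
#include <stdint.h>

#define TABLE_LEN 0x100			/* just Latin-1 for the demo */

int main(void)
{
	static uint16_t uc[TABLE_LEN];
	/* one "run": start, end (exclusive), delta -- like uc_run_table[] */
	const int run[3] = { 0x0061, 0x007B, -32 };	/* 'a'..'z' -> 'A'..'Z' */
	/* one exception: offset, value -- like uc_word_table[] */
	const int word[2] = { 0x00FF, 0x0178 };		/* y-diaeresis -> Y-diaeresis */
	int i;

	for (i = 0; i < TABLE_LEN; i++)		/* identity by default */
		uc[i] = (uint16_t)i;

	for (i = run[0]; i < run[1]; i++)	/* apply the run's delta */
		uc[i] = (uint16_t)(uc[i] + run[2]);

	uc[word[0]] = (uint16_t)word[1];	/* apply the exception */

	printf("upcase('q')  = '%c'\n", uc['q']);		/* 'Q' */
	printf("upcase(0xFF) = U+%04X\n", (unsigned)uc[0xFF]);	/* U+0178 */
	return 0;
}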
1981 3 3 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 /* * "security" table * * This is for use by Mandatory Access Control (MAC) security models, * which need to be able to manage security policy in separate context * to DAC. * * Based on iptable_mangle.c * * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling * Copyright (C) 2000-2004 Netfilter Core Team <coreteam <at> netfilter.org> * Copyright (C) 2008 Red Hat, Inc., James Morris <jmorris <at> redhat.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ #include <linux/module.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/slab.h> #include <net/ip.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("James Morris <jmorris <at> redhat.com>"); MODULE_DESCRIPTION("iptables security table, for MAC rules"); #define SECURITY_VALID_HOOKS (1 << NF_INET_LOCAL_IN) | \ (1 << NF_INET_FORWARD) | \ (1 << NF_INET_LOCAL_OUT) static int __net_init iptable_security_table_init(struct net *net); static const struct xt_table security_table = { .name = "security", .valid_hooks = SECURITY_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV4, .priority = NF_IP_PRI_SECURITY, .table_init = iptable_security_table_init, }; static unsigned int iptable_security_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { return ipt_do_table(skb, state, state->net->ipv4.iptable_security); } static struct nf_hook_ops *sectbl_ops __read_mostly; static int __net_init iptable_security_table_init(struct net *net) { struct ipt_replace *repl; int ret; if (net->ipv4.iptable_security) return 0; repl = ipt_alloc_initial_table(&security_table); if (repl == NULL) return -ENOMEM; ret = ipt_register_table(net, &security_table, repl, sectbl_ops, &net->ipv4.iptable_security); kfree(repl); return ret; } static void __net_exit iptable_security_net_exit(struct net *net) { if (!net->ipv4.iptable_security) return; ipt_unregister_table(net, net->ipv4.iptable_security, sectbl_ops); net->ipv4.iptable_security = NULL; } static struct pernet_operations iptable_security_net_ops = { .exit = iptable_security_net_exit, }; static int __init iptable_security_init(void) { int ret; sectbl_ops = xt_hook_ops_alloc(&security_table, iptable_security_hook); if (IS_ERR(sectbl_ops)) return PTR_ERR(sectbl_ops); ret = register_pernet_subsys(&iptable_security_net_ops); if (ret < 0) { kfree(sectbl_ops); return ret; } ret = iptable_security_table_init(&init_net); if (ret) { unregister_pernet_subsys(&iptable_security_net_ops); kfree(sectbl_ops); } return ret; } static void __exit iptable_security_fini(void) { unregister_pernet_subsys(&iptable_security_net_ops); kfree(sectbl_ops); } module_init(iptable_security_init); module_exit(iptable_security_fini);
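/*
 * A minimal userspace sketch (not kernel code) of the init/unwind shape used
 * by iptable_security_init() above: allocate the hook ops, register the
 * pernet operations, then initialise the table for the initial namespace,
 * undoing every earlier step in reverse order as soon as one step fails.
 * step_alloc_ops(), step_register_pernet() and step_table_init() below are
 * hypothetical stand-ins, not kernel interfaces; step_table_init() is forced
 * to fail so that the unwind path actually runs.
 */
#include <stdio.h>
#include <stdlib.h>

static void *step_alloc_ops(void)         { return malloc(16); }
static int step_register_pernet(void)     { return 0; }	/* 0 on success */
static void step_unregister_pernet(void)  { puts("pernet ops unregistered"); }
static int step_table_init(void)          { return -1; }	/* forced failure */

static int module_init_like(void)
{
	void *ops;
	int ret;

	ops = step_alloc_ops();
	if (!ops)
		return -12;			/* -ENOMEM */

	ret = step_register_pernet();
	if (ret < 0) {
		free(ops);			/* undo step 1 */
		return ret;
	}

	ret = step_table_init();
	if (ret) {
		step_unregister_pernet();	/* undo step 2 ... */
		free(ops);			/* ... then step 1 */
	}
	return ret;
}

int main(void)
{
	printf("init result: %d (unwind path exercised)\n", module_init_like());
	return 0;
}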
/* * kernel/power/main.c - PM subsystem core functionality.
* * Copyright (c) 2003 Patrick Mochel * Copyright (c) 2003 Open Source Development Lab * * This file is released under the GPLv2 * */ #include <linux/export.h> #include <linux/kobject.h> #include <linux/string.h> #include <linux/pm-trace.h> #include <linux/workqueue.h> #include <linux/debugfs.h> #include <linux/seq_file.h> #include <linux/suspend.h> #include "power.h" #ifdef CONFIG_PM_SLEEP void lock_system_sleep(void) { current->flags |= PF_FREEZER_SKIP; mutex_lock(&system_transition_mutex); } EXPORT_SYMBOL_GPL(lock_system_sleep); void unlock_system_sleep(void) { /* * Don't use freezer_count() because we don't want the call to * try_to_freeze() here. * * Reason: * Fundamentally, we just don't need it, because freezing condition * doesn't come into effect until we release the * system_transition_mutex lock, since the freezer always works with * system_transition_mutex held. * * More importantly, in the case of hibernation, * unlock_system_sleep() gets called in snapshot_read() and * snapshot_write() when the freezing condition is still in effect. * Which means, if we use try_to_freeze() here, it would make them * enter the refrigerator, thus causing hibernation to lockup. */ current->flags &= ~PF_FREEZER_SKIP; mutex_unlock(&system_transition_mutex); } EXPORT_SYMBOL_GPL(unlock_system_sleep); /* Routines for PM-transition notifications */ static BLOCKING_NOTIFIER_HEAD(pm_chain_head); int register_pm_notifier(struct notifier_block *nb) { return blocking_notifier_chain_register(&pm_chain_head, nb); } EXPORT_SYMBOL_GPL(register_pm_notifier); int unregister_pm_notifier(struct notifier_block *nb) { return blocking_notifier_chain_unregister(&pm_chain_head, nb); } EXPORT_SYMBOL_GPL(unregister_pm_notifier); int __pm_notifier_call_chain(unsigned long val, int nr_to_call, int *nr_calls) { int ret; ret = __blocking_notifier_call_chain(&pm_chain_head, val, NULL, nr_to_call, nr_calls); return notifier_to_errno(ret); } int pm_notifier_call_chain(unsigned long val) { return __pm_notifier_call_chain(val, -1, NULL); } /* If set, devices may be suspended and resumed asynchronously. */ int pm_async_enabled = 1; static ssize_t pm_async_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sprintf(buf, "%d\n", pm_async_enabled); } static ssize_t pm_async_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { unsigned long val; if (kstrtoul(buf, 10, &val)) return -EINVAL; if (val > 1) return -EINVAL; pm_async_enabled = val; return n; } power_attr(pm_async); #ifdef CONFIG_SUSPEND static ssize_t mem_sleep_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { char *s = buf; suspend_state_t i; for (i = PM_SUSPEND_MIN; i < PM_SUSPEND_MAX; i++) if (mem_sleep_states[i]) { const char *label = mem_sleep_states[i]; if (mem_sleep_current == i) s += sprintf(s, "[%s] ", label); else s += sprintf(s, "%s ", label); } /* Convert the last space to a newline if needed. */ if (s != buf) *(s-1) = '\n'; return (s - buf); } static suspend_state_t decode_suspend_state(const char *buf, size_t n) { suspend_state_t state; char *p; int len; p = memchr(buf, '\n', n); len = p ? 
p - buf : n; for (state = PM_SUSPEND_MIN; state < PM_SUSPEND_MAX; state++) { const char *label = mem_sleep_states[state]; if (label && len == strlen(label) && !strncmp(buf, label, len)) return state; } return PM_SUSPEND_ON; } static ssize_t mem_sleep_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { suspend_state_t state; int error; error = pm_autosleep_lock(); if (error) return error; if (pm_autosleep_state() > PM_SUSPEND_ON) { error = -EBUSY; goto out; } state = decode_suspend_state(buf, n); if (state < PM_SUSPEND_MAX && state > PM_SUSPEND_ON) mem_sleep_current = state; else error = -EINVAL; out: pm_autosleep_unlock(); return error ? error : n; } power_attr(mem_sleep); #endif /* CONFIG_SUSPEND */ #ifdef CONFIG_PM_SLEEP_DEBUG int pm_test_level = TEST_NONE; static const char * const pm_tests[__TEST_AFTER_LAST] = { [TEST_NONE] = "none", [TEST_CORE] = "core", [TEST_CPUS] = "processors", [TEST_PLATFORM] = "platform", [TEST_DEVICES] = "devices", [TEST_FREEZER] = "freezer", }; static ssize_t pm_test_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { char *s = buf; int level; for (level = TEST_FIRST; level <= TEST_MAX; level++) if (pm_tests[level]) { if (level == pm_test_level) s += sprintf(s, "[%s] ", pm_tests[level]); else s += sprintf(s, "%s ", pm_tests[level]); } if (s != buf) /* convert the last space to a newline */ *(s-1) = '\n'; return (s - buf); } static ssize_t pm_test_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { const char * const *s; int level; char *p; int len; int error = -EINVAL; p = memchr(buf, '\n', n); len = p ? p - buf : n; lock_system_sleep(); level = TEST_FIRST; for (s = &pm_tests[level]; level <= TEST_MAX; s++, level++) if (*s && len == strlen(*s) && !strncmp(buf, *s, len)) { pm_test_level = level; error = 0; break; } unlock_system_sleep(); return error ? 
error : n; } power_attr(pm_test); #endif /* CONFIG_PM_SLEEP_DEBUG */ #ifdef CONFIG_DEBUG_FS static char *suspend_step_name(enum suspend_stat_step step) { switch (step) { case SUSPEND_FREEZE: return "freeze"; case SUSPEND_PREPARE: return "prepare"; case SUSPEND_SUSPEND: return "suspend"; case SUSPEND_SUSPEND_NOIRQ: return "suspend_noirq"; case SUSPEND_RESUME_NOIRQ: return "resume_noirq"; case SUSPEND_RESUME: return "resume"; default: return ""; } } static int suspend_stats_show(struct seq_file *s, void *unused) { int i, index, last_dev, last_errno, last_step; last_dev = suspend_stats.last_failed_dev + REC_FAILED_NUM - 1; last_dev %= REC_FAILED_NUM; last_errno = suspend_stats.last_failed_errno + REC_FAILED_NUM - 1; last_errno %= REC_FAILED_NUM; last_step = suspend_stats.last_failed_step + REC_FAILED_NUM - 1; last_step %= REC_FAILED_NUM; seq_printf(s, "%s: %d\n%s: %d\n%s: %d\n%s: %d\n%s: %d\n" "%s: %d\n%s: %d\n%s: %d\n%s: %d\n%s: %d\n", "success", suspend_stats.success, "fail", suspend_stats.fail, "failed_freeze", suspend_stats.failed_freeze, "failed_prepare", suspend_stats.failed_prepare, "failed_suspend", suspend_stats.failed_suspend, "failed_suspend_late", suspend_stats.failed_suspend_late, "failed_suspend_noirq", suspend_stats.failed_suspend_noirq, "failed_resume", suspend_stats.failed_resume, "failed_resume_early", suspend_stats.failed_resume_early, "failed_resume_noirq", suspend_stats.failed_resume_noirq); seq_printf(s, "failures:\n last_failed_dev:\t%-s\n", suspend_stats.failed_devs[last_dev]); for (i = 1; i < REC_FAILED_NUM; i++) { index = last_dev + REC_FAILED_NUM - i; index %= REC_FAILED_NUM; seq_printf(s, "\t\t\t%-s\n", suspend_stats.failed_devs[index]); } seq_printf(s, " last_failed_errno:\t%-d\n", suspend_stats.errno[last_errno]); for (i = 1; i < REC_FAILED_NUM; i++) { index = last_errno + REC_FAILED_NUM - i; index %= REC_FAILED_NUM; seq_printf(s, "\t\t\t%-d\n", suspend_stats.errno[index]); } seq_printf(s, " last_failed_step:\t%-s\n", suspend_step_name( suspend_stats.failed_steps[last_step])); for (i = 1; i < REC_FAILED_NUM; i++) { index = last_step + REC_FAILED_NUM - i; index %= REC_FAILED_NUM; seq_printf(s, "\t\t\t%-s\n", suspend_step_name( suspend_stats.failed_steps[index])); } return 0; } static int suspend_stats_open(struct inode *inode, struct file *file) { return single_open(file, suspend_stats_show, NULL); } static const struct file_operations suspend_stats_operations = { .open = suspend_stats_open, .read = seq_read, .llseek = seq_lseek, .release = single_release, }; static int __init pm_debugfs_init(void) { debugfs_create_file("suspend_stats", S_IFREG | S_IRUGO, NULL, NULL, &suspend_stats_operations); return 0; } late_initcall(pm_debugfs_init); #endif /* CONFIG_DEBUG_FS */ #endif /* CONFIG_PM_SLEEP */ #ifdef CONFIG_PM_SLEEP_DEBUG /* * pm_print_times: print time taken by devices to suspend and resume. * * show() returns whether printing of suspend and resume times is enabled. * store() accepts 0 or 1. 0 disables printing and 1 enables it. 
*/ bool pm_print_times_enabled; static ssize_t pm_print_times_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sprintf(buf, "%d\n", pm_print_times_enabled); } static ssize_t pm_print_times_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { unsigned long val; if (kstrtoul(buf, 10, &val)) return -EINVAL; if (val > 1) return -EINVAL; pm_print_times_enabled = !!val; return n; } power_attr(pm_print_times); static inline void pm_print_times_init(void) { pm_print_times_enabled = !!initcall_debug; } static ssize_t pm_wakeup_irq_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return pm_wakeup_irq ? sprintf(buf, "%u\n", pm_wakeup_irq) : -ENODATA; } power_attr_ro(pm_wakeup_irq); bool pm_debug_messages_on __read_mostly; static ssize_t pm_debug_messages_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sprintf(buf, "%d\n", pm_debug_messages_on); } static ssize_t pm_debug_messages_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { unsigned long val; if (kstrtoul(buf, 10, &val)) return -EINVAL; if (val > 1) return -EINVAL; pm_debug_messages_on = !!val; return n; } power_attr(pm_debug_messages); /** * __pm_pr_dbg - Print a suspend debug message to the kernel log. * @defer: Whether or not to use printk_deferred() to print the message. * @fmt: Message format. * * The message will be emitted if enabled through the pm_debug_messages * sysfs attribute. */ void __pm_pr_dbg(bool defer, const char *fmt, ...) { struct va_format vaf; va_list args; if (!pm_debug_messages_on) return; va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; if (defer) printk_deferred(KERN_DEBUG "PM: %pV", &vaf); else printk(KERN_DEBUG "PM: %pV", &vaf); va_end(args); } #else /* !CONFIG_PM_SLEEP_DEBUG */ static inline void pm_print_times_init(void) {} #endif /* CONFIG_PM_SLEEP_DEBUG */ struct kobject *power_kobj; /** * state - control system sleep states. * * show() returns available sleep state labels, which may be "mem", "standby", * "freeze" and "disk" (hibernation). * See Documentation/admin-guide/pm/sleep-states.rst for a description of * what they mean. * * store() accepts one of those strings, translates it into the proper * enumerated value, and initiates a suspend transition. */ static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { char *s = buf; #ifdef CONFIG_SUSPEND suspend_state_t i; for (i = PM_SUSPEND_MIN; i < PM_SUSPEND_MAX; i++) if (pm_states[i]) s += sprintf(s,"%s ", pm_states[i]); #endif if (hibernation_available()) s += sprintf(s, "disk "); if (s != buf) /* convert the last space to a newline */ *(s-1) = '\n'; return (s - buf); } static suspend_state_t decode_state(const char *buf, size_t n) { #ifdef CONFIG_SUSPEND suspend_state_t state; #endif char *p; int len; p = memchr(buf, '\n', n); len = p ? p - buf : n; /* Check hibernation first. 
*/ if (len == 4 && !strncmp(buf, "disk", len)) return PM_SUSPEND_MAX; #ifdef CONFIG_SUSPEND for (state = PM_SUSPEND_MIN; state < PM_SUSPEND_MAX; state++) { const char *label = pm_states[state]; if (label && len == strlen(label) && !strncmp(buf, label, len)) return state; } #endif return PM_SUSPEND_ON; } static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { suspend_state_t state; int error; error = pm_autosleep_lock(); if (error) return error; if (pm_autosleep_state() > PM_SUSPEND_ON) { error = -EBUSY; goto out; } state = decode_state(buf, n); if (state < PM_SUSPEND_MAX) { if (state == PM_SUSPEND_MEM) state = mem_sleep_current; error = pm_suspend(state); } else if (state == PM_SUSPEND_MAX) { error = hibernate(); } else { error = -EINVAL; } out: pm_autosleep_unlock(); return error ? error : n; } power_attr(state); #ifdef CONFIG_PM_SLEEP /* * The 'wakeup_count' attribute, along with the functions defined in * drivers/base/power/wakeup.c, provides a means by which wakeup events can be * handled in a non-racy way. * * If a wakeup event occurs when the system is in a sleep state, it simply is * woken up. In turn, if an event that would wake the system up from a sleep * state occurs when it is undergoing a transition to that sleep state, the * transition should be aborted. Moreover, if such an event occurs when the * system is in the working state, an attempt to start a transition to the * given sleep state should fail during certain period after the detection of * the event. Using the 'state' attribute alone is not sufficient to satisfy * these requirements, because a wakeup event may occur exactly when 'state' * is being written to and may be delivered to user space right before it is * frozen, so the event will remain only partially processed until the system is * woken up by another event. In particular, it won't cause the transition to * a sleep state to be aborted. * * This difficulty may be overcome if user space uses 'wakeup_count' before * writing to 'state'. It first should read from 'wakeup_count' and store * the read value. Then, after carrying out its own preparations for the system * transition to a sleep state, it should write the stored value to * 'wakeup_count'. If that fails, at least one wakeup event has occurred since * 'wakeup_count' was read and 'state' should not be written to. Otherwise, it * is allowed to write to 'state', but the transition will be aborted if there * are any wakeup events detected after 'wakeup_count' was written to. */ static ssize_t wakeup_count_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { unsigned int val; return pm_get_wakeup_count(&val, true) ? sprintf(buf, "%u\n", val) : -EINTR; } static ssize_t wakeup_count_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { unsigned int val; int error; error = pm_autosleep_lock(); if (error) return error; if (pm_autosleep_state() > PM_SUSPEND_ON) { error = -EBUSY; goto out; } error = -EINVAL; if (sscanf(buf, "%u", &val) == 1) { if (pm_save_wakeup_count(val)) error = n; else pm_print_active_wakeup_sources(); } out: pm_autosleep_unlock(); return error; } power_attr(wakeup_count); #ifdef CONFIG_PM_AUTOSLEEP static ssize_t autosleep_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { suspend_state_t state = pm_autosleep_state(); if (state == PM_SUSPEND_ON) return sprintf(buf, "off\n"); #ifdef CONFIG_SUSPEND if (state < PM_SUSPEND_MAX) return sprintf(buf, "%s\n", pm_states[state] ? 
pm_states[state] : "error"); #endif #ifdef CONFIG_HIBERNATION return sprintf(buf, "disk\n"); #else return sprintf(buf, "error"); #endif } static ssize_t autosleep_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { suspend_state_t state = decode_state(buf, n); int error; if (state == PM_SUSPEND_ON && strcmp(buf, "off") && strcmp(buf, "off\n")) return -EINVAL; if (state == PM_SUSPEND_MEM) state = mem_sleep_current; error = pm_autosleep_set_state(state); return error ? error : n; } power_attr(autosleep); #endif /* CONFIG_PM_AUTOSLEEP */ #ifdef CONFIG_PM_WAKELOCKS static ssize_t wake_lock_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return pm_show_wakelocks(buf, true); } static ssize_t wake_lock_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { int error = pm_wake_lock(buf); return error ? error : n; } power_attr(wake_lock); static ssize_t wake_unlock_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return pm_show_wakelocks(buf, false); } static ssize_t wake_unlock_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { int error = pm_wake_unlock(buf); return error ? error : n; } power_attr(wake_unlock); #endif /* CONFIG_PM_WAKELOCKS */ #endif /* CONFIG_PM_SLEEP */ #ifdef CONFIG_PM_TRACE int pm_trace_enabled; static ssize_t pm_trace_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sprintf(buf, "%d\n", pm_trace_enabled); } static ssize_t pm_trace_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { int val; if (sscanf(buf, "%d", &val) == 1) { pm_trace_enabled = !!val; if (pm_trace_enabled) { pr_warn("PM: Enabling pm_trace changes system date and time during resume.\n" "PM: Correct system time has to be restored manually after resume.\n"); } return n; } return -EINVAL; } power_attr(pm_trace); static ssize_t pm_trace_dev_match_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return show_trace_dev_match(buf, PAGE_SIZE); } power_attr_ro(pm_trace_dev_match); #endif /* CONFIG_PM_TRACE */ #ifdef CONFIG_FREEZER static ssize_t pm_freeze_timeout_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sprintf(buf, "%u\n", freeze_timeout_msecs); } static ssize_t pm_freeze_timeout_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { unsigned long val; if (kstrtoul(buf, 10, &val)) return -EINVAL; freeze_timeout_msecs = val; return n; } power_attr(pm_freeze_timeout); #endif /* CONFIG_FREEZER*/ static struct attribute * g[] = { &state_attr.attr, #ifdef CONFIG_PM_TRACE &pm_trace_attr.attr, &pm_trace_dev_match_attr.attr, #endif #ifdef CONFIG_PM_SLEEP &pm_async_attr.attr, &wakeup_count_attr.attr, #ifdef CONFIG_SUSPEND &mem_sleep_attr.attr, #endif #ifdef CONFIG_PM_AUTOSLEEP &autosleep_attr.attr, #endif #ifdef CONFIG_PM_WAKELOCKS &wake_lock_attr.attr, &wake_unlock_attr.attr, #endif #ifdef CONFIG_PM_SLEEP_DEBUG &pm_test_attr.attr, &pm_print_times_attr.attr, &pm_wakeup_irq_attr.attr, &pm_debug_messages_attr.attr, #endif #endif #ifdef CONFIG_FREEZER &pm_freeze_timeout_attr.attr, #endif NULL, }; static const struct attribute_group attr_group = { .attrs = g, }; struct workqueue_struct *pm_wq; EXPORT_SYMBOL_GPL(pm_wq); static int __init pm_start_workqueue(void) { pm_wq = alloc_workqueue("pm", WQ_FREEZABLE, 0); return pm_wq ? 
0 : -ENOMEM; } static int __init pm_init(void) { int error = pm_start_workqueue(); if (error) return error; hibernate_image_size_init(); hibernate_reserved_size_init(); pm_states_init(); power_kobj = kobject_create_and_add("power", NULL); if (!power_kobj) return -ENOMEM; error = sysfs_create_group(power_kobj, &attr_group); if (error) return error; pm_print_times_init(); return pm_autosleep_init(); } core_initcall(pm_init);
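The block comment above wakeup_count_show()/wakeup_count_store() describes a userspace protocol for avoiding races with wakeup events: read /sys/power/wakeup_count, finish preparations, write the stored value back, and only write to /sys/power/state if that write-back succeeded. The following is a minimal userspace sketch of that sequence; it is not part of the kernel sources, assumes the standard sysfs paths, and omits retry logic.

/*
 * Illustrative userspace sketch (not kernel code) of the wakeup_count
 * protocol documented above: read the count, prepare, write the count
 * back, and only then request the "mem" sleep state.
 */
#include <stdio.h>

int main(void)
{
	char count[32];
	FILE *f;

	/* 1. Read and remember the current wakeup count. */
	f = fopen("/sys/power/wakeup_count", "r");
	if (!f || !fgets(count, sizeof(count), f)) {
		perror("read wakeup_count");
		return 1;
	}
	fclose(f);

	/* ... userspace preparations for suspend would go here ... */

	/*
	 * 2. Write the stored value back. A failure means at least one
	 *    wakeup event occurred in the meantime, so abort the suspend.
	 */
	f = fopen("/sys/power/wakeup_count", "w");
	if (!f || fputs(count, f) == EOF || fclose(f) != 0) {
		fprintf(stderr, "wakeup event detected, aborting suspend\n");
		return 1;
	}

	/* 3. Only now is it safe to initiate the transition. */
	f = fopen("/sys/power/state", "w");
	if (!f || fputs("mem\n", f) == EOF || fclose(f) != 0) {
		perror("write state");
		return 1;
	}
	return 0;
}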
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_BITOPS_H #define _ASM_X86_BITOPS_H /* * Copyright 1992, Linus Torvalds. * * Note: inlines with more than a single statement should be marked * __always_inline to avoid problems with older gcc's inlining heuristics. */ #ifndef _LINUX_BITOPS_H #error only <linux/bitops.h> can be included directly #endif #include <linux/compiler.h> #include <asm/alternative.h> #include <asm/rmwcc.h> #include <asm/barrier.h> #if BITS_PER_LONG == 32 # define _BITOPS_LONG_SHIFT 5 #elif BITS_PER_LONG == 64 # define _BITOPS_LONG_SHIFT 6 #else # error "Unexpected BITS_PER_LONG" #endif #define BIT_64(n) (U64_C(1) << (n)) /* * These have to be done with inline assembly: that way the bit-setting * is guaranteed to be atomic. All bit operations return 0 if the bit * was cleared before the operation and != 0 if it was not. * * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1). */ #define RLONG_ADDR(x) "m" (*(volatile long *) (x)) #define WBYTE_ADDR(x) "+m" (*(volatile char *) (x)) #define ADDR RLONG_ADDR(addr) /* * We do the locked ops that don't return the old value as * a mask operation on a byte. */ #define IS_IMMEDIATE(nr) (__builtin_constant_p(nr)) #define CONST_MASK_ADDR(nr, addr) WBYTE_ADDR((void *)(addr) + ((nr)>>3)) #define CONST_MASK(nr) (1 << ((nr) & 7)) /** * set_bit - Atomically set a bit in memory * @nr: the bit to set * @addr: the address to start counting from * * This function is atomic and may not be reordered.
See __set_bit() * if you do not require the atomic guarantees. * * Note: there are no guarantees that this function will not be reordered * on non x86 architectures, so if you are writing portable code, * make sure not to rely on its reordering guarantees. * * Note that @nr may be almost arbitrarily large; this function is not * restricted to acting on a single-word quantity. */ static __always_inline void set_bit(long nr, volatile unsigned long *addr) { if (IS_IMMEDIATE(nr)) { asm volatile(LOCK_PREFIX "orb %1,%0" : CONST_MASK_ADDR(nr, addr) : "iq" ((u8)CONST_MASK(nr)) : "memory"); } else { asm volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0" : : RLONG_ADDR(addr), "Ir" (nr) : "memory"); } } /** * __set_bit - Set a bit in memory * @nr: the bit to set * @addr: the address to start counting from * * Unlike set_bit(), this function is non-atomic and may be reordered. * If it's called on the same region of memory simultaneously, the effect * may be that only one operation succeeds. */ static __always_inline void __set_bit(long nr, volatile unsigned long *addr) { asm volatile(__ASM_SIZE(bts) " %1,%0" : : ADDR, "Ir" (nr) : "memory"); } /** * clear_bit - Clears a bit in memory * @nr: Bit to clear * @addr: Address to start counting from * * clear_bit() is atomic and may not be reordered. However, it does * not contain a memory barrier, so if it is used for locking purposes, * you should call smp_mb__before_atomic() and/or smp_mb__after_atomic() * in order to ensure changes are visible on other processors. */ static __always_inline void clear_bit(long nr, volatile unsigned long *addr) { if (IS_IMMEDIATE(nr)) { asm volatile(LOCK_PREFIX "andb %1,%0" : CONST_MASK_ADDR(nr, addr) : "iq" ((u8)~CONST_MASK(nr))); } else { asm volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0" : : RLONG_ADDR(addr), "Ir" (nr) : "memory"); } } /* * clear_bit_unlock - Clears a bit in memory * @nr: Bit to clear * @addr: Address to start counting from * * clear_bit() is atomic and implies release semantics before the memory * operation. It can be used for an unlock. */ static __always_inline void clear_bit_unlock(long nr, volatile unsigned long *addr) { barrier(); clear_bit(nr, addr); } static __always_inline void __clear_bit(long nr, volatile unsigned long *addr) { asm volatile(__ASM_SIZE(btr) " %1,%0" : : ADDR, "Ir" (nr) : "memory"); } static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr) { bool negative; asm volatile(LOCK_PREFIX "andb %2,%1" CC_SET(s) : CC_OUT(s) (negative), WBYTE_ADDR(addr) : "ir" ((char) ~(1 << nr)) : "memory"); return negative; } // Let everybody know we have it #define clear_bit_unlock_is_negative_byte clear_bit_unlock_is_negative_byte /* * __clear_bit_unlock - Clears a bit in memory * @nr: Bit to clear * @addr: Address to start counting from * * __clear_bit() is non-atomic and implies release semantics before the memory * operation. It can be used for an unlock if no other CPUs can concurrently * modify other bits in the word. */ static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *addr) { __clear_bit(nr, addr); } /** * __change_bit - Toggle a bit in memory * @nr: the bit to change * @addr: the address to start counting from * * Unlike change_bit(), this function is non-atomic and may be reordered. * If it's called on the same region of memory simultaneously, the effect * may be that only one operation succeeds. 
*/ static __always_inline void __change_bit(long nr, volatile unsigned long *addr) { asm volatile(__ASM_SIZE(btc) " %1,%0" : : ADDR, "Ir" (nr) : "memory"); } /** * change_bit - Toggle a bit in memory * @nr: Bit to change * @addr: Address to start counting from * * change_bit() is atomic and may not be reordered. * Note that @nr may be almost arbitrarily large; this function is not * restricted to acting on a single-word quantity. */ static __always_inline void change_bit(long nr, volatile unsigned long *addr) { if (IS_IMMEDIATE(nr)) { asm volatile(LOCK_PREFIX "xorb %1,%0" : CONST_MASK_ADDR(nr, addr) : "iq" ((u8)CONST_MASK(nr))); } else { asm volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0" : : RLONG_ADDR(addr), "Ir" (nr) : "memory"); } } /** * test_and_set_bit - Set a bit and return its old value * @nr: Bit to set * @addr: Address to count from * * This operation is atomic and cannot be reordered. * It also implies a memory barrier. */ static __always_inline bool test_and_set_bit(long nr, volatile unsigned long *addr) { GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(bts), *addr, "Ir", nr, "%0", c); } /** * test_and_set_bit_lock - Set a bit and return its old value for lock * @nr: Bit to set * @addr: Address to count from * * This is the same as test_and_set_bit on x86. */ static __always_inline bool test_and_set_bit_lock(long nr, volatile unsigned long *addr) { return test_and_set_bit(nr, addr); } /** * __test_and_set_bit - Set a bit and return its old value * @nr: Bit to set * @addr: Address to count from * * This operation is non-atomic and can be reordered. * If two examples of this operation race, one can appear to succeed * but actually fail. You must protect multiple accesses with a lock. */ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *addr) { bool oldbit; asm(__ASM_SIZE(bts) " %2,%1" CC_SET(c) : CC_OUT(c) (oldbit) : ADDR, "Ir" (nr) : "memory"); return oldbit; } /** * test_and_clear_bit - Clear a bit and return its old value * @nr: Bit to clear * @addr: Address to count from * * This operation is atomic and cannot be reordered. * It also implies a memory barrier. */ static __always_inline bool test_and_clear_bit(long nr, volatile unsigned long *addr) { GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btr), *addr, "Ir", nr, "%0", c); } /** * __test_and_clear_bit - Clear a bit and return its old value * @nr: Bit to clear * @addr: Address to count from * * This operation is non-atomic and can be reordered. * If two examples of this operation race, one can appear to succeed * but actually fail. You must protect multiple accesses with a lock. * * Note: the operation is performed atomically with respect to * the local CPU, but not other CPUs. Portable code should not * rely on this behaviour. * KVM relies on this behaviour on x86 for modifying memory that is also * accessed from a hypervisor on the same CPU if running in a VM: don't change * this without also updating arch/x86/kernel/kvm.c */ static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long *addr) { bool oldbit; asm volatile(__ASM_SIZE(btr) " %2,%1" CC_SET(c) : CC_OUT(c) (oldbit) : ADDR, "Ir" (nr) : "memory"); return oldbit; } /* WARNING: non atomic and it can be reordered! 
*/ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned long *addr) { bool oldbit; asm volatile(__ASM_SIZE(btc) " %2,%1" CC_SET(c) : CC_OUT(c) (oldbit) : ADDR, "Ir" (nr) : "memory"); return oldbit; } /** * test_and_change_bit - Change a bit and return its old value * @nr: Bit to change * @addr: Address to count from * * This operation is atomic and cannot be reordered. * It also implies a memory barrier. */ static __always_inline bool test_and_change_bit(long nr, volatile unsigned long *addr) { GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btc), *addr, "Ir", nr, "%0", c); } static __always_inline bool constant_test_bit(long nr, const volatile unsigned long *addr) { return ((1UL << (nr & (BITS_PER_LONG-1))) & (addr[nr >> _BITOPS_LONG_SHIFT])) != 0; } static __always_inline bool variable_test_bit(long nr, volatile const unsigned long *addr) { bool oldbit; asm volatile(__ASM_SIZE(bt) " %2,%1" CC_SET(c) : CC_OUT(c) (oldbit) : "m" (*(unsigned long *)addr), "Ir" (nr) : "memory"); return oldbit; } #if 0 /* Fool kernel-doc since it doesn't do macros yet */ /** * test_bit - Determine whether a bit is set * @nr: bit number to test * @addr: Address to start counting from */ static bool test_bit(int nr, const volatile unsigned long *addr); #endif #define test_bit(nr, addr) \ (__builtin_constant_p((nr)) \ ? constant_test_bit((nr), (addr)) \ : variable_test_bit((nr), (addr))) /** * __ffs - find first set bit in word * @word: The word to search * * Undefined if no bit exists, so code should check against 0 first. */ static __always_inline unsigned long __ffs(unsigned long word) { asm("rep; bsf %1,%0" : "=r" (word) : "rm" (word)); return word; } /** * ffz - find first zero bit in word * @word: The word to search * * Undefined if no zero exists, so code should check against ~0UL first. */ static __always_inline unsigned long ffz(unsigned long word) { asm("rep; bsf %1,%0" : "=r" (word) : "r" (~word)); return word; } /* * __fls: find last set bit in word * @word: The word to search * * Undefined if no set bit exists, so code should check against 0 first. */ static __always_inline unsigned long __fls(unsigned long word) { asm("bsr %1,%0" : "=r" (word) : "rm" (word)); return word; } #undef ADDR #ifdef __KERNEL__ /** * ffs - find first set bit in word * @x: the word to search * * This is defined the same way as the libc and compiler builtin ffs * routines, therefore differs in spirit from the other bitops. * * ffs(value) returns 0 if value is 0 or the position of the first * set bit if value is nonzero. The first (least significant) bit * is at position 1. */ static __always_inline int ffs(int x) { int r; #ifdef CONFIG_X86_64 /* * AMD64 says BSFL won't clobber the dest reg if x==0; Intel64 says the * dest reg is undefined if x==0, but their CPU architect says its * value is written to set it to the same as before, except that the * top 32 bits will be cleared. * * We cannot do this on 32 bits because at the very least some * 486 CPUs did not behave this way. */ asm("bsfl %1,%0" : "=r" (r) : "rm" (x), "0" (-1)); #elif defined(CONFIG_X86_CMOV) asm("bsfl %1,%0\n\t" "cmovzl %2,%0" : "=&r" (r) : "rm" (x), "r" (-1)); #else asm("bsfl %1,%0\n\t" "jnz 1f\n\t" "movl $-1,%0\n" "1:" : "=r" (r) : "rm" (x)); #endif return r + 1; } /** * fls - find last set bit in word * @x: the word to search * * This is defined in a similar way as the libc and compiler builtin * ffs, but returns the position of the most significant set bit. 
* * fls(value) returns 0 if value is 0 or the position of the last * set bit if value is nonzero. The last (most significant) bit is * at position 32. */ static __always_inline int fls(int x) { int r; #ifdef CONFIG_X86_64 /* * AMD64 says BSRL won't clobber the dest reg if x==0; Intel64 says the * dest reg is undefined if x==0, but their CPU architect says its * value is written to set it to the same as before, except that the * top 32 bits will be cleared. * * We cannot do this on 32 bits because at the very least some * 486 CPUs did not behave this way. */ asm("bsrl %1,%0" : "=r" (r) : "rm" (x), "0" (-1)); #elif defined(CONFIG_X86_CMOV) asm("bsrl %1,%0\n\t" "cmovzl %2,%0" : "=&r" (r) : "rm" (x), "rm" (-1)); #else asm("bsrl %1,%0\n\t" "jnz 1f\n\t" "movl $-1,%0\n" "1:" : "=r" (r) : "rm" (x)); #endif return r + 1; } /** * fls64 - find last set bit in a 64-bit word * @x: the word to search * * This is defined in a similar way as the libc and compiler builtin * ffsll, but returns the position of the most significant set bit. * * fls64(value) returns 0 if value is 0 or the position of the last * set bit if value is nonzero. The last (most significant) bit is * at position 64. */ #ifdef CONFIG_X86_64 static __always_inline int fls64(__u64 x) { int bitpos = -1; /* * AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the * dest reg is undefined if x==0, but their CPU architect says its * value is written to set it to the same as before. */ asm("bsrq %1,%q0" : "+r" (bitpos) : "rm" (x)); return bitpos + 1; } #else #include <asm-generic/bitops/fls64.h> #endif #include <asm-generic/bitops/find.h> #include <asm-generic/bitops/sched.h> #include <asm/arch_hweight.h> #include <asm-generic/bitops/const_hweight.h> #include <asm-generic/bitops/le.h> #include <asm-generic/bitops/ext2-atomic-setbit.h> #endif /* __KERNEL__ */ #endif /* _ASM_X86_BITOPS_H */
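As a standalone illustration of the addressing convention documented above ("bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1)") and of the byte arithmetic behind CONST_MASK_ADDR()/CONST_MASK(), the following plain C sketch works through the same nr >> 3 / nr & 7 split. It is not part of the header and relies on x86 being little-endian, which is what lets the byte view and the long view of a bitmap agree.

/*
 * Standalone sketch (not kernel code) of the bit addressing used by
 * CONST_MASK_ADDR()/CONST_MASK(): bit number nr lives in byte nr >> 3
 * at position nr & 7. Valid for a little-endian layout such as x86.
 */
#include <stdio.h>
#include <string.h>

static void show_bit(const unsigned long *bitmap, long nr)
{
	const unsigned char *bytes = (const unsigned char *)bitmap;
	long byte = nr >> 3;			/* same shift as CONST_MASK_ADDR() */
	unsigned char mask = 1u << (nr & 7);	/* same mask as CONST_MASK() */

	printf("bit %3ld -> byte %2ld, mask 0x%02x, currently %s\n",
	       nr, byte, mask, (bytes[byte] & mask) ? "set" : "clear");
}

int main(void)
{
	unsigned long bitmap[2];

	memset(bitmap, 0, sizeof(bitmap));
	bitmap[0] = 1UL;	/* bit 0: LSB of the first word */
	bitmap[1] = 1UL;	/* bit BITS_PER_LONG: LSB of the second word */

	show_bit(bitmap, 0);
	show_bit(bitmap, 1);
	show_bit(bitmap, 8 * (long)sizeof(long));	/* first bit of the second word */
	return 0;
}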
/* PKCS#7 parser * * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public Licence * as published by the Free Software Foundation; either version * 2 of the Licence, or (at your option) any later version.
*/ #define pr_fmt(fmt) "PKCS7: "fmt #include <linux/kernel.h> #include <linux/module.h> #include <linux/export.h> #include <linux/slab.h> #include <linux/err.h> #include <linux/oid_registry.h> #include <crypto/public_key.h> #include "pkcs7_parser.h" #include "pkcs7.asn1.h" MODULE_DESCRIPTION("PKCS#7 parser"); MODULE_AUTHOR("Red Hat, Inc."); MODULE_LICENSE("GPL"); struct pkcs7_parse_context { struct pkcs7_message *msg; /* Message being constructed */ struct pkcs7_signed_info *sinfo; /* SignedInfo being constructed */ struct pkcs7_signed_info **ppsinfo; struct x509_certificate *certs; /* Certificate cache */ struct x509_certificate **ppcerts; unsigned long data; /* Start of data */ enum OID last_oid; /* Last OID encountered */ unsigned x509_index; unsigned sinfo_index; const void *raw_serial; unsigned raw_serial_size; unsigned raw_issuer_size; const void *raw_issuer; const void *raw_skid; unsigned raw_skid_size; bool expect_skid; }; /* * Free a signed information block. */ static void pkcs7_free_signed_info(struct pkcs7_signed_info *sinfo) { if (sinfo) { public_key_signature_free(sinfo->sig); kfree(sinfo); } } /** * pkcs7_free_message - Free a PKCS#7 message * @pkcs7: The PKCS#7 message to free */ void pkcs7_free_message(struct pkcs7_message *pkcs7) { struct x509_certificate *cert; struct pkcs7_signed_info *sinfo; if (pkcs7) { while (pkcs7->certs) { cert = pkcs7->certs; pkcs7->certs = cert->next; x509_free_certificate(cert); } while (pkcs7->crl) { cert = pkcs7->crl; pkcs7->crl = cert->next; x509_free_certificate(cert); } while (pkcs7->signed_infos) { sinfo = pkcs7->signed_infos; pkcs7->signed_infos = sinfo->next; pkcs7_free_signed_info(sinfo); } kfree(pkcs7); } } EXPORT_SYMBOL_GPL(pkcs7_free_message); /* * Check authenticatedAttributes are provided or not provided consistently. 
*/ static int pkcs7_check_authattrs(struct pkcs7_message *msg) { struct pkcs7_signed_info *sinfo; bool want = false; sinfo = msg->signed_infos; if (!sinfo) goto inconsistent; if (sinfo->authattrs) { want = true; msg->have_authattrs = true; } for (sinfo = sinfo->next; sinfo; sinfo = sinfo->next) if (!!sinfo->authattrs != want) goto inconsistent; return 0; inconsistent: pr_warn("Inconsistently supplied authAttrs\n"); return -EINVAL; } /** * pkcs7_parse_message - Parse a PKCS#7 message * @data: The raw binary ASN.1 encoded message to be parsed * @datalen: The size of the encoded message */ struct pkcs7_message *pkcs7_parse_message(const void *data, size_t datalen) { struct pkcs7_parse_context *ctx; struct pkcs7_message *msg = ERR_PTR(-ENOMEM); int ret; ctx = kzalloc(sizeof(struct pkcs7_parse_context), GFP_KERNEL); if (!ctx) goto out_no_ctx; ctx->msg = kzalloc(sizeof(struct pkcs7_message), GFP_KERNEL); if (!ctx->msg) goto out_no_msg; ctx->sinfo = kzalloc(sizeof(struct pkcs7_signed_info), GFP_KERNEL); if (!ctx->sinfo) goto out_no_sinfo; ctx->sinfo->sig = kzalloc(sizeof(struct public_key_signature), GFP_KERNEL); if (!ctx->sinfo->sig) goto out_no_sig; ctx->data = (unsigned long)data; ctx->ppcerts = &ctx->certs; ctx->ppsinfo = &ctx->msg->signed_infos; /* Attempt to decode the signature */ ret = asn1_ber_decoder(&pkcs7_decoder, ctx, data, datalen); if (ret < 0) { msg = ERR_PTR(ret); goto out; } ret = pkcs7_check_authattrs(ctx->msg); if (ret < 0) { msg = ERR_PTR(ret); goto out; } msg = ctx->msg; ctx->msg = NULL; out: while (ctx->certs) { struct x509_certificate *cert = ctx->certs; ctx->certs = cert->next; x509_free_certificate(cert); } out_no_sig: pkcs7_free_signed_info(ctx->sinfo); out_no_sinfo: pkcs7_free_message(ctx->msg); out_no_msg: kfree(ctx); out_no_ctx: return msg; } EXPORT_SYMBOL_GPL(pkcs7_parse_message); /** * pkcs7_get_content_data - Get access to the PKCS#7 content * @pkcs7: The preparsed PKCS#7 message to access * @_data: Place to return a pointer to the data * @_data_len: Place to return the data length * @_headerlen: Size of ASN.1 header not included in _data * * Get access to the data content of the PKCS#7 message. The size of the * header of the ASN.1 object that contains it is also provided and can be used * to adjust *_data and *_data_len to get the entire object. * * Returns -ENODATA if the data object was missing from the message. */ int pkcs7_get_content_data(const struct pkcs7_message *pkcs7, const void **_data, size_t *_data_len, size_t *_headerlen) { if (!pkcs7->data) return -ENODATA; *_data = pkcs7->data; *_data_len = pkcs7->data_len; if (_headerlen) *_headerlen = pkcs7->data_hdrlen; return 0; } EXPORT_SYMBOL_GPL(pkcs7_get_content_data); /* * Note an OID when we find one for later processing when we know how * to interpret it. */ int pkcs7_note_OID(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct pkcs7_parse_context *ctx = context; ctx->last_oid = look_up_OID(value, vlen); if (ctx->last_oid == OID__NR) { char buffer[50]; sprint_oid(value, vlen, buffer, sizeof(buffer)); printk("PKCS7: Unknown OID: [%lu] %s\n", (unsigned long)value - ctx->data, buffer); } return 0; } /* * Note the digest algorithm for the signature. 
*/ int pkcs7_sig_note_digest_algo(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct pkcs7_parse_context *ctx = context; switch (ctx->last_oid) { case OID_md4: ctx->sinfo->sig->hash_algo = "md4"; break; case OID_md5: ctx->sinfo->sig->hash_algo = "md5"; break; case OID_sha1: ctx->sinfo->sig->hash_algo = "sha1"; break; case OID_sha256: ctx->sinfo->sig->hash_algo = "sha256"; break; case OID_sha384: ctx->sinfo->sig->hash_algo = "sha384"; break; case OID_sha512: ctx->sinfo->sig->hash_algo = "sha512"; break; case OID_sha224: ctx->sinfo->sig->hash_algo = "sha224"; break; default: printk("Unsupported digest algo: %u\n", ctx->last_oid); return -ENOPKG; } return 0; } /* * Note the public key algorithm for the signature. */ int pkcs7_sig_note_pkey_algo(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct pkcs7_parse_context *ctx = context; switch (ctx->last_oid) { case OID_rsaEncryption: ctx->sinfo->sig->pkey_algo = "rsa"; break; default: printk("Unsupported pkey algo: %u\n", ctx->last_oid); return -ENOPKG; } return 0; } /* * We only support signed data [RFC2315 sec 9]. */ int pkcs7_check_content_type(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct pkcs7_parse_context *ctx = context; if (ctx->last_oid != OID_signed_data) { pr_warn("Only support pkcs7_signedData type\n"); return -EINVAL; } return 0; } /* * Note the SignedData version */ int pkcs7_note_signeddata_version(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct pkcs7_parse_context *ctx = context; unsigned version; if (vlen != 1) goto unsupported; ctx->msg->version = version = *(const u8 *)value; switch (version) { case 1: /* PKCS#7 SignedData [RFC2315 sec 9.1] * CMS ver 1 SignedData [RFC5652 sec 5.1] */ break; case 3: /* CMS ver 3 SignedData [RFC2315 sec 5.1] */ break; default: goto unsupported; } return 0; unsupported: pr_warn("Unsupported SignedData version\n"); return -EINVAL; } /* * Note the SignerInfo version */ int pkcs7_note_signerinfo_version(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct pkcs7_parse_context *ctx = context; unsigned version; if (vlen != 1) goto unsupported; version = *(const u8 *)value; switch (version) { case 1: /* PKCS#7 SignerInfo [RFC2315 sec 9.2] * CMS ver 1 SignerInfo [RFC5652 sec 5.3] */ if (ctx->msg->version != 1) goto version_mismatch; ctx->expect_skid = false; break; case 3: /* CMS ver 3 SignerInfo [RFC2315 sec 5.3] */ if (ctx->msg->version == 1) goto version_mismatch; ctx->expect_skid = true; break; default: goto unsupported; } return 0; unsupported: pr_warn("Unsupported SignerInfo version\n"); return -EINVAL; version_mismatch: pr_warn("SignedData-SignerInfo version mismatch\n"); return -EBADMSG; } /* * Extract a certificate and store it in the context. */ int pkcs7_extract_cert(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct pkcs7_parse_context *ctx = context; struct x509_certificate *x509; if (tag != ((ASN1_UNIV << 6) | ASN1_CONS_BIT | ASN1_SEQ)) { pr_debug("Cert began with tag %02x at %lu\n", tag, (unsigned long)ctx - ctx->data); return -EBADMSG; } /* We have to correct for the header so that the X.509 parser can start * from the beginning. Note that since X.509 stipulates DER, there * probably shouldn't be an EOC trailer - but it is in PKCS#7 (which * stipulates BER). 
*/ value -= hdrlen; vlen += hdrlen; if (((u8*)value)[1] == 0x80) vlen += 2; /* Indefinite length - there should be an EOC */ x509 = x509_cert_parse(value, vlen); if (IS_ERR(x509)) return PTR_ERR(x509); x509->index = ++ctx->x509_index; pr_debug("Got cert %u for %s\n", x509->index, x509->subject); pr_debug("- fingerprint %*phN\n", x509->id->len, x509->id->data); *ctx->ppcerts = x509; ctx->ppcerts = &x509->next; return 0; } /* * Save the certificate list */ int pkcs7_note_certificate_list(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct pkcs7_parse_context *ctx = context; pr_devel("Got cert list (%02x)\n", tag); *ctx->ppcerts = ctx->msg->certs; ctx->msg->certs = ctx->certs; ctx->certs = NULL; ctx->ppcerts = &ctx->certs; return 0; } /* * Note the content type. */ int pkcs7_note_content(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct pkcs7_parse_context *ctx = context; if (ctx->last_oid != OID_data && ctx->last_oid != OID_msIndirectData) { pr_warn("Unsupported data type %d\n", ctx->last_oid); return -EINVAL; } ctx->msg->data_type = ctx->last_oid; return 0; } /* * Extract the data from the message and store that and its content type OID in * the context. */ int pkcs7_note_data(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct pkcs7_parse_context *ctx = context; pr_debug("Got data\n"); ctx->msg->data = value; ctx->msg->data_len = vlen; ctx->msg->data_hdrlen = hdrlen; return 0; } /* * Parse authenticated attributes. */ int pkcs7_sig_note_authenticated_attr(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct pkcs7_parse_context *ctx = context; struct pkcs7_signed_info *sinfo = ctx->sinfo; enum OID content_type; pr_devel("AuthAttr: %02x %zu [%*ph]\n", tag, vlen, (unsigned)vlen, value); switch (ctx->last_oid) { case OID_contentType: if (__test_and_set_bit(sinfo_has_content_type, &sinfo->aa_set)) goto repeated; content_type = look_up_OID(value, vlen); if (content_type != ctx->msg->data_type) { pr_warn("Mismatch between global data type (%d) and sinfo %u (%d)\n", ctx->msg->data_type, sinfo->index, content_type); return -EBADMSG; } return 0; case OID_signingTime: if (__test_and_set_bit(sinfo_has_signing_time, &sinfo->aa_set)) goto repeated; /* Should we check that the signing time is consistent * with the signer's X.509 cert? */ return x509_decode_time(&sinfo->signing_time, hdrlen, tag, value, vlen); case OID_messageDigest: if (__test_and_set_bit(sinfo_has_message_digest, &sinfo->aa_set)) goto repeated; if (tag != ASN1_OTS) return -EBADMSG; sinfo->msgdigest = value; sinfo->msgdigest_len = vlen; return 0; case OID_smimeCapabilites: if (__test_and_set_bit(sinfo_has_smime_caps, &sinfo->aa_set)) goto repeated; if (ctx->msg->data_type != OID_msIndirectData) { pr_warn("S/MIME Caps only allowed with Authenticode\n"); return -EKEYREJECTED; } return 0; /* Microsoft SpOpusInfo seems to be contain cont[0] 16-bit BE * char URLs and cont[1] 8-bit char URLs. * * Microsoft StatementType seems to contain a list of OIDs that * are also used as extendedKeyUsage types in X.509 certs. 
*/ case OID_msSpOpusInfo: if (__test_and_set_bit(sinfo_has_ms_opus_info, &sinfo->aa_set)) goto repeated; goto authenticode_check; case OID_msStatementType: if (__test_and_set_bit(sinfo_has_ms_statement_type, &sinfo->aa_set)) goto repeated; authenticode_check: if (ctx->msg->data_type != OID_msIndirectData) { pr_warn("Authenticode AuthAttrs only allowed with Authenticode\n"); return -EKEYREJECTED; } /* I'm not sure how to validate these */ return 0; default: return 0; } repeated: /* We permit max one item per AuthenticatedAttribute and no repeats */ pr_warn("Repeated/multivalue AuthAttrs not permitted\n"); return -EKEYREJECTED; } /* * Note the set of auth attributes for digestion purposes [RFC2315 sec 9.3] */ int pkcs7_sig_note_set_of_authattrs(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct pkcs7_parse_context *ctx = context; struct pkcs7_signed_info *sinfo = ctx->sinfo; if (!test_bit(sinfo_has_content_type, &sinfo->aa_set) || !test_bit(sinfo_has_message_digest, &sinfo->aa_set)) { pr_warn("Missing required AuthAttr\n"); return -EBADMSG; } if (ctx->msg->data_type != OID_msIndirectData && test_bit(sinfo_has_ms_opus_info, &sinfo->aa_set)) { pr_warn("Unexpected Authenticode AuthAttr\n"); return -EBADMSG; } /* We need to switch the 'CONT 0' to a 'SET OF' when we digest */ sinfo->authattrs = value - (hdrlen - 1); sinfo->authattrs_len = vlen + (hdrlen - 1); return 0; } /* * Note the issuing certificate serial number */ int pkcs7_sig_note_serial(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct pkcs7_parse_context *ctx = context; ctx->raw_serial = value; ctx->raw_serial_size = vlen; return 0; } /* * Note the issuer's name */ int pkcs7_sig_note_issuer(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct pkcs7_parse_context *ctx = context; ctx->raw_issuer = value; ctx->raw_issuer_size = vlen; return 0; } /* * Note the issuing cert's subjectKeyIdentifier */ int pkcs7_sig_note_skid(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct pkcs7_parse_context *ctx = context; pr_devel("SKID: %02x %zu [%*ph]\n", tag, vlen, (unsigned)vlen, value); ctx->raw_skid = value; ctx->raw_skid_size = vlen; return 0; } /* * Note the signature data */ int pkcs7_sig_note_signature(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct pkcs7_parse_context *ctx = context; ctx->sinfo->sig->s = kmemdup(value, vlen, GFP_KERNEL); if (!ctx->sinfo->sig->s) return -ENOMEM; ctx->sinfo->sig->s_size = vlen; return 0; } /* * Note a signature information block */ int pkcs7_note_signed_info(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct pkcs7_parse_context *ctx = context; struct pkcs7_signed_info *sinfo = ctx->sinfo; struct asymmetric_key_id *kid; if (ctx->msg->data_type == OID_msIndirectData && !sinfo->authattrs) { pr_warn("Authenticode requires AuthAttrs\n"); return -EBADMSG; } /* Generate cert issuer + serial number key ID */ if (!ctx->expect_skid) { kid = asymmetric_key_generate_id(ctx->raw_serial, ctx->raw_serial_size, ctx->raw_issuer, ctx->raw_issuer_size); } else { kid = asymmetric_key_generate_id(ctx->raw_skid, ctx->raw_skid_size, "", 0); } if (IS_ERR(kid)) return PTR_ERR(kid); pr_devel("SINFO KID: %u [%*phN]\n", kid->len, kid->len, kid->data); sinfo->sig->auth_ids[0] = kid; sinfo->index = ++ctx->sinfo_index; *ctx->ppsinfo = sinfo; ctx->ppsinfo = &sinfo->next; ctx->sinfo = kzalloc(sizeof(struct 
pkcs7_signed_info), GFP_KERNEL); if (!ctx->sinfo) return -ENOMEM; ctx->sinfo->sig = kzalloc(sizeof(struct public_key_signature), GFP_KERNEL); if (!ctx->sinfo->sig) return -ENOMEM; return 0; }
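pkcs7_parse_message(), pkcs7_get_content_data() and pkcs7_free_message() are the exported entry points of this parser. The following is an illustrative sketch of a kernel-side caller, not taken from the kernel tree; the blob and its length are assumed to come from elsewhere (for example a loaded firmware image), and error handling is reduced to the minimum.

/*
 * Illustrative sketch (not part of this file) of a caller using the
 * exported entry points above: parse a PKCS#7 blob already in memory,
 * report the embedded content length, then free the message.
 */
#include <linux/kernel.h>
#include <linux/err.h>
#include <crypto/pkcs7.h>

static int example_inspect_pkcs7(const void *blob, size_t blob_len)
{
	struct pkcs7_message *pkcs7;
	const void *data;
	size_t data_len;
	int ret;

	pkcs7 = pkcs7_parse_message(blob, blob_len);
	if (IS_ERR(pkcs7))
		return PTR_ERR(pkcs7);

	/* Returns -ENODATA if the message carries no embedded content. */
	ret = pkcs7_get_content_data(pkcs7, &data, &data_len, NULL);
	if (!ret)
		pr_info("PKCS#7 content: %zu bytes\n", data_len);

	pkcs7_free_message(pkcs7);
	return ret;
}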
// SPDX-License-Identifier: GPL-2.0 /* * drivers/usb/core/sysfs.c * * (C) Copyright 2002 David Brownell * (C) Copyright 2002,2004 Greg Kroah-Hartman * (C) Copyright 2002,2004 IBM Corp. * * All of the sysfs file attributes for usb devices and interfaces. * * Released under the GPLv2 only. */ #include <linux/kernel.h> #include <linux/string.h> #include <linux/usb.h> #include <linux/usb/quirks.h> #include <linux/of.h> #include "usb.h" /* Active configuration fields */ #define usb_actconfig_show(field, format_string) \ static ssize_t field##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ struct usb_device *udev; \ struct usb_host_config *actconfig; \ ssize_t rc; \ \ udev = to_usb_device(dev); \ rc = usb_lock_device_interruptible(udev); \ if (rc < 0) \ return -EINTR; \ actconfig = udev->actconfig; \ if (actconfig) \ rc = sprintf(buf, format_string, \ actconfig->desc.field); \ usb_unlock_device(udev); \ return rc; \ } \ #define usb_actconfig_attr(field, format_string) \ usb_actconfig_show(field, format_string) \ static DEVICE_ATTR_RO(field) usb_actconfig_attr(bNumInterfaces, "%2d\n"); usb_actconfig_attr(bmAttributes, "%2x\n"); static ssize_t bMaxPower_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; struct usb_host_config *actconfig; ssize_t rc; udev = to_usb_device(dev); rc = usb_lock_device_interruptible(udev); if (rc < 0) return -EINTR; actconfig = udev->actconfig; if (actconfig) rc = sprintf(buf, "%dmA\n", usb_get_max_power(udev, actconfig)); usb_unlock_device(udev); return rc; } static DEVICE_ATTR_RO(bMaxPower); static ssize_t configuration_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; struct usb_host_config *actconfig; ssize_t rc; udev = to_usb_device(dev); rc = usb_lock_device_interruptible(udev); if (rc < 0) return -EINTR; actconfig = udev->actconfig; if (actconfig && actconfig->string) rc = sprintf(buf, "%s\n", actconfig->string); usb_unlock_device(udev); return rc; } static DEVICE_ATTR_RO(configuration); /* configuration value is always present, and r/w */ usb_actconfig_show(bConfigurationValue, "%u\n"); static ssize_t bConfigurationValue_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_device *udev = to_usb_device(dev); int config, value, rc; if (sscanf(buf, "%d",
&config) != 1 || config < -1 || config > 255) return -EINVAL; rc = usb_lock_device_interruptible(udev); if (rc < 0) return -EINTR; value = usb_set_configuration(udev, config); usb_unlock_device(udev); return (value < 0) ? value : count; } static DEVICE_ATTR_IGNORE_LOCKDEP(bConfigurationValue, S_IRUGO | S_IWUSR, bConfigurationValue_show, bConfigurationValue_store); #ifdef CONFIG_OF static ssize_t devspec_show(struct device *dev, struct device_attribute *attr, char *buf) { struct device_node *of_node = dev->of_node; return sprintf(buf, "%pOF\n", of_node); } static DEVICE_ATTR_RO(devspec); #endif /* String fields */ #define usb_string_attr(name) \ static ssize_t name##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ struct usb_device *udev; \ int retval; \ \ udev = to_usb_device(dev); \ retval = usb_lock_device_interruptible(udev); \ if (retval < 0) \ return -EINTR; \ retval = sprintf(buf, "%s\n", udev->name); \ usb_unlock_device(udev); \ return retval; \ } \ static DEVICE_ATTR_RO(name) usb_string_attr(product); usb_string_attr(manufacturer); usb_string_attr(serial); static ssize_t speed_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; char *speed; udev = to_usb_device(dev); switch (udev->speed) { case USB_SPEED_LOW: speed = "1.5"; break; case USB_SPEED_UNKNOWN: case USB_SPEED_FULL: speed = "12"; break; case USB_SPEED_HIGH: speed = "480"; break; case USB_SPEED_WIRELESS: speed = "480"; break; case USB_SPEED_SUPER: speed = "5000"; break; case USB_SPEED_SUPER_PLUS: speed = "10000"; break; default: speed = "unknown"; } return sprintf(buf, "%s\n", speed); } static DEVICE_ATTR_RO(speed); static ssize_t rx_lanes_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; udev = to_usb_device(dev); return sprintf(buf, "%d\n", udev->rx_lanes); } static DEVICE_ATTR_RO(rx_lanes); static ssize_t tx_lanes_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; udev = to_usb_device(dev); return sprintf(buf, "%d\n", udev->tx_lanes); } static DEVICE_ATTR_RO(tx_lanes); static ssize_t busnum_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; udev = to_usb_device(dev); return sprintf(buf, "%d\n", udev->bus->busnum); } static DEVICE_ATTR_RO(busnum); static ssize_t devnum_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; udev = to_usb_device(dev); return sprintf(buf, "%d\n", udev->devnum); } static DEVICE_ATTR_RO(devnum); static ssize_t devpath_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; udev = to_usb_device(dev); return sprintf(buf, "%s\n", udev->devpath); } static DEVICE_ATTR_RO(devpath); static ssize_t version_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; u16 bcdUSB; udev = to_usb_device(dev); bcdUSB = le16_to_cpu(udev->descriptor.bcdUSB); return sprintf(buf, "%2x.%02x\n", bcdUSB >> 8, bcdUSB & 0xff); } static DEVICE_ATTR_RO(version); static ssize_t maxchild_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; udev = to_usb_device(dev); return sprintf(buf, "%d\n", udev->maxchild); } static DEVICE_ATTR_RO(maxchild); static ssize_t quirks_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; udev = to_usb_device(dev); return sprintf(buf, "0x%x\n", udev->quirks); } static DEVICE_ATTR_RO(quirks); static ssize_t 
avoid_reset_quirk_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; udev = to_usb_device(dev); return sprintf(buf, "%d\n", !!(udev->quirks & USB_QUIRK_RESET)); } static ssize_t avoid_reset_quirk_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_device *udev = to_usb_device(dev); int val, rc; if (sscanf(buf, "%d", &val) != 1 || val < 0 || val > 1) return -EINVAL; rc = usb_lock_device_interruptible(udev); if (rc < 0) return -EINTR; if (val) udev->quirks |= USB_QUIRK_RESET; else udev->quirks &= ~USB_QUIRK_RESET; usb_unlock_device(udev); return count; } static DEVICE_ATTR_RW(avoid_reset_quirk); static ssize_t urbnum_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; udev = to_usb_device(dev); return sprintf(buf, "%d\n", atomic_read(&udev->urbnum)); } static DEVICE_ATTR_RO(urbnum); static ssize_t removable_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; char *state; udev = to_usb_device(dev); switch (udev->removable) { case USB_DEVICE_REMOVABLE: state = "removable"; break; case USB_DEVICE_FIXED: state = "fixed"; break; default: state = "unknown"; } return sprintf(buf, "%s\n", state); } static DEVICE_ATTR_RO(removable); static ssize_t ltm_capable_show(struct device *dev, struct device_attribute *attr, char *buf) { if (usb_device_supports_ltm(to_usb_device(dev))) return sprintf(buf, "%s\n", "yes"); return sprintf(buf, "%s\n", "no"); } static DEVICE_ATTR_RO(ltm_capable); #ifdef CONFIG_PM static ssize_t persist_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev = to_usb_device(dev); return sprintf(buf, "%d\n", udev->persist_enabled); } static ssize_t persist_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_device *udev = to_usb_device(dev); int value, rc; /* Hubs are always enabled for USB_PERSIST */ if (udev->descriptor.bDeviceClass == USB_CLASS_HUB) return -EPERM; if (sscanf(buf, "%d", &value) != 1) return -EINVAL; rc = usb_lock_device_interruptible(udev); if (rc < 0) return -EINTR; udev->persist_enabled = !!value; usb_unlock_device(udev); return count; } static DEVICE_ATTR_RW(persist); static int add_persist_attributes(struct device *dev) { int rc = 0; if (is_usb_device(dev)) { struct usb_device *udev = to_usb_device(dev); /* Hubs are automatically enabled for USB_PERSIST, * no point in creating the attribute file. */ if (udev->descriptor.bDeviceClass != USB_CLASS_HUB) rc = sysfs_add_file_to_group(&dev->kobj, &dev_attr_persist.attr, power_group_name); } return rc; } static void remove_persist_attributes(struct device *dev) { sysfs_remove_file_from_group(&dev->kobj, &dev_attr_persist.attr, power_group_name); } static ssize_t connected_duration_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev = to_usb_device(dev); return sprintf(buf, "%u\n", jiffies_to_msecs(jiffies - udev->connect_time)); } static DEVICE_ATTR_RO(connected_duration); /* * If the device is resumed, the last time the device was suspended has * been pre-subtracted from active_duration. We add the current time to * get the duration that the device was actually active. * * If the device is suspended, the active_duration is up-to-date. 
*/ static ssize_t active_duration_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev = to_usb_device(dev); int duration; if (udev->state != USB_STATE_SUSPENDED) duration = jiffies_to_msecs(jiffies + udev->active_duration); else duration = jiffies_to_msecs(udev->active_duration); return sprintf(buf, "%u\n", duration); } static DEVICE_ATTR_RO(active_duration); static ssize_t autosuspend_show(struct device *dev, struct device_attribute *attr, char *buf) { return sprintf(buf, "%d\n", dev->power.autosuspend_delay / 1000); } static ssize_t autosuspend_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { int value; if (sscanf(buf, "%d", &value) != 1 || value >= INT_MAX/1000 || value <= -INT_MAX/1000) return -EINVAL; pm_runtime_set_autosuspend_delay(dev, value * 1000); return count; } static DEVICE_ATTR_RW(autosuspend); static const char on_string[] = "on"; static const char auto_string[] = "auto"; static void warn_level(void) { static int level_warned; if (!level_warned) { level_warned = 1; printk(KERN_WARNING "WARNING! power/level is deprecated; " "use power/control instead\n"); } } static ssize_t level_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev = to_usb_device(dev); const char *p = auto_string; warn_level(); if (udev->state != USB_STATE_SUSPENDED && !udev->dev.power.runtime_auto) p = on_string; return sprintf(buf, "%s\n", p); } static ssize_t level_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_device *udev = to_usb_device(dev); int len = count; char *cp; int rc = count; int rv; warn_level(); cp = memchr(buf, '\n', count); if (cp) len = cp - buf; rv = usb_lock_device_interruptible(udev); if (rv < 0) return -EINTR; if (len == sizeof on_string - 1 && strncmp(buf, on_string, len) == 0) usb_disable_autosuspend(udev); else if (len == sizeof auto_string - 1 && strncmp(buf, auto_string, len) == 0) usb_enable_autosuspend(udev); else rc = -EINVAL; usb_unlock_device(udev); return rc; } static DEVICE_ATTR_RW(level); static ssize_t usb2_hardware_lpm_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev = to_usb_device(dev); const char *p; if (udev->usb2_hw_lpm_allowed == 1) p = "enabled"; else p = "disabled"; return sprintf(buf, "%s\n", p); } static ssize_t usb2_hardware_lpm_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_device *udev = to_usb_device(dev); bool value; int ret; ret = usb_lock_device_interruptible(udev); if (ret < 0) return -EINTR; ret = strtobool(buf, &value); if (!ret) { udev->usb2_hw_lpm_allowed = value; if (value) ret = usb_enable_usb2_hardware_lpm(udev); else ret = usb_disable_usb2_hardware_lpm(udev); } usb_unlock_device(udev); if (!ret) return count; return ret; } static DEVICE_ATTR_RW(usb2_hardware_lpm); static ssize_t usb2_lpm_l1_timeout_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev = to_usb_device(dev); return sprintf(buf, "%d\n", udev->l1_params.timeout); } static ssize_t usb2_lpm_l1_timeout_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_device *udev = to_usb_device(dev); u16 timeout; if (kstrtou16(buf, 0, &timeout)) return -EINVAL; udev->l1_params.timeout = timeout; return count; } static DEVICE_ATTR_RW(usb2_lpm_l1_timeout); static ssize_t usb2_lpm_besl_show(struct device *dev, struct device_attribute *attr, 
char *buf) { struct usb_device *udev = to_usb_device(dev); return sprintf(buf, "%d\n", udev->l1_params.besl); } static ssize_t usb2_lpm_besl_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_device *udev = to_usb_device(dev); u8 besl; if (kstrtou8(buf, 0, &besl) || besl > 15) return -EINVAL; udev->l1_params.besl = besl; return count; } static DEVICE_ATTR_RW(usb2_lpm_besl); static ssize_t usb3_hardware_lpm_u1_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev = to_usb_device(dev); const char *p; int rc; rc = usb_lock_device_interruptible(udev); if (rc < 0) return -EINTR; if (udev->usb3_lpm_u1_enabled) p = "enabled"; else p = "disabled"; usb_unlock_device(udev); return sprintf(buf, "%s\n", p); } static DEVICE_ATTR_RO(usb3_hardware_lpm_u1); static ssize_t usb3_hardware_lpm_u2_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev = to_usb_device(dev); const char *p; int rc; rc = usb_lock_device_interruptible(udev); if (rc < 0) return -EINTR; if (udev->usb3_lpm_u2_enabled) p = "enabled"; else p = "disabled"; usb_unlock_device(udev); return sprintf(buf, "%s\n", p); } static DEVICE_ATTR_RO(usb3_hardware_lpm_u2); static struct attribute *usb2_hardware_lpm_attr[] = { &dev_attr_usb2_hardware_lpm.attr, &dev_attr_usb2_lpm_l1_timeout.attr, &dev_attr_usb2_lpm_besl.attr, NULL, }; static struct attribute_group usb2_hardware_lpm_attr_group = { .name = power_group_name, .attrs = usb2_hardware_lpm_attr, }; static struct attribute *usb3_hardware_lpm_attr[] = { &dev_attr_usb3_hardware_lpm_u1.attr, &dev_attr_usb3_hardware_lpm_u2.attr, NULL, }; static struct attribute_group usb3_hardware_lpm_attr_group = { .name = power_group_name, .attrs = usb3_hardware_lpm_attr, }; static struct attribute *power_attrs[] = { &dev_attr_autosuspend.attr, &dev_attr_level.attr, &dev_attr_connected_duration.attr, &dev_attr_active_duration.attr, NULL, }; static struct attribute_group power_attr_group = { .name = power_group_name, .attrs = power_attrs, }; static int add_power_attributes(struct device *dev) { int rc = 0; if (is_usb_device(dev)) { struct usb_device *udev = to_usb_device(dev); rc = sysfs_merge_group(&dev->kobj, &power_attr_group); if (udev->usb2_hw_lpm_capable == 1) rc = sysfs_merge_group(&dev->kobj, &usb2_hardware_lpm_attr_group); if ((udev->speed == USB_SPEED_SUPER || udev->speed == USB_SPEED_SUPER_PLUS) && udev->lpm_capable == 1) rc = sysfs_merge_group(&dev->kobj, &usb3_hardware_lpm_attr_group); } return rc; } static void remove_power_attributes(struct device *dev) { sysfs_unmerge_group(&dev->kobj, &usb2_hardware_lpm_attr_group); sysfs_unmerge_group(&dev->kobj, &power_attr_group); } #else #define add_persist_attributes(dev) 0 #define remove_persist_attributes(dev) do {} while (0) #define add_power_attributes(dev) 0 #define remove_power_attributes(dev) do {} while (0) #endif /* CONFIG_PM */ /* Descriptor fields */ #define usb_descriptor_attr_le16(field, format_string) \ static ssize_t \ field##_show(struct device *dev, struct device_attribute *attr, \ char *buf) \ { \ struct usb_device *udev; \ \ udev = to_usb_device(dev); \ return sprintf(buf, format_string, \ le16_to_cpu(udev->descriptor.field)); \ } \ static DEVICE_ATTR_RO(field) usb_descriptor_attr_le16(idVendor, "%04x\n"); usb_descriptor_attr_le16(idProduct, "%04x\n"); usb_descriptor_attr_le16(bcdDevice, "%04x\n"); #define usb_descriptor_attr(field, format_string) \ static ssize_t \ field##_show(struct device *dev, struct 
device_attribute *attr, \ char *buf) \ { \ struct usb_device *udev; \ \ udev = to_usb_device(dev); \ return sprintf(buf, format_string, udev->descriptor.field); \ } \ static DEVICE_ATTR_RO(field) usb_descriptor_attr(bDeviceClass, "%02x\n"); usb_descriptor_attr(bDeviceSubClass, "%02x\n"); usb_descriptor_attr(bDeviceProtocol, "%02x\n"); usb_descriptor_attr(bNumConfigurations, "%d\n"); usb_descriptor_attr(bMaxPacketSize0, "%d\n"); /* show if the device is authorized (1) or not (0) */ static ssize_t authorized_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *usb_dev = to_usb_device(dev); return snprintf(buf, PAGE_SIZE, "%u\n", usb_dev->authorized); } /* * Authorize a device to be used in the system * * Writing a 0 deauthorizes the device, writing a 1 authorizes it. */ static ssize_t authorized_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t size) { ssize_t result; struct usb_device *usb_dev = to_usb_device(dev); unsigned val; result = sscanf(buf, "%u\n", &val); if (result != 1) result = -EINVAL; else if (val == 0) result = usb_deauthorize_device(usb_dev); else result = usb_authorize_device(usb_dev); return result < 0 ? result : size; } static DEVICE_ATTR_IGNORE_LOCKDEP(authorized, S_IRUGO | S_IWUSR, authorized_show, authorized_store); /* "Safely remove a device" */ static ssize_t remove_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_device *udev = to_usb_device(dev); int rc = 0; usb_lock_device(udev); if (udev->state != USB_STATE_NOTATTACHED) { /* To avoid races, first unconfigure and then remove */ usb_set_configuration(udev, -1); rc = usb_remove_device(udev); } if (rc == 0) rc = count; usb_unlock_device(udev); return rc; } static DEVICE_ATTR_IGNORE_LOCKDEP(remove, S_IWUSR, NULL, remove_store); static struct attribute *dev_attrs[] = { /* current configuration's attributes */ &dev_attr_configuration.attr, &dev_attr_bNumInterfaces.attr, &dev_attr_bConfigurationValue.attr, &dev_attr_bmAttributes.attr, &dev_attr_bMaxPower.attr, /* device attributes */ &dev_attr_urbnum.attr, &dev_attr_idVendor.attr, &dev_attr_idProduct.attr, &dev_attr_bcdDevice.attr, &dev_attr_bDeviceClass.attr, &dev_attr_bDeviceSubClass.attr, &dev_attr_bDeviceProtocol.attr, &dev_attr_bNumConfigurations.attr, &dev_attr_bMaxPacketSize0.attr, &dev_attr_speed.attr, &dev_attr_rx_lanes.attr, &dev_attr_tx_lanes.attr, &dev_attr_busnum.attr, &dev_attr_devnum.attr, &dev_attr_devpath.attr, &dev_attr_version.attr, &dev_attr_maxchild.attr, &dev_attr_quirks.attr, &dev_attr_avoid_reset_quirk.attr, &dev_attr_authorized.attr, &dev_attr_remove.attr, &dev_attr_removable.attr, &dev_attr_ltm_capable.attr, #ifdef CONFIG_OF &dev_attr_devspec.attr, #endif NULL, }; static struct attribute_group dev_attr_grp = { .attrs = dev_attrs, }; /* When modifying this list, be sure to modify dev_string_attrs_are_visible() * accordingly. 
*/ static struct attribute *dev_string_attrs[] = { &dev_attr_manufacturer.attr, &dev_attr_product.attr, &dev_attr_serial.attr, NULL }; static umode_t dev_string_attrs_are_visible(struct kobject *kobj, struct attribute *a, int n) { struct device *dev = container_of(kobj, struct device, kobj); struct usb_device *udev = to_usb_device(dev); if (a == &dev_attr_manufacturer.attr) { if (udev->manufacturer == NULL) return 0; } else if (a == &dev_attr_product.attr) { if (udev->product == NULL) return 0; } else if (a == &dev_attr_serial.attr) { if (udev->serial == NULL) return 0; } return a->mode; } static struct attribute_group dev_string_attr_grp = { .attrs = dev_string_attrs, .is_visible = dev_string_attrs_are_visible, }; const struct attribute_group *usb_device_groups[] = { &dev_attr_grp, &dev_string_attr_grp, NULL }; /* Binary descriptors */ static ssize_t read_descriptors(struct file *filp, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct device *dev = container_of(kobj, struct device, kobj); struct usb_device *udev = to_usb_device(dev); size_t nleft = count; size_t srclen, n; int cfgno; void *src; int retval; retval = usb_lock_device_interruptible(udev); if (retval < 0) return -EINTR; /* The binary attribute begins with the device descriptor. * Following that are the raw descriptor entries for all the * configurations (config plus subsidiary descriptors). */ for (cfgno = -1; cfgno < udev->descriptor.bNumConfigurations && nleft > 0; ++cfgno) { if (cfgno < 0) { src = &udev->descriptor; srclen = sizeof(struct usb_device_descriptor); } else { src = udev->rawdescriptors[cfgno]; srclen = __le16_to_cpu(udev->config[cfgno].desc. wTotalLength); } if (off < srclen) { n = min(nleft, srclen - (size_t) off); memcpy(buf, src + off, n); nleft -= n; buf += n; off = 0; } else { off -= srclen; } } usb_unlock_device(udev); return count - nleft; } static struct bin_attribute dev_bin_attr_descriptors = { .attr = {.name = "descriptors", .mode = 0444}, .read = read_descriptors, .size = 18 + 65535, /* dev descr + max-size raw descriptor */ }; int usb_create_sysfs_dev_files(struct usb_device *udev) { struct device *dev = &udev->dev; int retval; retval = device_create_bin_file(dev, &dev_bin_attr_descriptors); if (retval) goto error; retval = add_persist_attributes(dev); if (retval) goto error; retval = add_power_attributes(dev); if (retval) goto error; return retval; error: usb_remove_sysfs_dev_files(udev); return retval; } void usb_remove_sysfs_dev_files(struct usb_device *udev) { struct device *dev = &udev->dev; remove_power_attributes(dev); remove_persist_attributes(dev); device_remove_bin_file(dev, &dev_bin_attr_descriptors); } /* Interface Association Descriptor fields */ #define usb_intf_assoc_attr(field, format_string) \ static ssize_t \ iad_##field##_show(struct device *dev, struct device_attribute *attr, \ char *buf) \ { \ struct usb_interface *intf = to_usb_interface(dev); \ \ return sprintf(buf, format_string, \ intf->intf_assoc->field); \ } \ static DEVICE_ATTR_RO(iad_##field) usb_intf_assoc_attr(bFirstInterface, "%02x\n"); usb_intf_assoc_attr(bInterfaceCount, "%02d\n"); usb_intf_assoc_attr(bFunctionClass, "%02x\n"); usb_intf_assoc_attr(bFunctionSubClass, "%02x\n"); usb_intf_assoc_attr(bFunctionProtocol, "%02x\n"); /* Interface fields */ #define usb_intf_attr(field, format_string) \ static ssize_t \ field##_show(struct device *dev, struct device_attribute *attr, \ char *buf) \ { \ struct usb_interface *intf = to_usb_interface(dev); \ \ return sprintf(buf, 
format_string, \ intf->cur_altsetting->desc.field); \ } \ static DEVICE_ATTR_RO(field) usb_intf_attr(bInterfaceNumber, "%02x\n"); usb_intf_attr(bAlternateSetting, "%2d\n"); usb_intf_attr(bNumEndpoints, "%02x\n"); usb_intf_attr(bInterfaceClass, "%02x\n"); usb_intf_attr(bInterfaceSubClass, "%02x\n"); usb_intf_attr(bInterfaceProtocol, "%02x\n"); static ssize_t interface_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_interface *intf; char *string; intf = to_usb_interface(dev); string = READ_ONCE(intf->cur_altsetting->string); if (!string) return 0; return sprintf(buf, "%s\n", string); } static DEVICE_ATTR_RO(interface); static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_interface *intf; struct usb_device *udev; struct usb_host_interface *alt; intf = to_usb_interface(dev); udev = interface_to_usbdev(intf); alt = READ_ONCE(intf->cur_altsetting); return sprintf(buf, "usb:v%04Xp%04Xd%04Xdc%02Xdsc%02Xdp%02X" "ic%02Xisc%02Xip%02Xin%02X\n", le16_to_cpu(udev->descriptor.idVendor), le16_to_cpu(udev->descriptor.idProduct), le16_to_cpu(udev->descriptor.bcdDevice), udev->descriptor.bDeviceClass, udev->descriptor.bDeviceSubClass, udev->descriptor.bDeviceProtocol, alt->desc.bInterfaceClass, alt->desc.bInterfaceSubClass, alt->desc.bInterfaceProtocol, alt->desc.bInterfaceNumber); } static DEVICE_ATTR_RO(modalias); static ssize_t supports_autosuspend_show(struct device *dev, struct device_attribute *attr, char *buf) { int s; s = device_lock_interruptible(dev); if (s < 0) return -EINTR; /* Devices will be autosuspended even when an interface isn't claimed */ s = (!dev->driver || to_usb_driver(dev->driver)->supports_autosuspend); device_unlock(dev); return sprintf(buf, "%u\n", s); } static DEVICE_ATTR_RO(supports_autosuspend); /* * interface_authorized_show - show authorization status of an USB interface * 1 is authorized, 0 is deauthorized */ static ssize_t interface_authorized_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_interface *intf = to_usb_interface(dev); return sprintf(buf, "%u\n", intf->authorized); } /* * interface_authorized_store - authorize or deauthorize an USB interface */ static ssize_t interface_authorized_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_interface *intf = to_usb_interface(dev); bool val; if (strtobool(buf, &val) != 0) return -EINVAL; if (val) usb_authorize_interface(intf); else usb_deauthorize_interface(intf); return count; } static struct device_attribute dev_attr_interface_authorized = __ATTR(authorized, S_IRUGO | S_IWUSR, interface_authorized_show, interface_authorized_store); static struct attribute *intf_attrs[] = { &dev_attr_bInterfaceNumber.attr, &dev_attr_bAlternateSetting.attr, &dev_attr_bNumEndpoints.attr, &dev_attr_bInterfaceClass.attr, &dev_attr_bInterfaceSubClass.attr, &dev_attr_bInterfaceProtocol.attr, &dev_attr_modalias.attr, &dev_attr_supports_autosuspend.attr, &dev_attr_interface_authorized.attr, NULL, }; static struct attribute_group intf_attr_grp = { .attrs = intf_attrs, }; static struct attribute *intf_assoc_attrs[] = { &dev_attr_iad_bFirstInterface.attr, &dev_attr_iad_bInterfaceCount.attr, &dev_attr_iad_bFunctionClass.attr, &dev_attr_iad_bFunctionSubClass.attr, &dev_attr_iad_bFunctionProtocol.attr, NULL, }; static umode_t intf_assoc_attrs_are_visible(struct kobject *kobj, struct attribute *a, int n) { struct device *dev = container_of(kobj, struct device, kobj); struct usb_interface 
*intf = to_usb_interface(dev); if (intf->intf_assoc == NULL) return 0; return a->mode; } static struct attribute_group intf_assoc_attr_grp = { .attrs = intf_assoc_attrs, .is_visible = intf_assoc_attrs_are_visible, }; const struct attribute_group *usb_interface_groups[] = { &intf_attr_grp, &intf_assoc_attr_grp, NULL }; void usb_create_sysfs_intf_files(struct usb_interface *intf) { struct usb_device *udev = interface_to_usbdev(intf); struct usb_host_interface *alt = intf->cur_altsetting; if (intf->sysfs_files_created || intf->unregistering) return; if (!alt->string && !(udev->quirks & USB_QUIRK_CONFIG_INTF_STRINGS)) alt->string = usb_cache_string(udev, alt->desc.iInterface); if (alt->string && device_create_file(&intf->dev, &dev_attr_interface)) ; /* We don't actually care if the function fails. */ intf->sysfs_files_created = 1; } void usb_remove_sysfs_intf_files(struct usb_interface *intf) { if (!intf->sysfs_files_created) return; device_remove_file(&intf->dev, &dev_attr_interface); intf->sysfs_files_created = 0; }
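/*
 * Everything defined above ends up as small text files under
 * /sys/bus/usb/devices/<device>/ (for example idVendor, idProduct, or
 * power/autosuspend).  The sketch below shows, purely as an illustration,
 * how a user-space program might read one of those attributes; the device
 * path is hypothetical and must be replaced with a real entry from
 * /sys/bus/usb/devices.  It is wrapped in #if 0 because it is user-space
 * code, not part of this driver.
 */
#if 0
#include <stdio.h>

int main(void)
{
	/* Hypothetical device; the value read here is produced by idVendor_show(). */
	const char *path = "/sys/bus/usb/devices/1-1/idVendor";
	char buf[16];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return 1;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("idVendor: %s", buf);	/* sysfs value already ends in '\n' */
	fclose(f);
	return 0;
}
#endif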
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_VIRTIO_NET_H #define _LINUX_VIRTIO_NET_H #include <linux/if_vlan.h> #include <uapi/linux/tcp.h> #include <uapi/linux/udp.h> #include <uapi/linux/virtio_net.h> static inline bool virtio_net_hdr_match_proto(__be16 protocol, __u8 gso_type) { switch (gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { case VIRTIO_NET_HDR_GSO_TCPV4: return protocol == cpu_to_be16(ETH_P_IP); case VIRTIO_NET_HDR_GSO_TCPV6: return protocol == cpu_to_be16(ETH_P_IPV6); case VIRTIO_NET_HDR_GSO_UDP: return protocol == cpu_to_be16(ETH_P_IP) || protocol == cpu_to_be16(ETH_P_IPV6); default: return false; } } static inline int virtio_net_hdr_set_proto(struct sk_buff *skb, const struct virtio_net_hdr *hdr) { if (skb->protocol) return 0; switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { case VIRTIO_NET_HDR_GSO_TCPV4: case VIRTIO_NET_HDR_GSO_UDP: skb->protocol = cpu_to_be16(ETH_P_IP); break; case VIRTIO_NET_HDR_GSO_TCPV6: skb->protocol = cpu_to_be16(ETH_P_IPV6); break; default: return -EINVAL; } return 0; } static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, const struct virtio_net_hdr *hdr, bool little_endian) { unsigned int gso_type = 0; unsigned int thlen = 0; unsigned int p_off = 0; unsigned int ip_proto; if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { case VIRTIO_NET_HDR_GSO_TCPV4: gso_type = SKB_GSO_TCPV4; ip_proto = IPPROTO_TCP; thlen = sizeof(struct tcphdr); break; case VIRTIO_NET_HDR_GSO_TCPV6: gso_type = SKB_GSO_TCPV6; ip_proto = IPPROTO_TCP; thlen = sizeof(struct tcphdr); break; case VIRTIO_NET_HDR_GSO_UDP: gso_type = SKB_GSO_UDP; ip_proto = IPPROTO_UDP; thlen = sizeof(struct udphdr); break; default: return -EINVAL; } if (hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) gso_type |= SKB_GSO_TCP_ECN; if (hdr->gso_size == 0) return -EINVAL; } skb_reset_mac_header(skb); if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { u32 start = __virtio16_to_cpu(little_endian, hdr->csum_start); u32 off = __virtio16_to_cpu(little_endian, hdr->csum_offset); u32 needed = start + max_t(u32, thlen, off + sizeof(__sum16)); if (!pskb_may_pull(skb, needed)) return -EINVAL; if (!skb_partial_csum_set(skb, start, off)) return -EINVAL; p_off = skb_transport_offset(skb) + thlen; if (!pskb_may_pull(skb, p_off)) return -EINVAL; } else { /* gso packets without NEEDS_CSUM do not set transport_offset. * probe and drop if does not match one of the above types.
*/ if (gso_type && skb->network_header) { struct flow_keys_basic keys; if (!skb->protocol) { __be16 protocol = dev_parse_header_protocol(skb); if (!protocol) virtio_net_hdr_set_proto(skb, hdr); else if (!virtio_net_hdr_match_proto(protocol, hdr->gso_type)) return -EINVAL; else skb->protocol = protocol; } retry: if (!skb_flow_dissect_flow_keys_basic(skb, &keys, NULL, 0, 0, 0, 0)) { /* UFO does not specify ipv4 or 6: try both */ if (gso_type & SKB_GSO_UDP && skb->protocol == htons(ETH_P_IP)) { skb->protocol = htons(ETH_P_IPV6); goto retry; } return -EINVAL; } p_off = keys.control.thoff + thlen; if (!pskb_may_pull(skb, p_off) || keys.basic.ip_proto != ip_proto) return -EINVAL; skb_set_transport_header(skb, keys.control.thoff); } else if (gso_type) { p_off = thlen; if (!pskb_may_pull(skb, p_off)) return -EINVAL; } } if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { u16 gso_size = __virtio16_to_cpu(little_endian, hdr->gso_size); unsigned int nh_off = p_off; struct skb_shared_info *shinfo = skb_shinfo(skb); /* UFO may not include transport header in gso_size. */ if (gso_type & SKB_GSO_UDP) nh_off -= thlen; /* Too small packets are not really GSO ones. */ if (skb->len - nh_off > gso_size) { shinfo->gso_size = gso_size; shinfo->gso_type = gso_type; /* Header must be checked, and gso_segs computed. */ shinfo->gso_type |= SKB_GSO_DODGY; shinfo->gso_segs = 0; } } return 0; } static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb, struct virtio_net_hdr *hdr, bool little_endian, bool has_data_valid, int vlan_hlen) { memset(hdr, 0, sizeof(*hdr)); /* no info leak */ if (skb_is_gso(skb)) { struct skb_shared_info *sinfo = skb_shinfo(skb); /* This is a hint as to how much should be linear. */ hdr->hdr_len = __cpu_to_virtio16(little_endian, skb_headlen(skb)); hdr->gso_size = __cpu_to_virtio16(little_endian, sinfo->gso_size); if (sinfo->gso_type & SKB_GSO_TCPV4) hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; else if (sinfo->gso_type & SKB_GSO_TCPV6) hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; else return -EINVAL; if (sinfo->gso_type & SKB_GSO_TCP_ECN) hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN; } else hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE; if (skb->ip_summed == CHECKSUM_PARTIAL) { hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; hdr->csum_start = __cpu_to_virtio16(little_endian, skb_checksum_start_offset(skb) + vlan_hlen); hdr->csum_offset = __cpu_to_virtio16(little_endian, skb->csum_offset); } else if (has_data_valid && skb->ip_summed == CHECKSUM_UNNECESSARY) { hdr->flags = VIRTIO_NET_HDR_F_DATA_VALID; } /* else everything is zero */ return 0; } #endif /* _LINUX_VIRTIO_NET_H */
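/*
 * Illustrative sketch (not part of this header): the two helpers above are
 * the conversion points a virtio-style backend would use.  On the path from
 * the guest, virtio_net_hdr_to_skb() validates the header and transfers its
 * GSO/checksum metadata onto the skb; on the path towards the guest,
 * virtio_net_hdr_from_skb() builds a header from the skb's offload state.
 * The wrapper names below are hypothetical and a little-endian header
 * layout is assumed.
 */
static inline int example_hdr_to_skb(struct sk_buff *skb,
				     const struct virtio_net_hdr *hdr)
{
	/* Apply guest-supplied offload metadata; returns -EINVAL on a bogus header. */
	return virtio_net_hdr_to_skb(skb, hdr, true);
}

static inline int example_hdr_from_skb(const struct sk_buff *skb,
				       struct virtio_net_hdr *hdr)
{
	/* Describe the skb's offload state; no VLAN header is accounted for,
	 * and CHECKSUM_UNNECESSARY is not reported as DATA_VALID.
	 */
	return virtio_net_hdr_from_skb(skb, hdr, true, false, 0);
}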
/* Helpers for initial module or kernel cmdline parsing Copyright (C) 2001 Rusty Russell. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/kernel.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/device.h> #include <linux/err.h> #include <linux/slab.h> #include <linux/ctype.h> #ifdef CONFIG_SYSFS /* Protects all built-in parameters, modules use their own param_lock */ static DEFINE_MUTEX(param_lock); /* Use the module's mutex, or if built-in use the built-in mutex */ #ifdef CONFIG_MODULES #define KPARAM_MUTEX(mod) ((mod) ? &(mod)->param_lock : &param_lock) #else #define KPARAM_MUTEX(mod) (&param_lock) #endif static inline void check_kparam_locked(struct module *mod) { BUG_ON(!mutex_is_locked(KPARAM_MUTEX(mod))); } #else static inline void check_kparam_locked(struct module *mod) { } #endif /* !CONFIG_SYSFS */ /* This just allows us to keep track of which parameters are kmalloced. */ struct kmalloced_param { struct list_head list; char val[]; }; static LIST_HEAD(kmalloced_params); static DEFINE_SPINLOCK(kmalloced_params_lock); static void *kmalloc_parameter(unsigned int size) { struct kmalloced_param *p; p = kmalloc(sizeof(*p) + size, GFP_KERNEL); if (!p) return NULL; spin_lock(&kmalloced_params_lock); list_add(&p->list, &kmalloced_params); spin_unlock(&kmalloced_params_lock); return p->val; } /* Does nothing if parameter wasn't kmalloced above.
*/ static void maybe_kfree_parameter(void *param) { struct kmalloced_param *p; spin_lock(&kmalloced_params_lock); list_for_each_entry(p, &kmalloced_params, list) { if (p->val == param) { list_del(&p->list); kfree(p); break; } } spin_unlock(&kmalloced_params_lock); } static char dash2underscore(char c) { if (c == '-') return '_'; return c; } bool parameqn(const char *a, const char *b, size_t n) { size_t i; for (i = 0; i < n; i++) { if (dash2underscore(a[i]) != dash2underscore(b[i])) return false; } return true; } bool parameq(const char *a, const char *b) { return parameqn(a, b, strlen(a)+1); } static void param_check_unsafe(const struct kernel_param *kp) { if (kp->flags & KERNEL_PARAM_FL_UNSAFE) { pr_notice("Setting dangerous option %s - tainting kernel\n", kp->name); add_taint(TAINT_USER, LOCKDEP_STILL_OK); } } static int parse_one(char *param, char *val, const char *doing, const struct kernel_param *params, unsigned num_params, s16 min_level, s16 max_level, void *arg, int (*handle_unknown)(char *param, char *val, const char *doing, void *arg)) { unsigned int i; int err; /* Find parameter */ for (i = 0; i < num_params; i++) { if (parameq(param, params[i].name)) { if (params[i].level < min_level || params[i].level > max_level) return 0; /* No one handled NULL, so do it here. */ if (!val && !(params[i].ops->flags & KERNEL_PARAM_OPS_FL_NOARG)) return -EINVAL; pr_debug("handling %s with %p\n", param, params[i].ops->set); kernel_param_lock(params[i].mod); param_check_unsafe(&params[i]); err = params[i].ops->set(val, &params[i]); kernel_param_unlock(params[i].mod); return err; } } if (handle_unknown) { pr_debug("doing %s: %s='%s'\n", doing, param, val); return handle_unknown(param, val, doing, arg); } pr_debug("Unknown argument '%s'\n", param); return -ENOENT; } /* Args looks like "foo=bar,bar2 baz=fuz wiz". */ char *parse_args(const char *doing, char *args, const struct kernel_param *params, unsigned num, s16 min_level, s16 max_level, void *arg, int (*unknown)(char *param, char *val, const char *doing, void *arg)) { char *param, *val, *err = NULL; /* Chew leading spaces */ args = skip_spaces(args); if (*args) pr_debug("doing %s, parsing ARGS: '%s'\n", doing, args); while (*args) { int ret; int irq_was_disabled; args = next_arg(args, &param, &val); /* Stop at -- */ if (!val && strcmp(param, "--") == 0) return err ?: args; irq_was_disabled = irqs_disabled(); ret = parse_one(param, val, doing, params, num, min_level, max_level, arg, unknown); if (irq_was_disabled && !irqs_disabled()) pr_warn("%s: option '%s' enabled irq's!\n", doing, param); switch (ret) { case 0: continue; case -ENOENT: pr_err("%s: Unknown parameter `%s'\n", doing, param); break; case -ENOSPC: pr_err("%s: `%s' too large for parameter `%s'\n", doing, val ?: "", param); break; default: pr_err("%s: `%s' invalid for parameter `%s'\n", doing, val ?: "", param); break; } err = ERR_PTR(ret); } return err; } /* Lazy bastard, eh? 
*/ #define STANDARD_PARAM_DEF(name, type, format, strtolfn) \ int param_set_##name(const char *val, const struct kernel_param *kp) \ { \ return strtolfn(val, 0, (type *)kp->arg); \ } \ int param_get_##name(char *buffer, const struct kernel_param *kp) \ { \ return scnprintf(buffer, PAGE_SIZE, format "\n", \ *((type *)kp->arg)); \ } \ const struct kernel_param_ops param_ops_##name = { \ .set = param_set_##name, \ .get = param_get_##name, \ }; \ EXPORT_SYMBOL(param_set_##name); \ EXPORT_SYMBOL(param_get_##name); \ EXPORT_SYMBOL(param_ops_##name) STANDARD_PARAM_DEF(byte, unsigned char, "%hhu", kstrtou8); STANDARD_PARAM_DEF(short, short, "%hi", kstrtos16); STANDARD_PARAM_DEF(ushort, unsigned short, "%hu", kstrtou16); STANDARD_PARAM_DEF(int, int, "%i", kstrtoint); STANDARD_PARAM_DEF(uint, unsigned int, "%u", kstrtouint); STANDARD_PARAM_DEF(long, long, "%li", kstrtol); STANDARD_PARAM_DEF(ulong, unsigned long, "%lu", kstrtoul); STANDARD_PARAM_DEF(ullong, unsigned long long, "%llu", kstrtoull); int param_set_charp(const char *val, const struct kernel_param *kp) { if (strlen(val) > 1024) { pr_err("%s: string parameter too long\n", kp->name); return -ENOSPC; } maybe_kfree_parameter(*(char **)kp->arg); /* This is a hack. We can't kmalloc in early boot, and we * don't need to; this mangled commandline is preserved. */ if (slab_is_available()) { *(char **)kp->arg = kmalloc_parameter(strlen(val)+1); if (!*(char **)kp->arg) return -ENOMEM; strcpy(*(char **)kp->arg, val); } else *(const char **)kp->arg = val; return 0; } EXPORT_SYMBOL(param_set_charp); int param_get_charp(char *buffer, const struct kernel_param *kp) { return scnprintf(buffer, PAGE_SIZE, "%s\n", *((char **)kp->arg)); } EXPORT_SYMBOL(param_get_charp); void param_free_charp(void *arg) { maybe_kfree_parameter(*((char **)arg)); } EXPORT_SYMBOL(param_free_charp); const struct kernel_param_ops param_ops_charp = { .set = param_set_charp, .get = param_get_charp, .free = param_free_charp, }; EXPORT_SYMBOL(param_ops_charp); /* Actually could be a bool or an int, for historical reasons. */ int param_set_bool(const char *val, const struct kernel_param *kp) { /* No equals means "set"... */ if (!val) val = "1"; /* One of =[yYnN01] */ return strtobool(val, kp->arg); } EXPORT_SYMBOL(param_set_bool); int param_get_bool(char *buffer, const struct kernel_param *kp) { /* Y and N chosen as being relatively non-coder friendly */ return sprintf(buffer, "%c\n", *(bool *)kp->arg ? 'Y' : 'N'); } EXPORT_SYMBOL(param_get_bool); const struct kernel_param_ops param_ops_bool = { .flags = KERNEL_PARAM_OPS_FL_NOARG, .set = param_set_bool, .get = param_get_bool, }; EXPORT_SYMBOL(param_ops_bool); int param_set_bool_enable_only(const char *val, const struct kernel_param *kp) { int err = 0; bool new_value; bool orig_value = *(bool *)kp->arg; struct kernel_param dummy_kp = *kp; dummy_kp.arg = &new_value; err = param_set_bool(val, &dummy_kp); if (err) return err; /* Don't let them unset it once it's set! */ if (!new_value && orig_value) return -EROFS; if (new_value) err = param_set_bool(val, kp); return err; } EXPORT_SYMBOL_GPL(param_set_bool_enable_only); const struct kernel_param_ops param_ops_bool_enable_only = { .flags = KERNEL_PARAM_OPS_FL_NOARG, .set = param_set_bool_enable_only, .get = param_get_bool, }; EXPORT_SYMBOL_GPL(param_ops_bool_enable_only); /* This one must be bool. 
*/ int param_set_invbool(const char *val, const struct kernel_param *kp) { int ret; bool boolval; struct kernel_param dummy; dummy.arg = &boolval; ret = param_set_bool(val, &dummy); if (ret == 0) *(bool *)kp->arg = !boolval; return ret; } EXPORT_SYMBOL(param_set_invbool); int param_get_invbool(char *buffer, const struct kernel_param *kp) { return sprintf(buffer, "%c\n", (*(bool *)kp->arg) ? 'N' : 'Y'); } EXPORT_SYMBOL(param_get_invbool); const struct kernel_param_ops param_ops_invbool = { .set = param_set_invbool, .get = param_get_invbool, }; EXPORT_SYMBOL(param_ops_invbool); int param_set_bint(const char *val, const struct kernel_param *kp) { /* Match bool exactly, by re-using it. */ struct kernel_param boolkp = *kp; bool v; int ret; boolkp.arg = &v; ret = param_set_bool(val, &boolkp); if (ret == 0) *(int *)kp->arg = v; return ret; } EXPORT_SYMBOL(param_set_bint); const struct kernel_param_ops param_ops_bint = { .flags = KERNEL_PARAM_OPS_FL_NOARG, .set = param_set_bint, .get = param_get_int, }; EXPORT_SYMBOL(param_ops_bint); /* We break the rule and mangle the string. */ static int param_array(struct module *mod, const char *name, const char *val, unsigned int min, unsigned int max, void *elem, int elemsize, int (*set)(const char *, const struct kernel_param *kp), s16 level, unsigned int *num) { int ret; struct kernel_param kp; char save; /* Get the name right for errors. */ kp.name = name; kp.arg = elem; kp.level = level; *num = 0; /* We expect a comma-separated list of values. */ do { int len; if (*num == max) { pr_err("%s: can only take %i arguments\n", name, max); return -EINVAL; } len = strcspn(val, ","); /* nul-terminate and parse */ save = val[len]; ((char *)val)[len] = '\0'; check_kparam_locked(mod); ret = set(val, &kp); if (ret != 0) return ret; kp.arg += elemsize; val += len+1; (*num)++; } while (save == ','); if (*num < min) { pr_err("%s: needs at least %i arguments\n", name, min); return -EINVAL; } return 0; } static int param_array_set(const char *val, const struct kernel_param *kp) { const struct kparam_array *arr = kp->arr; unsigned int temp_num; return param_array(kp->mod, kp->name, val, 1, arr->max, arr->elem, arr->elemsize, arr->ops->set, kp->level, arr->num ?: &temp_num); } static int param_array_get(char *buffer, const struct kernel_param *kp) { int i, off, ret; const struct kparam_array *arr = kp->arr; struct kernel_param p = *kp; for (i = off = 0; i < (arr->num ? *arr->num : arr->max); i++) { /* Replace \n with comma */ if (i) buffer[off - 1] = ','; p.arg = arr->elem + arr->elemsize * i; check_kparam_locked(p.mod); ret = arr->ops->get(buffer + off, &p); if (ret < 0) return ret; off += ret; } buffer[off] = '\0'; return off; } static void param_array_free(void *arg) { unsigned int i; const struct kparam_array *arr = arg; if (arr->ops->free) for (i = 0; i < (arr->num ? 
*arr->num : arr->max); i++) arr->ops->free(arr->elem + arr->elemsize * i); } const struct kernel_param_ops param_array_ops = { .set = param_array_set, .get = param_array_get, .free = param_array_free, }; EXPORT_SYMBOL(param_array_ops); int param_set_copystring(const char *val, const struct kernel_param *kp) { const struct kparam_string *kps = kp->str; if (strlen(val)+1 > kps->maxlen) { pr_err("%s: string doesn't fit in %u chars.\n", kp->name, kps->maxlen-1); return -ENOSPC; } strcpy(kps->string, val); return 0; } EXPORT_SYMBOL(param_set_copystring); int param_get_string(char *buffer, const struct kernel_param *kp) { const struct kparam_string *kps = kp->str; return scnprintf(buffer, PAGE_SIZE, "%s\n", kps->string); } EXPORT_SYMBOL(param_get_string); const struct kernel_param_ops param_ops_string = { .set = param_set_copystring, .get = param_get_string, }; EXPORT_SYMBOL(param_ops_string); /* sysfs output in /sys/modules/XYZ/parameters/ */ #define to_module_attr(n) container_of(n, struct module_attribute, attr) #define to_module_kobject(n) container_of(n, struct module_kobject, kobj) struct param_attribute { struct module_attribute mattr; const struct kernel_param *param; }; struct module_param_attrs { unsigned int num; struct attribute_group grp; struct param_attribute attrs[0]; }; #ifdef CONFIG_SYSFS #define to_param_attr(n) container_of(n, struct param_attribute, mattr) static ssize_t param_attr_show(struct module_attribute *mattr, struct module_kobject *mk, char *buf) { int count; struct param_attribute *attribute = to_param_attr(mattr); if (!attribute->param->ops->get) return -EPERM; kernel_param_lock(mk->mod); count = attribute->param->ops->get(buf, attribute->param); kernel_param_unlock(mk->mod); return count; } /* sysfs always hands a nul-terminated string in buf. We rely on that. */ static ssize_t param_attr_store(struct module_attribute *mattr, struct module_kobject *mk, const char *buf, size_t len) { int err; struct param_attribute *attribute = to_param_attr(mattr); if (!attribute->param->ops->set) return -EPERM; kernel_param_lock(mk->mod); param_check_unsafe(attribute->param); err = attribute->param->ops->set(buf, attribute->param); kernel_param_unlock(mk->mod); if (!err) return len; return err; } #endif #ifdef CONFIG_MODULES #define __modinit #else #define __modinit __init #endif #ifdef CONFIG_SYSFS void kernel_param_lock(struct module *mod) { mutex_lock(KPARAM_MUTEX(mod)); } void kernel_param_unlock(struct module *mod) { mutex_unlock(KPARAM_MUTEX(mod)); } EXPORT_SYMBOL(kernel_param_lock); EXPORT_SYMBOL(kernel_param_unlock); /* * add_sysfs_param - add a parameter to sysfs * @mk: struct module_kobject * @kp: the actual parameter definition to add to sysfs * @name: name of parameter * * Create a kobject if for a (per-module) parameter if mp NULL, and * create file in sysfs. Returns an error on out of memory. Always cleans up * if there's an error. */ static __modinit int add_sysfs_param(struct module_kobject *mk, const struct kernel_param *kp, const char *name) { struct module_param_attrs *new_mp; struct attribute **new_attrs; unsigned int i; /* We don't bother calling this with invisible parameters. */ BUG_ON(!kp->perm); if (!mk->mp) { /* First allocation. */ mk->mp = kzalloc(sizeof(*mk->mp), GFP_KERNEL); if (!mk->mp) return -ENOMEM; mk->mp->grp.name = "parameters"; /* NULL-terminated attribute array. 
*/ mk->mp->grp.attrs = kzalloc(sizeof(mk->mp->grp.attrs[0]), GFP_KERNEL); /* Caller will cleanup via free_module_param_attrs */ if (!mk->mp->grp.attrs) return -ENOMEM; } /* Enlarge allocations. */ new_mp = krealloc(mk->mp, sizeof(*mk->mp) + sizeof(mk->mp->attrs[0]) * (mk->mp->num + 1), GFP_KERNEL); if (!new_mp) return -ENOMEM; mk->mp = new_mp; /* Extra pointer for NULL terminator */ new_attrs = krealloc(mk->mp->grp.attrs, sizeof(mk->mp->grp.attrs[0]) * (mk->mp->num + 2), GFP_KERNEL); if (!new_attrs) return -ENOMEM; mk->mp->grp.attrs = new_attrs; /* Tack new one on the end. */ memset(&mk->mp->attrs[mk->mp->num], 0, sizeof(mk->mp->attrs[0])); sysfs_attr_init(&mk->mp->attrs[mk->mp->num].mattr.attr); mk->mp->attrs[mk->mp->num].param = kp; mk->mp->attrs[mk->mp->num].mattr.show = param_attr_show; /* Do not allow runtime DAC changes to make param writable. */ if ((kp->perm & (S_IWUSR | S_IWGRP | S_IWOTH)) != 0) mk->mp->attrs[mk->mp->num].mattr.store = param_attr_store; else mk->mp->attrs[mk->mp->num].mattr.store = NULL; mk->mp->attrs[mk->mp->num].mattr.attr.name = (char *)name; mk->mp->attrs[mk->mp->num].mattr.attr.mode = kp->perm; mk->mp->num++; /* Fix up all the pointers, since krealloc can move us */ for (i = 0; i < mk->mp->num; i++) mk->mp->grp.attrs[i] = &mk->mp->attrs[i].mattr.attr; mk->mp->grp.attrs[mk->mp->num] = NULL; return 0; } #ifdef CONFIG_MODULES static void free_module_param_attrs(struct module_kobject *mk) { if (mk->mp) kfree(mk->mp->grp.attrs); kfree(mk->mp); mk->mp = NULL; } /* * module_param_sysfs_setup - setup sysfs support for one module * @mod: module * @kparam: module parameters (array) * @num_params: number of module parameters * * Adds sysfs entries for module parameters under * /sys/module/[mod->name]/parameters/ */ int module_param_sysfs_setup(struct module *mod, const struct kernel_param *kparam, unsigned int num_params) { int i, err; bool params = false; for (i = 0; i < num_params; i++) { if (kparam[i].perm == 0) continue; err = add_sysfs_param(&mod->mkobj, &kparam[i], kparam[i].name); if (err) { free_module_param_attrs(&mod->mkobj); return err; } params = true; } if (!params) return 0; /* Create the param group. */ err = sysfs_create_group(&mod->mkobj.kobj, &mod->mkobj.mp->grp); if (err) free_module_param_attrs(&mod->mkobj); return err; } /* * module_param_sysfs_remove - remove sysfs support for one module * @mod: module * * Remove sysfs entries for module parameters and the corresponding * kobject. */ void module_param_sysfs_remove(struct module *mod) { if (mod->mkobj.mp) { sysfs_remove_group(&mod->mkobj.kobj, &mod->mkobj.mp->grp); /* We are positive that no one is using any param * attrs at this point. Deallocate immediately. 
*/ free_module_param_attrs(&mod->mkobj); } } #endif void destroy_params(const struct kernel_param *params, unsigned num) { unsigned int i; for (i = 0; i < num; i++) if (params[i].ops->free) params[i].ops->free(params[i].arg); } static struct module_kobject * __init locate_module_kobject(const char *name) { struct module_kobject *mk; struct kobject *kobj; int err; kobj = kset_find_obj(module_kset, name); if (kobj) { mk = to_module_kobject(kobj); } else { mk = kzalloc(sizeof(struct module_kobject), GFP_KERNEL); BUG_ON(!mk); mk->mod = THIS_MODULE; mk->kobj.kset = module_kset; err = kobject_init_and_add(&mk->kobj, &module_ktype, NULL, "%s", name); #ifdef CONFIG_MODULES if (!err) err = sysfs_create_file(&mk->kobj, &module_uevent.attr); #endif if (err) { kobject_put(&mk->kobj); pr_crit("Adding module '%s' to sysfs failed (%d), the system may be unstable.\n", name, err); return NULL; } /* So that we hold reference in both cases. */ kobject_get(&mk->kobj); } return mk; } static void __init kernel_add_sysfs_param(const char *name, const struct kernel_param *kparam, unsigned int name_skip) { struct module_kobject *mk; int err; mk = locate_module_kobject(name); if (!mk) return; /* We need to remove old parameters before adding more. */ if (mk->mp) sysfs_remove_group(&mk->kobj, &mk->mp->grp); /* These should not fail at boot. */ err = add_sysfs_param(mk, kparam, kparam->name + name_skip); BUG_ON(err); err = sysfs_create_group(&mk->kobj, &mk->mp->grp); BUG_ON(err); kobject_uevent(&mk->kobj, KOBJ_ADD); kobject_put(&mk->kobj); } /* * param_sysfs_builtin - add sysfs parameters for built-in modules * * Add module_parameters to sysfs for "modules" built into the kernel. * * The "module" name (KBUILD_MODNAME) is stored before a dot, the * "parameter" name is stored behind a dot in kernel_param->name. So, * extract the "module" name for all built-in kernel_param-eters, * and for all who have the same, call kernel_add_sysfs_param. 
*/ static void __init param_sysfs_builtin(void) { const struct kernel_param *kp; unsigned int name_len; char modname[MODULE_NAME_LEN]; for (kp = __start___param; kp < __stop___param; kp++) { char *dot; if (kp->perm == 0) continue; dot = strchr(kp->name, '.'); if (!dot) { /* This happens for core_param() */ strcpy(modname, "kernel"); name_len = 0; } else { name_len = dot - kp->name + 1; strlcpy(modname, kp->name, name_len); } kernel_add_sysfs_param(modname, kp, name_len); } } ssize_t __modver_version_show(struct module_attribute *mattr, struct module_kobject *mk, char *buf) { struct module_version_attribute *vattr = container_of(mattr, struct module_version_attribute, mattr); return scnprintf(buf, PAGE_SIZE, "%s\n", vattr->version); } extern const struct module_version_attribute *__start___modver[]; extern const struct module_version_attribute *__stop___modver[]; static void __init version_sysfs_builtin(void) { const struct module_version_attribute **p; struct module_kobject *mk; int err; for (p = __start___modver; p < __stop___modver; p++) { const struct module_version_attribute *vattr = *p; mk = locate_module_kobject(vattr->module_name); if (mk) { err = sysfs_create_file(&mk->kobj, &vattr->mattr.attr); WARN_ON_ONCE(err); kobject_uevent(&mk->kobj, KOBJ_ADD); kobject_put(&mk->kobj); } } } /* module-related sysfs stuff */ static ssize_t module_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { struct module_attribute *attribute; struct module_kobject *mk; int ret; attribute = to_module_attr(attr); mk = to_module_kobject(kobj); if (!attribute->show) return -EIO; ret = attribute->show(attribute, mk, buf); return ret; } static ssize_t module_attr_store(struct kobject *kobj, struct attribute *attr, const char *buf, size_t len) { struct module_attribute *attribute; struct module_kobject *mk; int ret; attribute = to_module_attr(attr); mk = to_module_kobject(kobj); if (!attribute->store) return -EIO; ret = attribute->store(attribute, mk, buf, len); return ret; } static const struct sysfs_ops module_sysfs_ops = { .show = module_attr_show, .store = module_attr_store, }; static int uevent_filter(struct kset *kset, struct kobject *kobj) { struct kobj_type *ktype = get_ktype(kobj); if (ktype == &module_ktype) return 1; return 0; } static const struct kset_uevent_ops module_uevent_ops = { .filter = uevent_filter, }; struct kset *module_kset; int module_sysfs_initialized; static void module_kobj_release(struct kobject *kobj) { struct module_kobject *mk = to_module_kobject(kobj); complete(mk->kobj_completion); } struct kobj_type module_ktype = { .release = module_kobj_release, .sysfs_ops = &module_sysfs_ops, }; /* * param_sysfs_init - wrapper for built-in params support */ static int __init param_sysfs_init(void) { module_kset = kset_create_and_add("module", &module_uevent_ops, NULL); if (!module_kset) { printk(KERN_WARNING "%s (%d): error creating kset\n", __FILE__, __LINE__); return -ENOMEM; } module_sysfs_initialized = 1; version_sysfs_builtin(); param_sysfs_builtin(); return 0; } subsys_initcall(param_sysfs_init); #endif /* CONFIG_SYSFS */
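/*
 * Illustrative sketch (not part of params.c): the consumer side of this
 * machinery.  A module (or built-in code) declares a parameter with
 * module_param(); parse_args() above then accepts
 * "example_module.example_param=..." on the kernel command line (or
 * "example_param=..." via modprobe), and add_sysfs_param() exposes it as
 * /sys/module/example_module/parameters/example_param.  The names used
 * here are hypothetical.
 */
#include <linux/module.h>
#include <linux/moduleparam.h>

static int example_param = 42;
/* 0644 makes the sysfs file world-readable and root-writable, so
 * param_attr_store() above is wired up as its store method.
 */
module_param(example_param, int, 0644);
MODULE_PARM_DESC(example_param, "an illustrative integer parameter");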
/* * History: * Started: Aug 9 by Lawrence Foard (entropy@world.std.com), * to allow user process control of SCSI devices. * Development Sponsored by Killy Corp. NY NY * * Original driver (sg.c): * Copyright (C) 1992 Lawrence Foard * Version 2 and 3 extensions to driver: * Copyright (C) 1998 - 2014 Douglas Gilbert * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2, or (at your option) * any later version. * */ static int sg_version_num = 30536; /* 2 digits for each component */ #define SG_VERSION_STR "3.5.36" /* * D. P. Gilbert (dgilbert@interlog.com), notes: * - scsi logging is available via SCSI_LOG_TIMEOUT macros. First * the kernel/module needs to be built with CONFIG_SCSI_LOGGING * (otherwise the macros compile to empty statements).
* */ #include <linux/module.h> #include <linux/fs.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/string.h> #include <linux/mm.h> #include <linux/errno.h> #include <linux/mtio.h> #include <linux/ioctl.h> #include <linux/slab.h> #include <linux/fcntl.h> #include <linux/init.h> #include <linux/poll.h> #include <linux/moduleparam.h> #include <linux/cdev.h> #include <linux/idr.h> #include <linux/seq_file.h> #include <linux/blkdev.h> #include <linux/delay.h> #include <linux/blktrace_api.h> #include <linux/mutex.h> #include <linux/atomic.h> #include <linux/ratelimit.h> #include <linux/uio.h> #include <linux/cred.h> /* for sg_check_file_access() */ #include "scsi.h" #include <scsi/scsi_dbg.h> #include <scsi/scsi_host.h> #include <scsi/scsi_driver.h> #include <scsi/scsi_ioctl.h> #include <scsi/sg.h> #include "scsi_logging.h" #ifdef CONFIG_SCSI_PROC_FS #include <linux/proc_fs.h> static char *sg_version_date = "20140603"; static int sg_proc_init(void); #endif #define SG_ALLOW_DIO_DEF 0 #define SG_MAX_DEVS 32768 /* SG_MAX_CDB_SIZE should be 260 (spc4r37 section 3.1.30) however the type * of sg_io_hdr::cmd_len can only represent 255. All SCSI commands greater * than 16 bytes are "variable length" whose length is a multiple of 4 */ #define SG_MAX_CDB_SIZE 252 #define SG_DEFAULT_TIMEOUT mult_frac(SG_DEFAULT_TIMEOUT_USER, HZ, USER_HZ) int sg_big_buff = SG_DEF_RESERVED_SIZE; /* N.B. This variable is readable and writeable via /proc/scsi/sg/def_reserved_size . Each time sg_open() is called a buffer of this size (or less if there is not enough memory) will be reserved for use by this file descriptor. [Deprecated usage: this variable is also readable via /proc/sys/kernel/sg-big-buff if the sg driver is built into the kernel (i.e. it is not a module).] */ static int def_reserved_size = -1; /* picks up init parameter */ static int sg_allow_dio = SG_ALLOW_DIO_DEF; static int scatter_elem_sz = SG_SCATTER_SZ; static int scatter_elem_sz_prev = SG_SCATTER_SZ; #define SG_SECTOR_SZ 512 static int sg_add_device(struct device *, struct class_interface *); static void sg_remove_device(struct device *, struct class_interface *); static DEFINE_IDR(sg_index_idr); static DEFINE_RWLOCK(sg_index_lock); /* Also used to lock file descriptor list for device */ static struct class_interface sg_interface = { .add_dev = sg_add_device, .remove_dev = sg_remove_device, }; typedef struct sg_scatter_hold { /* holding area for scsi scatter gather info */ unsigned short k_use_sg; /* Count of kernel scatter-gather pieces */ unsigned sglist_len; /* size of malloc'd scatter-gather list ++ */ unsigned bufflen; /* Size of (aggregate) data buffer */ struct page **pages; int page_order; char dio_in_use; /* 0->indirect IO (or mmap), 1->dio */ unsigned char cmd_opcode; /* first byte of command */ } Sg_scatter_hold; struct sg_device; /* forward declarations */ struct sg_fd; typedef struct sg_request { /* SG_MAX_QUEUE requests outstanding per file */ struct list_head entry; /* list entry */ struct sg_fd *parentfp; /* NULL -> not in use */ Sg_scatter_hold data; /* hold buffer, perhaps scatter list */ sg_io_hdr_t header; /* scsi command+info, see <scsi/sg.h> */ unsigned char sense_b[SCSI_SENSE_BUFFERSIZE]; char res_used; /* 1 -> using reserve buffer, 0 -> not ... 
*/ char orphan; /* 1 -> drop on sight, 0 -> normal */ char sg_io_owned; /* 1 -> packet belongs to SG_IO */ /* done protected by rq_list_lock */ char done; /* 0->before bh, 1->before read, 2->read */ struct request *rq; struct bio *bio; struct execute_work ew; } Sg_request; typedef struct sg_fd { /* holds the state of a file descriptor */ struct list_head sfd_siblings; /* protected by device's sfd_lock */ struct sg_device *parentdp; /* owning device */ wait_queue_head_t read_wait; /* queue read until command done */ rwlock_t rq_list_lock; /* protect access to list in req_arr */ struct mutex f_mutex; /* protect against changes in this fd */ int timeout; /* defaults to SG_DEFAULT_TIMEOUT */ int timeout_user; /* defaults to SG_DEFAULT_TIMEOUT_USER */ Sg_scatter_hold reserve; /* buffer held for this file descriptor */ struct list_head rq_list; /* head of request list */ struct fasync_struct *async_qp; /* used by asynchronous notification */ Sg_request req_arr[SG_MAX_QUEUE]; /* used as singly-linked list */ char force_packid; /* 1 -> pack_id input to read(), 0 -> ignored */ char cmd_q; /* 1 -> allow command queuing, 0 -> don't */ unsigned char next_cmd_len; /* 0: automatic, >0: use on next write() */ char keep_orphan; /* 0 -> drop orphan (def), 1 -> keep for read() */ char mmap_called; /* 0 -> mmap() never called on this fd */ char res_in_use; /* 1 -> 'reserve' array in use */ struct kref f_ref; struct execute_work ew; } Sg_fd; typedef struct sg_device { /* holds the state of each scsi generic device */ struct scsi_device *device; wait_queue_head_t open_wait; /* queue open() when O_EXCL present */ struct mutex open_rel_lock; /* held when in open() or release() */ int sg_tablesize; /* adapter's max scatter-gather table size */ u32 index; /* device index number */ struct list_head sfds; rwlock_t sfd_lock; /* protect access to sfd list */ atomic_t detaching; /* 0->device usable, 1->device detaching */ bool exclude; /* 1->open(O_EXCL) succeeded and is active */ int open_cnt; /* count of opens (perhaps < num(sfds) ) */ char sgdebug; /* 0->off, 1->sense, 9->dump dev, 10-> all devs */ struct gendisk *disk; struct cdev * cdev; /* char_dev [sysfs: /sys/cdev/major/sg<n>] */ struct kref d_ref; } Sg_device; /* tasklet or soft irq callback */ static void sg_rq_end_io(struct request *rq, blk_status_t status); static int sg_start_req(Sg_request *srp, unsigned char *cmd); static int sg_finish_rem_req(Sg_request * srp); static int sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size); static ssize_t sg_new_read(Sg_fd * sfp, char __user *buf, size_t count, Sg_request * srp); static ssize_t sg_new_write(Sg_fd *sfp, struct file *file, const char __user *buf, size_t count, int blocking, int read_only, int sg_io_owned, Sg_request **o_srp); static int sg_common_write(Sg_fd * sfp, Sg_request * srp, unsigned char *cmnd, int timeout, int blocking); static int sg_read_oxfer(Sg_request * srp, char __user *outp, int num_read_xfer); static void sg_remove_scat(Sg_fd * sfp, Sg_scatter_hold * schp); static void sg_build_reserve(Sg_fd * sfp, int req_size); static void sg_link_reserve(Sg_fd * sfp, Sg_request * srp, int size); static void sg_unlink_reserve(Sg_fd * sfp, Sg_request * srp); static Sg_fd *sg_add_sfp(Sg_device * sdp); static void sg_remove_sfp(struct kref *); static Sg_request *sg_get_rq_mark(Sg_fd * sfp, int pack_id, bool *busy); static Sg_request *sg_add_request(Sg_fd * sfp); static int sg_remove_request(Sg_fd * sfp, Sg_request * srp); static Sg_device *sg_get_dev(int dev); static void 
sg_device_destroy(struct kref *kref); #define SZ_SG_HEADER sizeof(struct sg_header) #define SZ_SG_IO_HDR sizeof(sg_io_hdr_t) #define SZ_SG_IOVEC sizeof(sg_iovec_t) #define SZ_SG_REQ_INFO sizeof(sg_req_info_t) #define sg_printk(prefix, sdp, fmt, a...) \ sdev_prefix_printk(prefix, (sdp)->device, \ (sdp)->disk->disk_name, fmt, ##a) /* * The SCSI interfaces that use read() and write() as an asynchronous variant of * ioctl(..., SG_IO, ...) are fundamentally unsafe, since there are lots of ways * to trigger read() and write() calls from various contexts with elevated * privileges. This can lead to kernel memory corruption (e.g. if these * interfaces are called through splice()) and privilege escalation inside * userspace (e.g. if a process with access to such a device passes a file * descriptor to a SUID binary as stdin/stdout/stderr). * * This function provides protection for the legacy API by restricting the * calling context. */ static int sg_check_file_access(struct file *filp, const char *caller) { if (filp->f_cred != current_real_cred()) { pr_err_once("%s: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n", caller, task_tgid_vnr(current), current->comm); return -EPERM; } if (uaccess_kernel()) { pr_err_once("%s: process %d (%s) called from kernel context, this is not allowed.\n", caller, task_tgid_vnr(current), current->comm); return -EACCES; } return 0; } static int sg_allow_access(struct file *filp, unsigned char *cmd) { struct sg_fd *sfp = filp->private_data; if (sfp->parentdp->device->type == TYPE_SCANNER) return 0; return blk_verify_command(cmd, filp->f_mode); } static int open_wait(Sg_device *sdp, int flags) { int retval = 0; if (flags & O_EXCL) { while (sdp->open_cnt > 0) { mutex_unlock(&sdp->open_rel_lock); retval = wait_event_interruptible(sdp->open_wait, (atomic_read(&sdp->detaching) || !sdp->open_cnt)); mutex_lock(&sdp->open_rel_lock); if (retval) /* -ERESTARTSYS */ return retval; if (atomic_read(&sdp->detaching)) return -ENODEV; } } else { while (sdp->exclude) { mutex_unlock(&sdp->open_rel_lock); retval = wait_event_interruptible(sdp->open_wait, (atomic_read(&sdp->detaching) || !sdp->exclude)); mutex_lock(&sdp->open_rel_lock); if (retval) /* -ERESTARTSYS */ return retval; if (atomic_read(&sdp->detaching)) return -ENODEV; } } return retval; } /* Returns 0 on success, else a negated errno value */ static int sg_open(struct inode *inode, struct file *filp) { int dev = iminor(inode); int flags = filp->f_flags; struct request_queue *q; Sg_device *sdp; Sg_fd *sfp; int retval; nonseekable_open(inode, filp); if ((flags & O_EXCL) && (O_RDONLY == (flags & O_ACCMODE))) return -EPERM; /* Can't lock it with read only access */ sdp = sg_get_dev(dev); if (IS_ERR(sdp)) return PTR_ERR(sdp); SCSI_LOG_TIMEOUT(3, sg_printk(KERN_INFO, sdp, "sg_open: flags=0x%x\n", flags)); /* This driver's module count bumped by fops_get in <linux/fs.h> */ /* Prevent the device driver from vanishing while we sleep */ retval = scsi_device_get(sdp->device); if (retval) goto sg_put; retval = scsi_autopm_get_device(sdp->device); if (retval) goto sdp_put; /* scsi_block_when_processing_errors() may block so bypass * check if O_NONBLOCK. Permits SCSI commands to be issued * during error recovery. Tread carefully. 
*/ if (!((flags & O_NONBLOCK) || scsi_block_when_processing_errors(sdp->device))) { retval = -ENXIO; /* we are in error recovery for this device */ goto error_out; } mutex_lock(&sdp->open_rel_lock); if (flags & O_NONBLOCK) { if (flags & O_EXCL) { if (sdp->open_cnt > 0) { retval = -EBUSY; goto error_mutex_locked; } } else { if (sdp->exclude) { retval = -EBUSY; goto error_mutex_locked; } } } else { retval = open_wait(sdp, flags); if (retval) /* -ERESTARTSYS or -ENODEV */ goto error_mutex_locked; } /* N.B. at this point we are holding the open_rel_lock */ if (flags & O_EXCL) sdp->exclude = true; if (sdp->open_cnt < 1) { /* no existing opens */ sdp->sgdebug = 0; q = sdp->device->request_queue; sdp->sg_tablesize = queue_max_segments(q); } sfp = sg_add_sfp(sdp); if (IS_ERR(sfp)) { retval = PTR_ERR(sfp); goto out_undo; } filp->private_data = sfp; sdp->open_cnt++; mutex_unlock(&sdp->open_rel_lock); retval = 0; sg_put: kref_put(&sdp->d_ref, sg_device_destroy); return retval; out_undo: if (flags & O_EXCL) { sdp->exclude = false; /* undo if error */ wake_up_interruptible(&sdp->open_wait); } error_mutex_locked: mutex_unlock(&sdp->open_rel_lock); error_out: scsi_autopm_put_device(sdp->device); sdp_put: scsi_device_put(sdp->device); goto sg_put; } /* Release resources associated with a successful sg_open() * Returns 0 on success, else a negated errno value */ static int sg_release(struct inode *inode, struct file *filp) { Sg_device *sdp; Sg_fd *sfp; if ((!(sfp = (Sg_fd *) filp->private_data)) || (!(sdp = sfp->parentdp))) return -ENXIO; SCSI_LOG_TIMEOUT(3, sg_printk(KERN_INFO, sdp, "sg_release\n")); mutex_lock(&sdp->open_rel_lock); scsi_autopm_put_device(sdp->device); kref_put(&sfp->f_ref, sg_remove_sfp); sdp->open_cnt--; /* possibly many open()s waiting on exlude clearing, start many; * only open(O_EXCL)s wait on 0==open_cnt so only start one */ if (sdp->exclude) { sdp->exclude = false; wake_up_interruptible_all(&sdp->open_wait); } else if (0 == sdp->open_cnt) { wake_up_interruptible(&sdp->open_wait); } mutex_unlock(&sdp->open_rel_lock); return 0; } static ssize_t sg_read(struct file *filp, char __user *buf, size_t count, loff_t * ppos) { Sg_device *sdp; Sg_fd *sfp; Sg_request *srp; int req_pack_id = -1; bool busy; sg_io_hdr_t *hp; struct sg_header *old_hdr = NULL; int retval = 0; /* * This could cause a response to be stranded. Close the associated * file descriptor to free up any resources being held. 
*/ retval = sg_check_file_access(filp, __func__); if (retval) return retval; if ((!(sfp = (Sg_fd *) filp->private_data)) || (!(sdp = sfp->parentdp))) return -ENXIO; SCSI_LOG_TIMEOUT(3, sg_printk(KERN_INFO, sdp, "sg_read: count=%d\n", (int) count)); if (!access_ok(VERIFY_WRITE, buf, count)) return -EFAULT; if (sfp->force_packid && (count >= SZ_SG_HEADER)) { old_hdr = kmalloc(SZ_SG_HEADER, GFP_KERNEL); if (!old_hdr) return -ENOMEM; if (__copy_from_user(old_hdr, buf, SZ_SG_HEADER)) { retval = -EFAULT; goto free_old_hdr; } if (old_hdr->reply_len < 0) { if (count >= SZ_SG_IO_HDR) { sg_io_hdr_t *new_hdr; new_hdr = kmalloc(SZ_SG_IO_HDR, GFP_KERNEL); if (!new_hdr) { retval = -ENOMEM; goto free_old_hdr; } retval =__copy_from_user (new_hdr, buf, SZ_SG_IO_HDR); req_pack_id = new_hdr->pack_id; kfree(new_hdr); if (retval) { retval = -EFAULT; goto free_old_hdr; } } } else req_pack_id = old_hdr->pack_id; } srp = sg_get_rq_mark(sfp, req_pack_id, &busy); if (!srp) { /* now wait on packet to arrive */ if (filp->f_flags & O_NONBLOCK) { retval = -EAGAIN; goto free_old_hdr; } retval = wait_event_interruptible(sfp->read_wait, ((srp = sg_get_rq_mark(sfp, req_pack_id, &busy)) || (!busy && atomic_read(&sdp->detaching)))); if (!srp) { /* signal or detaching */ if (!retval) retval = -ENODEV; goto free_old_hdr; } } if (srp->header.interface_id != '\0') { retval = sg_new_read(sfp, buf, count, srp); goto free_old_hdr; } hp = &srp->header; if (old_hdr == NULL) { old_hdr = kmalloc(SZ_SG_HEADER, GFP_KERNEL); if (! old_hdr) { retval = -ENOMEM; goto free_old_hdr; } } memset(old_hdr, 0, SZ_SG_HEADER); old_hdr->reply_len = (int) hp->timeout; old_hdr->pack_len = old_hdr->reply_len; /* old, strange behaviour */ old_hdr->pack_id = hp->pack_id; old_hdr->twelve_byte = ((srp->data.cmd_opcode >= 0xc0) && (12 == hp->cmd_len)) ? 1 : 0; old_hdr->target_status = hp->masked_status; old_hdr->host_status = hp->host_status; old_hdr->driver_status = hp->driver_status; if ((CHECK_CONDITION & hp->masked_status) || (DRIVER_SENSE & hp->driver_status)) memcpy(old_hdr->sense_buffer, srp->sense_b, sizeof (old_hdr->sense_buffer)); switch (hp->host_status) { /* This setup of 'result' is for backward compatibility and is best ignored by the user who should use target, host + driver status */ case DID_OK: case DID_PASSTHROUGH: case DID_SOFT_ERROR: old_hdr->result = 0; break; case DID_NO_CONNECT: case DID_BUS_BUSY: case DID_TIME_OUT: old_hdr->result = EBUSY; break; case DID_BAD_TARGET: case DID_ABORT: case DID_PARITY: case DID_RESET: case DID_BAD_INTR: old_hdr->result = EIO; break; case DID_ERROR: old_hdr->result = (srp->sense_b[0] == 0 && hp->masked_status == GOOD) ? 0 : EIO; break; default: old_hdr->result = EIO; break; } /* Now copy the result back to the user buffer. */ if (count >= SZ_SG_HEADER) { if (__copy_to_user(buf, old_hdr, SZ_SG_HEADER)) { retval = -EFAULT; goto free_old_hdr; } buf += SZ_SG_HEADER; if (count > old_hdr->reply_len) count = old_hdr->reply_len; if (count > SZ_SG_HEADER) { if (sg_read_oxfer(srp, buf, count - SZ_SG_HEADER)) { retval = -EFAULT; goto free_old_hdr; } } } else count = (old_hdr->result == 0) ? 
0 : -EIO; sg_finish_rem_req(srp); sg_remove_request(sfp, srp); retval = count; free_old_hdr: kfree(old_hdr); return retval; } static ssize_t sg_new_read(Sg_fd * sfp, char __user *buf, size_t count, Sg_request * srp) { sg_io_hdr_t *hp = &srp->header; int err = 0, err2; int len; if (count < SZ_SG_IO_HDR) { err = -EINVAL; goto err_out; } hp->sb_len_wr = 0; if ((hp->mx_sb_len > 0) && hp->sbp) { if ((CHECK_CONDITION & hp->masked_status) || (DRIVER_SENSE & hp->driver_status)) { int sb_len = SCSI_SENSE_BUFFERSIZE; sb_len = (hp->mx_sb_len > sb_len) ? sb_len : hp->mx_sb_len; len = 8 + (int) srp->sense_b[7]; /* Additional sense length field */ len = (len > sb_len) ? sb_len : len; if (copy_to_user(hp->sbp, srp->sense_b, len)) { err = -EFAULT; goto err_out; } hp->sb_len_wr = len; } } if (hp->masked_status || hp->host_status || hp->driver_status) hp->info |= SG_INFO_CHECK; if (copy_to_user(buf, hp, SZ_SG_IO_HDR)) { err = -EFAULT; goto err_out; } err_out: err2 = sg_finish_rem_req(srp); sg_remove_request(sfp, srp); return err ? : err2 ? : count; } static ssize_t sg_write(struct file *filp, const char __user *buf, size_t count, loff_t * ppos) { int mxsize, cmd_size, k; int input_size, blocking; unsigned char opcode; Sg_device *sdp; Sg_fd *sfp; Sg_request *srp; struct sg_header old_hdr; sg_io_hdr_t *hp; unsigned char cmnd[SG_MAX_CDB_SIZE]; int retval; retval = sg_check_file_access(filp, __func__); if (retval) return retval; if ((!(sfp = (Sg_fd *) filp->private_data)) || (!(sdp = sfp->parentdp))) return -ENXIO; SCSI_LOG_TIMEOUT(3, sg_printk(KERN_INFO, sdp, "sg_write: count=%d\n", (int) count)); if (atomic_read(&sdp->detaching)) return -ENODEV; if (!((filp->f_flags & O_NONBLOCK) || scsi_block_when_processing_errors(sdp->device))) return -ENXIO; if (!access_ok(VERIFY_READ, buf, count)) return -EFAULT; /* protects following copy_from_user()s + get_user()s */ if (count < SZ_SG_HEADER) return -EIO; if (__copy_from_user(&old_hdr, buf, SZ_SG_HEADER)) return -EFAULT; blocking = !(filp->f_flags & O_NONBLOCK); if (old_hdr.reply_len < 0) return sg_new_write(sfp, filp, buf, count, blocking, 0, 0, NULL); if (count < (SZ_SG_HEADER + 6)) return -EIO; /* The minimum scsi command length is 6 bytes. */ if (!(srp = sg_add_request(sfp))) { SCSI_LOG_TIMEOUT(1, sg_printk(KERN_INFO, sdp, "sg_write: queue full\n")); return -EDOM; } buf += SZ_SG_HEADER; __get_user(opcode, buf); mutex_lock(&sfp->f_mutex); if (sfp->next_cmd_len > 0) { cmd_size = sfp->next_cmd_len; sfp->next_cmd_len = 0; /* reset so only this write() effected */ } else { cmd_size = COMMAND_SIZE(opcode); /* based on SCSI command group */ if ((opcode >= 0xc0) && old_hdr.twelve_byte) cmd_size = 12; } mutex_unlock(&sfp->f_mutex); SCSI_LOG_TIMEOUT(4, sg_printk(KERN_INFO, sdp, "sg_write: scsi opcode=0x%02x, cmd_size=%d\n", (int) opcode, cmd_size)); /* Determine buffer size. */ input_size = count - cmd_size; mxsize = (input_size > old_hdr.reply_len) ? input_size : old_hdr.reply_len; mxsize -= SZ_SG_HEADER; input_size -= SZ_SG_HEADER; if (input_size < 0) { sg_remove_request(sfp, srp); return -EIO; /* User did not pass enough bytes for this command. */ } hp = &srp->header; hp->interface_id = '\0'; /* indicator of old interface tunnelled */ hp->cmd_len = (unsigned char) cmd_size; hp->iovec_count = 0; hp->mx_sb_len = 0; if (input_size > 0) hp->dxfer_direction = (old_hdr.reply_len > SZ_SG_HEADER) ? SG_DXFER_TO_FROM_DEV : SG_DXFER_TO_DEV; else hp->dxfer_direction = (mxsize > 0) ? 
SG_DXFER_FROM_DEV : SG_DXFER_NONE; hp->dxfer_len = mxsize; if ((hp->dxfer_direction == SG_DXFER_TO_DEV) || (hp->dxfer_direction == SG_DXFER_TO_FROM_DEV)) hp->dxferp = (char __user *)buf + cmd_size; else hp->dxferp = NULL; hp->sbp = NULL; hp->timeout = old_hdr.reply_len; /* structure abuse ... */ hp->flags = input_size; /* structure abuse ... */ hp->pack_id = old_hdr.pack_id; hp->usr_ptr = NULL; if (__copy_from_user(cmnd, buf, cmd_size)) { sg_remove_request(sfp, srp); return -EFAULT; } /* * SG_DXFER_TO_FROM_DEV is functionally equivalent to SG_DXFER_FROM_DEV, * but is is possible that the app intended SG_DXFER_TO_DEV, because there * is a non-zero input_size, so emit a warning. */ if (hp->dxfer_direction == SG_DXFER_TO_FROM_DEV) { printk_ratelimited(KERN_WARNING "sg_write: data in/out %d/%d bytes " "for SCSI command 0x%x-- guessing " "data in;\n program %s not setting " "count and/or reply_len properly\n", old_hdr.reply_len - (int)SZ_SG_HEADER, input_size, (unsigned int) cmnd[0], current->comm); } k = sg_common_write(sfp, srp, cmnd, sfp->timeout, blocking); return (k < 0) ? k : count; } static ssize_t sg_new_write(Sg_fd *sfp, struct file *file, const char __user *buf, size_t count, int blocking, int read_only, int sg_io_owned, Sg_request **o_srp) { int k; Sg_request *srp; sg_io_hdr_t *hp; unsigned char cmnd[SG_MAX_CDB_SIZE]; int timeout; unsigned long ul_timeout; if (count < SZ_SG_IO_HDR) return -EINVAL; if (!access_ok(VERIFY_READ, buf, count)) return -EFAULT; /* protects following copy_from_user()s + get_user()s */ sfp->cmd_q = 1; /* when sg_io_hdr seen, set command queuing on */ if (!(srp = sg_add_request(sfp))) { SCSI_LOG_TIMEOUT(1, sg_printk(KERN_INFO, sfp->parentdp, "sg_new_write: queue full\n")); return -EDOM; } srp->sg_io_owned = sg_io_owned; hp = &srp->header; if (__copy_from_user(hp, buf, SZ_SG_IO_HDR)) { sg_remove_request(sfp, srp); return -EFAULT; } if (hp->interface_id != 'S') { sg_remove_request(sfp, srp); return -ENOSYS; } if (hp->flags & SG_FLAG_MMAP_IO) { if (hp->dxfer_len > sfp->reserve.bufflen) { sg_remove_request(sfp, srp); return -ENOMEM; /* MMAP_IO size must fit in reserve buffer */ } if (hp->flags & SG_FLAG_DIRECT_IO) { sg_remove_request(sfp, srp); return -EINVAL; /* either MMAP_IO or DIRECT_IO (not both) */ } if (sfp->res_in_use) { sg_remove_request(sfp, srp); return -EBUSY; /* reserve buffer already being used */ } } ul_timeout = msecs_to_jiffies(srp->header.timeout); timeout = (ul_timeout < INT_MAX) ? 
ul_timeout : INT_MAX; if ((!hp->cmdp) || (hp->cmd_len < 6) || (hp->cmd_len > sizeof (cmnd))) { sg_remove_request(sfp, srp); return -EMSGSIZE; } if (!access_ok(VERIFY_READ, hp->cmdp, hp->cmd_len)) { sg_remove_request(sfp, srp); return -EFAULT; /* protects following copy_from_user()s + get_user()s */ } if (__copy_from_user(cmnd, hp->cmdp, hp->cmd_len)) { sg_remove_request(sfp, srp); return -EFAULT; } if (read_only && sg_allow_access(file, cmnd)) { sg_remove_request(sfp, srp); return -EPERM; } k = sg_common_write(sfp, srp, cmnd, timeout, blocking); if (k < 0) return k; if (o_srp) *o_srp = srp; return count; } static int sg_common_write(Sg_fd * sfp, Sg_request * srp, unsigned char *cmnd, int timeout, int blocking) { int k, at_head; Sg_device *sdp = sfp->parentdp; sg_io_hdr_t *hp = &srp->header; srp->data.cmd_opcode = cmnd[0]; /* hold opcode of command */ hp->status = 0; hp->masked_status = 0; hp->msg_status = 0; hp->info = 0; hp->host_status = 0; hp->driver_status = 0; hp->resid = 0; SCSI_LOG_TIMEOUT(4, sg_printk(KERN_INFO, sfp->parentdp, "sg_common_write: scsi opcode=0x%02x, cmd_size=%d\n", (int) cmnd[0], (int) hp->cmd_len)); if (hp->dxfer_len >= SZ_256M) { sg_remove_request(sfp, srp); return -EINVAL; } k = sg_start_req(srp, cmnd); if (k) { SCSI_LOG_TIMEOUT(1, sg_printk(KERN_INFO, sfp->parentdp, "sg_common_write: start_req err=%d\n", k)); sg_finish_rem_req(srp); sg_remove_request(sfp, srp); return k; /* probably out of space --> ENOMEM */ } if (atomic_read(&sdp->detaching)) { if (srp->bio) { scsi_req_free_cmd(scsi_req(srp->rq)); blk_end_request_all(srp->rq, BLK_STS_IOERR); srp->rq = NULL; } sg_finish_rem_req(srp); sg_remove_request(sfp, srp); return -ENODEV; } hp->duration = jiffies_to_msecs(jiffies); if (hp->interface_id != '\0' && /* v3 (or later) interface */ (SG_FLAG_Q_AT_TAIL & hp->flags)) at_head = 0; else at_head = 1; srp->rq->timeout = timeout; kref_get(&sfp->f_ref); /* sg_rq_end_io() does kref_put(). */ blk_execute_rq_nowait(sdp->device->request_queue, sdp->disk, srp->rq, at_head, sg_rq_end_io); return 0; } static int srp_done(Sg_fd *sfp, Sg_request *srp) { unsigned long flags; int ret; read_lock_irqsave(&sfp->rq_list_lock, flags); ret = srp->done; read_unlock_irqrestore(&sfp->rq_list_lock, flags); return ret; } static int max_sectors_bytes(struct request_queue *q) { unsigned int max_sectors = queue_max_sectors(q); max_sectors = min_t(unsigned int, max_sectors, INT_MAX >> 9); return max_sectors << 9; } static void sg_fill_request_table(Sg_fd *sfp, sg_req_info_t *rinfo) { Sg_request *srp; int val; unsigned int ms; val = 0; list_for_each_entry(srp, &sfp->rq_list, entry) { if (val >= SG_MAX_QUEUE) break; rinfo[val].req_state = srp->done + 1; rinfo[val].problem = srp->header.masked_status & srp->header.host_status & srp->header.driver_status; if (srp->done) rinfo[val].duration = srp->header.duration; else { ms = jiffies_to_msecs(jiffies); rinfo[val].duration = (ms > srp->header.duration) ? 
(ms - srp->header.duration) : 0; } rinfo[val].orphan = srp->orphan; rinfo[val].sg_io_owned = srp->sg_io_owned; rinfo[val].pack_id = srp->header.pack_id; rinfo[val].usr_ptr = srp->header.usr_ptr; val++; } } static long sg_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg) { void __user *p = (void __user *)arg; int __user *ip = p; int result, val, read_only; Sg_device *sdp; Sg_fd *sfp; Sg_request *srp; unsigned long iflags; if ((!(sfp = (Sg_fd *) filp->private_data)) || (!(sdp = sfp->parentdp))) return -ENXIO; SCSI_LOG_TIMEOUT(3, sg_printk(KERN_INFO, sdp, "sg_ioctl: cmd=0x%x\n", (int) cmd_in)); read_only = (O_RDWR != (filp->f_flags & O_ACCMODE)); switch (cmd_in) { case SG_IO: if (atomic_read(&sdp->detaching)) return -ENODEV; if (!scsi_block_when_processing_errors(sdp->device)) return -ENXIO; if (!access_ok(VERIFY_WRITE, p, SZ_SG_IO_HDR)) return -EFAULT; result = sg_new_write(sfp, filp, p, SZ_SG_IO_HDR, 1, read_only, 1, &srp); if (result < 0) return result; result = wait_event_interruptible(sfp->read_wait, srp_done(sfp, srp)); write_lock_irq(&sfp->rq_list_lock); if (srp->done) { srp->done = 2; write_unlock_irq(&sfp->rq_list_lock); result = sg_new_read(sfp, p, SZ_SG_IO_HDR, srp); return (result < 0) ? result : 0; } srp->orphan = 1; write_unlock_irq(&sfp->rq_list_lock); return result; /* -ERESTARTSYS because signal hit process */ case SG_SET_TIMEOUT: result = get_user(val, ip); if (result) return result; if (val < 0) return -EIO; if (val >= mult_frac((s64)INT_MAX, USER_HZ, HZ)) val = min_t(s64, mult_frac((s64)INT_MAX, USER_HZ, HZ), INT_MAX); sfp->timeout_user = val; sfp->timeout = mult_frac(val, HZ, USER_HZ); return 0; case SG_GET_TIMEOUT: /* N.B. User receives timeout as return value */ /* strange ..., for backward compatibility */ return sfp->timeout_user; case SG_SET_FORCE_LOW_DMA: /* * N.B. This ioctl never worked properly, but failed to * return an error value. So returning '0' to keep compability * with legacy applications. */ return 0; case SG_GET_LOW_DMA: return put_user((int) sdp->device->host->unchecked_isa_dma, ip); case SG_GET_SCSI_ID: if (!access_ok(VERIFY_WRITE, p, sizeof (sg_scsi_id_t))) return -EFAULT; else { sg_scsi_id_t __user *sg_idp = p; if (atomic_read(&sdp->detaching)) return -ENODEV; __put_user((int) sdp->device->host->host_no, &sg_idp->host_no); __put_user((int) sdp->device->channel, &sg_idp->channel); __put_user((int) sdp->device->id, &sg_idp->scsi_id); __put_user((int) sdp->device->lun, &sg_idp->lun); __put_user((int) sdp->device->type, &sg_idp->scsi_type); __put_user((short) sdp->device->host->cmd_per_lun, &sg_idp->h_cmd_per_lun); __put_user((short) sdp->device->queue_depth, &sg_idp->d_queue_depth); __put_user(0, &sg_idp->unused[0]); __put_user(0, &sg_idp->unused[1]); return 0; } case SG_SET_FORCE_PACK_ID: result = get_user(val, ip); if (result) return result; sfp->force_packid = val ? 
1 : 0; return 0; case SG_GET_PACK_ID: if (!access_ok(VERIFY_WRITE, ip, sizeof (int))) return -EFAULT; read_lock_irqsave(&sfp->rq_list_lock, iflags); list_for_each_entry(srp, &sfp->rq_list, entry) { if ((1 == srp->done) && (!srp->sg_io_owned)) { read_unlock_irqrestore(&sfp->rq_list_lock, iflags); __put_user(srp->header.pack_id, ip); return 0; } } read_unlock_irqrestore(&sfp->rq_list_lock, iflags); __put_user(-1, ip); return 0; case SG_GET_NUM_WAITING: read_lock_irqsave(&sfp->rq_list_lock, iflags); val = 0; list_for_each_entry(srp, &sfp->rq_list, entry) { if ((1 == srp->done) && (!srp->sg_io_owned)) ++val; } read_unlock_irqrestore(&sfp->rq_list_lock, iflags); return put_user(val, ip); case SG_GET_SG_TABLESIZE: return put_user(sdp->sg_tablesize, ip); case SG_SET_RESERVED_SIZE: result = get_user(val, ip); if (result) return result; if (val < 0) return -EINVAL; val = min_t(int, val, max_sectors_bytes(sdp->device->request_queue)); mutex_lock(&sfp->f_mutex); if (val != sfp->reserve.bufflen) { if (sfp->mmap_called || sfp->res_in_use) { mutex_unlock(&sfp->f_mutex); return -EBUSY; } sg_remove_scat(sfp, &sfp->reserve); sg_build_reserve(sfp, val); } mutex_unlock(&sfp->f_mutex); return 0; case SG_GET_RESERVED_SIZE: val = min_t(int, sfp->reserve.bufflen, max_sectors_bytes(sdp->device->request_queue)); return put_user(val, ip); case SG_SET_COMMAND_Q: result = get_user(val, ip); if (result) return result; sfp->cmd_q = val ? 1 : 0; return 0; case SG_GET_COMMAND_Q: return put_user((int) sfp->cmd_q, ip); case SG_SET_KEEP_ORPHAN: result = get_user(val, ip); if (result) return result; sfp->keep_orphan = val; return 0; case SG_GET_KEEP_ORPHAN: return put_user((int) sfp->keep_orphan, ip); case SG_NEXT_CMD_LEN: result = get_user(val, ip); if (result) return result; if (val > SG_MAX_CDB_SIZE) return -ENOMEM; sfp->next_cmd_len = (val > 0) ? val : 0; return 0; case SG_GET_VERSION_NUM: return put_user(sg_version_num, ip); case SG_GET_ACCESS_COUNT: /* faked - we don't have a real access count anymore */ val = (sdp->device ? 1 : 0); return put_user(val, ip); case SG_GET_REQUEST_TABLE: if (!access_ok(VERIFY_WRITE, p, SZ_SG_REQ_INFO * SG_MAX_QUEUE)) return -EFAULT; else { sg_req_info_t *rinfo; rinfo = kcalloc(SG_MAX_QUEUE, SZ_SG_REQ_INFO, GFP_KERNEL); if (!rinfo) return -ENOMEM; read_lock_irqsave(&sfp->rq_list_lock, iflags); sg_fill_request_table(sfp, rinfo); read_unlock_irqrestore(&sfp->rq_list_lock, iflags); result = __copy_to_user(p, rinfo, SZ_SG_REQ_INFO * SG_MAX_QUEUE); result = result ? 
-EFAULT : 0; kfree(rinfo); return result; } case SG_EMULATED_HOST: if (atomic_read(&sdp->detaching)) return -ENODEV; return put_user(sdp->device->host->hostt->emulated, ip); case SCSI_IOCTL_SEND_COMMAND: if (atomic_read(&sdp->detaching)) return -ENODEV; return sg_scsi_ioctl(sdp->device->request_queue, NULL, filp->f_mode, p); case SG_SET_DEBUG: result = get_user(val, ip); if (result) return result; sdp->sgdebug = (char) val; return 0; case BLKSECTGET: return put_user(max_sectors_bytes(sdp->device->request_queue), ip); case BLKTRACESETUP: return blk_trace_setup(sdp->device->request_queue, sdp->disk->disk_name, MKDEV(SCSI_GENERIC_MAJOR, sdp->index), NULL, p); case BLKTRACESTART: return blk_trace_startstop(sdp->device->request_queue, 1); case BLKTRACESTOP: return blk_trace_startstop(sdp->device->request_queue, 0); case BLKTRACETEARDOWN: return blk_trace_remove(sdp->device->request_queue); case SCSI_IOCTL_GET_IDLUN: case SCSI_IOCTL_GET_BUS_NUMBER: case SCSI_IOCTL_PROBE_HOST: case SG_GET_TRANSFORM: case SG_SCSI_RESET: if (atomic_read(&sdp->detaching)) return -ENODEV; break; default: if (read_only) return -EPERM; /* don't know so take safe approach */ break; } result = scsi_ioctl_block_when_processing_errors(sdp->device, cmd_in, filp->f_flags & O_NDELAY); if (result) return result; return scsi_ioctl(sdp->device, cmd_in, p); } #ifdef CONFIG_COMPAT static long sg_compat_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg) { Sg_device *sdp; Sg_fd *sfp; struct scsi_device *sdev; if ((!(sfp = (Sg_fd *) filp->private_data)) || (!(sdp = sfp->parentdp))) return -ENXIO; sdev = sdp->device; if (sdev->host->hostt->compat_ioctl) { int ret; ret = sdev->host->hostt->compat_ioctl(sdev, cmd_in, (void __user *)arg); return ret; } return -ENOIOCTLCMD; } #endif static __poll_t sg_poll(struct file *filp, poll_table * wait) { __poll_t res = 0; Sg_device *sdp; Sg_fd *sfp; Sg_request *srp; int count = 0; unsigned long iflags; sfp = filp->private_data; if (!sfp) return EPOLLERR; sdp = sfp->parentdp; if (!sdp) return EPOLLERR; poll_wait(filp, &sfp->read_wait, wait); read_lock_irqsave(&sfp->rq_list_lock, iflags); list_for_each_entry(srp, &sfp->rq_list, entry) { /* if any read waiting, flag it */ if ((0 == res) && (1 == srp->done) && (!srp->sg_io_owned)) res = EPOLLIN | EPOLLRDNORM; ++count; } read_unlock_irqrestore(&sfp->rq_list_lock, iflags); if (atomic_read(&sdp->detaching)) res |= EPOLLHUP; else if (!sfp->cmd_q) { if (0 == count) res |= EPOLLOUT | EPOLLWRNORM; } else if (count < SG_MAX_QUEUE) res |= EPOLLOUT | EPOLLWRNORM; SCSI_LOG_TIMEOUT(3, sg_printk(KERN_INFO, sdp, "sg_poll: res=0x%x\n", (__force u32) res)); return res; } static int sg_fasync(int fd, struct file *filp, int mode) { Sg_device *sdp; Sg_fd *sfp; if ((!(sfp = (Sg_fd *) filp->private_data)) || (!(sdp = sfp->parentdp))) return -ENXIO; SCSI_LOG_TIMEOUT(3, sg_printk(KERN_INFO, sdp, "sg_fasync: mode=%d\n", mode)); return fasync_helper(fd, filp, mode, &sfp->async_qp); } static vm_fault_t sg_vma_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; Sg_fd *sfp; unsigned long offset, len, sa; Sg_scatter_hold *rsv_schp; int k, length; if ((NULL == vma) || (!(sfp = (Sg_fd *) vma->vm_private_data))) return VM_FAULT_SIGBUS; rsv_schp = &sfp->reserve; offset = vmf->pgoff << PAGE_SHIFT; if (offset >= rsv_schp->bufflen) return VM_FAULT_SIGBUS; SCSI_LOG_TIMEOUT(3, sg_printk(KERN_INFO, sfp->parentdp, "sg_vma_fault: offset=%lu, scatg=%d\n", offset, rsv_schp->k_use_sg)); sa = vma->vm_start; length = 1 << (PAGE_SHIFT + rsv_schp->page_order); for (k 
= 0; k < rsv_schp->k_use_sg && sa < vma->vm_end; k++) { len = vma->vm_end - sa; len = (len < length) ? len : length; if (offset < len) { struct page *page = nth_page(rsv_schp->pages[k], offset >> PAGE_SHIFT); get_page(page); /* increment page count */ vmf->page = page; return 0; /* success */ } sa += len; offset -= len; } return VM_FAULT_SIGBUS; } static const struct vm_operations_struct sg_mmap_vm_ops = { .fault = sg_vma_fault, }; static int sg_mmap(struct file *filp, struct vm_area_struct *vma) { Sg_fd *sfp; unsigned long req_sz, len, sa; Sg_scatter_hold *rsv_schp; int k, length; int ret = 0; if ((!filp) || (!vma) || (!(sfp = (Sg_fd *) filp->private_data))) return -ENXIO; req_sz = vma->vm_end - vma->vm_start; SCSI_LOG_TIMEOUT(3, sg_printk(KERN_INFO, sfp->parentdp, "sg_mmap starting, vm_start=%p, len=%d\n", (void *) vma->vm_start, (int) req_sz)); if (vma->vm_pgoff) return -EINVAL; /* want no offset */ rsv_schp = &sfp->reserve; mutex_lock(&sfp->f_mutex); if (req_sz > rsv_schp->bufflen) { ret = -ENOMEM; /* cannot map more than reserved buffer */ goto out; } sa = vma->vm_start; length = 1 << (PAGE_SHIFT + rsv_schp->page_order); for (k = 0; k < rsv_schp->k_use_sg && sa < vma->vm_end; k++) { len = vma->vm_end - sa; len = (len < length) ? len : length; sa += len; } sfp->mmap_called = 1; vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP; vma->vm_private_data = sfp; vma->vm_ops = &sg_mmap_vm_ops; out: mutex_unlock(&sfp->f_mutex); return ret; } static void sg_rq_end_io_usercontext(struct work_struct *work) { struct sg_request *srp = container_of(work, struct sg_request, ew.work); struct sg_fd *sfp = srp->parentfp; sg_finish_rem_req(srp); sg_remove_request(sfp, srp); kref_put(&sfp->f_ref, sg_remove_sfp); } /* * This function is a "bottom half" handler that is called by the mid * level when a command is completed (or has failed). */ static void sg_rq_end_io(struct request *rq, blk_status_t status) { struct sg_request *srp = rq->end_io_data; struct scsi_request *req = scsi_req(rq); Sg_device *sdp; Sg_fd *sfp; unsigned long iflags; unsigned int ms; char *sense; int result, resid, done = 1; if (WARN_ON(srp->done != 0)) return; sfp = srp->parentfp; if (WARN_ON(sfp == NULL)) return; sdp = sfp->parentdp; if (unlikely(atomic_read(&sdp->detaching))) pr_info("%s: device detaching\n", __func__); sense = req->sense; result = req->result; resid = req->resid_len; SCSI_LOG_TIMEOUT(4, sg_printk(KERN_INFO, sdp, "sg_cmd_done: pack_id=%d, res=0x%x\n", srp->header.pack_id, result)); srp->header.resid = resid; ms = jiffies_to_msecs(jiffies); srp->header.duration = (ms > srp->header.duration) ? (ms - srp->header.duration) : 0; if (0 != result) { struct scsi_sense_hdr sshdr; srp->header.status = 0xff & result; srp->header.masked_status = status_byte(result); srp->header.msg_status = msg_byte(result); srp->header.host_status = host_byte(result); srp->header.driver_status = driver_byte(result); if ((sdp->sgdebug > 0) && ((CHECK_CONDITION == srp->header.masked_status) || (COMMAND_TERMINATED == srp->header.masked_status))) __scsi_print_sense(sdp->device, __func__, sense, SCSI_SENSE_BUFFERSIZE); /* Following if statement is a patch supplied by Eric Youngdale */ if (driver_byte(result) != 0 && scsi_normalize_sense(sense, SCSI_SENSE_BUFFERSIZE, &sshdr) && !scsi_sense_is_deferred(&sshdr) && sshdr.sense_key == UNIT_ATTENTION && sdp->device->removable) { /* Detected possible disc change. 
Set the bit - this */ /* may be used if there are filesystems using this device */ sdp->device->changed = 1; } } if (req->sense_len) memcpy(srp->sense_b, req->sense, SCSI_SENSE_BUFFERSIZE); /* Rely on write phase to clean out srp status values, so no "else" */ /* * Free the request as soon as it is complete so that its resources * can be reused without waiting for userspace to read() the * result. But keep the associated bio (if any) around until * blk_rq_unmap_user() can be called from user context. */ srp->rq = NULL; scsi_req_free_cmd(scsi_req(rq)); __blk_put_request(rq->q, rq); write_lock_irqsave(&sfp->rq_list_lock, iflags); if (unlikely(srp->orphan)) { if (sfp->keep_orphan) srp->sg_io_owned = 0; else done = 0; } srp->done = done; write_unlock_irqrestore(&sfp->rq_list_lock, iflags); if (likely(done)) { /* Now wake up any sg_read() that is waiting for this * packet. */ wake_up_interruptible(&sfp->read_wait); kill_fasync(&sfp->async_qp, SIGPOLL, POLL_IN); kref_put(&sfp->f_ref, sg_remove_sfp); } else { INIT_WORK(&srp->ew.work, sg_rq_end_io_usercontext); schedule_work(&srp->ew.work); } } static const struct file_operations sg_fops = { .owner = THIS_MODULE, .read = sg_read, .write = sg_write, .poll = sg_poll, .unlocked_ioctl = sg_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = sg_compat_ioctl, #endif .open = sg_open, .mmap = sg_mmap, .release = sg_release, .fasync = sg_fasync, .llseek = no_llseek, }; static struct class *sg_sysfs_class; static int sg_sysfs_valid = 0; static Sg_device * sg_alloc(struct gendisk *disk, struct scsi_device *scsidp) { struct request_queue *q = scsidp->request_queue; Sg_device *sdp; unsigned long iflags; int error; u32 k; sdp = kzalloc(sizeof(Sg_device), GFP_KERNEL); if (!sdp) { sdev_printk(KERN_WARNING, scsidp, "%s: kmalloc Sg_device " "failure\n", __func__); return ERR_PTR(-ENOMEM); } idr_preload(GFP_KERNEL); write_lock_irqsave(&sg_index_lock, iflags); error = idr_alloc(&sg_index_idr, sdp, 0, SG_MAX_DEVS, GFP_NOWAIT); if (error < 0) { if (error == -ENOSPC) { sdev_printk(KERN_WARNING, scsidp, "Unable to attach sg device type=%d, minor number exceeds %d\n", scsidp->type, SG_MAX_DEVS - 1); error = -ENODEV; } else { sdev_printk(KERN_WARNING, scsidp, "%s: idr " "allocation Sg_device failure: %d\n", __func__, error); } goto out_unlock; } k = error; SCSI_LOG_TIMEOUT(3, sdev_printk(KERN_INFO, scsidp, "sg_alloc: dev=%d \n", k)); sprintf(disk->disk_name, "sg%d", k); disk->first_minor = k; sdp->disk = disk; sdp->device = scsidp; mutex_init(&sdp->open_rel_lock); INIT_LIST_HEAD(&sdp->sfds); init_waitqueue_head(&sdp->open_wait); atomic_set(&sdp->detaching, 0); rwlock_init(&sdp->sfd_lock); sdp->sg_tablesize = queue_max_segments(q); sdp->index = k; kref_init(&sdp->d_ref); error = 0; out_unlock: write_unlock_irqrestore(&sg_index_lock, iflags); idr_preload_end(); if (error) { kfree(sdp); return ERR_PTR(error); } return sdp; } static int sg_add_device(struct device *cl_dev, struct class_interface *cl_intf) { struct scsi_device *scsidp = to_scsi_device(cl_dev->parent); struct gendisk *disk; Sg_device *sdp = NULL; struct cdev * cdev = NULL; int error; unsigned long iflags; disk = alloc_disk(1); if (!disk) { pr_warn("%s: alloc_disk failed\n", __func__); return -ENOMEM; } disk->major = SCSI_GENERIC_MAJOR; error = -ENOMEM; cdev = cdev_alloc(); if (!cdev) { pr_warn("%s: cdev_alloc failed\n", __func__); goto out; } cdev->owner = THIS_MODULE; cdev->ops = &sg_fops; sdp = sg_alloc(disk, scsidp); if (IS_ERR(sdp)) { pr_warn("%s: sg_alloc failed\n", __func__); error = PTR_ERR(sdp); goto out; } 
error = cdev_add(cdev, MKDEV(SCSI_GENERIC_MAJOR, sdp->index), 1); if (error) goto cdev_add_err; sdp->cdev = cdev; if (sg_sysfs_valid) { struct device *sg_class_member; sg_class_member = device_create(sg_sysfs_class, cl_dev->parent, MKDEV(SCSI_GENERIC_MAJOR, sdp->index), sdp, "%s", disk->disk_name); if (IS_ERR(sg_class_member)) { pr_err("%s: device_create failed\n", __func__); error = PTR_ERR(sg_class_member); goto cdev_add_err; } error = sysfs_create_link(&scsidp->sdev_gendev.kobj, &sg_class_member->kobj, "generic"); if (error) pr_err("%s: unable to make symlink 'generic' back " "to sg%d\n", __func__, sdp->index); } else pr_warn("%s: sg_sys Invalid\n", __func__); sdev_printk(KERN_NOTICE, scsidp, "Attached scsi generic sg%d " "type %d\n", sdp->index, scsidp->type); dev_set_drvdata(cl_dev, sdp); return 0; cdev_add_err: write_lock_irqsave(&sg_index_lock, iflags); idr_remove(&sg_index_idr, sdp->index); write_unlock_irqrestore(&sg_index_lock, iflags); kfree(sdp); out: put_disk(disk); if (cdev) cdev_del(cdev); return error; } static void sg_device_destroy(struct kref *kref) { struct sg_device *sdp = container_of(kref, struct sg_device, d_ref); unsigned long flags; /* CAUTION! Note that the device can still be found via idr_find() * even though the refcount is 0. Therefore, do idr_remove() BEFORE * any other cleanup. */ write_lock_irqsave(&sg_index_lock, flags); idr_remove(&sg_index_idr, sdp->index); write_unlock_irqrestore(&sg_index_lock, flags); SCSI_LOG_TIMEOUT(3, sg_printk(KERN_INFO, sdp, "sg_device_destroy\n")); put_disk(sdp->disk); kfree(sdp); } static void sg_remove_device(struct device *cl_dev, struct class_interface *cl_intf) { struct scsi_device *scsidp = to_scsi_device(cl_dev->parent); Sg_device *sdp = dev_get_drvdata(cl_dev); unsigned long iflags; Sg_fd *sfp; int val; if (!sdp) return; /* want sdp->detaching non-zero as soon as possible */ val = atomic_inc_return(&sdp->detaching); if (val > 1) return; /* only want to do following once per device */ SCSI_LOG_TIMEOUT(3, sg_printk(KERN_INFO, sdp, "%s\n", __func__)); read_lock_irqsave(&sdp->sfd_lock, iflags); list_for_each_entry(sfp, &sdp->sfds, sfd_siblings) { wake_up_interruptible_all(&sfp->read_wait); kill_fasync(&sfp->async_qp, SIGPOLL, POLL_HUP); } wake_up_interruptible_all(&sdp->open_wait); read_unlock_irqrestore(&sdp->sfd_lock, iflags); sysfs_remove_link(&scsidp->sdev_gendev.kobj, "generic"); device_destroy(sg_sysfs_class, MKDEV(SCSI_GENERIC_MAJOR, sdp->index)); cdev_del(sdp->cdev); sdp->cdev = NULL; kref_put(&sdp->d_ref, sg_device_destroy); } module_param_named(scatter_elem_sz, scatter_elem_sz, int, S_IRUGO | S_IWUSR); module_param_named(def_reserved_size, def_reserved_size, int, S_IRUGO | S_IWUSR); module_param_named(allow_dio, sg_allow_dio, int, S_IRUGO | S_IWUSR); MODULE_AUTHOR("Douglas Gilbert"); MODULE_DESCRIPTION("SCSI generic (sg) driver"); MODULE_LICENSE("GPL"); MODULE_VERSION(SG_VERSION_STR); MODULE_ALIAS_CHARDEV_MAJOR(SCSI_GENERIC_MAJOR); MODULE_PARM_DESC(scatter_elem_sz, "scatter gather element " "size (default: max(SG_SCATTER_SZ, PAGE_SIZE))"); MODULE_PARM_DESC(def_reserved_size, "size of buffer reserved for each fd"); MODULE_PARM_DESC(allow_dio, "allow direct I/O (default: 0 (disallow))"); static int __init init_sg(void) { int rc; if (scatter_elem_sz < PAGE_SIZE) { scatter_elem_sz = PAGE_SIZE; scatter_elem_sz_prev = scatter_elem_sz; } if (def_reserved_size >= 0) sg_big_buff = def_reserved_size; else def_reserved_size = sg_big_buff; rc = register_chrdev_region(MKDEV(SCSI_GENERIC_MAJOR, 0), SG_MAX_DEVS, "sg"); if (rc) 
return rc; sg_sysfs_class = class_create(THIS_MODULE, "scsi_generic"); if ( IS_ERR(sg_sysfs_class) ) { rc = PTR_ERR(sg_sysfs_class); goto err_out; } sg_sysfs_valid = 1; rc = scsi_register_interface(&sg_interface); if (0 == rc) { #ifdef CONFIG_SCSI_PROC_FS sg_proc_init(); #endif /* CONFIG_SCSI_PROC_FS */ return 0; } class_destroy(sg_sysfs_class); err_out: unregister_chrdev_region(MKDEV(SCSI_GENERIC_MAJOR, 0), SG_MAX_DEVS); return rc; } static void __exit exit_sg(void) { #ifdef CONFIG_SCSI_PROC_FS remove_proc_subtree("scsi/sg", NULL); #endif /* CONFIG_SCSI_PROC_FS */ scsi_unregister_interface(&sg_interface); class_destroy(sg_sysfs_class); sg_sysfs_valid = 0; unregister_chrdev_region(MKDEV(SCSI_GENERIC_MAJOR, 0), SG_MAX_DEVS); idr_destroy(&sg_index_idr); } static int sg_start_req(Sg_request *srp, unsigned char *cmd) { int res; struct request *rq; struct scsi_request *req; Sg_fd *sfp = srp->parentfp; sg_io_hdr_t *hp = &srp->header; int dxfer_len = (int) hp->dxfer_len; int dxfer_dir = hp->dxfer_direction; unsigned int iov_count = hp->iovec_count; Sg_scatter_hold *req_schp = &srp->data; Sg_scatter_hold *rsv_schp = &sfp->reserve; struct request_queue *q = sfp->parentdp->device->request_queue; struct rq_map_data *md, map_data; int rw = hp->dxfer_direction == SG_DXFER_TO_DEV ? WRITE : READ; unsigned char *long_cmdp = NULL; SCSI_LOG_TIMEOUT(4, sg_printk(KERN_INFO, sfp->parentdp, "sg_start_req: dxfer_len=%d\n", dxfer_len)); if (hp->cmd_len > BLK_MAX_CDB) { long_cmdp = kzalloc(hp->cmd_len, GFP_KERNEL); if (!long_cmdp) return -ENOMEM; } /* * NOTE * * With scsi-mq enabled, there are a fixed number of preallocated * requests equal in number to shost->can_queue. If all of the * preallocated requests are already in use, then blk_get_request() * will sleep until an active command completes, freeing up a request. * Although waiting in an asynchronous interface is less than ideal, we * do not want to use BLK_MQ_REQ_NOWAIT here because userspace might * not expect an EWOULDBLOCK from this condition. */ rq = blk_get_request(q, hp->dxfer_direction == SG_DXFER_TO_DEV ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, 0); if (IS_ERR(rq)) { kfree(long_cmdp); return PTR_ERR(rq); } req = scsi_req(rq); if (hp->cmd_len > BLK_MAX_CDB) req->cmd = long_cmdp; memcpy(req->cmd, cmd, hp->cmd_len); req->cmd_len = hp->cmd_len; srp->rq = rq; rq->end_io_data = srp; req->retries = SG_DEFAULT_RETRIES; if ((dxfer_len <= 0) || (dxfer_dir == SG_DXFER_NONE)) return 0; if (sg_allow_dio && hp->flags & SG_FLAG_DIRECT_IO && dxfer_dir != SG_DXFER_UNKNOWN && !iov_count && !sfp->parentdp->device->host->unchecked_isa_dma && blk_rq_aligned(q, (unsigned long)hp->dxferp, dxfer_len)) md = NULL; else md = &map_data; if (md) { mutex_lock(&sfp->f_mutex); if (dxfer_len <= rsv_schp->bufflen && !sfp->res_in_use) { sfp->res_in_use = 1; sg_link_reserve(sfp, srp, dxfer_len); } else if (hp->flags & SG_FLAG_MMAP_IO) { res = -EBUSY; /* sfp->res_in_use == 1 */ if (dxfer_len > rsv_schp->bufflen) res = -ENOMEM; mutex_unlock(&sfp->f_mutex); return res; } else { res = sg_build_indirect(req_schp, sfp, dxfer_len); if (res) { mutex_unlock(&sfp->f_mutex); return res; } } mutex_unlock(&sfp->f_mutex); md->pages = req_schp->pages; md->page_order = req_schp->page_order; md->nr_entries = req_schp->k_use_sg; md->offset = 0; md->null_mapped = hp->dxferp ? 
0 : 1; if (dxfer_dir == SG_DXFER_TO_FROM_DEV) md->from_user = 1; else md->from_user = 0; } if (iov_count) { struct iovec *iov = NULL; struct iov_iter i; res = import_iovec(rw, hp->dxferp, iov_count, 0, &iov, &i); if (res < 0) return res; iov_iter_truncate(&i, hp->dxfer_len); if (!iov_iter_count(&i)) { kfree(iov); return -EINVAL; } res = blk_rq_map_user_iov(q, rq, md, &i, GFP_ATOMIC); kfree(iov); } else res = blk_rq_map_user(q, rq, md, hp->dxferp, hp->dxfer_len, GFP_ATOMIC); if (!res) { srp->bio = rq->bio; if (!md) { req_schp->dio_in_use = 1; hp->info |= SG_INFO_DIRECT_IO; } } return res; } static int sg_finish_rem_req(Sg_request *srp) { int ret = 0; Sg_fd *sfp = srp->parentfp; Sg_scatter_hold *req_schp = &srp->data; SCSI_LOG_TIMEOUT(4, sg_printk(KERN_INFO, sfp->parentdp, "sg_finish_rem_req: res_used=%d\n", (int) srp->res_used)); if (srp->bio) ret = blk_rq_unmap_user(srp->bio); if (srp->rq) { scsi_req_free_cmd(scsi_req(srp->rq)); blk_put_request(srp->rq); } if (srp->res_used) sg_unlink_reserve(sfp, srp); else sg_remove_scat(sfp, req_schp); return ret; } static int sg_build_sgat(Sg_scatter_hold * schp, const Sg_fd * sfp, int tablesize) { int sg_bufflen = tablesize * sizeof(struct page *); gfp_t gfp_flags = GFP_ATOMIC | __GFP_NOWARN; schp->pages = kzalloc(sg_bufflen, gfp_flags); if (!schp->pages) return -ENOMEM; schp->sglist_len = sg_bufflen; return tablesize; /* number of scat_gath elements allocated */ } static int sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size) { int ret_sz = 0, i, k, rem_sz, num, mx_sc_elems; int sg_tablesize = sfp->parentdp->sg_tablesize; int blk_size = buff_size, order; gfp_t gfp_mask = GFP_ATOMIC | __GFP_COMP | __GFP_NOWARN | __GFP_ZERO; struct sg_device *sdp = sfp->parentdp; if (blk_size < 0) return -EFAULT; if (0 == blk_size) ++blk_size; /* don't know why */ /* round request up to next highest SG_SECTOR_SZ byte boundary */ blk_size = ALIGN(blk_size, SG_SECTOR_SZ); SCSI_LOG_TIMEOUT(4, sg_printk(KERN_INFO, sfp->parentdp, "sg_build_indirect: buff_size=%d, blk_size=%d\n", buff_size, blk_size)); /* N.B. ret_sz carried into this block ... */ mx_sc_elems = sg_build_sgat(schp, sfp, sg_tablesize); if (mx_sc_elems < 0) return mx_sc_elems; /* most likely -ENOMEM */ num = scatter_elem_sz; if (unlikely(num != scatter_elem_sz_prev)) { if (num < PAGE_SIZE) { scatter_elem_sz = PAGE_SIZE; scatter_elem_sz_prev = PAGE_SIZE; } else scatter_elem_sz_prev = num; } if (sdp->device->host->unchecked_isa_dma) gfp_mask |= GFP_DMA; order = get_order(num); retry: ret_sz = 1 << (PAGE_SHIFT + order); for (k = 0, rem_sz = blk_size; rem_sz > 0 && k < mx_sc_elems; k++, rem_sz -= ret_sz) { num = (rem_sz > scatter_elem_sz_prev) ? 
scatter_elem_sz_prev : rem_sz; schp->pages[k] = alloc_pages(gfp_mask, order); if (!schp->pages[k]) goto out; if (num == scatter_elem_sz_prev) { if (unlikely(ret_sz > scatter_elem_sz_prev)) { scatter_elem_sz = ret_sz; scatter_elem_sz_prev = ret_sz; } } SCSI_LOG_TIMEOUT(5, sg_printk(KERN_INFO, sfp->parentdp, "sg_build_indirect: k=%d, num=%d, ret_sz=%d\n", k, num, ret_sz)); } /* end of for loop */ schp->page_order = order; schp->k_use_sg = k; SCSI_LOG_TIMEOUT(5, sg_printk(KERN_INFO, sfp->parentdp, "sg_build_indirect: k_use_sg=%d, rem_sz=%d\n", k, rem_sz)); schp->bufflen = blk_size; if (rem_sz > 0) /* must have failed */ return -ENOMEM; return 0; out: for (i = 0; i < k; i++) __free_pages(schp->pages[i], order); if (--order >= 0) goto retry; return -ENOMEM; } static void sg_remove_scat(Sg_fd * sfp, Sg_scatter_hold * schp) { SCSI_LOG_TIMEOUT(4, sg_printk(KERN_INFO, sfp->parentdp, "sg_remove_scat: k_use_sg=%d\n", schp->k_use_sg)); if (schp->pages && schp->sglist_len > 0) { if (!schp->dio_in_use) { int k; for (k = 0; k < schp->k_use_sg && schp->pages[k]; k++) { SCSI_LOG_TIMEOUT(5, sg_printk(KERN_INFO, sfp->parentdp, "sg_remove_scat: k=%d, pg=0x%p\n", k, schp->pages[k])); __free_pages(schp->pages[k], schp->page_order); } kfree(schp->pages); } } memset(schp, 0, sizeof (*schp)); } static int sg_read_oxfer(Sg_request * srp, char __user *outp, int num_read_xfer) { Sg_scatter_hold *schp = &srp->data; int k, num; SCSI_LOG_TIMEOUT(4, sg_printk(KERN_INFO, srp->parentfp->parentdp, "sg_read_oxfer: num_read_xfer=%d\n", num_read_xfer)); if ((!outp) || (num_read_xfer <= 0)) return 0; num = 1 << (PAGE_SHIFT + schp->page_order); for (k = 0; k < schp->k_use_sg && schp->pages[k]; k++) { if (num > num_read_xfer) { if (__copy_to_user(outp, page_address(schp->pages[k]), num_read_xfer)) return -EFAULT; break; } else { if (__copy_to_user(outp, page_address(schp->pages[k]), num)) return -EFAULT; num_read_xfer -= num; if (num_read_xfer <= 0) break; outp += num; } } return 0; } static void sg_build_reserve(Sg_fd * sfp, int req_size) { Sg_scatter_hold *schp = &sfp->reserve; SCSI_LOG_TIMEOUT(4, sg_printk(KERN_INFO, sfp->parentdp, "sg_build_reserve: req_size=%d\n", req_size)); do { if (req_size < PAGE_SIZE) req_size = PAGE_SIZE; if (0 == sg_build_indirect(schp, sfp, req_size)) return; else sg_remove_scat(sfp, schp); req_size >>= 1; /* divide by 2 */ } while (req_size > (PAGE_SIZE / 2)); } static void sg_link_reserve(Sg_fd * sfp, Sg_request * srp, int size) { Sg_scatter_hold *req_schp = &srp->data; Sg_scatter_hold *rsv_schp = &sfp->reserve; int k, num, rem; srp->res_used = 1; SCSI_LOG_TIMEOUT(4, sg_printk(KERN_INFO, sfp->parentdp, "sg_link_reserve: size=%d\n", size)); rem = size; num = 1 << (PAGE_SHIFT + rsv_schp->page_order); for (k = 0; k < rsv_schp->k_use_sg; k++) { if (rem <= num) { req_schp->k_use_sg = k + 1; req_schp->sglist_len = rsv_schp->sglist_len; req_schp->pages = rsv_schp->pages; req_schp->bufflen = size; req_schp->page_order = rsv_schp->page_order; break; } else rem -= num; } if (k >= rsv_schp->k_use_sg) SCSI_LOG_TIMEOUT(1, sg_printk(KERN_INFO, sfp->parentdp, "sg_link_reserve: BAD size\n")); } static void sg_unlink_reserve(Sg_fd * sfp, Sg_request * srp) { Sg_scatter_hold *req_schp = &srp->data; SCSI_LOG_TIMEOUT(4, sg_printk(KERN_INFO, srp->parentfp->parentdp, "sg_unlink_reserve: req->k_use_sg=%d\n", (int) req_schp->k_use_sg)); req_schp->k_use_sg = 0; req_schp->bufflen = 0; req_schp->pages = NULL; req_schp->page_order = 0; req_schp->sglist_len = 0; srp->res_used = 0; /* Called without mutex lock to avoid deadlock 
*/ sfp->res_in_use = 0; } static Sg_request * sg_get_rq_mark(Sg_fd * sfp, int pack_id, bool *busy) { Sg_request *resp; unsigned long iflags; *busy = false; write_lock_irqsave(&sfp->rq_list_lock, iflags); list_for_each_entry(resp, &sfp->rq_list, entry) { /* look for requests that are not SG_IO owned */ if ((!resp->sg_io_owned) && ((-1 == pack_id) || (resp->header.pack_id == pack_id))) { switch (resp->done) { case 0: /* request active */ *busy = true; break; case 1: /* request done; response ready to return */ resp->done = 2; /* guard against other readers */ write_unlock_irqrestore(&sfp->rq_list_lock, iflags); return resp; case 2: /* response already being returned */ break; } } } write_unlock_irqrestore(&sfp->rq_list_lock, iflags); return NULL; } /* always adds to end of list */ static Sg_request * sg_add_request(Sg_fd * sfp) { int k; unsigned long iflags; Sg_request *rp = sfp->req_arr; write_lock_irqsave(&sfp->rq_list_lock, iflags); if (!list_empty(&sfp->rq_list)) { if (!sfp->cmd_q) goto out_unlock; for (k = 0; k < SG_MAX_QUEUE; ++k, ++rp) { if (!rp->parentfp) break; } if (k >= SG_MAX_QUEUE) goto out_unlock; } memset(rp, 0, sizeof (Sg_request)); rp->parentfp = sfp; rp->header.duration = jiffies_to_msecs(jiffies); list_add_tail(&rp->entry, &sfp->rq_list); write_unlock_irqrestore(&sfp->rq_list_lock, iflags); return rp; out_unlock: write_unlock_irqrestore(&sfp->rq_list_lock, iflags); return NULL; } /* Return of 1 for found; 0 for not found */ static int sg_remove_request(Sg_fd * sfp, Sg_request * srp) { unsigned long iflags; int res = 0; if (!sfp || !srp || list_empty(&sfp->rq_list)) return res; write_lock_irqsave(&sfp->rq_list_lock, iflags); if (!list_empty(&srp->entry)) { list_del(&srp->entry); srp->parentfp = NULL; res = 1; } write_unlock_irqrestore(&sfp->rq_list_lock, iflags); /* * If the device is detaching, wakeup any readers in case we just * removed the last response, which would leave nothing for them to * return other than -ENODEV. 
*/ if (unlikely(atomic_read(&sfp->parentdp->detaching))) wake_up_interruptible_all(&sfp->read_wait); return res; } static Sg_fd * sg_add_sfp(Sg_device * sdp) { Sg_fd *sfp; unsigned long iflags; int bufflen; sfp = kzalloc(sizeof(*sfp), GFP_ATOMIC | __GFP_NOWARN); if (!sfp) return ERR_PTR(-ENOMEM); init_waitqueue_head(&sfp->read_wait); rwlock_init(&sfp->rq_list_lock); INIT_LIST_HEAD(&sfp->rq_list); kref_init(&sfp->f_ref); mutex_init(&sfp->f_mutex); sfp->timeout = SG_DEFAULT_TIMEOUT; sfp->timeout_user = SG_DEFAULT_TIMEOUT_USER; sfp->force_packid = SG_DEF_FORCE_PACK_ID; sfp->cmd_q = SG_DEF_COMMAND_Q; sfp->keep_orphan = SG_DEF_KEEP_ORPHAN; sfp->parentdp = sdp; write_lock_irqsave(&sdp->sfd_lock, iflags); if (atomic_read(&sdp->detaching)) { write_unlock_irqrestore(&sdp->sfd_lock, iflags); kfree(sfp); return ERR_PTR(-ENODEV); } list_add_tail(&sfp->sfd_siblings, &sdp->sfds); write_unlock_irqrestore(&sdp->sfd_lock, iflags); SCSI_LOG_TIMEOUT(3, sg_printk(KERN_INFO, sdp, "sg_add_sfp: sfp=0x%p\n", sfp)); if (unlikely(sg_big_buff != def_reserved_size)) sg_big_buff = def_reserved_size; bufflen = min_t(int, sg_big_buff, max_sectors_bytes(sdp->device->request_queue)); sg_build_reserve(sfp, bufflen); SCSI_LOG_TIMEOUT(3, sg_printk(KERN_INFO, sdp, "sg_add_sfp: bufflen=%d, k_use_sg=%d\n", sfp->reserve.bufflen, sfp->reserve.k_use_sg)); kref_get(&sdp->d_ref); __module_get(THIS_MODULE); return sfp; } static void sg_remove_sfp_usercontext(struct work_struct *work) { struct sg_fd *sfp = container_of(work, struct sg_fd, ew.work); struct sg_device *sdp = sfp->parentdp; Sg_request *srp; unsigned long iflags; /* Cleanup any responses which were never read(). */ write_lock_irqsave(&sfp->rq_list_lock, iflags); while (!list_empty(&sfp->rq_list)) { srp = list_first_entry(&sfp->rq_list, Sg_request, entry); sg_finish_rem_req(srp); list_del(&srp->entry); srp->parentfp = NULL; } write_unlock_irqrestore(&sfp->rq_list_lock, iflags); if (sfp->reserve.bufflen > 0) { SCSI_LOG_TIMEOUT(6, sg_printk(KERN_INFO, sdp, "sg_remove_sfp: bufflen=%d, k_use_sg=%d\n", (int) sfp->reserve.bufflen, (int) sfp->reserve.k_use_sg)); sg_remove_scat(sfp, &sfp->reserve); } SCSI_LOG_TIMEOUT(6, sg_printk(KERN_INFO, sdp, "sg_remove_sfp: sfp=0x%p\n", sfp)); kfree(sfp); scsi_device_put(sdp->device); kref_put(&sdp->d_ref, sg_device_destroy); module_put(THIS_MODULE); } static void sg_remove_sfp(struct kref *kref) { struct sg_fd *sfp = container_of(kref, struct sg_fd, f_ref); struct sg_device *sdp = sfp->parentdp; unsigned long iflags; write_lock_irqsave(&sdp->sfd_lock, iflags); list_del(&sfp->sfd_siblings); write_unlock_irqrestore(&sdp->sfd_lock, iflags); INIT_WORK(&sfp->ew.work, sg_remove_sfp_usercontext); schedule_work(&sfp->ew.work); } #ifdef CONFIG_SCSI_PROC_FS static int sg_idr_max_id(int id, void *p, void *data) { int *k = data; if (*k < id) *k = id; return 0; } static int sg_last_dev(void) { int k = -1; unsigned long iflags; read_lock_irqsave(&sg_index_lock, iflags); idr_for_each(&sg_index_idr, sg_idr_max_id, &k); read_unlock_irqrestore(&sg_index_lock, iflags); return k + 1; /* origin 1 */ } #endif /* must be called with sg_index_lock held */ static Sg_device *sg_lookup_dev(int dev) { return idr_find(&sg_index_idr, dev); } static Sg_device * sg_get_dev(int dev) { struct sg_device *sdp; unsigned long flags;
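/*
 * Look up the Sg_device for this minor number under sg_index_lock (see
 * sg_lookup_dev() above) and, on success, take a d_ref reference.  Callers
 * such as sg_open() check for an ERR_PTR return and balance the reference
 * with kref_put(&sdp->d_ref, sg_device_destroy).
 */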